author     Michal Marek <mmarek@suse.cz>   2011-03-09 10:15:44 -0500
committer  Michal Marek <mmarek@suse.cz>   2011-03-09 10:15:44 -0500
commit     2d8ad8719591fa803b0d589ed057fa46f49b7155 (patch)
tree       4ae051577dad1161c91dafbf4207bb10a9dc91bb /kernel/trace
parent     9b4ce7bce5f30712fd926ab4599a803314a07719 (diff)
parent     c56eb8fb6dccb83d9fe62fd4dc00c834de9bc470 (diff)
Merge commit 'v2.6.38-rc1' into kbuild/packaging
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                    131
-rw-r--r--  kernel/trace/Makefile                    14
-rw-r--r--  kernel/trace/blktrace.c                 269
-rw-r--r--  kernel/trace/ftrace.c                   321
-rw-r--r--  kernel/trace/kmemtrace.c                511
-rw-r--r--  kernel/trace/power-traces.c               6
-rw-r--r--  kernel/trace/ring_buffer.c              655
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c      6
-rw-r--r--  kernel/trace/trace.c                    637
-rw-r--r--  kernel/trace/trace.h                    178
-rw-r--r--  kernel/trace/trace_boot.c               185
-rw-r--r--  kernel/trace/trace_branch.c              27
-rw-r--r--  kernel/trace/trace_clock.c               12
-rw-r--r--  kernel/trace/trace_entries.h            108
-rw-r--r--  kernel/trace/trace_event_perf.c         216
-rw-r--r--  kernel/trace/trace_event_profile.c      122
-rw-r--r--  kernel/trace/trace_events.c             476
-rw-r--r--  kernel/trace/trace_events_filter.c       52
-rw-r--r--  kernel/trace/trace_export.c             119
-rw-r--r--  kernel/trace/trace_functions.c            6
-rw-r--r--  kernel/trace/trace_functions_graph.c    494
-rw-r--r--  kernel/trace/trace_hw_branches.c        312
-rw-r--r--  kernel/trace/trace_irqsoff.c            258
-rw-r--r--  kernel/trace/trace_kdb.c                135
-rw-r--r--  kernel/trace/trace_kprobe.c            1192
-rw-r--r--  kernel/trace/trace_ksym.c               519
-rw-r--r--  kernel/trace/trace_mmiotrace.c            1
-rw-r--r--  kernel/trace/trace_output.c             198
-rw-r--r--  kernel/trace/trace_output.h               2
-rw-r--r--  kernel/trace/trace_sched_switch.c        21
-rw-r--r--  kernel/trace/trace_sched_wakeup.c       292
-rw-r--r--  kernel/trace/trace_selftest.c           154
-rw-r--r--  kernel/trace/trace_stack.c               33
-rw-r--r--  kernel/trace/trace_stat.c                 1
-rw-r--r--  kernel/trace/trace_syscalls.c           366
-rw-r--r--  kernel/trace/trace_sysprof.c            329
-rw-r--r--  kernel/trace/trace_workqueue.c           37
37 files changed, 4032 insertions, 4363 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..14674dce77a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
@@ -46,14 +44,16 @@ config HAVE_FTRACE_MCOUNT_RECORD
46 help 44 help
47 See Documentation/trace/ftrace-design.txt 45 See Documentation/trace/ftrace-design.txt
48 46
49config HAVE_HW_BRANCH_TRACER
50 bool
51
52config HAVE_SYSCALL_TRACEPOINTS 47config HAVE_SYSCALL_TRACEPOINTS
53 bool 48 bool
54 help 49 help
55 See Documentation/trace/ftrace-design.txt 50 See Documentation/trace/ftrace-design.txt
56 51
52config HAVE_C_RECORDMCOUNT
53 bool
54 help
55 C version of recordmcount available?
56
57config TRACER_MAX_TRACE 57config TRACER_MAX_TRACE
58 bool 58 bool
59 59
@@ -69,6 +69,21 @@ config EVENT_TRACING
69 select CONTEXT_SWITCH_TRACER 69 select CONTEXT_SWITCH_TRACER
70 bool 70 bool
71 71
72config EVENT_POWER_TRACING_DEPRECATED
73 depends on EVENT_TRACING
74 bool "Deprecated power event trace API, to be removed"
75 default y
76 help
77 Provides old power event types:
78 C-state/idle accounting events:
79 power:power_start
80 power:power_end
81 and old cpufreq accounting event:
82 power:power_frequency
83 This is for userspace compatibility
84 and will vanish after 5 kernel iterations,
85 namely 2.6.41.
86
72config CONTEXT_SWITCH_TRACER 87config CONTEXT_SWITCH_TRACER
73 bool 88 bool
74 89
@@ -126,7 +141,7 @@ if FTRACE
126config FUNCTION_TRACER 141config FUNCTION_TRACER
127 bool "Kernel Function Tracer" 142 bool "Kernel Function Tracer"
128 depends on HAVE_FUNCTION_TRACER 143 depends on HAVE_FUNCTION_TRACER
129 select FRAME_POINTER 144 select FRAME_POINTER if !ARM_UNWIND && !S390
130 select KALLSYMS 145 select KALLSYMS
131 select GENERIC_TRACER 146 select GENERIC_TRACER
132 select CONTEXT_SWITCH_TRACER 147 select CONTEXT_SWITCH_TRACER
@@ -158,7 +173,7 @@ config IRQSOFF_TRACER
158 bool "Interrupts-off Latency Tracer" 173 bool "Interrupts-off Latency Tracer"
159 default n 174 default n
160 depends on TRACE_IRQFLAGS_SUPPORT 175 depends on TRACE_IRQFLAGS_SUPPORT
161 depends on GENERIC_TIME 176 depends on !ARCH_USES_GETTIMEOFFSET
162 select TRACE_IRQFLAGS 177 select TRACE_IRQFLAGS
163 select GENERIC_TRACER 178 select GENERIC_TRACER
164 select TRACER_MAX_TRACE 179 select TRACER_MAX_TRACE
@@ -180,7 +195,7 @@ config IRQSOFF_TRACER
180config PREEMPT_TRACER 195config PREEMPT_TRACER
181 bool "Preemption-off Latency Tracer" 196 bool "Preemption-off Latency Tracer"
182 default n 197 default n
183 depends on GENERIC_TIME 198 depends on !ARCH_USES_GETTIMEOFFSET
184 depends on PREEMPT 199 depends on PREEMPT
185 select GENERIC_TRACER 200 select GENERIC_TRACER
186 select TRACER_MAX_TRACE 201 select TRACER_MAX_TRACE
@@ -199,15 +214,6 @@ config PREEMPT_TRACER
199 enabled. This option and the irqs-off timing option can be 214 enabled. This option and the irqs-off timing option can be
200 used together or separately.) 215 used together or separately.)
201 216
202config SYSPROF_TRACER
203 bool "Sysprof Tracer"
204 depends on X86
205 select GENERIC_TRACER
206 select CONTEXT_SWITCH_TRACER
207 help
208 This tracer provides the trace needed by the 'Sysprof' userspace
209 tool.
210
211config SCHED_TRACER 217config SCHED_TRACER
212 bool "Scheduling Latency Tracer" 218 bool "Scheduling Latency Tracer"
213 select GENERIC_TRACER 219 select GENERIC_TRACER
@@ -234,23 +240,6 @@ config FTRACE_SYSCALLS
234 help 240 help
235 Basic tracer to catch the syscall entry and exit events. 241 Basic tracer to catch the syscall entry and exit events.
236 242
237config BOOT_TRACER
238 bool "Trace boot initcalls"
239 select GENERIC_TRACER
240 select CONTEXT_SWITCH_TRACER
241 help
242 This tracer helps developers to optimize boot times: it records
243 the timings of the initcalls and traces key events and the identity
244 of tasks that can cause boot delays, such as context-switches.
245
246 Its aim is to be parsed by the scripts/bootgraph.pl tool to
247 produce pretty graphics about boot inefficiencies, giving a visual
248 representation of the delays during initcalls - but the raw
249 /debug/tracing/trace text output is readable too.
250
251 You must pass in initcall_debug and ftrace=initcall to the kernel
252 command line to enable this on bootup.
253
254config TRACE_BRANCH_PROFILING 243config TRACE_BRANCH_PROFILING
255 bool 244 bool
256 select GENERIC_TRACER 245 select GENERIC_TRACER
@@ -330,37 +319,6 @@ config BRANCH_TRACER
330 319
331 Say N if unsure. 320 Say N if unsure.
332 321
333config POWER_TRACER
334 bool "Trace power consumption behavior"
335 depends on X86
336 select GENERIC_TRACER
337 help
338 This tracer helps developers to analyze and optimize the kernel's
339 power management decisions, specifically the C-state and P-state
340 behavior.
341
342config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT
345 select TRACING
346 help
347 This tracer helps find read and write operations on any given kernel
348 symbol i.e. /proc/kallsyms.
349
350config PROFILE_KSYM_TRACER
351 bool "Profile all kernel memory accesses on 'watched' variables"
352 depends on KSYM_TRACER
353 help
354 This tracer profiles kernel accesses on variables watched through the
355 ksym tracer ftrace plugin. Depending upon the hardware, all read
356 and write operations on kernel variables can be monitored for
357 accesses.
358
359 The results will be displayed in:
360 /debugfs/tracing/profile_ksym
361
362 Say N if unsure.
363
364config STACK_TRACER 322config STACK_TRACER
365 bool "Trace max stack" 323 bool "Trace max stack"
366 depends on HAVE_FUNCTION_TRACER 324 depends on HAVE_FUNCTION_TRACER
@@ -385,45 +343,6 @@ config STACK_TRACER
385 343
386 Say N if unsure. 344 Say N if unsure.
387 345
388config HW_BRANCH_TRACER
389 depends on HAVE_HW_BRANCH_TRACER
390 bool "Trace hw branches"
391 select GENERIC_TRACER
392 help
393 This tracer records all branches on the system in a circular
394 buffer, giving access to the last N branches for each cpu.
395
396config KMEMTRACE
397 bool "Trace SLAB allocations"
398 select GENERIC_TRACER
399 help
400 kmemtrace provides tracing for slab allocator functions, such as
401 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
402 data is then fed to the userspace application in order to analyse
403 allocation hotspots, internal fragmentation and so on, making it
404 possible to see how well an allocator performs, as well as debug
405 and profile kernel code.
406
407 This requires an userspace application to use. See
408 Documentation/trace/kmemtrace.txt for more information.
409
410 Saying Y will make the kernel somewhat larger and slower. However,
411 if you disable kmemtrace at run-time or boot-time, the performance
412 impact is minimal (depending on the arch the kernel is built for).
413
414 If unsure, say N.
415
416config WORKQUEUE_TRACER
417 bool "Trace workqueues"
418 select GENERIC_TRACER
419 help
420 The workqueue tracer provides some statistical information
421 about each cpu workqueue thread such as the number of the
422 works inserted and executed since their creation. It can help
423 to evaluate the amount of work each of them has to perform.
424 For example it can help a developer to decide whether he should
425 choose a per-cpu workqueue instead of a singlethreaded one.
426
427config BLK_DEV_IO_TRACE 346config BLK_DEV_IO_TRACE
428 bool "Support for tracing block IO actions" 347 bool "Support for tracing block IO actions"
429 depends on SYSFS 348 depends on SYSFS
@@ -451,7 +370,7 @@ config BLK_DEV_IO_TRACE
451 370
452config KPROBE_EVENT 371config KPROBE_EVENT
453 depends on KPROBES 372 depends on KPROBES
454 depends on X86 373 depends on HAVE_REGS_AND_STACK_ACCESS_API
455 bool "Enable kprobes-based dynamic events" 374 bool "Enable kprobes-based dynamic events"
456 select TRACING 375 select TRACING
457 default y 376 default y
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..761c510a06c5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
30obj-$(CONFIG_TRACING) += trace_stat.o 30obj-$(CONFIG_TRACING) += trace_stat.o
31obj-$(CONFIG_TRACING) += trace_printk.o 31obj-$(CONFIG_TRACING) += trace_printk.o
32obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 32obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
33obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
34obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o 33obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
35obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 34obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
36obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 35obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,11 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
38obj-$(CONFIG_NOP_TRACER) += trace_nop.o 37obj-$(CONFIG_NOP_TRACER) += trace_nop.o
39obj-$(CONFIG_STACK_TRACER) += trace_stack.o 38obj-$(CONFIG_STACK_TRACER) += trace_stack.o
40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 39obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 40obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 41obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 42obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 43obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
48ifeq ($(CONFIG_BLOCK),y) 44ifeq ($(CONFIG_BLOCK),y)
@@ -51,10 +47,14 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 47obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 48obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 49obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 50ifeq ($(CONFIG_PERF_EVENTS),y)
51obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
52endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 53obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 54obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o 55obj-$(CONFIG_TRACEPOINTS) += power-traces.o
58obj-$(CONFIG_EVENT_TRACING) += power-traces.o 56ifeq ($(CONFIG_TRACING),y)
57obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
58endif
59 59
60libftrace-y := ftrace.o 60libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..153562d0b93c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,8 +21,8 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h>
24#include <linux/debugfs.h> 25#include <linux/debugfs.h>
25#include <linux/smp_lock.h>
26#include <linux/time.h> 26#include <linux/time.h>
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28 28
@@ -168,9 +168,11 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
169 BLK_TC_ACT(BLK_TC_WRITE) }; 169 BLK_TC_ACT(BLK_TC_WRITE) };
170 170
171#define BLK_TC_RAHEAD BLK_TC_AHEAD
172
171/* The ilog2() calls fall out because they're constant */ 173/* The ilog2() calls fall out because they're constant */
172#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ 174#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
173 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) 175 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
174 176
175/* 177/*
176 * The worker for the various blk_add_trace*() types. Fills out a 178 * The worker for the various blk_add_trace*() types. Fills out a
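The reworked MASK_TC_BIT() takes a request flag straight from rw and shifts it into the blktrace category field: the shift distance is the gap between the flag's bit position (__REQ_*) and the category bit's position above BLK_TC_SHIFT. A minimal user-space sketch of that bit-relocation arithmetic follows; SRC_META_BIT, TC_SHIFT and DST_META_BIT are purely illustrative stand-ins, not the real __REQ_META/BLK_TC_SHIFT/BLK_TC_META values.

#include <assert.h>
#include <stdint.h>

/* Illustrative positions only -- not the real kernel values. */
#define SRC_META_BIT   5   /* where the META flag lives in "rw"            */
#define TC_SHIFT       16  /* start of the category field                  */
#define DST_META_BIT   3   /* META's slot within the category field        */

/* Same shape as MASK_TC_BIT(rw, META): mask the bit, then move it. */
static uint32_t mask_tc_meta(uint32_t rw)
{
	return (rw & (1u << SRC_META_BIT)) <<
	       (TC_SHIFT + DST_META_BIT - SRC_META_BIT);
}

int main(void)
{
	uint32_t rw = 1u << SRC_META_BIT;       /* request marked META */

	assert(mask_tc_meta(rw) == 1u << (TC_SHIFT + DST_META_BIT));
	return 0;
}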
@@ -193,9 +195,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
193 return; 195 return;
194 196
195 what |= ddir_act[rw & WRITE]; 197 what |= ddir_act[rw & WRITE];
196 what |= MASK_TC_BIT(rw, BARRIER); 198 what |= MASK_TC_BIT(rw, SYNC);
197 what |= MASK_TC_BIT(rw, SYNCIO); 199 what |= MASK_TC_BIT(rw, RAHEAD);
198 what |= MASK_TC_BIT(rw, AHEAD);
199 what |= MASK_TC_BIT(rw, META); 200 what |= MASK_TC_BIT(rw, META);
200 what |= MASK_TC_BIT(rw, DISCARD); 201 what |= MASK_TC_BIT(rw, DISCARD);
201 202
@@ -322,6 +323,7 @@ static const struct file_operations blk_dropped_fops = {
322 .owner = THIS_MODULE, 323 .owner = THIS_MODULE,
323 .open = blk_dropped_open, 324 .open = blk_dropped_open,
324 .read = blk_dropped_read, 325 .read = blk_dropped_read,
326 .llseek = default_llseek,
325}; 327};
326 328
327static int blk_msg_open(struct inode *inode, struct file *filp) 329static int blk_msg_open(struct inode *inode, struct file *filp)
@@ -361,6 +363,7 @@ static const struct file_operations blk_msg_fops = {
361 .owner = THIS_MODULE, 363 .owner = THIS_MODULE,
362 .open = blk_msg_open, 364 .open = blk_msg_open,
363 .write = blk_msg_write, 365 .write = blk_msg_write,
366 .llseek = noop_llseek,
364}; 367};
365 368
366/* 369/*
@@ -540,13 +543,49 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
540 if (ret) 543 if (ret)
541 return ret; 544 return ret;
542 545
543 if (copy_to_user(arg, &buts, sizeof(buts))) 546 if (copy_to_user(arg, &buts, sizeof(buts))) {
547 blk_trace_remove(q);
544 return -EFAULT; 548 return -EFAULT;
545 549 }
546 return 0; 550 return 0;
547} 551}
548EXPORT_SYMBOL_GPL(blk_trace_setup); 552EXPORT_SYMBOL_GPL(blk_trace_setup);
549 553
554#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
555static int compat_blk_trace_setup(struct request_queue *q, char *name,
556 dev_t dev, struct block_device *bdev,
557 char __user *arg)
558{
559 struct blk_user_trace_setup buts;
560 struct compat_blk_user_trace_setup cbuts;
561 int ret;
562
563 if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
564 return -EFAULT;
565
566 buts = (struct blk_user_trace_setup) {
567 .act_mask = cbuts.act_mask,
568 .buf_size = cbuts.buf_size,
569 .buf_nr = cbuts.buf_nr,
570 .start_lba = cbuts.start_lba,
571 .end_lba = cbuts.end_lba,
572 .pid = cbuts.pid,
573 };
574 memcpy(&buts.name, &cbuts.name, 32);
575
576 ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
577 if (ret)
578 return ret;
579
580 if (copy_to_user(arg, &buts.name, 32)) {
581 blk_trace_remove(q);
582 return -EFAULT;
583 }
584
585 return 0;
586}
587#endif
588
550int blk_trace_startstop(struct request_queue *q, int start) 589int blk_trace_startstop(struct request_queue *q, int start)
551{ 590{
552 int ret; 591 int ret;
@@ -606,6 +645,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
606 bdevname(bdev, b); 645 bdevname(bdev, b);
607 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); 646 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
608 break; 647 break;
648#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
649 case BLKTRACESETUP32:
650 bdevname(bdev, b);
651 ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
652 break;
653#endif
609 case BLKTRACESTART: 654 case BLKTRACESTART:
610 start = 1; 655 start = 1;
611 case BLKTRACESTOP: 656 case BLKTRACESTOP:
@@ -659,10 +704,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
659 if (likely(!bt)) 704 if (likely(!bt))
660 return; 705 return;
661 706
662 if (blk_discard_rq(rq)) 707 if (rq->cmd_flags & REQ_DISCARD)
663 rw |= (1 << BIO_RW_DISCARD); 708 rw |= REQ_DISCARD;
709
710 if (rq->cmd_flags & REQ_SECURE)
711 rw |= REQ_SECURE;
664 712
665 if (blk_pc_request(rq)) { 713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
666 what |= BLK_TC_ACT(BLK_TC_PC); 714 what |= BLK_TC_ACT(BLK_TC_PC);
667 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, 715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
668 what, rq->errors, rq->cmd_len, rq->cmd); 716 what, rq->errors, rq->cmd_len, rq->cmd);
@@ -673,28 +721,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
673 } 721 }
674} 722}
675 723
676static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) 724static void blk_add_trace_rq_abort(void *ignore,
725 struct request_queue *q, struct request *rq)
677{ 726{
678 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 727 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
679} 728}
680 729
681static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) 730static void blk_add_trace_rq_insert(void *ignore,
731 struct request_queue *q, struct request *rq)
682{ 732{
683 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 733 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
684} 734}
685 735
686static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) 736static void blk_add_trace_rq_issue(void *ignore,
737 struct request_queue *q, struct request *rq)
687{ 738{
688 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 739 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
689} 740}
690 741
691static void blk_add_trace_rq_requeue(struct request_queue *q, 742static void blk_add_trace_rq_requeue(void *ignore,
743 struct request_queue *q,
692 struct request *rq) 744 struct request *rq)
693{ 745{
694 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 746 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
695} 747}
696 748
697static void blk_add_trace_rq_complete(struct request_queue *q, 749static void blk_add_trace_rq_complete(void *ignore,
750 struct request_queue *q,
698 struct request *rq) 751 struct request *rq)
699{ 752{
700 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 753 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
@@ -705,55 +758,66 @@ static void blk_add_trace_rq_complete(struct request_queue *q,
705 * @q: queue the io is for 758 * @q: queue the io is for
706 * @bio: the source bio 759 * @bio: the source bio
707 * @what: the action 760 * @what: the action
761 * @error: error, if any
708 * 762 *
709 * Description: 763 * Description:
710 * Records an action against a bio. Will log the bio offset + size. 764 * Records an action against a bio. Will log the bio offset + size.
711 * 765 *
712 **/ 766 **/
713static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, 767static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
714 u32 what) 768 u32 what, int error)
715{ 769{
716 struct blk_trace *bt = q->blk_trace; 770 struct blk_trace *bt = q->blk_trace;
717 771
718 if (likely(!bt)) 772 if (likely(!bt))
719 return; 773 return;
720 774
775 if (!error && !bio_flagged(bio, BIO_UPTODATE))
776 error = EIO;
777
721 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, 778 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
722 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 779 error, 0, NULL);
723} 780}
724 781
725static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) 782static void blk_add_trace_bio_bounce(void *ignore,
783 struct request_queue *q, struct bio *bio)
726{ 784{
727 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 785 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
728} 786}
729 787
730static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) 788static void blk_add_trace_bio_complete(void *ignore,
789 struct request_queue *q, struct bio *bio,
790 int error)
731{ 791{
732 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 792 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
733} 793}
734 794
735static void blk_add_trace_bio_backmerge(struct request_queue *q, 795static void blk_add_trace_bio_backmerge(void *ignore,
796 struct request_queue *q,
736 struct bio *bio) 797 struct bio *bio)
737{ 798{
738 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 799 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
739} 800}
740 801
741static void blk_add_trace_bio_frontmerge(struct request_queue *q, 802static void blk_add_trace_bio_frontmerge(void *ignore,
803 struct request_queue *q,
742 struct bio *bio) 804 struct bio *bio)
743{ 805{
744 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 806 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
745} 807}
746 808
747static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) 809static void blk_add_trace_bio_queue(void *ignore,
810 struct request_queue *q, struct bio *bio)
748{ 811{
749 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 812 blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
750} 813}
751 814
752static void blk_add_trace_getrq(struct request_queue *q, 815static void blk_add_trace_getrq(void *ignore,
816 struct request_queue *q,
753 struct bio *bio, int rw) 817 struct bio *bio, int rw)
754{ 818{
755 if (bio) 819 if (bio)
756 blk_add_trace_bio(q, bio, BLK_TA_GETRQ); 820 blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
757 else { 821 else {
758 struct blk_trace *bt = q->blk_trace; 822 struct blk_trace *bt = q->blk_trace;
759 823
@@ -763,11 +827,12 @@ static void blk_add_trace_getrq(struct request_queue *q,
763} 827}
764 828
765 829
766static void blk_add_trace_sleeprq(struct request_queue *q, 830static void blk_add_trace_sleeprq(void *ignore,
831 struct request_queue *q,
767 struct bio *bio, int rw) 832 struct bio *bio, int rw)
768{ 833{
769 if (bio) 834 if (bio)
770 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); 835 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
771 else { 836 else {
772 struct blk_trace *bt = q->blk_trace; 837 struct blk_trace *bt = q->blk_trace;
773 838
@@ -777,7 +842,7 @@ static void blk_add_trace_sleeprq(struct request_queue *q,
777 } 842 }
778} 843}
779 844
780static void blk_add_trace_plug(struct request_queue *q) 845static void blk_add_trace_plug(void *ignore, struct request_queue *q)
781{ 846{
782 struct blk_trace *bt = q->blk_trace; 847 struct blk_trace *bt = q->blk_trace;
783 848
@@ -785,7 +850,7 @@ static void blk_add_trace_plug(struct request_queue *q)
785 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); 850 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
786} 851}
787 852
788static void blk_add_trace_unplug_io(struct request_queue *q) 853static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q)
789{ 854{
790 struct blk_trace *bt = q->blk_trace; 855 struct blk_trace *bt = q->blk_trace;
791 856
@@ -798,7 +863,7 @@ static void blk_add_trace_unplug_io(struct request_queue *q)
798 } 863 }
799} 864}
800 865
801static void blk_add_trace_unplug_timer(struct request_queue *q) 866static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
802{ 867{
803 struct blk_trace *bt = q->blk_trace; 868 struct blk_trace *bt = q->blk_trace;
804 869
@@ -811,7 +876,8 @@ static void blk_add_trace_unplug_timer(struct request_queue *q)
811 } 876 }
812} 877}
813 878
814static void blk_add_trace_split(struct request_queue *q, struct bio *bio, 879static void blk_add_trace_split(void *ignore,
880 struct request_queue *q, struct bio *bio,
815 unsigned int pdu) 881 unsigned int pdu)
816{ 882{
817 struct blk_trace *bt = q->blk_trace; 883 struct blk_trace *bt = q->blk_trace;
@@ -826,7 +892,8 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
826} 892}
827 893
828/** 894/**
829 * blk_add_trace_remap - Add a trace for a remap operation 895 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation
896 * @ignore: trace callback data parameter (not used)
830 * @q: queue the io is for 897 * @q: queue the io is for
831 * @bio: the source bio 898 * @bio: the source bio
832 * @dev: target device 899 * @dev: target device
@@ -837,8 +904,9 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
837 * it spans a stripe (or similar). Add a trace for that action. 904 * it spans a stripe (or similar). Add a trace for that action.
838 * 905 *
839 **/ 906 **/
840static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, 907static void blk_add_trace_bio_remap(void *ignore,
841 dev_t dev, sector_t from) 908 struct request_queue *q, struct bio *bio,
909 dev_t dev, sector_t from)
842{ 910{
843 struct blk_trace *bt = q->blk_trace; 911 struct blk_trace *bt = q->blk_trace;
844 struct blk_io_trace_remap r; 912 struct blk_io_trace_remap r;
@@ -857,6 +925,7 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
857 925
858/** 926/**
859 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 927 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
928 * @ignore: trace callback data parameter (not used)
860 * @q: queue the io is for 929 * @q: queue the io is for
861 * @rq: the source request 930 * @rq: the source request
862 * @dev: target device 931 * @dev: target device
@@ -867,7 +936,8 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
867 * Add a trace for that action. 936 * Add a trace for that action.
868 * 937 *
869 **/ 938 **/
870static void blk_add_trace_rq_remap(struct request_queue *q, 939static void blk_add_trace_rq_remap(void *ignore,
940 struct request_queue *q,
871 struct request *rq, dev_t dev, 941 struct request *rq, dev_t dev,
872 sector_t from) 942 sector_t from)
873{ 943{
@@ -906,7 +976,7 @@ void blk_add_driver_data(struct request_queue *q,
906 if (likely(!bt)) 976 if (likely(!bt))
907 return; 977 return;
908 978
909 if (blk_pc_request(rq)) 979 if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
910 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 980 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
911 BLK_TA_DRV_DATA, rq->errors, len, data); 981 BLK_TA_DRV_DATA, rq->errors, len, data);
912 else 982 else
@@ -919,64 +989,64 @@ static void blk_register_tracepoints(void)
919{ 989{
920 int ret; 990 int ret;
921 991
922 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); 992 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
923 WARN_ON(ret); 993 WARN_ON(ret);
924 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); 994 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
925 WARN_ON(ret); 995 WARN_ON(ret);
926 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); 996 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
927 WARN_ON(ret); 997 WARN_ON(ret);
928 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); 998 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
929 WARN_ON(ret); 999 WARN_ON(ret);
930 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); 1000 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
931 WARN_ON(ret); 1001 WARN_ON(ret);
932 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); 1002 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
933 WARN_ON(ret); 1003 WARN_ON(ret);
934 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); 1004 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
935 WARN_ON(ret); 1005 WARN_ON(ret);
936 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 1006 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
937 WARN_ON(ret); 1007 WARN_ON(ret);
938 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 1008 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
939 WARN_ON(ret); 1009 WARN_ON(ret);
940 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); 1010 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
941 WARN_ON(ret); 1011 WARN_ON(ret);
942 ret = register_trace_block_getrq(blk_add_trace_getrq); 1012 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
943 WARN_ON(ret); 1013 WARN_ON(ret);
944 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); 1014 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
945 WARN_ON(ret); 1015 WARN_ON(ret);
946 ret = register_trace_block_plug(blk_add_trace_plug); 1016 ret = register_trace_block_plug(blk_add_trace_plug, NULL);
947 WARN_ON(ret); 1017 WARN_ON(ret);
948 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); 1018 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
949 WARN_ON(ret); 1019 WARN_ON(ret);
950 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); 1020 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
951 WARN_ON(ret); 1021 WARN_ON(ret);
952 ret = register_trace_block_split(blk_add_trace_split); 1022 ret = register_trace_block_split(blk_add_trace_split, NULL);
953 WARN_ON(ret); 1023 WARN_ON(ret);
954 ret = register_trace_block_remap(blk_add_trace_remap); 1024 ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
955 WARN_ON(ret); 1025 WARN_ON(ret);
956 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); 1026 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
957 WARN_ON(ret); 1027 WARN_ON(ret);
958} 1028}
959 1029
960static void blk_unregister_tracepoints(void) 1030static void blk_unregister_tracepoints(void)
961{ 1031{
962 unregister_trace_block_rq_remap(blk_add_trace_rq_remap); 1032 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
963 unregister_trace_block_remap(blk_add_trace_remap); 1033 unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
964 unregister_trace_block_split(blk_add_trace_split); 1034 unregister_trace_block_split(blk_add_trace_split, NULL);
965 unregister_trace_block_unplug_io(blk_add_trace_unplug_io); 1035 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
966 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); 1036 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
967 unregister_trace_block_plug(blk_add_trace_plug); 1037 unregister_trace_block_plug(blk_add_trace_plug, NULL);
968 unregister_trace_block_sleeprq(blk_add_trace_sleeprq); 1038 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
969 unregister_trace_block_getrq(blk_add_trace_getrq); 1039 unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
970 unregister_trace_block_bio_queue(blk_add_trace_bio_queue); 1040 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
971 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 1041 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
972 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 1042 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
973 unregister_trace_block_bio_complete(blk_add_trace_bio_complete); 1043 unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
974 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); 1044 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
975 unregister_trace_block_rq_complete(blk_add_trace_rq_complete); 1045 unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
976 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); 1046 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
977 unregister_trace_block_rq_issue(blk_add_trace_rq_issue); 1047 unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
978 unregister_trace_block_rq_insert(blk_add_trace_rq_insert); 1048 unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
979 unregister_trace_block_rq_abort(blk_add_trace_rq_abort); 1049 unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
980 1050
981 tracepoint_synchronize_unregister(); 1051 tracepoint_synchronize_unregister();
982} 1052}
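All of these registrations now pass a second argument (NULL here), and every probe above gained a leading void *ignore parameter: the tracepoint core stores the registration-time pointer and hands it back to the probe as its first argument on every hit. A hedged sketch of that callback-plus-data pattern as a tiny user-space registry (a toy model, not the kernel tracepoint API):

#include <stdio.h>

/* Probe signature: registration-time data first, then the event payload. */
typedef void (*probe_fn)(void *data, int event);

struct probe_slot {
	probe_fn fn;
	void *data;	/* passed back verbatim on every call */
};

static struct probe_slot slot;

static int register_probe(probe_fn fn, void *data)
{
	slot.fn = fn;
	slot.data = data;
	return 0;
}

static void fire(int event)
{
	if (slot.fn)
		slot.fn(slot.data, event);
}

/* A probe that, like blk_add_trace_plug() above, ignores the data pointer. */
static void my_probe(void *ignore, int event)
{
	(void)ignore;
	printf("event %d\n", event);
}

int main(void)
{
	register_probe(my_probe, NULL);	/* NULL data, as in the diff */
	fire(42);
	return 0;
}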
@@ -1319,7 +1389,7 @@ out:
1319} 1389}
1320 1390
1321static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1391static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1322 int flags) 1392 int flags, struct trace_event *event)
1323{ 1393{
1324 return print_one_line(iter, false); 1394 return print_one_line(iter, false);
1325} 1395}
@@ -1341,7 +1411,8 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1341} 1411}
1342 1412
1343static enum print_line_t 1413static enum print_line_t
1344blk_trace_event_print_binary(struct trace_iterator *iter, int flags) 1414blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
1415 struct trace_event *event)
1345{ 1416{
1346 return blk_trace_synthesize_old_trace(iter) ? 1417 return blk_trace_synthesize_old_trace(iter) ?
1347 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1418 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
@@ -1379,12 +1450,16 @@ static struct tracer blk_tracer __read_mostly = {
1379 .set_flag = blk_tracer_set_flag, 1450 .set_flag = blk_tracer_set_flag,
1380}; 1451};
1381 1452
1382static struct trace_event trace_blk_event = { 1453static struct trace_event_functions trace_blk_event_funcs = {
1383 .type = TRACE_BLK,
1384 .trace = blk_trace_event_print, 1454 .trace = blk_trace_event_print,
1385 .binary = blk_trace_event_print_binary, 1455 .binary = blk_trace_event_print_binary,
1386}; 1456};
1387 1457
1458static struct trace_event trace_blk_event = {
1459 .type = TRACE_BLK,
1460 .funcs = &trace_blk_event_funcs,
1461};
1462
1388static int __init init_blk_tracer(void) 1463static int __init init_blk_tracer(void)
1389{ 1464{
1390 if (!register_ftrace_event(&trace_blk_event)) { 1465 if (!register_ftrace_event(&trace_blk_event)) {
@@ -1579,10 +1654,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
1579 struct block_device *bdev; 1654 struct block_device *bdev;
1580 ssize_t ret = -ENXIO; 1655 ssize_t ret = -ENXIO;
1581 1656
1582 lock_kernel();
1583 bdev = bdget(part_devt(p)); 1657 bdev = bdget(part_devt(p));
1584 if (bdev == NULL) 1658 if (bdev == NULL)
1585 goto out_unlock_kernel; 1659 goto out;
1586 1660
1587 q = blk_trace_get_queue(bdev); 1661 q = blk_trace_get_queue(bdev);
1588 if (q == NULL) 1662 if (q == NULL)
@@ -1610,8 +1684,7 @@ out_unlock_bdev:
1610 mutex_unlock(&bdev->bd_mutex); 1684 mutex_unlock(&bdev->bd_mutex);
1611out_bdput: 1685out_bdput:
1612 bdput(bdev); 1686 bdput(bdev);
1613out_unlock_kernel: 1687out:
1614 unlock_kernel();
1615 return ret; 1688 return ret;
1616} 1689}
1617 1690
@@ -1641,11 +1714,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1641 1714
1642 ret = -ENXIO; 1715 ret = -ENXIO;
1643 1716
1644 lock_kernel();
1645 p = dev_to_part(dev); 1717 p = dev_to_part(dev);
1646 bdev = bdget(part_devt(p)); 1718 bdev = bdget(part_devt(p));
1647 if (bdev == NULL) 1719 if (bdev == NULL)
1648 goto out_unlock_kernel; 1720 goto out;
1649 1721
1650 q = blk_trace_get_queue(bdev); 1722 q = blk_trace_get_queue(bdev);
1651 if (q == NULL) 1723 if (q == NULL)
@@ -1680,8 +1752,6 @@ out_unlock_bdev:
1680 mutex_unlock(&bdev->bd_mutex); 1752 mutex_unlock(&bdev->bd_mutex);
1681out_bdput: 1753out_bdput:
1682 bdput(bdev); 1754 bdput(bdev);
1683out_unlock_kernel:
1684 unlock_kernel();
1685out: 1755out:
1686 return ret ? ret : count; 1756 return ret ? ret : count;
1687} 1757}
@@ -1706,7 +1776,7 @@ void blk_dump_cmd(char *buf, struct request *rq)
1706 int len = rq->cmd_len; 1776 int len = rq->cmd_len;
1707 unsigned char *cmd = rq->cmd; 1777 unsigned char *cmd = rq->cmd;
1708 1778
1709 if (!blk_pc_request(rq)) { 1779 if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
1710 buf[0] = '\0'; 1780 buf[0] = '\0';
1711 return; 1781 return;
1712 } 1782 }
@@ -1731,21 +1801,21 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1731 1801
1732 if (rw & WRITE) 1802 if (rw & WRITE)
1733 rwbs[i++] = 'W'; 1803 rwbs[i++] = 'W';
1734 else if (rw & 1 << BIO_RW_DISCARD) 1804 else if (rw & REQ_DISCARD)
1735 rwbs[i++] = 'D'; 1805 rwbs[i++] = 'D';
1736 else if (bytes) 1806 else if (bytes)
1737 rwbs[i++] = 'R'; 1807 rwbs[i++] = 'R';
1738 else 1808 else
1739 rwbs[i++] = 'N'; 1809 rwbs[i++] = 'N';
1740 1810
1741 if (rw & 1 << BIO_RW_AHEAD) 1811 if (rw & REQ_RAHEAD)
1742 rwbs[i++] = 'A'; 1812 rwbs[i++] = 'A';
1743 if (rw & 1 << BIO_RW_BARRIER) 1813 if (rw & REQ_SYNC)
1744 rwbs[i++] = 'B';
1745 if (rw & 1 << BIO_RW_SYNCIO)
1746 rwbs[i++] = 'S'; 1814 rwbs[i++] = 'S';
1747 if (rw & 1 << BIO_RW_META) 1815 if (rw & REQ_META)
1748 rwbs[i++] = 'M'; 1816 rwbs[i++] = 'M';
1817 if (rw & REQ_SECURE)
1818 rwbs[i++] = 'E';
1749 1819
1750 rwbs[i] = '\0'; 1820 rwbs[i] = '\0';
1751} 1821}
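With the BIO_RW_* bits gone, blk_fill_rwbs() now derives the RWBS string from the REQ_* flags: one direction character (W, D, R or N) followed by optional A/S/M/E modifiers, and the old 'B' barrier character disappears. A standalone sketch of the same string building; the F_* constants are made-up stand-ins for WRITE and the REQ_* bits:

#include <stdio.h>

/* Illustrative stand-ins for WRITE and the REQ_* flags used in the diff. */
#define F_WRITE    (1u << 0)
#define F_DISCARD  (1u << 1)
#define F_RAHEAD   (1u << 2)
#define F_SYNC     (1u << 3)
#define F_META     (1u << 4)
#define F_SECURE   (1u << 5)

static void fill_rwbs(char *rwbs, unsigned rw, int bytes)
{
	int i = 0;

	if (rw & F_WRITE)
		rwbs[i++] = 'W';
	else if (rw & F_DISCARD)
		rwbs[i++] = 'D';
	else if (bytes)
		rwbs[i++] = 'R';
	else
		rwbs[i++] = 'N';

	if (rw & F_RAHEAD)
		rwbs[i++] = 'A';
	if (rw & F_SYNC)
		rwbs[i++] = 'S';
	if (rw & F_META)
		rwbs[i++] = 'M';
	if (rw & F_SECURE)
		rwbs[i++] = 'E';

	rwbs[i] = '\0';
}

int main(void)
{
	char buf[8];

	fill_rwbs(buf, F_WRITE | F_SYNC | F_META, 4096);
	printf("%s\n", buf);	/* prints "WSM" */
	return 0;
}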
@@ -1755,8 +1825,11 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
1755 int rw = rq->cmd_flags & 0x03; 1825 int rw = rq->cmd_flags & 0x03;
1756 int bytes; 1826 int bytes;
1757 1827
1758 if (blk_discard_rq(rq)) 1828 if (rq->cmd_flags & REQ_DISCARD)
1759 rw |= (1 << BIO_RW_DISCARD); 1829 rw |= REQ_DISCARD;
1830
1831 if (rq->cmd_flags & REQ_SECURE)
1832 rw |= REQ_SECURE;
1760 1833
1761 bytes = blk_rq_bytes(rq); 1834 bytes = blk_rq_bytes(rq);
1762 1835
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e6640f80454..f3dadae83883 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,12 +22,13 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/slab.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31#include <linux/rcupdate.h>
31 32
32#include <trace/events/sched.h> 33#include <trace/events/sched.h>
33 34
@@ -85,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
85ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
86ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
87 88
88#ifdef CONFIG_FUNCTION_GRAPH_TRACER 89/*
89static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); 90 * Traverse the ftrace_list, invoking all entries. The reason that we
90#endif 91 * can use rcu_dereference_raw() is that elements removed from this list
91 92 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list.
95 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */
92static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
93{ 99{
94 struct ftrace_ops *op = ftrace_list; 100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
95
96 /* in case someone actually ports this to alpha! */
97 read_barrier_depends();
98 101
99 while (op != &ftrace_list_end) { 102 while (op != &ftrace_list_end) {
100 /* silly alpha */
101 read_barrier_depends();
102 op->func(ip, parent_ip); 103 op->func(ip, parent_ip);
103 op = op->next; 104 op = rcu_dereference_raw(op->next); /*see above*/
104 }; 105 };
105} 106}
106 107
@@ -155,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 * the ops->next pointer is valid before another CPU sees 156 * the ops->next pointer is valid before another CPU sees
156 * the ops pointer included into the ftrace_list. 157 * the ops pointer included into the ftrace_list.
157 */ 158 */
158 smp_wmb(); 159 rcu_assign_pointer(ftrace_list, ops);
159 ftrace_list = ops;
160 160
161 if (ftrace_enabled) { 161 if (ftrace_enabled) {
162 ftrace_func_t func; 162 ftrace_func_t func;
@@ -264,6 +264,7 @@ struct ftrace_profile {
264 unsigned long counter; 264 unsigned long counter;
265#ifdef CONFIG_FUNCTION_GRAPH_TRACER 265#ifdef CONFIG_FUNCTION_GRAPH_TRACER
266 unsigned long long time; 266 unsigned long long time;
267 unsigned long long time_squared;
267#endif 268#endif
268}; 269};
269 270
@@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m)
366{ 367{
367#ifdef CONFIG_FUNCTION_GRAPH_TRACER 368#ifdef CONFIG_FUNCTION_GRAPH_TRACER
368 seq_printf(m, " Function " 369 seq_printf(m, " Function "
369 "Hit Time Avg\n" 370 "Hit Time Avg s^2\n"
370 " -------- " 371 " -------- "
371 "--- ---- ---\n"); 372 "--- ---- --- ---\n");
372#else 373#else
373 seq_printf(m, " Function Hit\n" 374 seq_printf(m, " Function Hit\n"
374 " -------- ---\n"); 375 " -------- ---\n");
@@ -380,11 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v)
380{ 381{
381 struct ftrace_profile *rec = v; 382 struct ftrace_profile *rec = v;
382 char str[KSYM_SYMBOL_LEN]; 383 char str[KSYM_SYMBOL_LEN];
384 int ret = 0;
383#ifdef CONFIG_FUNCTION_GRAPH_TRACER 385#ifdef CONFIG_FUNCTION_GRAPH_TRACER
384 static DEFINE_MUTEX(mutex);
385 static struct trace_seq s; 386 static struct trace_seq s;
386 unsigned long long avg; 387 unsigned long long avg;
388 unsigned long long stddev;
387#endif 389#endif
390 mutex_lock(&ftrace_profile_lock);
391
392 /* we raced with function_profile_reset() */
393 if (unlikely(rec->counter == 0)) {
394 ret = -EBUSY;
395 goto out;
396 }
388 397
389 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 398 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
390 seq_printf(m, " %-30.30s %10lu", str, rec->counter); 399 seq_printf(m, " %-30.30s %10lu", str, rec->counter);
@@ -394,17 +403,31 @@ static int function_stat_show(struct seq_file *m, void *v)
394 avg = rec->time; 403 avg = rec->time;
395 do_div(avg, rec->counter); 404 do_div(avg, rec->counter);
396 405
397 mutex_lock(&mutex); 406 /* Sample standard deviation (s^2) */
407 if (rec->counter <= 1)
408 stddev = 0;
409 else {
410 stddev = rec->time_squared - rec->counter * avg * avg;
411 /*
412 * Divide only 1000 for ns^2 -> us^2 conversion.
413 * trace_print_graph_duration will divide 1000 again.
414 */
415 do_div(stddev, (rec->counter - 1) * 1000);
416 }
417
398 trace_seq_init(&s); 418 trace_seq_init(&s);
399 trace_print_graph_duration(rec->time, &s); 419 trace_print_graph_duration(rec->time, &s);
400 trace_seq_puts(&s, " "); 420 trace_seq_puts(&s, " ");
401 trace_print_graph_duration(avg, &s); 421 trace_print_graph_duration(avg, &s);
422 trace_seq_puts(&s, " ");
423 trace_print_graph_duration(stddev, &s);
402 trace_print_seq(m, &s); 424 trace_print_seq(m, &s);
403 mutex_unlock(&mutex);
404#endif 425#endif
405 seq_putc(m, '\n'); 426 seq_putc(m, '\n');
427out:
428 mutex_unlock(&ftrace_profile_lock);
406 429
407 return 0; 430 return ret;
408} 431}
409 432
410static void ftrace_profile_reset(struct ftrace_profile_stat *stat) 433static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
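The new s^2 column is computed from the running sums kept per record: with counter calls, time holding the sum of durations and time_squared the sum of squared durations, the code uses the identity sum((x - avg)^2) = sum(x^2) - n*avg*avg and divides by (counter - 1). The extra /1000 only compensates for trace_print_graph_duration() dividing by 1000 again, and despite the variable name the printed value is the sample variance s^2, as the new column header says. A small sketch of the same arithmetic without the unit scaling:

#include <stdio.h>

/* Sample variance from running sums, as in function_stat_show():
 * sum((x - avg)^2) == sum(x^2) - n * avg * avg */
static unsigned long long
variance_from_sums(unsigned long long n, unsigned long long sum,
		   unsigned long long sum_sq)
{
	unsigned long long avg;

	if (n <= 1)
		return 0;

	avg = sum / n;
	return (sum_sq - n * avg * avg) / (n - 1);
}

int main(void)
{
	/* three calls taking 10, 20 and 30 ns */
	unsigned long long n = 3, sum = 60, sum_sq = 100 + 400 + 900;

	printf("s^2 = %llu\n", variance_from_sums(n, sum, sum_sq)); /* 100 */
	return 0;
}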
@@ -650,6 +673,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
650 if (!stat->hash || !ftrace_profile_enabled) 673 if (!stat->hash || !ftrace_profile_enabled)
651 goto out; 674 goto out;
652 675
676 /* If the calltime was zero'd ignore it */
677 if (!trace->calltime)
678 goto out;
679
653 calltime = trace->rettime - trace->calltime; 680 calltime = trace->rettime - trace->calltime;
654 681
655 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { 682 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
@@ -668,8 +695,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
668 } 695 }
669 696
670 rec = ftrace_find_profiled_func(stat, trace->func); 697 rec = ftrace_find_profiled_func(stat, trace->func);
671 if (rec) 698 if (rec) {
672 rec->time += calltime; 699 rec->time += calltime;
700 rec->time_squared += calltime * calltime;
701 }
673 702
674 out: 703 out:
675 local_irq_restore(flags); 704 local_irq_restore(flags);
@@ -771,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = {
771 .open = tracing_open_generic, 800 .open = tracing_open_generic,
772 .read = ftrace_profile_read, 801 .read = ftrace_profile_read,
773 .write = ftrace_profile_write, 802 .write = ftrace_profile_write,
803 .llseek = default_llseek,
774}; 804};
775 805
776/* used to initialize the real stat files */ 806/* used to initialize the real stat files */
@@ -855,10 +885,8 @@ enum {
855 FTRACE_ENABLE_CALLS = (1 << 0), 885 FTRACE_ENABLE_CALLS = (1 << 0),
856 FTRACE_DISABLE_CALLS = (1 << 1), 886 FTRACE_DISABLE_CALLS = (1 << 1),
857 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 887 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
858 FTRACE_ENABLE_MCOUNT = (1 << 3), 888 FTRACE_START_FUNC_RET = (1 << 3),
859 FTRACE_DISABLE_MCOUNT = (1 << 4), 889 FTRACE_STOP_FUNC_RET = (1 << 4),
860 FTRACE_START_FUNC_RET = (1 << 5),
861 FTRACE_STOP_FUNC_RET = (1 << 6),
862}; 890};
863 891
864static int ftrace_filtered; 892static int ftrace_filtered;
@@ -898,36 +926,6 @@ static struct dyn_ftrace *ftrace_free_records;
898 } \ 926 } \
899 } 927 }
900 928
901#ifdef CONFIG_KPROBES
902
903static int frozen_record_count;
904
905static inline void freeze_record(struct dyn_ftrace *rec)
906{
907 if (!(rec->flags & FTRACE_FL_FROZEN)) {
908 rec->flags |= FTRACE_FL_FROZEN;
909 frozen_record_count++;
910 }
911}
912
913static inline void unfreeze_record(struct dyn_ftrace *rec)
914{
915 if (rec->flags & FTRACE_FL_FROZEN) {
916 rec->flags &= ~FTRACE_FL_FROZEN;
917 frozen_record_count--;
918 }
919}
920
921static inline int record_frozen(struct dyn_ftrace *rec)
922{
923 return rec->flags & FTRACE_FL_FROZEN;
924}
925#else
926# define freeze_record(rec) ({ 0; })
927# define unfreeze_record(rec) ({ 0; })
928# define record_frozen(rec) ({ 0; })
929#endif /* CONFIG_KPROBES */
930
931static void ftrace_free_rec(struct dyn_ftrace *rec) 929static void ftrace_free_rec(struct dyn_ftrace *rec)
932{ 930{
933 rec->freelist = ftrace_free_records; 931 rec->freelist = ftrace_free_records;
@@ -1025,6 +1023,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1025} 1023}
1026 1024
1027 1025
1026/* Return 1 if the address range is reserved for ftrace */
1027int ftrace_text_reserved(void *start, void *end)
1028{
1029 struct dyn_ftrace *rec;
1030 struct ftrace_page *pg;
1031
1032 do_for_each_ftrace_rec(pg, rec) {
1033 if (rec->ip <= (unsigned long)end &&
1034 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1035 return 1;
1036 } while_for_each_ftrace_rec();
1037 return 0;
1038}
1039
1040
1028static int 1041static int
1029__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1042__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1030{ 1043{
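ftrace_text_reserved() walks every mcount record and reports whether the caller's [start, end] range touches any MCOUNT_INSN_SIZE-byte patch site, using the usual interval-overlap test (rec->ip <= end && rec->ip + MCOUNT_INSN_SIZE > start). A minimal sketch of that check; INSN_SIZE is an illustrative stand-in for MCOUNT_INSN_SIZE:

#include <assert.h>
#include <stdbool.h>

#define INSN_SIZE 5	/* stand-in for MCOUNT_INSN_SIZE */

/* Does the patch site [ip, ip + INSN_SIZE) intersect [start, end]?
 * Same test as ftrace_text_reserved(). */
static bool site_overlaps(unsigned long ip, unsigned long start,
			  unsigned long end)
{
	return ip <= end && ip + INSN_SIZE > start;
}

int main(void)
{
	assert(site_overlaps(0x1000, 0x1004, 0x1010));	/* tail of the site */
	assert(!site_overlaps(0x1000, 0x1005, 0x1010));	/* just past it     */
	return 0;
}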
@@ -1076,14 +1089,6 @@ static void ftrace_replace_code(int enable)
1076 !(rec->flags & FTRACE_FL_CONVERTED)) 1089 !(rec->flags & FTRACE_FL_CONVERTED))
1077 continue; 1090 continue;
1078 1091
1079 /* ignore updates to this record's mcount site */
1080 if (get_kprobe((void *)rec->ip)) {
1081 freeze_record(rec);
1082 continue;
1083 } else {
1084 unfreeze_record(rec);
1085 }
1086
1087 failed = __ftrace_replace_code(rec, enable); 1092 failed = __ftrace_replace_code(rec, enable);
1088 if (failed) { 1093 if (failed) {
1089 rec->flags |= FTRACE_FL_FAILED; 1094 rec->flags |= FTRACE_FL_FAILED;
@@ -1220,8 +1225,6 @@ static void ftrace_shutdown(int command)
1220 1225
1221static void ftrace_startup_sysctl(void) 1226static void ftrace_startup_sysctl(void)
1222{ 1227{
1223 int command = FTRACE_ENABLE_MCOUNT;
1224
1225 if (unlikely(ftrace_disabled)) 1228 if (unlikely(ftrace_disabled))
1226 return; 1229 return;
1227 1230
@@ -1229,23 +1232,17 @@ static void ftrace_startup_sysctl(void)
1229 saved_ftrace_func = NULL; 1232 saved_ftrace_func = NULL;
1230 /* ftrace_start_up is true if we want ftrace running */ 1233 /* ftrace_start_up is true if we want ftrace running */
1231 if (ftrace_start_up) 1234 if (ftrace_start_up)
1232 command |= FTRACE_ENABLE_CALLS; 1235 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1233
1234 ftrace_run_update_code(command);
1235} 1236}
1236 1237
1237static void ftrace_shutdown_sysctl(void) 1238static void ftrace_shutdown_sysctl(void)
1238{ 1239{
1239 int command = FTRACE_DISABLE_MCOUNT;
1240
1241 if (unlikely(ftrace_disabled)) 1240 if (unlikely(ftrace_disabled))
1242 return; 1241 return;
1243 1242
1244 /* ftrace_start_up is true if ftrace is running */ 1243 /* ftrace_start_up is true if ftrace is running */
1245 if (ftrace_start_up) 1244 if (ftrace_start_up)
1246 command |= FTRACE_DISABLE_CALLS; 1245 ftrace_run_update_code(FTRACE_DISABLE_CALLS);
1247
1248 ftrace_run_update_code(command);
1249} 1246}
1250 1247
1251static cycle_t ftrace_update_time; 1248static cycle_t ftrace_update_time;
@@ -1362,24 +1359,29 @@ enum {
1362#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 1359#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
1363 1360
1364struct ftrace_iterator { 1361struct ftrace_iterator {
1365 struct ftrace_page *pg; 1362 loff_t pos;
1366 int hidx; 1363 loff_t func_pos;
1367 int idx; 1364 struct ftrace_page *pg;
1368 unsigned flags; 1365 struct dyn_ftrace *func;
1369 struct trace_parser parser; 1366 struct ftrace_func_probe *probe;
1367 struct trace_parser parser;
1368 int hidx;
1369 int idx;
1370 unsigned flags;
1370}; 1371};
1371 1372
1372static void * 1373static void *
1373t_hash_next(struct seq_file *m, void *v, loff_t *pos) 1374t_hash_next(struct seq_file *m, loff_t *pos)
1374{ 1375{
1375 struct ftrace_iterator *iter = m->private; 1376 struct ftrace_iterator *iter = m->private;
1376 struct hlist_node *hnd = v; 1377 struct hlist_node *hnd = NULL;
1377 struct hlist_head *hhd; 1378 struct hlist_head *hhd;
1378 1379
1379 WARN_ON(!(iter->flags & FTRACE_ITER_HASH));
1380
1381 (*pos)++; 1380 (*pos)++;
1381 iter->pos = *pos;
1382 1382
1383 if (iter->probe)
1384 hnd = &iter->probe->node;
1383 retry: 1385 retry:
1384 if (iter->hidx >= FTRACE_FUNC_HASHSIZE) 1386 if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
1385 return NULL; 1387 return NULL;
@@ -1402,7 +1404,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos)
1402 } 1404 }
1403 } 1405 }
1404 1406
1405 return hnd; 1407 if (WARN_ON_ONCE(!hnd))
1408 return NULL;
1409
1410 iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
1411
1412 return iter;
1406} 1413}
1407 1414
1408static void *t_hash_start(struct seq_file *m, loff_t *pos) 1415static void *t_hash_start(struct seq_file *m, loff_t *pos)
@@ -1411,26 +1418,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1411 void *p = NULL; 1418 void *p = NULL;
1412 loff_t l; 1419 loff_t l;
1413 1420
1414 if (!(iter->flags & FTRACE_ITER_HASH)) 1421 if (iter->func_pos > *pos)
1415 *pos = 0; 1422 return NULL;
1416
1417 iter->flags |= FTRACE_ITER_HASH;
1418 1423
1419 iter->hidx = 0; 1424 iter->hidx = 0;
1420 for (l = 0; l <= *pos; ) { 1425 for (l = 0; l <= (*pos - iter->func_pos); ) {
1421 p = t_hash_next(m, p, &l); 1426 p = t_hash_next(m, &l);
1422 if (!p) 1427 if (!p)
1423 break; 1428 break;
1424 } 1429 }
1425 return p; 1430 if (!p)
1431 return NULL;
1432
1433 /* Only set this if we have an item */
1434 iter->flags |= FTRACE_ITER_HASH;
1435
1436 return iter;
1426} 1437}
1427 1438
1428static int t_hash_show(struct seq_file *m, void *v) 1439static int
1440t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
1429{ 1441{
1430 struct ftrace_func_probe *rec; 1442 struct ftrace_func_probe *rec;
1431 struct hlist_node *hnd = v;
1432 1443
1433 rec = hlist_entry(hnd, struct ftrace_func_probe, node); 1444 rec = iter->probe;
1445 if (WARN_ON_ONCE(!rec))
1446 return -EIO;
1434 1447
1435 if (rec->ops->print) 1448 if (rec->ops->print)
1436 return rec->ops->print(m, rec->ip, rec->ops, rec->data); 1449 return rec->ops->print(m, rec->ip, rec->ops, rec->data);
@@ -1451,12 +1464,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1451 struct dyn_ftrace *rec = NULL; 1464 struct dyn_ftrace *rec = NULL;
1452 1465
1453 if (iter->flags & FTRACE_ITER_HASH) 1466 if (iter->flags & FTRACE_ITER_HASH)
1454 return t_hash_next(m, v, pos); 1467 return t_hash_next(m, pos);
1455 1468
1456 (*pos)++; 1469 (*pos)++;
1470 iter->pos = *pos;
1457 1471
1458 if (iter->flags & FTRACE_ITER_PRINTALL) 1472 if (iter->flags & FTRACE_ITER_PRINTALL)
1459 return NULL; 1473 return t_hash_start(m, pos);
1460 1474
1461 retry: 1475 retry:
1462 if (iter->idx >= iter->pg->index) { 1476 if (iter->idx >= iter->pg->index) {
@@ -1485,7 +1499,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1485 } 1499 }
1486 } 1500 }
1487 1501
1488 return rec; 1502 if (!rec)
1503 return t_hash_start(m, pos);
1504
1505 iter->func_pos = *pos;
1506 iter->func = rec;
1507
1508 return iter;
1509}
1510
1511static void reset_iter_read(struct ftrace_iterator *iter)
1512{
1513 iter->pos = 0;
1514 iter->func_pos = 0;
1515 iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH);
1489} 1516}
1490 1517
1491static void *t_start(struct seq_file *m, loff_t *pos) 1518static void *t_start(struct seq_file *m, loff_t *pos)
@@ -1496,6 +1523,12 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1496 1523
1497 mutex_lock(&ftrace_lock); 1524 mutex_lock(&ftrace_lock);
1498 /* 1525 /*
1526 * If an lseek was done, then reset and start from beginning.
1527 */
1528 if (*pos < iter->pos)
1529 reset_iter_read(iter);
1530
1531 /*
1499 * For set_ftrace_filter reading, if we have the filter 1532 * For set_ftrace_filter reading, if we have the filter
1500 * off, we can short cut and just print out that all 1533 * off, we can short cut and just print out that all
1501 * functions are enabled. 1534 * functions are enabled.
@@ -1504,12 +1537,19 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1504 if (*pos > 0) 1537 if (*pos > 0)
1505 return t_hash_start(m, pos); 1538 return t_hash_start(m, pos);
1506 iter->flags |= FTRACE_ITER_PRINTALL; 1539 iter->flags |= FTRACE_ITER_PRINTALL;
1540 /* reset in case of seek/pread */
1541 iter->flags &= ~FTRACE_ITER_HASH;
1507 return iter; 1542 return iter;
1508 } 1543 }
1509 1544
1510 if (iter->flags & FTRACE_ITER_HASH) 1545 if (iter->flags & FTRACE_ITER_HASH)
1511 return t_hash_start(m, pos); 1546 return t_hash_start(m, pos);
1512 1547
1548 /*
1549 * Unfortunately, we need to restart at ftrace_pages_start
1550 * every time we let go of the ftrace_mutex. This is because
1551 * those pointers can change without the lock.
1552 */
1513 iter->pg = ftrace_pages_start; 1553 iter->pg = ftrace_pages_start;
1514 iter->idx = 0; 1554 iter->idx = 0;
1515 for (l = 0; l <= *pos; ) { 1555 for (l = 0; l <= *pos; ) {
@@ -1518,10 +1558,14 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1518 break; 1558 break;
1519 } 1559 }
1520 1560
1521 if (!p && iter->flags & FTRACE_ITER_FILTER) 1561 if (!p) {
1522 return t_hash_start(m, pos); 1562 if (iter->flags & FTRACE_ITER_FILTER)
1563 return t_hash_start(m, pos);
1564
1565 return NULL;
1566 }
1523 1567
1524 return p; 1568 return iter;
1525} 1569}
1526 1570
1527static void t_stop(struct seq_file *m, void *p) 1571static void t_stop(struct seq_file *m, void *p)
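Note on the hunks above: the rework makes the iterator position explicit (iter->pos, iter->func_pos) and drops cached state when the file is seeked backwards, which is what lets set_ftrace_filter behave sanely under lseek()/pread(). One hedged observation: reset_iter_read() masks with (FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH); if the two flags are distinct bits, that AND evaluates to 0 and the statement clears nothing, so the intended mask is presumably the OR of the two (this was tightened up in later kernels). Below is a minimal userspace sketch of the rewind-on-seek idea only; my_iter, my_start, ITER_SECONDARY and the arrays are invented names, not ftrace internals.

/* Illustrative sketch only: an iterator that walks a primary list and
 * then a secondary one (compare FTRACE_ITER_HASH), and forgets that
 * cached state when the caller seeks backwards, mirroring
 * reset_iter_read()/t_start() above. */
#include <stdio.h>

#define ITER_SECONDARY 0x1

static const int primary[]   = { 1, 2, 3 };
static const int secondary[] = { 10, 20 };

struct my_iter {
	long pos;		/* last position handed out */
	unsigned int flags;	/* which list we are walking */
};

static const int *my_start(struct my_iter *it, long *pos)
{
	long want = *pos;

	/* A seek or pread went backwards: drop everything cached. */
	if (want < it->pos) {
		it->pos = 0;
		it->flags &= ~ITER_SECONDARY;
	}
	it->pos = want;

	if (want < 3)
		return &primary[want];

	it->flags |= ITER_SECONDARY;
	return (want - 3 < 2) ? &secondary[want - 3] : NULL;
}

int main(void)
{
	struct my_iter it = { 0, 0 };
	const int *p;
	long pos;

	for (pos = 0; (p = my_start(&it, &pos)); pos++)
		printf("%d ", *p);
	pos = 0;			/* seek back to the beginning */
	printf("\nrestart: %d\n", *my_start(&it, &pos));
	return 0;
}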
@@ -1532,16 +1576,18 @@ static void t_stop(struct seq_file *m, void *p)
1532static int t_show(struct seq_file *m, void *v) 1576static int t_show(struct seq_file *m, void *v)
1533{ 1577{
1534 struct ftrace_iterator *iter = m->private; 1578 struct ftrace_iterator *iter = m->private;
1535 struct dyn_ftrace *rec = v; 1579 struct dyn_ftrace *rec;
1536 1580
1537 if (iter->flags & FTRACE_ITER_HASH) 1581 if (iter->flags & FTRACE_ITER_HASH)
1538 return t_hash_show(m, v); 1582 return t_hash_show(m, iter);
1539 1583
1540 if (iter->flags & FTRACE_ITER_PRINTALL) { 1584 if (iter->flags & FTRACE_ITER_PRINTALL) {
1541 seq_printf(m, "#### all functions enabled ####\n"); 1585 seq_printf(m, "#### all functions enabled ####\n");
1542 return 0; 1586 return 0;
1543 } 1587 }
1544 1588
1589 rec = iter->func;
1590
1545 if (!rec) 1591 if (!rec)
1546 return 0; 1592 return 0;
1547 1593
@@ -1593,8 +1639,8 @@ ftrace_failures_open(struct inode *inode, struct file *file)
1593 1639
1594 ret = ftrace_avail_open(inode, file); 1640 ret = ftrace_avail_open(inode, file);
1595 if (!ret) { 1641 if (!ret) {
1596 m = (struct seq_file *)file->private_data; 1642 m = file->private_data;
1597 iter = (struct ftrace_iterator *)m->private; 1643 iter = m->private;
1598 iter->flags = FTRACE_ITER_FAILURES; 1644 iter->flags = FTRACE_ITER_FAILURES;
1599 } 1645 }
1600 1646
@@ -1884,7 +1930,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1884 struct hlist_head *hhd; 1930 struct hlist_head *hhd;
1885 struct hlist_node *n; 1931 struct hlist_node *n;
1886 unsigned long key; 1932 unsigned long key;
1887 int resched;
1888 1933
1889 key = hash_long(ip, FTRACE_HASH_BITS); 1934 key = hash_long(ip, FTRACE_HASH_BITS);
1890 1935
@@ -1898,12 +1943,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1898 * period. This syncs the hash iteration and freeing of items 1943 * period. This syncs the hash iteration and freeing of items
1899 * on the hash. rcu_read_lock is too dangerous here. 1944 * on the hash. rcu_read_lock is too dangerous here.
1900 */ 1945 */
1901 resched = ftrace_preempt_disable(); 1946 preempt_disable_notrace();
1902 hlist_for_each_entry_rcu(entry, n, hhd, node) { 1947 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1903 if (entry->ip == ip) 1948 if (entry->ip == ip)
1904 entry->ops->func(ip, parent_ip, &entry->data); 1949 entry->ops->func(ip, parent_ip, &entry->data);
1905 } 1950 }
1906 ftrace_preempt_enable(resched); 1951 preempt_enable_notrace();
1907} 1952}
1908 1953
1909static struct ftrace_ops trace_probe_ops __read_mostly = 1954static struct ftrace_ops trace_probe_ops __read_mostly =
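The hunk above replaces the ftrace_preempt_disable()/ftrace_preempt_enable(resched) pair with plain preempt_disable_notrace()/preempt_enable_notrace(): the _notrace variants pin the walk to the CPU without themselves generating function-trace events, so the per-call resched bookkeeping can go away. A sketch of the shape of the resulting code, assuming kernel context; my_probe and my_probe_call are invented names standing in for the ftrace_func_probe machinery.

/* Sketch (kernel context assumed, invented names): bracketing an
 * RCU-protected probe walk with the _notrace preemption helpers. */
#include <linux/preempt.h>
#include <linux/rculist.h>

struct my_probe {
	unsigned long ip;
	void (*func)(unsigned long ip, unsigned long parent_ip);
	struct hlist_node node;
};

static void my_probe_call(struct hlist_head *hhd,
			  unsigned long ip, unsigned long parent_ip)
{
	struct my_probe *p;
	struct hlist_node *n;

	/*
	 * Disabling preemption (without tracing the disable itself)
	 * keeps us on this CPU for the duration of the RCU list walk;
	 * no saved "resched" state needs to be threaded through.
	 */
	preempt_disable_notrace();
	hlist_for_each_entry_rcu(p, n, hhd, node) {
		if (p->ip == ip)
			p->func(ip, parent_ip);
	}
	preempt_enable_notrace();
}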
@@ -2300,6 +2345,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
2300 2345
2301#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2346#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2302static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; 2347static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2348static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2349
2303static int __init set_graph_function(char *str) 2350static int __init set_graph_function(char *str)
2304{ 2351{
2305 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); 2352 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -2426,6 +2473,7 @@ static const struct file_operations ftrace_notrace_fops = {
2426static DEFINE_MUTEX(graph_lock); 2473static DEFINE_MUTEX(graph_lock);
2427 2474
2428int ftrace_graph_count; 2475int ftrace_graph_count;
2476int ftrace_graph_filter_enabled;
2429unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2477unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2430 2478
2431static void * 2479static void *
@@ -2448,7 +2496,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2448 mutex_lock(&graph_lock); 2496 mutex_lock(&graph_lock);
2449 2497
2450 /* Nothing, tell g_show to print all functions are enabled */ 2498 /* Nothing, tell g_show to print all functions are enabled */
2451 if (!ftrace_graph_count && !*pos) 2499 if (!ftrace_graph_filter_enabled && !*pos)
2452 return (void *)1; 2500 return (void *)1;
2453 2501
2454 return __g_next(m, pos); 2502 return __g_next(m, pos);
@@ -2494,6 +2542,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2494 mutex_lock(&graph_lock); 2542 mutex_lock(&graph_lock);
2495 if ((file->f_mode & FMODE_WRITE) && 2543 if ((file->f_mode & FMODE_WRITE) &&
2496 (file->f_flags & O_TRUNC)) { 2544 (file->f_flags & O_TRUNC)) {
2545 ftrace_graph_filter_enabled = 0;
2497 ftrace_graph_count = 0; 2546 ftrace_graph_count = 0;
2498 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2547 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2499 } 2548 }
@@ -2519,7 +2568,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2519 struct dyn_ftrace *rec; 2568 struct dyn_ftrace *rec;
2520 struct ftrace_page *pg; 2569 struct ftrace_page *pg;
2521 int search_len; 2570 int search_len;
2522 int found = 0; 2571 int fail = 1;
2523 int type, not; 2572 int type, not;
2524 char *search; 2573 char *search;
2525 bool exists; 2574 bool exists;
@@ -2530,37 +2579,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2530 2579
2531 /* decode regex */ 2580 /* decode regex */
2532 type = filter_parse_regex(buffer, strlen(buffer), &search, &not); 2581 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2533 if (not) 2582 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2534 return -EINVAL; 2583 return -EBUSY;
2535 2584
2536 search_len = strlen(search); 2585 search_len = strlen(search);
2537 2586
2538 mutex_lock(&ftrace_lock); 2587 mutex_lock(&ftrace_lock);
2539 do_for_each_ftrace_rec(pg, rec) { 2588 do_for_each_ftrace_rec(pg, rec) {
2540 2589
2541 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2542 break;
2543
2544 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2590 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2545 continue; 2591 continue;
2546 2592
2547 if (ftrace_match_record(rec, search, search_len, type)) { 2593 if (ftrace_match_record(rec, search, search_len, type)) {
2548 /* ensure it is not already in the array */ 2594 /* if it is in the array */
2549 exists = false; 2595 exists = false;
2550 for (i = 0; i < *idx; i++) 2596 for (i = 0; i < *idx; i++) {
2551 if (array[i] == rec->ip) { 2597 if (array[i] == rec->ip) {
2552 exists = true; 2598 exists = true;
2553 break; 2599 break;
2554 } 2600 }
2555 if (!exists) 2601 }
2556 array[(*idx)++] = rec->ip; 2602
2557 found = 1; 2603 if (!not) {
2604 fail = 0;
2605 if (!exists) {
2606 array[(*idx)++] = rec->ip;
2607 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2608 goto out;
2609 }
2610 } else {
2611 if (exists) {
2612 array[i] = array[--(*idx)];
2613 array[*idx] = 0;
2614 fail = 0;
2615 }
2616 }
2558 } 2617 }
2559 } while_for_each_ftrace_rec(); 2618 } while_for_each_ftrace_rec();
2560 2619out:
2561 mutex_unlock(&ftrace_lock); 2620 mutex_unlock(&ftrace_lock);
2562 2621
2563 return found ? 0 : -EINVAL; 2622 if (fail)
2623 return -EINVAL;
2624
2625 ftrace_graph_filter_enabled = 1;
2626 return 0;
2564} 2627}
2565 2628
2566static ssize_t 2629static ssize_t
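ftrace_set_func() now honours the '!' prefix from the regex parser: a plain pattern appends matching instruction pointers to the array (bounded by FTRACE_GRAPH_MAX_FUNCS), while a negated pattern removes a previously added entry by swapping the last element into its slot. In practice that means echoing '!some_function' into set_graph_function (typically under /sys/kernel/debug/tracing/) drops it from the graph filter instead of returning -EINVAL. A small standalone sketch of the add/remove-by-swap array logic only; add_func, del_func and MAX_FUNCS are invented names.

/* Illustrative only: add/remove-by-swap on a fixed-size array,
 * mirroring the !not / exists handling in ftrace_set_func(). */
#include <stdio.h>

#define MAX_FUNCS 8

static int add_func(unsigned long *array, int *idx, unsigned long ip)
{
	int i;

	for (i = 0; i < *idx; i++)
		if (array[i] == ip)
			return 0;		/* already present */
	if (*idx >= MAX_FUNCS)
		return -1;			/* table full */
	array[(*idx)++] = ip;
	return 0;
}

static int del_func(unsigned long *array, int *idx, unsigned long ip)
{
	int i;

	for (i = 0; i < *idx; i++) {
		if (array[i] == ip) {
			/* Order is not significant: swap in the last slot. */
			array[i] = array[--(*idx)];
			array[*idx] = 0;
			return 0;
		}
	}
	return -1;				/* not found */
}

int main(void)
{
	unsigned long funcs[MAX_FUNCS] = { 0 };
	int count = 0;

	add_func(funcs, &count, 0x1000);
	add_func(funcs, &count, 0x2000);
	del_func(funcs, &count, 0x1000);
	printf("%d entries, first = %#lx\n", count, funcs[0]);
	return 0;
}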
@@ -2570,16 +2633,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2570 struct trace_parser parser; 2633 struct trace_parser parser;
2571 ssize_t read, ret; 2634 ssize_t read, ret;
2572 2635
2573 if (!cnt || cnt < 0) 2636 if (!cnt)
2574 return 0; 2637 return 0;
2575 2638
2576 mutex_lock(&graph_lock); 2639 mutex_lock(&graph_lock);
2577 2640
2578 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2579 ret = -EBUSY;
2580 goto out_unlock;
2581 }
2582
2583 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2641 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2584 ret = -ENOMEM; 2642 ret = -ENOMEM;
2585 goto out_unlock; 2643 goto out_unlock;
@@ -2612,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = {
2612 .read = seq_read, 2670 .read = seq_read,
2613 .write = ftrace_graph_write, 2671 .write = ftrace_graph_write,
2614 .release = ftrace_graph_release, 2672 .release = ftrace_graph_release,
2673 .llseek = seq_lseek,
2615}; 2674};
2616#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2675#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2617 2676
@@ -3222,8 +3281,8 @@ free:
3222} 3281}
3223 3282
3224static void 3283static void
3225ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, 3284ftrace_graph_probe_sched_switch(void *ignore,
3226 struct task_struct *next) 3285 struct task_struct *prev, struct task_struct *next)
3227{ 3286{
3228 unsigned long long timestamp; 3287 unsigned long long timestamp;
3229 int index; 3288 int index;
@@ -3277,7 +3336,7 @@ static int start_graph_tracing(void)
3277 } while (ret == -EAGAIN); 3336 } while (ret == -EAGAIN);
3278 3337
3279 if (!ret) { 3338 if (!ret) {
3280 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); 3339 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3281 if (ret) 3340 if (ret)
3282 pr_info("ftrace_graph: Couldn't activate tracepoint" 3341 pr_info("ftrace_graph: Couldn't activate tracepoint"
3283 " probe to kernel_sched_switch\n"); 3342 " probe to kernel_sched_switch\n");
@@ -3349,11 +3408,11 @@ void unregister_ftrace_graph(void)
3349 goto out; 3408 goto out;
3350 3409
3351 ftrace_graph_active--; 3410 ftrace_graph_active--;
3352 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
3353 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 3411 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
3354 ftrace_graph_entry = ftrace_graph_entry_stub; 3412 ftrace_graph_entry = ftrace_graph_entry_stub;
3355 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 3413 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
3356 unregister_pm_notifier(&ftrace_suspend_notifier); 3414 unregister_pm_notifier(&ftrace_suspend_notifier);
3415 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3357 3416
3358 out: 3417 out:
3359 mutex_unlock(&ftrace_lock); 3418 mutex_unlock(&ftrace_lock);
@@ -3364,6 +3423,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3364{ 3423{
3365 /* Make sure we do not use the parent ret_stack */ 3424 /* Make sure we do not use the parent ret_stack */
3366 t->ret_stack = NULL; 3425 t->ret_stack = NULL;
3426 t->curr_ret_stack = -1;
3367 3427
3368 if (ftrace_graph_active) { 3428 if (ftrace_graph_active) {
3369 struct ftrace_ret_stack *ret_stack; 3429 struct ftrace_ret_stack *ret_stack;
@@ -3373,7 +3433,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3373 GFP_KERNEL); 3433 GFP_KERNEL);
3374 if (!ret_stack) 3434 if (!ret_stack)
3375 return; 3435 return;
3376 t->curr_ret_stack = -1;
3377 atomic_set(&t->tracing_graph_pause, 0); 3436 atomic_set(&t->tracing_graph_pause, 0);
3378 atomic_set(&t->trace_overrun, 0); 3437 atomic_set(&t->trace_overrun, 0);
3379 t->ftrace_timestamp = 0; 3438 t->ftrace_timestamp = 0;
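The graph tracer hunks above adapt to the tracepoint API that threads a private data pointer through each probe: ftrace_graph_probe_sched_switch() gains a leading void * argument, and register/unregister_trace_sched_switch() take the matching cookie (NULL here). The unregister is also moved after ftrace_shutdown() so the probe cannot fire against a half-torn-down graph tracer, and curr_ret_stack is now initialised to -1 even when no ret_stack is allocated. A sketch of the probe shape under that API, assuming kernel context of this era; my_sched_switch_probe, my_register and my_unregister are invented names.

/* Sketch (kernel context assumed): a sched_switch probe under the
 * data-cookie tracepoint API used by this series. */
#include <linux/sched.h>
#include <trace/events/sched.h>

static void my_sched_switch_probe(void *data,
				  struct task_struct *prev,
				  struct task_struct *next)
{
	/* 'data' is whatever was passed at registration time (NULL below). */
	(void)data;
	(void)prev;
	(void)next;
}

static int my_register(void)
{
	return register_trace_sched_switch(my_sched_switch_probe, NULL);
}

static void my_unregister(void)
{
	unregister_trace_sched_switch(my_sched_switch_probe, NULL);
}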
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index a91da69f153a..000000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,511 +0,0 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
11#include <linux/debugfs.h>
12#include <linux/dcache.h>
13#include <linux/fs.h>
14
15#include <linux/kmemtrace.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20/* Select an alternative, minimalistic output rather than the original one */
21#define TRACE_KMEM_OPT_MINIMAL 0x1
22
23static struct tracer_opt kmem_opts[] = {
24 /* Default disable the minimalistic output */
25 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
26 { }
27};
28
29static struct tracer_flags kmem_tracer_flags = {
30 .val = 0,
31 .opts = kmem_opts
32};
33
34static struct trace_array *kmemtrace_array;
35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
46 struct trace_array *tr = kmemtrace_array;
47 struct kmemtrace_alloc_entry *entry;
48 struct ring_buffer_event *event;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
51 if (!event)
52 return;
53
54 entry = ring_buffer_event_data(event);
55 tracing_generic_entry_update(&entry->ent, 0, 0);
56
57 entry->ent.type = TRACE_KMEM_ALLOC;
58 entry->type_id = type_id;
59 entry->call_site = call_site;
60 entry->ptr = ptr;
61 entry->bytes_req = bytes_req;
62 entry->bytes_alloc = bytes_alloc;
63 entry->gfp_flags = gfp_flags;
64 entry->node = node;
65
66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
68
69 trace_wake_up();
70}
71
72static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
73 unsigned long call_site,
74 const void *ptr)
75{
76 struct ftrace_event_call *call = &event_kmem_free;
77 struct trace_array *tr = kmemtrace_array;
78 struct kmemtrace_free_entry *entry;
79 struct ring_buffer_event *event;
80
81 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
82 if (!event)
83 return;
84 entry = ring_buffer_event_data(event);
85 tracing_generic_entry_update(&entry->ent, 0, 0);
86
87 entry->ent.type = TRACE_KMEM_FREE;
88 entry->type_id = type_id;
89 entry->call_site = call_site;
90 entry->ptr = ptr;
91
92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
94
95 trace_wake_up();
96}
97
98static void kmemtrace_kmalloc(unsigned long call_site,
99 const void *ptr,
100 size_t bytes_req,
101 size_t bytes_alloc,
102 gfp_t gfp_flags)
103{
104 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
105 bytes_req, bytes_alloc, gfp_flags, -1);
106}
107
108static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
109 const void *ptr,
110 size_t bytes_req,
111 size_t bytes_alloc,
112 gfp_t gfp_flags)
113{
114 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
115 bytes_req, bytes_alloc, gfp_flags, -1);
116}
117
118static void kmemtrace_kmalloc_node(unsigned long call_site,
119 const void *ptr,
120 size_t bytes_req,
121 size_t bytes_alloc,
122 gfp_t gfp_flags,
123 int node)
124{
125 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
126 bytes_req, bytes_alloc, gfp_flags, node);
127}
128
129static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
130 const void *ptr,
131 size_t bytes_req,
132 size_t bytes_alloc,
133 gfp_t gfp_flags,
134 int node)
135{
136 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
137 bytes_req, bytes_alloc, gfp_flags, node);
138}
139
140static void kmemtrace_kfree(unsigned long call_site, const void *ptr)
141{
142 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
143}
144
145static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr)
146{
147 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
148}
149
150static int kmemtrace_start_probes(void)
151{
152 int err;
153
154 err = register_trace_kmalloc(kmemtrace_kmalloc);
155 if (err)
156 return err;
157 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
158 if (err)
159 return err;
160 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node);
161 if (err)
162 return err;
163 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
164 if (err)
165 return err;
166 err = register_trace_kfree(kmemtrace_kfree);
167 if (err)
168 return err;
169 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
170
171 return err;
172}
173
174static void kmemtrace_stop_probes(void)
175{
176 unregister_trace_kmalloc(kmemtrace_kmalloc);
177 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
178 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node);
179 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
180 unregister_trace_kfree(kmemtrace_kfree);
181 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
182}
183
184static int kmem_trace_init(struct trace_array *tr)
185{
186 kmemtrace_array = tr;
187
188 tracing_reset_online_cpus(tr);
189
190 kmemtrace_start_probes();
191
192 return 0;
193}
194
195static void kmem_trace_reset(struct trace_array *tr)
196{
197 kmemtrace_stop_probes();
198}
199
200static void kmemtrace_headers(struct seq_file *s)
201{
202 /* Don't need headers for the original kmemtrace output */
203 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
204 return;
205
206 seq_printf(s, "#\n");
207 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
208 " POINTER NODE CALLER\n");
209 seq_printf(s, "# FREE | | | | "
210 " | | | |\n");
211 seq_printf(s, "# |\n\n");
212}
213
214/*
215 * The following functions give the original output from kmemtrace,
216 * plus the origin CPU, since reordering occurs in-kernel now.
217 */
218
219#define KMEMTRACE_USER_ALLOC 0
220#define KMEMTRACE_USER_FREE 1
221
222struct kmemtrace_user_event {
223 u8 event_id;
224 u8 type_id;
225 u16 event_size;
226 u32 cpu;
227 u64 timestamp;
228 unsigned long call_site;
229 unsigned long ptr;
230};
231
232struct kmemtrace_user_event_alloc {
233 size_t bytes_req;
234 size_t bytes_alloc;
235 unsigned gfp_flags;
236 int node;
237};
238
239static enum print_line_t
240kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
241{
242 struct trace_seq *s = &iter->seq;
243 struct kmemtrace_alloc_entry *entry;
244 int ret;
245
246 trace_assign_type(entry, iter->ent);
247
248 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
249 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
250 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
251 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
252 (unsigned long)entry->gfp_flags, entry->node);
253
254 if (!ret)
255 return TRACE_TYPE_PARTIAL_LINE;
256 return TRACE_TYPE_HANDLED;
257}
258
259static enum print_line_t
260kmemtrace_print_free(struct trace_iterator *iter, int flags)
261{
262 struct trace_seq *s = &iter->seq;
263 struct kmemtrace_free_entry *entry;
264 int ret;
265
266 trace_assign_type(entry, iter->ent);
267
268 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
269 entry->type_id, (void *)entry->call_site,
270 (unsigned long)entry->ptr);
271
272 if (!ret)
273 return TRACE_TYPE_PARTIAL_LINE;
274 return TRACE_TYPE_HANDLED;
275}
276
277static enum print_line_t
278kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
279{
280 struct trace_seq *s = &iter->seq;
281 struct kmemtrace_alloc_entry *entry;
282 struct kmemtrace_user_event *ev;
283 struct kmemtrace_user_event_alloc *ev_alloc;
284
285 trace_assign_type(entry, iter->ent);
286
287 ev = trace_seq_reserve(s, sizeof(*ev));
288 if (!ev)
289 return TRACE_TYPE_PARTIAL_LINE;
290
291 ev->event_id = KMEMTRACE_USER_ALLOC;
292 ev->type_id = entry->type_id;
293 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
294 ev->cpu = iter->cpu;
295 ev->timestamp = iter->ts;
296 ev->call_site = entry->call_site;
297 ev->ptr = (unsigned long)entry->ptr;
298
299 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
300 if (!ev_alloc)
301 return TRACE_TYPE_PARTIAL_LINE;
302
303 ev_alloc->bytes_req = entry->bytes_req;
304 ev_alloc->bytes_alloc = entry->bytes_alloc;
305 ev_alloc->gfp_flags = entry->gfp_flags;
306 ev_alloc->node = entry->node;
307
308 return TRACE_TYPE_HANDLED;
309}
310
311static enum print_line_t
312kmemtrace_print_free_user(struct trace_iterator *iter, int flags)
313{
314 struct trace_seq *s = &iter->seq;
315 struct kmemtrace_free_entry *entry;
316 struct kmemtrace_user_event *ev;
317
318 trace_assign_type(entry, iter->ent);
319
320 ev = trace_seq_reserve(s, sizeof(*ev));
321 if (!ev)
322 return TRACE_TYPE_PARTIAL_LINE;
323
324 ev->event_id = KMEMTRACE_USER_FREE;
325 ev->type_id = entry->type_id;
326 ev->event_size = sizeof(*ev);
327 ev->cpu = iter->cpu;
328 ev->timestamp = iter->ts;
329 ev->call_site = entry->call_site;
330 ev->ptr = (unsigned long)entry->ptr;
331
332 return TRACE_TYPE_HANDLED;
333}
334
335/* The following two provide a more minimalistic output */
336static enum print_line_t
337kmemtrace_print_alloc_compress(struct trace_iterator *iter)
338{
339 struct kmemtrace_alloc_entry *entry;
340 struct trace_seq *s = &iter->seq;
341 int ret;
342
343 trace_assign_type(entry, iter->ent);
344
345 /* Alloc entry */
346 ret = trace_seq_printf(s, " + ");
347 if (!ret)
348 return TRACE_TYPE_PARTIAL_LINE;
349
350 /* Type */
351 switch (entry->type_id) {
352 case KMEMTRACE_TYPE_KMALLOC:
353 ret = trace_seq_printf(s, "K ");
354 break;
355 case KMEMTRACE_TYPE_CACHE:
356 ret = trace_seq_printf(s, "C ");
357 break;
358 case KMEMTRACE_TYPE_PAGES:
359 ret = trace_seq_printf(s, "P ");
360 break;
361 default:
362 ret = trace_seq_printf(s, "? ");
363 }
364
365 if (!ret)
366 return TRACE_TYPE_PARTIAL_LINE;
367
368 /* Requested */
369 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
370 if (!ret)
371 return TRACE_TYPE_PARTIAL_LINE;
372
373 /* Allocated */
374 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
375 if (!ret)
376 return TRACE_TYPE_PARTIAL_LINE;
377
378 /* Flags
379 * TODO: would be better to print the names of the GFP flags
380 */
381 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
382 if (!ret)
383 return TRACE_TYPE_PARTIAL_LINE;
384
385 /* Pointer to allocated */
386 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
387 if (!ret)
388 return TRACE_TYPE_PARTIAL_LINE;
389
390 /* Node and call site*/
391 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
392 (void *)entry->call_site);
393 if (!ret)
394 return TRACE_TYPE_PARTIAL_LINE;
395
396 return TRACE_TYPE_HANDLED;
397}
398
399static enum print_line_t
400kmemtrace_print_free_compress(struct trace_iterator *iter)
401{
402 struct kmemtrace_free_entry *entry;
403 struct trace_seq *s = &iter->seq;
404 int ret;
405
406 trace_assign_type(entry, iter->ent);
407
408 /* Free entry */
409 ret = trace_seq_printf(s, " - ");
410 if (!ret)
411 return TRACE_TYPE_PARTIAL_LINE;
412
413 /* Type */
414 switch (entry->type_id) {
415 case KMEMTRACE_TYPE_KMALLOC:
416 ret = trace_seq_printf(s, "K ");
417 break;
418 case KMEMTRACE_TYPE_CACHE:
419 ret = trace_seq_printf(s, "C ");
420 break;
421 case KMEMTRACE_TYPE_PAGES:
422 ret = trace_seq_printf(s, "P ");
423 break;
424 default:
425 ret = trace_seq_printf(s, "? ");
426 }
427
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 /* Skip requested/allocated/flags */
432 ret = trace_seq_printf(s, " ");
433 if (!ret)
434 return TRACE_TYPE_PARTIAL_LINE;
435
436 /* Pointer to allocated */
437 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
438 if (!ret)
439 return TRACE_TYPE_PARTIAL_LINE;
440
441 /* Skip node and print call site*/
442 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
443 if (!ret)
444 return TRACE_TYPE_PARTIAL_LINE;
445
446 return TRACE_TYPE_HANDLED;
447}
448
449static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
450{
451 struct trace_entry *entry = iter->ent;
452
453 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
454 return TRACE_TYPE_UNHANDLED;
455
456 switch (entry->type) {
457 case TRACE_KMEM_ALLOC:
458 return kmemtrace_print_alloc_compress(iter);
459 case TRACE_KMEM_FREE:
460 return kmemtrace_print_free_compress(iter);
461 default:
462 return TRACE_TYPE_UNHANDLED;
463 }
464}
465
466static struct trace_event kmem_trace_alloc = {
467 .type = TRACE_KMEM_ALLOC,
468 .trace = kmemtrace_print_alloc,
469 .binary = kmemtrace_print_alloc_user,
470};
471
472static struct trace_event kmem_trace_free = {
473 .type = TRACE_KMEM_FREE,
474 .trace = kmemtrace_print_free,
475 .binary = kmemtrace_print_free_user,
476};
477
478static struct tracer kmem_tracer __read_mostly = {
479 .name = "kmemtrace",
480 .init = kmem_trace_init,
481 .reset = kmem_trace_reset,
482 .print_line = kmemtrace_print_line,
483 .print_header = kmemtrace_headers,
484 .flags = &kmem_tracer_flags
485};
486
487void kmemtrace_init(void)
488{
489 /* earliest opportunity to start kmem tracing */
490}
491
492static int __init init_kmem_tracer(void)
493{
494 if (!register_ftrace_event(&kmem_trace_alloc)) {
495 pr_warning("Warning: could not register kmem events\n");
496 return 1;
497 }
498
499 if (!register_ftrace_event(&kmem_trace_free)) {
500 pr_warning("Warning: could not register kmem events\n");
501 return 1;
502 }
503
504 if (register_tracer(&kmem_tracer) != 0) {
505 pr_warning("Warning: could not register the kmem tracer\n");
506 return 1;
507 }
508
509 return 0;
510}
511device_initcall(init_kmem_tracer);
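The whole of kmemtrace is removed here; its role had been taken over by the kmem trace events. One detail visible in the deleted code: kmemtrace_start_probes() returned on the first registration failure without unregistering the probes that had already succeeded. A standalone sketch of the register-with-rollback pattern that avoids that, using stubbed registration functions (register_probe_a/b, unregister_probe_a are invented names, not kernel APIs).

/* Illustrative only: register several probes and unwind on failure
 * instead of returning early with some probes still live. */
#include <stdio.h>

static int register_probe_a(void) { return 0; }	/* stub */
static int register_probe_b(void) { return 0; }	/* stub */
static void unregister_probe_a(void) { }	/* stub */

static int register_all_probes(void)
{
	int err;

	err = register_probe_a();
	if (err)
		return err;

	err = register_probe_b();
	if (err)
		goto out_a;		/* unwind what already succeeded */

	return 0;

out_a:
	unregister_probe_a();
	return err;
}

int main(void)
{
	printf("register_all_probes() = %d\n", register_all_probes());
	return 0;
}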
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e6..f55fcf61b223 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,10 +9,12 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
13 12
14#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 14#include <trace/events/power.h>
16 15
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_frequency); 16#ifdef EVENT_POWER_TRACING_DEPRECATED
17EXPORT_TRACEPOINT_SYMBOL_GPL(power_start);
18#endif
19EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
18 20
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..bd1c35a4fbcc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22 23
24#include <asm/local.h>
23#include "trace.h" 25#include "trace.h"
24 26
25/* 27/*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
207#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
208 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
211 221
@@ -214,6 +224,9 @@ enum {
214 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
215}; 225};
216 226
227#define skip_time_extend(event) \
228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
229
217static inline int rb_null_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
218{ 231{
219 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -238,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
238 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
239} 252}
240 253
241/* inline for ring buffer fast paths */ 254/*
242static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
243rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
244{ 261{
245 switch (event->type_len) { 262 switch (event->type_len) {
@@ -264,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
264 return 0; 281 return 0;
265} 282}
266 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
267/** 301/**
268 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
269 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
270 */ 310 */
271unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
272{ 312{
273 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
274 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
275 return length; 320 return length;
276 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -284,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
284static void * 329static void *
285rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
286{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
287 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
288 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
289 if (event->type_len) 336 if (event->type_len)
@@ -309,6 +356,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
309#define TS_MASK ((1ULL << TS_SHIFT) - 1) 356#define TS_MASK ((1ULL << TS_SHIFT) - 1)
310#define TS_DELTA_TEST (~TS_MASK) 357#define TS_DELTA_TEST (~TS_MASK)
311 358
359/* Flag when events were overwritten */
360#define RB_MISSED_EVENTS (1 << 31)
361/* Missed count stored at end */
362#define RB_MISSED_STORED (1 << 30)
363
312struct buffer_data_page { 364struct buffer_data_page {
313 u64 time_stamp; /* page time stamp */ 365 u64 time_stamp; /* page time stamp */
314 local_t commit; /* write committed index */ 366 local_t commit; /* write committed index */
@@ -328,6 +380,7 @@ struct buffer_page {
328 local_t write; /* index for next write */ 380 local_t write; /* index for next write */
329 unsigned read; /* index for next read */ 381 unsigned read; /* index for next read */
330 local_t entries; /* entries on this page */ 382 local_t entries; /* entries on this page */
383 unsigned long real_end; /* real end of data */
331 struct buffer_data_page *page; /* Actual data page */ 384 struct buffer_data_page *page; /* Actual data page */
332}; 385};
333 386
@@ -388,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
388/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 441/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
389#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 442#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
390 443
391/* Max number of timestamps that can fit on a page */
392#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
393
394int ring_buffer_print_page_header(struct trace_seq *s) 444int ring_buffer_print_page_header(struct trace_seq *s)
395{ 445{
396 struct buffer_data_page field; 446 struct buffer_data_page field;
@@ -407,6 +457,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
407 (unsigned int)sizeof(field.commit), 457 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long)); 458 (unsigned int)is_signed_type(long));
409 459
460 ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
461 "offset:%u;\tsize:%u;\tsigned:%u;\n",
462 (unsigned int)offsetof(typeof(field), commit),
463 1,
464 (unsigned int)is_signed_type(long));
465
410 ret = trace_seq_printf(s, "\tfield: char data;\t" 466 ret = trace_seq_printf(s, "\tfield: char data;\t"
411 "offset:%u;\tsize:%u;\tsigned:%u;\n", 467 "offset:%u;\tsize:%u;\tsigned:%u;\n",
412 (unsigned int)offsetof(typeof(field), data), 468 (unsigned int)offsetof(typeof(field), data),
@@ -421,6 +477,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
421 */ 477 */
422struct ring_buffer_per_cpu { 478struct ring_buffer_per_cpu {
423 int cpu; 479 int cpu;
480 atomic_t record_disabled;
424 struct ring_buffer *buffer; 481 struct ring_buffer *buffer;
425 spinlock_t reader_lock; /* serialize readers */ 482 spinlock_t reader_lock; /* serialize readers */
426 arch_spinlock_t lock; 483 arch_spinlock_t lock;
@@ -430,6 +487,8 @@ struct ring_buffer_per_cpu {
430 struct buffer_page *tail_page; /* write to tail */ 487 struct buffer_page *tail_page; /* write to tail */
431 struct buffer_page *commit_page; /* committed pages */ 488 struct buffer_page *commit_page; /* committed pages */
432 struct buffer_page *reader_page; 489 struct buffer_page *reader_page;
490 unsigned long lost_events;
491 unsigned long last_overrun;
433 local_t commit_overrun; 492 local_t commit_overrun;
434 local_t overrun; 493 local_t overrun;
435 local_t entries; 494 local_t entries;
@@ -438,7 +497,6 @@ struct ring_buffer_per_cpu {
438 unsigned long read; 497 unsigned long read;
439 u64 write_stamp; 498 u64 write_stamp;
440 u64 read_stamp; 499 u64 read_stamp;
441 atomic_t record_disabled;
442}; 500};
443 501
444struct ring_buffer { 502struct ring_buffer {
@@ -464,6 +522,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 522 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 523 unsigned long head;
466 struct buffer_page *head_page; 524 struct buffer_page *head_page;
525 struct buffer_page *cache_reader_page;
526 unsigned long cache_read;
467 u64 read_stamp; 527 u64 read_stamp;
468}; 528};
469 529
@@ -1198,18 +1258,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1198 1258
1199 for (i = 0; i < nr_pages; i++) { 1259 for (i = 0; i < nr_pages; i++) {
1200 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1260 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1201 return; 1261 goto out;
1202 p = cpu_buffer->pages->next; 1262 p = cpu_buffer->pages->next;
1203 bpage = list_entry(p, struct buffer_page, list); 1263 bpage = list_entry(p, struct buffer_page, list);
1204 list_del_init(&bpage->list); 1264 list_del_init(&bpage->list);
1205 free_buffer_page(bpage); 1265 free_buffer_page(bpage);
1206 } 1266 }
1207 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1267 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1208 return; 1268 goto out;
1209 1269
1210 rb_reset_cpu(cpu_buffer); 1270 rb_reset_cpu(cpu_buffer);
1211 rb_check_pages(cpu_buffer); 1271 rb_check_pages(cpu_buffer);
1212 1272
1273out:
1213 spin_unlock_irq(&cpu_buffer->reader_lock); 1274 spin_unlock_irq(&cpu_buffer->reader_lock);
1214} 1275}
1215 1276
@@ -1226,7 +1287,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1226 1287
1227 for (i = 0; i < nr_pages; i++) { 1288 for (i = 0; i < nr_pages; i++) {
1228 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1289 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1229 return; 1290 goto out;
1230 p = pages->next; 1291 p = pages->next;
1231 bpage = list_entry(p, struct buffer_page, list); 1292 bpage = list_entry(p, struct buffer_page, list);
1232 list_del_init(&bpage->list); 1293 list_del_init(&bpage->list);
@@ -1235,6 +1296,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1235 rb_reset_cpu(cpu_buffer); 1296 rb_reset_cpu(cpu_buffer);
1236 rb_check_pages(cpu_buffer); 1297 rb_check_pages(cpu_buffer);
1237 1298
1299out:
1238 spin_unlock_irq(&cpu_buffer->reader_lock); 1300 spin_unlock_irq(&cpu_buffer->reader_lock);
1239} 1301}
1240 1302
@@ -1518,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1518 iter->head = 0; 1580 iter->head = 0;
1519} 1581}
1520 1582
1583/* Slow path, do not inline */
1584static noinline struct ring_buffer_event *
1585rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1586{
1587 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1588
1589 /* Not the first event on the page? */
1590 if (rb_event_index(event)) {
1591 event->time_delta = delta & TS_MASK;
1592 event->array[0] = delta >> TS_SHIFT;
1593 } else {
1594 /* nope, just zero it */
1595 event->time_delta = 0;
1596 event->array[0] = 0;
1597 }
1598
1599 return skip_time_extend(event);
1600}
1601
1521/** 1602/**
1522 * ring_buffer_update_event - update event type and data 1603 * ring_buffer_update_event - update event type and data
1523 * @event: the event to update 1604 * @event: the event to update
@@ -1530,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1530 * data field. 1611 * data field.
1531 */ 1612 */
1532static void 1613static void
1533rb_update_event(struct ring_buffer_event *event, 1614rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1534 unsigned type, unsigned length) 1615 struct ring_buffer_event *event, unsigned length,
1616 int add_timestamp, u64 delta)
1535{ 1617{
1536 event->type_len = type; 1618 /* Only a commit updates the timestamp */
1537 1619 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1538 switch (type) { 1620 delta = 0;
1539
1540 case RINGBUF_TYPE_PADDING:
1541 case RINGBUF_TYPE_TIME_EXTEND:
1542 case RINGBUF_TYPE_TIME_STAMP:
1543 break;
1544 1621
1545 case 0: 1622 /*
1546 length -= RB_EVNT_HDR_SIZE; 1623 * If we need to add a timestamp, then we
1547 if (length > RB_MAX_SMALL_DATA) 1624 * add it to the start of the reserved space.
1548 event->array[0] = length; 1625 */
1549 else 1626 if (unlikely(add_timestamp)) {
1550 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1627 event = rb_add_time_stamp(event, delta);
1551 break; 1628 length -= RB_LEN_TIME_EXTEND;
1552 default: 1629 delta = 0;
1553 BUG();
1554 } 1630 }
1631
1632 event->time_delta = delta;
1633 length -= RB_EVNT_HDR_SIZE;
1634 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1635 event->type_len = 0;
1636 event->array[0] = length;
1637 } else
1638 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1555} 1639}
1556 1640
1557/* 1641/*
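With rb_add_time_stamp() now a simple slow-path helper, a delta that does not fit in the event's 27-bit time_delta field is carried by a TIME_EXTEND event: the low TS_SHIFT bits go into event->time_delta and the remainder into event->array[0], and the commit side rebuilds the full delta before bumping write_stamp. A small worked sketch of that split and its reconstruction, outside the kernel; the delta value is made up, only TS_SHIFT/TS_MASK mirror ring_buffer.c.

/* Illustrative only: splitting and rebuilding an oversized delta the
 * way rb_add_time_stamp()/rb_update_write_stamp() do. */
#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT 27
#define TS_MASK  ((1ULL << TS_SHIFT) - 1)

int main(void)
{
	uint64_t delta = (1ULL << 40) + 12345;	/* too big for 27 bits */

	uint32_t time_delta = delta & TS_MASK;	/* low bits, in the event */
	uint32_t high = delta >> TS_SHIFT;	/* high bits, in array[0] */

	/* The commit path rebuilds the delta before updating write_stamp. */
	uint64_t rebuilt = ((uint64_t)high << TS_SHIFT) + time_delta;

	printf("delta=%llu rebuilt=%llu\n",
	       (unsigned long long)delta, (unsigned long long)rebuilt);
	return 0;
}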
@@ -1719,11 +1803,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1719 if (!length) 1803 if (!length)
1720 length = 1; 1804 length = 1;
1721 1805
1722 if (length > RB_MAX_SMALL_DATA) 1806 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1723 length += sizeof(event.array[0]); 1807 length += sizeof(event.array[0]);
1724 1808
1725 length += RB_EVNT_HDR_SIZE; 1809 length += RB_EVNT_HDR_SIZE;
1726 length = ALIGN(length, RB_ALIGNMENT); 1810 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1727 1811
1728 return length; 1812 return length;
1729} 1813}
@@ -1740,6 +1824,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1740 * must fill the old tail_page with padding. 1824 * must fill the old tail_page with padding.
1741 */ 1825 */
1742 if (tail >= BUF_PAGE_SIZE) { 1826 if (tail >= BUF_PAGE_SIZE) {
1827 /*
1828 * If the page was filled, then we still need
1829 * to update the real_end. Reset it to zero
1830 * and the reader will ignore it.
1831 */
1832 if (tail == BUF_PAGE_SIZE)
1833 tail_page->real_end = 0;
1834
1743 local_sub(length, &tail_page->write); 1835 local_sub(length, &tail_page->write);
1744 return; 1836 return;
1745 } 1837 }
@@ -1748,6 +1840,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1748 kmemcheck_annotate_bitfield(event, bitfield); 1840 kmemcheck_annotate_bitfield(event, bitfield);
1749 1841
1750 /* 1842 /*
1843 * Save the original length to the metadata.
1844 * This will be used by the reader to add the lost
1845 * event counter.
1846 */
1847 tail_page->real_end = tail;
1848
1849 /*
1751 * If this event is bigger than the minimum size, then 1850 * If this event is bigger than the minimum size, then
1752 * we need to be careful that we don't subtract the 1851 * we need to be careful that we don't subtract the
1753 * write counter enough to allow another writer to slip 1852 * write counter enough to allow another writer to slip
@@ -1780,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1780 local_sub(length, &tail_page->write); 1879 local_sub(length, &tail_page->write);
1781} 1880}
1782 1881
1783static struct ring_buffer_event * 1882/*
1883 * This is the slow path, force gcc not to inline it.
1884 */
1885static noinline struct ring_buffer_event *
1784rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1886rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1785 unsigned long length, unsigned long tail, 1887 unsigned long length, unsigned long tail,
1786 struct buffer_page *tail_page, u64 *ts) 1888 struct buffer_page *tail_page, u64 ts)
1787{ 1889{
1788 struct buffer_page *commit_page = cpu_buffer->commit_page; 1890 struct buffer_page *commit_page = cpu_buffer->commit_page;
1789 struct ring_buffer *buffer = cpu_buffer->buffer; 1891 struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1866,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1866 * Nested commits always have zero deltas, so 1968 * Nested commits always have zero deltas, so
1867 * just reread the time stamp 1969 * just reread the time stamp
1868 */ 1970 */
1869 *ts = rb_time_stamp(buffer); 1971 ts = rb_time_stamp(buffer);
1870 next_page->page->time_stamp = *ts; 1972 next_page->page->time_stamp = ts;
1871 } 1973 }
1872 1974
1873 out_again: 1975 out_again:
@@ -1886,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1886 1988
1887static struct ring_buffer_event * 1989static struct ring_buffer_event *
1888__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1990__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1889 unsigned type, unsigned long length, u64 *ts) 1991 unsigned long length, u64 ts,
1992 u64 delta, int add_timestamp)
1890{ 1993{
1891 struct buffer_page *tail_page; 1994 struct buffer_page *tail_page;
1892 struct ring_buffer_event *event; 1995 struct ring_buffer_event *event;
1893 unsigned long tail, write; 1996 unsigned long tail, write;
1894 1997
1998 /*
1999 * If the time delta since the last event is too big to
2000 * hold in the time field of the event, then we append a
2001 * TIME EXTEND event ahead of the data event.
2002 */
2003 if (unlikely(add_timestamp))
2004 length += RB_LEN_TIME_EXTEND;
2005
1895 tail_page = cpu_buffer->tail_page; 2006 tail_page = cpu_buffer->tail_page;
1896 write = local_add_return(length, &tail_page->write); 2007 write = local_add_return(length, &tail_page->write);
1897 2008
@@ -1900,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1900 tail = write - length; 2011 tail = write - length;
1901 2012
1902 /* See if we shot past the end of this buffer page */ 2013 /* See if we shot past the end of this buffer page */
1903 if (write > BUF_PAGE_SIZE) 2014 if (unlikely(write > BUF_PAGE_SIZE))
1904 return rb_move_tail(cpu_buffer, length, tail, 2015 return rb_move_tail(cpu_buffer, length, tail,
1905 tail_page, ts); 2016 tail_page, ts);
1906 2017
@@ -1908,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1908 2019
1909 event = __rb_page_index(tail_page, tail); 2020 event = __rb_page_index(tail_page, tail);
1910 kmemcheck_annotate_bitfield(event, bitfield); 2021 kmemcheck_annotate_bitfield(event, bitfield);
1911 rb_update_event(event, type, length); 2022 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1912 2023
1913 /* The passed in type is zero for DATA */ 2024 local_inc(&tail_page->entries);
1914 if (likely(!type))
1915 local_inc(&tail_page->entries);
1916 2025
1917 /* 2026 /*
1918 * If this is the first commit on the page, then update 2027 * If this is the first commit on the page, then update
1919 * its timestamp. 2028 * its timestamp.
1920 */ 2029 */
1921 if (!tail) 2030 if (!tail)
1922 tail_page->page->time_stamp = *ts; 2031 tail_page->page->time_stamp = ts;
1923 2032
1924 return event; 2033 return event;
1925} 2034}
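__rb_reserve_next() claims buffer space with a single local_add_return() on tail_page->write: the reserved region is [write - length, write), and overshooting BUF_PAGE_SIZE sends the writer down the out-of-line rb_move_tail() path (which also resets the over-committed write index). A hedged userspace sketch of the same reserve-by-fetch-add idea, using C11 atomics rather than the kernel's local_t; MY_PAGE_SIZE, page_write and reserve() are invented names, and the slow path is reduced to returning -1.

/* Illustrative only: reserving a region of a page with one atomic
 * fetch-add, mirroring the write/tail computation above. */
#include <stdatomic.h>
#include <stdio.h>

#define MY_PAGE_SIZE 4096

static atomic_ulong page_write;	/* index of the next free byte */

/* Returns the start offset of the reserved region, or -1 if the
 * reservation spills past the page and a slow path must take over. */
static long reserve(unsigned long length)
{
	unsigned long write, tail;

	write = atomic_fetch_add(&page_write, length) + length;
	tail = write - length;

	if (write > MY_PAGE_SIZE)
		return -1;	/* slow path: move to the next page */

	return (long)tail;
}

int main(void)
{
	printf("first event at offset %ld\n", reserve(64));
	printf("second event at offset %ld\n", reserve(64));
	return 0;
}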
@@ -1934,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1934 unsigned long addr; 2043 unsigned long addr;
1935 2044
1936 new_index = rb_event_index(event); 2045 new_index = rb_event_index(event);
1937 old_index = new_index + rb_event_length(event); 2046 old_index = new_index + rb_event_ts_length(event);
1938 addr = (unsigned long)event; 2047 addr = (unsigned long)event;
1939 addr &= PAGE_MASK; 2048 addr &= PAGE_MASK;
1940 2049
@@ -1960,80 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1960 return 0; 2069 return 0;
1961} 2070}
1962 2071
1963static int
1964rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1965 u64 *ts, u64 *delta)
1966{
1967 struct ring_buffer_event *event;
1968 static int once;
1969 int ret;
1970
1971 if (unlikely(*delta > (1ULL << 59) && !once++)) {
1972 printk(KERN_WARNING "Delta way too big! %llu"
1973 " ts=%llu write stamp = %llu\n",
1974 (unsigned long long)*delta,
1975 (unsigned long long)*ts,
1976 (unsigned long long)cpu_buffer->write_stamp);
1977 WARN_ON(1);
1978 }
1979
1980 /*
1981 * The delta is too big, we need to add a
1982 * new timestamp.
1983 */
1984 event = __rb_reserve_next(cpu_buffer,
1985 RINGBUF_TYPE_TIME_EXTEND,
1986 RB_LEN_TIME_EXTEND,
1987 ts);
1988 if (!event)
1989 return -EBUSY;
1990
1991 if (PTR_ERR(event) == -EAGAIN)
1992 return -EAGAIN;
1993
1994 /* Only a committed time event can update the write stamp */
1995 if (rb_event_is_commit(cpu_buffer, event)) {
1996 /*
1997 * If this is the first on the page, then it was
1998 * updated with the page itself. Try to discard it
1999 * and if we can't just make it zero.
2000 */
2001 if (rb_event_index(event)) {
2002 event->time_delta = *delta & TS_MASK;
2003 event->array[0] = *delta >> TS_SHIFT;
2004 } else {
2005 /* try to discard, since we do not need this */
2006 if (!rb_try_to_discard(cpu_buffer, event)) {
2007 /* nope, just zero it */
2008 event->time_delta = 0;
2009 event->array[0] = 0;
2010 }
2011 }
2012 cpu_buffer->write_stamp = *ts;
2013 /* let the caller know this was the commit */
2014 ret = 1;
2015 } else {
2016 /* Try to discard the event */
2017 if (!rb_try_to_discard(cpu_buffer, event)) {
2018 /* Darn, this is just wasted space */
2019 event->time_delta = 0;
2020 event->array[0] = 0;
2021 }
2022 ret = 0;
2023 }
2024
2025 *delta = 0;
2026
2027 return ret;
2028}
2029
2030static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2072static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2031{ 2073{
2032 local_inc(&cpu_buffer->committing); 2074 local_inc(&cpu_buffer->committing);
2033 local_inc(&cpu_buffer->commits); 2075 local_inc(&cpu_buffer->commits);
2034} 2076}
2035 2077
2036static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2078static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2037{ 2079{
2038 unsigned long commits; 2080 unsigned long commits;
2039 2081
@@ -2071,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2071 unsigned long length) 2113 unsigned long length)
2072{ 2114{
2073 struct ring_buffer_event *event; 2115 struct ring_buffer_event *event;
2074 u64 ts, delta = 0; 2116 u64 ts, delta;
2075 int commit = 0;
2076 int nr_loops = 0; 2117 int nr_loops = 0;
2118 int add_timestamp;
2119 u64 diff;
2077 2120
2078 rb_start_commit(cpu_buffer); 2121 rb_start_commit(cpu_buffer);
2079 2122
@@ -2094,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2094 2137
2095 length = rb_calculate_event_length(length); 2138 length = rb_calculate_event_length(length);
2096 again: 2139 again:
2140 add_timestamp = 0;
2141 delta = 0;
2142
2097 /* 2143 /*
2098 * We allow for interrupts to reenter here and do a trace. 2144 * We allow for interrupts to reenter here and do a trace.
2099 * If one does, it will cause this original code to loop 2145 * If one does, it will cause this original code to loop
@@ -2107,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2107 goto out_fail; 2153 goto out_fail;
2108 2154
2109 ts = rb_time_stamp(cpu_buffer->buffer); 2155 ts = rb_time_stamp(cpu_buffer->buffer);
2156 diff = ts - cpu_buffer->write_stamp;
2110 2157
2111 /* 2158 /* make sure this diff is calculated here */
2112 * Only the first commit can update the timestamp. 2159 barrier();
2113 * Yes there is a race here. If an interrupt comes in
2114 * just after the conditional and it traces too, then it
2115 * will also check the deltas. More than one timestamp may
2116 * also be made. But only the entry that did the actual
2117 * commit will be something other than zero.
2118 */
2119 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2120 rb_page_write(cpu_buffer->tail_page) ==
2121 rb_commit_index(cpu_buffer))) {
2122 u64 diff;
2123
2124 diff = ts - cpu_buffer->write_stamp;
2125
2126 /* make sure this diff is calculated here */
2127 barrier();
2128
2129 /* Did the write stamp get updated already? */
2130 if (unlikely(ts < cpu_buffer->write_stamp))
2131 goto get_event;
2132 2160
2161 /* Did the write stamp get updated already? */
2162 if (likely(ts >= cpu_buffer->write_stamp)) {
2133 delta = diff; 2163 delta = diff;
2134 if (unlikely(test_time_stamp(delta))) { 2164 if (unlikely(test_time_stamp(delta))) {
2135 2165 WARN_ONCE(delta > (1ULL << 59),
2136 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2137 if (commit == -EBUSY) 2167 (unsigned long long)delta,
2138 goto out_fail; 2168 (unsigned long long)ts,
2139 2169 (unsigned long long)cpu_buffer->write_stamp);
2140 if (commit == -EAGAIN) 2170 add_timestamp = 1;
2141 goto again;
2142
2143 RB_WARN_ON(cpu_buffer, commit < 0);
2144 } 2171 }
2145 } 2172 }
2146 2173
2147 get_event: 2174 event = __rb_reserve_next(cpu_buffer, length, ts,
2148 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2175 delta, add_timestamp);
2149 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2176 if (unlikely(PTR_ERR(event) == -EAGAIN))
2150 goto again; 2177 goto again;
2151 2178
2152 if (!event) 2179 if (!event)
2153 goto out_fail; 2180 goto out_fail;
2154 2181
2155 if (!rb_event_is_commit(cpu_buffer, event))
2156 delta = 0;
2157
2158 event->time_delta = delta;
2159
2160 return event; 2182 return event;
2161 2183
2162 out_fail: 2184 out_fail:
@@ -2168,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2168 2190
2169#define TRACE_RECURSIVE_DEPTH 16 2191#define TRACE_RECURSIVE_DEPTH 16
2170 2192
2171static int trace_recursive_lock(void) 2193/* Keep this code out of the fast path cache */
2194static noinline void trace_recursive_fail(void)
2172{ 2195{
2173 current->trace_recursion++;
2174
2175 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2176 return 0;
2177
2178 /* Disable all tracing before we do anything else */ 2196 /* Disable all tracing before we do anything else */
2179 tracing_off_permanent(); 2197 tracing_off_permanent();
2180 2198
@@ -2186,10 +2204,21 @@ static int trace_recursive_lock(void)
2186 in_nmi()); 2204 in_nmi());
2187 2205
2188 WARN_ON_ONCE(1); 2206 WARN_ON_ONCE(1);
2207}
2208
2209static inline int trace_recursive_lock(void)
2210{
2211 current->trace_recursion++;
2212
2213 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2214 return 0;
2215
2216 trace_recursive_fail();
2217
2189 return -1; 2218 return -1;
2190} 2219}
2191 2220
2192static void trace_recursive_unlock(void) 2221static inline void trace_recursive_unlock(void)
2193{ 2222{
2194 WARN_ON_ONCE(!current->trace_recursion); 2223 WARN_ON_ONCE(!current->trace_recursion);
2195 2224
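The recursion guard keeps its fast path inline (bump current->trace_recursion and compare against TRACE_RECURSIVE_DEPTH) and pushes the warning/shutdown work into the out-of-line trace_recursive_fail(). A tiny standalone sketch of the counter-based guard; a plain global stands in for the per-task field, and recursive_lock/recursive_unlock are invented names.

/* Illustrative only: a depth-limited recursion guard in the style of
 * trace_recursive_lock()/trace_recursive_unlock(). */
#include <stdio.h>

#define RECURSIVE_DEPTH 16

static int recursion;

static int recursive_lock(void)
{
	recursion++;
	if (recursion < RECURSIVE_DEPTH)
		return 0;
	/* Too deep: refuse to trace (the kernel additionally warns and
	 * turns tracing off permanently at this point). */
	return -1;
}

static void recursive_unlock(void)
{
	recursion--;
}

int main(void)
{
	if (!recursive_lock()) {
		printf("tracing allowed at depth %d\n", recursion);
		recursive_unlock();
	}
	return 0;
}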
@@ -2203,8 +2232,6 @@ static void trace_recursive_unlock(void)
2203 2232
2204#endif 2233#endif
2205 2234
2206static DEFINE_PER_CPU(int, rb_need_resched);
2207
2208/** 2235/**
2209 * ring_buffer_lock_reserve - reserve a part of the buffer 2236 * ring_buffer_lock_reserve - reserve a part of the buffer
2210 * @buffer: the ring buffer to reserve from 2237 * @buffer: the ring buffer to reserve from
@@ -2225,16 +2252,16 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2225{ 2252{
2226 struct ring_buffer_per_cpu *cpu_buffer; 2253 struct ring_buffer_per_cpu *cpu_buffer;
2227 struct ring_buffer_event *event; 2254 struct ring_buffer_event *event;
2228 int cpu, resched; 2255 int cpu;
2229 2256
2230 if (ring_buffer_flags != RB_BUFFERS_ON) 2257 if (ring_buffer_flags != RB_BUFFERS_ON)
2231 return NULL; 2258 return NULL;
2232 2259
2233 if (atomic_read(&buffer->record_disabled))
2234 return NULL;
2235
2236 /* If we are tracing schedule, we don't want to recurse */ 2260 /* If we are tracing schedule, we don't want to recurse */
2237 resched = ftrace_preempt_disable(); 2261 preempt_disable_notrace();
2262
2263 if (atomic_read(&buffer->record_disabled))
2264 goto out_nocheck;
2238 2265
2239 if (trace_recursive_lock()) 2266 if (trace_recursive_lock())
2240 goto out_nocheck; 2267 goto out_nocheck;
@@ -2256,21 +2283,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2256 if (!event) 2283 if (!event)
2257 goto out; 2284 goto out;
2258 2285
2259 /*
2260 * Need to store resched state on this cpu.
2261 * Only the first needs to.
2262 */
2263
2264 if (preempt_count() == 1)
2265 per_cpu(rb_need_resched, cpu) = resched;
2266
2267 return event; 2286 return event;
2268 2287
2269 out: 2288 out:
2270 trace_recursive_unlock(); 2289 trace_recursive_unlock();
2271 2290
2272 out_nocheck: 2291 out_nocheck:
2273 ftrace_preempt_enable(resched); 2292 preempt_enable_notrace();
2274 return NULL; 2293 return NULL;
2275} 2294}
2276EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2295EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2279,12 +2298,28 @@ static void
2279rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2298rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2280 struct ring_buffer_event *event) 2299 struct ring_buffer_event *event)
2281{ 2300{
2301 u64 delta;
2302
2282 /* 2303 /*
2283 * The event first in the commit queue updates the 2304 * The event first in the commit queue updates the
2284 * time stamp. 2305 * time stamp.
2285 */ 2306 */
2286 if (rb_event_is_commit(cpu_buffer, event)) 2307 if (rb_event_is_commit(cpu_buffer, event)) {
2287 cpu_buffer->write_stamp += event->time_delta; 2308 /*
2309 * A commit event that is first on a page
2310 * updates the write timestamp with the page stamp
2311 */
2312 if (!rb_event_index(event))
2313 cpu_buffer->write_stamp =
2314 cpu_buffer->commit_page->page->time_stamp;
2315 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2316 delta = event->array[0];
2317 delta <<= TS_SHIFT;
2318 delta += event->time_delta;
2319 cpu_buffer->write_stamp += delta;
2320 } else
2321 cpu_buffer->write_stamp += event->time_delta;
2322 }
2288} 2323}
2289 2324
2290static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2325static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2316,13 +2351,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2316 2351
2317 trace_recursive_unlock(); 2352 trace_recursive_unlock();
2318 2353
2319 /* 2354 preempt_enable_notrace();
2320 * Only the last preempt count needs to restore preemption.
2321 */
2322 if (preempt_count() == 1)
2323 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2324 else
2325 preempt_enable_no_resched_notrace();
2326 2355
2327 return 0; 2356 return 0;
2328} 2357}
@@ -2330,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2330 2359
2331static inline void rb_event_discard(struct ring_buffer_event *event) 2360static inline void rb_event_discard(struct ring_buffer_event *event)
2332{ 2361{
2362 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2363 event = skip_time_extend(event);
2364
2333 /* array[0] holds the actual length for the discarded event */ 2365 /* array[0] holds the actual length for the discarded event */
2334 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2366 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2335 event->type_len = RINGBUF_TYPE_PADDING; 2367 event->type_len = RINGBUF_TYPE_PADDING;
@@ -2430,13 +2462,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
2430 2462
2431 trace_recursive_unlock(); 2463 trace_recursive_unlock();
2432 2464
2433 /* 2465 preempt_enable_notrace();
2434 * Only the last preempt count needs to restore preemption.
2435 */
2436 if (preempt_count() == 1)
2437 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2438 else
2439 preempt_enable_no_resched_notrace();
2440 2466
2441} 2467}
2442EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); 2468EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2462,15 +2488,15 @@ int ring_buffer_write(struct ring_buffer *buffer,
2462 struct ring_buffer_event *event; 2488 struct ring_buffer_event *event;
2463 void *body; 2489 void *body;
2464 int ret = -EBUSY; 2490 int ret = -EBUSY;
2465 int cpu, resched; 2491 int cpu;
2466 2492
2467 if (ring_buffer_flags != RB_BUFFERS_ON) 2493 if (ring_buffer_flags != RB_BUFFERS_ON)
2468 return -EBUSY; 2494 return -EBUSY;
2469 2495
2470 if (atomic_read(&buffer->record_disabled)) 2496 preempt_disable_notrace();
2471 return -EBUSY;
2472 2497
2473 resched = ftrace_preempt_disable(); 2498 if (atomic_read(&buffer->record_disabled))
2499 goto out;
2474 2500
2475 cpu = raw_smp_processor_id(); 2501 cpu = raw_smp_processor_id();
2476 2502
@@ -2497,7 +2523,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
2497 2523
2498 ret = 0; 2524 ret = 0;
2499 out: 2525 out:
2500 ftrace_preempt_enable(resched); 2526 preempt_enable_notrace();
2501 2527
2502 return ret; 2528 return ret;
2503} 2529}
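
ring_buffer_write() gets the same conversion: preemption is disabled first with preempt_disable_notrace(), and only then is record_disabled checked, so a disabled buffer now exits through the common out label. Nothing changes for callers; a minimal one-shot write is sketched below (struct my_sample and write_sample are invented for the example):

    struct my_sample {
        u64 ts;
        int value;
    };

    static int write_sample(struct ring_buffer *buffer, int value)
    {
        struct my_sample sample = { .ts = 0, .value = value };

        /* Copies the payload in a single call; 0 on success, -EBUSY otherwise. */
        return ring_buffer_write(buffer, sizeof(sample), &sample);
    }
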
@@ -2539,7 +2565,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2539 * @buffer: The ring buffer to enable writes 2565 * @buffer: The ring buffer to enable writes
2540 * 2566 *
2541 * Note, multiple disables will need the same number of enables 2567 * Note, multiple disables will need the same number of enables
2542 * to truely enable the writing (much like preempt_disable). 2568 * to truly enable the writing (much like preempt_disable).
2543 */ 2569 */
2544void ring_buffer_record_enable(struct ring_buffer *buffer) 2570void ring_buffer_record_enable(struct ring_buffer *buffer)
2545{ 2571{
@@ -2575,7 +2601,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2575 * @cpu: The CPU to enable. 2601 * @cpu: The CPU to enable.
2576 * 2602 *
2577 * Note, multiple disables will need the same number of enables 2603 * Note, multiple disables will need the same number of enables
2578 * to truely enable the writing (much like preempt_disable). 2604 * to truly enable the writing (much like preempt_disable).
2579 */ 2605 */
2580void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2606void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2581{ 2607{
@@ -2589,6 +2615,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2589} 2615}
2590EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 2616EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2591 2617
2618/*
2619 * The total number of entries in the ring buffer is the running counter
2620 * of entries entered into the ring buffer, minus the sum of
2621 * the entries read from the ring buffer and the number of
2622 * entries that were overwritten.
2623 */
2624static inline unsigned long
2625rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
2626{
2627 return local_read(&cpu_buffer->entries) -
2628 (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
2629}
2630
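
A quick worked example of the new rb_num_of_entries() helper: with 1000 events written so far (entries), 100 of them overwritten by the producer (overrun) and 250 already consumed by readers (read), the buffer currently holds

    1000 - (100 + 250) = 650

entries. The two open-coded copies replaced in the next two hunks computed the same value, only grouped as (entries - overrun) - read.
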
2592/** 2631/**
2593 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer 2632 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
2594 * @buffer: The ring buffer 2633 * @buffer: The ring buffer
@@ -2597,16 +2636,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
2597unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) 2636unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
2598{ 2637{
2599 struct ring_buffer_per_cpu *cpu_buffer; 2638 struct ring_buffer_per_cpu *cpu_buffer;
2600 unsigned long ret;
2601 2639
2602 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2640 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2603 return 0; 2641 return 0;
2604 2642
2605 cpu_buffer = buffer->buffers[cpu]; 2643 cpu_buffer = buffer->buffers[cpu];
2606 ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun))
2607 - cpu_buffer->read;
2608 2644
2609 return ret; 2645 return rb_num_of_entries(cpu_buffer);
2610} 2646}
2611EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 2647EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
2612 2648
@@ -2667,8 +2703,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
2667 /* if you care about this being correct, lock the buffer */ 2703 /* if you care about this being correct, lock the buffer */
2668 for_each_buffer_cpu(buffer, cpu) { 2704 for_each_buffer_cpu(buffer, cpu) {
2669 cpu_buffer = buffer->buffers[cpu]; 2705 cpu_buffer = buffer->buffers[cpu];
2670 entries += (local_read(&cpu_buffer->entries) - 2706 entries += rb_num_of_entries(cpu_buffer);
2671 local_read(&cpu_buffer->overrun)) - cpu_buffer->read;
2672 } 2707 }
2673 2708
2674 return entries; 2709 return entries;
@@ -2716,6 +2751,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2716 iter->read_stamp = cpu_buffer->read_stamp; 2751 iter->read_stamp = cpu_buffer->read_stamp;
2717 else 2752 else
2718 iter->read_stamp = iter->head_page->page->time_stamp; 2753 iter->read_stamp = iter->head_page->page->time_stamp;
2754 iter->cache_reader_page = cpu_buffer->reader_page;
2755 iter->cache_read = cpu_buffer->read;
2719} 2756}
2720 2757
2721/** 2758/**
@@ -2822,6 +2859,7 @@ static struct buffer_page *
2822rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 2859rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2823{ 2860{
2824 struct buffer_page *reader = NULL; 2861 struct buffer_page *reader = NULL;
2862 unsigned long overwrite;
2825 unsigned long flags; 2863 unsigned long flags;
2826 int nr_loops = 0; 2864 int nr_loops = 0;
2827 int ret; 2865 int ret;
@@ -2863,6 +2901,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2863 local_set(&cpu_buffer->reader_page->write, 0); 2901 local_set(&cpu_buffer->reader_page->write, 0);
2864 local_set(&cpu_buffer->reader_page->entries, 0); 2902 local_set(&cpu_buffer->reader_page->entries, 0);
2865 local_set(&cpu_buffer->reader_page->page->commit, 0); 2903 local_set(&cpu_buffer->reader_page->page->commit, 0);
2904 cpu_buffer->reader_page->real_end = 0;
2866 2905
2867 spin: 2906 spin:
2868 /* 2907 /*
@@ -2883,6 +2922,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2883 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); 2922 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2884 2923
2885 /* 2924 /*
2925 * We want to make sure we read the overruns after we set up our
2926 * pointers to the next object. The writer side does a
2927 * cmpxchg to cross pages which acts as the mb on the writer
2928 * side. Note, the reader will constantly fail the swap
2929 * while the writer is updating the pointers, so this
2930 * guarantees that the overwrite recorded here is the one we
2931 * want to compare with the last_overrun.
2932 */
2933 smp_mb();
2934 overwrite = local_read(&(cpu_buffer->overrun));
2935
2936 /*
2886 * Here's the tricky part. 2937 * Here's the tricky part.
2887 * 2938 *
2888 * We need to move the pointer past the header page. 2939 * We need to move the pointer past the header page.
@@ -2913,6 +2964,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2913 cpu_buffer->reader_page = reader; 2964 cpu_buffer->reader_page = reader;
2914 rb_reset_reader_page(cpu_buffer); 2965 rb_reset_reader_page(cpu_buffer);
2915 2966
2967 if (overwrite != cpu_buffer->last_overrun) {
2968 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
2969 cpu_buffer->last_overrun = overwrite;
2970 }
2971
2916 goto again; 2972 goto again;
2917 2973
2918 out: 2974 out:
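
The overrun counter itself only ever grows, so rb_get_reader_page() turns it into a per-swap delta: if the previous swap recorded last_overrun == 40 and the counter now reads overwrite == 57, then 57 - 40 = 17 events were dropped since the reader last took a page, and that is the value later surfaced through the new lost_events plumbing. The smp_mb() added above pairs with the writer's page-crossing cmpxchg, so the overrun value sampled here is at least as new as the page being swapped in.
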
@@ -2947,13 +3003,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2947 3003
2948static void rb_advance_iter(struct ring_buffer_iter *iter) 3004static void rb_advance_iter(struct ring_buffer_iter *iter)
2949{ 3005{
2950 struct ring_buffer *buffer;
2951 struct ring_buffer_per_cpu *cpu_buffer; 3006 struct ring_buffer_per_cpu *cpu_buffer;
2952 struct ring_buffer_event *event; 3007 struct ring_buffer_event *event;
2953 unsigned length; 3008 unsigned length;
2954 3009
2955 cpu_buffer = iter->cpu_buffer; 3010 cpu_buffer = iter->cpu_buffer;
2956 buffer = cpu_buffer->buffer;
2957 3011
2958 /* 3012 /*
2959 * Check if we are at the end of the buffer. 3013 * Check if we are at the end of the buffer.
@@ -2989,8 +3043,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2989 rb_advance_iter(iter); 3043 rb_advance_iter(iter);
2990} 3044}
2991 3045
3046static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3047{
3048 return cpu_buffer->lost_events;
3049}
3050
2992static struct ring_buffer_event * 3051static struct ring_buffer_event *
2993rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) 3052rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3053 unsigned long *lost_events)
2994{ 3054{
2995 struct ring_buffer_event *event; 3055 struct ring_buffer_event *event;
2996 struct buffer_page *reader; 3056 struct buffer_page *reader;
@@ -2998,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
2998 3058
2999 again: 3059 again:
3000 /* 3060 /*
3001 * We repeat when a timestamp is encountered. It is possible 3061 * We repeat when a time extend is encountered.
3002 * to get multiple timestamps from an interrupt entering just 3062 * Since the time extend is always attached to a data event,
3003 * as one timestamp is about to be written, or from discarded 3063 * we should never loop more than once.
3004 * commits. The most that we can have is the number on a single page. 3064 * (We never hit the following condition more than twice).
3005 */ 3065 */
3006 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3066 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3007 return NULL; 3067 return NULL;
3008 3068
3009 reader = rb_get_reader_page(cpu_buffer); 3069 reader = rb_get_reader_page(cpu_buffer);
@@ -3042,6 +3102,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
3042 ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 3102 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3043 cpu_buffer->cpu, ts); 3103 cpu_buffer->cpu, ts);
3044 } 3104 }
3105 if (lost_events)
3106 *lost_events = rb_lost_events(cpu_buffer);
3045 return event; 3107 return event;
3046 3108
3047 default: 3109 default:
@@ -3060,27 +3122,39 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3060 struct ring_buffer_event *event; 3122 struct ring_buffer_event *event;
3061 int nr_loops = 0; 3123 int nr_loops = 0;
3062 3124
3063 if (ring_buffer_iter_empty(iter))
3064 return NULL;
3065
3066 cpu_buffer = iter->cpu_buffer; 3125 cpu_buffer = iter->cpu_buffer;
3067 buffer = cpu_buffer->buffer; 3126 buffer = cpu_buffer->buffer;
3068 3127
3128 /*
3129 * Check if someone performed a consuming read to
3130 * the buffer. A consuming read invalidates the iterator
3131 * and we need to reset the iterator in this case.
3132 */
3133 if (unlikely(iter->cache_read != cpu_buffer->read ||
3134 iter->cache_reader_page != cpu_buffer->reader_page))
3135 rb_iter_reset(iter);
3136
3069 again: 3137 again:
3138 if (ring_buffer_iter_empty(iter))
3139 return NULL;
3140
3070 /* 3141 /*
3071 * We repeat when a timestamp is encountered. 3142 * We repeat when a time extend is encountered.
3072 * We can get multiple timestamps by nested interrupts or also 3143 * Since the time extend is always attached to a data event,
3073 * if filtering is on (discarding commits). Since discarding 3144 * we should never loop more than once.
3074 * commits can be frequent we can get a lot of timestamps. 3145 * (We never hit the following condition more than twice).
3075 * But we limit them by not adding timestamps if they begin
3076 * at the start of a page.
3077 */ 3146 */
3078 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3147 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3079 return NULL; 3148 return NULL;
3080 3149
3081 if (rb_per_cpu_empty(cpu_buffer)) 3150 if (rb_per_cpu_empty(cpu_buffer))
3082 return NULL; 3151 return NULL;
3083 3152
3153 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3154 rb_inc_iter(iter);
3155 goto again;
3156 }
3157
3084 event = rb_iter_head_event(iter); 3158 event = rb_iter_head_event(iter);
3085 3159
3086 switch (event->type_len) { 3160 switch (event->type_len) {
@@ -3138,12 +3212,14 @@ static inline int rb_ok_to_lock(void)
3138 * @buffer: The ring buffer to read 3212 * @buffer: The ring buffer to read
3139 * @cpu: The cpu to peek at 3213 * @cpu: The cpu to peek at
3140 * @ts: The timestamp counter of this event. 3214 * @ts: The timestamp counter of this event.
3215 * @lost_events: a variable to store the number of lost events (may be NULL)
3141 * 3216 *
3142 * This will return the event that will be read next, but does 3217 * This will return the event that will be read next, but does
3143 * not consume the data. 3218 * not consume the data.
3144 */ 3219 */
3145struct ring_buffer_event * 3220struct ring_buffer_event *
3146ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3221ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3222 unsigned long *lost_events)
3147{ 3223{
3148 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 3224 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3149 struct ring_buffer_event *event; 3225 struct ring_buffer_event *event;
@@ -3158,7 +3234,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3158 local_irq_save(flags); 3234 local_irq_save(flags);
3159 if (dolock) 3235 if (dolock)
3160 spin_lock(&cpu_buffer->reader_lock); 3236 spin_lock(&cpu_buffer->reader_lock);
3161 event = rb_buffer_peek(cpu_buffer, ts); 3237 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3162 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3238 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3163 rb_advance_reader(cpu_buffer); 3239 rb_advance_reader(cpu_buffer);
3164 if (dolock) 3240 if (dolock)
@@ -3200,13 +3276,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3200/** 3276/**
3201 * ring_buffer_consume - return an event and consume it 3277 * ring_buffer_consume - return an event and consume it
3202 * @buffer: The ring buffer to get the next event from 3278 * @buffer: The ring buffer to get the next event from
3279 * @cpu: the cpu to read the buffer from
3280 * @ts: a variable to store the timestamp (may be NULL)
3281 * @lost_events: a variable to store the number of lost events (may be NULL)
3203 * 3282 *
3204 * Returns the next event in the ring buffer, and that event is consumed. 3283 * Returns the next event in the ring buffer, and that event is consumed.
3205 * Meaning, that sequential reads will keep returning a different event, 3284 * Meaning, that sequential reads will keep returning a different event,
3206 * and eventually empty the ring buffer if the producer is slower. 3285 * and eventually empty the ring buffer if the producer is slower.
3207 */ 3286 */
3208struct ring_buffer_event * 3287struct ring_buffer_event *
3209ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 3288ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3289 unsigned long *lost_events)
3210{ 3290{
3211 struct ring_buffer_per_cpu *cpu_buffer; 3291 struct ring_buffer_per_cpu *cpu_buffer;
3212 struct ring_buffer_event *event = NULL; 3292 struct ring_buffer_event *event = NULL;
@@ -3227,9 +3307,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3227 if (dolock) 3307 if (dolock)
3228 spin_lock(&cpu_buffer->reader_lock); 3308 spin_lock(&cpu_buffer->reader_lock);
3229 3309
3230 event = rb_buffer_peek(cpu_buffer, ts); 3310 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3231 if (event) 3311 if (event) {
3312 cpu_buffer->lost_events = 0;
3232 rb_advance_reader(cpu_buffer); 3313 rb_advance_reader(cpu_buffer);
3314 }
3233 3315
3234 if (dolock) 3316 if (dolock)
3235 spin_unlock(&cpu_buffer->reader_lock); 3317 spin_unlock(&cpu_buffer->reader_lock);
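
Both ring_buffer_peek() and ring_buffer_consume() now take a lost_events out-parameter; passing NULL opts out, as the benchmark update later in this diff does. A consuming read loop under the new signature might look like this sketch (drain_cpu and process() are placeholders):

    /* Sketch: drain one CPU's buffer, reporting drops along the way. */
    static void drain_cpu(struct ring_buffer *buffer, int cpu)
    {
        struct ring_buffer_event *event;
        unsigned long lost;
        u64 ts;

        while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
            if (lost)
                pr_info("cpu %d: %lu events lost\n", cpu, lost);
            process(ring_buffer_event_data(event));
        }
    }
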
@@ -3246,23 +3328,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3246EXPORT_SYMBOL_GPL(ring_buffer_consume); 3328EXPORT_SYMBOL_GPL(ring_buffer_consume);
3247 3329
3248/** 3330/**
3249 * ring_buffer_read_start - start a non consuming read of the buffer 3331 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
3250 * @buffer: The ring buffer to read from 3332 * @buffer: The ring buffer to read from
3251 * @cpu: The cpu buffer to iterate over 3333 * @cpu: The cpu buffer to iterate over
3252 * 3334 *
3253 * This starts up an iteration through the buffer. It also disables 3335 * This performs the initial preparations necessary to iterate
3254 * the recording to the buffer until the reading is finished. 3336 * through the buffer. Memory is allocated, buffer recording
3255 * This prevents the reading from being corrupted. This is not 3337 * is disabled, and the iterator pointer is returned to the caller.
3256 * a consuming read, so a producer is not expected.
3257 * 3338 *
3258 * Must be paired with ring_buffer_finish. 3339 * Disabling buffer recording prevents the reading from being
3340 * corrupted. This is not a consuming read, so a producer is not
3341 * expected.
3342 *
3343 * After a sequence of ring_buffer_read_prepare calls, the user is
3344 * expected to make at least one call to ring_buffer_read_prepare_sync.
3345 * Afterwards, ring_buffer_read_start is invoked to get things going
3346 * for real.
3347 *
3348 * This overall must be paired with ring_buffer_finish.
3259 */ 3349 */
3260struct ring_buffer_iter * 3350struct ring_buffer_iter *
3261ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 3351ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3262{ 3352{
3263 struct ring_buffer_per_cpu *cpu_buffer; 3353 struct ring_buffer_per_cpu *cpu_buffer;
3264 struct ring_buffer_iter *iter; 3354 struct ring_buffer_iter *iter;
3265 unsigned long flags;
3266 3355
3267 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3356 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3268 return NULL; 3357 return NULL;
@@ -3276,15 +3365,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3276 iter->cpu_buffer = cpu_buffer; 3365 iter->cpu_buffer = cpu_buffer;
3277 3366
3278 atomic_inc(&cpu_buffer->record_disabled); 3367 atomic_inc(&cpu_buffer->record_disabled);
3368
3369 return iter;
3370}
3371EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3372
3373/**
3374 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
3375 *
3376 * All previously invoked ring_buffer_read_prepare calls to prepare
3377 * iterators will be synchronized. Afterwards, ring_buffer_read_start
3378 * calls on those iterators are allowed.
3379 */
3380void
3381ring_buffer_read_prepare_sync(void)
3382{
3279 synchronize_sched(); 3383 synchronize_sched();
3384}
3385EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3386
3387/**
3388 * ring_buffer_read_start - start a non consuming read of the buffer
3389 * @iter: The iterator returned by ring_buffer_read_prepare
3390 *
3391 * This finalizes the startup of an iteration through the buffer.
3392 * The iterator comes from a call to ring_buffer_read_prepare and
3393 * an intervening ring_buffer_read_prepare_sync must have been
3394 * performed.
3395 *
3396 * Must be paired with ring_buffer_finish.
3397 */
3398void
3399ring_buffer_read_start(struct ring_buffer_iter *iter)
3400{
3401 struct ring_buffer_per_cpu *cpu_buffer;
3402 unsigned long flags;
3403
3404 if (!iter)
3405 return;
3406
3407 cpu_buffer = iter->cpu_buffer;
3280 3408
3281 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3409 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3282 arch_spin_lock(&cpu_buffer->lock); 3410 arch_spin_lock(&cpu_buffer->lock);
3283 rb_iter_reset(iter); 3411 rb_iter_reset(iter);
3284 arch_spin_unlock(&cpu_buffer->lock); 3412 arch_spin_unlock(&cpu_buffer->lock);
3285 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3413 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3286
3287 return iter;
3288} 3414}
3289EXPORT_SYMBOL_GPL(ring_buffer_read_start); 3415EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3290 3416
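
Starting a non-consuming read is now a three-step sequence, so a caller that iterates many CPUs pays for synchronize_sched() once instead of once per CPU: prepare an iterator per CPU, sync once, then start (and eventually finish) each iterator. Roughly, with error handling and sensible storage omitted:

    /* Sketch: non-consuming read of every online CPU under the new API. */
    static void dump_all_cpus(struct ring_buffer *buffer)
    {
        struct ring_buffer_iter *iter[NR_CPUS]; /* illustrative storage only */
        int cpu;

        for_each_online_cpu(cpu)
            iter[cpu] = ring_buffer_read_prepare(buffer, cpu);

        /* One grace period now covers every prepare above. */
        ring_buffer_read_prepare_sync();

        for_each_online_cpu(cpu) {
            ring_buffer_read_start(iter[cpu]);
            /* ... walk with ring_buffer_read() / ring_buffer_iter_peek() ... */
            ring_buffer_read_finish(iter[cpu]);
        }
    }
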
@@ -3378,6 +3504,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3378 cpu_buffer->write_stamp = 0; 3504 cpu_buffer->write_stamp = 0;
3379 cpu_buffer->read_stamp = 0; 3505 cpu_buffer->read_stamp = 0;
3380 3506
3507 cpu_buffer->lost_events = 0;
3508 cpu_buffer->last_overrun = 0;
3509
3381 rb_head_page_activate(cpu_buffer); 3510 rb_head_page_activate(cpu_buffer);
3382} 3511}
3383 3512
@@ -3653,6 +3782,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3653 struct ring_buffer_event *event; 3782 struct ring_buffer_event *event;
3654 struct buffer_data_page *bpage; 3783 struct buffer_data_page *bpage;
3655 struct buffer_page *reader; 3784 struct buffer_page *reader;
3785 unsigned long missed_events;
3656 unsigned long flags; 3786 unsigned long flags;
3657 unsigned int commit; 3787 unsigned int commit;
3658 unsigned int read; 3788 unsigned int read;
@@ -3689,6 +3819,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3689 read = reader->read; 3819 read = reader->read;
3690 commit = rb_page_commit(reader); 3820 commit = rb_page_commit(reader);
3691 3821
3822 /* Check if any events were dropped */
3823 missed_events = cpu_buffer->lost_events;
3824
3692 /* 3825 /*
3693 * If this page has been partially read or 3826 * If this page has been partially read or
3694 * if len is not big enough to read the rest of the page or 3827 * if len is not big enough to read the rest of the page or
@@ -3709,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3709 if (len > (commit - read)) 3842 if (len > (commit - read))
3710 len = (commit - read); 3843 len = (commit - read);
3711 3844
3712 size = rb_event_length(event); 3845 /* Always keep the time extend and data together */
3846 size = rb_event_ts_length(event);
3713 3847
3714 if (len < size) 3848 if (len < size)
3715 goto out_unlock; 3849 goto out_unlock;
@@ -3719,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3719 3853
3720 /* Need to copy one event at a time */ 3854 /* Need to copy one event at a time */
3721 do { 3855 do {
3856 /* We need the size of one event, because
3857 * rb_advance_reader only advances by one event,
3858 * whereas rb_event_ts_length may include the size of
3859 * one or two events.
3860 * We have already ensured there's enough space if this
3861 * is a time extend. */
3862 size = rb_event_length(event);
3722 memcpy(bpage->data + pos, rpage->data + rpos, size); 3863 memcpy(bpage->data + pos, rpage->data + rpos, size);
3723 3864
3724 len -= size; 3865 len -= size;
@@ -3727,9 +3868,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3727 rpos = reader->read; 3868 rpos = reader->read;
3728 pos += size; 3869 pos += size;
3729 3870
3871 if (rpos >= commit)
3872 break;
3873
3730 event = rb_reader_event(cpu_buffer); 3874 event = rb_reader_event(cpu_buffer);
3731 size = rb_event_length(event); 3875 /* Always keep the time extend and data together */
3732 } while (len > size); 3876 size = rb_event_ts_length(event);
3877 } while (len >= size);
3733 3878
3734 /* update bpage */ 3879 /* update bpage */
3735 local_set(&bpage->commit, pos); 3880 local_set(&bpage->commit, pos);
@@ -3749,9 +3894,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3749 local_set(&reader->entries, 0); 3894 local_set(&reader->entries, 0);
3750 reader->read = 0; 3895 reader->read = 0;
3751 *data_page = bpage; 3896 *data_page = bpage;
3897
3898 /*
3899 * Use the real_end for the data size,
3900 * This gives us a chance to store the lost events
3901 * on the page.
3902 */
3903 if (reader->real_end)
3904 local_set(&bpage->commit, reader->real_end);
3752 } 3905 }
3753 ret = read; 3906 ret = read;
3754 3907
3908 cpu_buffer->lost_events = 0;
3909
3910 commit = local_read(&bpage->commit);
3911 /*
3912 * Set a flag in the commit field if we lost events
3913 */
3914 if (missed_events) {
3915 /* If there is room at the end of the page to save the
3916 * missed events, then record it there.
3917 */
3918 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
3919 memcpy(&bpage->data[commit], &missed_events,
3920 sizeof(missed_events));
3921 local_add(RB_MISSED_STORED, &bpage->commit);
3922 commit += sizeof(missed_events);
3923 }
3924 local_add(RB_MISSED_EVENTS, &bpage->commit);
3925 }
3926
3927 /*
3928 * This page may be off to user land. Zero it out here.
3929 */
3930 if (commit < BUF_PAGE_SIZE)
3931 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
3932
3755 out_unlock: 3933 out_unlock:
3756 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3934 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3757 3935
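
ring_buffer_read_page() now reports drops in-band: when events were lost, the count is appended after the data if it fits, and flag bits (RB_MISSED_EVENTS, plus RB_MISSED_STORED when the count was actually written) are folded into the upper part of bpage->commit, leaving the low bits as the real data length. A consumer therefore masks the flags off before using commit as a length, which is what the benchmark below starts doing with its 0xfffff mask. A sketch of the reader side, reusing that mask and the benchmark's struct rb_page layout:

    static unsigned long page_missed_events(struct rb_page *rpage)
    {
        unsigned long raw = local_read(&rpage->commit);
        unsigned long len = raw & 0xfffff;      /* low bits: real data length */
        unsigned long missed = 0;

        if ((raw & RB_MISSED_EVENTS) && (raw & RB_MISSED_STORED))
            /* the dropped-event count was appended right after the data */
            memcpy(&missed, &rpage->data[len], sizeof(missed));

        return missed;
    }
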
@@ -3812,6 +3990,7 @@ static const struct file_operations rb_simple_fops = {
3812 .open = tracing_open_generic, 3990 .open = tracing_open_generic,
3813 .read = rb_simple_read, 3991 .read = rb_simple_read,
3814 .write = rb_simple_write, 3992 .write = rb_simple_write,
3993 .llseek = default_llseek,
3815}; 3994};
3816 3995
3817 3996
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index b2477caf09c2..302f8a614635 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
@@ -80,7 +81,7 @@ static enum event_status read_event(int cpu)
80 int *entry; 81 int *entry;
81 u64 ts; 82 u64 ts;
82 83
83 event = ring_buffer_consume(buffer, cpu, &ts); 84 event = ring_buffer_consume(buffer, cpu, &ts, NULL);
84 if (!event) 85 if (!event)
85 return EVENT_DROPPED; 86 return EVENT_DROPPED;
86 87
@@ -112,7 +113,8 @@ static enum event_status read_page(int cpu)
112 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); 113 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
113 if (ret >= 0) { 114 if (ret >= 0) {
114 rpage = bpage; 115 rpage = bpage;
115 commit = local_read(&rpage->commit); 116 /* The commit may have missed event flags set, clear them */
117 commit = local_read(&rpage->commit) & 0xfffff;
116 for (i = 0; i < commit && !kill_test; i += inc) { 118 for (i = 0; i < commit && !kill_test; i += inc) {
117 119
118 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { 120 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..dc53ecb80589 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,7 +17,6 @@
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/smp_lock.h>
21#include <linux/notifier.h> 20#include <linux/notifier.h>
22#include <linux/irqflags.h> 21#include <linux/irqflags.h>
23#include <linux/debugfs.h> 22#include <linux/debugfs.h>
@@ -32,10 +31,11 @@
32#include <linux/splice.h> 31#include <linux/splice.h>
33#include <linux/kdebug.h> 32#include <linux/kdebug.h>
34#include <linux/string.h> 33#include <linux/string.h>
34#include <linux/rwsem.h>
35#include <linux/slab.h>
35#include <linux/ctype.h> 36#include <linux/ctype.h>
36#include <linux/init.h> 37#include <linux/init.h>
37#include <linux/poll.h> 38#include <linux/poll.h>
38#include <linux/gfp.h>
39#include <linux/fs.h> 39#include <linux/fs.h>
40 40
41#include "trace.h" 41#include "trace.h"
@@ -91,22 +91,16 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
91static inline void ftrace_disable_cpu(void) 91static inline void ftrace_disable_cpu(void)
92{ 92{
93 preempt_disable(); 93 preempt_disable();
94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); 94 __this_cpu_inc(ftrace_cpu_disabled);
95} 95}
96 96
97static inline void ftrace_enable_cpu(void) 97static inline void ftrace_enable_cpu(void)
98{ 98{
99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); 99 __this_cpu_dec(ftrace_cpu_disabled);
100 preempt_enable(); 100 preempt_enable();
101} 101}
102 102
103static cpumask_var_t __read_mostly tracing_buffer_mask; 103cpumask_var_t __read_mostly tracing_buffer_mask;
104
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask)
110 104
111/* 105/*
112 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops 106 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -119,9 +113,12 @@ static cpumask_var_t tracing_reader_cpumask;
119 * 113 *
120 * It is default off, but you can enable it with either specifying 114 * It is default off, but you can enable it with either specifying
121 * "ftrace_dump_on_oops" in the kernel command line, or setting 115 * "ftrace_dump_on_oops" in the kernel command line, or setting
122 * /proc/sys/kernel/ftrace_dump_on_oops to true. 116 * /proc/sys/kernel/ftrace_dump_on_oops
117 * Set 1 if you want to dump buffers of all CPUs
118 * Set 2 if you want to dump the buffer of the CPU that triggered oops
123 */ 119 */
124int ftrace_dump_on_oops; 120
121enum ftrace_dump_mode ftrace_dump_on_oops;
125 122
126static int tracing_set_tracer(const char *buf); 123static int tracing_set_tracer(const char *buf);
127 124
@@ -141,8 +138,17 @@ __setup("ftrace=", set_cmdline_ftrace);
141 138
142static int __init set_ftrace_dump_on_oops(char *str) 139static int __init set_ftrace_dump_on_oops(char *str)
143{ 140{
144 ftrace_dump_on_oops = 1; 141 if (*str++ != '=' || !*str) {
145 return 1; 142 ftrace_dump_on_oops = DUMP_ALL;
143 return 1;
144 }
145
146 if (!strcmp("orig_cpu", str)) {
147 ftrace_dump_on_oops = DUMP_ORIG;
148 return 1;
149 }
150
151 return 0;
146} 152}
147__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 153__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
148 154
@@ -243,19 +249,98 @@ static struct tracer *current_trace __read_mostly;
243 249
244/* 250/*
245 * trace_types_lock is used to protect the trace_types list. 251 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 252 */
250static DEFINE_MUTEX(trace_types_lock); 253static DEFINE_MUTEX(trace_types_lock);
251 254
255/*
256 * serialize access to the ring buffer
257 *
258 * The ring buffer serializes readers, but this is only low-level protection.
259 * The validity of the events (returned by ring_buffer_peek() etc.)
260 * is not protected by the ring buffer.
261 *
262 * The content of events may become garbage if we allow another process to
263 * consume these events concurrently:
264 * A) the page holding the consumed events may become a normal page
265 * (not a reader page) in the ring buffer, and this page will be rewritten
266 * by the events producer.
267 * B) the page holding the consumed events may become a page for splice_read,
268 * and this page will be returned to the system.
269 *
270 * These primitives allow multiple processes to access different cpu ring
271 * buffers concurrently.
272 *
273 * These primitives don't distinguish read-only and read-consume access.
274 * Multiple read-only accesses are also serialized.
275 */
276
277#ifdef CONFIG_SMP
278static DECLARE_RWSEM(all_cpu_access_lock);
279static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
280
281static inline void trace_access_lock(int cpu)
282{
283 if (cpu == TRACE_PIPE_ALL_CPU) {
284 /* gain it for accessing the whole ring buffer. */
285 down_write(&all_cpu_access_lock);
286 } else {
287 /* gain it for accessing a cpu ring buffer. */
288
289 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
290 down_read(&all_cpu_access_lock);
291
292 /* Secondly block other access to this @cpu ring buffer. */
293 mutex_lock(&per_cpu(cpu_access_lock, cpu));
294 }
295}
296
297static inline void trace_access_unlock(int cpu)
298{
299 if (cpu == TRACE_PIPE_ALL_CPU) {
300 up_write(&all_cpu_access_lock);
301 } else {
302 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
303 up_read(&all_cpu_access_lock);
304 }
305}
306
307static inline void trace_access_lock_init(void)
308{
309 int cpu;
310
311 for_each_possible_cpu(cpu)
312 mutex_init(&per_cpu(cpu_access_lock, cpu));
313}
314
315#else
316
317static DEFINE_MUTEX(access_lock);
318
319static inline void trace_access_lock(int cpu)
320{
321 (void)cpu;
322 mutex_lock(&access_lock);
323}
324
325static inline void trace_access_unlock(int cpu)
326{
327 (void)cpu;
328 mutex_unlock(&access_lock);
329}
330
331static inline void trace_access_lock_init(void)
332{
333}
334
335#endif
336
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 337/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 338static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 339
255/* trace_flags holds trace_options default values */ 340/* trace_flags holds trace_options default values */
256unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 341unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
257 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | 342 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
258 TRACE_ITER_GRAPH_TIME; 343 TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
259 344
260static int trace_stop_count; 345static int trace_stop_count;
261static DEFINE_SPINLOCK(tracing_start_lock); 346static DEFINE_SPINLOCK(tracing_start_lock);
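
The old "serialize all user access with trace_types_lock" rule is replaced by this two-level scheme: a reader of one CPU buffer takes all_cpu_access_lock shared plus that CPU's cpu_access_lock mutex, while a TRACE_PIPE_ALL_CPU reader takes all_cpu_access_lock exclusive and thereby excludes every per-cpu reader. Readers of different CPUs no longer block each other. Usage is simply a bracket around the ring-buffer accesses, e.g. (sketch):

    trace_access_lock(iter->cpu_file);  /* a CPU number or TRACE_PIPE_ALL_CPU */
    /* ... ring_buffer_consume() / ring_buffer_read_page() for that cpu ... */
    trace_access_unlock(iter->cpu_file);
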
@@ -297,6 +382,21 @@ static int __init set_buf_size(char *str)
297} 382}
298__setup("trace_buf_size=", set_buf_size); 383__setup("trace_buf_size=", set_buf_size);
299 384
385static int __init set_tracing_thresh(char *str)
386{
387 unsigned long threshhold;
388 int ret;
389
390 if (!str)
391 return 0;
392 ret = strict_strtoul(str, 0, &threshhold);
393 if (ret < 0)
394 return 0;
395 tracing_thresh = threshhold * 1000;
396 return 1;
397}
398__setup("tracing_thresh=", set_tracing_thresh);
399
300unsigned long nsecs_to_usecs(unsigned long nsecs) 400unsigned long nsecs_to_usecs(unsigned long nsecs)
301{ 401{
302 return nsecs / 1000; 402 return nsecs / 1000;
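
Worked example for the new tracing_thresh= parameter: the parsed value is multiplied by 1000 before being stored, so tracing_thresh=100 ends up as 100 * 1000 = 100000 in tracing_thresh; given the nanosecond timestamps used throughout, that amounts to treating the command-line value as microseconds (here a 100 µs threshold).
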
@@ -324,6 +424,7 @@ static const char *trace_options[] = {
324 "latency-format", 424 "latency-format",
325 "sleep-time", 425 "sleep-time",
326 "graph-time", 426 "graph-time",
427 "record-cmd",
327 NULL 428 NULL
328}; 429};
329 430
@@ -502,9 +603,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
502static arch_spinlock_t ftrace_max_lock = 603static arch_spinlock_t ftrace_max_lock =
503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 604 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
504 605
606unsigned long __read_mostly tracing_thresh;
607
505#ifdef CONFIG_TRACER_MAX_TRACE 608#ifdef CONFIG_TRACER_MAX_TRACE
506unsigned long __read_mostly tracing_max_latency; 609unsigned long __read_mostly tracing_max_latency;
507unsigned long __read_mostly tracing_thresh;
508 610
509/* 611/*
510 * Copy the new maximum trace into the separate maximum-trace 612 * Copy the new maximum trace into the separate maximum-trace
@@ -515,7 +617,7 @@ static void
515__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 617__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
516{ 618{
517 struct trace_array_cpu *data = tr->data[cpu]; 619 struct trace_array_cpu *data = tr->data[cpu];
518 struct trace_array_cpu *max_data = tr->data[cpu]; 620 struct trace_array_cpu *max_data;
519 621
520 max_tr.cpu = cpu; 622 max_tr.cpu = cpu;
521 max_tr.time_start = data->preempt_timestamp; 623 max_tr.time_start = data->preempt_timestamp;
@@ -525,7 +627,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
525 max_data->critical_start = data->critical_start; 627 max_data->critical_start = data->critical_start;
526 max_data->critical_end = data->critical_end; 628 max_data->critical_end = data->critical_end;
527 629
528 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 630 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
529 max_data->pid = tsk->pid; 631 max_data->pid = tsk->pid;
530 max_data->uid = task_uid(tsk); 632 max_data->uid = task_uid(tsk);
531 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 633 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -554,6 +656,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
554 return; 656 return;
555 657
556 WARN_ON_ONCE(!irqs_disabled()); 658 WARN_ON_ONCE(!irqs_disabled());
659 if (!current_trace->use_max_tr) {
660 WARN_ON_ONCE(1);
661 return;
662 }
557 arch_spin_lock(&ftrace_max_lock); 663 arch_spin_lock(&ftrace_max_lock);
558 664
559 tr->buffer = max_tr.buffer; 665 tr->buffer = max_tr.buffer;
@@ -580,6 +686,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
580 return; 686 return;
581 687
582 WARN_ON_ONCE(!irqs_disabled()); 688 WARN_ON_ONCE(!irqs_disabled());
689 if (!current_trace->use_max_tr) {
690 WARN_ON_ONCE(1);
691 return;
692 }
693
583 arch_spin_lock(&ftrace_max_lock); 694 arch_spin_lock(&ftrace_max_lock);
584 695
585 ftrace_disable_cpu(); 696 ftrace_disable_cpu();
@@ -624,18 +735,11 @@ __acquires(kernel_lock)
624 return -1; 735 return -1;
625 } 736 }
626 737
627 if (strlen(type->name) > MAX_TRACER_SIZE) { 738 if (strlen(type->name) >= MAX_TRACER_SIZE) {
628 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 739 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
629 return -1; 740 return -1;
630 } 741 }
631 742
632 /*
633 * When this gets called we hold the BKL which means that
634 * preemption is disabled. Various trace selftests however
635 * need to disable and enable preemption for successful tests.
636 * So we drop the BKL here and grab it after the tests again.
637 */
638 unlock_kernel();
639 mutex_lock(&trace_types_lock); 743 mutex_lock(&trace_types_lock);
640 744
641 tracing_selftest_running = true; 745 tracing_selftest_running = true;
@@ -717,7 +821,6 @@ __acquires(kernel_lock)
717#endif 821#endif
718 822
719 out_unlock: 823 out_unlock:
720 lock_kernel();
721 return ret; 824 return ret;
722} 825}
723 826
@@ -747,10 +850,10 @@ out:
747 mutex_unlock(&trace_types_lock); 850 mutex_unlock(&trace_types_lock);
748} 851}
749 852
750static void __tracing_reset(struct trace_array *tr, int cpu) 853static void __tracing_reset(struct ring_buffer *buffer, int cpu)
751{ 854{
752 ftrace_disable_cpu(); 855 ftrace_disable_cpu();
753 ring_buffer_reset_cpu(tr->buffer, cpu); 856 ring_buffer_reset_cpu(buffer, cpu);
754 ftrace_enable_cpu(); 857 ftrace_enable_cpu();
755} 858}
756 859
@@ -762,7 +865,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
762 865
763 /* Make sure all commits have finished */ 866 /* Make sure all commits have finished */
764 synchronize_sched(); 867 synchronize_sched();
765 __tracing_reset(tr, cpu); 868 __tracing_reset(buffer, cpu);
766 869
767 ring_buffer_record_enable(buffer); 870 ring_buffer_record_enable(buffer);
768} 871}
@@ -780,7 +883,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
780 tr->time_start = ftrace_now(tr->cpu); 883 tr->time_start = ftrace_now(tr->cpu);
781 884
782 for_each_online_cpu(cpu) 885 for_each_online_cpu(cpu)
783 __tracing_reset(tr, cpu); 886 __tracing_reset(buffer, cpu);
784 887
785 ring_buffer_record_enable(buffer); 888 ring_buffer_record_enable(buffer);
786} 889}
@@ -857,6 +960,8 @@ void tracing_start(void)
857 goto out; 960 goto out;
858 } 961 }
859 962
963 /* Prevent the buffers from switching */
964 arch_spin_lock(&ftrace_max_lock);
860 965
861 buffer = global_trace.buffer; 966 buffer = global_trace.buffer;
862 if (buffer) 967 if (buffer)
@@ -866,6 +971,8 @@ void tracing_start(void)
866 if (buffer) 971 if (buffer)
867 ring_buffer_record_enable(buffer); 972 ring_buffer_record_enable(buffer);
868 973
974 arch_spin_unlock(&ftrace_max_lock);
975
869 ftrace_start(); 976 ftrace_start();
870 out: 977 out:
871 spin_unlock_irqrestore(&tracing_start_lock, flags); 978 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -887,6 +994,9 @@ void tracing_stop(void)
887 if (trace_stop_count++) 994 if (trace_stop_count++)
888 goto out; 995 goto out;
889 996
997 /* Prevent the buffers from switching */
998 arch_spin_lock(&ftrace_max_lock);
999
890 buffer = global_trace.buffer; 1000 buffer = global_trace.buffer;
891 if (buffer) 1001 if (buffer)
892 ring_buffer_record_disable(buffer); 1002 ring_buffer_record_disable(buffer);
@@ -895,6 +1005,8 @@ void tracing_stop(void)
895 if (buffer) 1005 if (buffer)
896 ring_buffer_record_disable(buffer); 1006 ring_buffer_record_disable(buffer);
897 1007
1008 arch_spin_unlock(&ftrace_max_lock);
1009
898 out: 1010 out:
899 spin_unlock_irqrestore(&tracing_start_lock, flags); 1011 spin_unlock_irqrestore(&tracing_start_lock, flags);
900} 1012}
@@ -951,6 +1063,11 @@ void trace_find_cmdline(int pid, char comm[])
951 return; 1063 return;
952 } 1064 }
953 1065
1066 if (WARN_ON_ONCE(pid < 0)) {
1067 strcpy(comm, "<XXX>");
1068 return;
1069 }
1070
954 if (pid > PID_MAX_DEFAULT) { 1071 if (pid > PID_MAX_DEFAULT) {
955 strcpy(comm, "<...>"); 1072 strcpy(comm, "<...>");
956 return; 1073 return;
@@ -1084,7 +1201,7 @@ trace_function(struct trace_array *tr,
1084 struct ftrace_entry *entry; 1201 struct ftrace_entry *entry;
1085 1202
1086 /* If we are reading the ring buffer, don't trace */ 1203 /* If we are reading the ring buffer, don't trace */
1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 1204 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1088 return; 1205 return;
1089 1206
1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1207 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1166,6 +1283,8 @@ void trace_dump_stack(void)
1166 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1167} 1284}
1168 1285
1286static DEFINE_PER_CPU(int, user_stack_count);
1287
1169void 1288void
1170ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1289ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1171{ 1290{
@@ -1177,10 +1296,27 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1177 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1296 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1178 return; 1297 return;
1179 1298
1299 /*
1300 * NMIs cannot handle page faults, even with fixups.
1301 * Saving the user stack can (and often does) fault.
1302 */
1303 if (unlikely(in_nmi()))
1304 return;
1305
1306 /*
1307 * prevent recursion, since the user stack tracing may
1308 * trigger other kernel events.
1309 */
1310 preempt_disable();
1311 if (__this_cpu_read(user_stack_count))
1312 goto out;
1313
1314 __this_cpu_inc(user_stack_count);
1315
1180 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1316 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1181 sizeof(*entry), flags, pc); 1317 sizeof(*entry), flags, pc);
1182 if (!event) 1318 if (!event)
1183 return; 1319 goto out_drop_count;
1184 entry = ring_buffer_event_data(event); 1320 entry = ring_buffer_event_data(event);
1185 1321
1186 entry->tgid = current->tgid; 1322 entry->tgid = current->tgid;
@@ -1194,6 +1330,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1194 save_stack_trace_user(&trace); 1330 save_stack_trace_user(&trace);
1195 if (!filter_check_discard(call, entry, buffer, event)) 1331 if (!filter_check_discard(call, entry, buffer, event))
1196 ring_buffer_unlock_commit(buffer, event); 1332 ring_buffer_unlock_commit(buffer, event);
1333
1334 out_drop_count:
1335 __this_cpu_dec(user_stack_count);
1336 out:
1337 preempt_enable();
1197} 1338}
1198 1339
1199#ifdef UNUSED 1340#ifdef UNUSED
@@ -1205,61 +1346,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1205 1346
1206#endif /* CONFIG_STACKTRACE */ 1347#endif /* CONFIG_STACKTRACE */
1207 1348
1208static void
1209ftrace_trace_special(void *__tr,
1210 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1211 int pc)
1212{
1213 struct ftrace_event_call *call = &event_special;
1214 struct ring_buffer_event *event;
1215 struct trace_array *tr = __tr;
1216 struct ring_buffer *buffer = tr->buffer;
1217 struct special_entry *entry;
1218
1219 event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
1220 sizeof(*entry), 0, pc);
1221 if (!event)
1222 return;
1223 entry = ring_buffer_event_data(event);
1224 entry->arg1 = arg1;
1225 entry->arg2 = arg2;
1226 entry->arg3 = arg3;
1227
1228 if (!filter_check_discard(call, entry, buffer, event))
1229 trace_buffer_unlock_commit(buffer, event, 0, pc);
1230}
1231
1232void
1233__trace_special(void *__tr, void *__data,
1234 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1235{
1236 ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1237}
1238
1239void
1240ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1241{
1242 struct trace_array *tr = &global_trace;
1243 struct trace_array_cpu *data;
1244 unsigned long flags;
1245 int cpu;
1246 int pc;
1247
1248 if (tracing_disabled)
1249 return;
1250
1251 pc = preempt_count();
1252 local_irq_save(flags);
1253 cpu = raw_smp_processor_id();
1254 data = tr->data[cpu];
1255
1256 if (likely(atomic_inc_return(&data->disabled) == 1))
1257 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1258
1259 atomic_dec(&data->disabled);
1260 local_irq_restore(flags);
1261}
1262
1263/** 1349/**
1264 * trace_vbprintk - write binary msg to tracing buffer 1350 * trace_vbprintk - write binary msg to tracing buffer
1265 * 1351 *
@@ -1278,7 +1364,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1278 struct bprint_entry *entry; 1364 struct bprint_entry *entry;
1279 unsigned long flags; 1365 unsigned long flags;
1280 int disable; 1366 int disable;
1281 int resched;
1282 int cpu, len = 0, size, pc; 1367 int cpu, len = 0, size, pc;
1283 1368
1284 if (unlikely(tracing_selftest_running || tracing_disabled)) 1369 if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1288,7 +1373,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1288 pause_graph_tracing(); 1373 pause_graph_tracing();
1289 1374
1290 pc = preempt_count(); 1375 pc = preempt_count();
1291 resched = ftrace_preempt_disable(); 1376 preempt_disable_notrace();
1292 cpu = raw_smp_processor_id(); 1377 cpu = raw_smp_processor_id();
1293 data = tr->data[cpu]; 1378 data = tr->data[cpu];
1294 1379
@@ -1315,8 +1400,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1315 entry->fmt = fmt; 1400 entry->fmt = fmt;
1316 1401
1317 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1402 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1318 if (!filter_check_discard(call, entry, buffer, event)) 1403 if (!filter_check_discard(call, entry, buffer, event)) {
1319 ring_buffer_unlock_commit(buffer, event); 1404 ring_buffer_unlock_commit(buffer, event);
1405 ftrace_trace_stack(buffer, flags, 6, pc);
1406 }
1320 1407
1321out_unlock: 1408out_unlock:
1322 arch_spin_unlock(&trace_buf_lock); 1409 arch_spin_unlock(&trace_buf_lock);
@@ -1324,7 +1411,7 @@ out_unlock:
1324 1411
1325out: 1412out:
1326 atomic_dec_return(&data->disabled); 1413 atomic_dec_return(&data->disabled);
1327 ftrace_preempt_enable(resched); 1414 preempt_enable_notrace();
1328 unpause_graph_tracing(); 1415 unpause_graph_tracing();
1329 1416
1330 return len; 1417 return len;
@@ -1389,8 +1476,10 @@ int trace_array_vprintk(struct trace_array *tr,
1389 1476
1390 memcpy(&entry->buf, trace_buf, len); 1477 memcpy(&entry->buf, trace_buf, len);
1391 entry->buf[len] = '\0'; 1478 entry->buf[len] = '\0';
1392 if (!filter_check_discard(call, entry, buffer, event)) 1479 if (!filter_check_discard(call, entry, buffer, event)) {
1393 ring_buffer_unlock_commit(buffer, event); 1480 ring_buffer_unlock_commit(buffer, event);
1481 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1482 }
1394 1483
1395 out_unlock: 1484 out_unlock:
1396 arch_spin_unlock(&trace_buf_lock); 1485 arch_spin_unlock(&trace_buf_lock);
@@ -1409,11 +1498,6 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1409} 1498}
1410EXPORT_SYMBOL_GPL(trace_vprintk); 1499EXPORT_SYMBOL_GPL(trace_vprintk);
1411 1500
1412enum trace_file_type {
1413 TRACE_FILE_LAT_FMT = 1,
1414 TRACE_FILE_ANNOTATE = 2,
1415};
1416
1417static void trace_iterator_increment(struct trace_iterator *iter) 1501static void trace_iterator_increment(struct trace_iterator *iter)
1418{ 1502{
1419 /* Don't allow ftrace to trace into the ring buffers */ 1503 /* Don't allow ftrace to trace into the ring buffers */
@@ -1427,7 +1511,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
1427} 1511}
1428 1512
1429static struct trace_entry * 1513static struct trace_entry *
1430peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) 1514peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1515 unsigned long *lost_events)
1431{ 1516{
1432 struct ring_buffer_event *event; 1517 struct ring_buffer_event *event;
1433 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; 1518 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1438,7 +1523,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1438 if (buf_iter) 1523 if (buf_iter)
1439 event = ring_buffer_iter_peek(buf_iter, ts); 1524 event = ring_buffer_iter_peek(buf_iter, ts);
1440 else 1525 else
1441 event = ring_buffer_peek(iter->tr->buffer, cpu, ts); 1526 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1527 lost_events);
1442 1528
1443 ftrace_enable_cpu(); 1529 ftrace_enable_cpu();
1444 1530
@@ -1446,10 +1532,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1446} 1532}
1447 1533
1448static struct trace_entry * 1534static struct trace_entry *
1449__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1535__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1536 unsigned long *missing_events, u64 *ent_ts)
1450{ 1537{
1451 struct ring_buffer *buffer = iter->tr->buffer; 1538 struct ring_buffer *buffer = iter->tr->buffer;
1452 struct trace_entry *ent, *next = NULL; 1539 struct trace_entry *ent, *next = NULL;
1540 unsigned long lost_events = 0, next_lost = 0;
1453 int cpu_file = iter->cpu_file; 1541 int cpu_file = iter->cpu_file;
1454 u64 next_ts = 0, ts; 1542 u64 next_ts = 0, ts;
1455 int next_cpu = -1; 1543 int next_cpu = -1;
@@ -1462,7 +1550,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1462 if (cpu_file > TRACE_PIPE_ALL_CPU) { 1550 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1463 if (ring_buffer_empty_cpu(buffer, cpu_file)) 1551 if (ring_buffer_empty_cpu(buffer, cpu_file))
1464 return NULL; 1552 return NULL;
1465 ent = peek_next_entry(iter, cpu_file, ent_ts); 1553 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1466 if (ent_cpu) 1554 if (ent_cpu)
1467 *ent_cpu = cpu_file; 1555 *ent_cpu = cpu_file;
1468 1556
@@ -1474,7 +1562,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1474 if (ring_buffer_empty_cpu(buffer, cpu)) 1562 if (ring_buffer_empty_cpu(buffer, cpu))
1475 continue; 1563 continue;
1476 1564
1477 ent = peek_next_entry(iter, cpu, &ts); 1565 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1478 1566
1479 /* 1567 /*
1480 * Pick the entry with the smallest timestamp: 1568 * Pick the entry with the smallest timestamp:
@@ -1483,6 +1571,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1483 next = ent; 1571 next = ent;
1484 next_cpu = cpu; 1572 next_cpu = cpu;
1485 next_ts = ts; 1573 next_ts = ts;
1574 next_lost = lost_events;
1486 } 1575 }
1487 } 1576 }
1488 1577
@@ -1492,6 +1581,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1492 if (ent_ts) 1581 if (ent_ts)
1493 *ent_ts = next_ts; 1582 *ent_ts = next_ts;
1494 1583
1584 if (missing_events)
1585 *missing_events = next_lost;
1586
1495 return next; 1587 return next;
1496} 1588}
1497 1589
@@ -1499,13 +1591,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1499struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 1591struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1500 int *ent_cpu, u64 *ent_ts) 1592 int *ent_cpu, u64 *ent_ts)
1501{ 1593{
1502 return __find_next_entry(iter, ent_cpu, ent_ts); 1594 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1503} 1595}
1504 1596
1505/* Find the next real entry, and increment the iterator to the next entry */ 1597/* Find the next real entry, and increment the iterator to the next entry */
1506static void *find_next_entry_inc(struct trace_iterator *iter) 1598void *trace_find_next_entry_inc(struct trace_iterator *iter)
1507{ 1599{
1508 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); 1600 iter->ent = __find_next_entry(iter, &iter->cpu,
1601 &iter->lost_events, &iter->ts);
1509 1602
1510 if (iter->ent) 1603 if (iter->ent)
1511 trace_iterator_increment(iter); 1604 trace_iterator_increment(iter);
@@ -1517,7 +1610,8 @@ static void trace_consume(struct trace_iterator *iter)
1517{ 1610{
1518 /* Don't allow ftrace to trace into the ring buffers */ 1611 /* Don't allow ftrace to trace into the ring buffers */
1519 ftrace_disable_cpu(); 1612 ftrace_disable_cpu();
1520 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); 1613 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1614 &iter->lost_events);
1521 ftrace_enable_cpu(); 1615 ftrace_enable_cpu();
1522} 1616}
1523 1617
@@ -1536,19 +1630,19 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1536 return NULL; 1630 return NULL;
1537 1631
1538 if (iter->idx < 0) 1632 if (iter->idx < 0)
1539 ent = find_next_entry_inc(iter); 1633 ent = trace_find_next_entry_inc(iter);
1540 else 1634 else
1541 ent = iter; 1635 ent = iter;
1542 1636
1543 while (ent && iter->idx < i) 1637 while (ent && iter->idx < i)
1544 ent = find_next_entry_inc(iter); 1638 ent = trace_find_next_entry_inc(iter);
1545 1639
1546 iter->pos = *pos; 1640 iter->pos = *pos;
1547 1641
1548 return ent; 1642 return ent;
1549} 1643}
1550 1644
1551static void tracing_iter_reset(struct trace_iterator *iter, int cpu) 1645void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1552{ 1646{
1553 struct trace_array *tr = iter->tr; 1647 struct trace_array *tr = iter->tr;
1554 struct ring_buffer_event *event; 1648 struct ring_buffer_event *event;
@@ -1580,12 +1674,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1580} 1674}
1581 1675
1582/* 1676/*
1583 * No necessary locking here. The worst thing which can
1584 * happen is loosing events consumed at the same time
1585 * by a trace_pipe reader.
1586 * Other than that, we don't risk to crash the ring buffer
1587 * because it serializes the readers.
1588 *
1589 * The current tracer is copied to avoid a global locking 1677 * The current tracer is copied to avoid a global locking
1590 * all around. 1678 * all around.
1591 */ 1679 */
@@ -1623,6 +1711,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1623 1711
1624 ftrace_enable_cpu(); 1712 ftrace_enable_cpu();
1625 1713
1714 iter->leftover = 0;
1626 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1715 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1627 ; 1716 ;
1628 1717
@@ -1640,12 +1729,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1640 } 1729 }
1641 1730
1642 trace_event_read_lock(); 1731 trace_event_read_lock();
1732 trace_access_lock(cpu_file);
1643 return p; 1733 return p;
1644} 1734}
1645 1735
1646static void s_stop(struct seq_file *m, void *p) 1736static void s_stop(struct seq_file *m, void *p)
1647{ 1737{
1738 struct trace_iterator *iter = m->private;
1739
1648 atomic_dec(&trace_record_cmdline_disabled); 1740 atomic_dec(&trace_record_cmdline_disabled);
1741 trace_access_unlock(iter->cpu_file);
1649 trace_event_read_unlock(); 1742 trace_event_read_unlock();
1650} 1743}
1651 1744
@@ -1669,7 +1762,7 @@ static void print_func_help_header(struct seq_file *m)
1669} 1762}
1670 1763
1671 1764
1672static void 1765void
1673print_trace_header(struct seq_file *m, struct trace_iterator *iter) 1766print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1674{ 1767{
1675 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1768 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1797,7 +1890,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1797 } 1890 }
1798 1891
1799 if (event) 1892 if (event)
1800 return event->trace(iter, sym_flags); 1893 return event->funcs->trace(iter, sym_flags, event);
1801 1894
1802 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) 1895 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1803 goto partial; 1896 goto partial;
@@ -1823,7 +1916,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1823 1916
1824 event = ftrace_find_event(entry->type); 1917 event = ftrace_find_event(entry->type);
1825 if (event) 1918 if (event)
1826 return event->raw(iter, 0); 1919 return event->funcs->raw(iter, 0, event);
1827 1920
1828 if (!trace_seq_printf(s, "%d ?\n", entry->type)) 1921 if (!trace_seq_printf(s, "%d ?\n", entry->type))
1829 goto partial; 1922 goto partial;
@@ -1850,7 +1943,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1850 1943
1851 event = ftrace_find_event(entry->type); 1944 event = ftrace_find_event(entry->type);
1852 if (event) { 1945 if (event) {
1853 enum print_line_t ret = event->hex(iter, 0); 1946 enum print_line_t ret = event->funcs->hex(iter, 0, event);
1854 if (ret != TRACE_TYPE_HANDLED) 1947 if (ret != TRACE_TYPE_HANDLED)
1855 return ret; 1948 return ret;
1856 } 1949 }
@@ -1875,10 +1968,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1875 } 1968 }
1876 1969
1877 event = ftrace_find_event(entry->type); 1970 event = ftrace_find_event(entry->type);
1878 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; 1971 return event ? event->funcs->binary(iter, 0, event) :
1972 TRACE_TYPE_HANDLED;
1879} 1973}
1880 1974
1881static int trace_empty(struct trace_iterator *iter) 1975int trace_empty(struct trace_iterator *iter)
1882{ 1976{
1883 int cpu; 1977 int cpu;
1884 1978
@@ -1909,10 +2003,14 @@ static int trace_empty(struct trace_iterator *iter)
1909} 2003}
1910 2004
1911/* Called with trace_event_read_lock() held. */ 2005/* Called with trace_event_read_lock() held. */
1912static enum print_line_t print_trace_line(struct trace_iterator *iter) 2006enum print_line_t print_trace_line(struct trace_iterator *iter)
1913{ 2007{
1914 enum print_line_t ret; 2008 enum print_line_t ret;
1915 2009
2010 if (iter->lost_events)
2011 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2012 iter->cpu, iter->lost_events);
2013
1916 if (iter->trace && iter->trace->print_line) { 2014 if (iter->trace && iter->trace->print_line) {
1917 ret = iter->trace->print_line(iter); 2015 ret = iter->trace->print_line(iter);
1918 if (ret != TRACE_TYPE_UNHANDLED) 2016 if (ret != TRACE_TYPE_UNHANDLED)
@@ -1941,6 +2039,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1941 return print_trace_fmt(iter); 2039 return print_trace_fmt(iter);
1942} 2040}
1943 2041
2042void trace_default_header(struct seq_file *m)
2043{
2044 struct trace_iterator *iter = m->private;
2045
2046 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2047 /* print nothing if the buffers are empty */
2048 if (trace_empty(iter))
2049 return;
2050 print_trace_header(m, iter);
2051 if (!(trace_flags & TRACE_ITER_VERBOSE))
2052 print_lat_help_header(m);
2053 } else {
2054 if (!(trace_flags & TRACE_ITER_VERBOSE))
2055 print_func_help_header(m);
2056 }
2057}
2058
1944static int s_show(struct seq_file *m, void *v) 2059static int s_show(struct seq_file *m, void *v)
1945{ 2060{
1946 struct trace_iterator *iter = v; 2061 struct trace_iterator *iter = v;
@@ -1953,17 +2068,9 @@ static int s_show(struct seq_file *m, void *v)
1953 } 2068 }
1954 if (iter->trace && iter->trace->print_header) 2069 if (iter->trace && iter->trace->print_header)
1955 iter->trace->print_header(m); 2070 iter->trace->print_header(m);
1956 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2071 else
1957 /* print nothing if the buffers are empty */ 2072 trace_default_header(m);
1958 if (trace_empty(iter)) 2073
1959 return 0;
1960 print_trace_header(m, iter);
1961 if (!(trace_flags & TRACE_ITER_VERBOSE))
1962 print_lat_help_header(m);
1963 } else {
1964 if (!(trace_flags & TRACE_ITER_VERBOSE))
1965 print_func_help_header(m);
1966 }
1967 } else if (iter->leftover) { 2074 } else if (iter->leftover) {
1968 /* 2075 /*
1969 * If we filled the seq_file buffer earlier, we 2076 * If we filled the seq_file buffer earlier, we
@@ -2049,15 +2156,20 @@ __tracing_open(struct inode *inode, struct file *file)
2049 2156
2050 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 2157 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2051 for_each_tracing_cpu(cpu) { 2158 for_each_tracing_cpu(cpu) {
2052
2053 iter->buffer_iter[cpu] = 2159 iter->buffer_iter[cpu] =
2054 ring_buffer_read_start(iter->tr->buffer, cpu); 2160 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2161 }
2162 ring_buffer_read_prepare_sync();
2163 for_each_tracing_cpu(cpu) {
2164 ring_buffer_read_start(iter->buffer_iter[cpu]);
2055 tracing_iter_reset(iter, cpu); 2165 tracing_iter_reset(iter, cpu);
2056 } 2166 }
2057 } else { 2167 } else {
2058 cpu = iter->cpu_file; 2168 cpu = iter->cpu_file;
2059 iter->buffer_iter[cpu] = 2169 iter->buffer_iter[cpu] =
2060 ring_buffer_read_start(iter->tr->buffer, cpu); 2170 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2171 ring_buffer_read_prepare_sync();
2172 ring_buffer_read_start(iter->buffer_iter[cpu]);
2061 tracing_iter_reset(iter, cpu); 2173 tracing_iter_reset(iter, cpu);
2062 } 2174 }
2063 2175
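Note: __tracing_open() now sets up per-CPU iterators in two phases: every CPU is prepared first, one ring_buffer_read_prepare_sync() call follows, and only then is each iterator started and reset, instead of calling ring_buffer_read_start() on the buffer directly. A minimal sketch of that pattern (the helper name is illustrative and not part of the patch; only calls visible in this hunk are used):

/* Sketch: two-phase iterator setup as introduced above. */
static void open_all_cpu_iters(struct trace_iterator *iter)
{
        int cpu;

        /* Phase 1: prepare an iterator for every tracing CPU. */
        for_each_tracing_cpu(cpu)
                iter->buffer_iter[cpu] =
                        ring_buffer_read_prepare(iter->tr->buffer, cpu);

        /* One synchronization point for all prepared iterators. */
        ring_buffer_read_prepare_sync();

        /* Phase 2: start reading and reset each iterator. */
        for_each_tracing_cpu(cpu) {
                ring_buffer_read_start(iter->buffer_iter[cpu]);
                tracing_iter_reset(iter, cpu);
        }
}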
@@ -2100,7 +2212,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
2100 2212
2101static int tracing_release(struct inode *inode, struct file *file) 2213static int tracing_release(struct inode *inode, struct file *file)
2102{ 2214{
2103 struct seq_file *m = (struct seq_file *)file->private_data; 2215 struct seq_file *m = file->private_data;
2104 struct trace_iterator *iter; 2216 struct trace_iterator *iter;
2105 int cpu; 2217 int cpu;
2106 2218
@@ -2224,11 +2336,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
2224 return count; 2336 return count;
2225} 2337}
2226 2338
2339static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
2340{
2341 if (file->f_mode & FMODE_READ)
2342 return seq_lseek(file, offset, origin);
2343 else
2344 return 0;
2345}
2346
2227static const struct file_operations tracing_fops = { 2347static const struct file_operations tracing_fops = {
2228 .open = tracing_open, 2348 .open = tracing_open,
2229 .read = seq_read, 2349 .read = seq_read,
2230 .write = tracing_write_stub, 2350 .write = tracing_write_stub,
2231 .llseek = seq_lseek, 2351 .llseek = tracing_seek,
2232 .release = tracing_release, 2352 .release = tracing_release,
2233}; 2353};
2234 2354
@@ -2236,6 +2356,7 @@ static const struct file_operations show_traces_fops = {
2236 .open = show_traces_open, 2356 .open = show_traces_open,
2237 .read = seq_read, 2357 .read = seq_read,
2238 .release = seq_release, 2358 .release = seq_release,
2359 .llseek = seq_lseek,
2239}; 2360};
2240 2361
2241/* 2362/*
@@ -2329,6 +2450,7 @@ static const struct file_operations tracing_cpumask_fops = {
2329 .open = tracing_open_generic, 2450 .open = tracing_open_generic,
2330 .read = tracing_cpumask_read, 2451 .read = tracing_cpumask_read,
2331 .write = tracing_cpumask_write, 2452 .write = tracing_cpumask_write,
2453 .llseek = generic_file_llseek,
2332}; 2454};
2333 2455
2334static int tracing_trace_options_show(struct seq_file *m, void *v) 2456static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2404,6 +2526,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2404 trace_flags |= mask; 2526 trace_flags |= mask;
2405 else 2527 else
2406 trace_flags &= ~mask; 2528 trace_flags &= ~mask;
2529
2530 if (mask == TRACE_ITER_RECORD_CMD)
2531 trace_event_enable_cmd_record(enabled);
2407} 2532}
2408 2533
2409static ssize_t 2534static ssize_t
@@ -2495,6 +2620,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
2495static const struct file_operations tracing_readme_fops = { 2620static const struct file_operations tracing_readme_fops = {
2496 .open = tracing_open_generic, 2621 .open = tracing_open_generic,
2497 .read = tracing_readme_read, 2622 .read = tracing_readme_read,
2623 .llseek = generic_file_llseek,
2498}; 2624};
2499 2625
2500static ssize_t 2626static ssize_t
@@ -2545,6 +2671,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2545static const struct file_operations tracing_saved_cmdlines_fops = { 2671static const struct file_operations tracing_saved_cmdlines_fops = {
2546 .open = tracing_open_generic, 2672 .open = tracing_open_generic,
2547 .read = tracing_saved_cmdlines_read, 2673 .read = tracing_saved_cmdlines_read,
2674 .llseek = generic_file_llseek,
2548}; 2675};
2549 2676
2550static ssize_t 2677static ssize_t
@@ -2640,6 +2767,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
2640 if (ret < 0) 2767 if (ret < 0)
2641 return ret; 2768 return ret;
2642 2769
2770 if (!current_trace->use_max_tr)
2771 goto out;
2772
2643 ret = ring_buffer_resize(max_tr.buffer, size); 2773 ret = ring_buffer_resize(max_tr.buffer, size);
2644 if (ret < 0) { 2774 if (ret < 0) {
2645 int r; 2775 int r;
@@ -2667,11 +2797,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
2667 return ret; 2797 return ret;
2668 } 2798 }
2669 2799
2800 max_tr.entries = size;
2801 out:
2670 global_trace.entries = size; 2802 global_trace.entries = size;
2671 2803
2672 return ret; 2804 return ret;
2673} 2805}
2674 2806
2807
2675/** 2808/**
2676 * tracing_update_buffers - used by tracing facility to expand ring buffers 2809 * tracing_update_buffers - used by tracing facility to expand ring buffers
2677 * 2810 *
@@ -2732,12 +2865,26 @@ static int tracing_set_tracer(const char *buf)
2732 trace_branch_disable(); 2865 trace_branch_disable();
2733 if (current_trace && current_trace->reset) 2866 if (current_trace && current_trace->reset)
2734 current_trace->reset(tr); 2867 current_trace->reset(tr);
2735 2868 if (current_trace && current_trace->use_max_tr) {
2869 /*
2870 * We don't free the ring buffer. Instead, resize it because
2871 * the max_tr ring buffer has some state (e.g. ring->clock) and
2872 * we want to preserve it.
2873 */
2874 ring_buffer_resize(max_tr.buffer, 1);
2875 max_tr.entries = 1;
2876 }
2736 destroy_trace_option_files(topts); 2877 destroy_trace_option_files(topts);
2737 2878
2738 current_trace = t; 2879 current_trace = t;
2739 2880
2740 topts = create_trace_option_files(current_trace); 2881 topts = create_trace_option_files(current_trace);
2882 if (current_trace->use_max_tr) {
2883 ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
2884 if (ret < 0)
2885 goto out;
2886 max_tr.entries = global_trace.entries;
2887 }
2741 2888
2742 if (t->init) { 2889 if (t->init) {
2743 ret = tracer_init(t, tr); 2890 ret = tracer_init(t, tr);
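Note: with the use_max_tr flag added in this series, tracing_set_tracer() keeps max_tr allocated but parked at a single entry whenever the selected tracer does not need it, and grows it back to global_trace.entries when one does. A rough consolidation of that logic (the helper name is hypothetical; the fields and calls are the ones shown in the hunk above):

/* Sketch: park max_tr at one entry, or grow it for a max-latency tracer. */
static int adjust_max_tr(struct tracer *old, struct tracer *new)
{
        int ret = 0;

        if (old && old->use_max_tr) {
                /* Keep the buffer (and its clock state); just shrink it. */
                ring_buffer_resize(max_tr.buffer, 1);
                max_tr.entries = 1;
        }

        if (new->use_max_tr) {
                ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
                if (!ret)
                        max_tr.entries = global_trace.entries;
        }

        return ret;
}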
@@ -2836,22 +2983,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2836 2983
2837 mutex_lock(&trace_types_lock); 2984 mutex_lock(&trace_types_lock);
2838 2985
2839 /* We only allow one reader per cpu */
2840 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2841 if (!cpumask_empty(tracing_reader_cpumask)) {
2842 ret = -EBUSY;
2843 goto out;
2844 }
2845 cpumask_setall(tracing_reader_cpumask);
2846 } else {
2847 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2848 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2849 else {
2850 ret = -EBUSY;
2851 goto out;
2852 }
2853 }
2854
2855 /* create a buffer to store the information to pass to userspace */ 2986 /* create a buffer to store the information to pass to userspace */
2856 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2987 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2857 if (!iter) { 2988 if (!iter) {
@@ -2890,6 +3021,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2890 if (iter->trace->pipe_open) 3021 if (iter->trace->pipe_open)
2891 iter->trace->pipe_open(iter); 3022 iter->trace->pipe_open(iter);
2892 3023
3024 nonseekable_open(inode, filp);
2893out: 3025out:
2894 mutex_unlock(&trace_types_lock); 3026 mutex_unlock(&trace_types_lock);
2895 return ret; 3027 return ret;
@@ -2907,12 +3039,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2907 3039
2908 mutex_lock(&trace_types_lock); 3040 mutex_lock(&trace_types_lock);
2909 3041
2910 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2911 cpumask_clear(tracing_reader_cpumask);
2912 else
2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2914
2915
2916 if (iter->trace->pipe_close) 3042 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter); 3043 iter->trace->pipe_close(iter);
2918 3044
@@ -3074,7 +3200,8 @@ waitagain:
3074 iter->pos = -1; 3200 iter->pos = -1;
3075 3201
3076 trace_event_read_lock(); 3202 trace_event_read_lock();
3077 while (find_next_entry_inc(iter) != NULL) { 3203 trace_access_lock(iter->cpu_file);
3204 while (trace_find_next_entry_inc(iter) != NULL) {
3078 enum print_line_t ret; 3205 enum print_line_t ret;
3079 int len = iter->seq.len; 3206 int len = iter->seq.len;
3080 3207
@@ -3090,6 +3217,7 @@ waitagain:
3090 if (iter->seq.len >= cnt) 3217 if (iter->seq.len >= cnt)
3091 break; 3218 break;
3092 } 3219 }
3220 trace_access_unlock(iter->cpu_file);
3093 trace_event_read_unlock(); 3221 trace_event_read_unlock();
3094 3222
3095 /* Now copy what we have to the user */ 3223 /* Now copy what we have to the user */
@@ -3156,7 +3284,7 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3156 if (ret != TRACE_TYPE_NO_CONSUME) 3284 if (ret != TRACE_TYPE_NO_CONSUME)
3157 trace_consume(iter); 3285 trace_consume(iter);
3158 rem -= count; 3286 rem -= count;
3159 if (!find_next_entry_inc(iter)) { 3287 if (!trace_find_next_entry_inc(iter)) {
3160 rem = 0; 3288 rem = 0;
3161 iter->ent = NULL; 3289 iter->ent = NULL;
3162 break; 3290 break;
@@ -3172,12 +3300,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3172 size_t len, 3300 size_t len,
3173 unsigned int flags) 3301 unsigned int flags)
3174{ 3302{
3175 struct page *pages[PIPE_BUFFERS]; 3303 struct page *pages_def[PIPE_DEF_BUFFERS];
3176 struct partial_page partial[PIPE_BUFFERS]; 3304 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3177 struct trace_iterator *iter = filp->private_data; 3305 struct trace_iterator *iter = filp->private_data;
3178 struct splice_pipe_desc spd = { 3306 struct splice_pipe_desc spd = {
3179 .pages = pages, 3307 .pages = pages_def,
3180 .partial = partial, 3308 .partial = partial_def,
3181 .nr_pages = 0, /* This gets updated below. */ 3309 .nr_pages = 0, /* This gets updated below. */
3182 .flags = flags, 3310 .flags = flags,
3183 .ops = &tracing_pipe_buf_ops, 3311 .ops = &tracing_pipe_buf_ops,
@@ -3188,6 +3316,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3188 size_t rem; 3316 size_t rem;
3189 unsigned int i; 3317 unsigned int i;
3190 3318
3319 if (splice_grow_spd(pipe, &spd))
3320 return -ENOMEM;
3321
3191 /* copy the tracer to avoid using a global lock all around */ 3322 /* copy the tracer to avoid using a global lock all around */
3192 mutex_lock(&trace_types_lock); 3323 mutex_lock(&trace_types_lock);
3193 if (unlikely(old_tracer != current_trace && current_trace)) { 3324 if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3209,46 +3340,50 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3209 if (ret <= 0) 3340 if (ret <= 0)
3210 goto out_err; 3341 goto out_err;
3211 3342
3212 if (!iter->ent && !find_next_entry_inc(iter)) { 3343 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
3213 ret = -EFAULT; 3344 ret = -EFAULT;
3214 goto out_err; 3345 goto out_err;
3215 } 3346 }
3216 3347
3217 trace_event_read_lock(); 3348 trace_event_read_lock();
3349 trace_access_lock(iter->cpu_file);
3218 3350
3219 /* Fill as many pages as possible. */ 3351 /* Fill as many pages as possible. */
3220 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3352 for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3221 pages[i] = alloc_page(GFP_KERNEL); 3353 spd.pages[i] = alloc_page(GFP_KERNEL);
3222 if (!pages[i]) 3354 if (!spd.pages[i])
3223 break; 3355 break;
3224 3356
3225 rem = tracing_fill_pipe_page(rem, iter); 3357 rem = tracing_fill_pipe_page(rem, iter);
3226 3358
3227 /* Copy the data into the page, so we can start over. */ 3359 /* Copy the data into the page, so we can start over. */
3228 ret = trace_seq_to_buffer(&iter->seq, 3360 ret = trace_seq_to_buffer(&iter->seq,
3229 page_address(pages[i]), 3361 page_address(spd.pages[i]),
3230 iter->seq.len); 3362 iter->seq.len);
3231 if (ret < 0) { 3363 if (ret < 0) {
3232 __free_page(pages[i]); 3364 __free_page(spd.pages[i]);
3233 break; 3365 break;
3234 } 3366 }
3235 partial[i].offset = 0; 3367 spd.partial[i].offset = 0;
3236 partial[i].len = iter->seq.len; 3368 spd.partial[i].len = iter->seq.len;
3237 3369
3238 trace_seq_init(&iter->seq); 3370 trace_seq_init(&iter->seq);
3239 } 3371 }
3240 3372
3373 trace_access_unlock(iter->cpu_file);
3241 trace_event_read_unlock(); 3374 trace_event_read_unlock();
3242 mutex_unlock(&iter->mutex); 3375 mutex_unlock(&iter->mutex);
3243 3376
3244 spd.nr_pages = i; 3377 spd.nr_pages = i;
3245 3378
3246 return splice_to_pipe(pipe, &spd); 3379 ret = splice_to_pipe(pipe, &spd);
3380out:
3381 splice_shrink_spd(pipe, &spd);
3382 return ret;
3247 3383
3248out_err: 3384out_err:
3249 mutex_unlock(&iter->mutex); 3385 mutex_unlock(&iter->mutex);
3250 3386 goto out;
3251 return ret;
3252} 3387}
3253 3388
3254static ssize_t 3389static ssize_t
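Note: both splice paths in this patch replace the fixed PIPE_BUFFERS-sized on-stack arrays with PIPE_DEF_BUFFERS defaults plus splice_grow_spd()/splice_shrink_spd(), so the fill loops can use the pipe's real pipe->buffers capacity. A condensed sketch of the bracket pattern (the function name and the elided copy step are placeholders; it assumes the caller initialized spd with the default arrays as in the hunk above, and uses only the splice helpers and fields shown there):

/* Sketch: grow the spd to the pipe's capacity, fill, always shrink. */
static ssize_t splice_read_sketch(struct pipe_inode_info *pipe,
                                  struct splice_pipe_desc *spd)
{
        ssize_t ret;
        unsigned int i;

        if (splice_grow_spd(pipe, spd))
                return -ENOMEM;

        /* Fill at most pipe->buffers pages, not the old PIPE_BUFFERS. */
        for (i = 0; i < pipe->buffers; i++) {
                spd->pages[i] = alloc_page(GFP_KERNEL);
                if (!spd->pages[i])
                        break;
                /* ... copy one page of trace data, fill spd->partial[i] ... */
        }
        spd->nr_pages = i;

        ret = splice_to_pipe(pipe, spd);
        splice_shrink_spd(pipe, spd);   /* undo the grow on every return path */
        return ret;
}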
@@ -3332,7 +3467,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3332 } 3467 }
3333 3468
3334 tracing_start(); 3469 tracing_start();
3335 max_tr.entries = global_trace.entries;
3336 mutex_unlock(&trace_types_lock); 3470 mutex_unlock(&trace_types_lock);
3337 3471
3338 return cnt; 3472 return cnt;
@@ -3353,6 +3487,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3353 size_t cnt, loff_t *fpos) 3487 size_t cnt, loff_t *fpos)
3354{ 3488{
3355 char *buf; 3489 char *buf;
3490 size_t written;
3356 3491
3357 if (tracing_disabled) 3492 if (tracing_disabled)
3358 return -EINVAL; 3493 return -EINVAL;
@@ -3374,11 +3509,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
3374 } else 3509 } else
3375 buf[cnt] = '\0'; 3510 buf[cnt] = '\0';
3376 3511
3377 cnt = mark_printk("%s", buf); 3512 written = mark_printk("%s", buf);
3378 kfree(buf); 3513 kfree(buf);
3379 *fpos += cnt; 3514 *fpos += written;
3380 3515
3381 return cnt; 3516 /* don't tell userspace we wrote more - it might confuse them */
3517 if (written > cnt)
3518 written = cnt;
3519
3520 return written;
3382} 3521}
3383 3522
3384static int tracing_clock_show(struct seq_file *m, void *v) 3523static int tracing_clock_show(struct seq_file *m, void *v)
@@ -3445,18 +3584,21 @@ static const struct file_operations tracing_max_lat_fops = {
3445 .open = tracing_open_generic, 3584 .open = tracing_open_generic,
3446 .read = tracing_max_lat_read, 3585 .read = tracing_max_lat_read,
3447 .write = tracing_max_lat_write, 3586 .write = tracing_max_lat_write,
3587 .llseek = generic_file_llseek,
3448}; 3588};
3449 3589
3450static const struct file_operations tracing_ctrl_fops = { 3590static const struct file_operations tracing_ctrl_fops = {
3451 .open = tracing_open_generic, 3591 .open = tracing_open_generic,
3452 .read = tracing_ctrl_read, 3592 .read = tracing_ctrl_read,
3453 .write = tracing_ctrl_write, 3593 .write = tracing_ctrl_write,
3594 .llseek = generic_file_llseek,
3454}; 3595};
3455 3596
3456static const struct file_operations set_tracer_fops = { 3597static const struct file_operations set_tracer_fops = {
3457 .open = tracing_open_generic, 3598 .open = tracing_open_generic,
3458 .read = tracing_set_trace_read, 3599 .read = tracing_set_trace_read,
3459 .write = tracing_set_trace_write, 3600 .write = tracing_set_trace_write,
3601 .llseek = generic_file_llseek,
3460}; 3602};
3461 3603
3462static const struct file_operations tracing_pipe_fops = { 3604static const struct file_operations tracing_pipe_fops = {
@@ -3465,17 +3607,20 @@ static const struct file_operations tracing_pipe_fops = {
3465 .read = tracing_read_pipe, 3607 .read = tracing_read_pipe,
3466 .splice_read = tracing_splice_read_pipe, 3608 .splice_read = tracing_splice_read_pipe,
3467 .release = tracing_release_pipe, 3609 .release = tracing_release_pipe,
3610 .llseek = no_llseek,
3468}; 3611};
3469 3612
3470static const struct file_operations tracing_entries_fops = { 3613static const struct file_operations tracing_entries_fops = {
3471 .open = tracing_open_generic, 3614 .open = tracing_open_generic,
3472 .read = tracing_entries_read, 3615 .read = tracing_entries_read,
3473 .write = tracing_entries_write, 3616 .write = tracing_entries_write,
3617 .llseek = generic_file_llseek,
3474}; 3618};
3475 3619
3476static const struct file_operations tracing_mark_fops = { 3620static const struct file_operations tracing_mark_fops = {
3477 .open = tracing_open_generic, 3621 .open = tracing_open_generic,
3478 .write = tracing_mark_write, 3622 .write = tracing_mark_write,
3623 .llseek = generic_file_llseek,
3479}; 3624};
3480 3625
3481static const struct file_operations trace_clock_fops = { 3626static const struct file_operations trace_clock_fops = {
@@ -3521,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3521 size_t count, loff_t *ppos) 3666 size_t count, loff_t *ppos)
3522{ 3667{
3523 struct ftrace_buffer_info *info = filp->private_data; 3668 struct ftrace_buffer_info *info = filp->private_data;
3524 unsigned int pos;
3525 ssize_t ret; 3669 ssize_t ret;
3526 size_t size; 3670 size_t size;
3527 3671
@@ -3539,18 +3683,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3539 3683
3540 info->read = 0; 3684 info->read = 0;
3541 3685
3686 trace_access_lock(info->cpu);
3542 ret = ring_buffer_read_page(info->tr->buffer, 3687 ret = ring_buffer_read_page(info->tr->buffer,
3543 &info->spare, 3688 &info->spare,
3544 count, 3689 count,
3545 info->cpu, 0); 3690 info->cpu, 0);
3691 trace_access_unlock(info->cpu);
3546 if (ret < 0) 3692 if (ret < 0)
3547 return 0; 3693 return 0;
3548 3694
3549 pos = ring_buffer_page_len(info->spare);
3550
3551 if (pos < PAGE_SIZE)
3552 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3553
3554read: 3695read:
3555 size = PAGE_SIZE - info->read; 3696 size = PAGE_SIZE - info->read;
3556 if (size > count) 3697 if (size > count)
@@ -3645,11 +3786,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3645 unsigned int flags) 3786 unsigned int flags)
3646{ 3787{
3647 struct ftrace_buffer_info *info = file->private_data; 3788 struct ftrace_buffer_info *info = file->private_data;
3648 struct partial_page partial[PIPE_BUFFERS]; 3789 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3649 struct page *pages[PIPE_BUFFERS]; 3790 struct page *pages_def[PIPE_DEF_BUFFERS];
3650 struct splice_pipe_desc spd = { 3791 struct splice_pipe_desc spd = {
3651 .pages = pages, 3792 .pages = pages_def,
3652 .partial = partial, 3793 .partial = partial_def,
3653 .flags = flags, 3794 .flags = flags,
3654 .ops = &buffer_pipe_buf_ops, 3795 .ops = &buffer_pipe_buf_ops,
3655 .spd_release = buffer_spd_release, 3796 .spd_release = buffer_spd_release,
@@ -3658,21 +3799,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3658 int entries, size, i; 3799 int entries, size, i;
3659 size_t ret; 3800 size_t ret;
3660 3801
3802 if (splice_grow_spd(pipe, &spd))
3803 return -ENOMEM;
3804
3661 if (*ppos & (PAGE_SIZE - 1)) { 3805 if (*ppos & (PAGE_SIZE - 1)) {
3662 WARN_ONCE(1, "Ftrace: previous read must page-align\n"); 3806 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3663 return -EINVAL; 3807 ret = -EINVAL;
3808 goto out;
3664 } 3809 }
3665 3810
3666 if (len & (PAGE_SIZE - 1)) { 3811 if (len & (PAGE_SIZE - 1)) {
3667 WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); 3812 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3668 if (len < PAGE_SIZE) 3813 if (len < PAGE_SIZE) {
3669 return -EINVAL; 3814 ret = -EINVAL;
3815 goto out;
3816 }
3670 len &= PAGE_MASK; 3817 len &= PAGE_MASK;
3671 } 3818 }
3672 3819
3820 trace_access_lock(info->cpu);
3673 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3821 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3674 3822
3675 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3823 for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
3676 struct page *page; 3824 struct page *page;
3677 int r; 3825 int r;
3678 3826
@@ -3717,6 +3865,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3717 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3865 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3718 } 3866 }
3719 3867
3868 trace_access_unlock(info->cpu);
3720 spd.nr_pages = i; 3869 spd.nr_pages = i;
3721 3870
3722 /* did we read anything? */ 3871 /* did we read anything? */
@@ -3726,11 +3875,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3726 else 3875 else
3727 ret = 0; 3876 ret = 0;
3728 /* TODO: block */ 3877 /* TODO: block */
3729 return ret; 3878 goto out;
3730 } 3879 }
3731 3880
3732 ret = splice_to_pipe(pipe, &spd); 3881 ret = splice_to_pipe(pipe, &spd);
3733 3882 splice_shrink_spd(pipe, &spd);
3883out:
3734 return ret; 3884 return ret;
3735} 3885}
3736 3886
@@ -3776,6 +3926,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
3776static const struct file_operations tracing_stats_fops = { 3926static const struct file_operations tracing_stats_fops = {
3777 .open = tracing_open_generic, 3927 .open = tracing_open_generic,
3778 .read = tracing_stats_read, 3928 .read = tracing_stats_read,
3929 .llseek = generic_file_llseek,
3779}; 3930};
3780 3931
3781#ifdef CONFIG_DYNAMIC_FTRACE 3932#ifdef CONFIG_DYNAMIC_FTRACE
@@ -3812,6 +3963,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3812static const struct file_operations tracing_dyn_info_fops = { 3963static const struct file_operations tracing_dyn_info_fops = {
3813 .open = tracing_open_generic, 3964 .open = tracing_open_generic,
3814 .read = tracing_read_dyn_info, 3965 .read = tracing_read_dyn_info,
3966 .llseek = generic_file_llseek,
3815}; 3967};
3816#endif 3968#endif
3817 3969
@@ -3868,13 +4020,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3868{ 4020{
3869 struct dentry *d_percpu = tracing_dentry_percpu(); 4021 struct dentry *d_percpu = tracing_dentry_percpu();
3870 struct dentry *d_cpu; 4022 struct dentry *d_cpu;
3871 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 4023 char cpu_dir[30]; /* 30 characters should be more than enough */
3872 char cpu_dir[7];
3873 4024
3874 if (cpu > 999 || cpu < 0) 4025 snprintf(cpu_dir, 30, "cpu%ld", cpu);
3875 return;
3876
3877 sprintf(cpu_dir, "cpu%ld", cpu);
3878 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4026 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3879 if (!d_cpu) { 4027 if (!d_cpu) {
3880 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4028 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
@@ -3965,6 +4113,7 @@ static const struct file_operations trace_options_fops = {
3965 .open = tracing_open_generic, 4113 .open = tracing_open_generic,
3966 .read = trace_options_read, 4114 .read = trace_options_read,
3967 .write = trace_options_write, 4115 .write = trace_options_write,
4116 .llseek = generic_file_llseek,
3968}; 4117};
3969 4118
3970static ssize_t 4119static ssize_t
@@ -4016,6 +4165,7 @@ static const struct file_operations trace_options_core_fops = {
4016 .open = tracing_open_generic, 4165 .open = tracing_open_generic,
4017 .read = trace_options_core_read, 4166 .read = trace_options_core_read,
4018 .write = trace_options_core_write, 4167 .write = trace_options_core_write,
4168 .llseek = generic_file_llseek,
4019}; 4169};
4020 4170
4021struct dentry *trace_create_file(const char *name, 4171struct dentry *trace_create_file(const char *name,
@@ -4153,6 +4303,8 @@ static __init int tracer_init_debugfs(void)
4153 struct dentry *d_tracer; 4303 struct dentry *d_tracer;
4154 int cpu; 4304 int cpu;
4155 4305
4306 trace_access_lock_init();
4307
4156 d_tracer = tracing_init_dentry(); 4308 d_tracer = tracing_init_dentry();
4157 4309
4158 trace_create_file("tracing_enabled", 0644, d_tracer, 4310 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4176,10 +4328,10 @@ static __init int tracer_init_debugfs(void)
4176#ifdef CONFIG_TRACER_MAX_TRACE 4328#ifdef CONFIG_TRACER_MAX_TRACE
4177 trace_create_file("tracing_max_latency", 0644, d_tracer, 4329 trace_create_file("tracing_max_latency", 0644, d_tracer,
4178 &tracing_max_latency, &tracing_max_lat_fops); 4330 &tracing_max_latency, &tracing_max_lat_fops);
4331#endif
4179 4332
4180 trace_create_file("tracing_thresh", 0644, d_tracer, 4333 trace_create_file("tracing_thresh", 0644, d_tracer,
4181 &tracing_thresh, &tracing_max_lat_fops); 4334 &tracing_thresh, &tracing_max_lat_fops);
4182#endif
4183 4335
4184 trace_create_file("README", 0444, d_tracer, 4336 trace_create_file("README", 0444, d_tracer,
4185 NULL, &tracing_readme_fops); 4337 NULL, &tracing_readme_fops);
@@ -4203,9 +4355,6 @@ static __init int tracer_init_debugfs(void)
4203 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4355 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
4204 &ftrace_update_tot_cnt, &tracing_dyn_info_fops); 4356 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
4205#endif 4357#endif
4206#ifdef CONFIG_SYSPROF_TRACER
4207 init_tracer_sysprof_debugfs(d_tracer);
4208#endif
4209 4358
4210 create_trace_options_dir(); 4359 create_trace_options_dir();
4211 4360
@@ -4219,7 +4368,7 @@ static int trace_panic_handler(struct notifier_block *this,
4219 unsigned long event, void *unused) 4368 unsigned long event, void *unused)
4220{ 4369{
4221 if (ftrace_dump_on_oops) 4370 if (ftrace_dump_on_oops)
4222 ftrace_dump(); 4371 ftrace_dump(ftrace_dump_on_oops);
4223 return NOTIFY_OK; 4372 return NOTIFY_OK;
4224} 4373}
4225 4374
@@ -4236,7 +4385,7 @@ static int trace_die_handler(struct notifier_block *self,
4236 switch (val) { 4385 switch (val) {
4237 case DIE_OOPS: 4386 case DIE_OOPS:
4238 if (ftrace_dump_on_oops) 4387 if (ftrace_dump_on_oops)
4239 ftrace_dump(); 4388 ftrace_dump(ftrace_dump_on_oops);
4240 break; 4389 break;
4241 default: 4390 default:
4242 break; 4391 break;
@@ -4262,7 +4411,7 @@ static struct notifier_block trace_die_notifier = {
4262 */ 4411 */
4263#define KERN_TRACE KERN_EMERG 4412#define KERN_TRACE KERN_EMERG
4264 4413
4265static void 4414void
4266trace_printk_seq(struct trace_seq *s) 4415trace_printk_seq(struct trace_seq *s)
4267{ 4416{
4268 /* Probably should print a warning here. */ 4417 /* Probably should print a warning here. */
@@ -4277,7 +4426,15 @@ trace_printk_seq(struct trace_seq *s)
4277 trace_seq_init(s); 4426 trace_seq_init(s);
4278} 4427}
4279 4428
4280static void __ftrace_dump(bool disable_tracing) 4429void trace_init_global_iter(struct trace_iterator *iter)
4430{
4431 iter->tr = &global_trace;
4432 iter->trace = current_trace;
4433 iter->cpu_file = TRACE_PIPE_ALL_CPU;
4434}
4435
4436static void
4437__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4281{ 4438{
4282 static arch_spinlock_t ftrace_dump_lock = 4439 static arch_spinlock_t ftrace_dump_lock =
4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 4440 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -4301,8 +4458,10 @@ static void __ftrace_dump(bool disable_tracing)
4301 if (disable_tracing) 4458 if (disable_tracing)
4302 ftrace_kill(); 4459 ftrace_kill();
4303 4460
4461 trace_init_global_iter(&iter);
4462
4304 for_each_tracing_cpu(cpu) { 4463 for_each_tracing_cpu(cpu) {
4305 atomic_inc(&global_trace.data[cpu]->disabled); 4464 atomic_inc(&iter.tr->data[cpu]->disabled);
4306 } 4465 }
4307 4466
4308 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; 4467 old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -4310,12 +4469,25 @@ static void __ftrace_dump(bool disable_tracing)
4310 /* don't look at user memory in panic mode */ 4469 /* don't look at user memory in panic mode */
4311 trace_flags &= ~TRACE_ITER_SYM_USEROBJ; 4470 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4312 4471
4313 printk(KERN_TRACE "Dumping ftrace buffer:\n");
4314
4315 /* Simulate the iterator */ 4472 /* Simulate the iterator */
4316 iter.tr = &global_trace; 4473 iter.tr = &global_trace;
4317 iter.trace = current_trace; 4474 iter.trace = current_trace;
4318 iter.cpu_file = TRACE_PIPE_ALL_CPU; 4475
4476 switch (oops_dump_mode) {
4477 case DUMP_ALL:
4478 iter.cpu_file = TRACE_PIPE_ALL_CPU;
4479 break;
4480 case DUMP_ORIG:
4481 iter.cpu_file = raw_smp_processor_id();
4482 break;
4483 case DUMP_NONE:
4484 goto out_enable;
4485 default:
4486 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
4487 iter.cpu_file = TRACE_PIPE_ALL_CPU;
4488 }
4489
4490 printk(KERN_TRACE "Dumping ftrace buffer:\n");
4319 4491
4320 /* 4492 /*
4321 * We need to stop all tracing on all CPUS to read the 4493 * We need to stop all tracing on all CPUS to read the
@@ -4338,7 +4510,7 @@ static void __ftrace_dump(bool disable_tracing)
4338 iter.iter_flags |= TRACE_FILE_LAT_FMT; 4510 iter.iter_flags |= TRACE_FILE_LAT_FMT;
4339 iter.pos = -1; 4511 iter.pos = -1;
4340 4512
4341 if (find_next_entry_inc(&iter) != NULL) { 4513 if (trace_find_next_entry_inc(&iter) != NULL) {
4342 int ret; 4514 int ret;
4343 4515
4344 ret = print_trace_line(&iter); 4516 ret = print_trace_line(&iter);
@@ -4354,12 +4526,13 @@ static void __ftrace_dump(bool disable_tracing)
4354 else 4526 else
4355 printk(KERN_TRACE "---------------------------------\n"); 4527 printk(KERN_TRACE "---------------------------------\n");
4356 4528
4529 out_enable:
4357 /* Re-enable tracing if requested */ 4530 /* Re-enable tracing if requested */
4358 if (!disable_tracing) { 4531 if (!disable_tracing) {
4359 trace_flags |= old_userobj; 4532 trace_flags |= old_userobj;
4360 4533
4361 for_each_tracing_cpu(cpu) { 4534 for_each_tracing_cpu(cpu) {
4362 atomic_dec(&global_trace.data[cpu]->disabled); 4535 atomic_dec(&iter.tr->data[cpu]->disabled);
4363 } 4536 }
4364 tracing_on(); 4537 tracing_on();
4365 } 4538 }
@@ -4370,9 +4543,9 @@ static void __ftrace_dump(bool disable_tracing)
4370} 4543}
4371 4544
4372/* By default: disable tracing after the dump */ 4545/* By default: disable tracing after the dump */
4373void ftrace_dump(void) 4546void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4374{ 4547{
4375 __ftrace_dump(true); 4548 __ftrace_dump(true, oops_dump_mode);
4376} 4549}
4377 4550
4378__init static int tracer_alloc_buffers(void) 4551__init static int tracer_alloc_buffers(void)
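Note: ftrace_dump() now takes an explicit ftrace_dump_mode; the panic and die notifiers above simply pass ftrace_dump_on_oops through. A small hedged example of a direct caller choosing a mode (the wrapper is illustrative; DUMP_ALL, DUMP_ORIG and DUMP_NONE are the enum values handled by the switch in __ftrace_dump()):

/* Sketch: dump only the buffer of the CPU that hit the problem. */
static void dump_local_cpu_trace(void)
{
        /*
         * DUMP_ORIG makes __ftrace_dump() use raw_smp_processor_id();
         * DUMP_ALL would dump every CPU, DUMP_NONE skips the dump.
         */
        ftrace_dump(DUMP_ORIG);
}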
@@ -4387,9 +4560,6 @@ __init static int tracer_alloc_buffers(void)
4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4560 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4388 goto out_free_buffer_mask; 4561 goto out_free_buffer_mask;
4389 4562
4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4391 goto out_free_tracing_cpumask;
4392
4393 /* To save memory, keep the ring buffer size to its minimum */ 4563 /* To save memory, keep the ring buffer size to its minimum */
4394 if (ring_buffer_expanded) 4564 if (ring_buffer_expanded)
4395 ring_buf_size = trace_buf_size; 4565 ring_buf_size = trace_buf_size;
@@ -4411,16 +4581,14 @@ __init static int tracer_alloc_buffers(void)
4411 4581
4412 4582
4413#ifdef CONFIG_TRACER_MAX_TRACE 4583#ifdef CONFIG_TRACER_MAX_TRACE
4414 max_tr.buffer = ring_buffer_alloc(ring_buf_size, 4584 max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
4415 TRACE_BUFFER_FLAGS);
4416 if (!max_tr.buffer) { 4585 if (!max_tr.buffer) {
4417 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); 4586 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
4418 WARN_ON(1); 4587 WARN_ON(1);
4419 ring_buffer_free(global_trace.buffer); 4588 ring_buffer_free(global_trace.buffer);
4420 goto out_free_cpumask; 4589 goto out_free_cpumask;
4421 } 4590 }
4422 max_tr.entries = ring_buffer_size(max_tr.buffer); 4591 max_tr.entries = 1;
4423 WARN_ON(max_tr.entries != global_trace.entries);
4424#endif 4592#endif
4425 4593
4426 /* Allocate the first page for all buffers */ 4594 /* Allocate the first page for all buffers */
@@ -4433,9 +4601,6 @@ __init static int tracer_alloc_buffers(void)
4433 4601
4434 register_tracer(&nop_trace); 4602 register_tracer(&nop_trace);
4435 current_trace = &nop_trace; 4603 current_trace = &nop_trace;
4436#ifdef CONFIG_BOOT_TRACER
4437 register_tracer(&boot_tracer);
4438#endif
4439 /* All seems OK, enable tracing */ 4604 /* All seems OK, enable tracing */
4440 tracing_disabled = 0; 4605 tracing_disabled = 0;
4441 4606
@@ -4447,8 +4612,6 @@ __init static int tracer_alloc_buffers(void)
4447 return 0; 4612 return 0;
4448 4613
4449out_free_cpumask: 4614out_free_cpumask:
4450 free_cpumask_var(tracing_reader_cpumask);
4451out_free_tracing_cpumask:
4452 free_cpumask_var(tracing_cpumask); 4615 free_cpumask_var(tracing_cpumask);
4453out_free_buffer_mask: 4616out_free_buffer_mask:
4454 free_cpumask_var(tracing_buffer_mask); 4617 free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4df6a77eb196..9021f8c0c0c3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,7 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h> 10#include <linux/tracepoint.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h>
13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h> 12#include <linux/hw_breakpoint.h>
15
16#include <linux/trace_seq.h> 13#include <linux/trace_seq.h>
17#include <linux/ftrace_event.h> 14#include <linux/ftrace_event.h>
18 15
@@ -25,31 +22,17 @@ enum trace_type {
25 TRACE_STACK, 22 TRACE_STACK,
26 TRACE_PRINT, 23 TRACE_PRINT,
27 TRACE_BPRINT, 24 TRACE_BPRINT,
28 TRACE_SPECIAL,
29 TRACE_MMIO_RW, 25 TRACE_MMIO_RW,
30 TRACE_MMIO_MAP, 26 TRACE_MMIO_MAP,
31 TRACE_BRANCH, 27 TRACE_BRANCH,
32 TRACE_BOOT_CALL,
33 TRACE_BOOT_RET,
34 TRACE_GRAPH_RET, 28 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 29 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 30 TRACE_USER_STACK,
37 TRACE_HW_BRANCHES,
38 TRACE_KMEM_ALLOC,
39 TRACE_KMEM_FREE,
40 TRACE_BLK, 31 TRACE_BLK,
41 TRACE_KSYM,
42 32
43 __TRACE_LAST_TYPE, 33 __TRACE_LAST_TYPE,
44}; 34};
45 35
46enum kmemtrace_type_id {
47 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
48 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
49 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
50};
51
52extern struct tracer boot_tracer;
53 36
54#undef __field 37#undef __field
55#define __field(type, item) type item; 38#define __field(type, item) type item;
@@ -103,29 +86,17 @@ struct syscall_trace_exit {
103 long ret; 86 long ret;
104}; 87};
105 88
106struct kprobe_trace_entry { 89struct kprobe_trace_entry_head {
107 struct trace_entry ent; 90 struct trace_entry ent;
108 unsigned long ip; 91 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111}; 92};
112 93
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ 94struct kretprobe_trace_entry_head {
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent; 95 struct trace_entry ent;
119 unsigned long func; 96 unsigned long func;
120 unsigned long ret_ip; 97 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123}; 98};
124 99
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
129/* 100/*
130 * trace_flag_type is an enumeration that holds different 101 * trace_flag_type is an enumeration that holds different
131 * states when a trace occurs. These are: 102 * states when a trace occurs. These are:
@@ -217,24 +188,15 @@ extern void __ftrace_bad_type(void);
217 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ 188 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
218 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 189 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
219 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ 190 IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
220 IF_ASSIGN(var, ent, struct special_entry, 0); \
221 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 191 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
222 TRACE_MMIO_RW); \ 192 TRACE_MMIO_RW); \
223 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 193 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
224 TRACE_MMIO_MAP); \ 194 TRACE_MMIO_MAP); \
225 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
226 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
227 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ 195 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
228 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ 196 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
229 TRACE_GRAPH_ENT); \ 197 TRACE_GRAPH_ENT); \
230 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 198 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
231 TRACE_GRAPH_RET); \ 199 TRACE_GRAPH_RET); \
232 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
233 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
234 TRACE_KMEM_ALLOC); \
235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
236 TRACE_KMEM_FREE); \
237 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
238 __ftrace_bad_type(); \ 200 __ftrace_bad_type(); \
239 } while (0) 201 } while (0)
240 202
@@ -312,6 +274,7 @@ struct tracer {
312 struct tracer *next; 274 struct tracer *next;
313 int print_max; 275 int print_max;
314 struct tracer_flags *flags; 276 struct tracer_flags *flags;
277 int use_max_tr;
315}; 278};
316 279
317 280
@@ -332,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
332 const struct file_operations *fops); 295 const struct file_operations *fops);
333 296
334struct dentry *tracing_init_dentry(void); 297struct dentry *tracing_init_dentry(void);
335void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
336 298
337struct ring_buffer_event; 299struct ring_buffer_event;
338 300
@@ -352,6 +314,14 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
352struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 314struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
353 int *ent_cpu, u64 *ent_ts); 315 int *ent_cpu, u64 *ent_ts);
354 316
317int trace_empty(struct trace_iterator *iter);
318
319void *trace_find_next_entry_inc(struct trace_iterator *iter);
320
321void trace_init_global_iter(struct trace_iterator *iter);
322
323void tracing_iter_reset(struct trace_iterator *iter, int cpu);
324
355void default_wait_pipe(struct trace_iterator *iter); 325void default_wait_pipe(struct trace_iterator *iter);
356void poll_wait_pipe(struct trace_iterator *iter); 326void poll_wait_pipe(struct trace_iterator *iter);
357 327
@@ -369,15 +339,17 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
369 struct task_struct *wakee, 339 struct task_struct *wakee,
370 struct task_struct *cur, 340 struct task_struct *cur,
371 unsigned long flags, int pc); 341 unsigned long flags, int pc);
372void trace_special(struct trace_array *tr,
373 struct trace_array_cpu *data,
374 unsigned long arg1,
375 unsigned long arg2,
376 unsigned long arg3, int pc);
377void trace_function(struct trace_array *tr, 342void trace_function(struct trace_array *tr,
378 unsigned long ip, 343 unsigned long ip,
379 unsigned long parent_ip, 344 unsigned long parent_ip,
380 unsigned long flags, int pc); 345 unsigned long flags, int pc);
346void trace_graph_function(struct trace_array *tr,
347 unsigned long ip,
348 unsigned long parent_ip,
349 unsigned long flags, int pc);
350void trace_default_header(struct seq_file *m);
351void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
352int trace_empty(struct trace_iterator *iter);
381 353
382void trace_graph_return(struct ftrace_graph_ret *trace); 354void trace_graph_return(struct ftrace_graph_ret *trace);
383int trace_graph_entry(struct ftrace_graph_ent *trace); 355int trace_graph_entry(struct ftrace_graph_ent *trace);
@@ -391,14 +363,22 @@ void tracing_start_sched_switch_record(void);
391int register_tracer(struct tracer *type); 363int register_tracer(struct tracer *type);
392void unregister_tracer(struct tracer *type); 364void unregister_tracer(struct tracer *type);
393int is_tracing_stopped(void); 365int is_tracing_stopped(void);
366enum trace_file_type {
367 TRACE_FILE_LAT_FMT = 1,
368 TRACE_FILE_ANNOTATE = 2,
369};
370
371extern cpumask_var_t __read_mostly tracing_buffer_mask;
394 372
395extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); 373#define for_each_tracing_cpu(cpu) \
374 for_each_cpu(cpu, tracing_buffer_mask)
396 375
397extern unsigned long nsecs_to_usecs(unsigned long nsecs); 376extern unsigned long nsecs_to_usecs(unsigned long nsecs);
398 377
378extern unsigned long tracing_thresh;
379
399#ifdef CONFIG_TRACER_MAX_TRACE 380#ifdef CONFIG_TRACER_MAX_TRACE
400extern unsigned long tracing_max_latency; 381extern unsigned long tracing_max_latency;
401extern unsigned long tracing_thresh;
402 382
403void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 383void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
404void update_max_tr_single(struct trace_array *tr, 384void update_max_tr_single(struct trace_array *tr,
@@ -415,12 +395,12 @@ void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
415void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, 395void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
416 int pc); 396 int pc);
417#else 397#else
418static inline void ftrace_trace_stack(struct trace_array *tr, 398static inline void ftrace_trace_stack(struct ring_buffer *buffer,
419 unsigned long flags, int skip, int pc) 399 unsigned long flags, int skip, int pc)
420{ 400{
421} 401}
422 402
423static inline void ftrace_trace_userstack(struct trace_array *tr, 403static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
424 unsigned long flags, int pc) 404 unsigned long flags, int pc)
425{ 405{
426} 406}
@@ -462,14 +442,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
462 struct trace_array *tr); 442 struct trace_array *tr);
463extern int trace_selftest_startup_sched_switch(struct tracer *trace, 443extern int trace_selftest_startup_sched_switch(struct tracer *trace,
464 struct trace_array *tr); 444 struct trace_array *tr);
465extern int trace_selftest_startup_sysprof(struct tracer *trace,
466 struct trace_array *tr);
467extern int trace_selftest_startup_branch(struct tracer *trace, 445extern int trace_selftest_startup_branch(struct tracer *trace,
468 struct trace_array *tr); 446 struct trace_array *tr);
469extern int trace_selftest_startup_hw_branches(struct tracer *trace,
470 struct trace_array *tr);
471extern int trace_selftest_startup_ksym(struct tracer *trace,
472 struct trace_array *tr);
473#endif /* CONFIG_FTRACE_STARTUP_TEST */ 447#endif /* CONFIG_FTRACE_STARTUP_TEST */
474 448
475extern void *head_page(struct trace_array_cpu *data); 449extern void *head_page(struct trace_array_cpu *data);
@@ -483,6 +457,8 @@ trace_array_vprintk(struct trace_array *tr,
483 unsigned long ip, const char *fmt, va_list args); 457 unsigned long ip, const char *fmt, va_list args);
484int trace_array_printk(struct trace_array *tr, 458int trace_array_printk(struct trace_array *tr,
485 unsigned long ip, const char *fmt, ...); 459 unsigned long ip, const char *fmt, ...);
460void trace_printk_seq(struct trace_seq *s);
461enum print_line_t print_trace_line(struct trace_iterator *iter);
486 462
487extern unsigned long trace_flags; 463extern unsigned long trace_flags;
488 464
@@ -490,13 +466,34 @@ extern int trace_clock_id;
490 466
491/* Standard output formatting function used for function return traces */ 467/* Standard output formatting function used for function return traces */
492#ifdef CONFIG_FUNCTION_GRAPH_TRACER 468#ifdef CONFIG_FUNCTION_GRAPH_TRACER
493extern enum print_line_t print_graph_function(struct trace_iterator *iter); 469
470/* Flag options */
471#define TRACE_GRAPH_PRINT_OVERRUN 0x1
472#define TRACE_GRAPH_PRINT_CPU 0x2
473#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
474#define TRACE_GRAPH_PRINT_PROC 0x8
475#define TRACE_GRAPH_PRINT_DURATION 0x10
476#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
477
478extern enum print_line_t
479print_graph_function_flags(struct trace_iterator *iter, u32 flags);
480extern void print_graph_headers_flags(struct seq_file *s, u32 flags);
494extern enum print_line_t 481extern enum print_line_t
495trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); 482trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
483extern void graph_trace_open(struct trace_iterator *iter);
484extern void graph_trace_close(struct trace_iterator *iter);
485extern int __trace_graph_entry(struct trace_array *tr,
486 struct ftrace_graph_ent *trace,
487 unsigned long flags, int pc);
488extern void __trace_graph_return(struct trace_array *tr,
489 struct ftrace_graph_ret *trace,
490 unsigned long flags, int pc);
491
496 492
497#ifdef CONFIG_DYNAMIC_FTRACE 493#ifdef CONFIG_DYNAMIC_FTRACE
498/* TODO: make this variable */ 494/* TODO: make this variable */
499#define FTRACE_GRAPH_MAX_FUNCS 32 495#define FTRACE_GRAPH_MAX_FUNCS 32
496extern int ftrace_graph_filter_enabled;
500extern int ftrace_graph_count; 497extern int ftrace_graph_count;
501extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 498extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
502 499
@@ -504,7 +501,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
504{ 501{
505 int i; 502 int i;
506 503
507 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 504 if (!ftrace_graph_filter_enabled)
508 return 1; 505 return 1;
509 506
510 for (i = 0; i < ftrace_graph_count; i++) { 507 for (i = 0; i < ftrace_graph_count; i++) {
@@ -522,7 +519,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
522#endif /* CONFIG_DYNAMIC_FTRACE */ 519#endif /* CONFIG_DYNAMIC_FTRACE */
523#else /* CONFIG_FUNCTION_GRAPH_TRACER */ 520#else /* CONFIG_FUNCTION_GRAPH_TRACER */
524static inline enum print_line_t 521static inline enum print_line_t
525print_graph_function(struct trace_iterator *iter) 522print_graph_function_flags(struct trace_iterator *iter, u32 flags)
526{ 523{
527 return TRACE_TYPE_UNHANDLED; 524 return TRACE_TYPE_UNHANDLED;
528} 525}
@@ -549,7 +546,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
549 * struct trace_parser - serves for reading the user input separated by spaces 546
550 * @cont: set if the input is not complete - no final space char was found 547 * @cont: set if the input is not complete - no final space char was found
551 * @buffer: holds the parsed user input 548 * @buffer: holds the parsed user input
552 * @idx: user input lenght 549 * @idx: user input length
553 * @size: buffer size 550 * @size: buffer size
554 */ 551 */
555struct trace_parser { 552struct trace_parser {
@@ -608,6 +605,7 @@ enum trace_iterator_flags {
608 TRACE_ITER_LATENCY_FMT = 0x20000, 605 TRACE_ITER_LATENCY_FMT = 0x20000,
609 TRACE_ITER_SLEEP_TIME = 0x40000, 606 TRACE_ITER_SLEEP_TIME = 0x40000,
610 TRACE_ITER_GRAPH_TIME = 0x80000, 607 TRACE_ITER_GRAPH_TIME = 0x80000,
608 TRACE_ITER_RECORD_CMD = 0x100000,
611}; 609};
612 610
613/* 611/*
@@ -619,54 +617,6 @@ enum trace_iterator_flags {
619 617
620extern struct tracer nop_trace; 618extern struct tracer nop_trace;
621 619
622/**
623 * ftrace_preempt_disable - disable preemption scheduler safe
624 *
625 * When tracing can happen inside the scheduler, there exists
626 * cases that the tracing might happen before the need_resched
627 * flag is checked. If this happens and the tracer calls
628 * preempt_enable (after a disable), a schedule might take place
629 * causing an infinite recursion.
630 *
631 * To prevent this, we read the need_resched flag before
632 * disabling preemption. When we want to enable preemption we
633 * check the flag, if it is set, then we call preempt_enable_no_resched.
634 * Otherwise, we call preempt_enable.
635 *
636 * The rational for doing the above is that if need_resched is set
637 * and we have yet to reschedule, we are either in an atomic location
638 * (where we do not need to check for scheduling) or we are inside
639 * the scheduler and do not want to resched.
640 */
641static inline int ftrace_preempt_disable(void)
642{
643 int resched;
644
645 resched = need_resched();
646 preempt_disable_notrace();
647
648 return resched;
649}
650
651/**
652 * ftrace_preempt_enable - enable preemption scheduler safe
653 * @resched: the return value from ftrace_preempt_disable
654 *
655 * This is a scheduler safe way to enable preemption and not miss
656 * any preemption checks. The disabled saved the state of preemption.
657 * If resched is set, then we are either inside an atomic or
658 * are inside the scheduler (we would have already scheduled
659 * otherwise). In this case, we do not want to call normal
660 * preempt_enable, but preempt_enable_no_resched instead.
661 */
662static inline void ftrace_preempt_enable(int resched)
663{
664 if (resched)
665 preempt_enable_no_resched_notrace();
666 else
667 preempt_enable_notrace();
668}
669
670#ifdef CONFIG_BRANCH_TRACER 620#ifdef CONFIG_BRANCH_TRACER
671extern int enable_branch_tracing(struct trace_array *tr); 621extern int enable_branch_tracing(struct trace_array *tr);
672extern void disable_branch_tracing(void); 622extern void disable_branch_tracing(void);
@@ -757,6 +707,8 @@ struct filter_pred {
757 int pop_n; 707 int pop_n;
758}; 708};
759 709
710extern struct list_head ftrace_common_fields;
711
760extern enum regex_type 712extern enum regex_type
761filter_parse_regex(char *buff, int len, char **search, int *not); 713filter_parse_regex(char *buff, int len, char **search, int *not);
762extern void print_event_filter(struct ftrace_event_call *call, 714extern void print_event_filter(struct ftrace_event_call *call,
@@ -769,12 +721,15 @@ extern void print_subsystem_event_filter(struct event_subsystem *system,
769 struct trace_seq *s); 721 struct trace_seq *s);
770extern int filter_assign_type(const char *type); 722extern int filter_assign_type(const char *type);
771 723
724struct list_head *
725trace_get_fields(struct ftrace_event_call *event_call);
726
772static inline int 727static inline int
773filter_check_discard(struct ftrace_event_call *call, void *rec, 728filter_check_discard(struct ftrace_event_call *call, void *rec,
774 struct ring_buffer *buffer, 729 struct ring_buffer *buffer,
775 struct ring_buffer_event *event) 730 struct ring_buffer_event *event)
776{ 731{
777 if (unlikely(call->filter_active) && 732 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
778 !filter_match_preds(call->filter, rec)) { 733 !filter_match_preds(call->filter, rec)) {
779 ring_buffer_discard_commit(buffer, event); 734 ring_buffer_discard_commit(buffer, event);
780 return 1; 735 return 1;
@@ -783,6 +738,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
783 return 0; 738 return 0;
784} 739}
785 740
741extern void trace_event_enable_cmd_record(bool enable);
742
786extern struct mutex event_mutex; 743extern struct mutex event_mutex;
787extern struct list_head ftrace_events; 744extern struct list_head ftrace_events;
788 745
@@ -791,7 +748,8 @@ extern const char *__stop___trace_bprintk_fmt[];
791 748
792#undef FTRACE_ENTRY 749#undef FTRACE_ENTRY
793#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 750#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
794 extern struct ftrace_event_call event_##call; 751 extern struct ftrace_event_call \
752 __attribute__((__aligned__(4))) event_##call;
795#undef FTRACE_ENTRY_DUP 753#undef FTRACE_ENTRY_DUP
796#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 754#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
797 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 755 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956ad..000000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12#include <linux/time.h>
13
14#include "trace.h"
15#include "trace_output.h"
16
17static struct trace_array *boot_trace;
18static bool pre_initcalls_finished;
19
20/* Tells the boot tracer that the pre_smp_initcalls are finished.
21 * So we are ready .
22 * It doesn't enable sched events tracing however.
23 * You have to call enable_boot_trace to do so.
24 */
25void start_boot_trace(void)
26{
27 pre_initcalls_finished = true;
28}
29
30void enable_boot_trace(void)
31{
32 if (boot_trace && pre_initcalls_finished)
33 tracing_start_sched_switch_record();
34}
35
36void disable_boot_trace(void)
37{
38 if (boot_trace && pre_initcalls_finished)
39 tracing_stop_sched_switch_record();
40}
41
42static int boot_trace_init(struct trace_array *tr)
43{
44 boot_trace = tr;
45
46 if (!tr)
47 return 0;
48
49 tracing_reset_online_cpus(tr);
50
51 tracing_sched_switch_assign_trace(tr);
52 return 0;
53}
54
55static enum print_line_t
56initcall_call_print_line(struct trace_iterator *iter)
57{
58 struct trace_entry *entry = iter->ent;
59 struct trace_seq *s = &iter->seq;
60 struct trace_boot_call *field;
61 struct boot_trace_call *call;
62 u64 ts;
63 unsigned long nsec_rem;
64 int ret;
65
66 trace_assign_type(field, entry);
67 call = &field->boot_call;
68 ts = iter->ts;
69 nsec_rem = do_div(ts, NSEC_PER_SEC);
70
71 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
72 (unsigned long)ts, nsec_rem, call->func, call->caller);
73
74 if (!ret)
75 return TRACE_TYPE_PARTIAL_LINE;
76 else
77 return TRACE_TYPE_HANDLED;
78}
79
80static enum print_line_t
81initcall_ret_print_line(struct trace_iterator *iter)
82{
83 struct trace_entry *entry = iter->ent;
84 struct trace_seq *s = &iter->seq;
85 struct trace_boot_ret *field;
86 struct boot_trace_ret *init_ret;
87 u64 ts;
88 unsigned long nsec_rem;
89 int ret;
90
91 trace_assign_type(field, entry);
92 init_ret = &field->boot_ret;
93 ts = iter->ts;
94 nsec_rem = do_div(ts, NSEC_PER_SEC);
95
96 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
97 "returned %d after %llu msecs\n",
98 (unsigned long) ts,
99 nsec_rem,
100 init_ret->func, init_ret->result, init_ret->duration);
101
102 if (!ret)
103 return TRACE_TYPE_PARTIAL_LINE;
104 else
105 return TRACE_TYPE_HANDLED;
106}
107
108static enum print_line_t initcall_print_line(struct trace_iterator *iter)
109{
110 struct trace_entry *entry = iter->ent;
111
112 switch (entry->type) {
113 case TRACE_BOOT_CALL:
114 return initcall_call_print_line(iter);
115 case TRACE_BOOT_RET:
116 return initcall_ret_print_line(iter);
117 default:
118 return TRACE_TYPE_UNHANDLED;
119 }
120}
121
122struct tracer boot_tracer __read_mostly =
123{
124 .name = "initcall",
125 .init = boot_trace_init,
126 .reset = tracing_reset_online_cpus,
127 .print_line = initcall_print_line,
128};
129
130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
131{
132 struct ftrace_event_call *call = &event_boot_call;
133 struct ring_buffer_event *event;
134 struct ring_buffer *buffer;
135 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace;
137
138 if (!tr || !pre_initcalls_finished)
139 return;
140
141 /* Get its name now since this function could
142 * disappear because it is in the .init section.
143 */
144 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable();
146
147 buffer = tr->buffer;
148 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
149 sizeof(*entry), 0, 0);
150 if (!event)
151 goto out;
152 entry = ring_buffer_event_data(event);
153 entry->boot_call = *bt;
154 if (!filter_check_discard(call, entry, buffer, event))
155 trace_buffer_unlock_commit(buffer, event, 0, 0);
156 out:
157 preempt_enable();
158}
159
160void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
161{
162 struct ftrace_event_call *call = &event_boot_ret;
163 struct ring_buffer_event *event;
164 struct ring_buffer *buffer;
165 struct trace_boot_ret *entry;
166 struct trace_array *tr = boot_trace;
167
168 if (!tr || !pre_initcalls_finished)
169 return;
170
171 sprint_symbol(bt->func, (unsigned long)fn);
172 preempt_disable();
173
174 buffer = tr->buffer;
175 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
176 sizeof(*entry), 0, 0);
177 if (!event)
178 goto out;
179 entry = ring_buffer_event_data(event);
180 entry->boot_ret = *bt;
181 if (!filter_check_discard(call, entry, buffer, event))
182 trace_buffer_unlock_commit(buffer, event, 0, 0);
183 out:
184 preempt_enable();
185}
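
Although the boot tracer is deleted, the reserve/fill/filter/commit sequence used by trace_boot_call() above remains the standard way a tracer writes an entry. The sketch below condenses that sequence; TRACE_EXAMPLE, struct trace_example and event_example are placeholders for illustration, not symbols from this tree.

/* Assumes the tracer-internal declarations from kernel/trace/trace.h. */
struct trace_example {
	struct trace_entry	ent;
	unsigned long		data;
};

extern struct ftrace_event_call event_example;	/* hypothetical event */

static void example_write_entry(struct trace_array *tr, unsigned long data)
{
	struct ftrace_event_call *call = &event_example;
	struct ring_buffer *buffer = tr->buffer;
	struct ring_buffer_event *event;
	struct trace_example *entry;

	preempt_disable();

	/* Reserve space for one entry of the given type... */
	event = trace_buffer_lock_reserve(buffer, TRACE_EXAMPLE,
					  sizeof(*entry), 0, 0);
	if (!event)
		goto out;

	/* ...fill it in... */
	entry = ring_buffer_event_data(event);
	entry->data = data;

	/* ...and commit it unless the event filter discards it. */
	if (!filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit(buffer, event, 0, 0);
 out:
	preempt_enable();
}
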
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..8d3538b4ea5f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -143,7 +143,7 @@ static void branch_trace_reset(struct trace_array *tr)
143} 143}
144 144
145static enum print_line_t trace_branch_print(struct trace_iterator *iter, 145static enum print_line_t trace_branch_print(struct trace_iterator *iter,
146 int flags) 146 int flags, struct trace_event *event)
147{ 147{
148 struct trace_branch *field; 148 struct trace_branch *field;
149 149
@@ -167,9 +167,13 @@ static void branch_print_header(struct seq_file *s)
167 " |\n"); 167 " |\n");
168} 168}
169 169
170static struct trace_event_functions trace_branch_funcs = {
171 .trace = trace_branch_print,
172};
173
170static struct trace_event trace_branch_event = { 174static struct trace_event trace_branch_event = {
171 .type = TRACE_BRANCH, 175 .type = TRACE_BRANCH,
172 .trace = trace_branch_print, 176 .funcs = &trace_branch_funcs,
173}; 177};
174 178
175static struct tracer branch_trace __read_mostly = 179static struct tracer branch_trace __read_mostly =
@@ -307,8 +311,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 311 return -1;
308 if (percent_a > percent_b) 312 if (percent_a > percent_b)
309 return 1; 313 return 1;
310 else 314
311 return 0; 315 if (a->incorrect < b->incorrect)
316 return -1;
317 if (a->incorrect > b->incorrect)
318 return 1;
319
320 /*
321 * Since the above shows worse (incorrect) cases
322 * first, we continue that by showing best (correct)
323 * cases last.
324 */
325 if (a->correct > b->correct)
326 return -1;
327 if (a->correct < b->correct)
328 return 1;
329
330 return 0;
312} 331}
313 332
314static struct tracer_stat annotated_branch_stats = { 333static struct tracer_stat annotated_branch_stats = {
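
The extended annotated_branch_stat_cmp() above adds two tie-breakers after the misprediction-percentage comparison. The standalone program below mirrors that three-level ordering in plain userspace C; the struct and the percentage helper are simplified stand-ins, not the kernel types.

#include <stdio.h>
#include <stdlib.h>

struct branch_stat {
	unsigned long correct;
	unsigned long incorrect;
};

static unsigned long percent(const struct branch_stat *s)
{
	unsigned long total = s->correct + s->incorrect;

	return total ? s->incorrect * 100 / total : 0;
}

/* Three-level comparison: misprediction percentage, then raw incorrect
 * count, then raw correct count as the final tie-break. */
static int branch_stat_cmp(const void *pa, const void *pb)
{
	const struct branch_stat *a = pa, *b = pb;

	if (percent(a) != percent(b))
		return percent(a) < percent(b) ? -1 : 1;
	if (a->incorrect != b->incorrect)
		return a->incorrect < b->incorrect ? -1 : 1;
	if (a->correct != b->correct)
		return a->correct > b->correct ? -1 : 1;
	return 0;
}

int main(void)
{
	struct branch_stat s[] = { { 10, 90 }, { 50, 50 }, { 90, 10 } };
	int i;

	qsort(s, 3, sizeof(s[0]), branch_stat_cmp);
	for (i = 0; i < 3; i++)
		printf("%lu%% incorrect\n", percent(&s[i]));
	return 0;
}
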
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..685a67d55db0 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
 13 * Tracer plugins will choose a default from these clocks. 13 * Tracer plugins will choose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -31,16 +32,15 @@
31u64 notrace trace_clock_local(void) 32u64 notrace trace_clock_local(void)
32{ 33{
33 u64 clock; 34 u64 clock;
34 int resched;
35 35
36 /* 36 /*
37 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
38 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
39 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
40 */ 40 */
41 resched = ftrace_preempt_disable(); 41 preempt_disable_notrace();
42 clock = sched_clock(); 42 clock = sched_clock();
43 ftrace_preempt_enable(resched); 43 preempt_enable_notrace();
44 44
45 return clock; 45 return clock;
46} 46}
@@ -55,7 +55,7 @@ u64 notrace trace_clock_local(void)
55 */ 55 */
56u64 notrace trace_clock(void) 56u64 notrace trace_clock(void)
57{ 57{
58 return cpu_clock(raw_smp_processor_id()); 58 return local_clock();
59} 59}
60 60
61 61
@@ -83,7 +83,7 @@ u64 notrace trace_clock_global(void)
83 int this_cpu; 83 int this_cpu;
84 u64 now; 84 u64 now;
85 85
86 raw_local_irq_save(flags); 86 local_irq_save(flags);
87 87
88 this_cpu = raw_smp_processor_id(); 88 this_cpu = raw_smp_processor_id();
89 now = cpu_clock(this_cpu); 89 now = cpu_clock(this_cpu);
@@ -109,7 +109,7 @@ u64 notrace trace_clock_global(void)
109 arch_spin_unlock(&trace_clock_struct.lock); 109 arch_spin_unlock(&trace_clock_struct.lock);
110 110
111 out: 111 out:
112 raw_local_irq_restore(flags); 112 local_irq_restore(flags);
113 113
114 return now; 114 return now;
115} 115}
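
trace_clock_global() above keeps its monotonicity guarantee by remembering, under a lock with interrupts off, the last value it handed out and clamping anything that would go backwards. The userspace sketch below illustrates only that clamp-under-lock idea; it is not the kernel implementation, and clock_gettime() stands in for sched_clock()/cpu_clock().

#include <pthread.h>
#include <stdint.h>
#include <time.h>

static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t prev_time;

static uint64_t local_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);	/* stand-in for sched_clock() */
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

uint64_t global_trace_clock(void)
{
	uint64_t now = local_ns();

	pthread_mutex_lock(&clock_lock);
	/* If this reading would move the global clock backwards, clamp it
	 * to the last value returned so consumers see monotonic time. */
	if ((int64_t)(now - prev_time) < 0)
		now = prev_time;
	prev_time = now;
	pthread_mutex_unlock(&clock_lock);

	return now;
}
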
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c16a08f399df..6cf223764be8 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -53,7 +53,7 @@
53 */ 53 */
54 54
55/* 55/*
56 * Function trace entry - function address and parent function addres: 56 * Function trace entry - function address and parent function address:
57 */ 57 */
58FTRACE_ENTRY(function, ftrace_entry, 58FTRACE_ENTRY(function, ftrace_entry,
59 59
@@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
151); 151);
152 152
153/* 153/*
154 * Special (free-form) trace entry:
155 */
156FTRACE_ENTRY(special, special_entry,
157
158 TRACE_SPECIAL,
159
160 F_STRUCT(
161 __field( unsigned long, arg1 )
162 __field( unsigned long, arg2 )
163 __field( unsigned long, arg3 )
164 ),
165
166 F_printk("(%08lx) (%08lx) (%08lx)",
167 __entry->arg1, __entry->arg2, __entry->arg3)
168);
169
170/*
171 * Stack-trace entry: 154 * Stack-trace entry:
172 */ 155 */
173 156
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
271 __entry->map_id, __entry->opcode) 254 __entry->map_id, __entry->opcode)
272); 255);
273 256
274FTRACE_ENTRY(boot_call, trace_boot_call,
275
276 TRACE_BOOT_CALL,
277
278 F_STRUCT(
279 __field_struct( struct boot_trace_call, boot_call )
280 __field_desc( pid_t, boot_call, caller )
281 __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
282 ),
283
284 F_printk("%d %s", __entry->caller, __entry->func)
285);
286
287FTRACE_ENTRY(boot_ret, trace_boot_ret,
288
289 TRACE_BOOT_RET,
290
291 F_STRUCT(
292 __field_struct( struct boot_trace_ret, boot_ret )
293 __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
294 __field_desc( int, boot_ret, result )
295 __field_desc( unsigned long, boot_ret, duration )
296 ),
297
298 F_printk("%s %d %lx",
299 __entry->func, __entry->result, __entry->duration)
300);
301 257
302#define TRACE_FUNC_SIZE 30 258#define TRACE_FUNC_SIZE 30
303#define TRACE_FILE_SIZE 20 259#define TRACE_FILE_SIZE 20
@@ -318,65 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 274 __entry->func, __entry->file, __entry->correct)
319); 275);
320 276
321FTRACE_ENTRY(hw_branch, hw_branch_entry,
322
323 TRACE_HW_BRANCHES,
324
325 F_STRUCT(
326 __field( u64, from )
327 __field( u64, to )
328 ),
329
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331);
332
333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
334
335 TRACE_KMEM_ALLOC,
336
337 F_STRUCT(
338 __field( enum kmemtrace_type_id, type_id )
339 __field( unsigned long, call_site )
340 __field( const void *, ptr )
341 __field( size_t, bytes_req )
342 __field( size_t, bytes_alloc )
343 __field( gfp_t, gfp_flags )
344 __field( int, node )
345 ),
346
347 F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
348 " flags:%x node:%d",
349 __entry->type_id, __entry->call_site, __entry->ptr,
350 __entry->bytes_req, __entry->bytes_alloc,
351 __entry->gfp_flags, __entry->node)
352);
353
354FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
355
356 TRACE_KMEM_FREE,
357
358 F_STRUCT(
359 __field( enum kmemtrace_type_id, type_id )
360 __field( unsigned long, call_site )
361 __field( const void *, ptr )
362 ),
363
364 F_printk("type:%u call_site:%lx ptr:%p",
365 __entry->type_id, __entry->call_site, __entry->ptr)
366);
367
368FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
369
370 TRACE_KSYM,
371
372 F_STRUCT(
373 __field( unsigned long, ip )
374 __field( unsigned char, type )
375 __array( char , cmd, TASK_COMM_LEN )
376 __field( unsigned long, addr )
377 ),
378
379 F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
380 (void *)__entry->ip, (unsigned int)__entry->type,
381 (void *)__entry->addr, __entry->cmd)
382);
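
With the special, boot, hw-branch, kmemtrace and ksym entries gone, trace_entries.h keeps only the core entry definitions. For orientation, the general shape of an FTRACE_ENTRY() definition, patterned on the removed ones above, looks like the following; the "example" entry, TRACE_EXAMPLE and example_entry are hypothetical and do not exist in this tree.

/* Hypothetical entry, shown only to illustrate the macro layout. */
FTRACE_ENTRY(example, example_entry,

	TRACE_EXAMPLE,

	F_STRUCT(
		__field(	unsigned long,	ip	)
		__field(	int,		ret	)
	),

	F_printk("ip: %lx ret: %d", __entry->ip, __entry->ret)
);
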
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
new file mode 100644
index 000000000000..19a359d5e6d5
--- /dev/null
+++ b/kernel/trace/trace_event_perf.c
@@ -0,0 +1,216 @@
1/*
2 * trace event based perf event profiling/tracing
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */
7
8#include <linux/module.h>
9#include <linux/kprobes.h>
10#include "trace.h"
11
12static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
13
14/*
15 * Force it to be aligned to unsigned long to avoid misaligned accesses
 16 * surprises
17 */
18typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
19 perf_trace_t;
20
21/* Count the events in use (per event id, not per instance) */
22static int total_ref_count;
23
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event)
26{
27 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0;
30
31 /* Some events are ok to be traced by non-root users... */
32 if (p_event->attach_state == PERF_ATTACH_TASK) {
33 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
34 return 0;
35 }
36
37 /*
38 * ...otherwise raw tracepoint data can be a severe data leak,
39 * only allow root to have these.
40 */
41 if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 return 0;
45}
46
47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
48 struct perf_event *p_event)
49{
50 struct hlist_head __percpu *list;
51 int ret;
52 int cpu;
53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
58 p_event->tp_event = tp_event;
59 if (tp_event->perf_refcount++ > 0)
60 return 0;
61
62 ret = -ENOMEM;
63
64 list = alloc_percpu(struct hlist_head);
65 if (!list)
66 goto fail;
67
68 for_each_possible_cpu(cpu)
69 INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
70
71 tp_event->perf_events = list;
72
73 if (!total_ref_count) {
74 char __percpu *buf;
75 int i;
76
77 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
78 buf = (char __percpu *)alloc_percpu(perf_trace_t);
79 if (!buf)
80 goto fail;
81
82 perf_trace_buf[i] = buf;
83 }
84 }
85
86 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
87 if (ret)
88 goto fail;
89
90 total_ref_count++;
91 return 0;
92
93fail:
94 if (!total_ref_count) {
95 int i;
96
97 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
98 free_percpu(perf_trace_buf[i]);
99 perf_trace_buf[i] = NULL;
100 }
101 }
102
103 if (!--tp_event->perf_refcount) {
104 free_percpu(tp_event->perf_events);
105 tp_event->perf_events = NULL;
106 }
107
108 return ret;
109}
110
111int perf_trace_init(struct perf_event *p_event)
112{
113 struct ftrace_event_call *tp_event;
114 int event_id = p_event->attr.config;
115 int ret = -EINVAL;
116
117 mutex_lock(&event_mutex);
118 list_for_each_entry(tp_event, &ftrace_events, list) {
119 if (tp_event->event.type == event_id &&
120 tp_event->class && tp_event->class->reg &&
121 try_module_get(tp_event->mod)) {
122 ret = perf_trace_event_init(tp_event, p_event);
123 if (ret)
124 module_put(tp_event->mod);
125 break;
126 }
127 }
128 mutex_unlock(&event_mutex);
129
130 return ret;
131}
132
133int perf_trace_add(struct perf_event *p_event, int flags)
134{
135 struct ftrace_event_call *tp_event = p_event->tp_event;
136 struct hlist_head __percpu *pcpu_list;
137 struct hlist_head *list;
138
139 pcpu_list = tp_event->perf_events;
140 if (WARN_ON_ONCE(!pcpu_list))
141 return -EINVAL;
142
143 if (!(flags & PERF_EF_START))
144 p_event->hw.state = PERF_HES_STOPPED;
145
146 list = this_cpu_ptr(pcpu_list);
147 hlist_add_head_rcu(&p_event->hlist_entry, list);
148
149 return 0;
150}
151
152void perf_trace_del(struct perf_event *p_event, int flags)
153{
154 hlist_del_rcu(&p_event->hlist_entry);
155}
156
157void perf_trace_destroy(struct perf_event *p_event)
158{
159 struct ftrace_event_call *tp_event = p_event->tp_event;
160 int i;
161
162 mutex_lock(&event_mutex);
163 if (--tp_event->perf_refcount > 0)
164 goto out;
165
166 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
167
168 /*
169 * Ensure our callback won't be called anymore. The buffers
170 * will be freed after that.
171 */
172 tracepoint_synchronize_unregister();
173
174 free_percpu(tp_event->perf_events);
175 tp_event->perf_events = NULL;
176
177 if (!--total_ref_count) {
178 for (i = 0; i < PERF_NR_CONTEXTS; i++) {
179 free_percpu(perf_trace_buf[i]);
180 perf_trace_buf[i] = NULL;
181 }
182 }
183out:
184 module_put(tp_event->mod);
185 mutex_unlock(&event_mutex);
186}
187
188__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
189 struct pt_regs *regs, int *rctxp)
190{
191 struct trace_entry *entry;
192 unsigned long flags;
193 char *raw_data;
194 int pc;
195
196 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
197
198 pc = preempt_count();
199
200 *rctxp = perf_swevent_get_recursion_context();
201 if (*rctxp < 0)
202 return NULL;
203
204 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
205
206 /* zero the dead bytes from align to not leak stack to user */
207 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
208
209 entry = (struct trace_entry *)raw_data;
210 local_save_flags(flags);
211 tracing_generic_entry_update(entry, flags, pc);
212 entry->type = type;
213
214 return raw_data;
215}
216EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
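
A probe that feeds perf typically consumes perf_trace_buf_prepare() as sketched below. The entry layout, the size rounding and the closing perf_trace_buf_submit() call are assumed here from the ftrace_event helpers of this kernel generation; the probe itself and its single field are hypothetical.

/* Assumes the usual kernel headers (linux/kernel.h, linux/percpu.h) and
 * kernel/trace/trace.h; example_perf_probe() is not part of this file. */
static void example_perf_probe(struct ftrace_event_call *call,
			       struct pt_regs *regs, u64 addr)
{
	struct hlist_head *head;
	struct {
		struct trace_entry	ent;
		unsigned long		addr;
	} *entry;
	int size, rctx;

	/* Round the record up to a u64 boundary, minus the size tag. */
	size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
	if (!entry)
		return;

	entry->addr = addr;
	head = this_cpu_ptr(call->perf_events);
	perf_trace_buf_submit(entry, size, rctx, addr, 1, regs, head);
}
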
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
deleted file mode 100644
index 9e25573242cf..000000000000
--- a/kernel/trace/trace_event_profile.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * trace event based perf counter profiling
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include "trace.h"
10
11
12char *perf_trace_buf;
13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
17
18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
19
20/* Count the events in use (per event id, not per instance) */
21static int total_profile_count;
22
23static int ftrace_profile_enable_event(struct ftrace_event_call *event)
24{
25 char *buf;
26 int ret = -ENOMEM;
27
28 if (event->profile_count++ > 0)
29 return 0;
30
31 if (!total_profile_count) {
32 buf = (char *)alloc_percpu(perf_trace_t);
33 if (!buf)
34 goto fail_buf;
35
36 rcu_assign_pointer(perf_trace_buf, buf);
37
38 buf = (char *)alloc_percpu(perf_trace_t);
39 if (!buf)
40 goto fail_buf_nmi;
41
42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
43 }
44
45 ret = event->profile_enable(event);
46 if (!ret) {
47 total_profile_count++;
48 return 0;
49 }
50
51fail_buf_nmi:
52 if (!total_profile_count) {
53 free_percpu(perf_trace_buf_nmi);
54 free_percpu(perf_trace_buf);
55 perf_trace_buf_nmi = NULL;
56 perf_trace_buf = NULL;
57 }
58fail_buf:
59 event->profile_count--;
60
61 return ret;
62}
63
64int ftrace_profile_enable(int event_id)
65{
66 struct ftrace_event_call *event;
67 int ret = -EINVAL;
68
69 mutex_lock(&event_mutex);
70 list_for_each_entry(event, &ftrace_events, list) {
71 if (event->id == event_id && event->profile_enable &&
72 try_module_get(event->mod)) {
73 ret = ftrace_profile_enable_event(event);
74 break;
75 }
76 }
77 mutex_unlock(&event_mutex);
78
79 return ret;
80}
81
82static void ftrace_profile_disable_event(struct ftrace_event_call *event)
83{
84 char *buf, *nmi_buf;
85
86 if (--event->profile_count > 0)
87 return;
88
89 event->profile_disable(event);
90
91 if (!--total_profile_count) {
92 buf = perf_trace_buf;
93 rcu_assign_pointer(perf_trace_buf, NULL);
94
95 nmi_buf = perf_trace_buf_nmi;
96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
97
98 /*
99 * Ensure every events in profiling have finished before
100 * releasing the buffers
101 */
102 synchronize_sched();
103
104 free_percpu(buf);
105 free_percpu(nmi_buf);
106 }
107}
108
109void ftrace_profile_disable(int event_id)
110{
111 struct ftrace_event_call *event;
112
113 mutex_lock(&event_mutex);
114 list_for_each_entry(event, &ftrace_events, list) {
115 if (event->id == event_id) {
116 ftrace_profile_disable_event(event);
117 module_put(event->mod);
118 break;
119 }
120 }
121 mutex_unlock(&event_mutex);
122}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..35fde09b81de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
20#include <asm/setup.h> 21#include <asm/setup.h>
@@ -26,11 +27,26 @@
26 27
27DEFINE_MUTEX(event_mutex); 28DEFINE_MUTEX(event_mutex);
28 29
30DEFINE_MUTEX(event_storage_mutex);
31EXPORT_SYMBOL_GPL(event_storage_mutex);
32
33char event_storage[EVENT_STORAGE_SIZE];
34EXPORT_SYMBOL_GPL(event_storage);
35
29LIST_HEAD(ftrace_events); 36LIST_HEAD(ftrace_events);
37LIST_HEAD(ftrace_common_fields);
30 38
31int trace_define_field(struct ftrace_event_call *call, const char *type, 39struct list_head *
32 const char *name, int offset, int size, int is_signed, 40trace_get_fields(struct ftrace_event_call *event_call)
33 int filter_type) 41{
42 if (!event_call->class->get_fields)
43 return &event_call->class->fields;
44 return event_call->class->get_fields(event_call);
45}
46
47static int __trace_define_field(struct list_head *head, const char *type,
48 const char *name, int offset, int size,
49 int is_signed, int filter_type)
34{ 50{
35 struct ftrace_event_field *field; 51 struct ftrace_event_field *field;
36 52
@@ -55,30 +71,43 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
55 field->size = size; 71 field->size = size;
56 field->is_signed = is_signed; 72 field->is_signed = is_signed;
57 73
58 list_add(&field->link, &call->fields); 74 list_add(&field->link, head);
59 75
60 return 0; 76 return 0;
61 77
62err: 78err:
63 if (field) { 79 if (field)
64 kfree(field->name); 80 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 81 kfree(field);
68 82
69 return -ENOMEM; 83 return -ENOMEM;
70} 84}
85
86int trace_define_field(struct ftrace_event_call *call, const char *type,
87 const char *name, int offset, int size, int is_signed,
88 int filter_type)
89{
90 struct list_head *head;
91
92 if (WARN_ON(!call->class))
93 return 0;
94
95 head = trace_get_fields(call);
96 return __trace_define_field(head, type, name, offset, size,
97 is_signed, filter_type);
98}
71EXPORT_SYMBOL_GPL(trace_define_field); 99EXPORT_SYMBOL_GPL(trace_define_field);
72 100
73#define __common_field(type, item) \ 101#define __common_field(type, item) \
74 ret = trace_define_field(call, #type, "common_" #item, \ 102 ret = __trace_define_field(&ftrace_common_fields, #type, \
75 offsetof(typeof(ent), item), \ 103 "common_" #item, \
76 sizeof(ent.item), \ 104 offsetof(typeof(ent), item), \
77 is_signed_type(type), FILTER_OTHER); \ 105 sizeof(ent.item), \
106 is_signed_type(type), FILTER_OTHER); \
78 if (ret) \ 107 if (ret) \
79 return ret; 108 return ret;
80 109
81static int trace_define_common_fields(struct ftrace_event_call *call) 110static int trace_define_common_fields(void)
82{ 111{
83 int ret; 112 int ret;
84 struct trace_entry ent; 113 struct trace_entry ent;
@@ -95,8 +124,10 @@ static int trace_define_common_fields(struct ftrace_event_call *call)
95void trace_destroy_fields(struct ftrace_event_call *call) 124void trace_destroy_fields(struct ftrace_event_call *call)
96{ 125{
97 struct ftrace_event_field *field, *next; 126 struct ftrace_event_field *field, *next;
127 struct list_head *head;
98 128
99 list_for_each_entry_safe(field, next, &call->fields, link) { 129 head = trace_get_fields(call);
130 list_for_each_entry_safe(field, next, head, link) {
100 list_del(&field->link); 131 list_del(&field->link);
101 kfree(field->type); 132 kfree(field->type);
102 kfree(field->name); 133 kfree(field->name);
@@ -108,16 +139,63 @@ int trace_event_raw_init(struct ftrace_event_call *call)
108{ 139{
109 int id; 140 int id;
110 141
111 id = register_ftrace_event(call->event); 142 id = register_ftrace_event(&call->event);
112 if (!id) 143 if (!id)
113 return -ENODEV; 144 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116 145
117 return 0; 146 return 0;
118} 147}
119EXPORT_SYMBOL_GPL(trace_event_raw_init); 148EXPORT_SYMBOL_GPL(trace_event_raw_init);
120 149
150int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
151{
152 switch (type) {
153 case TRACE_REG_REGISTER:
154 return tracepoint_probe_register(call->name,
155 call->class->probe,
156 call);
157 case TRACE_REG_UNREGISTER:
158 tracepoint_probe_unregister(call->name,
159 call->class->probe,
160 call);
161 return 0;
162
163#ifdef CONFIG_PERF_EVENTS
164 case TRACE_REG_PERF_REGISTER:
165 return tracepoint_probe_register(call->name,
166 call->class->perf_probe,
167 call);
168 case TRACE_REG_PERF_UNREGISTER:
169 tracepoint_probe_unregister(call->name,
170 call->class->perf_probe,
171 call);
172 return 0;
173#endif
174 }
175 return 0;
176}
177EXPORT_SYMBOL_GPL(ftrace_event_reg);
178
179void trace_event_enable_cmd_record(bool enable)
180{
181 struct ftrace_event_call *call;
182
183 mutex_lock(&event_mutex);
184 list_for_each_entry(call, &ftrace_events, list) {
185 if (!(call->flags & TRACE_EVENT_FL_ENABLED))
186 continue;
187
188 if (enable) {
189 tracing_start_cmdline_record();
190 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
191 } else {
192 tracing_stop_cmdline_record();
193 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
194 }
195 }
196 mutex_unlock(&event_mutex);
197}
198
121static int ftrace_event_enable_disable(struct ftrace_event_call *call, 199static int ftrace_event_enable_disable(struct ftrace_event_call *call,
122 int enable) 200 int enable)
123{ 201{
@@ -125,23 +203,29 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
125 203
126 switch (enable) { 204 switch (enable) {
127 case 0: 205 case 0:
128 if (call->enabled) { 206 if (call->flags & TRACE_EVENT_FL_ENABLED) {
129 call->enabled = 0; 207 call->flags &= ~TRACE_EVENT_FL_ENABLED;
130 tracing_stop_cmdline_record(); 208 if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
131 call->unregfunc(call); 209 tracing_stop_cmdline_record();
210 call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
211 }
212 call->class->reg(call, TRACE_REG_UNREGISTER);
132 } 213 }
133 break; 214 break;
134 case 1: 215 case 1:
135 if (!call->enabled) { 216 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
136 tracing_start_cmdline_record(); 217 if (trace_flags & TRACE_ITER_RECORD_CMD) {
137 ret = call->regfunc(call); 218 tracing_start_cmdline_record();
219 call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
220 }
221 ret = call->class->reg(call, TRACE_REG_REGISTER);
138 if (ret) { 222 if (ret) {
139 tracing_stop_cmdline_record(); 223 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event " 224 pr_info("event trace: Could not enable event "
141 "%s\n", call->name); 225 "%s\n", call->name);
142 break; 226 break;
143 } 227 }
144 call->enabled = 1; 228 call->flags |= TRACE_EVENT_FL_ENABLED;
145 } 229 }
146 break; 230 break;
147 } 231 }
@@ -172,15 +256,15 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
172 mutex_lock(&event_mutex); 256 mutex_lock(&event_mutex);
173 list_for_each_entry(call, &ftrace_events, list) { 257 list_for_each_entry(call, &ftrace_events, list) {
174 258
175 if (!call->name || !call->regfunc) 259 if (!call->name || !call->class || !call->class->reg)
176 continue; 260 continue;
177 261
178 if (match && 262 if (match &&
179 strcmp(match, call->name) != 0 && 263 strcmp(match, call->name) != 0 &&
180 strcmp(match, call->system) != 0) 264 strcmp(match, call->class->system) != 0)
181 continue; 265 continue;
182 266
183 if (sub && strcmp(sub, call->system) != 0) 267 if (sub && strcmp(sub, call->class->system) != 0)
184 continue; 268 continue;
185 269
186 if (event && strcmp(event, call->name) != 0) 270 if (event && strcmp(event, call->name) != 0)
@@ -298,7 +382,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
298 * The ftrace subsystem is for showing formats only. 382 * The ftrace subsystem is for showing formats only.
299 * They can not be enabled or disabled via the event files. 383 * They can not be enabled or disabled via the event files.
300 */ 384 */
301 if (call->regfunc) 385 if (call->class && call->class->reg)
302 return call; 386 return call;
303 } 387 }
304 388
@@ -329,7 +413,7 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
329 (*pos)++; 413 (*pos)++;
330 414
331 list_for_each_entry_continue(call, &ftrace_events, list) { 415 list_for_each_entry_continue(call, &ftrace_events, list) {
332 if (call->enabled) 416 if (call->flags & TRACE_EVENT_FL_ENABLED)
333 return call; 417 return call;
334 } 418 }
335 419
@@ -356,8 +440,8 @@ static int t_show(struct seq_file *m, void *v)
356{ 440{
357 struct ftrace_event_call *call = v; 441 struct ftrace_event_call *call = v;
358 442
359 if (strcmp(call->system, TRACE_SYSTEM) != 0) 443 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
360 seq_printf(m, "%s:", call->system); 444 seq_printf(m, "%s:", call->class->system);
361 seq_printf(m, "%s\n", call->name); 445 seq_printf(m, "%s\n", call->name);
362 446
363 return 0; 447 return 0;
@@ -388,7 +472,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
388 struct ftrace_event_call *call = filp->private_data; 472 struct ftrace_event_call *call = filp->private_data;
389 char *buf; 473 char *buf;
390 474
391 if (call->enabled) 475 if (call->flags & TRACE_EVENT_FL_ENABLED)
392 buf = "1\n"; 476 buf = "1\n";
393 else 477 else
394 buf = "0\n"; 478 buf = "0\n";
@@ -451,10 +535,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
451 535
452 mutex_lock(&event_mutex); 536 mutex_lock(&event_mutex);
453 list_for_each_entry(call, &ftrace_events, list) { 537 list_for_each_entry(call, &ftrace_events, list) {
454 if (!call->name || !call->regfunc) 538 if (!call->name || !call->class || !call->class->reg)
455 continue; 539 continue;
456 540
457 if (system && strcmp(call->system, system) != 0) 541 if (system && strcmp(call->class->system, system) != 0)
458 continue; 542 continue;
459 543
460 /* 544 /*
@@ -462,7 +546,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 462 * or if all events are cleared, or if we have 546 * or if all events are cleared, or if we have
463 * a mixture. 547 * a mixture.
464 */ 548 */
465 set |= (1 << !!call->enabled); 549 set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
466 550
467 /* 551 /*
468 * If we have a mixture, no need to look further. 552 * If we have a mixture, no need to look further.
@@ -520,74 +604,146 @@ out:
520 return ret; 604 return ret;
521} 605}
522 606
523extern char *__bad_type_size(void); 607enum {
608 FORMAT_HEADER = 1,
609 FORMAT_FIELD_SEPERATOR = 2,
610 FORMAT_PRINTFMT = 3,
611};
612
613static void *f_next(struct seq_file *m, void *v, loff_t *pos)
614{
615 struct ftrace_event_call *call = m->private;
616 struct ftrace_event_field *field;
617 struct list_head *common_head = &ftrace_common_fields;
618 struct list_head *head = trace_get_fields(call);
619
620 (*pos)++;
621
622 switch ((unsigned long)v) {
623 case FORMAT_HEADER:
624 if (unlikely(list_empty(common_head)))
625 return NULL;
626
627 field = list_entry(common_head->prev,
628 struct ftrace_event_field, link);
629 return field;
630
631 case FORMAT_FIELD_SEPERATOR:
632 if (unlikely(list_empty(head)))
633 return NULL;
634
635 field = list_entry(head->prev, struct ftrace_event_field, link);
636 return field;
637
638 case FORMAT_PRINTFMT:
639 /* all done */
640 return NULL;
641 }
524 642
525#undef FIELD 643 field = v;
526#define FIELD(type, name) \ 644 if (field->link.prev == common_head)
527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ 645 return (void *)FORMAT_FIELD_SEPERATOR;
528 #type, "common_" #name, offsetof(typeof(field), name), \ 646 else if (field->link.prev == head)
529 sizeof(field.name), is_signed_type(type) 647 return (void *)FORMAT_PRINTFMT;
530 648
531static int trace_write_header(struct trace_seq *s) 649 field = list_entry(field->link.prev, struct ftrace_event_field, link);
650
651 return field;
652}
653
654static void *f_start(struct seq_file *m, loff_t *pos)
532{ 655{
533 struct trace_entry field; 656 loff_t l = 0;
534 657 void *p;
535 /* struct trace_entry */ 658
536 return trace_seq_printf(s, 659 /* Start by showing the header */
537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" 660 if (!*pos)
538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" 661 return (void *)FORMAT_HEADER;
539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" 662
540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" 663 p = (void *)FORMAT_HEADER;
541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" 664 do {
542 "\n", 665 p = f_next(m, p, &l);
543 FIELD(unsigned short, type), 666 } while (p && l < *pos);
544 FIELD(unsigned char, flags), 667
545 FIELD(unsigned char, preempt_count), 668 return p;
546 FIELD(int, pid),
547 FIELD(int, lock_depth));
548} 669}
549 670
550static ssize_t 671static int f_show(struct seq_file *m, void *v)
551event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
552 loff_t *ppos)
553{ 672{
554 struct ftrace_event_call *call = filp->private_data; 673 struct ftrace_event_call *call = m->private;
555 struct trace_seq *s; 674 struct ftrace_event_field *field;
556 char *buf; 675 const char *array_descriptor;
557 int r;
558 676
559 if (*ppos) 677 switch ((unsigned long)v) {
678 case FORMAT_HEADER:
679 seq_printf(m, "name: %s\n", call->name);
680 seq_printf(m, "ID: %d\n", call->event.type);
681 seq_printf(m, "format:\n");
560 return 0; 682 return 0;
561 683
562 s = kmalloc(sizeof(*s), GFP_KERNEL); 684 case FORMAT_FIELD_SEPERATOR:
563 if (!s) 685 seq_putc(m, '\n');
564 return -ENOMEM; 686 return 0;
565 687
566 trace_seq_init(s); 688 case FORMAT_PRINTFMT:
689 seq_printf(m, "\nprint fmt: %s\n",
690 call->print_fmt);
691 return 0;
692 }
567 693
568 /* If any of the first writes fail, so will the show_format. */ 694 field = v;
569 695
570 trace_seq_printf(s, "name: %s\n", call->name); 696 /*
571 trace_seq_printf(s, "ID: %d\n", call->id); 697 * Smartly shows the array type(except dynamic array).
572 trace_seq_printf(s, "format:\n"); 698 * Normal:
573 trace_write_header(s); 699 * field:TYPE VAR
700 * If TYPE := TYPE[LEN], it is shown:
701 * field:TYPE VAR[LEN]
702 */
703 array_descriptor = strchr(field->type, '[');
574 704
575 r = call->show_format(call, s); 705 if (!strncmp(field->type, "__data_loc", 10))
576 if (!r) { 706 array_descriptor = NULL;
577 /*
578 * ug! The format output is bigger than a PAGE!!
579 */
580 buf = "FORMAT TOO BIG\n";
581 r = simple_read_from_buffer(ubuf, cnt, ppos,
582 buf, strlen(buf));
583 goto out;
584 }
585 707
586 r = simple_read_from_buffer(ubuf, cnt, ppos, 708 if (!array_descriptor)
587 s->buffer, s->len); 709 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
588 out: 710 field->type, field->name, field->offset,
589 kfree(s); 711 field->size, !!field->is_signed);
590 return r; 712 else
713 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
714 (int)(array_descriptor - field->type),
715 field->type, field->name,
716 array_descriptor, field->offset,
717 field->size, !!field->is_signed);
718
719 return 0;
720}
721
722static void f_stop(struct seq_file *m, void *p)
723{
724}
725
726static const struct seq_operations trace_format_seq_ops = {
727 .start = f_start,
728 .next = f_next,
729 .stop = f_stop,
730 .show = f_show,
731};
732
733static int trace_format_open(struct inode *inode, struct file *file)
734{
735 struct ftrace_event_call *call = inode->i_private;
736 struct seq_file *m;
737 int ret;
738
739 ret = seq_open(file, &trace_format_seq_ops);
740 if (ret < 0)
741 return ret;
742
743 m = file->private_data;
744 m->private = call;
745
746 return 0;
591} 747}
592 748
593static ssize_t 749static ssize_t
@@ -605,7 +761,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
605 return -ENOMEM; 761 return -ENOMEM;
606 762
607 trace_seq_init(s); 763 trace_seq_init(s);
608 trace_seq_printf(s, "%d\n", call->id); 764 trace_seq_printf(s, "%d\n", call->event.type);
609 765
610 r = simple_read_from_buffer(ubuf, cnt, ppos, 766 r = simple_read_from_buffer(ubuf, cnt, ppos,
611 s->buffer, s->len); 767 s->buffer, s->len);
@@ -782,39 +938,47 @@ static const struct file_operations ftrace_enable_fops = {
782 .open = tracing_open_generic, 938 .open = tracing_open_generic,
783 .read = event_enable_read, 939 .read = event_enable_read,
784 .write = event_enable_write, 940 .write = event_enable_write,
941 .llseek = default_llseek,
785}; 942};
786 943
787static const struct file_operations ftrace_event_format_fops = { 944static const struct file_operations ftrace_event_format_fops = {
788 .open = tracing_open_generic, 945 .open = trace_format_open,
789 .read = event_format_read, 946 .read = seq_read,
947 .llseek = seq_lseek,
948 .release = seq_release,
790}; 949};
791 950
792static const struct file_operations ftrace_event_id_fops = { 951static const struct file_operations ftrace_event_id_fops = {
793 .open = tracing_open_generic, 952 .open = tracing_open_generic,
794 .read = event_id_read, 953 .read = event_id_read,
954 .llseek = default_llseek,
795}; 955};
796 956
797static const struct file_operations ftrace_event_filter_fops = { 957static const struct file_operations ftrace_event_filter_fops = {
798 .open = tracing_open_generic, 958 .open = tracing_open_generic,
799 .read = event_filter_read, 959 .read = event_filter_read,
800 .write = event_filter_write, 960 .write = event_filter_write,
961 .llseek = default_llseek,
801}; 962};
802 963
803static const struct file_operations ftrace_subsystem_filter_fops = { 964static const struct file_operations ftrace_subsystem_filter_fops = {
804 .open = tracing_open_generic, 965 .open = tracing_open_generic,
805 .read = subsystem_filter_read, 966 .read = subsystem_filter_read,
806 .write = subsystem_filter_write, 967 .write = subsystem_filter_write,
968 .llseek = default_llseek,
807}; 969};
808 970
809static const struct file_operations ftrace_system_enable_fops = { 971static const struct file_operations ftrace_system_enable_fops = {
810 .open = tracing_open_generic, 972 .open = tracing_open_generic,
811 .read = system_enable_read, 973 .read = system_enable_read,
812 .write = system_enable_write, 974 .write = system_enable_write,
975 .llseek = default_llseek,
813}; 976};
814 977
815static const struct file_operations ftrace_show_header_fops = { 978static const struct file_operations ftrace_show_header_fops = {
816 .open = tracing_open_generic, 979 .open = tracing_open_generic,
817 .read = show_header, 980 .read = show_header,
981 .llseek = default_llseek,
818}; 982};
819 983
820static struct dentry *event_trace_events_dir(void) 984static struct dentry *event_trace_events_dir(void)
@@ -911,14 +1075,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
911 const struct file_operations *filter, 1075 const struct file_operations *filter,
912 const struct file_operations *format) 1076 const struct file_operations *format)
913{ 1077{
1078 struct list_head *head;
914 int ret; 1079 int ret;
915 1080
916 /* 1081 /*
917 * If the trace point header did not define TRACE_SYSTEM 1082 * If the trace point header did not define TRACE_SYSTEM
918 * then the system would be called "TRACE_SYSTEM". 1083 * then the system would be called "TRACE_SYSTEM".
919 */ 1084 */
920 if (strcmp(call->system, TRACE_SYSTEM) != 0) 1085 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
921 d_events = event_subsystem_dir(call->system, d_events); 1086 d_events = event_subsystem_dir(call->class->system, d_events);
922 1087
923 call->dir = debugfs_create_dir(call->name, d_events); 1088 call->dir = debugfs_create_dir(call->name, d_events);
924 if (!call->dir) { 1089 if (!call->dir) {
@@ -927,30 +1092,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
927 return -1; 1092 return -1;
928 } 1093 }
929 1094
930 if (call->regfunc) 1095 if (call->class->reg)
931 trace_create_file("enable", 0644, call->dir, call, 1096 trace_create_file("enable", 0644, call->dir, call,
932 enable); 1097 enable);
933 1098
934 if (call->id && call->profile_enable) 1099#ifdef CONFIG_PERF_EVENTS
1100 if (call->event.type && call->class->reg)
935 trace_create_file("id", 0444, call->dir, call, 1101 trace_create_file("id", 0444, call->dir, call,
936 id); 1102 id);
1103#endif
937 1104
938 if (call->define_fields) { 1105 /*
939 ret = trace_define_common_fields(call); 1106 * Other events may have the same class. Only update
940 if (!ret) 1107 * the fields if they are not already defined.
941 ret = call->define_fields(call); 1108 */
1109 head = trace_get_fields(call);
1110 if (list_empty(head)) {
1111 ret = call->class->define_fields(call);
942 if (ret < 0) { 1112 if (ret < 0) {
943 pr_warning("Could not initialize trace point" 1113 pr_warning("Could not initialize trace point"
944 " events/%s\n", call->name); 1114 " events/%s\n", call->name);
945 return ret; 1115 return ret;
946 } 1116 }
947 trace_create_file("filter", 0644, call->dir, call,
948 filter);
949 } 1117 }
950 1118 trace_create_file("filter", 0644, call->dir, call,
951 /* A trace may not want to export its format */ 1119 filter);
952 if (!call->show_format)
953 return 0;
954 1120
955 trace_create_file("format", 0444, call->dir, call, 1121 trace_create_file("format", 0444, call->dir, call,
956 format); 1122 format);
@@ -958,20 +1124,26 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
958 return 0; 1124 return 0;
959} 1125}
960 1126
961static int __trace_add_event_call(struct ftrace_event_call *call) 1127static int
1128__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
1129 const struct file_operations *id,
1130 const struct file_operations *enable,
1131 const struct file_operations *filter,
1132 const struct file_operations *format)
962{ 1133{
963 struct dentry *d_events; 1134 struct dentry *d_events;
964 int ret; 1135 int ret;
965 1136
1137 /* The linker may leave blanks */
966 if (!call->name) 1138 if (!call->name)
967 return -EINVAL; 1139 return -EINVAL;
968 1140
969 if (call->raw_init) { 1141 if (call->class->raw_init) {
970 ret = call->raw_init(call); 1142 ret = call->class->raw_init(call);
971 if (ret < 0) { 1143 if (ret < 0) {
972 if (ret != -ENOSYS) 1144 if (ret != -ENOSYS)
973 pr_warning("Could not initialize trace " 1145 pr_warning("Could not initialize trace events/%s\n",
974 "events/%s\n", call->name); 1146 call->name);
975 return ret; 1147 return ret;
976 } 1148 }
977 } 1149 }
@@ -980,11 +1152,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
980 if (!d_events) 1152 if (!d_events)
981 return -ENOENT; 1153 return -ENOENT;
982 1154
983 ret = event_create_dir(call, d_events, &ftrace_event_id_fops, 1155 ret = event_create_dir(call, d_events, id, enable, filter, format);
984 &ftrace_enable_fops, &ftrace_event_filter_fops,
985 &ftrace_event_format_fops);
986 if (!ret) 1156 if (!ret)
987 list_add(&call->list, &ftrace_events); 1157 list_add(&call->list, &ftrace_events);
1158 call->mod = mod;
988 1159
989 return ret; 1160 return ret;
990} 1161}
@@ -994,7 +1165,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
994{ 1165{
995 int ret; 1166 int ret;
996 mutex_lock(&event_mutex); 1167 mutex_lock(&event_mutex);
997 ret = __trace_add_event_call(call); 1168 ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1169 &ftrace_enable_fops,
1170 &ftrace_event_filter_fops,
1171 &ftrace_event_format_fops);
998 mutex_unlock(&event_mutex); 1172 mutex_unlock(&event_mutex);
999 return ret; 1173 return ret;
1000} 1174}
@@ -1031,13 +1205,13 @@ static void remove_subsystem_dir(const char *name)
1031static void __trace_remove_event_call(struct ftrace_event_call *call) 1205static void __trace_remove_event_call(struct ftrace_event_call *call)
1032{ 1206{
1033 ftrace_event_enable_disable(call, 0); 1207 ftrace_event_enable_disable(call, 0);
1034 if (call->event) 1208 if (call->event.funcs)
1035 __unregister_ftrace_event(call->event); 1209 __unregister_ftrace_event(&call->event);
1036 debugfs_remove_recursive(call->dir); 1210 debugfs_remove_recursive(call->dir);
1037 list_del(&call->list); 1211 list_del(&call->list);
1038 trace_destroy_fields(call); 1212 trace_destroy_fields(call);
1039 destroy_preds(call); 1213 destroy_preds(call);
1040 remove_subsystem_dir(call->system); 1214 remove_subsystem_dir(call->class->system);
1041} 1215}
1042 1216
1043/* Remove an event_call */ 1217/* Remove an event_call */
@@ -1111,8 +1285,6 @@ static void trace_module_add_events(struct module *mod)
1111{ 1285{
1112 struct ftrace_module_file_ops *file_ops = NULL; 1286 struct ftrace_module_file_ops *file_ops = NULL;
1113 struct ftrace_event_call *call, *start, *end; 1287 struct ftrace_event_call *call, *start, *end;
1114 struct dentry *d_events;
1115 int ret;
1116 1288
1117 start = mod->trace_events; 1289 start = mod->trace_events;
1118 end = mod->trace_events + mod->num_trace_events; 1290 end = mod->trace_events + mod->num_trace_events;
@@ -1120,38 +1292,14 @@ static void trace_module_add_events(struct module *mod)
1120 if (start == end) 1292 if (start == end)
1121 return; 1293 return;
1122 1294
1123 d_events = event_trace_events_dir(); 1295 file_ops = trace_create_file_ops(mod);
1124 if (!d_events) 1296 if (!file_ops)
1125 return; 1297 return;
1126 1298
1127 for_each_event(call, start, end) { 1299 for_each_event(call, start, end) {
1128 /* The linker may leave blanks */ 1300 __trace_add_event_call(call, mod,
1129 if (!call->name)
1130 continue;
1131 if (call->raw_init) {
1132 ret = call->raw_init(call);
1133 if (ret < 0) {
1134 if (ret != -ENOSYS)
1135 pr_warning("Could not initialize trace "
1136 "point events/%s\n", call->name);
1137 continue;
1138 }
1139 }
1140 /*
1141 * This module has events, create file ops for this module
1142 * if not already done.
1143 */
1144 if (!file_ops) {
1145 file_ops = trace_create_file_ops(mod);
1146 if (!file_ops)
1147 return;
1148 }
1149 call->mod = mod;
1150 ret = event_create_dir(call, d_events,
1151 &file_ops->id, &file_ops->enable, 1301 &file_ops->id, &file_ops->enable,
1152 &file_ops->filter, &file_ops->format); 1302 &file_ops->filter, &file_ops->format);
1153 if (!ret)
1154 list_add(&call->list, &ftrace_events);
1155 } 1303 }
1156} 1304}
1157 1305
@@ -1278,25 +1426,14 @@ static __init int event_trace_init(void)
1278 trace_create_file("enable", 0644, d_events, 1426 trace_create_file("enable", 0644, d_events,
1279 NULL, &ftrace_system_enable_fops); 1427 NULL, &ftrace_system_enable_fops);
1280 1428
1429 if (trace_define_common_fields())
1430 pr_warning("tracing: Failed to allocate common fields");
1431
1281 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1432 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1282 /* The linker may leave blanks */ 1433 __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
1283 if (!call->name)
1284 continue;
1285 if (call->raw_init) {
1286 ret = call->raw_init(call);
1287 if (ret < 0) {
1288 if (ret != -ENOSYS)
1289 pr_warning("Could not initialize trace "
1290 "point events/%s\n", call->name);
1291 continue;
1292 }
1293 }
1294 ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1295 &ftrace_enable_fops, 1434 &ftrace_enable_fops,
1296 &ftrace_event_filter_fops, 1435 &ftrace_event_filter_fops,
1297 &ftrace_event_format_fops); 1436 &ftrace_event_format_fops);
1298 if (!ret)
1299 list_add(&call->list, &ftrace_events);
1300 } 1437 }
1301 1438
1302 while (true) { 1439 while (true) {
@@ -1384,8 +1521,8 @@ static __init void event_trace_self_tests(void)
1384 1521
1385 list_for_each_entry(call, &ftrace_events, list) { 1522 list_for_each_entry(call, &ftrace_events, list) {
1386 1523
1387 /* Only test those that have a regfunc */ 1524 /* Only test those that have a probe */
1388 if (!call->regfunc) 1525 if (!call->class || !call->class->probe)
1389 continue; 1526 continue;
1390 1527
1391/* 1528/*
@@ -1395,8 +1532,8 @@ static __init void event_trace_self_tests(void)
1395 * syscalls as we test. 1532 * syscalls as we test.
1396 */ 1533 */
1397#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS 1534#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1398 if (call->system && 1535 if (call->class->system &&
1399 strcmp(call->system, "syscalls") == 0) 1536 strcmp(call->class->system, "syscalls") == 0)
1400 continue; 1537 continue;
1401#endif 1538#endif
1402 1539
@@ -1406,7 +1543,7 @@ static __init void event_trace_self_tests(void)
1406 * If an event is already enabled, someone is using 1543 * If an event is already enabled, someone is using
1407 * it and the self test should not be on. 1544 * it and the self test should not be on.
1408 */ 1545 */
1409 if (call->enabled) { 1546 if (call->flags & TRACE_EVENT_FL_ENABLED) {
1410 pr_warning("Enabled event during self test!\n"); 1547 pr_warning("Enabled event during self test!\n");
1411 WARN_ON_ONCE(1); 1548 WARN_ON_ONCE(1);
1412 continue; 1549 continue;
@@ -1483,12 +1620,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1483 struct ftrace_entry *entry; 1620 struct ftrace_entry *entry;
1484 unsigned long flags; 1621 unsigned long flags;
1485 long disabled; 1622 long disabled;
1486 int resched;
1487 int cpu; 1623 int cpu;
1488 int pc; 1624 int pc;
1489 1625
1490 pc = preempt_count(); 1626 pc = preempt_count();
1491 resched = ftrace_preempt_disable(); 1627 preempt_disable_notrace();
1492 cpu = raw_smp_processor_id(); 1628 cpu = raw_smp_processor_id();
1493 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 1629 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1494 1630
@@ -1510,7 +1646,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1510 1646
1511 out: 1647 out:
1512 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 1648 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1513 ftrace_preempt_enable(resched); 1649 preempt_enable_notrace();
1514} 1650}
1515 1651
1516static struct ftrace_ops trace_ops __initdata = 1652static struct ftrace_ops trace_ops __initdata =
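
With common fields split out into ftrace_common_fields, code that wants every field of an event now walks two lists, as f_next()/f_show() above do. A short sketch of that traversal follows; example_walk_fields() is illustrative only and not part of the patch.

/* Assumes kernel/trace/trace.h for ftrace_common_fields, trace_get_fields()
 * and struct ftrace_event_field. */
static void example_walk_fields(struct ftrace_event_call *call)
{
	struct ftrace_event_field *field;
	struct list_head *head;

	/* Fields shared by every event come first... */
	list_for_each_entry(field, &ftrace_common_fields, link)
		pr_info("common field: %s %s\n", field->type, field->name);

	/* ...then the fields belonging to this event's class. */
	head = trace_get_fields(call);
	list_for_each_entry(field, head, link)
		pr_info("event field: %s %s\n", field->type, field->name);
}
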
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e42af9aad69f..36d40104b17f 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h> 24#include <linux/perf_event.h>
25#include <linux/slab.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
@@ -496,11 +497,11 @@ void print_subsystem_event_filter(struct event_subsystem *system,
496} 497}
497 498
498static struct ftrace_event_field * 499static struct ftrace_event_field *
499find_event_field(struct ftrace_event_call *call, char *name) 500__find_event_field(struct list_head *head, char *name)
500{ 501{
501 struct ftrace_event_field *field; 502 struct ftrace_event_field *field;
502 503
503 list_for_each_entry(field, &call->fields, link) { 504 list_for_each_entry(field, head, link) {
504 if (!strcmp(field->name, name)) 505 if (!strcmp(field->name, name))
505 return field; 506 return field;
506 } 507 }
@@ -508,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
508 return NULL; 509 return NULL;
509} 510}
510 511
512static struct ftrace_event_field *
513find_event_field(struct ftrace_event_call *call, char *name)
514{
515 struct ftrace_event_field *field;
516 struct list_head *head;
517
518 field = __find_event_field(&ftrace_common_fields, name);
519 if (field)
520 return field;
521
522 head = trace_get_fields(call);
523 return __find_event_field(head, name);
524}
525
511static void filter_free_pred(struct filter_pred *pred) 526static void filter_free_pred(struct filter_pred *pred)
512{ 527{
513 if (!pred) 528 if (!pred)
@@ -544,7 +559,7 @@ static void filter_disable_preds(struct ftrace_event_call *call)
544 struct event_filter *filter = call->filter; 559 struct event_filter *filter = call->filter;
545 int i; 560 int i;
546 561
547 call->filter_active = 0; 562 call->flags &= ~TRACE_EVENT_FL_FILTERED;
548 filter->n_preds = 0; 563 filter->n_preds = 0;
549 564
550 for (i = 0; i < MAX_FILTER_PRED; i++) 565 for (i = 0; i < MAX_FILTER_PRED; i++)
@@ -571,7 +586,7 @@ void destroy_preds(struct ftrace_event_call *call)
571{ 586{
572 __free_preds(call->filter); 587 __free_preds(call->filter);
573 call->filter = NULL; 588 call->filter = NULL;
574 call->filter_active = 0; 589 call->flags &= ~TRACE_EVENT_FL_FILTERED;
575} 590}
576 591
577static struct event_filter *__alloc_preds(void) 592static struct event_filter *__alloc_preds(void)
@@ -610,7 +625,7 @@ static int init_preds(struct ftrace_event_call *call)
610 if (call->filter) 625 if (call->filter)
611 return 0; 626 return 0;
612 627
613 call->filter_active = 0; 628 call->flags &= ~TRACE_EVENT_FL_FILTERED;
614 call->filter = __alloc_preds(); 629 call->filter = __alloc_preds();
615 if (IS_ERR(call->filter)) 630 if (IS_ERR(call->filter))
616 return PTR_ERR(call->filter); 631 return PTR_ERR(call->filter);
@@ -624,10 +639,7 @@ static int init_subsystem_preds(struct event_subsystem *system)
624 int err; 639 int err;
625 640
626 list_for_each_entry(call, &ftrace_events, list) { 641 list_for_each_entry(call, &ftrace_events, list) {
627 if (!call->define_fields) 642 if (strcmp(call->class->system, system->name) != 0)
628 continue;
629
630 if (strcmp(call->system, system->name) != 0)
631 continue; 643 continue;
632 644
633 err = init_preds(call); 645 err = init_preds(call);
@@ -643,10 +655,7 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
643 struct ftrace_event_call *call; 655 struct ftrace_event_call *call;
644 656
645 list_for_each_entry(call, &ftrace_events, list) { 657 list_for_each_entry(call, &ftrace_events, list) {
646 if (!call->define_fields) 658 if (strcmp(call->class->system, system->name) != 0)
647 continue;
648
649 if (strcmp(call->system, system->name) != 0)
650 continue; 659 continue;
651 660
652 filter_disable_preds(call); 661 filter_disable_preds(call);
@@ -1248,10 +1257,7 @@ static int replace_system_preds(struct event_subsystem *system,
1248 list_for_each_entry(call, &ftrace_events, list) { 1257 list_for_each_entry(call, &ftrace_events, list) {
1249 struct event_filter *filter = call->filter; 1258 struct event_filter *filter = call->filter;
1250 1259
1251 if (!call->define_fields) 1260 if (strcmp(call->class->system, system->name) != 0)
1252 continue;
1253
1254 if (strcmp(call->system, system->name) != 0)
1255 continue; 1261 continue;
1256 1262
1257 /* try to see if the filter can be applied */ 1263 /* try to see if the filter can be applied */
@@ -1265,7 +1271,7 @@ static int replace_system_preds(struct event_subsystem *system,
1265 if (err) 1271 if (err)
1266 filter_disable_preds(call); 1272 filter_disable_preds(call);
1267 else { 1273 else {
1268 call->filter_active = 1; 1274 call->flags |= TRACE_EVENT_FL_FILTERED;
1269 replace_filter_string(filter, filter_string); 1275 replace_filter_string(filter, filter_string);
1270 } 1276 }
1271 fail = false; 1277 fail = false;
@@ -1314,7 +1320,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1314 if (err) 1320 if (err)
1315 append_filter_err(ps, call->filter); 1321 append_filter_err(ps, call->filter);
1316 else 1322 else
1317 call->filter_active = 1; 1323 call->flags |= TRACE_EVENT_FL_FILTERED;
1318out: 1324out:
1319 filter_opstack_clear(ps); 1325 filter_opstack_clear(ps);
1320 postfix_clear(ps); 1326 postfix_clear(ps);
@@ -1371,7 +1377,7 @@ out_unlock:
1371 return err; 1377 return err;
1372} 1378}
1373 1379
1374#ifdef CONFIG_EVENT_PROFILE 1380#ifdef CONFIG_PERF_EVENTS
1375 1381
1376void ftrace_profile_free_filter(struct perf_event *event) 1382void ftrace_profile_free_filter(struct perf_event *event)
1377{ 1383{
@@ -1392,12 +1398,12 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1392 mutex_lock(&event_mutex); 1398 mutex_lock(&event_mutex);
1393 1399
1394 list_for_each_entry(call, &ftrace_events, list) { 1400 list_for_each_entry(call, &ftrace_events, list) {
1395 if (call->id == event_id) 1401 if (call->event.type == event_id)
1396 break; 1402 break;
1397 } 1403 }
1398 1404
1399 err = -EINVAL; 1405 err = -EINVAL;
1400 if (!call) 1406 if (&call->list == &ftrace_events)
1401 goto out_unlock; 1407 goto out_unlock;
1402 1408
1403 err = -EEXIST; 1409 err = -EEXIST;
@@ -1439,5 +1445,5 @@ out_unlock:
1439 return err; 1445 return err;
1440} 1446}
1441 1447
1442#endif /* CONFIG_EVENT_PROFILE */ 1448#endif /* CONFIG_PERF_EVENTS */
1443 1449
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4fa5dc1ee4e..4b74d71705c0 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item), \
81 is_signed_type(type)); \
82 if (!ret) \
83 return 0;
84
85#undef __array
86#define __array(type, item, len) \
87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
89 offsetof(typeof(field), item), \
90 sizeof(field.item), is_signed_type(type)); \
91 if (!ret) \
92 return 0;
93
94#undef __array_desc
95#define __array_desc(type, container, item, len) \
96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
98 offsetof(typeof(field), container.item), \
99 sizeof(field.container.item), \
100 is_signed_type(type)); \
101 if (!ret) \
102 return 0;
103
104#undef __dynamic_array
105#define __dynamic_array(type, item) \
106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
110 if (!ret) \
111 return 0;
112
113#undef F_printk
114#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
115
116#undef __entry
117#define __entry REC
118
119#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
121static int \
122ftrace_format_##name(struct ftrace_event_call *unused, \
123 struct trace_seq *s) \
124{ \
125 struct struct_name field __attribute__((unused)); \
126 int ret = 0; \
127 \
128 tstruct; \
129 \
130 trace_seq_printf(s, "\nprint fmt: " print); \
131 \
132 return ret; \
133}
134
135#include "trace_entries.h"
136
137#undef __field 65#undef __field
138#define __field(type, item) \ 66#define __field(type, item) \
139 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -155,13 +83,19 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
155 83
156#undef __array 84#undef __array
157#define __array(type, item, len) \ 85#define __array(type, item, len) \
158 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ 86 do { \
159 ret = trace_define_field(event_call, #type "[" #len "]", #item, \ 87 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
88 mutex_lock(&event_storage_mutex); \
89 snprintf(event_storage, sizeof(event_storage), \
90 "%s[%d]", #type, len); \
91 ret = trace_define_field(event_call, event_storage, #item, \
160 offsetof(typeof(field), item), \ 92 offsetof(typeof(field), item), \
161 sizeof(field.item), \ 93 sizeof(field.item), \
162 is_signed_type(type), FILTER_OTHER); \ 94 is_signed_type(type), FILTER_OTHER); \
163 if (ret) \ 95 mutex_unlock(&event_storage_mutex); \
164 return ret; 96 if (ret) \
97 return ret; \
98 } while (0);
165 99
166#undef __array_desc 100#undef __array_desc
167#define __array_desc(type, container, item, len) \ 101#define __array_desc(type, container, item, len) \
@@ -175,7 +109,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
175 return ret; 109 return ret;
176 110
177#undef __dynamic_array 111#undef __dynamic_array
178#define __dynamic_array(type, item) 112#define __dynamic_array(type, item) \
113 ret = trace_define_field(event_call, #type, #item, \
114 offsetof(typeof(field), item), \
115 0, is_signed_type(type), FILTER_OTHER);\
116 if (ret) \
117 return ret;
179 118
180#undef FTRACE_ENTRY 119#undef FTRACE_ENTRY
181#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -192,11 +131,8 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
192 131
193#include "trace_entries.h" 132#include "trace_entries.h"
194 133
195static int ftrace_raw_init_event(struct ftrace_event_call *call) 134#undef __entry
196{ 135#define __entry REC
197 INIT_LIST_HEAD(&call->fields);
198 return 0;
199}
200 136
201#undef __field 137#undef __field
202#define __field(type, item) 138#define __field(type, item)
@@ -213,18 +149,25 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
213#undef __dynamic_array 149#undef __dynamic_array
214#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
215 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
216#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
217#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
157 \
158struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \
161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
162}; \
218 \ 163 \
219struct ftrace_event_call __used \ 164struct ftrace_event_call __used \
220__attribute__((__aligned__(4))) \ 165__attribute__((__aligned__(4))) \
221__attribute__((section("_ftrace_events"))) event_##call = { \ 166__attribute__((section("_ftrace_events"))) event_##call = { \
222 .name = #call, \ 167 .name = #call, \
223 .id = type, \ 168 .event.type = etype, \
224 .system = __stringify(TRACE_SYSTEM), \ 169 .class = &event_class_ftrace_##call, \
225 .raw_init = ftrace_raw_init_event, \ 170 .print_fmt = print, \
226 .show_format = ftrace_format_##call, \
227 .define_fields = ftrace_define_fields_##call, \
228}; \ 171}; \
229 172
230#include "trace_entries.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b3f3776b0cd6..16aee4d44e8f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
54 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
55 unsigned long flags; 55 unsigned long flags;
56 long disabled; 56 long disabled;
57 int cpu, resched; 57 int cpu;
58 int pc; 58 int pc;
59 59
60 if (unlikely(!ftrace_function_enabled)) 60 if (unlikely(!ftrace_function_enabled))
61 return; 61 return;
62 62
63 pc = preempt_count(); 63 pc = preempt_count();
64 resched = ftrace_preempt_disable(); 64 preempt_disable_notrace();
65 local_save_flags(flags); 65 local_save_flags(flags);
66 cpu = raw_smp_processor_id(); 66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu]; 67 data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
71 trace_function(tr, ip, parent_ip, flags, pc); 71 trace_function(tr, ip, parent_ip, flags, pc);
72 72
73 atomic_dec(&data->disabled); 73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched); 74 preempt_enable_notrace();
75} 75}
76 76
77static void 77static void
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..76b05980225c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,19 +9,25 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
15#include "trace_output.h" 16#include "trace_output.h"
16 17
18/* When set, irq functions will be ignored */
19static int ftrace_graph_skip_irqs;
20
17struct fgraph_cpu_data { 21struct fgraph_cpu_data {
18 pid_t last_pid; 22 pid_t last_pid;
19 int depth; 23 int depth;
24 int depth_irq;
20 int ignore; 25 int ignore;
26 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
21}; 27};
22 28
23struct fgraph_data { 29struct fgraph_data {
24 struct fgraph_cpu_data *cpu_data; 30 struct fgraph_cpu_data __percpu *cpu_data;
25 31
26 /* Place to preserve last processed entry. */ 32 /* Place to preserve last processed entry. */
27 struct ftrace_graph_ent_entry ent; 33 struct ftrace_graph_ent_entry ent;
@@ -38,7 +44,8 @@ struct fgraph_data {
38#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 44#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
39#define TRACE_GRAPH_PRINT_PROC 0x8 45#define TRACE_GRAPH_PRINT_PROC 0x8
40#define TRACE_GRAPH_PRINT_DURATION 0x10 46#define TRACE_GRAPH_PRINT_DURATION 0x10
41#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 47#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
48#define TRACE_GRAPH_PRINT_IRQS 0x40
42 49
43static struct tracer_opt trace_opts[] = { 50static struct tracer_opt trace_opts[] = {
44 /* Display overruns? (for self-debug purpose) */ 51 /* Display overruns? (for self-debug purpose) */
@@ -53,13 +60,15 @@ static struct tracer_opt trace_opts[] = {
53 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, 60 { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
54 /* Display absolute time of an entry */ 61 /* Display absolute time of an entry */
55 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, 62 { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
63 /* Display interrupts */
64 { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
56 { } /* Empty entry */ 65 { } /* Empty entry */
57}; 66};
58 67
59static struct tracer_flags tracer_flags = { 68static struct tracer_flags tracer_flags = {
60 /* Don't display overruns and proc by default */ 69 /* Don't display overruns and proc by default */
61 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | 70 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
62 TRACE_GRAPH_PRINT_DURATION, 71 TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
63 .opts = trace_opts 72 .opts = trace_opts
64}; 73};
65 74
@@ -177,7 +186,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
177 return ret; 186 return ret;
178} 187}
179 188
180static int __trace_graph_entry(struct trace_array *tr, 189int __trace_graph_entry(struct trace_array *tr,
181 struct ftrace_graph_ent *trace, 190 struct ftrace_graph_ent *trace,
182 unsigned long flags, 191 unsigned long flags,
183 int pc) 192 int pc)
@@ -187,7 +196,7 @@ static int __trace_graph_entry(struct trace_array *tr,
187 struct ring_buffer *buffer = tr->buffer; 196 struct ring_buffer *buffer = tr->buffer;
188 struct ftrace_graph_ent_entry *entry; 197 struct ftrace_graph_ent_entry *entry;
189 198
190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 199 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
191 return 0; 200 return 0;
192 201
193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 202 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -202,6 +211,14 @@ static int __trace_graph_entry(struct trace_array *tr,
202 return 1; 211 return 1;
203} 212}
204 213
214static inline int ftrace_graph_ignore_irqs(void)
215{
216 if (!ftrace_graph_skip_irqs)
217 return 0;
218
219 return in_irq();
220}
221
205int trace_graph_entry(struct ftrace_graph_ent *trace) 222int trace_graph_entry(struct ftrace_graph_ent *trace)
206{ 223{
207 struct trace_array *tr = graph_array; 224 struct trace_array *tr = graph_array;
@@ -212,13 +229,12 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
212 int cpu; 229 int cpu;
213 int pc; 230 int pc;
214 231
215 if (unlikely(!tr))
216 return 0;
217
218 if (!ftrace_trace_task(current)) 232 if (!ftrace_trace_task(current))
219 return 0; 233 return 0;
220 234
221 if (!ftrace_graph_addr(trace->func)) 235 /* trace it when it is nested in, or is itself, an enabled function. */
236 if (!(trace->depth || ftrace_graph_addr(trace->func)) ||
237 ftrace_graph_ignore_irqs())
222 return 0; 238 return 0;
223 239
224 local_irq_save(flags); 240 local_irq_save(flags);
@@ -231,9 +247,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
231 } else { 247 } else {
232 ret = 0; 248 ret = 0;
233 } 249 }
234 /* Only do the atomic if it is not already set */
235 if (!test_tsk_trace_graph(current))
236 set_tsk_trace_graph(current);
237 250
238 atomic_dec(&data->disabled); 251 atomic_dec(&data->disabled);
239 local_irq_restore(flags); 252 local_irq_restore(flags);
@@ -241,7 +254,43 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
241 return ret; 254 return ret;
242} 255}
243 256
244static void __trace_graph_return(struct trace_array *tr, 257int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
258{
259 if (tracing_thresh)
260 return 1;
261 else
262 return trace_graph_entry(trace);
263}
264
265static void
266__trace_graph_function(struct trace_array *tr,
267 unsigned long ip, unsigned long flags, int pc)
268{
269 u64 time = trace_clock_local();
270 struct ftrace_graph_ent ent = {
271 .func = ip,
272 .depth = 0,
273 };
274 struct ftrace_graph_ret ret = {
275 .func = ip,
276 .depth = 0,
277 .calltime = time,
278 .rettime = time,
279 };
280
281 __trace_graph_entry(tr, &ent, flags, pc);
282 __trace_graph_return(tr, &ret, flags, pc);
283}
284
285void
286trace_graph_function(struct trace_array *tr,
287 unsigned long ip, unsigned long parent_ip,
288 unsigned long flags, int pc)
289{
290 __trace_graph_function(tr, ip, flags, pc);
291}
292
293void __trace_graph_return(struct trace_array *tr,
245 struct ftrace_graph_ret *trace, 294 struct ftrace_graph_ret *trace,
246 unsigned long flags, 295 unsigned long flags,
247 int pc) 296 int pc)
@@ -251,7 +300,7 @@ static void __trace_graph_return(struct trace_array *tr,
251 struct ring_buffer *buffer = tr->buffer; 300 struct ring_buffer *buffer = tr->buffer;
252 struct ftrace_graph_ret_entry *entry; 301 struct ftrace_graph_ret_entry *entry;
253 302
254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 303 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
255 return; 304 return;
256 305
257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 306 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -281,19 +330,39 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
281 pc = preempt_count(); 330 pc = preempt_count();
282 __trace_graph_return(tr, trace, flags, pc); 331 __trace_graph_return(tr, trace, flags, pc);
283 } 332 }
284 if (!trace->depth)
285 clear_tsk_trace_graph(current);
286 atomic_dec(&data->disabled); 333 atomic_dec(&data->disabled);
287 local_irq_restore(flags); 334 local_irq_restore(flags);
288} 335}
289 336
337void set_graph_array(struct trace_array *tr)
338{
339 graph_array = tr;
340
341 /* Make graph_array visible before we start tracing */
342
343 smp_mb();
344}
345
346void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
347{
348 if (tracing_thresh &&
349 (trace->rettime - trace->calltime < tracing_thresh))
350 return;
351 else
352 trace_graph_return(trace);
353}
354
290static int graph_trace_init(struct trace_array *tr) 355static int graph_trace_init(struct trace_array *tr)
291{ 356{
292 int ret; 357 int ret;
293 358
294 graph_array = tr; 359 set_graph_array(tr);
295 ret = register_ftrace_graph(&trace_graph_return, 360 if (tracing_thresh)
296 &trace_graph_entry); 361 ret = register_ftrace_graph(&trace_graph_thresh_return,
362 &trace_graph_thresh_entry);
363 else
364 ret = register_ftrace_graph(&trace_graph_return,
365 &trace_graph_entry);
297 if (ret) 366 if (ret)
298 return ret; 367 return ret;
299 tracing_start_cmdline_record(); 368 tracing_start_cmdline_record();
@@ -301,11 +370,6 @@ static int graph_trace_init(struct trace_array *tr)
301 return 0; 370 return 0;
302} 371}
303 372
304void set_graph_array(struct trace_array *tr)
305{
306 graph_array = tr;
307}
308
309static void graph_trace_reset(struct trace_array *tr) 373static void graph_trace_reset(struct trace_array *tr)
310{ 374{
311 tracing_stop_cmdline_record(); 375 tracing_stop_cmdline_record();
@@ -470,9 +534,10 @@ get_return_for_leaf(struct trace_iterator *iter,
470 * We need to consume the current entry to see 534 * We need to consume the current entry to see
471 * the next one. 535 * the next one.
472 */ 536 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 537 ring_buffer_consume(iter->tr->buffer, iter->cpu,
538 NULL, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 539 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL); 540 NULL, NULL);
476 } 541 }
477 542
478 if (!event) 543 if (!event)
@@ -486,7 +551,15 @@ get_return_for_leaf(struct trace_iterator *iter,
486 * if the output fails. 551 * if the output fails.
487 */ 552 */
488 data->ent = *curr; 553 data->ent = *curr;
489 data->ret = *next; 554 /*
555 * If the next event is not a return type, then
556 * we only care about what type it is. Otherwise we can
557 * safely copy the entire event.
558 */
559 if (next->ent.type == TRACE_GRAPH_RET)
560 data->ret = *next;
561 else
562 data->ret.ent.type = next->ent.type;
490 } 563 }
491 } 564 }
492 565
@@ -506,17 +579,18 @@ get_return_for_leaf(struct trace_iterator *iter,
506 579
507/* Signal an overhead of time execution to the output */ 580/* Signal an overhead of time execution to the output */
508static int 581static int
509print_graph_overhead(unsigned long long duration, struct trace_seq *s) 582print_graph_overhead(unsigned long long duration, struct trace_seq *s,
583 u32 flags)
510{ 584{
511 /* If duration disappears, we don't need anything */ 585 /* If duration disappears, we don't need anything */
512 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) 586 if (!(flags & TRACE_GRAPH_PRINT_DURATION))
513 return 1; 587 return 1;
514 588
515 /* Non nested entry or return */ 589 /* Non nested entry or return */
516 if (duration == -1) 590 if (duration == -1)
517 return trace_seq_printf(s, " "); 591 return trace_seq_printf(s, " ");
518 592
519 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 593 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
520 /* Duration exceeded 100 msecs */ 594 /* Duration exceeded 100 msecs */
521 if (duration > 100000ULL) 595 if (duration > 100000ULL)
522 return trace_seq_printf(s, "! "); 596 return trace_seq_printf(s, "! ");
@@ -542,7 +616,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s)
542 616
543static enum print_line_t 617static enum print_line_t
544print_graph_irq(struct trace_iterator *iter, unsigned long addr, 618print_graph_irq(struct trace_iterator *iter, unsigned long addr,
545 enum trace_type type, int cpu, pid_t pid) 619 enum trace_type type, int cpu, pid_t pid, u32 flags)
546{ 620{
547 int ret; 621 int ret;
548 struct trace_seq *s = &iter->seq; 622 struct trace_seq *s = &iter->seq;
@@ -552,21 +626,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
552 return TRACE_TYPE_UNHANDLED; 626 return TRACE_TYPE_UNHANDLED;
553 627
554 /* Absolute time */ 628 /* Absolute time */
555 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 629 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
556 ret = print_graph_abs_time(iter->ts, s); 630 ret = print_graph_abs_time(iter->ts, s);
557 if (!ret) 631 if (!ret)
558 return TRACE_TYPE_PARTIAL_LINE; 632 return TRACE_TYPE_PARTIAL_LINE;
559 } 633 }
560 634
561 /* Cpu */ 635 /* Cpu */
562 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 636 if (flags & TRACE_GRAPH_PRINT_CPU) {
563 ret = print_graph_cpu(s, cpu); 637 ret = print_graph_cpu(s, cpu);
564 if (ret == TRACE_TYPE_PARTIAL_LINE) 638 if (ret == TRACE_TYPE_PARTIAL_LINE)
565 return TRACE_TYPE_PARTIAL_LINE; 639 return TRACE_TYPE_PARTIAL_LINE;
566 } 640 }
567 641
568 /* Proc */ 642 /* Proc */
569 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 643 if (flags & TRACE_GRAPH_PRINT_PROC) {
570 ret = print_graph_proc(s, pid); 644 ret = print_graph_proc(s, pid);
571 if (ret == TRACE_TYPE_PARTIAL_LINE) 645 if (ret == TRACE_TYPE_PARTIAL_LINE)
572 return TRACE_TYPE_PARTIAL_LINE; 646 return TRACE_TYPE_PARTIAL_LINE;
@@ -576,7 +650,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
576 } 650 }
577 651
578 /* No overhead */ 652 /* No overhead */
579 ret = print_graph_overhead(-1, s); 653 ret = print_graph_overhead(-1, s, flags);
580 if (!ret) 654 if (!ret)
581 return TRACE_TYPE_PARTIAL_LINE; 655 return TRACE_TYPE_PARTIAL_LINE;
582 656
@@ -589,7 +663,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
589 return TRACE_TYPE_PARTIAL_LINE; 663 return TRACE_TYPE_PARTIAL_LINE;
590 664
591 /* Don't close the duration column if we don't have one */ 665 /* Don't close the duration column if we don't have one */
592 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 666 if (flags & TRACE_GRAPH_PRINT_DURATION)
593 trace_seq_printf(s, " |"); 667 trace_seq_printf(s, " |");
594 ret = trace_seq_printf(s, "\n"); 668 ret = trace_seq_printf(s, "\n");
595 669
@@ -619,7 +693,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
619 693
620 /* Print nsecs (we don't want to exceed 7 numbers) */ 694 /* Print nsecs (we don't want to exceed 7 numbers) */
621 if (len < 7) { 695 if (len < 7) {
622 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); 696 size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);
697
698 snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
623 ret = trace_seq_printf(s, ".%s", nsecs_str); 699 ret = trace_seq_printf(s, ".%s", nsecs_str);
624 if (!ret) 700 if (!ret)
625 return TRACE_TYPE_PARTIAL_LINE; 701 return TRACE_TYPE_PARTIAL_LINE;
@@ -659,7 +735,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
659static enum print_line_t 735static enum print_line_t
660print_graph_entry_leaf(struct trace_iterator *iter, 736print_graph_entry_leaf(struct trace_iterator *iter,
661 struct ftrace_graph_ent_entry *entry, 737 struct ftrace_graph_ent_entry *entry,
662 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) 738 struct ftrace_graph_ret_entry *ret_entry,
739 struct trace_seq *s, u32 flags)
663{ 740{
664 struct fgraph_data *data = iter->private; 741 struct fgraph_data *data = iter->private;
665 struct ftrace_graph_ret *graph_ret; 742 struct ftrace_graph_ret *graph_ret;
@@ -673,24 +750,30 @@ print_graph_entry_leaf(struct trace_iterator *iter,
673 duration = graph_ret->rettime - graph_ret->calltime; 750 duration = graph_ret->rettime - graph_ret->calltime;
674 751
675 if (data) { 752 if (data) {
753 struct fgraph_cpu_data *cpu_data;
676 int cpu = iter->cpu; 754 int cpu = iter->cpu;
677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 755
756 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
678 757
679 /* 758 /*
680 * Comments display at + 1 to depth. Since 759 * Comments display at + 1 to depth. Since
681 * this is a leaf function, keep the comments 760 * this is a leaf function, keep the comments
682 * equal to this depth. 761 * equal to this depth.
683 */ 762 */
684 *depth = call->depth - 1; 763 cpu_data->depth = call->depth - 1;
764
765 /* No need to keep this function around for this depth */
766 if (call->depth < FTRACE_RETFUNC_DEPTH)
767 cpu_data->enter_funcs[call->depth] = 0;
685 } 768 }
686 769
687 /* Overhead */ 770 /* Overhead */
688 ret = print_graph_overhead(duration, s); 771 ret = print_graph_overhead(duration, s, flags);
689 if (!ret) 772 if (!ret)
690 return TRACE_TYPE_PARTIAL_LINE; 773 return TRACE_TYPE_PARTIAL_LINE;
691 774
692 /* Duration */ 775 /* Duration */
693 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 776 if (flags & TRACE_GRAPH_PRINT_DURATION) {
694 ret = print_graph_duration(duration, s); 777 ret = print_graph_duration(duration, s);
695 if (ret == TRACE_TYPE_PARTIAL_LINE) 778 if (ret == TRACE_TYPE_PARTIAL_LINE)
696 return TRACE_TYPE_PARTIAL_LINE; 779 return TRACE_TYPE_PARTIAL_LINE;
@@ -713,7 +796,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
713static enum print_line_t 796static enum print_line_t
714print_graph_entry_nested(struct trace_iterator *iter, 797print_graph_entry_nested(struct trace_iterator *iter,
715 struct ftrace_graph_ent_entry *entry, 798 struct ftrace_graph_ent_entry *entry,
716 struct trace_seq *s, int cpu) 799 struct trace_seq *s, int cpu, u32 flags)
717{ 800{
718 struct ftrace_graph_ent *call = &entry->graph_ent; 801 struct ftrace_graph_ent *call = &entry->graph_ent;
719 struct fgraph_data *data = iter->private; 802 struct fgraph_data *data = iter->private;
@@ -721,19 +804,24 @@ print_graph_entry_nested(struct trace_iterator *iter,
721 int i; 804 int i;
722 805
723 if (data) { 806 if (data) {
807 struct fgraph_cpu_data *cpu_data;
724 int cpu = iter->cpu; 808 int cpu = iter->cpu;
725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
726 809
727 *depth = call->depth; 810 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
811 cpu_data->depth = call->depth;
812
813 /* Save this function pointer to see if the exit matches */
814 if (call->depth < FTRACE_RETFUNC_DEPTH)
815 cpu_data->enter_funcs[call->depth] = call->func;
728 } 816 }
729 817
730 /* No overhead */ 818 /* No overhead */
731 ret = print_graph_overhead(-1, s); 819 ret = print_graph_overhead(-1, s, flags);
732 if (!ret) 820 if (!ret)
733 return TRACE_TYPE_PARTIAL_LINE; 821 return TRACE_TYPE_PARTIAL_LINE;
734 822
735 /* No time */ 823 /* No time */
736 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 824 if (flags & TRACE_GRAPH_PRINT_DURATION) {
737 ret = trace_seq_printf(s, " | "); 825 ret = trace_seq_printf(s, " | ");
738 if (!ret) 826 if (!ret)
739 return TRACE_TYPE_PARTIAL_LINE; 827 return TRACE_TYPE_PARTIAL_LINE;
@@ -759,7 +847,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
759 847
760static enum print_line_t 848static enum print_line_t
761print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, 849print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
762 int type, unsigned long addr) 850 int type, unsigned long addr, u32 flags)
763{ 851{
764 struct fgraph_data *data = iter->private; 852 struct fgraph_data *data = iter->private;
765 struct trace_entry *ent = iter->ent; 853 struct trace_entry *ent = iter->ent;
@@ -772,27 +860,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
772 860
773 if (type) { 861 if (type) {
774 /* Interrupt */ 862 /* Interrupt */
775 ret = print_graph_irq(iter, addr, type, cpu, ent->pid); 863 ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags);
776 if (ret == TRACE_TYPE_PARTIAL_LINE) 864 if (ret == TRACE_TYPE_PARTIAL_LINE)
777 return TRACE_TYPE_PARTIAL_LINE; 865 return TRACE_TYPE_PARTIAL_LINE;
778 } 866 }
779 867
780 /* Absolute time */ 868 /* Absolute time */
781 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 869 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
782 ret = print_graph_abs_time(iter->ts, s); 870 ret = print_graph_abs_time(iter->ts, s);
783 if (!ret) 871 if (!ret)
784 return TRACE_TYPE_PARTIAL_LINE; 872 return TRACE_TYPE_PARTIAL_LINE;
785 } 873 }
786 874
787 /* Cpu */ 875 /* Cpu */
788 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 876 if (flags & TRACE_GRAPH_PRINT_CPU) {
789 ret = print_graph_cpu(s, cpu); 877 ret = print_graph_cpu(s, cpu);
790 if (ret == TRACE_TYPE_PARTIAL_LINE) 878 if (ret == TRACE_TYPE_PARTIAL_LINE)
791 return TRACE_TYPE_PARTIAL_LINE; 879 return TRACE_TYPE_PARTIAL_LINE;
792 } 880 }
793 881
794 /* Proc */ 882 /* Proc */
795 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 883 if (flags & TRACE_GRAPH_PRINT_PROC) {
796 ret = print_graph_proc(s, ent->pid); 884 ret = print_graph_proc(s, ent->pid);
797 if (ret == TRACE_TYPE_PARTIAL_LINE) 885 if (ret == TRACE_TYPE_PARTIAL_LINE)
798 return TRACE_TYPE_PARTIAL_LINE; 886 return TRACE_TYPE_PARTIAL_LINE;
@@ -812,9 +900,111 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
812 return 0; 900 return 0;
813} 901}
814 902
903/*
904 * Entry check for irq code
905 *
906 * returns 1 if
907 * - we are inside irq code
908 * - we just entered irq code
909 *
910 * returns 0 if
911 * - funcgraph-irqs option is set
912 * - we are not inside irq code
913 */
914static int
915check_irq_entry(struct trace_iterator *iter, u32 flags,
916 unsigned long addr, int depth)
917{
918 int cpu = iter->cpu;
919 int *depth_irq;
920 struct fgraph_data *data = iter->private;
921
922 /*
923 * If we are either displaying irqs, or we got called as
924 * a graph event and private data does not exist,
925 * then we bypass the irq check.
926 */
927 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
928 (!data))
929 return 0;
930
931 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
932
933 /*
934 * We are inside the irq code
935 */
936 if (*depth_irq >= 0)
937 return 1;
938
939 if ((addr < (unsigned long)__irqentry_text_start) ||
940 (addr >= (unsigned long)__irqentry_text_end))
941 return 0;
942
943 /*
944 * We are entering irq code.
945 */
946 *depth_irq = depth;
947 return 1;
948}
949
950/*
951 * Return check for irq code
952 *
953 * returns 1 if
954 * - we are inside irq code
955 * - we just left irq code
956 *
957 * returns 0 if
958 * - funcgraph-interrupts option is set
959 * - we are not inside irq code
960 */
961static int
962check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
963{
964 int cpu = iter->cpu;
965 int *depth_irq;
966 struct fgraph_data *data = iter->private;
967
968 /*
969 * If we are either displaying irqs, or we got called as
970 * a graph event and private data does not exist,
971 * then we bypass the irq check.
972 */
973 if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
974 (!data))
975 return 0;
976
977 depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
978
979 /*
980 * We are not inside the irq code.
981 */
982 if (*depth_irq == -1)
983 return 0;
984
985 /*
986 * We are inside the irq code, and this is the returning entry.
987 * Don't trace it; clear the entry depth, since we
988 * are now out of the irq code.
989 *
990 * This condition ensures that we 'leave the irq code' once
991 * we are out of the entry depth, thus protecting us from
992 * losing the RETURN entry.
993 */
994 if (*depth_irq >= depth) {
995 *depth_irq = -1;
996 return 1;
997 }
998
999 /*
1000 * We are inside the irq code, and this is not the entry.
1001 */
1002 return 1;
1003}
1004
815static enum print_line_t 1005static enum print_line_t
816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 1006print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
817 struct trace_iterator *iter) 1007 struct trace_iterator *iter, u32 flags)
818{ 1008{
819 struct fgraph_data *data = iter->private; 1009 struct fgraph_data *data = iter->private;
820 struct ftrace_graph_ent *call = &field->graph_ent; 1010 struct ftrace_graph_ent *call = &field->graph_ent;
@@ -822,14 +1012,17 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
822 static enum print_line_t ret; 1012 static enum print_line_t ret;
823 int cpu = iter->cpu; 1013 int cpu = iter->cpu;
824 1014
825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 1015 if (check_irq_entry(iter, flags, call->func, call->depth))
1016 return TRACE_TYPE_HANDLED;
1017
1018 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
826 return TRACE_TYPE_PARTIAL_LINE; 1019 return TRACE_TYPE_PARTIAL_LINE;
827 1020
828 leaf_ret = get_return_for_leaf(iter, field); 1021 leaf_ret = get_return_for_leaf(iter, field);
829 if (leaf_ret) 1022 if (leaf_ret)
830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s); 1023 ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags);
831 else 1024 else
832 ret = print_graph_entry_nested(iter, field, s, cpu); 1025 ret = print_graph_entry_nested(iter, field, s, cpu, flags);
833 1026
834 if (data) { 1027 if (data) {
835 /* 1028 /*
@@ -848,37 +1041,50 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
848 1041
849static enum print_line_t 1042static enum print_line_t
850print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 1043print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
851 struct trace_entry *ent, struct trace_iterator *iter) 1044 struct trace_entry *ent, struct trace_iterator *iter,
1045 u32 flags)
852{ 1046{
853 unsigned long long duration = trace->rettime - trace->calltime; 1047 unsigned long long duration = trace->rettime - trace->calltime;
854 struct fgraph_data *data = iter->private; 1048 struct fgraph_data *data = iter->private;
855 pid_t pid = ent->pid; 1049 pid_t pid = ent->pid;
856 int cpu = iter->cpu; 1050 int cpu = iter->cpu;
1051 int func_match = 1;
857 int ret; 1052 int ret;
858 int i; 1053 int i;
859 1054
1055 if (check_irq_return(iter, flags, trace->depth))
1056 return TRACE_TYPE_HANDLED;
1057
860 if (data) { 1058 if (data) {
1059 struct fgraph_cpu_data *cpu_data;
861 int cpu = iter->cpu; 1060 int cpu = iter->cpu;
862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1061
1062 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
863 1063
864 /* 1064 /*
865 * Comments display at + 1 to depth. This is the 1065 * Comments display at + 1 to depth. This is the
866 * return from a function, we now want the comments 1066 * return from a function, we now want the comments
867 * to display at the same level of the bracket. 1067 * to display at the same level of the bracket.
868 */ 1068 */
869 *depth = trace->depth - 1; 1069 cpu_data->depth = trace->depth - 1;
1070
1071 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
1072 if (cpu_data->enter_funcs[trace->depth] != trace->func)
1073 func_match = 0;
1074 cpu_data->enter_funcs[trace->depth] = 0;
1075 }
870 } 1076 }
871 1077
872 if (print_graph_prologue(iter, s, 0, 0)) 1078 if (print_graph_prologue(iter, s, 0, 0, flags))
873 return TRACE_TYPE_PARTIAL_LINE; 1079 return TRACE_TYPE_PARTIAL_LINE;
874 1080
875 /* Overhead */ 1081 /* Overhead */
876 ret = print_graph_overhead(duration, s); 1082 ret = print_graph_overhead(duration, s, flags);
877 if (!ret) 1083 if (!ret)
878 return TRACE_TYPE_PARTIAL_LINE; 1084 return TRACE_TYPE_PARTIAL_LINE;
879 1085
880 /* Duration */ 1086 /* Duration */
881 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 1087 if (flags & TRACE_GRAPH_PRINT_DURATION) {
882 ret = print_graph_duration(duration, s); 1088 ret = print_graph_duration(duration, s);
883 if (ret == TRACE_TYPE_PARTIAL_LINE) 1089 if (ret == TRACE_TYPE_PARTIAL_LINE)
884 return TRACE_TYPE_PARTIAL_LINE; 1090 return TRACE_TYPE_PARTIAL_LINE;
@@ -891,19 +1097,32 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
891 return TRACE_TYPE_PARTIAL_LINE; 1097 return TRACE_TYPE_PARTIAL_LINE;
892 } 1098 }
893 1099
894 ret = trace_seq_printf(s, "}\n"); 1100 /*
895 if (!ret) 1101 * If the return function does not have a matching entry,
896 return TRACE_TYPE_PARTIAL_LINE; 1102 * then the entry was lost. Instead of just printing
1103 * the '}' and letting the user guess what function this
1104 * belongs to, write out the function name.
1105 */
1106 if (func_match) {
1107 ret = trace_seq_printf(s, "}\n");
1108 if (!ret)
1109 return TRACE_TYPE_PARTIAL_LINE;
1110 } else {
1111 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
1112 if (!ret)
1113 return TRACE_TYPE_PARTIAL_LINE;
1114 }
897 1115
898 /* Overrun */ 1116 /* Overrun */
899 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 1117 if (flags & TRACE_GRAPH_PRINT_OVERRUN) {
900 ret = trace_seq_printf(s, " (Overruns: %lu)\n", 1118 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
901 trace->overrun); 1119 trace->overrun);
902 if (!ret) 1120 if (!ret)
903 return TRACE_TYPE_PARTIAL_LINE; 1121 return TRACE_TYPE_PARTIAL_LINE;
904 } 1122 }
905 1123
906 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); 1124 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET,
1125 cpu, pid, flags);
907 if (ret == TRACE_TYPE_PARTIAL_LINE) 1126 if (ret == TRACE_TYPE_PARTIAL_LINE)
908 return TRACE_TYPE_PARTIAL_LINE; 1127 return TRACE_TYPE_PARTIAL_LINE;
909 1128
@@ -911,8 +1130,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
911} 1130}
912 1131
913static enum print_line_t 1132static enum print_line_t
914print_graph_comment(struct trace_seq *s, struct trace_entry *ent, 1133print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
915 struct trace_iterator *iter) 1134 struct trace_iterator *iter, u32 flags)
916{ 1135{
917 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1136 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
918 struct fgraph_data *data = iter->private; 1137 struct fgraph_data *data = iter->private;
@@ -924,16 +1143,16 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
924 if (data) 1143 if (data)
925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; 1144 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
926 1145
927 if (print_graph_prologue(iter, s, 0, 0)) 1146 if (print_graph_prologue(iter, s, 0, 0, flags))
928 return TRACE_TYPE_PARTIAL_LINE; 1147 return TRACE_TYPE_PARTIAL_LINE;
929 1148
930 /* No overhead */ 1149 /* No overhead */
931 ret = print_graph_overhead(-1, s); 1150 ret = print_graph_overhead(-1, s, flags);
932 if (!ret) 1151 if (!ret)
933 return TRACE_TYPE_PARTIAL_LINE; 1152 return TRACE_TYPE_PARTIAL_LINE;
934 1153
935 /* No time */ 1154 /* No time */
936 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 1155 if (flags & TRACE_GRAPH_PRINT_DURATION) {
937 ret = trace_seq_printf(s, " | "); 1156 ret = trace_seq_printf(s, " | ");
938 if (!ret) 1157 if (!ret)
939 return TRACE_TYPE_PARTIAL_LINE; 1158 return TRACE_TYPE_PARTIAL_LINE;
@@ -968,7 +1187,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
968 if (!event) 1187 if (!event)
969 return TRACE_TYPE_UNHANDLED; 1188 return TRACE_TYPE_UNHANDLED;
970 1189
971 ret = event->trace(iter, sym_flags); 1190 ret = event->funcs->trace(iter, sym_flags, event);
972 if (ret != TRACE_TYPE_HANDLED) 1191 if (ret != TRACE_TYPE_HANDLED)
973 return ret; 1192 return ret;
974 } 1193 }
@@ -988,7 +1207,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
988 1207
989 1208
990enum print_line_t 1209enum print_line_t
991print_graph_function(struct trace_iterator *iter) 1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
992{ 1211{
993 struct ftrace_graph_ent_entry *field; 1212 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private; 1213 struct fgraph_data *data = iter->private;
@@ -1009,7 +1228,7 @@ print_graph_function(struct trace_iterator *iter)
1009 if (data && data->failed) { 1228 if (data && data->failed) {
1010 field = &data->ent; 1229 field = &data->ent;
1011 iter->cpu = data->cpu; 1230 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter); 1231 ret = print_graph_entry(field, s, iter, flags);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { 1232 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; 1233 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME; 1234 ret = TRACE_TYPE_NO_CONSUME;
@@ -1029,32 +1248,61 @@ print_graph_function(struct trace_iterator *iter)
1029 struct ftrace_graph_ent_entry saved; 1248 struct ftrace_graph_ent_entry saved;
1030 trace_assign_type(field, entry); 1249 trace_assign_type(field, entry);
1031 saved = *field; 1250 saved = *field;
1032 return print_graph_entry(&saved, s, iter); 1251 return print_graph_entry(&saved, s, iter, flags);
1033 } 1252 }
1034 case TRACE_GRAPH_RET: { 1253 case TRACE_GRAPH_RET: {
1035 struct ftrace_graph_ret_entry *field; 1254 struct ftrace_graph_ret_entry *field;
1036 trace_assign_type(field, entry); 1255 trace_assign_type(field, entry);
1037 return print_graph_return(&field->ret, s, entry, iter); 1256 return print_graph_return(&field->ret, s, entry, iter, flags);
1038 } 1257 }
1258 case TRACE_STACK:
1259 case TRACE_FN:
1260 /* don't trace stack and functions as comments */
1261 return TRACE_TYPE_UNHANDLED;
1262
1039 default: 1263 default:
1040 return print_graph_comment(s, entry, iter); 1264 return print_graph_comment(s, entry, iter, flags);
1041 } 1265 }
1042 1266
1043 return TRACE_TYPE_HANDLED; 1267 return TRACE_TYPE_HANDLED;
1044} 1268}
1045 1269
1046static void print_lat_header(struct seq_file *s) 1270static enum print_line_t
1271print_graph_function(struct trace_iterator *iter)
1272{
1273 return __print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1285}
1286
1287static enum print_line_t
1288print_graph_function_event(struct trace_iterator *iter, int flags,
1289 struct trace_event *event)
1290{
1291 return print_graph_function(iter);
1292}
1293
1294static void print_lat_header(struct seq_file *s, u32 flags)
1047{ 1295{
1048 static const char spaces[] = " " /* 16 spaces */ 1296 static const char spaces[] = " " /* 16 spaces */
1049 " " /* 4 spaces */ 1297 " " /* 4 spaces */
1050 " "; /* 17 spaces */ 1298 " "; /* 17 spaces */
1051 int size = 0; 1299 int size = 0;
1052 1300
1053 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1301 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1054 size += 16; 1302 size += 16;
1055 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1303 if (flags & TRACE_GRAPH_PRINT_CPU)
1056 size += 4; 1304 size += 4;
1057 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1305 if (flags & TRACE_GRAPH_PRINT_PROC)
1058 size += 17; 1306 size += 17;
1059 1307
1060 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); 1308 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces);
@@ -1065,43 +1313,65 @@ static void print_lat_header(struct seq_file *s)
1065 seq_printf(s, "#%.*s|||| / \n", size, spaces); 1313 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1066} 1314}
1067 1315
1068static void print_graph_headers(struct seq_file *s) 1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1069{ 1317{
1070 int lat = trace_flags & TRACE_ITER_LATENCY_FMT; 1318 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1071 1319
1072 if (lat) 1320 if (lat)
1073 print_lat_header(s); 1321 print_lat_header(s, flags);
1074 1322
1075 /* 1st line */ 1323 /* 1st line */
1076 seq_printf(s, "#"); 1324 seq_printf(s, "#");
1077 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1325 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1078 seq_printf(s, " TIME "); 1326 seq_printf(s, " TIME ");
1079 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1327 if (flags & TRACE_GRAPH_PRINT_CPU)
1080 seq_printf(s, " CPU"); 1328 seq_printf(s, " CPU");
1081 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1329 if (flags & TRACE_GRAPH_PRINT_PROC)
1082 seq_printf(s, " TASK/PID "); 1330 seq_printf(s, " TASK/PID ");
1083 if (lat) 1331 if (lat)
1084 seq_printf(s, "|||||"); 1332 seq_printf(s, "|||||");
1085 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1333 if (flags & TRACE_GRAPH_PRINT_DURATION)
1086 seq_printf(s, " DURATION "); 1334 seq_printf(s, " DURATION ");
1087 seq_printf(s, " FUNCTION CALLS\n"); 1335 seq_printf(s, " FUNCTION CALLS\n");
1088 1336
1089 /* 2nd line */ 1337 /* 2nd line */
1090 seq_printf(s, "#"); 1338 seq_printf(s, "#");
1091 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1339 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1092 seq_printf(s, " | "); 1340 seq_printf(s, " | ");
1093 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1341 if (flags & TRACE_GRAPH_PRINT_CPU)
1094 seq_printf(s, " | "); 1342 seq_printf(s, " | ");
1095 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1343 if (flags & TRACE_GRAPH_PRINT_PROC)
1096 seq_printf(s, " | | "); 1344 seq_printf(s, " | | ");
1097 if (lat) 1345 if (lat)
1098 seq_printf(s, "|||||"); 1346 seq_printf(s, "|||||");
1099 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1347 if (flags & TRACE_GRAPH_PRINT_DURATION)
1100 seq_printf(s, " | | "); 1348 seq_printf(s, " | | ");
1101 seq_printf(s, " | | | |\n"); 1349 seq_printf(s, " | | | |\n");
1102} 1350}
1103 1351
1104static void graph_trace_open(struct trace_iterator *iter) 1352void print_graph_headers(struct seq_file *s)
1353{
1354 print_graph_headers_flags(s, tracer_flags.val);
1355}
1356
1357void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{
1359 struct trace_iterator *iter = s->private;
1360
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter))
1364 return;
1365
1366 print_trace_header(s, iter);
1367 flags |= TRACE_GRAPH_PRINT_DURATION;
1368 } else
1369 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1370
1371 __print_graph_headers_flags(s, flags);
1372}
1373
1374void graph_trace_open(struct trace_iterator *iter)
1105{ 1375{
1106 /* pid and depth on the last trace processed */ 1376 /* pid and depth on the last trace processed */
1107 struct fgraph_data *data; 1377 struct fgraph_data *data;
@@ -1121,9 +1391,12 @@ static void graph_trace_open(struct trace_iterator *iter)
1121 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); 1391 pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
1122 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 1392 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
1123 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); 1393 int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore);
1394 int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);
1395
1124 *pid = -1; 1396 *pid = -1;
1125 *depth = 0; 1397 *depth = 0;
1126 *ignore = 0; 1398 *ignore = 0;
1399 *depth_irq = -1;
1127 } 1400 }
1128 1401
1129 iter->private = data; 1402 iter->private = data;
@@ -1136,7 +1409,7 @@ static void graph_trace_open(struct trace_iterator *iter)
1136 pr_warning("function graph tracer: not enough memory\n"); 1409 pr_warning("function graph tracer: not enough memory\n");
1137} 1410}
1138 1411
1139static void graph_trace_close(struct trace_iterator *iter) 1412void graph_trace_close(struct trace_iterator *iter)
1140{ 1413{
1141 struct fgraph_data *data = iter->private; 1414 struct fgraph_data *data = iter->private;
1142 1415
@@ -1146,6 +1419,28 @@ static void graph_trace_close(struct trace_iterator *iter)
1146 } 1419 }
1147} 1420}
1148 1421
1422static int func_graph_set_flag(u32 old_flags, u32 bit, int set)
1423{
1424 if (bit == TRACE_GRAPH_PRINT_IRQS)
1425 ftrace_graph_skip_irqs = !set;
1426
1427 return 0;
1428}
1429
1430static struct trace_event_functions graph_functions = {
1431 .trace = print_graph_function_event,
1432};
1433
1434static struct trace_event graph_trace_entry_event = {
1435 .type = TRACE_GRAPH_ENT,
1436 .funcs = &graph_functions,
1437};
1438
1439static struct trace_event graph_trace_ret_event = {
1440 .type = TRACE_GRAPH_RET,
1441 .funcs = &graph_functions
1442};
1443
1149static struct tracer graph_trace __read_mostly = { 1444static struct tracer graph_trace __read_mostly = {
1150 .name = "function_graph", 1445 .name = "function_graph",
1151 .open = graph_trace_open, 1446 .open = graph_trace_open,
@@ -1158,6 +1453,7 @@ static struct tracer graph_trace __read_mostly = {
1158 .print_line = print_graph_function, 1453 .print_line = print_graph_function,
1159 .print_header = print_graph_headers, 1454 .print_header = print_graph_headers,
1160 .flags = &tracer_flags, 1455 .flags = &tracer_flags,
1456 .set_flag = func_graph_set_flag,
1161#ifdef CONFIG_FTRACE_SELFTEST 1457#ifdef CONFIG_FTRACE_SELFTEST
1162 .selftest = trace_selftest_startup_function_graph, 1458 .selftest = trace_selftest_startup_function_graph,
1163#endif 1459#endif
@@ -1167,6 +1463,16 @@ static __init int init_graph_trace(void)
1167{ 1463{
1168 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); 1464 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1169 1465
1466 if (!register_ftrace_event(&graph_trace_entry_event)) {
1467 pr_warning("Warning: could not register graph trace events\n");
1468 return 1;
1469 }
1470
1471 if (!register_ftrace_event(&graph_trace_ret_event)) {
1472 pr_warning("Warning: could not register graph trace events\n");
1473 return 1;
1474 }
1475
1170 return register_tracer(&graph_trace); 1476 return register_tracer(&graph_trace);
1171} 1477}
1172 1478
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
deleted file mode 100644
index 7b97000745f5..000000000000
--- a/kernel/trace/trace_hw_branches.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * h/w branch tracer for x86 based on BTS
3 *
4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */
7#include <linux/kallsyms.h>
8#include <linux/debugfs.h>
9#include <linux/ftrace.h>
10#include <linux/module.h>
11#include <linux/cpu.h>
12#include <linux/smp.h>
13#include <linux/fs.h>
14
15#include <asm/ds.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20
21#define BTS_BUFFER_SIZE (1 << 13)
22
23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25
26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27
28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly;
30static struct trace_array *hw_branch_trace __read_mostly;
31
32
33static void bts_trace_init_cpu(int cpu)
34{
35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
39
40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
41 per_cpu(hwb_tracer, cpu) = NULL;
42}
43
44static int bts_trace_init(struct trace_array *tr)
45{
46 int cpu;
47
48 hw_branch_trace = tr;
49 trace_hw_branches_enabled = 0;
50
51 get_online_cpus();
52 for_each_online_cpu(cpu) {
53 bts_trace_init_cpu(cpu);
54
55 if (likely(per_cpu(hwb_tracer, cpu)))
56 trace_hw_branches_enabled = 1;
57 }
58 trace_hw_branches_suspended = 0;
59 put_online_cpus();
60
61 /* If we could not enable tracing on a single cpu, we fail. */
62 return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
63}
64
65static void bts_trace_reset(struct trace_array *tr)
66{
67 int cpu;
68
69 get_online_cpus();
70 for_each_online_cpu(cpu) {
71 if (likely(per_cpu(hwb_tracer, cpu))) {
72 ds_release_bts(per_cpu(hwb_tracer, cpu));
73 per_cpu(hwb_tracer, cpu) = NULL;
74 }
75 }
76 trace_hw_branches_enabled = 0;
77 trace_hw_branches_suspended = 0;
78 put_online_cpus();
79}
80
81static void bts_trace_start(struct trace_array *tr)
82{
83 int cpu;
84
85 get_online_cpus();
86 for_each_online_cpu(cpu)
87 if (likely(per_cpu(hwb_tracer, cpu)))
88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
89 trace_hw_branches_suspended = 0;
90 put_online_cpus();
91}
92
93static void bts_trace_stop(struct trace_array *tr)
94{
95 int cpu;
96
97 get_online_cpus();
98 for_each_online_cpu(cpu)
99 if (likely(per_cpu(hwb_tracer, cpu)))
100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
101 trace_hw_branches_suspended = 1;
102 put_online_cpus();
103}
104
105static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
106 unsigned long action, void *hcpu)
107{
108 int cpu = (long)hcpu;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 /* The notification is sent with interrupts enabled. */
114 if (trace_hw_branches_enabled) {
115 bts_trace_init_cpu(cpu);
116
117 if (trace_hw_branches_suspended &&
118 likely(per_cpu(hwb_tracer, cpu)))
119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
120 }
121 break;
122
123 case CPU_DOWN_PREPARE:
124 /* The notification is sent with interrupts enabled. */
125 if (likely(per_cpu(hwb_tracer, cpu))) {
126 ds_release_bts(per_cpu(hwb_tracer, cpu));
127 per_cpu(hwb_tracer, cpu) = NULL;
128 }
129 }
130
131 return NOTIFY_DONE;
132}
133
134static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
135 .notifier_call = bts_hotcpu_handler
136};
137
138static void bts_trace_print_header(struct seq_file *m)
139{
140 seq_puts(m, "# CPU# TO <- FROM\n");
141}
142
143static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
144{
145 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
146 struct trace_entry *entry = iter->ent;
147 struct trace_seq *seq = &iter->seq;
148 struct hw_branch_entry *it;
149
150 trace_assign_type(it, entry);
151
152 if (entry->type == TRACE_HW_BRANCHES) {
153 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
154 seq_print_ip_sym(seq, it->to, symflags) &&
155 trace_seq_printf(seq, "\t <- ") &&
156 seq_print_ip_sym(seq, it->from, symflags) &&
157 trace_seq_printf(seq, "\n"))
158 return TRACE_TYPE_HANDLED;
159 return TRACE_TYPE_PARTIAL_LINE;
160 }
161 return TRACE_TYPE_UNHANDLED;
162}
163
164void trace_hw_branch(u64 from, u64 to)
165{
166 struct ftrace_event_call *call = &event_hw_branch;
167 struct trace_array *tr = hw_branch_trace;
168 struct ring_buffer_event *event;
169 struct ring_buffer *buf;
170 struct hw_branch_entry *entry;
171 unsigned long irq1;
172 int cpu;
173
174 if (unlikely(!tr))
175 return;
176
177 if (unlikely(!trace_hw_branches_enabled))
178 return;
179
180 local_irq_save(irq1);
181 cpu = raw_smp_processor_id();
182 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
183 goto out;
184
185 buf = tr->buffer;
186 event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
187 sizeof(*entry), 0, 0);
188 if (!event)
189 goto out;
190 entry = ring_buffer_event_data(event);
191 tracing_generic_entry_update(&entry->ent, 0, from);
192 entry->ent.type = TRACE_HW_BRANCHES;
193 entry->from = from;
194 entry->to = to;
195 if (!filter_check_discard(call, entry, buf, event))
196 trace_buffer_unlock_commit(buf, event, 0, 0);
197
198 out:
199 atomic_dec(&tr->data[cpu]->disabled);
200 local_irq_restore(irq1);
201}
202
203static void trace_bts_at(const struct bts_trace *trace, void *at)
204{
205 struct bts_struct bts;
206 int err = 0;
207
208 WARN_ON_ONCE(!trace->read);
209 if (!trace->read)
210 return;
211
212 err = trace->read(this_tracer, at, &bts);
213 if (err < 0)
214 return;
215
216 switch (bts.qualifier) {
217 case BTS_BRANCH:
218 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
219 break;
220 }
221}
222
223/*
224 * Collect the trace on the current cpu and write it into the ftrace buffer.
225 *
226 * pre: tracing must be suspended on the current cpu
227 */
228static void trace_bts_cpu(void *arg)
229{
230 struct trace_array *tr = (struct trace_array *)arg;
231 const struct bts_trace *trace;
232 unsigned char *at;
233
234 if (unlikely(!tr))
235 return;
236
237 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
238 return;
239
240 if (unlikely(!this_tracer))
241 return;
242
243 trace = ds_read_bts(this_tracer);
244 if (!trace)
245 return;
246
247 for (at = trace->ds.top; (void *)at < trace->ds.end;
248 at += trace->ds.size)
249 trace_bts_at(trace, at);
250
251 for (at = trace->ds.begin; (void *)at < trace->ds.top;
252 at += trace->ds.size)
253 trace_bts_at(trace, at);
254}
255
256static void trace_bts_prepare(struct trace_iterator *iter)
257{
258 int cpu;
259
260 get_online_cpus();
261 for_each_online_cpu(cpu)
262 if (likely(per_cpu(hwb_tracer, cpu)))
263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
264 /*
265 * We need to collect the trace on the respective cpu since ftrace
266 * implicitly adds the record for the current cpu.
267 * Once that is more flexible, we could collect the data from any cpu.
268 */
269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
270
271 for_each_online_cpu(cpu)
272 if (likely(per_cpu(hwb_tracer, cpu)))
273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
274 put_online_cpus();
275}
276
277static void trace_bts_close(struct trace_iterator *iter)
278{
279 tracing_reset_online_cpus(iter->tr);
280}
281
282void trace_hw_branch_oops(void)
283{
284 if (this_tracer) {
285 ds_suspend_bts_noirq(this_tracer);
286 trace_bts_cpu(hw_branch_trace);
287 ds_resume_bts_noirq(this_tracer);
288 }
289}
290
291struct tracer bts_tracer __read_mostly =
292{
293 .name = "hw-branch-tracer",
294 .init = bts_trace_init,
295 .reset = bts_trace_reset,
296 .print_header = bts_trace_print_header,
297 .print_line = bts_trace_print_line,
298 .start = bts_trace_start,
299 .stop = bts_trace_stop,
300 .open = trace_bts_prepare,
301 .close = trace_bts_close,
302#ifdef CONFIG_FTRACE_SELFTEST
303 .selftest = trace_selftest_startup_hw_branches,
304#endif /* CONFIG_FTRACE_SELFTEST */
305};
306
307__init static int init_bts_trace(void)
308{
309 register_hotcpu_notifier(&bts_hotcpu_notifier);
310 return register_tracer(&bts_tracer);
311}
312device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 2974bc7538c7..5cf8c602b880 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -34,6 +34,9 @@ static int trace_type __read_mostly;
34 34
35static int save_lat_flag; 35static int save_lat_flag;
36 36
37static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
38static int start_irqsoff_tracer(struct trace_array *tr, int graph);
39
37#ifdef CONFIG_PREEMPT_TRACER 40#ifdef CONFIG_PREEMPT_TRACER
38static inline int 41static inline int
39preempt_trace(void) 42preempt_trace(void)
@@ -55,6 +58,23 @@ irq_trace(void)
55# define irq_trace() (0) 58# define irq_trace() (0)
56#endif 59#endif
57 60
61#define TRACE_DISPLAY_GRAPH 1
62
63static struct tracer_opt trace_opts[] = {
64#ifdef CONFIG_FUNCTION_GRAPH_TRACER
65 /* display latency trace as call graph */
66 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
67#endif
68 { } /* Empty entry */
69};
70
71static struct tracer_flags tracer_flags = {
72 .val = 0,
73 .opts = trace_opts,
74};
75
76#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
77
58/* 78/*
59 * Sequence count - we record it when starting a measurement and 79 * Sequence count - we record it when starting a measurement and
60 * skip the latency if the sequence has changed - some other section 80 * skip the latency if the sequence has changed - some other section
@@ -67,14 +87,22 @@ static __cacheline_aligned_in_smp unsigned long max_sequence;
67 87
68#ifdef CONFIG_FUNCTION_TRACER 88#ifdef CONFIG_FUNCTION_TRACER
69/* 89/*
70 * irqsoff uses its own tracer function to keep the overhead down: 90 * Prologue for the preempt and irqs off function tracers.
91 *
92 * Returns 1 if it is OK to continue, and data->disabled is
93 * incremented.
94 * 0 if the trace is to be ignored, and data->disabled
95 * is kept the same.
96 *
97 * Note, this function is also used outside this ifdef but
98 * inside the #ifdef of the function graph tracer below.
99 * This is OK, since the function graph tracer is
100 * dependent on the function tracer.
71 */ 101 */
72static void 102static int func_prolog_dec(struct trace_array *tr,
73irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) 103 struct trace_array_cpu **data,
104 unsigned long *flags)
74{ 105{
75 struct trace_array *tr = irqsoff_trace;
76 struct trace_array_cpu *data;
77 unsigned long flags;
78 long disabled; 106 long disabled;
79 int cpu; 107 int cpu;
80 108
@@ -86,18 +114,38 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
86 */ 114 */
87 cpu = raw_smp_processor_id(); 115 cpu = raw_smp_processor_id();
88 if (likely(!per_cpu(tracing_cpu, cpu))) 116 if (likely(!per_cpu(tracing_cpu, cpu)))
89 return; 117 return 0;
90 118
91 local_save_flags(flags); 119 local_save_flags(*flags);
92 /* slight chance to get a false positive on tracing_cpu */ 120 /* slight chance to get a false positive on tracing_cpu */
93 if (!irqs_disabled_flags(flags)) 121 if (!irqs_disabled_flags(*flags))
94 return; 122 return 0;
95 123
96 data = tr->data[cpu]; 124 *data = tr->data[cpu];
97 disabled = atomic_inc_return(&data->disabled); 125 disabled = atomic_inc_return(&(*data)->disabled);
98 126
99 if (likely(disabled == 1)) 127 if (likely(disabled == 1))
100 trace_function(tr, ip, parent_ip, flags, preempt_count()); 128 return 1;
129
130 atomic_dec(&(*data)->disabled);
131
132 return 0;
133}
134
135/*
136 * irqsoff uses its own tracer function to keep the overhead down:
137 */
138static void
139irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
140{
141 struct trace_array *tr = irqsoff_trace;
142 struct trace_array_cpu *data;
143 unsigned long flags;
144
145 if (!func_prolog_dec(tr, &data, &flags))
146 return;
147
148 trace_function(tr, ip, parent_ip, flags, preempt_count());
101 149
102 atomic_dec(&data->disabled); 150 atomic_dec(&data->disabled);
103} 151}
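The func_prolog_dec() refactoring above pulls the per-CPU reentrancy guard out of irqsoff_tracer_call() so the function-graph callbacks added below can share it: only the caller that takes data->disabled from 0 to 1 actually records, everyone else backs out. A minimal user-space sketch of the same guard idea, using C11 atomics and a hypothetical record() stand-in (nothing here is kernel API):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_long disabled;		/* per-CPU counter in the kernel; one global here */

	static void record(const char *what)	/* stands in for trace_function()/__trace_graph_entry() */
	{
		printf("traced: %s\n", what);
	}

	static void tracer_callback(const char *what)
	{
		/* Only the outermost entry (old value 0 -> new value 1) records. */
		if (atomic_fetch_add(&disabled, 1) == 0)
			record(what);
		/* Nested or disabled entries just undo the increment and leave. */
		atomic_fetch_sub(&disabled, 1);
	}

	int main(void)
	{
		tracer_callback("critical section entry");
		return 0;
	}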
@@ -108,6 +156,132 @@ static struct ftrace_ops trace_ops __read_mostly =
108}; 156};
109#endif /* CONFIG_FUNCTION_TRACER */ 157#endif /* CONFIG_FUNCTION_TRACER */
110 158
159#ifdef CONFIG_FUNCTION_GRAPH_TRACER
160static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
161{
162 int cpu;
163
164 if (!(bit & TRACE_DISPLAY_GRAPH))
165 return -EINVAL;
166
167 if (!(is_graph() ^ set))
168 return 0;
169
170 stop_irqsoff_tracer(irqsoff_trace, !set);
171
172 for_each_possible_cpu(cpu)
173 per_cpu(tracing_cpu, cpu) = 0;
174
175 tracing_max_latency = 0;
176 tracing_reset_online_cpus(irqsoff_trace);
177
178 return start_irqsoff_tracer(irqsoff_trace, set);
179}
180
181static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = irqsoff_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int ret;
187 int pc;
188
189 if (!func_prolog_dec(tr, &data, &flags))
190 return 0;
191
192 pc = preempt_count();
193 ret = __trace_graph_entry(tr, trace, flags, pc);
194 atomic_dec(&data->disabled);
195
196 return ret;
197}
198
199static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = irqsoff_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_dec(tr, &data, &flags))
207 return;
208
209 pc = preempt_count();
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212}
213
214static void irqsoff_trace_open(struct trace_iterator *iter)
215{
216 if (is_graph())
217 graph_trace_open(iter);
218
219}
220
221static void irqsoff_trace_close(struct trace_iterator *iter)
222{
223 if (iter->private)
224 graph_trace_close(iter);
225}
226
227#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
228 TRACE_GRAPH_PRINT_PROC)
229
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{
232 /*
233 * In graph mode call the graph tracer output function,
234 * otherwise go with the TRACE_FN event handler
235 */
236 if (is_graph())
237 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
238
239 return TRACE_TYPE_UNHANDLED;
240}
241
242static void irqsoff_print_header(struct seq_file *s)
243{
244 if (is_graph())
245 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
246 else
247 trace_default_header(s);
248}
249
250static void
251__trace_function(struct trace_array *tr,
252 unsigned long ip, unsigned long parent_ip,
253 unsigned long flags, int pc)
254{
255 if (is_graph())
256 trace_graph_function(tr, ip, parent_ip, flags, pc);
257 else
258 trace_function(tr, ip, parent_ip, flags, pc);
259}
260
261#else
262#define __trace_function trace_function
263
264static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
280static void irqsoff_print_header(struct seq_file *s) { }
281static void irqsoff_trace_open(struct trace_iterator *iter) { }
282static void irqsoff_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
111/* 285/*
112 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
113 */ 287 */
@@ -150,7 +324,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 324 if (!report_latency(delta))
151 goto out_unlock; 325 goto out_unlock;
152 326
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 327 __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */ 328 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc); 329 __trace_stack(tr, flags, 5, pc);
156 330
@@ -172,7 +346,7 @@ out_unlock:
172out: 346out:
173 data->critical_sequence = max_sequence; 347 data->critical_sequence = max_sequence;
174 data->preempt_timestamp = ftrace_now(cpu); 348 data->preempt_timestamp = ftrace_now(cpu);
175 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 349 __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
176} 350}
177 351
178static inline void 352static inline void
@@ -204,7 +378,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
204 378
205 local_save_flags(flags); 379 local_save_flags(flags);
206 380
207 trace_function(tr, ip, parent_ip, flags, preempt_count()); 381 __trace_function(tr, ip, parent_ip, flags, preempt_count());
208 382
209 per_cpu(tracing_cpu, cpu) = 1; 383 per_cpu(tracing_cpu, cpu) = 1;
210 384
@@ -238,7 +412,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
238 atomic_inc(&data->disabled); 412 atomic_inc(&data->disabled);
239 413
240 local_save_flags(flags); 414 local_save_flags(flags);
241 trace_function(tr, ip, parent_ip, flags, preempt_count()); 415 __trace_function(tr, ip, parent_ip, flags, preempt_count());
242 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 416 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
243 data->critical_start = 0; 417 data->critical_start = 0;
244 atomic_dec(&data->disabled); 418 atomic_dec(&data->disabled);
@@ -347,19 +521,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
347} 521}
348#endif /* CONFIG_PREEMPT_TRACER */ 522#endif /* CONFIG_PREEMPT_TRACER */
349 523
350static void start_irqsoff_tracer(struct trace_array *tr) 524static int start_irqsoff_tracer(struct trace_array *tr, int graph)
351{ 525{
352 register_ftrace_function(&trace_ops); 526 int ret = 0;
353 if (tracing_is_enabled()) 527
528 if (!graph)
529 ret = register_ftrace_function(&trace_ops);
530 else
531 ret = register_ftrace_graph(&irqsoff_graph_return,
532 &irqsoff_graph_entry);
533
534 if (!ret && tracing_is_enabled())
354 tracer_enabled = 1; 535 tracer_enabled = 1;
355 else 536 else
356 tracer_enabled = 0; 537 tracer_enabled = 0;
538
539 return ret;
357} 540}
358 541
359static void stop_irqsoff_tracer(struct trace_array *tr) 542static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
360{ 543{
361 tracer_enabled = 0; 544 tracer_enabled = 0;
362 unregister_ftrace_function(&trace_ops); 545
546 if (!graph)
547 unregister_ftrace_function(&trace_ops);
548 else
549 unregister_ftrace_graph();
363} 550}
364 551
365static void __irqsoff_tracer_init(struct trace_array *tr) 552static void __irqsoff_tracer_init(struct trace_array *tr)
@@ -372,12 +559,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
372 /* make sure that the tracer is visible */ 559 /* make sure that the tracer is visible */
373 smp_wmb(); 560 smp_wmb();
374 tracing_reset_online_cpus(tr); 561 tracing_reset_online_cpus(tr);
375 start_irqsoff_tracer(tr); 562
563 if (start_irqsoff_tracer(tr, is_graph()))
564 printk(KERN_ERR "failed to start irqsoff tracer\n");
376} 565}
377 566
378static void irqsoff_tracer_reset(struct trace_array *tr) 567static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 568{
380 stop_irqsoff_tracer(tr); 569 stop_irqsoff_tracer(tr, is_graph());
381 570
382 if (!save_lat_flag) 571 if (!save_lat_flag)
383 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 572 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
@@ -409,9 +598,16 @@ static struct tracer irqsoff_tracer __read_mostly =
409 .start = irqsoff_tracer_start, 598 .start = irqsoff_tracer_start,
410 .stop = irqsoff_tracer_stop, 599 .stop = irqsoff_tracer_stop,
411 .print_max = 1, 600 .print_max = 1,
601 .print_header = irqsoff_print_header,
602 .print_line = irqsoff_print_line,
603 .flags = &tracer_flags,
604 .set_flag = irqsoff_set_flag,
412#ifdef CONFIG_FTRACE_SELFTEST 605#ifdef CONFIG_FTRACE_SELFTEST
413 .selftest = trace_selftest_startup_irqsoff, 606 .selftest = trace_selftest_startup_irqsoff,
414#endif 607#endif
608 .open = irqsoff_trace_open,
609 .close = irqsoff_trace_close,
610 .use_max_tr = 1,
415}; 611};
416# define register_irqsoff(trace) register_tracer(&trace) 612# define register_irqsoff(trace) register_tracer(&trace)
417#else 613#else
@@ -435,9 +631,16 @@ static struct tracer preemptoff_tracer __read_mostly =
435 .start = irqsoff_tracer_start, 631 .start = irqsoff_tracer_start,
436 .stop = irqsoff_tracer_stop, 632 .stop = irqsoff_tracer_stop,
437 .print_max = 1, 633 .print_max = 1,
634 .print_header = irqsoff_print_header,
635 .print_line = irqsoff_print_line,
636 .flags = &tracer_flags,
637 .set_flag = irqsoff_set_flag,
438#ifdef CONFIG_FTRACE_SELFTEST 638#ifdef CONFIG_FTRACE_SELFTEST
439 .selftest = trace_selftest_startup_preemptoff, 639 .selftest = trace_selftest_startup_preemptoff,
440#endif 640#endif
641 .open = irqsoff_trace_open,
642 .close = irqsoff_trace_close,
643 .use_max_tr = 1,
441}; 644};
442# define register_preemptoff(trace) register_tracer(&trace) 645# define register_preemptoff(trace) register_tracer(&trace)
443#else 646#else
@@ -463,9 +666,16 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
463 .start = irqsoff_tracer_start, 666 .start = irqsoff_tracer_start,
464 .stop = irqsoff_tracer_stop, 667 .stop = irqsoff_tracer_stop,
465 .print_max = 1, 668 .print_max = 1,
669 .print_header = irqsoff_print_header,
670 .print_line = irqsoff_print_line,
671 .flags = &tracer_flags,
672 .set_flag = irqsoff_set_flag,
466#ifdef CONFIG_FTRACE_SELFTEST 673#ifdef CONFIG_FTRACE_SELFTEST
467 .selftest = trace_selftest_startup_preemptirqsoff, 674 .selftest = trace_selftest_startup_preemptirqsoff,
468#endif 675#endif
676 .open = irqsoff_trace_open,
677 .close = irqsoff_trace_close,
678 .use_max_tr = 1,
469}; 679};
470 680
471# define register_preemptirqsoff(trace) register_tracer(&trace) 681# define register_preemptirqsoff(trace) register_tracer(&trace)
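The display-graph tracer option and the open/close/print hooks wired up above let the irqsoff, preemptoff and preemptirqsoff tracers render their latency traces as function-call graphs. As a rough usage sketch, the option could be flipped from user space through tracefs; the /sys/kernel/debug mount point and the presence of the option file assume debugfs mounted there and a kernel built with CONFIG_FUNCTION_GRAPH_TRACER (illustrative only, not part of this patch):

	#include <stdio.h>

	/* Tiny helper (hypothetical): write one string to a tracefs control file. */
	static int write_str(const char *path, const char *val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		const char *tracing = "/sys/kernel/debug/tracing";
		char path[256];

		snprintf(path, sizeof(path), "%s/current_tracer", tracing);
		write_str(path, "irqsoff");		/* select the irqsoff latency tracer */

		snprintf(path, sizeof(path), "%s/options/display-graph", tracing);
		write_str(path, "1");			/* render the latency trace as a call graph */
		return 0;
	}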
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
new file mode 100644
index 000000000000..3c5c5dfea0b3
--- /dev/null
+++ b/kernel/trace/trace_kdb.c
@@ -0,0 +1,135 @@
1/*
2 * kdb helper for dumping the ftrace buffer
3 *
4 * Copyright (C) 2010 Jason Wessel <jason.wessel@windriver.com>
5 *
6 * ftrace_dump_buf based on ftrace_dump:
7 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
8 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
9 *
10 */
11#include <linux/init.h>
12#include <linux/kgdb.h>
13#include <linux/kdb.h>
14#include <linux/ftrace.h>
15
16#include "trace.h"
17#include "trace_output.h"
18
19static void ftrace_dump_buf(int skip_lines, long cpu_file)
20{
21 /* use static because iter can be a bit big for the stack */
22 static struct trace_iterator iter;
23 unsigned int old_userobj;
24 int cnt = 0, cpu;
25
26 trace_init_global_iter(&iter);
27
28 for_each_tracing_cpu(cpu) {
29 atomic_inc(&iter.tr->data[cpu]->disabled);
30 }
31
32 old_userobj = trace_flags;
33
34 /* don't look at user memory in panic mode */
35 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
36
37 kdb_printf("Dumping ftrace buffer:\n");
38
39 /* reset all but tr, trace, and overruns */
40 memset(&iter.seq, 0,
41 sizeof(struct trace_iterator) -
42 offsetof(struct trace_iterator, seq));
43 iter.iter_flags |= TRACE_FILE_LAT_FMT;
44 iter.pos = -1;
45
46 if (cpu_file == TRACE_PIPE_ALL_CPU) {
47 for_each_tracing_cpu(cpu) {
48 iter.buffer_iter[cpu] =
49 ring_buffer_read_prepare(iter.tr->buffer, cpu);
50 ring_buffer_read_start(iter.buffer_iter[cpu]);
51 tracing_iter_reset(&iter, cpu);
52 }
53 } else {
54 iter.cpu_file = cpu_file;
55 iter.buffer_iter[cpu_file] =
56 ring_buffer_read_prepare(iter.tr->buffer, cpu_file);
57 ring_buffer_read_start(iter.buffer_iter[cpu_file]);
58 tracing_iter_reset(&iter, cpu_file);
59 }
60 if (!trace_empty(&iter))
61 trace_find_next_entry_inc(&iter);
62 while (!trace_empty(&iter)) {
63 if (!cnt)
64 kdb_printf("---------------------------------\n");
65 cnt++;
66
67 if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines)
68 print_trace_line(&iter);
69 if (!skip_lines)
70 trace_printk_seq(&iter.seq);
71 else
72 skip_lines--;
73 if (KDB_FLAG(CMD_INTERRUPT))
74 goto out;
75 }
76
77 if (!cnt)
78 kdb_printf(" (ftrace buffer empty)\n");
79 else
80 kdb_printf("---------------------------------\n");
81
82out:
83 trace_flags = old_userobj;
84
85 for_each_tracing_cpu(cpu) {
86 atomic_dec(&iter.tr->data[cpu]->disabled);
87 }
88
89 for_each_tracing_cpu(cpu)
90 if (iter.buffer_iter[cpu])
91 ring_buffer_read_finish(iter.buffer_iter[cpu]);
92}
93
94/*
95 * kdb_ftdump - Dump the ftrace log buffer
96 */
97static int kdb_ftdump(int argc, const char **argv)
98{
99 int skip_lines = 0;
100 long cpu_file;
101 char *cp;
102
103 if (argc > 2)
104 return KDB_ARGCOUNT;
105
106 if (argc) {
107 skip_lines = simple_strtol(argv[1], &cp, 0);
108 if (*cp)
109 skip_lines = 0;
110 }
111
112 if (argc == 2) {
113 cpu_file = simple_strtol(argv[2], &cp, 0);
114 if (*cp || cpu_file >= NR_CPUS || cpu_file < 0 ||
115 !cpu_online(cpu_file))
116 return KDB_BADINT;
117 } else {
118 cpu_file = TRACE_PIPE_ALL_CPU;
119 }
120
121 kdb_trap_printk++;
122 ftrace_dump_buf(skip_lines, cpu_file);
123 kdb_trap_printk--;
124
125 return 0;
126}
127
128static __init int kdb_ftrace_register(void)
129{
130 kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
131 "Dump ftrace log", 0, KDB_REPEAT_NONE);
132 return 0;
133}
134
135late_initcall(kdb_ftrace_register);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0e2c96..2dec9bcde8b4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -29,6 +29,9 @@
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <linux/limits.h>
34#include <asm/bitsperlong.h>
32 35
33#include "trace.h" 36#include "trace.h"
34#include "trace_output.h" 37#include "trace_output.h"
@@ -36,11 +39,11 @@
36#define MAX_TRACE_ARGS 128 39#define MAX_TRACE_ARGS 128
37#define MAX_ARGSTR_LEN 63 40#define MAX_ARGSTR_LEN 63
38#define MAX_EVENT_NAME_LEN 64 41#define MAX_EVENT_NAME_LEN 64
42#define MAX_STRING_SIZE PATH_MAX
39#define KPROBE_EVENT_SYSTEM "kprobes" 43#define KPROBE_EVENT_SYSTEM "kprobes"
40 44
41/* Reserved field names */ 45/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip" 46#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip" 47#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func" 48#define FIELD_STRING_FUNC "__probe_func"
46 49
@@ -52,60 +55,214 @@ const char *reserved_field_names[] = {
52 "common_tgid", 55 "common_tgid",
53 "common_lock_depth", 56 "common_lock_depth",
54 FIELD_STRING_IP, 57 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP, 58 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC, 59 FIELD_STRING_FUNC,
58}; 60};
59 61
60struct fetch_func { 62/* Printing function type */
61 unsigned long (*func)(struct pt_regs *, void *); 63typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
62 void *data; 64 void *);
63}; 65#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
64 66#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
65static __kprobes unsigned long call_fetch(struct fetch_func *f, 67
66 struct pt_regs *regs) 68/* Printing in basic type function template */
69#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
70static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
71 const char *name, \
72 void *data, void *ent)\
73{ \
74 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
75} \
76static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
77
78DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
82DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
83DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
84DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
85DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
86
87/* data_rloc: data relative location, compatible with u32 */
88#define make_data_rloc(len, roffs) \
89 (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
90#define get_rloc_len(dl) ((u32)(dl) >> 16)
91#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
92
93static inline void *get_rloc_data(u32 *dl)
67{ 94{
68 return f->func(regs, f->data); 95 return (u8 *)dl + get_rloc_offs(*dl);
69} 96}
70 97
71/* fetch handlers */ 98/* For data_loc conversion */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs, 99static inline void *get_loc_data(u32 *dl, void *ent)
73 void *offset)
74{ 100{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset)); 101 return (u8 *)ent + get_rloc_offs(*dl);
76} 102}
77 103
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs, 104/*
79 void *num) 105 * Convert data_rloc to data_loc:
80{ 106 * data_rloc stores the offset from data_rloc itself, but data_loc
81 return regs_get_kernel_stack_nth(regs, 107 * stores the offset from event entry.
82 (unsigned int)((unsigned long)num)); 108 */
83} 109#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
110
111/* For defining macros, define string/string_size types */
112typedef u32 string;
113typedef u32 string_size;
84 114
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) 115/* Print type function for string type */
116static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
117 const char *name,
118 void *data, void *ent)
86{ 119{
87 unsigned long retval; 120 int len = *(u32 *)data >> 16;
88 121
89 if (probe_kernel_address(addr, retval)) 122 if (!len)
90 return 0; 123 return trace_seq_printf(s, " %s=(fault)", name);
91 return retval; 124 else
125 return trace_seq_printf(s, " %s=\"%s\"", name,
126 (const char *)get_loc_data(data, ent));
92} 127}
128static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
129
130/* Data fetch function type */
131typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
132
133struct fetch_param {
134 fetch_func_t fn;
135 void *data;
136};
93 137
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) 138static __kprobes void call_fetch(struct fetch_param *fprm,
139 struct pt_regs *regs, void *dest)
95{ 140{
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); 141 return fprm->fn(regs, fprm->data, dest);
97} 142}
98 143
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 144#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
100 void *dummy) 145/*
146 * Define macro for basic types - we don't need to define s* types, because
147 * we have to care only about bitwidth at recording time.
148 */
149#define DEFINE_BASIC_FETCH_FUNCS(method) \
150DEFINE_FETCH_##method(u8) \
151DEFINE_FETCH_##method(u16) \
152DEFINE_FETCH_##method(u32) \
153DEFINE_FETCH_##method(u64)
154
155#define CHECK_FETCH_FUNCS(method, fn) \
156 (((FETCH_FUNC_NAME(method, u8) == fn) || \
157 (FETCH_FUNC_NAME(method, u16) == fn) || \
158 (FETCH_FUNC_NAME(method, u32) == fn) || \
159 (FETCH_FUNC_NAME(method, u64) == fn) || \
160 (FETCH_FUNC_NAME(method, string) == fn) || \
161 (FETCH_FUNC_NAME(method, string_size) == fn)) \
162 && (fn != NULL))
163
164/* Data fetch function templates */
165#define DEFINE_FETCH_reg(type) \
166static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
167 void *offset, void *dest) \
168{ \
169 *(type *)dest = (type)regs_get_register(regs, \
170 (unsigned int)((unsigned long)offset)); \
171}
172DEFINE_BASIC_FETCH_FUNCS(reg)
173/* No string on the register */
174#define fetch_reg_string NULL
175#define fetch_reg_string_size NULL
176
177#define DEFINE_FETCH_stack(type) \
178static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
179 void *offset, void *dest) \
180{ \
181 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
182 (unsigned int)((unsigned long)offset)); \
183}
184DEFINE_BASIC_FETCH_FUNCS(stack)
185/* No string on the stack entry */
186#define fetch_stack_string NULL
187#define fetch_stack_string_size NULL
188
189#define DEFINE_FETCH_retval(type) \
190static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
191 void *dummy, void *dest) \
192{ \
193 *(type *)dest = (type)regs_return_value(regs); \
194}
195DEFINE_BASIC_FETCH_FUNCS(retval)
196/* No string on the retval */
197#define fetch_retval_string NULL
198#define fetch_retval_string_size NULL
199
200#define DEFINE_FETCH_memory(type) \
201static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
202 void *addr, void *dest) \
203{ \
204 type retval; \
205 if (probe_kernel_address(addr, retval)) \
206 *(type *)dest = 0; \
207 else \
208 *(type *)dest = retval; \
209}
210DEFINE_BASIC_FETCH_FUNCS(memory)
211/*
212 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
213 * length and relative data location.
214 */
215static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
216 void *addr, void *dest)
101{ 217{
102 return regs_return_value(regs); 218 long ret;
219 int maxlen = get_rloc_len(*(u32 *)dest);
220 u8 *dst = get_rloc_data(dest);
221 u8 *src = addr;
222 mm_segment_t old_fs = get_fs();
223 if (!maxlen)
224 return;
225 /*
226 * Try to get string again, since the string can be changed while
227 * probing.
228 */
229 set_fs(KERNEL_DS);
230 pagefault_disable();
231 do
232 ret = __copy_from_user_inatomic(dst++, src++, 1);
233 while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
234 dst[-1] = '\0';
235 pagefault_enable();
236 set_fs(old_fs);
237
238 if (ret < 0) { /* Failed to fetch string */
239 ((u8 *)get_rloc_data(dest))[0] = '\0';
240 *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
241 } else
242 *(u32 *)dest = make_data_rloc(src - (u8 *)addr,
243 get_rloc_offs(*(u32 *)dest));
103} 244}
104 245/* Return the length of string -- including the terminating null byte */
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, 246static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
106 void *dummy) 247 void *addr, void *dest)
107{ 248{
108 return kernel_stack_pointer(regs); 249 int ret, len = 0;
250 u8 c;
251 mm_segment_t old_fs = get_fs();
252
253 set_fs(KERNEL_DS);
254 pagefault_disable();
255 do {
256 ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
257 len++;
258 } while (c && ret == 0 && len < MAX_STRING_SIZE);
259 pagefault_enable();
260 set_fs(old_fs);
261
262 if (ret < 0) /* Failed to check the length */
263 *(u32 *)dest = 0;
264 else
265 *(u32 *)dest = len;
109} 266}
110 267
111/* Memory fetching by symbol */ 268/* Memory fetching by symbol */
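The data_rloc helpers introduced above pack a dynamic argument's length and its offset (relative to the u32 slot holding it) into one 32-bit word: length in the upper 16 bits, offset in the lower 16. A self-contained sketch of that encoding, copying the macros from the hunk into plain user-space C purely for illustration:

	#include <stdint.h>
	#include <stdio.h>

	#define make_data_rloc(len, roffs) \
		(((uint32_t)(len) << 16) | ((uint32_t)(roffs) & 0xffff))
	#define get_rloc_len(dl)	((uint32_t)(dl) >> 16)
	#define get_rloc_offs(dl)	((uint32_t)(dl) & 0xffff)

	int main(void)
	{
		/* e.g. a 5-byte string stored 24 bytes past its rloc slot */
		uint32_t dl = make_data_rloc(5, 24);

		printf("packed=0x%08x len=%u offs=%u\n",
		       dl, get_rloc_len(dl), get_rloc_offs(dl));
		/* prints: packed=0x00050018 len=5 offs=24 */
		return 0;
	}

The same word is later rewritten with convert_rloc_to_loc() so the stored offset becomes relative to the event entry rather than to the slot itself.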
@@ -150,51 +307,168 @@ static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
150 return sc; 307 return sc;
151} 308}
152 309
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) 310#define DEFINE_FETCH_symbol(type) \
154{ 311static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
155 struct symbol_cache *sc = data; 312 void *data, void *dest) \
156 313{ \
157 if (sc->addr) 314 struct symbol_cache *sc = data; \
158 return fetch_memory(regs, (void *)sc->addr); 315 if (sc->addr) \
159 else 316 fetch_memory_##type(regs, (void *)sc->addr, dest); \
160 return 0; 317 else \
318 *(type *)dest = 0; \
161} 319}
320DEFINE_BASIC_FETCH_FUNCS(symbol)
321DEFINE_FETCH_symbol(string)
322DEFINE_FETCH_symbol(string_size)
162 323
163/* Special indirect memory access interface */ 324/* Dereference memory access function */
164struct indirect_fetch_data { 325struct deref_fetch_param {
165 struct fetch_func orig; 326 struct fetch_param orig;
166 long offset; 327 long offset;
167}; 328};
168 329
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) 330#define DEFINE_FETCH_deref(type) \
170{ 331static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
171 struct indirect_fetch_data *ind = data; 332 void *data, void *dest) \
172 unsigned long addr; 333{ \
173 334 struct deref_fetch_param *dprm = data; \
174 addr = call_fetch(&ind->orig, regs); 335 unsigned long addr; \
175 if (addr) { 336 call_fetch(&dprm->orig, regs, &addr); \
176 addr += ind->offset; 337 if (addr) { \
177 return fetch_memory(regs, (void *)addr); 338 addr += dprm->offset; \
178 } else 339 fetch_memory_##type(regs, (void *)addr, dest); \
179 return 0; 340 } else \
341 *(type *)dest = 0; \
180} 342}
343DEFINE_BASIC_FETCH_FUNCS(deref)
344DEFINE_FETCH_deref(string)
345DEFINE_FETCH_deref(string_size)
181 346
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) 347static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
183{ 348{
184 if (data->orig.func == fetch_indirect) 349 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
185 free_indirect_fetch_data(data->orig.data); 350 free_deref_fetch_param(data->orig.data);
186 else if (data->orig.func == fetch_symbol) 351 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
187 free_symbol_cache(data->orig.data); 352 free_symbol_cache(data->orig.data);
188 kfree(data); 353 kfree(data);
189} 354}
190 355
356/* Default (unsigned long) fetch type */
357#define __DEFAULT_FETCH_TYPE(t) u##t
358#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
359#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
360#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
361
362/* Fetch types */
363enum {
364 FETCH_MTD_reg = 0,
365 FETCH_MTD_stack,
366 FETCH_MTD_retval,
367 FETCH_MTD_memory,
368 FETCH_MTD_symbol,
369 FETCH_MTD_deref,
370 FETCH_MTD_END,
371};
372
373#define ASSIGN_FETCH_FUNC(method, type) \
374 [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
375
376#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
377 {.name = _name, \
378 .size = _size, \
379 .is_signed = sign, \
380 .print = PRINT_TYPE_FUNC_NAME(ptype), \
381 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
382 .fmttype = _fmttype, \
383 .fetch = { \
384ASSIGN_FETCH_FUNC(reg, ftype), \
385ASSIGN_FETCH_FUNC(stack, ftype), \
386ASSIGN_FETCH_FUNC(retval, ftype), \
387ASSIGN_FETCH_FUNC(memory, ftype), \
388ASSIGN_FETCH_FUNC(symbol, ftype), \
389ASSIGN_FETCH_FUNC(deref, ftype), \
390 } \
391 }
392
393#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
394 __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
395
396#define FETCH_TYPE_STRING 0
397#define FETCH_TYPE_STRSIZE 1
398
399/* Fetch type information table */
400static const struct fetch_type {
401 const char *name; /* Name of type */
402 size_t size; /* Byte size of type */
403 int is_signed; /* Signed flag */
404 print_type_func_t print; /* Print functions */
 405 const char *fmt; /* Format string */
406 const char *fmttype; /* Name in format file */
407 /* Fetch functions */
408 fetch_func_t fetch[FETCH_MTD_END];
409} fetch_type_table[] = {
410 /* Special types */
411 [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
412 sizeof(u32), 1, "__data_loc char[]"),
413 [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
414 string_size, sizeof(u32), 0, "u32"),
415 /* Basic types */
416 ASSIGN_FETCH_TYPE(u8, u8, 0),
417 ASSIGN_FETCH_TYPE(u16, u16, 0),
418 ASSIGN_FETCH_TYPE(u32, u32, 0),
419 ASSIGN_FETCH_TYPE(u64, u64, 0),
420 ASSIGN_FETCH_TYPE(s8, u8, 1),
421 ASSIGN_FETCH_TYPE(s16, u16, 1),
422 ASSIGN_FETCH_TYPE(s32, u32, 1),
423 ASSIGN_FETCH_TYPE(s64, u64, 1),
424};
425
426static const struct fetch_type *find_fetch_type(const char *type)
427{
428 int i;
429
430 if (!type)
431 type = DEFAULT_FETCH_TYPE_STR;
432
433 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
434 if (strcmp(type, fetch_type_table[i].name) == 0)
435 return &fetch_type_table[i];
436 return NULL;
437}
438
 439/* Special function: only accepts unsigned long */
440static __kprobes void fetch_stack_address(struct pt_regs *regs,
441 void *dummy, void *dest)
442{
443 *(unsigned long *)dest = kernel_stack_pointer(regs);
444}
445
446static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
447 fetch_func_t orig_fn)
448{
449 int i;
450
451 if (type != &fetch_type_table[FETCH_TYPE_STRING])
452 return NULL; /* Only string type needs size function */
453 for (i = 0; i < FETCH_MTD_END; i++)
454 if (type->fetch[i] == orig_fn)
455 return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
456
457 WARN_ON(1); /* This should not happen */
458 return NULL;
459}
460
191/** 461/**
192 * Kprobe event core functions 462 * Kprobe event core functions
193 */ 463 */
194 464
195struct probe_arg { 465struct probe_arg {
196 struct fetch_func fetch; 466 struct fetch_param fetch;
197 const char *name; 467 struct fetch_param fetch_size;
468 unsigned int offset; /* Offset from argument entry */
469 const char *name; /* Name of this argument */
470 const char *comm; /* Command of this argument */
471 const struct fetch_type *type; /* Type of this argument */
198}; 472};
199 473
200/* Flags for trace_probe */ 474/* Flags for trace_probe */
@@ -207,8 +481,9 @@ struct trace_probe {
207 unsigned long nhit; 481 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */ 482 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */ 483 const char *symbol; /* symbol name */
484 struct ftrace_event_class class;
210 struct ftrace_event_call call; 485 struct ftrace_event_call call;
211 struct trace_event event; 486 ssize_t size; /* trace entry size */
212 unsigned int nr_args; 487 unsigned int nr_args;
213 struct probe_arg args[]; 488 struct probe_arg args[];
214}; 489};
@@ -217,6 +492,7 @@ struct trace_probe {
217 (offsetof(struct trace_probe, args) + \ 492 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n))) 493 (sizeof(struct probe_arg) * (n)))
219 494
495
220static __kprobes int probe_is_return(struct trace_probe *tp) 496static __kprobes int probe_is_return(struct trace_probe *tp)
221{ 497{
222 return tp->rp.handler != NULL; 498 return tp->rp.handler != NULL;
@@ -227,51 +503,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
227 return tp->symbol ? tp->symbol : "unknown"; 503 return tp->symbol ? tp->symbol : "unknown";
228} 504}
229 505
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp); 506static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp); 507static void unregister_probe_event(struct trace_probe *tp);
277 508
@@ -282,8 +513,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
282static int kretprobe_dispatcher(struct kretprobe_instance *ri, 513static int kretprobe_dispatcher(struct kretprobe_instance *ri,
283 struct pt_regs *regs); 514 struct pt_regs *regs);
284 515
285/* Check the name is good for event/group */ 516/* Check the name is good for event/group/fields */
286static int check_event_name(const char *name) 517static int is_good_name(const char *name)
287{ 518{
288 if (!isalpha(*name) && *name != '_') 519 if (!isalpha(*name) && *name != '_')
289 return 0; 520 return 0;
@@ -325,22 +556,23 @@ static struct trace_probe *alloc_trace_probe(const char *group,
325 else 556 else
326 tp->rp.kp.pre_handler = kprobe_dispatcher; 557 tp->rp.kp.pre_handler = kprobe_dispatcher;
327 558
328 if (!event || !check_event_name(event)) { 559 if (!event || !is_good_name(event)) {
329 ret = -EINVAL; 560 ret = -EINVAL;
330 goto error; 561 goto error;
331 } 562 }
332 563
564 tp->call.class = &tp->class;
333 tp->call.name = kstrdup(event, GFP_KERNEL); 565 tp->call.name = kstrdup(event, GFP_KERNEL);
334 if (!tp->call.name) 566 if (!tp->call.name)
335 goto error; 567 goto error;
336 568
337 if (!group || !check_event_name(group)) { 569 if (!group || !is_good_name(group)) {
338 ret = -EINVAL; 570 ret = -EINVAL;
339 goto error; 571 goto error;
340 } 572 }
341 573
342 tp->call.system = kstrdup(group, GFP_KERNEL); 574 tp->class.system = kstrdup(group, GFP_KERNEL);
343 if (!tp->call.system) 575 if (!tp->class.system)
344 goto error; 576 goto error;
345 577
346 INIT_LIST_HEAD(&tp->list); 578 INIT_LIST_HEAD(&tp->list);
@@ -354,11 +586,12 @@ error:
354 586
355static void free_probe_arg(struct probe_arg *arg) 587static void free_probe_arg(struct probe_arg *arg)
356{ 588{
357 if (arg->fetch.func == fetch_symbol) 589 if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
590 free_deref_fetch_param(arg->fetch.data);
591 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
358 free_symbol_cache(arg->fetch.data); 592 free_symbol_cache(arg->fetch.data);
359 else if (arg->fetch.func == fetch_indirect)
360 free_indirect_fetch_data(arg->fetch.data);
361 kfree(arg->name); 593 kfree(arg->name);
594 kfree(arg->comm);
362} 595}
363 596
364static void free_trace_probe(struct trace_probe *tp) 597static void free_trace_probe(struct trace_probe *tp)
@@ -368,7 +601,7 @@ static void free_trace_probe(struct trace_probe *tp)
368 for (i = 0; i < tp->nr_args; i++) 601 for (i = 0; i < tp->nr_args; i++)
369 free_probe_arg(&tp->args[i]); 602 free_probe_arg(&tp->args[i]);
370 603
371 kfree(tp->call.system); 604 kfree(tp->call.class->system);
372 kfree(tp->call.name); 605 kfree(tp->call.name);
373 kfree(tp->symbol); 606 kfree(tp->symbol);
374 kfree(tp); 607 kfree(tp);
@@ -381,7 +614,7 @@ static struct trace_probe *find_probe_event(const char *event,
381 614
382 list_for_each_entry(tp, &probe_list, list) 615 list_for_each_entry(tp, &probe_list, list)
383 if (strcmp(tp->call.name, event) == 0 && 616 if (strcmp(tp->call.name, event) == 0 &&
384 strcmp(tp->call.system, group) == 0) 617 strcmp(tp->call.class->system, group) == 0)
385 return tp; 618 return tp;
386 return NULL; 619 return NULL;
387} 620}
@@ -406,7 +639,7 @@ static int register_trace_probe(struct trace_probe *tp)
406 mutex_lock(&probe_lock); 639 mutex_lock(&probe_lock);
407 640
408 /* register as an event */ 641 /* register as an event */
409 old_tp = find_probe_event(tp->call.name, tp->call.system); 642 old_tp = find_probe_event(tp->call.name, tp->call.class->system);
410 if (old_tp) { 643 if (old_tp) {
411 /* delete old event */ 644 /* delete old event */
412 unregister_trace_probe(old_tp); 645 unregister_trace_probe(old_tp);
@@ -414,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp)
414 } 647 }
415 ret = register_probe_event(tp); 648 ret = register_probe_event(tp);
416 if (ret) { 649 if (ret) {
417 pr_warning("Faild to register probe event(%d)\n", ret); 650 pr_warning("Failed to register probe event(%d)\n", ret);
418 goto end; 651 goto end;
419 } 652 }
420 653
@@ -464,46 +697,41 @@ static int split_symbol_offset(char *symbol, unsigned long *offset)
464#define PARAM_MAX_ARGS 16 697#define PARAM_MAX_ARGS 16
465#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) 698#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
466 699
467static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) 700static int parse_probe_vars(char *arg, const struct fetch_type *t,
701 struct fetch_param *f, int is_return)
468{ 702{
469 int ret = 0; 703 int ret = 0;
470 unsigned long param; 704 unsigned long param;
471 705
472 if (strcmp(arg, "retval") == 0) { 706 if (strcmp(arg, "retval") == 0) {
473 if (is_return) { 707 if (is_return)
474 ff->func = fetch_retvalue; 708 f->fn = t->fetch[FETCH_MTD_retval];
475 ff->data = NULL; 709 else
476 } else
477 ret = -EINVAL; 710 ret = -EINVAL;
478 } else if (strncmp(arg, "stack", 5) == 0) { 711 } else if (strncmp(arg, "stack", 5) == 0) {
479 if (arg[5] == '\0') { 712 if (arg[5] == '\0') {
480 ff->func = fetch_stack_address; 713 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
481 ff->data = NULL; 714 f->fn = fetch_stack_address;
715 else
716 ret = -EINVAL;
482 } else if (isdigit(arg[5])) { 717 } else if (isdigit(arg[5])) {
483 ret = strict_strtoul(arg + 5, 10, &param); 718 ret = strict_strtoul(arg + 5, 10, &param);
484 if (ret || param > PARAM_MAX_STACK) 719 if (ret || param > PARAM_MAX_STACK)
485 ret = -EINVAL; 720 ret = -EINVAL;
486 else { 721 else {
487 ff->func = fetch_stack; 722 f->fn = t->fetch[FETCH_MTD_stack];
488 ff->data = (void *)param; 723 f->data = (void *)param;
489 } 724 }
490 } else 725 } else
491 ret = -EINVAL; 726 ret = -EINVAL;
492 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
493 ret = strict_strtoul(arg + 3, 10, &param);
494 if (ret || param > PARAM_MAX_ARGS)
495 ret = -EINVAL;
496 else {
497 ff->func = fetch_argument;
498 ff->data = (void *)param;
499 }
500 } else 727 } else
501 ret = -EINVAL; 728 ret = -EINVAL;
502 return ret; 729 return ret;
503} 730}
504 731
505/* Recursive argument parser */ 732/* Recursive argument parser */
506static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 733static int __parse_probe_arg(char *arg, const struct fetch_type *t,
734 struct fetch_param *f, int is_return)
507{ 735{
508 int ret = 0; 736 int ret = 0;
509 unsigned long param; 737 unsigned long param;
@@ -512,13 +740,13 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
512 740
513 switch (arg[0]) { 741 switch (arg[0]) {
514 case '$': 742 case '$':
515 ret = parse_probe_vars(arg + 1, ff, is_return); 743 ret = parse_probe_vars(arg + 1, t, f, is_return);
516 break; 744 break;
517 case '%': /* named register */ 745 case '%': /* named register */
518 ret = regs_query_register_offset(arg + 1); 746 ret = regs_query_register_offset(arg + 1);
519 if (ret >= 0) { 747 if (ret >= 0) {
520 ff->func = fetch_register; 748 f->fn = t->fetch[FETCH_MTD_reg];
521 ff->data = (void *)(unsigned long)ret; 749 f->data = (void *)(unsigned long)ret;
522 ret = 0; 750 ret = 0;
523 } 751 }
524 break; 752 break;
@@ -527,26 +755,22 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
527 ret = strict_strtoul(arg + 1, 0, &param); 755 ret = strict_strtoul(arg + 1, 0, &param);
528 if (ret) 756 if (ret)
529 break; 757 break;
530 ff->func = fetch_memory; 758 f->fn = t->fetch[FETCH_MTD_memory];
531 ff->data = (void *)param; 759 f->data = (void *)param;
532 } else { 760 } else {
533 ret = split_symbol_offset(arg + 1, &offset); 761 ret = split_symbol_offset(arg + 1, &offset);
534 if (ret) 762 if (ret)
535 break; 763 break;
536 ff->data = alloc_symbol_cache(arg + 1, offset); 764 f->data = alloc_symbol_cache(arg + 1, offset);
537 if (ff->data) 765 if (f->data)
538 ff->func = fetch_symbol; 766 f->fn = t->fetch[FETCH_MTD_symbol];
539 else
540 ret = -EINVAL;
541 } 767 }
542 break; 768 break;
543 case '+': /* indirect memory */ 769 case '+': /* deref memory */
544 case '-': 770 case '-':
545 tmp = strchr(arg, '('); 771 tmp = strchr(arg, '(');
546 if (!tmp) { 772 if (!tmp)
547 ret = -EINVAL;
548 break; 773 break;
549 }
550 *tmp = '\0'; 774 *tmp = '\0';
551 ret = strict_strtol(arg + 1, 0, &offset); 775 ret = strict_strtol(arg + 1, 0, &offset);
552 if (ret) 776 if (ret)
@@ -556,38 +780,68 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
556 arg = tmp + 1; 780 arg = tmp + 1;
557 tmp = strrchr(arg, ')'); 781 tmp = strrchr(arg, ')');
558 if (tmp) { 782 if (tmp) {
559 struct indirect_fetch_data *id; 783 struct deref_fetch_param *dprm;
784 const struct fetch_type *t2 = find_fetch_type(NULL);
560 *tmp = '\0'; 785 *tmp = '\0';
561 id = kzalloc(sizeof(struct indirect_fetch_data), 786 dprm = kzalloc(sizeof(struct deref_fetch_param),
562 GFP_KERNEL); 787 GFP_KERNEL);
563 if (!id) 788 if (!dprm)
564 return -ENOMEM; 789 return -ENOMEM;
565 id->offset = offset; 790 dprm->offset = offset;
566 ret = __parse_probe_arg(arg, &id->orig, is_return); 791 ret = __parse_probe_arg(arg, t2, &dprm->orig,
792 is_return);
567 if (ret) 793 if (ret)
568 kfree(id); 794 kfree(dprm);
569 else { 795 else {
570 ff->func = fetch_indirect; 796 f->fn = t->fetch[FETCH_MTD_deref];
571 ff->data = (void *)id; 797 f->data = (void *)dprm;
572 } 798 }
573 } else 799 }
574 ret = -EINVAL;
575 break; 800 break;
576 default: 801 }
577 /* TODO: support custom handler */ 802 if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
803 pr_info("%s type has no corresponding fetch method.\n",
804 t->name);
578 ret = -EINVAL; 805 ret = -EINVAL;
579 } 806 }
580 return ret; 807 return ret;
581} 808}
582 809
583/* String length checking wrapper */ 810/* String length checking wrapper */
584static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 811static int parse_probe_arg(char *arg, struct trace_probe *tp,
812 struct probe_arg *parg, int is_return)
585{ 813{
814 const char *t;
815 int ret;
816
586 if (strlen(arg) > MAX_ARGSTR_LEN) { 817 if (strlen(arg) > MAX_ARGSTR_LEN) {
587 pr_info("Argument is too long: %s\n", arg); 818
588 return -ENOSPC; 819 return -ENOSPC;
589 } 820 }
590 return __parse_probe_arg(arg, ff, is_return); 821 parg->comm = kstrdup(arg, GFP_KERNEL);
822 if (!parg->comm) {
823 pr_info("Failed to allocate memory for command '%s'.\n", arg);
824 return -ENOMEM;
825 }
826 t = strchr(parg->comm, ':');
827 if (t) {
828 arg[t - parg->comm] = '\0';
829 t++;
830 }
831 parg->type = find_fetch_type(t);
832 if (!parg->type) {
833 pr_info("Unsupported type: %s\n", t);
834 return -EINVAL;
835 }
836 parg->offset = tp->size;
837 tp->size += parg->type->size;
838 ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
839 if (ret >= 0) {
840 parg->fetch_size.fn = get_fetch_size_function(parg->type,
841 parg->fetch.fn);
842 parg->fetch_size.data = parg->fetch.data;
843 }
844 return ret;
591} 845}
592 846
593/* Return 1 if name is reserved or already used by another argument */ 847/* Return 1 if name is reserved or already used by another argument */
@@ -611,22 +865,24 @@ static int create_trace_probe(int argc, char **argv)
611 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 865 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
612 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 866 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
613 * Fetch args: 867 * Fetch args:
614 * $argN : fetch Nth of function argument. (N:0-)
615 * $retval : fetch return value 868 * $retval : fetch return value
616 * $stack : fetch stack address 869 * $stack : fetch stack address
617 * $stackN : fetch Nth of stack (N:0-) 870 * $stackN : fetch Nth of stack (N:0-)
618 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 871 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
619 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 872 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
620 * %REG : fetch register REG 873 * %REG : fetch register REG
621 * Indirect memory fetch: 874 * Dereferencing memory fetch:
622 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 875 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
623 * Alias name of args: 876 * Alias name of args:
624 * NAME=FETCHARG : set NAME as alias of FETCHARG. 877 * NAME=FETCHARG : set NAME as alias of FETCHARG.
878 * Type of args:
879 * FETCHARG:TYPE : use TYPE instead of unsigned long.
625 */ 880 */
626 struct trace_probe *tp; 881 struct trace_probe *tp;
627 int i, ret = 0; 882 int i, ret = 0;
628 int is_return = 0, is_delete = 0; 883 int is_return = 0, is_delete = 0;
629 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 884 char *symbol = NULL, *event = NULL, *group = NULL;
885 char *arg;
630 unsigned long offset = 0; 886 unsigned long offset = 0;
631 void *addr = NULL; 887 void *addr = NULL;
632 char buf[MAX_EVENT_NAME_LEN]; 888 char buf[MAX_EVENT_NAME_LEN];
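The block comment above spells out the kprobe_events grammar, including the NAME=FETCHARG and FETCHARG:TYPE forms. As a hedged usage sketch, a probe with a typed string argument could be registered by appending one such line to the tracefs control file; the mount point, the do_sys_open symbol and the x86-64 register names are assumptions made for illustration (Documentation/trace/kprobetrace.txt is the authoritative reference):

	#include <stdio.h>

	int main(void)
	{
		/* p[:[GRP/]EVENT] SYMBOL ARG=FETCHARG[:TYPE] ... */
		const char *probe =
			"p:myprobes/open_entry do_sys_open "
			"dfd=%di filename=+0(%si):string flags=%dx:s32\n";
		FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

		if (!f) {
			perror("kprobe_events");
			return 1;
		}
		fputs(probe, f);	/* each written line defines one new probe */
		return fclose(f);
	}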
@@ -651,12 +907,12 @@ static int create_trace_probe(int argc, char **argv)
651 event = strchr(group, '/') + 1; 907 event = strchr(group, '/') + 1;
652 event[-1] = '\0'; 908 event[-1] = '\0';
653 if (strlen(group) == 0) { 909 if (strlen(group) == 0) {
654 pr_info("Group name is not specifiled\n"); 910 pr_info("Group name is not specified\n");
655 return -EINVAL; 911 return -EINVAL;
656 } 912 }
657 } 913 }
658 if (strlen(event) == 0) { 914 if (strlen(event) == 0) {
659 pr_info("Event name is not specifiled\n"); 915 pr_info("Event name is not specified\n");
660 return -EINVAL; 916 return -EINVAL;
661 } 917 }
662 } 918 }
@@ -668,14 +924,17 @@ static int create_trace_probe(int argc, char **argv)
668 pr_info("Delete command needs an event name.\n"); 924 pr_info("Delete command needs an event name.\n");
669 return -EINVAL; 925 return -EINVAL;
670 } 926 }
927 mutex_lock(&probe_lock);
671 tp = find_probe_event(event, group); 928 tp = find_probe_event(event, group);
672 if (!tp) { 929 if (!tp) {
930 mutex_unlock(&probe_lock);
673 pr_info("Event %s/%s doesn't exist.\n", group, event); 931 pr_info("Event %s/%s doesn't exist.\n", group, event);
674 return -ENOENT; 932 return -ENOENT;
675 } 933 }
676 /* delete an event */ 934 /* delete an event */
677 unregister_trace_probe(tp); 935 unregister_trace_probe(tp);
678 free_trace_probe(tp); 936 free_trace_probe(tp);
937 mutex_unlock(&probe_lock);
679 return 0; 938 return 0;
680 } 939 }
681 940
@@ -689,7 +948,7 @@ static int create_trace_probe(int argc, char **argv)
689 return -EINVAL; 948 return -EINVAL;
690 } 949 }
691 /* an address specified */ 950 /* an address specified */
692 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 951 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
693 if (ret) { 952 if (ret) {
694 pr_info("Failed to parse address.\n"); 953 pr_info("Failed to parse address.\n");
695 return ret; 954 return ret;
@@ -732,37 +991,47 @@ static int create_trace_probe(int argc, char **argv)
732 /* parse arguments */ 991 /* parse arguments */
733 ret = 0; 992 ret = 0;
734 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 993 for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
994 /* Increment count for freeing args in error case */
995 tp->nr_args++;
996
735 /* Parse argument name */ 997 /* Parse argument name */
736 arg = strchr(argv[i], '='); 998 arg = strchr(argv[i], '=');
737 if (arg) 999 if (arg) {
738 *arg++ = '\0'; 1000 *arg++ = '\0';
739 else 1001 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
1002 } else {
740 arg = argv[i]; 1003 arg = argv[i];
1004 /* If argument name is omitted, set "argN" */
1005 snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
1006 tp->args[i].name = kstrdup(buf, GFP_KERNEL);
1007 }
741 1008
742 if (conflict_field_name(argv[i], tp->args, i)) { 1009 if (!tp->args[i].name) {
743 pr_info("Argument%d name '%s' conflicts with " 1010 pr_info("Failed to allocate argument[%d] name.\n", i);
744 "another field.\n", i, argv[i]); 1011 ret = -ENOMEM;
1012 goto error;
1013 }
1014
1015 if (!is_good_name(tp->args[i].name)) {
1016 pr_info("Invalid argument[%d] name: %s\n",
1017 i, tp->args[i].name);
745 ret = -EINVAL; 1018 ret = -EINVAL;
746 goto error; 1019 goto error;
747 } 1020 }
748 1021
749 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 1022 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
750 if (!tp->args[i].name) { 1023 pr_info("Argument[%d] name '%s' conflicts with "
751 pr_info("Failed to allocate argument%d name '%s'.\n", 1024 "another field.\n", i, argv[i]);
752 i, argv[i]); 1025 ret = -EINVAL;
753 ret = -ENOMEM;
754 goto error; 1026 goto error;
755 } 1027 }
756 1028
757 /* Parse fetch argument */ 1029 /* Parse fetch argument */
758 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); 1030 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
759 if (ret) { 1031 if (ret) {
760 pr_info("Parse error at argument%d. (%d)\n", i, ret); 1032 pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
761 kfree(tp->args[i].name);
762 goto error; 1033 goto error;
763 } 1034 }
764
765 tp->nr_args++;
766 } 1035 }
767 1036
768 ret = register_trace_probe(tp); 1037 ret = register_trace_probe(tp);
@@ -810,11 +1079,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
810static int probes_seq_show(struct seq_file *m, void *v) 1079static int probes_seq_show(struct seq_file *m, void *v)
811{ 1080{
812 struct trace_probe *tp = v; 1081 struct trace_probe *tp = v;
813 int i, ret; 1082 int i;
814 char buf[MAX_ARGSTR_LEN + 1];
815 1083
816 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 1084 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
817 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); 1085 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
818 1086
819 if (!tp->symbol) 1087 if (!tp->symbol)
820 seq_printf(m, " 0x%p", tp->rp.kp.addr); 1088 seq_printf(m, " 0x%p", tp->rp.kp.addr);
@@ -823,15 +1091,10 @@ static int probes_seq_show(struct seq_file *m, void *v)
823 else 1091 else
824 seq_printf(m, " %s", probe_symbol(tp)); 1092 seq_printf(m, " %s", probe_symbol(tp));
825 1093
826 for (i = 0; i < tp->nr_args; i++) { 1094 for (i = 0; i < tp->nr_args; i++)
827 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); 1095 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
828 if (ret < 0) {
829 pr_warning("Argument%d decoding error(%d).\n", i, ret);
830 return ret;
831 }
832 seq_printf(m, " %s=%s", tp->args[i].name, buf);
833 }
834 seq_printf(m, "\n"); 1096 seq_printf(m, "\n");
1097
835 return 0; 1098 return 0;
836} 1099}
837 1100
@@ -957,14 +1220,62 @@ static const struct file_operations kprobe_profile_ops = {
957 .release = seq_release, 1220 .release = seq_release,
958}; 1221};
959 1222
 1223/* Sum up total data length for dynamic arrays (strings) */
1224static __kprobes int __get_data_size(struct trace_probe *tp,
1225 struct pt_regs *regs)
1226{
1227 int i, ret = 0;
1228 u32 len;
1229
1230 for (i = 0; i < tp->nr_args; i++)
1231 if (unlikely(tp->args[i].fetch_size.fn)) {
1232 call_fetch(&tp->args[i].fetch_size, regs, &len);
1233 ret += len;
1234 }
1235
1236 return ret;
1237}
1238
1239/* Store the value of each argument */
1240static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
1241 struct pt_regs *regs,
1242 u8 *data, int maxlen)
1243{
1244 int i;
1245 u32 end = tp->size;
1246 u32 *dl; /* Data (relative) location */
1247
1248 for (i = 0; i < tp->nr_args; i++) {
1249 if (unlikely(tp->args[i].fetch_size.fn)) {
1250 /*
1251 * First, we set the relative location and
1252 * maximum data length to *dl
1253 */
1254 dl = (u32 *)(data + tp->args[i].offset);
1255 *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
1256 /* Then try to fetch string or dynamic array data */
1257 call_fetch(&tp->args[i].fetch, regs, dl);
1258 /* Reduce maximum length */
1259 end += get_rloc_len(*dl);
1260 maxlen -= get_rloc_len(*dl);
1261 /* Trick here, convert data_rloc to data_loc */
1262 *dl = convert_rloc_to_loc(*dl,
1263 ent_size + tp->args[i].offset);
1264 } else
1265 /* Just fetching data normally */
1266 call_fetch(&tp->args[i].fetch, regs,
1267 data + tp->args[i].offset);
1268 }
1269}
1270
960/* Kprobe handler */ 1271/* Kprobe handler */
961static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1272static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
962{ 1273{
963 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1274 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
964 struct kprobe_trace_entry *entry; 1275 struct kprobe_trace_entry_head *entry;
965 struct ring_buffer_event *event; 1276 struct ring_buffer_event *event;
966 struct ring_buffer *buffer; 1277 struct ring_buffer *buffer;
967 int size, i, pc; 1278 int size, dsize, pc;
968 unsigned long irq_flags; 1279 unsigned long irq_flags;
969 struct ftrace_event_call *call = &tp->call; 1280 struct ftrace_event_call *call = &tp->call;
970 1281
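__get_data_size() and store_trace_args() above lay each record out as the entry head, then one fixed-size slot per argument (at probe_arg.offset), then the variable-length string data appended after tp->size, with every string slot holding a data_loc word. A user-space model of that layout for one u32 argument plus one string argument; the struct name, sizes and offsets are illustrative only:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct entry_head { uint64_t ip; };	/* stands in for kprobe_trace_entry_head */

	int main(void)
	{
		const char *str = "foo";
		uint32_t slen = strlen(str) + 1;	/* like the string_size fetch: includes the NUL */
		uint8_t rec[64] = { 0 };
		uint32_t fixed = sizeof(struct entry_head) + 4 + 4;	/* u32 slot + data_loc slot */
		uint32_t data_loc = (slen << 16) | fixed;	/* offset is relative to the entry */
		uint32_t arg0 = 42, readback;

		memcpy(rec + sizeof(struct entry_head), &arg0, 4);		/* fixed u32 argument */
		memcpy(rec + sizeof(struct entry_head) + 4, &data_loc, 4);	/* data_loc word */
		memcpy(rec + fixed, str, slen);					/* dynamic tail */

		/* A printer finds the string the way get_loc_data() does. */
		memcpy(&readback, rec + sizeof(struct entry_head), 4);
		printf("arg0=%u str=\"%s\"\n", readback,
		       (char *)(rec + (data_loc & 0xffff)));
		return 0;
	}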
@@ -973,72 +1284,67 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
973 local_save_flags(irq_flags); 1284 local_save_flags(irq_flags);
974 pc = preempt_count(); 1285 pc = preempt_count();
975 1286
976 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1287 dsize = __get_data_size(tp, regs);
1288 size = sizeof(*entry) + tp->size + dsize;
977 1289
978 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1290 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
979 irq_flags, pc); 1291 size, irq_flags, pc);
980 if (!event) 1292 if (!event)
981 return 0; 1293 return;
982 1294
983 entry = ring_buffer_event_data(event); 1295 entry = ring_buffer_event_data(event);
984 entry->nargs = tp->nr_args;
985 entry->ip = (unsigned long)kp->addr; 1296 entry->ip = (unsigned long)kp->addr;
986 for (i = 0; i < tp->nr_args; i++) 1297 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
987 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
988 1298
989 if (!filter_current_check_discard(buffer, call, entry, event)) 1299 if (!filter_current_check_discard(buffer, call, entry, event))
990 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1300 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
991 return 0;
992} 1301}
993 1302
994/* Kretprobe handler */ 1303/* Kretprobe handler */
995static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, 1304static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
996 struct pt_regs *regs) 1305 struct pt_regs *regs)
997{ 1306{
998 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1307 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
999 struct kretprobe_trace_entry *entry; 1308 struct kretprobe_trace_entry_head *entry;
1000 struct ring_buffer_event *event; 1309 struct ring_buffer_event *event;
1001 struct ring_buffer *buffer; 1310 struct ring_buffer *buffer;
1002 int size, i, pc; 1311 int size, pc, dsize;
1003 unsigned long irq_flags; 1312 unsigned long irq_flags;
1004 struct ftrace_event_call *call = &tp->call; 1313 struct ftrace_event_call *call = &tp->call;
1005 1314
1006 local_save_flags(irq_flags); 1315 local_save_flags(irq_flags);
1007 pc = preempt_count(); 1316 pc = preempt_count();
1008 1317
1009 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1318 dsize = __get_data_size(tp, regs);
1319 size = sizeof(*entry) + tp->size + dsize;
1010 1320
1011 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1321 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1012 irq_flags, pc); 1322 size, irq_flags, pc);
1013 if (!event) 1323 if (!event)
1014 return 0; 1324 return;
1015 1325
1016 entry = ring_buffer_event_data(event); 1326 entry = ring_buffer_event_data(event);
1017 entry->nargs = tp->nr_args;
1018 entry->func = (unsigned long)tp->rp.kp.addr; 1327 entry->func = (unsigned long)tp->rp.kp.addr;
1019 entry->ret_ip = (unsigned long)ri->ret_addr; 1328 entry->ret_ip = (unsigned long)ri->ret_addr;
1020 for (i = 0; i < tp->nr_args; i++) 1329 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1021 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1022 1330
1023 if (!filter_current_check_discard(buffer, call, entry, event)) 1331 if (!filter_current_check_discard(buffer, call, entry, event))
1024 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1332 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1025
1026 return 0;
1027} 1333}
1028 1334
1029/* Event entry printers */ 1335/* Event entry printers */
1030enum print_line_t 1336enum print_line_t
1031print_kprobe_event(struct trace_iterator *iter, int flags) 1337print_kprobe_event(struct trace_iterator *iter, int flags,
1338 struct trace_event *event)
1032{ 1339{
1033 struct kprobe_trace_entry *field; 1340 struct kprobe_trace_entry_head *field;
1034 struct trace_seq *s = &iter->seq; 1341 struct trace_seq *s = &iter->seq;
1035 struct trace_event *event;
1036 struct trace_probe *tp; 1342 struct trace_probe *tp;
1343 u8 *data;
1037 int i; 1344 int i;
1038 1345
1039 field = (struct kprobe_trace_entry *)iter->ent; 1346 field = (struct kprobe_trace_entry_head *)iter->ent;
1040 event = ftrace_find_event(field->ent.type); 1347 tp = container_of(event, struct trace_probe, call.event);
1041 tp = container_of(event, struct trace_probe, event);
1042 1348
1043 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1349 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1044 goto partial; 1350 goto partial;
@@ -1049,9 +1355,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
1049 if (!trace_seq_puts(s, ")")) 1355 if (!trace_seq_puts(s, ")"))
1050 goto partial; 1356 goto partial;
1051 1357
1052 for (i = 0; i < field->nargs; i++) 1358 data = (u8 *)&field[1];
1053 if (!trace_seq_printf(s, " %s=%lx", 1359 for (i = 0; i < tp->nr_args; i++)
1054 tp->args[i].name, field->args[i])) 1360 if (!tp->args[i].type->print(s, tp->args[i].name,
1361 data + tp->args[i].offset, field))
1055 goto partial; 1362 goto partial;
1056 1363
1057 if (!trace_seq_puts(s, "\n")) 1364 if (!trace_seq_puts(s, "\n"))
@@ -1063,17 +1370,17 @@ partial:
1063} 1370}
1064 1371
1065enum print_line_t 1372enum print_line_t
1066print_kretprobe_event(struct trace_iterator *iter, int flags) 1373print_kretprobe_event(struct trace_iterator *iter, int flags,
1374 struct trace_event *event)
1067{ 1375{
1068 struct kretprobe_trace_entry *field; 1376 struct kretprobe_trace_entry_head *field;
1069 struct trace_seq *s = &iter->seq; 1377 struct trace_seq *s = &iter->seq;
1070 struct trace_event *event;
1071 struct trace_probe *tp; 1378 struct trace_probe *tp;
1379 u8 *data;
1072 int i; 1380 int i;
1073 1381
1074 field = (struct kretprobe_trace_entry *)iter->ent; 1382 field = (struct kretprobe_trace_entry_head *)iter->ent;
1075 event = ftrace_find_event(field->ent.type); 1383 tp = container_of(event, struct trace_probe, call.event);
1076 tp = container_of(event, struct trace_probe, event);
1077 1384
1078 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1385 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1079 goto partial; 1386 goto partial;
@@ -1090,9 +1397,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
1090 if (!trace_seq_puts(s, ")")) 1397 if (!trace_seq_puts(s, ")"))
1091 goto partial; 1398 goto partial;
1092 1399
1093 for (i = 0; i < field->nargs; i++) 1400 data = (u8 *)&field[1];
1094 if (!trace_seq_printf(s, " %s=%lx", 1401 for (i = 0; i < tp->nr_args; i++)
1095 tp->args[i].name, field->args[i])) 1402 if (!tp->args[i].type->print(s, tp->args[i].name,
1403 data + tp->args[i].offset, field))
1096 goto partial; 1404 goto partial;
1097 1405
1098 if (!trace_seq_puts(s, "\n")) 1406 if (!trace_seq_puts(s, "\n"))
@@ -1127,13 +1435,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
1127 } 1435 }
1128} 1436}
1129 1437
1130static int probe_event_raw_init(struct ftrace_event_call *event_call)
1131{
1132 INIT_LIST_HEAD(&event_call->fields);
1133
1134 return 0;
1135}
1136
1137#undef DEFINE_FIELD 1438#undef DEFINE_FIELD
1138#define DEFINE_FIELD(type, item, name, is_signed) \ 1439#define DEFINE_FIELD(type, item, name, is_signed) \
1139 do { \ 1440 do { \
@@ -1148,242 +1449,172 @@ static int probe_event_raw_init(struct ftrace_event_call *event_call)
1148static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1449static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1149{ 1450{
1150 int ret, i; 1451 int ret, i;
1151 struct kprobe_trace_entry field; 1452 struct kprobe_trace_entry_head field;
1152 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1453 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1153 1454
1154 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1455 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1155 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1156 /* Set argument names as fields */ 1456 /* Set argument names as fields */
1157 for (i = 0; i < tp->nr_args; i++) 1457 for (i = 0; i < tp->nr_args; i++) {
1158 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1458 ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1459 tp->args[i].name,
1460 sizeof(field) + tp->args[i].offset,
1461 tp->args[i].type->size,
1462 tp->args[i].type->is_signed,
1463 FILTER_OTHER);
1464 if (ret)
1465 return ret;
1466 }
1159 return 0; 1467 return 0;
1160} 1468}
1161 1469
1162static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 1470static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1163{ 1471{
1164 int ret, i; 1472 int ret, i;
1165 struct kretprobe_trace_entry field; 1473 struct kretprobe_trace_entry_head field;
1166 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1474 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1167 1475
1168 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1476 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1169 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1477 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1170 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1171 /* Set argument names as fields */ 1478 /* Set argument names as fields */
1172 for (i = 0; i < tp->nr_args; i++) 1479 for (i = 0; i < tp->nr_args; i++) {
1173 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1480 ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1481 tp->args[i].name,
1482 sizeof(field) + tp->args[i].offset,
1483 tp->args[i].type->size,
1484 tp->args[i].type->is_signed,
1485 FILTER_OTHER);
1486 if (ret)
1487 return ret;
1488 }
1174 return 0; 1489 return 0;
1175} 1490}
1176 1491
1177static int __probe_event_show_format(struct trace_seq *s, 1492static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1178 struct trace_probe *tp, const char *fmt,
1179 const char *arg)
1180{ 1493{
1181 int i; 1494 int i;
1495 int pos = 0;
1182 1496
1183 /* Show format */ 1497 const char *fmt, *arg;
1184 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1185 return 0;
1186
1187 for (i = 0; i < tp->nr_args; i++)
1188 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
1189 return 0;
1190 1498
1191 if (!trace_seq_printf(s, "\", %s", arg)) 1499 if (!probe_is_return(tp)) {
1192 return 0; 1500 fmt = "(%lx)";
1501 arg = "REC->" FIELD_STRING_IP;
1502 } else {
1503 fmt = "(%lx <- %lx)";
1504 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1505 }
1193 1506
1194 for (i = 0; i < tp->nr_args; i++) 1507 /* When len=0, we just calculate the needed length */
1195 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) 1508#define LEN_OR_ZERO (len ? len - pos : 0)
1196 return 0;
1197 1509
1198 return trace_seq_puts(s, "\n"); 1510 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1199}
1200 1511
1201#undef SHOW_FIELD 1512 for (i = 0; i < tp->nr_args; i++) {
1202#define SHOW_FIELD(type, item, name) \ 1513 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1203 do { \ 1514 tp->args[i].name, tp->args[i].type->fmt);
1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ 1515 }
1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1206 (unsigned int)offsetof(typeof(field), item),\
1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1209 if (!ret) \
1210 return 0; \
1211 } while (0)
1212 1516
1213static int kprobe_event_show_format(struct ftrace_event_call *call, 1517 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1214 struct trace_seq *s)
1215{
1216 struct kprobe_trace_entry field __attribute__((unused));
1217 int ret, i;
1218 struct trace_probe *tp = (struct trace_probe *)call->data;
1219 1518
1220 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); 1519 for (i = 0; i < tp->nr_args; i++) {
1221 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1520 if (strcmp(tp->args[i].type->name, "string") == 0)
1521 pos += snprintf(buf + pos, LEN_OR_ZERO,
1522 ", __get_str(%s)",
1523 tp->args[i].name);
1524 else
1525 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1526 tp->args[i].name);
1527 }
1222 1528
1223 /* Show fields */ 1529#undef LEN_OR_ZERO
1224 for (i = 0; i < tp->nr_args; i++)
1225 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1226 trace_seq_puts(s, "\n");
1227 1530
1228 return __probe_event_show_format(s, tp, "(%lx)", 1531 /* return the length of print_fmt */
1229 "REC->" FIELD_STRING_IP); 1532 return pos;
1230} 1533}
1231 1534
1232static int kretprobe_event_show_format(struct ftrace_event_call *call, 1535static int set_print_fmt(struct trace_probe *tp)
1233 struct trace_seq *s)
1234{ 1536{
1235 struct kretprobe_trace_entry field __attribute__((unused)); 1537 int len;
1236 int ret, i; 1538 char *print_fmt;
1237 struct trace_probe *tp = (struct trace_probe *)call->data;
1238 1539
1239 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); 1540 /* First: called with 0 length to calculate the needed length */
1240 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); 1541 len = __set_print_fmt(tp, NULL, 0);
1241 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1542 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1543 if (!print_fmt)
1544 return -ENOMEM;
1242 1545
1243 /* Show fields */ 1546 /* Second: actually write the @print_fmt */
1244 for (i = 0; i < tp->nr_args; i++) 1547 __set_print_fmt(tp, print_fmt, len + 1);
1245 SHOW_FIELD(unsigned long, args[i], tp->args[i].name); 1548 tp->call.print_fmt = print_fmt;
1246 trace_seq_puts(s, "\n");
1247 1549
1248 return __probe_event_show_format(s, tp, "(%lx <- %lx)", 1550 return 0;
1249 "REC->" FIELD_STRING_FUNC
1250 ", REC->" FIELD_STRING_RETIP);
1251} 1551}
1252 1552
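[Editorial aside] The LEN_OR_ZERO macro above is the standard two-pass snprintf() sizing idiom: __set_print_fmt() is first called with a NULL buffer and len == 0 so each snprintf() only reports how much it would have written, then set_print_fmt() allocates exactly that much and runs the same code again to fill the buffer. A minimal userspace sketch of the pattern (build_fmt/make_print_fmt are hypothetical names, not from the patch):

	#include <stdio.h>
	#include <stdlib.h>

	/* Pass 1 (len == 0): nothing is written, only the needed size is
	 * accumulated. Pass 2 (len == needed + 1): the buffer is filled. */
	static int build_fmt(char *buf, int len, const char *name)
	{
		int pos = 0;

		pos += snprintf(buf ? buf + pos : NULL, len ? len - pos : 0,
				"\"%%lx %s=%%lx\"", name);
		pos += snprintf(buf ? buf + pos : NULL, len ? len - pos : 0,
				", REC->ip, REC->%s", name);
		return pos;		/* length excluding the trailing NUL */
	}

	static char *make_print_fmt(const char *name)
	{
		int len = build_fmt(NULL, 0, name);	/* first pass: size only */
		char *fmt = malloc(len + 1);

		if (!fmt)
			return NULL;
		build_fmt(fmt, len + 1, name);		/* second pass: fill */
		return fmt;
	}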
1253#ifdef CONFIG_EVENT_PROFILE 1553#ifdef CONFIG_PERF_EVENTS
1254 1554
1255/* Kprobe profile handler */ 1555/* Kprobe profile handler */
1256static __kprobes int kprobe_profile_func(struct kprobe *kp, 1556static __kprobes void kprobe_perf_func(struct kprobe *kp,
1257 struct pt_regs *regs) 1557 struct pt_regs *regs)
1258{ 1558{
1259 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1559 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1260 struct ftrace_event_call *call = &tp->call; 1560 struct ftrace_event_call *call = &tp->call;
1261 struct kprobe_trace_entry *entry; 1561 struct kprobe_trace_entry_head *entry;
1262 struct trace_entry *ent; 1562 struct hlist_head *head;
1263 int size, __size, i, pc, __cpu; 1563 int size, __size, dsize;
1264 unsigned long irq_flags;
1265 char *trace_buf;
1266 char *raw_data;
1267 int rctx; 1564 int rctx;
1268 1565
1269 pc = preempt_count(); 1566 dsize = __get_data_size(tp, regs);
1270 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1567 __size = sizeof(*entry) + tp->size + dsize;
1271 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1568 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1272 size -= sizeof(u32); 1569 size -= sizeof(u32);
1273 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1570 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1274 "profile buffer not large enough")) 1571 "profile buffer not large enough"))
1275 return 0; 1572 return;
1276
1277 /*
1278 * Protect the non nmi buffer
1279 * This also protects the rcu read side
1280 */
1281 local_irq_save(irq_flags);
1282
1283 rctx = perf_swevent_get_recursion_context();
1284 if (rctx < 0)
1285 goto end_recursion;
1286
1287 __cpu = smp_processor_id();
1288 1573
1289 if (in_nmi()) 1574 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1290 trace_buf = rcu_dereference(perf_trace_buf_nmi); 1575 if (!entry)
1291 else 1576 return;
1292 trace_buf = rcu_dereference(perf_trace_buf);
1293
1294 if (!trace_buf)
1295 goto end;
1296
1297 raw_data = per_cpu_ptr(trace_buf, __cpu);
1298
1299 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1300 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1301 entry = (struct kprobe_trace_entry *)raw_data;
1302 ent = &entry->ent;
1303 1577
1304 tracing_generic_entry_update(ent, irq_flags, pc);
1305 ent->type = call->id;
1306 entry->nargs = tp->nr_args;
1307 entry->ip = (unsigned long)kp->addr; 1578 entry->ip = (unsigned long)kp->addr;
1308 for (i = 0; i < tp->nr_args; i++) 1579 memset(&entry[1], 0, dsize);
1309 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1580 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1310 perf_tp_event(call->id, entry->ip, 1, entry, size);
1311
1312end:
1313 perf_swevent_put_recursion_context(rctx);
1314end_recursion:
1315 local_irq_restore(irq_flags);
1316 1581
1317 return 0; 1582 head = this_cpu_ptr(call->perf_events);
1583 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1318} 1584}
1319 1585
1320/* Kretprobe profile handler */ 1586/* Kretprobe profile handler */
1321static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, 1587static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1322 struct pt_regs *regs) 1588 struct pt_regs *regs)
1323{ 1589{
1324 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1590 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1325 struct ftrace_event_call *call = &tp->call; 1591 struct ftrace_event_call *call = &tp->call;
1326 struct kretprobe_trace_entry *entry; 1592 struct kretprobe_trace_entry_head *entry;
1327 struct trace_entry *ent; 1593 struct hlist_head *head;
1328 int size, __size, i, pc, __cpu; 1594 int size, __size, dsize;
1329 unsigned long irq_flags;
1330 char *trace_buf;
1331 char *raw_data;
1332 int rctx; 1595 int rctx;
1333 1596
1334 pc = preempt_count(); 1597 dsize = __get_data_size(tp, regs);
1335 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1598 __size = sizeof(*entry) + tp->size + dsize;
1336 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1599 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1337 size -= sizeof(u32); 1600 size -= sizeof(u32);
1338 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1601 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1339 "profile buffer not large enough")) 1602 "profile buffer not large enough"))
1340 return 0; 1603 return;
1341
1342 /*
1343 * Protect the non nmi buffer
1344 * This also protects the rcu read side
1345 */
1346 local_irq_save(irq_flags);
1347 1604
1348 rctx = perf_swevent_get_recursion_context(); 1605 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1349 if (rctx < 0) 1606 if (!entry)
1350 goto end_recursion; 1607 return;
1351
1352 __cpu = smp_processor_id();
1353
1354 if (in_nmi())
1355 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1356 else
1357 trace_buf = rcu_dereference(perf_trace_buf);
1358
1359 if (!trace_buf)
1360 goto end;
1361
1362 raw_data = per_cpu_ptr(trace_buf, __cpu);
1363
1364 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1365 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1366 entry = (struct kretprobe_trace_entry *)raw_data;
1367 ent = &entry->ent;
1368 1608
1369 tracing_generic_entry_update(ent, irq_flags, pc);
1370 ent->type = call->id;
1371 entry->nargs = tp->nr_args;
1372 entry->func = (unsigned long)tp->rp.kp.addr; 1609 entry->func = (unsigned long)tp->rp.kp.addr;
1373 entry->ret_ip = (unsigned long)ri->ret_addr; 1610 entry->ret_ip = (unsigned long)ri->ret_addr;
1374 for (i = 0; i < tp->nr_args; i++) 1611 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1375 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1376 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1377
1378end:
1379 perf_swevent_put_recursion_context(rctx);
1380end_recursion:
1381 local_irq_restore(irq_flags);
1382 1612
1383 return 0; 1613 head = this_cpu_ptr(call->perf_events);
1614 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1384} 1615}
1385 1616
1386static int probe_profile_enable(struct ftrace_event_call *call) 1617static int probe_perf_enable(struct ftrace_event_call *call)
1387{ 1618{
1388 struct trace_probe *tp = (struct trace_probe *)call->data; 1619 struct trace_probe *tp = (struct trace_probe *)call->data;
1389 1620
@@ -1395,7 +1626,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
1395 return enable_kprobe(&tp->rp.kp); 1626 return enable_kprobe(&tp->rp.kp);
1396} 1627}
1397 1628
1398static void probe_profile_disable(struct ftrace_event_call *call) 1629static void probe_perf_disable(struct ftrace_event_call *call)
1399{ 1630{
1400 struct trace_probe *tp = (struct trace_probe *)call->data; 1631 struct trace_probe *tp = (struct trace_probe *)call->data;
1401 1632
@@ -1408,8 +1639,28 @@ static void probe_profile_disable(struct ftrace_event_call *call)
1408 disable_kprobe(&tp->rp.kp); 1639 disable_kprobe(&tp->rp.kp);
1409 } 1640 }
1410} 1641}
1411#endif /* CONFIG_EVENT_PROFILE */ 1642#endif /* CONFIG_PERF_EVENTS */
1643
1644static __kprobes
1645int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1646{
1647 switch (type) {
1648 case TRACE_REG_REGISTER:
1649 return probe_event_enable(event);
1650 case TRACE_REG_UNREGISTER:
1651 probe_event_disable(event);
1652 return 0;
1412 1653
1654#ifdef CONFIG_PERF_EVENTS
1655 case TRACE_REG_PERF_REGISTER:
1656 return probe_perf_enable(event);
1657 case TRACE_REG_PERF_UNREGISTER:
1658 probe_perf_disable(event);
1659 return 0;
1660#endif
1661 }
1662 return 0;
1663}
1413 1664
1414static __kprobes 1665static __kprobes
1415int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1666int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
@@ -1418,10 +1669,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1418 1669
1419 if (tp->flags & TP_FLAG_TRACE) 1670 if (tp->flags & TP_FLAG_TRACE)
1420 kprobe_trace_func(kp, regs); 1671 kprobe_trace_func(kp, regs);
1421#ifdef CONFIG_EVENT_PROFILE 1672#ifdef CONFIG_PERF_EVENTS
1422 if (tp->flags & TP_FLAG_PROFILE) 1673 if (tp->flags & TP_FLAG_PROFILE)
1423 kprobe_profile_func(kp, regs); 1674 kprobe_perf_func(kp, regs);
1424#endif /* CONFIG_EVENT_PROFILE */ 1675#endif
1425 return 0; /* We don't tweek kernel, so just return 0 */ 1676 return 0; /* We don't tweek kernel, so just return 0 */
1426} 1677}
1427 1678
@@ -1432,47 +1683,50 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1432 1683
1433 if (tp->flags & TP_FLAG_TRACE) 1684 if (tp->flags & TP_FLAG_TRACE)
1434 kretprobe_trace_func(ri, regs); 1685 kretprobe_trace_func(ri, regs);
1435#ifdef CONFIG_EVENT_PROFILE 1686#ifdef CONFIG_PERF_EVENTS
1436 if (tp->flags & TP_FLAG_PROFILE) 1687 if (tp->flags & TP_FLAG_PROFILE)
1437 kretprobe_profile_func(ri, regs); 1688 kretprobe_perf_func(ri, regs);
1438#endif /* CONFIG_EVENT_PROFILE */ 1689#endif
1439 return 0; /* We don't tweek kernel, so just return 0 */ 1690 return 0; /* We don't tweek kernel, so just return 0 */
1440} 1691}
1441 1692
1693static struct trace_event_functions kretprobe_funcs = {
1694 .trace = print_kretprobe_event
1695};
1696
1697static struct trace_event_functions kprobe_funcs = {
1698 .trace = print_kprobe_event
1699};
1700
1442static int register_probe_event(struct trace_probe *tp) 1701static int register_probe_event(struct trace_probe *tp)
1443{ 1702{
1444 struct ftrace_event_call *call = &tp->call; 1703 struct ftrace_event_call *call = &tp->call;
1445 int ret; 1704 int ret;
1446 1705
1447 /* Initialize ftrace_event_call */ 1706 /* Initialize ftrace_event_call */
1707 INIT_LIST_HEAD(&call->class->fields);
1448 if (probe_is_return(tp)) { 1708 if (probe_is_return(tp)) {
1449 tp->event.trace = print_kretprobe_event; 1709 call->event.funcs = &kretprobe_funcs;
1450 call->raw_init = probe_event_raw_init; 1710 call->class->define_fields = kretprobe_event_define_fields;
1451 call->show_format = kretprobe_event_show_format;
1452 call->define_fields = kretprobe_event_define_fields;
1453 } else { 1711 } else {
1454 tp->event.trace = print_kprobe_event; 1712 call->event.funcs = &kprobe_funcs;
1455 call->raw_init = probe_event_raw_init; 1713 call->class->define_fields = kprobe_event_define_fields;
1456 call->show_format = kprobe_event_show_format;
1457 call->define_fields = kprobe_event_define_fields;
1458 } 1714 }
1459 call->event = &tp->event; 1715 if (set_print_fmt(tp) < 0)
1460 call->id = register_ftrace_event(&tp->event); 1716 return -ENOMEM;
1461 if (!call->id) 1717 ret = register_ftrace_event(&call->event);
1718 if (!ret) {
1719 kfree(call->print_fmt);
1462 return -ENODEV; 1720 return -ENODEV;
1463 call->enabled = 0; 1721 }
1464 call->regfunc = probe_event_enable; 1722 call->flags = 0;
1465 call->unregfunc = probe_event_disable; 1723 call->class->reg = kprobe_register;
1466
1467#ifdef CONFIG_EVENT_PROFILE
1468 call->profile_enable = probe_profile_enable;
1469 call->profile_disable = probe_profile_disable;
1470#endif
1471 call->data = tp; 1724 call->data = tp;
1472 ret = trace_add_event_call(call); 1725 ret = trace_add_event_call(call);
1473 if (ret) { 1726 if (ret) {
1474 pr_info("Failed to register kprobe event: %s\n", call->name); 1727 pr_info("Failed to register kprobe event: %s\n", call->name);
1475 unregister_ftrace_event(&tp->event); 1728 kfree(call->print_fmt);
1729 unregister_ftrace_event(&call->event);
1476 } 1730 }
1477 return ret; 1731 return ret;
1478} 1732}
@@ -1481,6 +1735,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1481{ 1735{
1482 /* tp->event is unregistered in trace_remove_event_call() */ 1736 /* tp->event is unregistered in trace_remove_event_call() */
1483 trace_remove_event_call(&tp->call); 1737 trace_remove_event_call(&tp->call);
1738 kfree(tp->call.print_fmt);
1484} 1739}
1485 1740
1486/* Make a debugfs interface for controling probe points */ 1741/* Make a debugfs interface for controling probe points */
@@ -1523,28 +1778,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1523 1778
1524static __init int kprobe_trace_self_tests_init(void) 1779static __init int kprobe_trace_self_tests_init(void)
1525{ 1780{
1526 int ret; 1781 int ret, warn = 0;
1527 int (*target)(int, int, int, int, int, int); 1782 int (*target)(int, int, int, int, int, int);
1783 struct trace_probe *tp;
1528 1784
1529 target = kprobe_trace_selftest_target; 1785 target = kprobe_trace_selftest_target;
1530 1786
1531 pr_info("Testing kprobe tracing: "); 1787 pr_info("Testing kprobe tracing: ");
1532 1788
1533 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1789 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1534 "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); 1790 "$stack $stack0 +0($stack)");
1535 if (WARN_ON_ONCE(ret)) 1791 if (WARN_ON_ONCE(ret)) {
1536 pr_warning("error enabling function entry\n"); 1792 pr_warning("error on probing function entry.\n");
1793 warn++;
1794 } else {
1795 /* Enable trace point */
1796 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1797 if (WARN_ON_ONCE(tp == NULL)) {
1798 pr_warning("error on getting new probe.\n");
1799 warn++;
1800 } else
1801 probe_event_enable(&tp->call);
1802 }
1537 1803
1538 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1804 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1539 "$retval"); 1805 "$retval");
1540 if (WARN_ON_ONCE(ret)) 1806 if (WARN_ON_ONCE(ret)) {
1541 pr_warning("error enabling function return\n"); 1807 pr_warning("error on probing function return.\n");
1808 warn++;
1809 } else {
1810 /* Enable trace point */
1811 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1812 if (WARN_ON_ONCE(tp == NULL)) {
1813 pr_warning("error on getting new probe.\n");
1814 warn++;
1815 } else
1816 probe_event_enable(&tp->call);
1817 }
1818
1819 if (warn)
1820 goto end;
1542 1821
1543 ret = target(1, 2, 3, 4, 5, 6); 1822 ret = target(1, 2, 3, 4, 5, 6);
1544 1823
1545 cleanup_all_probes(); 1824 ret = command_trace_probe("-:testprobe");
1825 if (WARN_ON_ONCE(ret)) {
1826 pr_warning("error on deleting a probe.\n");
1827 warn++;
1828 }
1829
1830 ret = command_trace_probe("-:testprobe2");
1831 if (WARN_ON_ONCE(ret)) {
1832 pr_warning("error on deleting a probe.\n");
1833 warn++;
1834 }
1546 1835
1547 pr_cont("OK\n"); 1836end:
1837 cleanup_all_probes();
1838 if (warn)
 1839 pr_cont("NG: Some tests failed. Please check them.\n");
1840 else
1841 pr_cont("OK\n");
1548 return 0; 1842 return 0;
1549} 1843}
1550 1844
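[Editorial aside] One design note on the trace_kprobe.c changes above: the four per-event callbacks (regfunc/unregfunc and the profile enable/disable pair) are folded into the single kprobe_register() dispatcher installed as call->class->reg. A hedged sketch of how the event core might drive that callback — sketch_event_reg() is a hypothetical wrapper; the TRACE_REG_* values are the ones used in the dispatcher above:

	/* Hypothetical caller-side view of the unified ->reg() callback. */
	static int sketch_event_reg(struct ftrace_event_call *call,
				    int enable, int perf)
	{
		if (perf)
			return call->class->reg(call, enable ?
						TRACE_REG_PERF_REGISTER :
						TRACE_REG_PERF_UNREGISTER);

		return call->class->reg(call, enable ?
					TRACE_REG_REGISTER :
					TRACE_REG_UNREGISTER);
	}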
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
deleted file mode 100644
index 94103cdcf9d8..000000000000
--- a/kernel/trace/trace_ksym.c
+++ /dev/null
@@ -1,519 +0,0 @@
1/*
2 * trace_ksym.c - Kernel Symbol Tracer
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2009
19 */
20
21#include <linux/kallsyms.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/ftrace.h>
25#include <linux/module.h>
26#include <linux/fs.h>
27
28#include "trace_output.h"
29#include "trace.h"
30
31#include <linux/hw_breakpoint.h>
32#include <asm/hw_breakpoint.h>
33
34#include <asm/atomic.h>
35
36/*
37 * For now, let us restrict the no. of symbols traced simultaneously to number
38 * of available hardware breakpoint registers.
39 */
40#define KSYM_TRACER_MAX HBP_NUM
41
42#define KSYM_TRACER_OP_LEN 3 /* rw- */
43
44struct trace_ksym {
45 struct perf_event **ksym_hbp;
46 struct perf_event_attr attr;
47#ifdef CONFIG_PROFILE_KSYM_TRACER
48 atomic64_t counter;
49#endif
50 struct hlist_node ksym_hlist;
51};
52
53static struct trace_array *ksym_trace_array;
54
55static unsigned int ksym_filter_entry_count;
56static unsigned int ksym_tracing_enabled;
57
58static HLIST_HEAD(ksym_filter_head);
59
60static DEFINE_MUTEX(ksym_tracer_mutex);
61
62#ifdef CONFIG_PROFILE_KSYM_TRACER
63
64#define MAX_UL_INT 0xffffffff
65
66void ksym_collect_stats(unsigned long hbp_hit_addr)
67{
68 struct hlist_node *node;
69 struct trace_ksym *entry;
70
71 rcu_read_lock();
72 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
73 if (entry->attr.bp_addr == hbp_hit_addr) {
74 atomic64_inc(&entry->counter);
75 break;
76 }
77 }
78 rcu_read_unlock();
79}
80#endif /* CONFIG_PROFILE_KSYM_TRACER */
81
82void ksym_hbp_handler(struct perf_event *hbp, int nmi,
83 struct perf_sample_data *data,
84 struct pt_regs *regs)
85{
86 struct ring_buffer_event *event;
87 struct ksym_trace_entry *entry;
88 struct ring_buffer *buffer;
89 int pc;
90
91 if (!ksym_tracing_enabled)
92 return;
93
94 buffer = ksym_trace_array->buffer;
95
96 pc = preempt_count();
97
98 event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
99 sizeof(*entry), 0, pc);
100 if (!event)
101 return;
102
103 entry = ring_buffer_event_data(event);
104 entry->ip = instruction_pointer(regs);
105 entry->type = hw_breakpoint_type(hbp);
106 entry->addr = hw_breakpoint_addr(hbp);
107 strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
108
109#ifdef CONFIG_PROFILE_KSYM_TRACER
110 ksym_collect_stats(hw_breakpoint_addr(hbp));
111#endif /* CONFIG_PROFILE_KSYM_TRACER */
112
113 trace_buffer_unlock_commit(buffer, event, 0, pc);
114}
115
116/* Valid access types are represented as
117 *
118 * rw- : Set Read/Write Access Breakpoint
119 * -w- : Set Write Access Breakpoint
120 * --- : Clear Breakpoints
121 * --x : Set Execution Break points (Not available yet)
122 *
123 */
124static int ksym_trace_get_access_type(char *str)
125{
126 int access = 0;
127
128 if (str[0] == 'r')
129 access |= HW_BREAKPOINT_R;
130
131 if (str[1] == 'w')
132 access |= HW_BREAKPOINT_W;
133
134 if (str[2] == 'x')
135 access |= HW_BREAKPOINT_X;
136
137 switch (access) {
138 case HW_BREAKPOINT_R:
139 case HW_BREAKPOINT_W:
140 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
141 return access;
142 default:
143 return -EINVAL;
144 }
145}
146
147/*
148 * There can be several possible malformed requests and we attempt to capture
149 * all of them. We enumerate some of the rules
150 * 1. We will not allow kernel symbols with ':' since it is used as a delimiter.
151 * i.e. multiple ':' symbols disallowed. Possible uses are of the form
152 * <module>:<ksym_name>:<op>.
153 * 2. No delimiter symbol ':' in the input string
154 * 3. Spurious operator symbols or symbols not in their respective positions
155 * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
156 * 5. Kernel symbol not a part of /proc/kallsyms
157 * 6. Duplicate requests
158 */
159static int parse_ksym_trace_str(char *input_string, char **ksymname,
160 unsigned long *addr)
161{
162 int ret;
163
164 *ksymname = strsep(&input_string, ":");
165 *addr = kallsyms_lookup_name(*ksymname);
166
167 /* Check for malformed request: (2), (1) and (5) */
168 if ((!input_string) ||
169 (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
170 (*addr == 0))
171 return -EINVAL;;
172
173 ret = ksym_trace_get_access_type(input_string);
174
175 return ret;
176}
177
178int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
179{
180 struct trace_ksym *entry;
181 int ret = -ENOMEM;
182
183 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
184 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
185 " new requests for tracing can be accepted now.\n",
186 KSYM_TRACER_MAX);
187 return -ENOSPC;
188 }
189
190 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
191 if (!entry)
192 return -ENOMEM;
193
194 hw_breakpoint_init(&entry->attr);
195
196 entry->attr.bp_type = op;
197 entry->attr.bp_addr = addr;
198 entry->attr.bp_len = HW_BREAKPOINT_LEN_4;
199
200 entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr,
201 ksym_hbp_handler);
202
203 if (IS_ERR(entry->ksym_hbp)) {
204 ret = PTR_ERR(entry->ksym_hbp);
205 printk(KERN_INFO "ksym_tracer request failed. Try again"
206 " later!!\n");
207 goto err;
208 }
209
210 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
211 ksym_filter_entry_count++;
212
213 return 0;
214
215err:
216 kfree(entry);
217
218 return ret;
219}
220
221static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
222 size_t count, loff_t *ppos)
223{
224 struct trace_ksym *entry;
225 struct hlist_node *node;
226 struct trace_seq *s;
227 ssize_t cnt = 0;
228 int ret;
229
230 s = kmalloc(sizeof(*s), GFP_KERNEL);
231 if (!s)
232 return -ENOMEM;
233 trace_seq_init(s);
234
235 mutex_lock(&ksym_tracer_mutex);
236
237 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
238 ret = trace_seq_printf(s, "%pS:",
239 (void *)(unsigned long)entry->attr.bp_addr);
240 if (entry->attr.bp_type == HW_BREAKPOINT_R)
241 ret = trace_seq_puts(s, "r--\n");
242 else if (entry->attr.bp_type == HW_BREAKPOINT_W)
243 ret = trace_seq_puts(s, "-w-\n");
244 else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
245 ret = trace_seq_puts(s, "rw-\n");
246 WARN_ON_ONCE(!ret);
247 }
248
249 cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
250
251 mutex_unlock(&ksym_tracer_mutex);
252
253 kfree(s);
254
255 return cnt;
256}
257
258static void __ksym_trace_reset(void)
259{
260 struct trace_ksym *entry;
261 struct hlist_node *node, *node1;
262
263 mutex_lock(&ksym_tracer_mutex);
264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
265 ksym_hlist) {
266 unregister_wide_hw_breakpoint(entry->ksym_hbp);
267 ksym_filter_entry_count--;
268 hlist_del_rcu(&(entry->ksym_hlist));
269 synchronize_rcu();
270 kfree(entry);
271 }
272 mutex_unlock(&ksym_tracer_mutex);
273}
274
275static ssize_t ksym_trace_filter_write(struct file *file,
276 const char __user *buffer,
277 size_t count, loff_t *ppos)
278{
279 struct trace_ksym *entry;
280 struct hlist_node *node;
281 char *buf, *input_string, *ksymname = NULL;
282 unsigned long ksym_addr = 0;
283 int ret, op, changed = 0;
284
285 buf = kzalloc(count + 1, GFP_KERNEL);
286 if (!buf)
287 return -ENOMEM;
288
289 ret = -EFAULT;
290 if (copy_from_user(buf, buffer, count))
291 goto out;
292
293 buf[count] = '\0';
294 input_string = strstrip(buf);
295
296 /*
297 * Clear all breakpoints if:
298 * 1: echo > ksym_trace_filter
299 * 2: echo 0 > ksym_trace_filter
300 * 3: echo "*:---" > ksym_trace_filter
301 */
302 if (!input_string[0] || !strcmp(input_string, "0") ||
303 !strcmp(input_string, "*:---")) {
304 __ksym_trace_reset();
305 ret = 0;
306 goto out;
307 }
308
309 ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
310 if (ret < 0)
311 goto out;
312
313 mutex_lock(&ksym_tracer_mutex);
314
315 ret = -EINVAL;
316 hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
317 if (entry->attr.bp_addr == ksym_addr) {
318 /* Check for malformed request: (6) */
319 if (entry->attr.bp_type != op)
320 changed = 1;
321 else
322 goto out_unlock;
323 break;
324 }
325 }
326 if (changed) {
327 unregister_wide_hw_breakpoint(entry->ksym_hbp);
328 entry->attr.bp_type = op;
329 ret = 0;
330 if (op > 0) {
331 entry->ksym_hbp =
332 register_wide_hw_breakpoint(&entry->attr,
333 ksym_hbp_handler);
334 if (IS_ERR(entry->ksym_hbp))
335 ret = PTR_ERR(entry->ksym_hbp);
336 else
337 goto out_unlock;
338 }
339 /* Error or "symbol:---" case: drop it */
340 ksym_filter_entry_count--;
341 hlist_del_rcu(&(entry->ksym_hlist));
342 synchronize_rcu();
343 kfree(entry);
344 goto out_unlock;
345 } else {
346 /* Check for malformed request: (4) */
347 if (op)
348 ret = process_new_ksym_entry(ksymname, op, ksym_addr);
349 }
350out_unlock:
351 mutex_unlock(&ksym_tracer_mutex);
352out:
353 kfree(buf);
354 return !ret ? count : ret;
355}
356
357static const struct file_operations ksym_tracing_fops = {
358 .open = tracing_open_generic,
359 .read = ksym_trace_filter_read,
360 .write = ksym_trace_filter_write,
361};
362
363static void ksym_trace_reset(struct trace_array *tr)
364{
365 ksym_tracing_enabled = 0;
366 __ksym_trace_reset();
367}
368
369static int ksym_trace_init(struct trace_array *tr)
370{
371 int cpu, ret = 0;
372
373 for_each_online_cpu(cpu)
374 tracing_reset(tr, cpu);
375 ksym_tracing_enabled = 1;
376 ksym_trace_array = tr;
377
378 return ret;
379}
380
381static void ksym_trace_print_header(struct seq_file *m)
382{
383 seq_puts(m,
384 "# TASK-PID CPU# Symbol "
385 "Type Function\n");
386 seq_puts(m,
387 "# | | | "
388 " | |\n");
389}
390
391static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
392{
393 struct trace_entry *entry = iter->ent;
394 struct trace_seq *s = &iter->seq;
395 struct ksym_trace_entry *field;
396 char str[KSYM_SYMBOL_LEN];
397 int ret;
398
399 if (entry->type != TRACE_KSYM)
400 return TRACE_TYPE_UNHANDLED;
401
402 trace_assign_type(field, entry);
403
404 ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
405 entry->pid, iter->cpu, (char *)field->addr);
406 if (!ret)
407 return TRACE_TYPE_PARTIAL_LINE;
408
409 switch (field->type) {
410 case HW_BREAKPOINT_R:
411 ret = trace_seq_printf(s, " R ");
412 break;
413 case HW_BREAKPOINT_W:
414 ret = trace_seq_printf(s, " W ");
415 break;
416 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
417 ret = trace_seq_printf(s, " RW ");
418 break;
419 default:
420 return TRACE_TYPE_PARTIAL_LINE;
421 }
422
423 if (!ret)
424 return TRACE_TYPE_PARTIAL_LINE;
425
426 sprint_symbol(str, field->ip);
427 ret = trace_seq_printf(s, "%s\n", str);
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 return TRACE_TYPE_HANDLED;
432}
433
434struct tracer ksym_tracer __read_mostly =
435{
436 .name = "ksym_tracer",
437 .init = ksym_trace_init,
438 .reset = ksym_trace_reset,
439#ifdef CONFIG_FTRACE_SELFTEST
440 .selftest = trace_selftest_startup_ksym,
441#endif
442 .print_header = ksym_trace_print_header,
443 .print_line = ksym_trace_output
444};
445
446#ifdef CONFIG_PROFILE_KSYM_TRACER
447static int ksym_profile_show(struct seq_file *m, void *v)
448{
449 struct hlist_node *node;
450 struct trace_ksym *entry;
451 int access_type = 0;
452 char fn_name[KSYM_NAME_LEN];
453
454 seq_puts(m, " Access Type ");
455 seq_puts(m, " Symbol Counter\n");
456 seq_puts(m, " ----------- ");
457 seq_puts(m, " ------ -------\n");
458
459 rcu_read_lock();
460 hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
461
462 access_type = entry->attr.bp_type;
463
464 switch (access_type) {
465 case HW_BREAKPOINT_R:
466 seq_puts(m, " R ");
467 break;
468 case HW_BREAKPOINT_W:
469 seq_puts(m, " W ");
470 break;
471 case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
472 seq_puts(m, " RW ");
473 break;
474 default:
475 seq_puts(m, " NA ");
476 }
477
478 if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0)
479 seq_printf(m, " %-36s", fn_name);
480 else
481 seq_printf(m, " %-36s", "<NA>");
482 seq_printf(m, " %15llu\n",
483 (unsigned long long)atomic64_read(&entry->counter));
484 }
485 rcu_read_unlock();
486
487 return 0;
488}
489
490static int ksym_profile_open(struct inode *node, struct file *file)
491{
492 return single_open(file, ksym_profile_show, NULL);
493}
494
495static const struct file_operations ksym_profile_fops = {
496 .open = ksym_profile_open,
497 .read = seq_read,
498 .llseek = seq_lseek,
499 .release = single_release,
500};
501#endif /* CONFIG_PROFILE_KSYM_TRACER */
502
503__init static int init_ksym_trace(void)
504{
505 struct dentry *d_tracer;
506
507 d_tracer = tracing_init_dentry();
508
509 trace_create_file("ksym_trace_filter", 0644, d_tracer,
510 NULL, &ksym_tracing_fops);
511
512#ifdef CONFIG_PROFILE_KSYM_TRACER
513 trace_create_file("ksym_profile", 0444, d_tracer,
514 NULL, &ksym_profile_fops);
515#endif
516
517 return register_tracer(&ksym_tracer);
518}
519device_initcall(init_ksym_trace);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8e46b3323cdc..02272baa2206 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,9 +16,6 @@
16 16
17DECLARE_RWSEM(trace_event_mutex); 17DECLARE_RWSEM(trace_event_mutex);
18 18
19DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
20EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
21
22static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; 19static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
23 20
24static int next_event_type = __TRACE_LAST_TYPE + 1; 21static int next_event_type = __TRACE_LAST_TYPE + 1;
@@ -209,6 +206,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
209 206
210 return 1; 207 return 1;
211} 208}
209EXPORT_SYMBOL(trace_seq_putc);
212 210
213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 211int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
214{ 212{
@@ -253,7 +251,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
253 void *ret; 251 void *ret;
254 252
255 if (s->full) 253 if (s->full)
256 return 0; 254 return NULL;
257 255
258 if (len > ((PAGE_SIZE - 1) - s->len)) { 256 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1; 257 s->full = 1;
@@ -355,6 +353,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
355} 353}
356EXPORT_SYMBOL(ftrace_print_symbols_seq); 354EXPORT_SYMBOL(ftrace_print_symbols_seq);
357 355
356const char *
357ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
358{
359 int i;
360 const char *ret = p->buffer + p->len;
361
362 for (i = 0; i < buf_len; i++)
363 trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
364
365 trace_seq_putc(p, 0);
366
367 return ret;
368}
369EXPORT_SYMBOL(ftrace_print_hex_seq);
370
358#ifdef CONFIG_KRETPROBES 371#ifdef CONFIG_KRETPROBES
359static inline const char *kretprobed(const char *name) 372static inline const char *kretprobed(const char *name)
360{ 373{
@@ -726,6 +739,9 @@ int register_ftrace_event(struct trace_event *event)
726 if (WARN_ON(!event)) 739 if (WARN_ON(!event))
727 goto out; 740 goto out;
728 741
742 if (WARN_ON(!event->funcs))
743 goto out;
744
729 INIT_LIST_HEAD(&event->list); 745 INIT_LIST_HEAD(&event->list);
730 746
731 if (!event->type) { 747 if (!event->type) {
@@ -758,14 +774,14 @@ int register_ftrace_event(struct trace_event *event)
758 goto out; 774 goto out;
759 } 775 }
760 776
761 if (event->trace == NULL) 777 if (event->funcs->trace == NULL)
762 event->trace = trace_nop_print; 778 event->funcs->trace = trace_nop_print;
763 if (event->raw == NULL) 779 if (event->funcs->raw == NULL)
764 event->raw = trace_nop_print; 780 event->funcs->raw = trace_nop_print;
765 if (event->hex == NULL) 781 if (event->funcs->hex == NULL)
766 event->hex = trace_nop_print; 782 event->funcs->hex = trace_nop_print;
767 if (event->binary == NULL) 783 if (event->funcs->binary == NULL)
768 event->binary = trace_nop_print; 784 event->funcs->binary = trace_nop_print;
769 785
770 key = event->type & (EVENT_HASHSIZE - 1); 786 key = event->type & (EVENT_HASHSIZE - 1);
771 787
@@ -807,13 +823,15 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
807 * Standard events 823 * Standard events
808 */ 824 */
809 825
810enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) 826enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
827 struct trace_event *event)
811{ 828{
812 return TRACE_TYPE_HANDLED; 829 return TRACE_TYPE_HANDLED;
813} 830}
814 831
815/* TRACE_FN */ 832/* TRACE_FN */
816static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) 833static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
834 struct trace_event *event)
817{ 835{
818 struct ftrace_entry *field; 836 struct ftrace_entry *field;
819 struct trace_seq *s = &iter->seq; 837 struct trace_seq *s = &iter->seq;
@@ -840,7 +858,8 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
840 return TRACE_TYPE_PARTIAL_LINE; 858 return TRACE_TYPE_PARTIAL_LINE;
841} 859}
842 860
843static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) 861static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags,
862 struct trace_event *event)
844{ 863{
845 struct ftrace_entry *field; 864 struct ftrace_entry *field;
846 865
@@ -854,7 +873,8 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
854 return TRACE_TYPE_HANDLED; 873 return TRACE_TYPE_HANDLED;
855} 874}
856 875
857static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) 876static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags,
877 struct trace_event *event)
858{ 878{
859 struct ftrace_entry *field; 879 struct ftrace_entry *field;
860 struct trace_seq *s = &iter->seq; 880 struct trace_seq *s = &iter->seq;
@@ -867,7 +887,8 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
867 return TRACE_TYPE_HANDLED; 887 return TRACE_TYPE_HANDLED;
868} 888}
869 889
870static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) 890static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags,
891 struct trace_event *event)
871{ 892{
872 struct ftrace_entry *field; 893 struct ftrace_entry *field;
873 struct trace_seq *s = &iter->seq; 894 struct trace_seq *s = &iter->seq;
@@ -880,14 +901,18 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
880 return TRACE_TYPE_HANDLED; 901 return TRACE_TYPE_HANDLED;
881} 902}
882 903
883static struct trace_event trace_fn_event = { 904static struct trace_event_functions trace_fn_funcs = {
884 .type = TRACE_FN,
885 .trace = trace_fn_trace, 905 .trace = trace_fn_trace,
886 .raw = trace_fn_raw, 906 .raw = trace_fn_raw,
887 .hex = trace_fn_hex, 907 .hex = trace_fn_hex,
888 .binary = trace_fn_bin, 908 .binary = trace_fn_bin,
889}; 909};
890 910
911static struct trace_event trace_fn_event = {
912 .type = TRACE_FN,
913 .funcs = &trace_fn_funcs,
914};
915
891/* TRACE_CTX an TRACE_WAKE */ 916/* TRACE_CTX an TRACE_WAKE */
892static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, 917static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
893 char *delim) 918 char *delim)
@@ -916,13 +941,14 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
916 return TRACE_TYPE_HANDLED; 941 return TRACE_TYPE_HANDLED;
917} 942}
918 943
919static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) 944static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags,
945 struct trace_event *event)
920{ 946{
921 return trace_ctxwake_print(iter, "==>"); 947 return trace_ctxwake_print(iter, "==>");
922} 948}
923 949
924static enum print_line_t trace_wake_print(struct trace_iterator *iter, 950static enum print_line_t trace_wake_print(struct trace_iterator *iter,
925 int flags) 951 int flags, struct trace_event *event)
926{ 952{
927 return trace_ctxwake_print(iter, " +"); 953 return trace_ctxwake_print(iter, " +");
928} 954}
@@ -950,12 +976,14 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
950 return TRACE_TYPE_HANDLED; 976 return TRACE_TYPE_HANDLED;
951} 977}
952 978
953static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) 979static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags,
980 struct trace_event *event)
954{ 981{
955 return trace_ctxwake_raw(iter, 0); 982 return trace_ctxwake_raw(iter, 0);
956} 983}
957 984
958static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) 985static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags,
986 struct trace_event *event)
959{ 987{
960 return trace_ctxwake_raw(iter, '+'); 988 return trace_ctxwake_raw(iter, '+');
961} 989}
@@ -984,18 +1012,20 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
984 return TRACE_TYPE_HANDLED; 1012 return TRACE_TYPE_HANDLED;
985} 1013}
986 1014
987static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) 1015static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags,
1016 struct trace_event *event)
988{ 1017{
989 return trace_ctxwake_hex(iter, 0); 1018 return trace_ctxwake_hex(iter, 0);
990} 1019}
991 1020
992static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) 1021static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags,
1022 struct trace_event *event)
993{ 1023{
994 return trace_ctxwake_hex(iter, '+'); 1024 return trace_ctxwake_hex(iter, '+');
995} 1025}
996 1026
997static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, 1027static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
998 int flags) 1028 int flags, struct trace_event *event)
999{ 1029{
1000 struct ctx_switch_entry *field; 1030 struct ctx_switch_entry *field;
1001 struct trace_seq *s = &iter->seq; 1031 struct trace_seq *s = &iter->seq;
@@ -1012,81 +1042,34 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
1012 return TRACE_TYPE_HANDLED; 1042 return TRACE_TYPE_HANDLED;
1013} 1043}
1014 1044
1015static struct trace_event trace_ctx_event = { 1045static struct trace_event_functions trace_ctx_funcs = {
1016 .type = TRACE_CTX,
1017 .trace = trace_ctx_print, 1046 .trace = trace_ctx_print,
1018 .raw = trace_ctx_raw, 1047 .raw = trace_ctx_raw,
1019 .hex = trace_ctx_hex, 1048 .hex = trace_ctx_hex,
1020 .binary = trace_ctxwake_bin, 1049 .binary = trace_ctxwake_bin,
1021}; 1050};
1022 1051
1023static struct trace_event trace_wake_event = { 1052static struct trace_event trace_ctx_event = {
1024 .type = TRACE_WAKE, 1053 .type = TRACE_CTX,
1054 .funcs = &trace_ctx_funcs,
1055};
1056
1057static struct trace_event_functions trace_wake_funcs = {
1025 .trace = trace_wake_print, 1058 .trace = trace_wake_print,
1026 .raw = trace_wake_raw, 1059 .raw = trace_wake_raw,
1027 .hex = trace_wake_hex, 1060 .hex = trace_wake_hex,
1028 .binary = trace_ctxwake_bin, 1061 .binary = trace_ctxwake_bin,
1029}; 1062};
1030 1063
1031/* TRACE_SPECIAL */ 1064static struct trace_event trace_wake_event = {
1032static enum print_line_t trace_special_print(struct trace_iterator *iter, 1065 .type = TRACE_WAKE,
1033 int flags) 1066 .funcs = &trace_wake_funcs,
1034{
1035 struct special_entry *field;
1036
1037 trace_assign_type(field, iter->ent);
1038
1039 if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n",
1040 field->arg1,
1041 field->arg2,
1042 field->arg3))
1043 return TRACE_TYPE_PARTIAL_LINE;
1044
1045 return TRACE_TYPE_HANDLED;
1046}
1047
1048static enum print_line_t trace_special_hex(struct trace_iterator *iter,
1049 int flags)
1050{
1051 struct special_entry *field;
1052 struct trace_seq *s = &iter->seq;
1053
1054 trace_assign_type(field, iter->ent);
1055
1056 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1057 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1058 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1059
1060 return TRACE_TYPE_HANDLED;
1061}
1062
1063static enum print_line_t trace_special_bin(struct trace_iterator *iter,
1064 int flags)
1065{
1066 struct special_entry *field;
1067 struct trace_seq *s = &iter->seq;
1068
1069 trace_assign_type(field, iter->ent);
1070
1071 SEQ_PUT_FIELD_RET(s, field->arg1);
1072 SEQ_PUT_FIELD_RET(s, field->arg2);
1073 SEQ_PUT_FIELD_RET(s, field->arg3);
1074
1075 return TRACE_TYPE_HANDLED;
1076}
1077
1078static struct trace_event trace_special_event = {
1079 .type = TRACE_SPECIAL,
1080 .trace = trace_special_print,
1081 .raw = trace_special_print,
1082 .hex = trace_special_hex,
1083 .binary = trace_special_bin,
1084}; 1067};
1085 1068
1086/* TRACE_STACK */ 1069/* TRACE_STACK */
1087 1070
1088static enum print_line_t trace_stack_print(struct trace_iterator *iter, 1071static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1089 int flags) 1072 int flags, struct trace_event *event)
1090{ 1073{
1091 struct stack_entry *field; 1074 struct stack_entry *field;
1092 struct trace_seq *s = &iter->seq; 1075 struct trace_seq *s = &iter->seq;
@@ -1114,17 +1097,18 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1114 return TRACE_TYPE_PARTIAL_LINE; 1097 return TRACE_TYPE_PARTIAL_LINE;
1115} 1098}
1116 1099
1100static struct trace_event_functions trace_stack_funcs = {
1101 .trace = trace_stack_print,
1102};
1103
1117static struct trace_event trace_stack_event = { 1104static struct trace_event trace_stack_event = {
1118 .type = TRACE_STACK, 1105 .type = TRACE_STACK,
1119 .trace = trace_stack_print, 1106 .funcs = &trace_stack_funcs,
1120 .raw = trace_special_print,
1121 .hex = trace_special_hex,
1122 .binary = trace_special_bin,
1123}; 1107};
1124 1108
1125/* TRACE_USER_STACK */ 1109/* TRACE_USER_STACK */
1126static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, 1110static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1127 int flags) 1111 int flags, struct trace_event *event)
1128{ 1112{
1129 struct userstack_entry *field; 1113 struct userstack_entry *field;
1130 struct trace_seq *s = &iter->seq; 1114 struct trace_seq *s = &iter->seq;
@@ -1143,17 +1127,19 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1143 return TRACE_TYPE_PARTIAL_LINE; 1127 return TRACE_TYPE_PARTIAL_LINE;
1144} 1128}
1145 1129
1130static struct trace_event_functions trace_user_stack_funcs = {
1131 .trace = trace_user_stack_print,
1132};
1133
1146static struct trace_event trace_user_stack_event = { 1134static struct trace_event trace_user_stack_event = {
1147 .type = TRACE_USER_STACK, 1135 .type = TRACE_USER_STACK,
1148 .trace = trace_user_stack_print, 1136 .funcs = &trace_user_stack_funcs,
1149 .raw = trace_special_print,
1150 .hex = trace_special_hex,
1151 .binary = trace_special_bin,
1152}; 1137};
1153 1138
1154/* TRACE_BPRINT */ 1139/* TRACE_BPRINT */
1155static enum print_line_t 1140static enum print_line_t
1156trace_bprint_print(struct trace_iterator *iter, int flags) 1141trace_bprint_print(struct trace_iterator *iter, int flags,
1142 struct trace_event *event)
1157{ 1143{
1158 struct trace_entry *entry = iter->ent; 1144 struct trace_entry *entry = iter->ent;
1159 struct trace_seq *s = &iter->seq; 1145 struct trace_seq *s = &iter->seq;
@@ -1178,7 +1164,8 @@ trace_bprint_print(struct trace_iterator *iter, int flags)
1178 1164
1179 1165
1180static enum print_line_t 1166static enum print_line_t
1181trace_bprint_raw(struct trace_iterator *iter, int flags) 1167trace_bprint_raw(struct trace_iterator *iter, int flags,
1168 struct trace_event *event)
1182{ 1169{
1183 struct bprint_entry *field; 1170 struct bprint_entry *field;
1184 struct trace_seq *s = &iter->seq; 1171 struct trace_seq *s = &iter->seq;
@@ -1197,16 +1184,19 @@ trace_bprint_raw(struct trace_iterator *iter, int flags)
1197 return TRACE_TYPE_PARTIAL_LINE; 1184 return TRACE_TYPE_PARTIAL_LINE;
1198} 1185}
1199 1186
1187static struct trace_event_functions trace_bprint_funcs = {
1188 .trace = trace_bprint_print,
1189 .raw = trace_bprint_raw,
1190};
1200 1191
1201static struct trace_event trace_bprint_event = { 1192static struct trace_event trace_bprint_event = {
1202 .type = TRACE_BPRINT, 1193 .type = TRACE_BPRINT,
1203 .trace = trace_bprint_print, 1194 .funcs = &trace_bprint_funcs,
1204 .raw = trace_bprint_raw,
1205}; 1195};
1206 1196
1207/* TRACE_PRINT */ 1197/* TRACE_PRINT */
1208static enum print_line_t trace_print_print(struct trace_iterator *iter, 1198static enum print_line_t trace_print_print(struct trace_iterator *iter,
1209 int flags) 1199 int flags, struct trace_event *event)
1210{ 1200{
1211 struct print_entry *field; 1201 struct print_entry *field;
1212 struct trace_seq *s = &iter->seq; 1202 struct trace_seq *s = &iter->seq;
@@ -1225,7 +1215,8 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
1225 return TRACE_TYPE_PARTIAL_LINE; 1215 return TRACE_TYPE_PARTIAL_LINE;
1226} 1216}
1227 1217
1228static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) 1218static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
1219 struct trace_event *event)
1229{ 1220{
1230 struct print_entry *field; 1221 struct print_entry *field;
1231 1222
@@ -1240,18 +1231,21 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
1240 return TRACE_TYPE_PARTIAL_LINE; 1231 return TRACE_TYPE_PARTIAL_LINE;
1241} 1232}
1242 1233
1243static struct trace_event trace_print_event = { 1234static struct trace_event_functions trace_print_funcs = {
1244 .type = TRACE_PRINT,
1245 .trace = trace_print_print, 1235 .trace = trace_print_print,
1246 .raw = trace_print_raw, 1236 .raw = trace_print_raw,
1247}; 1237};
1248 1238
1239static struct trace_event trace_print_event = {
1240 .type = TRACE_PRINT,
1241 .funcs = &trace_print_funcs,
1242};
1243
1249 1244
1250static struct trace_event *events[] __initdata = { 1245static struct trace_event *events[] __initdata = {
1251 &trace_fn_event, 1246 &trace_fn_event,
1252 &trace_ctx_event, 1247 &trace_ctx_event,
1253 &trace_wake_event, 1248 &trace_wake_event,
1254 &trace_special_event,
1255 &trace_stack_event, 1249 &trace_stack_event,
1256 &trace_user_stack_event, 1250 &trace_user_stack_event,
1257 &trace_bprint_event, 1251 &trace_bprint_event,
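
The trace_output.c hunks above split the output callbacks out of struct trace_event: the per-format handlers (.trace/.raw/.hex/.binary) now live in a shared struct trace_event_functions that the event references through a single .funcs pointer, and every handler gains a third struct trace_event *event argument so one functions table can serve several registered events. The TRACE_SPECIAL output event is removed outright (and dropped from the events[] table above). A minimal sketch of the post-patch wiring, using a hypothetical TRACE_FOO event that is not part of this patch:

static enum print_line_t trace_foo_print(struct trace_iterator *iter,
                                         int flags, struct trace_event *event)
{
        /* format iter->ent into iter->seq here */
        return TRACE_TYPE_HANDLED;
}

static struct trace_event_functions trace_foo_funcs = {
        .trace  = trace_foo_print,      /* .raw/.hex/.binary may stay NULL */
};

static struct trace_event trace_foo_event = {
        .type   = TRACE_FOO,            /* hypothetical type id */
        .funcs  = &trace_foo_funcs,
};
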
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 9d91c72ba38b..c038eba0492b 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -25,7 +25,7 @@ extern void trace_event_read_unlock(void);
25extern struct trace_event *ftrace_find_event(int type); 25extern struct trace_event *ftrace_find_event(int type);
26 26
27extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
28 int flags); 28 int flags, struct trace_event *event);
29extern int 29extern int
30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); 30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
31 31
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 5fca0f51fde4..8f758d070c43 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -50,8 +50,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
50} 50}
51 51
52static void 52static void
53probe_sched_switch(struct rq *__rq, struct task_struct *prev, 53probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
54 struct task_struct *next)
55{ 54{
56 struct trace_array_cpu *data; 55 struct trace_array_cpu *data;
57 unsigned long flags; 56 unsigned long flags;
@@ -109,7 +108,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
109} 108}
110 109
111static void 110static void
112probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 111probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
113{ 112{
114 struct trace_array_cpu *data; 113 struct trace_array_cpu *data;
115 unsigned long flags; 114 unsigned long flags;
@@ -139,21 +138,21 @@ static int tracing_sched_register(void)
139{ 138{
140 int ret; 139 int ret;
141 140
142 ret = register_trace_sched_wakeup(probe_sched_wakeup); 141 ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
143 if (ret) { 142 if (ret) {
144 pr_info("wakeup trace: Couldn't activate tracepoint" 143 pr_info("wakeup trace: Couldn't activate tracepoint"
145 " probe to kernel_sched_wakeup\n"); 144 " probe to kernel_sched_wakeup\n");
146 return ret; 145 return ret;
147 } 146 }
148 147
149 ret = register_trace_sched_wakeup_new(probe_sched_wakeup); 148 ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
150 if (ret) { 149 if (ret) {
151 pr_info("wakeup trace: Couldn't activate tracepoint" 150 pr_info("wakeup trace: Couldn't activate tracepoint"
152 " probe to kernel_sched_wakeup_new\n"); 151 " probe to kernel_sched_wakeup_new\n");
153 goto fail_deprobe; 152 goto fail_deprobe;
154 } 153 }
155 154
156 ret = register_trace_sched_switch(probe_sched_switch); 155 ret = register_trace_sched_switch(probe_sched_switch, NULL);
157 if (ret) { 156 if (ret) {
158 pr_info("sched trace: Couldn't activate tracepoint" 157 pr_info("sched trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_switch\n"); 158 " probe to kernel_sched_switch\n");
@@ -162,17 +161,17 @@ static int tracing_sched_register(void)
162 161
163 return ret; 162 return ret;
164fail_deprobe_wake_new: 163fail_deprobe_wake_new:
165 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 164 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
166fail_deprobe: 165fail_deprobe:
167 unregister_trace_sched_wakeup(probe_sched_wakeup); 166 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
168 return ret; 167 return ret;
169} 168}
170 169
171static void tracing_sched_unregister(void) 170static void tracing_sched_unregister(void)
172{ 171{
173 unregister_trace_sched_switch(probe_sched_switch); 172 unregister_trace_sched_switch(probe_sched_switch, NULL);
174 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 173 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
175 unregister_trace_sched_wakeup(probe_sched_wakeup); 174 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
176} 175}
177 176
178static void tracing_start_sched_switch(void) 177static void tracing_start_sched_switch(void)
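
The trace_sched_switch.c conversion above follows the tree-wide tracepoint API change pulled in by this merge: every probe now receives a private data cookie as its leading void * argument, and the register_/unregister_ helpers take a (probe, data) pair rather than the probe alone. The tracers here pass NULL because their state lives in globals. A sketch of the new convention with an invented probe and cookie (only register_trace_sched_switch() itself comes from this tree):

static void
probe_my_switch(void *data, struct task_struct *prev, struct task_struct *next)
{
        struct trace_array *tr = data;  /* cookie handed over at registration */

        /* record prev/next into tr here */
}

static int my_switch_register(struct trace_array *tr)
{
        /* the same (probe, cookie) pair must be used to unregister later */
        return register_trace_sched_switch(probe_my_switch, tr);
}
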
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0271742abb8d..7319559ed59f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,50 +31,99 @@ static int wakeup_rt;
31static arch_spinlock_t wakeup_lock = 31static arch_spinlock_t wakeup_lock =
32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 32 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
33 33
34static void wakeup_reset(struct trace_array *tr);
34static void __wakeup_reset(struct trace_array *tr); 35static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
35 38
36static int save_lat_flag; 39static int save_lat_flag;
37 40
41#define TRACE_DISPLAY_GRAPH 1
42
43static struct tracer_opt trace_opts[] = {
44#ifdef CONFIG_FUNCTION_GRAPH_TRACER
45 /* display latency trace as call graph */
46 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
47#endif
48 { } /* Empty entry */
49};
50
51static struct tracer_flags tracer_flags = {
52 .val = 0,
53 .opts = trace_opts,
54};
55
56#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
57
38#ifdef CONFIG_FUNCTION_TRACER 58#ifdef CONFIG_FUNCTION_TRACER
59
39/* 60/*
40 * irqsoff uses its own tracer function to keep the overhead down: 61 * Prologue for the wakeup function tracers.
62 *
63 * Returns 1 if it is OK to continue, and preemption
64 * is disabled and data->disabled is incremented.
65 * 0 if the trace is to be ignored, and preemption
66 * is not disabled and data->disabled is
67 * kept the same.
68 *
69 * Note, this function is also used outside this ifdef but
70 * inside the #ifdef of the function graph tracer below.
71 * This is OK, since the function graph tracer is
72 * dependent on the function tracer.
41 */ 73 */
42static void 74static int
43wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) 75func_prolog_preempt_disable(struct trace_array *tr,
76 struct trace_array_cpu **data,
77 int *pc)
44{ 78{
45 struct trace_array *tr = wakeup_trace;
46 struct trace_array_cpu *data;
47 unsigned long flags;
48 long disabled; 79 long disabled;
49 int resched;
50 int cpu; 80 int cpu;
51 int pc;
52 81
53 if (likely(!wakeup_task)) 82 if (likely(!wakeup_task))
54 return; 83 return 0;
55 84
56 pc = preempt_count(); 85 *pc = preempt_count();
57 resched = ftrace_preempt_disable(); 86 preempt_disable_notrace();
58 87
59 cpu = raw_smp_processor_id(); 88 cpu = raw_smp_processor_id();
60 if (cpu != wakeup_current_cpu) 89 if (cpu != wakeup_current_cpu)
61 goto out_enable; 90 goto out_enable;
62 91
63 data = tr->data[cpu]; 92 *data = tr->data[cpu];
64 disabled = atomic_inc_return(&data->disabled); 93 disabled = atomic_inc_return(&(*data)->disabled);
65 if (unlikely(disabled != 1)) 94 if (unlikely(disabled != 1))
66 goto out; 95 goto out;
67 96
68 local_irq_save(flags); 97 return 1;
69 98
70 trace_function(tr, ip, parent_ip, flags, pc); 99out:
100 atomic_dec(&(*data)->disabled);
101
102out_enable:
103 preempt_enable_notrace();
104 return 0;
105}
106
107/*
108 * wakeup uses its own tracer function to keep the overhead down:
109 */
110static void
111wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
112{
113 struct trace_array *tr = wakeup_trace;
114 struct trace_array_cpu *data;
115 unsigned long flags;
116 int pc;
117
118 if (!func_prolog_preempt_disable(tr, &data, &pc))
119 return;
71 120
121 local_irq_save(flags);
122 trace_function(tr, ip, parent_ip, flags, pc);
72 local_irq_restore(flags); 123 local_irq_restore(flags);
73 124
74 out:
75 atomic_dec(&data->disabled); 125 atomic_dec(&data->disabled);
76 out_enable: 126 preempt_enable_notrace();
77 ftrace_preempt_enable(resched);
78} 127}
79 128
80static struct ftrace_ops trace_ops __read_mostly = 129static struct ftrace_ops trace_ops __read_mostly =
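
func_prolog_preempt_disable() above factors the common entry checks out of the wakeup callbacks, and the old ftrace_preempt_disable()/ftrace_preempt_enable() pair is gone in favour of plain preempt_disable_notrace()/preempt_enable_notrace(). The prologue's contract is asymmetric: on a 0 return nothing was taken, but on a 1 return preemption is disabled and data->disabled has been incremented, so the caller must undo both. The expected calling pattern, sketched with an invented hook name:

static void wakeup_foo_hook(unsigned long ip, unsigned long parent_ip)
{
        struct trace_array *tr = wakeup_trace;
        struct trace_array_cpu *data;
        unsigned long flags;
        int pc;

        if (!func_prolog_preempt_disable(tr, &data, &pc))
                return;                         /* nothing to undo */

        local_irq_save(flags);
        trace_function(tr, ip, parent_ip, flags, pc);
        local_irq_restore(flags);

        atomic_dec(&data->disabled);            /* undo the prologue ... */
        preempt_enable_notrace();               /* ... in reverse order */
}
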
@@ -83,6 +132,156 @@ static struct ftrace_ops trace_ops __read_mostly =
83}; 132};
84#endif /* CONFIG_FUNCTION_TRACER */ 133#endif /* CONFIG_FUNCTION_TRACER */
85 134
135static int start_func_tracer(int graph)
136{
137 int ret;
138
139 if (!graph)
140 ret = register_ftrace_function(&trace_ops);
141 else
142 ret = register_ftrace_graph(&wakeup_graph_return,
143 &wakeup_graph_entry);
144
145 if (!ret && tracing_is_enabled())
146 tracer_enabled = 1;
147 else
148 tracer_enabled = 0;
149
150 return ret;
151}
152
153static void stop_func_tracer(int graph)
154{
155 tracer_enabled = 0;
156
157 if (!graph)
158 unregister_ftrace_function(&trace_ops);
159 else
160 unregister_ftrace_graph();
161}
162
163#ifdef CONFIG_FUNCTION_GRAPH_TRACER
164static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
165{
166
167 if (!(bit & TRACE_DISPLAY_GRAPH))
168 return -EINVAL;
169
170 if (!(is_graph() ^ set))
171 return 0;
172
173 stop_func_tracer(!set);
174
175 wakeup_reset(wakeup_trace);
176 tracing_max_latency = 0;
177
178 return start_func_tracer(set);
179}
180
181static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
182{
183 struct trace_array *tr = wakeup_trace;
184 struct trace_array_cpu *data;
185 unsigned long flags;
186 int pc, ret = 0;
187
188 if (!func_prolog_preempt_disable(tr, &data, &pc))
189 return 0;
190
191 local_save_flags(flags);
192 ret = __trace_graph_entry(tr, trace, flags, pc);
193 atomic_dec(&data->disabled);
194 preempt_enable_notrace();
195
196 return ret;
197}
198
199static void wakeup_graph_return(struct ftrace_graph_ret *trace)
200{
201 struct trace_array *tr = wakeup_trace;
202 struct trace_array_cpu *data;
203 unsigned long flags;
204 int pc;
205
206 if (!func_prolog_preempt_disable(tr, &data, &pc))
207 return;
208
209 local_save_flags(flags);
210 __trace_graph_return(tr, trace, flags, pc);
211 atomic_dec(&data->disabled);
212
213 preempt_enable_notrace();
214 return;
215}
216
217static void wakeup_trace_open(struct trace_iterator *iter)
218{
219 if (is_graph())
220 graph_trace_open(iter);
221}
222
223static void wakeup_trace_close(struct trace_iterator *iter)
224{
225 if (iter->private)
226 graph_trace_close(iter);
227}
228
229#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC)
230
231static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
232{
233 /*
234 * In graph mode call the graph tracer output function,
235 * otherwise go with the TRACE_FN event handler
236 */
237 if (is_graph())
238 return print_graph_function_flags(iter, GRAPH_TRACER_FLAGS);
239
240 return TRACE_TYPE_UNHANDLED;
241}
242
243static void wakeup_print_header(struct seq_file *s)
244{
245 if (is_graph())
246 print_graph_headers_flags(s, GRAPH_TRACER_FLAGS);
247 else
248 trace_default_header(s);
249}
250
251static void
252__trace_function(struct trace_array *tr,
253 unsigned long ip, unsigned long parent_ip,
254 unsigned long flags, int pc)
255{
256 if (is_graph())
257 trace_graph_function(tr, ip, parent_ip, flags, pc);
258 else
259 trace_function(tr, ip, parent_ip, flags, pc);
260}
261#else
262#define __trace_function trace_function
263
264static int wakeup_set_flag(u32 old_flags, u32 bit, int set)
265{
266 return -EINVAL;
267}
268
269static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
270{
271 return -1;
272}
273
274static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
275{
276 return TRACE_TYPE_UNHANDLED;
277}
278
279static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
280static void wakeup_print_header(struct seq_file *s) { }
281static void wakeup_trace_open(struct trace_iterator *iter) { }
282static void wakeup_trace_close(struct trace_iterator *iter) { }
283#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
284
86/* 285/*
87 * Should this new latency be reported/recorded? 286 * Should this new latency be reported/recorded?
88 */ 287 */
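
The block above adds function-graph support to the wakeup tracer: start_func_tracer()/stop_func_tracer() pick between register_ftrace_function() and register_ftrace_graph(), wakeup_set_flag() handles runtime toggling of the new display-graph option (reset the trace, restart the other backend), and __trace_function() steers each recorded sample to whichever backend is active. The (is_graph() ^ set) test is only a "no change requested" check; spelled out with an invented helper:

static int graph_mode_change_needed(int currently_on, int requested)
{
        /* both operands are 0 or 1 here, so XOR means "state would change" */
        return currently_on ^ requested;
}
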
@@ -98,7 +297,8 @@ static int report_latency(cycle_t delta)
98 return 1; 297 return 1;
99} 298}
100 299
101static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) 300static void
301probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
102{ 302{
103 if (task != wakeup_task) 303 if (task != wakeup_task)
104 return; 304 return;
@@ -107,8 +307,8 @@ static void probe_wakeup_migrate_task(struct task_struct *task, int cpu)
107} 307}
108 308
109static void notrace 309static void notrace
110probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, 310probe_wakeup_sched_switch(void *ignore,
111 struct task_struct *next) 311 struct task_struct *prev, struct task_struct *next)
112{ 312{
113 struct trace_array_cpu *data; 313 struct trace_array_cpu *data;
114 cycle_t T0, T1, delta; 314 cycle_t T0, T1, delta;
@@ -152,7 +352,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
152 /* The task we are waiting for is waking up */ 352 /* The task we are waiting for is waking up */
153 data = wakeup_trace->data[wakeup_cpu]; 353 data = wakeup_trace->data[wakeup_cpu];
154 354
155 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 355 __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
156 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 356 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
157 357
158 T0 = data->preempt_timestamp; 358 T0 = data->preempt_timestamp;
@@ -200,7 +400,7 @@ static void wakeup_reset(struct trace_array *tr)
200} 400}
201 401
202static void 402static void
203probe_wakeup(struct rq *rq, struct task_struct *p, int success) 403probe_wakeup(void *ignore, struct task_struct *p, int success)
204{ 404{
205 struct trace_array_cpu *data; 405 struct trace_array_cpu *data;
206 int cpu = smp_processor_id(); 406 int cpu = smp_processor_id();
@@ -252,7 +452,7 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
252 * is not called by an assembly function (where as schedule is) 452 * is not called by an assembly function (where as schedule is)
253 * it should be safe to use it here. 453 * it should be safe to use it here.
254 */ 454 */
255 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 455 __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
256 456
257out_locked: 457out_locked:
258 arch_spin_unlock(&wakeup_lock); 458 arch_spin_unlock(&wakeup_lock);
@@ -264,28 +464,28 @@ static void start_wakeup_tracer(struct trace_array *tr)
264{ 464{
265 int ret; 465 int ret;
266 466
267 ret = register_trace_sched_wakeup(probe_wakeup); 467 ret = register_trace_sched_wakeup(probe_wakeup, NULL);
268 if (ret) { 468 if (ret) {
269 pr_info("wakeup trace: Couldn't activate tracepoint" 469 pr_info("wakeup trace: Couldn't activate tracepoint"
270 " probe to kernel_sched_wakeup\n"); 470 " probe to kernel_sched_wakeup\n");
271 return; 471 return;
272 } 472 }
273 473
274 ret = register_trace_sched_wakeup_new(probe_wakeup); 474 ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
275 if (ret) { 475 if (ret) {
276 pr_info("wakeup trace: Couldn't activate tracepoint" 476 pr_info("wakeup trace: Couldn't activate tracepoint"
277 " probe to kernel_sched_wakeup_new\n"); 477 " probe to kernel_sched_wakeup_new\n");
278 goto fail_deprobe; 478 goto fail_deprobe;
279 } 479 }
280 480
281 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 481 ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
282 if (ret) { 482 if (ret) {
283 pr_info("sched trace: Couldn't activate tracepoint" 483 pr_info("sched trace: Couldn't activate tracepoint"
284 " probe to kernel_sched_switch\n"); 484 " probe to kernel_sched_switch\n");
285 goto fail_deprobe_wake_new; 485 goto fail_deprobe_wake_new;
286 } 486 }
287 487
288 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); 488 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
289 if (ret) { 489 if (ret) {
290 pr_info("wakeup trace: Couldn't activate tracepoint" 490 pr_info("wakeup trace: Couldn't activate tracepoint"
291 " probe to kernel_sched_migrate_task\n"); 491 " probe to kernel_sched_migrate_task\n");
@@ -303,28 +503,24 @@ static void start_wakeup_tracer(struct trace_array *tr)
303 */ 503 */
304 smp_wmb(); 504 smp_wmb();
305 505
306 register_ftrace_function(&trace_ops); 506 if (start_func_tracer(is_graph()))
307 507 printk(KERN_ERR "failed to start wakeup tracer\n");
308 if (tracing_is_enabled())
309 tracer_enabled = 1;
310 else
311 tracer_enabled = 0;
312 508
313 return; 509 return;
314fail_deprobe_wake_new: 510fail_deprobe_wake_new:
315 unregister_trace_sched_wakeup_new(probe_wakeup); 511 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
316fail_deprobe: 512fail_deprobe:
317 unregister_trace_sched_wakeup(probe_wakeup); 513 unregister_trace_sched_wakeup(probe_wakeup, NULL);
318} 514}
319 515
320static void stop_wakeup_tracer(struct trace_array *tr) 516static void stop_wakeup_tracer(struct trace_array *tr)
321{ 517{
322 tracer_enabled = 0; 518 tracer_enabled = 0;
323 unregister_ftrace_function(&trace_ops); 519 stop_func_tracer(is_graph());
324 unregister_trace_sched_switch(probe_wakeup_sched_switch); 520 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
325 unregister_trace_sched_wakeup_new(probe_wakeup); 521 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
326 unregister_trace_sched_wakeup(probe_wakeup); 522 unregister_trace_sched_wakeup(probe_wakeup, NULL);
327 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); 523 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
328} 524}
329 525
330static int __wakeup_tracer_init(struct trace_array *tr) 526static int __wakeup_tracer_init(struct trace_array *tr)
@@ -379,9 +575,16 @@ static struct tracer wakeup_tracer __read_mostly =
379 .start = wakeup_tracer_start, 575 .start = wakeup_tracer_start,
380 .stop = wakeup_tracer_stop, 576 .stop = wakeup_tracer_stop,
381 .print_max = 1, 577 .print_max = 1,
578 .print_header = wakeup_print_header,
579 .print_line = wakeup_print_line,
580 .flags = &tracer_flags,
581 .set_flag = wakeup_set_flag,
382#ifdef CONFIG_FTRACE_SELFTEST 582#ifdef CONFIG_FTRACE_SELFTEST
383 .selftest = trace_selftest_startup_wakeup, 583 .selftest = trace_selftest_startup_wakeup,
384#endif 584#endif
585 .open = wakeup_trace_open,
586 .close = wakeup_trace_close,
587 .use_max_tr = 1,
385}; 588};
386 589
387static struct tracer wakeup_rt_tracer __read_mostly = 590static struct tracer wakeup_rt_tracer __read_mostly =
@@ -393,9 +596,16 @@ static struct tracer wakeup_rt_tracer __read_mostly =
393 .stop = wakeup_tracer_stop, 596 .stop = wakeup_tracer_stop,
394 .wait_pipe = poll_wait_pipe, 597 .wait_pipe = poll_wait_pipe,
395 .print_max = 1, 598 .print_max = 1,
599 .print_header = wakeup_print_header,
600 .print_line = wakeup_print_line,
601 .flags = &tracer_flags,
602 .set_flag = wakeup_set_flag,
396#ifdef CONFIG_FTRACE_SELFTEST 603#ifdef CONFIG_FTRACE_SELFTEST
397 .selftest = trace_selftest_startup_wakeup, 604 .selftest = trace_selftest_startup_wakeup,
398#endif 605#endif
606 .open = wakeup_trace_open,
607 .close = wakeup_trace_close,
608 .use_max_tr = 1,
399}; 609};
400 610
401__init static int init_wakeup_tracer(void) 611__init static int init_wakeup_tracer(void)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..659732eba07c 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
@@ -12,12 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)
12 case TRACE_WAKE: 13 case TRACE_WAKE:
13 case TRACE_STACK: 14 case TRACE_STACK:
14 case TRACE_PRINT: 15 case TRACE_PRINT:
15 case TRACE_SPECIAL:
16 case TRACE_BRANCH: 16 case TRACE_BRANCH:
17 case TRACE_GRAPH_ENT: 17 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 18 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM:
21 return 1; 19 return 1;
22 } 20 }
23 return 0; 21 return 0;
@@ -29,7 +27,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
29 struct trace_entry *entry; 27 struct trace_entry *entry;
30 unsigned int loops = 0; 28 unsigned int loops = 0;
31 29
32 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 30 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
33 entry = ring_buffer_event_data(event); 31 entry = ring_buffer_event_data(event);
34 32
35 /* 33 /*
@@ -255,7 +253,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
255/* Maximum number of functions to trace before diagnosing a hang */ 253/* Maximum number of functions to trace before diagnosing a hang */
256#define GRAPH_MAX_FUNC_TEST 100000000 254#define GRAPH_MAX_FUNC_TEST 100000000
257 255
258static void __ftrace_dump(bool disable_tracing); 256static void
257__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
259static unsigned int graph_hang_thresh; 258static unsigned int graph_hang_thresh;
260 259
261/* Wrap the real function entry probe to avoid possible hanging */ 260/* Wrap the real function entry probe to avoid possible hanging */
@@ -266,7 +265,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
266 ftrace_graph_stop(); 265 ftrace_graph_stop();
267 printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); 266 printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
268 if (ftrace_dump_on_oops) 267 if (ftrace_dump_on_oops)
269 __ftrace_dump(false); 268 __ftrace_dump(false, DUMP_ALL);
270 return 0; 269 return 0;
271 } 270 }
272 271
@@ -559,7 +558,7 @@ trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
559static int trace_wakeup_test_thread(void *data) 558static int trace_wakeup_test_thread(void *data)
560{ 559{
561 /* Make this a RT thread, doesn't need to be too high */ 560 /* Make this a RT thread, doesn't need to be too high */
562 struct sched_param param = { .sched_priority = 5 }; 561 static const struct sched_param param = { .sched_priority = 5 };
563 struct completion *x = data; 562 struct completion *x = data;
564 563
565 sched_setscheduler(current, SCHED_FIFO, &param); 564 sched_setscheduler(current, SCHED_FIFO, &param);
@@ -690,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
690} 689}
691#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 690#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
692 691
693#ifdef CONFIG_SYSPROF_TRACER
694int
695trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
696{
697 unsigned long count;
698 int ret;
699
700 /* start the tracing */
701 ret = tracer_init(trace, tr);
702 if (ret) {
703 warn_failed_init_tracer(trace, ret);
704 return ret;
705 }
706
707 /* Sleep for a 1/10 of a second */
708 msleep(100);
709 /* stop the tracing. */
710 tracing_stop();
711 /* check the trace buffer */
712 ret = trace_test_buffer(tr, &count);
713 trace->reset(tr);
714 tracing_start();
715
716 if (!ret && !count) {
717 printk(KERN_CONT ".. no entries found ..");
718 ret = -1;
719 }
720
721 return ret;
722}
723#endif /* CONFIG_SYSPROF_TRACER */
724
725#ifdef CONFIG_BRANCH_TRACER 692#ifdef CONFIG_BRANCH_TRACER
726int 693int
727trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) 694trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
@@ -754,112 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
754} 721}
755#endif /* CONFIG_BRANCH_TRACER */ 722#endif /* CONFIG_BRANCH_TRACER */
756 723
757#ifdef CONFIG_HW_BRANCH_TRACER
758int
759trace_selftest_startup_hw_branches(struct tracer *trace,
760 struct trace_array *tr)
761{
762 struct trace_iterator *iter;
763 struct tracer tracer;
764 unsigned long count;
765 int ret;
766
767 if (!trace->open) {
768 printk(KERN_CONT "missing open function...");
769 return -1;
770 }
771
772 ret = tracer_init(trace, tr);
773 if (ret) {
774 warn_failed_init_tracer(trace, ret);
775 return ret;
776 }
777
778 /*
779 * The hw-branch tracer needs to collect the trace from the various
780 * cpu trace buffers - before tracing is stopped.
781 */
782 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
783 if (!iter)
784 return -ENOMEM;
785
786 memcpy(&tracer, trace, sizeof(tracer));
787
788 iter->trace = &tracer;
789 iter->tr = tr;
790 iter->pos = -1;
791 mutex_init(&iter->mutex);
792
793 trace->open(iter);
794
795 mutex_destroy(&iter->mutex);
796 kfree(iter);
797
798 tracing_stop();
799
800 ret = trace_test_buffer(tr, &count);
801 trace->reset(tr);
802 tracing_start();
803
804 if (!ret && !count) {
805 printk(KERN_CONT "no entries found..");
806 ret = -1;
807 }
808
809 return ret;
810}
811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy;
815
816int
817trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
818{
819 unsigned long count;
820 int ret;
821
822 /* start the tracing */
823 ret = tracer_init(trace, tr);
824 if (ret) {
825 warn_failed_init_tracer(trace, ret);
826 return ret;
827 }
828
829 ksym_selftest_dummy = 0;
830 /* Register the read-write tracing request */
831
832 ret = process_new_ksym_entry("ksym_selftest_dummy",
833 HW_BREAKPOINT_R | HW_BREAKPOINT_W,
834 (unsigned long)(&ksym_selftest_dummy));
835
836 if (ret < 0) {
837 printk(KERN_CONT "ksym_trace read-write startup test failed\n");
838 goto ret_path;
839 }
840 /* Perform a read and a write operation over the dummy variable to
841 * trigger the tracer
842 */
843 if (ksym_selftest_dummy == 0)
844 ksym_selftest_dummy++;
845
846 /* stop the tracing. */
847 tracing_stop();
848 /* check the trace buffer */
849 ret = trace_test_buffer(tr, &count);
850 trace->reset(tr);
851 tracing_start();
852
853 /* read & write operations - one each is performed on the dummy variable
854 * triggering two entries in the trace buffer
855 */
856 if (!ret && count != 2) {
857 printk(KERN_CONT "Ksym tracer startup test failed");
858 ret = -1;
859 }
860
861ret_path:
862 return ret;
863}
864#endif /* CONFIG_KSYM_TRACER */
865
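
The trace_selftest.c hunks above track the rest of the merge: the sysprof, hw-branch and ksym selftests go away with their tracers, __ftrace_dump() grows an ftrace_dump_mode argument, the RT test thread's sched_param becomes static const, and ring_buffer_consume() gains a fourth argument for reporting lost (overwritten) entries, which the selftest does not need and passes as NULL. A sketch of a consumer that does care, with invented names around the two ring-buffer calls that appear in this diff:

static void drain_cpu_buffer(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        unsigned long lost = 0;
        u64 ts;

        while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
                struct trace_entry *entry = ring_buffer_event_data(event);

                /* 'lost' reports entries overwritten before this read, if any */
                (void)entry;
        }
}
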
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..4c5dead0c239 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -110,12 +110,12 @@ static inline void check_stack(void)
110static void 110static void
111stack_trace_call(unsigned long ip, unsigned long parent_ip) 111stack_trace_call(unsigned long ip, unsigned long parent_ip)
112{ 112{
113 int cpu, resched; 113 int cpu;
114 114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return; 116 return;
117 117
118 resched = ftrace_preempt_disable(); 118 preempt_disable_notrace();
119 119
120 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
121 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
127 out: 127 out:
128 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
129 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
130 ftrace_preempt_enable(resched); 130 preempt_enable_notrace();
131} 131}
132 132
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
174 arch_spin_lock(&max_stack_lock); 184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 arch_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -183,6 +195,7 @@ static const struct file_operations stack_max_size_fops = {
183 .open = tracing_open_generic, 195 .open = tracing_open_generic,
184 .read = stack_max_size_read, 196 .read = stack_max_size_read,
185 .write = stack_max_size_write, 197 .write = stack_max_size_write,
198 .llseek = default_llseek,
186}; 199};
187 200
188static void * 201static void *
@@ -206,7 +219,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 219
207static void *t_start(struct seq_file *m, loff_t *pos) 220static void *t_start(struct seq_file *m, loff_t *pos)
208{ 221{
222 int cpu;
223
209 local_irq_disable(); 224 local_irq_disable();
225
226 cpu = smp_processor_id();
227 per_cpu(trace_active, cpu)++;
228
210 arch_spin_lock(&max_stack_lock); 229 arch_spin_lock(&max_stack_lock);
211 230
212 if (*pos == 0) 231 if (*pos == 0)
@@ -217,7 +236,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 236
218static void t_stop(struct seq_file *m, void *p) 237static void t_stop(struct seq_file *m, void *p)
219{ 238{
239 int cpu;
240
220 arch_spin_unlock(&max_stack_lock); 241 arch_spin_unlock(&max_stack_lock);
242
243 cpu = smp_processor_id();
244 per_cpu(trace_active, cpu)--;
245
221 local_irq_enable(); 246 local_irq_enable();
222} 247}
223 248
@@ -225,7 +250,7 @@ static int trace_lookup_stack(struct seq_file *m, long i)
225{ 250{
226 unsigned long addr = stack_dump_trace[i]; 251 unsigned long addr = stack_dump_trace[i];
227 252
228 return seq_printf(m, "%pF\n", (void *)addr); 253 return seq_printf(m, "%pS\n", (void *)addr);
229} 254}
230 255
231static void print_disabled(struct seq_file *m) 256static void print_disabled(struct seq_file *m)
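
Two things change in trace_stack.c above. The tracer callback switches from ftrace_preempt_disable()/enable() to preempt_disable_notrace()/enable_notrace(), and, more importantly, every place that takes max_stack_lock outside the callback now bumps the per-cpu trace_active counter first: if the function tracer fires while the lock is held (or in an NMI on top of it), the elevated counter makes stack_trace_call() bail out instead of spinning on the same lock. The stack dump also moves from %pF to %pS, the right conversion for a plain saved address, and the debugfs file gains an explicit default_llseek. The locking pattern, sketched with an invented wrapper:

static void with_max_stack_lock(void (*fn)(void))
{
        unsigned long flags;
        int cpu;

        local_irq_save(flags);

        /* keep stack_trace_call() from re-entering while the lock is held */
        cpu = smp_processor_id();
        per_cpu(trace_active, cpu)++;

        arch_spin_lock(&max_stack_lock);
        fn();
        arch_spin_unlock(&max_stack_lock);

        per_cpu(trace_active, cpu)--;
        local_irq_restore(flags);
}
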
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..b706529b4fc7 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
4#include <linux/ftrace.h> 5#include <linux/ftrace.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
@@ -14,6 +15,46 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 17
18static int syscall_enter_register(struct ftrace_event_call *event,
19 enum trace_reg type);
20static int syscall_exit_register(struct ftrace_event_call *event,
21 enum trace_reg type);
22
23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25
26static struct list_head *
27syscall_get_enter_fields(struct ftrace_event_call *call)
28{
29 struct syscall_metadata *entry = call->data;
30
31 return &entry->enter_fields;
32}
33
34struct trace_event_functions enter_syscall_print_funcs = {
35 .trace = print_syscall_enter,
36};
37
38struct trace_event_functions exit_syscall_print_funcs = {
39 .trace = print_syscall_exit,
40};
41
42struct ftrace_event_class event_class_syscall_enter = {
43 .system = "syscalls",
44 .reg = syscall_enter_register,
45 .define_fields = syscall_enter_define_fields,
46 .get_fields = syscall_get_enter_fields,
47 .raw_init = init_syscall_trace,
48};
49
50struct ftrace_event_class event_class_syscall_exit = {
51 .system = "syscalls",
52 .reg = syscall_exit_register,
53 .define_fields = syscall_exit_define_fields,
54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
55 .raw_init = init_syscall_trace,
56};
57
17extern unsigned long __start_syscalls_metadata[]; 58extern unsigned long __start_syscalls_metadata[];
18extern unsigned long __stop_syscalls_metadata[]; 59extern unsigned long __stop_syscalls_metadata[];
19 60
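
The event_class_syscall_enter/exit objects above are this file's share of the new struct ftrace_event_class: registration, field definition/lookup and raw init move out of the individual ftrace_event_call and into a class shared by all syscall events. Note the asymmetry between the two classes: enter events resolve their field list per syscall through .get_fields (each syscall has different arguments, kept in syscall_metadata->enter_fields), while every exit event shares the single static .fields list. A sketch of how a consumer might pick the list; the helper name is invented and this is an assumption about the event core, not code from this patch:

static struct list_head *event_field_list(struct ftrace_event_call *call)
{
        if (call->class->get_fields)
                return call->class->get_fields(call);

        return &call->class->fields;
}
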
@@ -52,7 +93,8 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
52} 93}
53 94
54enum print_line_t 95enum print_line_t
55print_syscall_enter(struct trace_iterator *iter, int flags) 96print_syscall_enter(struct trace_iterator *iter, int flags,
97 struct trace_event *event)
56{ 98{
57 struct trace_seq *s = &iter->seq; 99 struct trace_seq *s = &iter->seq;
58 struct trace_entry *ent = iter->ent; 100 struct trace_entry *ent = iter->ent;
@@ -67,7 +109,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
67 if (!entry) 109 if (!entry)
68 goto end; 110 goto end;
69 111
70 if (entry->enter_event->id != ent->type) { 112 if (entry->enter_event->event.type != ent->type) {
71 WARN_ON_ONCE(1); 113 WARN_ON_ONCE(1);
72 goto end; 114 goto end;
73 } 115 }
@@ -104,7 +146,8 @@ end:
104} 146}
105 147
106enum print_line_t 148enum print_line_t
107print_syscall_exit(struct trace_iterator *iter, int flags) 149print_syscall_exit(struct trace_iterator *iter, int flags,
150 struct trace_event *event)
108{ 151{
109 struct trace_seq *s = &iter->seq; 152 struct trace_seq *s = &iter->seq;
110 struct trace_entry *ent = iter->ent; 153 struct trace_entry *ent = iter->ent;
@@ -122,7 +165,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
122 return TRACE_TYPE_HANDLED; 165 return TRACE_TYPE_HANDLED;
123 } 166 }
124 167
125 if (entry->exit_event->id != ent->type) { 168 if (entry->exit_event->event.type != ent->type) {
126 WARN_ON_ONCE(1); 169 WARN_ON_ONCE(1);
127 return TRACE_TYPE_UNHANDLED; 170 return TRACE_TYPE_UNHANDLED;
128 } 171 }
@@ -143,73 +186,68 @@ extern char *__bad_type_size(void);
143 #type, #name, offsetof(typeof(trace), name), \ 186 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type) 187 sizeof(trace.name), is_signed_type(type)
145 188
146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 189static
190int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
147{ 191{
148 int i; 192 int i;
149 int ret; 193 int pos = 0;
150 struct syscall_metadata *entry = call->data;
151 struct syscall_trace_enter trace;
152 int offset = offsetof(struct syscall_trace_enter, args);
153 194
154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 195 /* When len=0, we just calculate the needed length */
155 "\tsigned:%u;\n", 196#define LEN_OR_ZERO (len ? len - pos : 0)
156 SYSCALL_FIELD(int, nr));
157 if (!ret)
158 return 0;
159 197
198 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
160 for (i = 0; i < entry->nb_args; i++) { 199 for (i = 0; i < entry->nb_args; i++) {
161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 200 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
162 entry->args[i]); 201 entry->args[i], sizeof(unsigned long),
163 if (!ret) 202 i == entry->nb_args - 1 ? "" : ", ");
164 return 0;
165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
169 if (!ret)
170 return 0;
171 offset += sizeof(unsigned long);
172 } 203 }
204 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
173 205
174 trace_seq_puts(s, "\nprint fmt: \"");
175 for (i = 0; i < entry->nb_args; i++) { 206 for (i = 0; i < entry->nb_args; i++) {
176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 207 pos += snprintf(buf + pos, LEN_OR_ZERO,
177 sizeof(unsigned long), 208 ", ((unsigned long)(REC->%s))", entry->args[i]);
178 i == entry->nb_args - 1 ? "" : ", ");
179 if (!ret)
180 return 0;
181 } 209 }
182 trace_seq_putc(s, '"');
183 210
184 for (i = 0; i < entry->nb_args; i++) { 211#undef LEN_OR_ZERO
185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
186 entry->args[i]);
187 if (!ret)
188 return 0;
189 }
190 212
191 return trace_seq_putc(s, '\n'); 213 /* return the length of print_fmt */
214 return pos;
192} 215}
193 216
194int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 217static int set_syscall_print_fmt(struct ftrace_event_call *call)
195{ 218{
196 int ret; 219 char *print_fmt;
197 struct syscall_trace_exit trace; 220 int len;
221 struct syscall_metadata *entry = call->data;
198 222
199 ret = trace_seq_printf(s, 223 if (entry->enter_event != call) {
200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 224 call->print_fmt = "\"0x%lx\", REC->ret";
201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
204 SYSCALL_FIELD(int, nr),
205 SYSCALL_FIELD(long, ret));
206 if (!ret)
207 return 0; 225 return 0;
226 }
227
228 /* First: called with 0 length to calculate the needed length */
229 len = __set_enter_print_fmt(entry, NULL, 0);
230
231 print_fmt = kmalloc(len + 1, GFP_KERNEL);
232 if (!print_fmt)
233 return -ENOMEM;
234
235 /* Second: actually write the @print_fmt */
236 __set_enter_print_fmt(entry, print_fmt, len + 1);
237 call->print_fmt = print_fmt;
238
239 return 0;
240}
241
242static void free_syscall_print_fmt(struct ftrace_event_call *call)
243{
244 struct syscall_metadata *entry = call->data;
208 245
209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 246 if (entry->enter_event == call)
247 kfree(call->print_fmt);
210} 248}
211 249
212int syscall_enter_define_fields(struct ftrace_event_call *call) 250static int syscall_enter_define_fields(struct ftrace_event_call *call)
213{ 251{
214 struct syscall_trace_enter trace; 252 struct syscall_trace_enter trace;
215 struct syscall_metadata *meta = call->data; 253 struct syscall_metadata *meta = call->data;
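
set_syscall_print_fmt() above replaces the old "write the format file straight into a trace_seq" helpers with a build-a-print_fmt-string approach, using the classic two-pass snprintf idiom: run the formatting loop once with a zero length purely to measure, allocate exactly that much, then run it again to write. The LEN_OR_ZERO macro is what lets one loop serve both passes, since snprintf with a zero size only returns the would-be length. A standalone illustration of the same idiom in plain userspace C (not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Measure-then-write: with len == 0 the loop only totals the length. */
static int build_list(char *buf, int len, int argc, char **argv)
{
        int pos = 0;
        int i;

#define LEN_OR_ZERO (len ? len - pos : 0)
        for (i = 0; i < argc; i++)
                pos += snprintf(len ? buf + pos : NULL, LEN_OR_ZERO, "%s%s",
                                argv[i], i == argc - 1 ? "" : ", ");
#undef LEN_OR_ZERO

        return pos;     /* bytes needed, not counting the trailing NUL */
}

int main(int argc, char **argv)
{
        int len = build_list(NULL, 0, argc - 1, argv + 1);
        char *s = malloc(len + 1);

        if (!s)
                return 1;
        s[0] = '\0';    /* covers the no-arguments case */
        build_list(s, len + 1, argc - 1, argv + 1);
        puts(s);
        free(s);
        return 0;
}
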
@@ -232,7 +270,7 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
232 return ret; 270 return ret;
233} 271}
234 272
235int syscall_exit_define_fields(struct ftrace_event_call *call) 273static int syscall_exit_define_fields(struct ftrace_event_call *call)
236{ 274{
237 struct syscall_trace_exit trace; 275 struct syscall_trace_exit trace;
238 int ret; 276 int ret;
@@ -247,7 +285,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
247 return ret; 285 return ret;
248} 286}
249 287
250void ftrace_syscall_enter(struct pt_regs *regs, long id) 288void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
251{ 289{
252 struct syscall_trace_enter *entry; 290 struct syscall_trace_enter *entry;
253 struct syscall_metadata *sys_data; 291 struct syscall_metadata *sys_data;
@@ -269,7 +307,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
269 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 307 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
270 308
271 event = trace_current_buffer_lock_reserve(&buffer, 309 event = trace_current_buffer_lock_reserve(&buffer,
272 sys_data->enter_event->id, size, 0, 0); 310 sys_data->enter_event->event.type, size, 0, 0);
273 if (!event) 311 if (!event)
274 return; 312 return;
275 313
@@ -282,7 +320,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
282 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 320 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
283} 321}
284 322
285void ftrace_syscall_exit(struct pt_regs *regs, long ret) 323void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
286{ 324{
287 struct syscall_trace_exit *entry; 325 struct syscall_trace_exit *entry;
288 struct syscall_metadata *sys_data; 326 struct syscall_metadata *sys_data;
@@ -301,7 +339,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
301 return; 339 return;
302 340
303 event = trace_current_buffer_lock_reserve(&buffer, 341 event = trace_current_buffer_lock_reserve(&buffer,
304 sys_data->exit_event->id, sizeof(*entry), 0, 0); 342 sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
305 if (!event) 343 if (!event)
306 return; 344 return;
307 345
@@ -324,7 +362,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
324 return -ENOSYS; 362 return -ENOSYS;
325 mutex_lock(&syscall_trace_lock); 363 mutex_lock(&syscall_trace_lock);
326 if (!sys_refcount_enter) 364 if (!sys_refcount_enter)
327 ret = register_trace_sys_enter(ftrace_syscall_enter); 365 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
328 if (!ret) { 366 if (!ret) {
329 set_bit(num, enabled_enter_syscalls); 367 set_bit(num, enabled_enter_syscalls);
330 sys_refcount_enter++; 368 sys_refcount_enter++;
@@ -344,7 +382,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
344 sys_refcount_enter--; 382 sys_refcount_enter--;
345 clear_bit(num, enabled_enter_syscalls); 383 clear_bit(num, enabled_enter_syscalls);
346 if (!sys_refcount_enter) 384 if (!sys_refcount_enter)
347 unregister_trace_sys_enter(ftrace_syscall_enter); 385 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
348 mutex_unlock(&syscall_trace_lock); 386 mutex_unlock(&syscall_trace_lock);
349} 387}
350 388
@@ -358,7 +396,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
358 return -ENOSYS; 396 return -ENOSYS;
359 mutex_lock(&syscall_trace_lock); 397 mutex_lock(&syscall_trace_lock);
360 if (!sys_refcount_exit) 398 if (!sys_refcount_exit)
361 ret = register_trace_sys_exit(ftrace_syscall_exit); 399 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
362 if (!ret) { 400 if (!ret) {
363 set_bit(num, enabled_exit_syscalls); 401 set_bit(num, enabled_exit_syscalls);
364 sys_refcount_exit++; 402 sys_refcount_exit++;
@@ -378,7 +416,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
378 sys_refcount_exit--; 416 sys_refcount_exit--;
379 clear_bit(num, enabled_exit_syscalls); 417 clear_bit(num, enabled_exit_syscalls);
380 if (!sys_refcount_exit) 418 if (!sys_refcount_exit)
381 unregister_trace_sys_exit(ftrace_syscall_exit); 419 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
382 mutex_unlock(&syscall_trace_lock); 420 mutex_unlock(&syscall_trace_lock);
383} 421}
384 422
@@ -386,12 +424,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
386{ 424{
387 int id; 425 int id;
388 426
389 id = register_ftrace_event(call->event); 427 if (set_syscall_print_fmt(call) < 0)
390 if (!id) 428 return -ENOMEM;
391 return -ENODEV; 429
392 call->id = id; 430 id = trace_event_raw_init(call);
393 INIT_LIST_HEAD(&call->fields); 431
394 return 0; 432 if (id < 0) {
433 free_syscall_print_fmt(call);
434 return id;
435 }
436
437 return id;
438}
439
440unsigned long __init arch_syscall_addr(int nr)
441{
442 return (unsigned long)sys_call_table[nr];
395} 443}
396 444
397int __init init_ftrace_syscalls(void) 445int __init init_ftrace_syscalls(void)
@@ -421,27 +469,24 @@ int __init init_ftrace_syscalls(void)
421} 469}
422core_initcall(init_ftrace_syscalls); 470core_initcall(init_ftrace_syscalls);
423 471
424#ifdef CONFIG_EVENT_PROFILE 472#ifdef CONFIG_PERF_EVENTS
425 473
426static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 474static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
427static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 475static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
428static int sys_prof_refcount_enter; 476static int sys_perf_refcount_enter;
429static int sys_prof_refcount_exit; 477static int sys_perf_refcount_exit;
430 478
431static void prof_syscall_enter(struct pt_regs *regs, long id) 479static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
432{ 480{
433 struct syscall_metadata *sys_data; 481 struct syscall_metadata *sys_data;
434 struct syscall_trace_enter *rec; 482 struct syscall_trace_enter *rec;
435 unsigned long flags; 483 struct hlist_head *head;
436 char *trace_buf;
437 char *raw_data;
438 int syscall_nr; 484 int syscall_nr;
439 int rctx; 485 int rctx;
440 int size; 486 int size;
441 int cpu;
442 487
443 syscall_nr = syscall_get_nr(current, regs); 488 syscall_nr = syscall_get_nr(current, regs);
444 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 489 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
445 return; 490 return;
446 491
447 sys_data = syscall_nr_to_meta(syscall_nr); 492 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -453,44 +498,24 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
453 size = ALIGN(size + sizeof(u32), sizeof(u64)); 498 size = ALIGN(size + sizeof(u32), sizeof(u64));
454 size -= sizeof(u32); 499 size -= sizeof(u32);
455 500
456 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 501 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
457 "profile buffer not large enough")) 502 "perf buffer not large enough"))
458 return; 503 return;
459 504
460 /* Protect the per cpu buffer, begin the rcu read side */ 505 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
461 local_irq_save(flags); 506 sys_data->enter_event->event.type, regs, &rctx);
462 507 if (!rec)
463 rctx = perf_swevent_get_recursion_context(); 508 return;
464 if (rctx < 0)
465 goto end_recursion;
466
467 cpu = smp_processor_id();
468
469 trace_buf = rcu_dereference(perf_trace_buf);
470
471 if (!trace_buf)
472 goto end;
473
474 raw_data = per_cpu_ptr(trace_buf, cpu);
475
476 /* zero the dead bytes from align to not leak stack to user */
477 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
478 509
479 rec = (struct syscall_trace_enter *) raw_data;
480 tracing_generic_entry_update(&rec->ent, 0, 0);
481 rec->ent.type = sys_data->enter_event->id;
482 rec->nr = syscall_nr; 510 rec->nr = syscall_nr;
483 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 511 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
484 (unsigned long *)&rec->args); 512 (unsigned long *)&rec->args);
485 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
486 513
487end: 514 head = this_cpu_ptr(sys_data->enter_event->perf_events);
488 perf_swevent_put_recursion_context(rctx); 515 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
489end_recursion:
490 local_irq_restore(flags);
491} 516}
492 517
493int prof_sysenter_enable(struct ftrace_event_call *call) 518int perf_sysenter_enable(struct ftrace_event_call *call)
494{ 519{
495 int ret = 0; 520 int ret = 0;
496 int num; 521 int num;
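
The hunk above is the perf side of the same cleanup: the hand-rolled recursion guard, per-cpu buffer lookup and perf_tp_event() call in prof_syscall_enter() are replaced by the perf_trace_buf_prepare()/perf_trace_buf_submit() pair, and the record is delivered to the event's per-cpu hlist of active perf contexts rather than routed by event id. The outline every converted handler now follows, sketched with invented foo_* names (only the two helper calls and their argument order come from this patch):

static void perf_foo_handler(void *ignore, struct pt_regs *regs, long id)
{
        struct foo_trace_entry *rec;    /* hypothetical record layout */
        struct hlist_head *head;
        int rctx;
        int size;

        /* size arithmetic copied from the callers above */
        size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)) - sizeof(u32);

        rec = perf_trace_buf_prepare(size, foo_event_type, regs, &rctx);
        if (!rec)
                return;         /* recursion or no buffer; nothing to undo */

        /* fill *rec from regs here */

        head = this_cpu_ptr(foo_event->perf_events);
        perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
}
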
@@ -498,47 +523,44 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
498 num = ((struct syscall_metadata *)call->data)->syscall_nr; 523 num = ((struct syscall_metadata *)call->data)->syscall_nr;
499 524
500 mutex_lock(&syscall_trace_lock); 525 mutex_lock(&syscall_trace_lock);
501 if (!sys_prof_refcount_enter) 526 if (!sys_perf_refcount_enter)
502 ret = register_trace_sys_enter(prof_syscall_enter); 527 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
503 if (ret) { 528 if (ret) {
504 pr_info("event trace: Could not activate" 529 pr_info("event trace: Could not activate"
505 "syscall entry trace point"); 530 "syscall entry trace point");
506 } else { 531 } else {
507 set_bit(num, enabled_prof_enter_syscalls); 532 set_bit(num, enabled_perf_enter_syscalls);
508 sys_prof_refcount_enter++; 533 sys_perf_refcount_enter++;
509 } 534 }
510 mutex_unlock(&syscall_trace_lock); 535 mutex_unlock(&syscall_trace_lock);
511 return ret; 536 return ret;
512} 537}
513 538
514void prof_sysenter_disable(struct ftrace_event_call *call) 539void perf_sysenter_disable(struct ftrace_event_call *call)
515{ 540{
516 int num; 541 int num;
517 542
518 num = ((struct syscall_metadata *)call->data)->syscall_nr; 543 num = ((struct syscall_metadata *)call->data)->syscall_nr;
519 544
520 mutex_lock(&syscall_trace_lock); 545 mutex_lock(&syscall_trace_lock);
521 sys_prof_refcount_enter--; 546 sys_perf_refcount_enter--;
522 clear_bit(num, enabled_prof_enter_syscalls); 547 clear_bit(num, enabled_perf_enter_syscalls);
523 if (!sys_prof_refcount_enter) 548 if (!sys_perf_refcount_enter)
524 unregister_trace_sys_enter(prof_syscall_enter); 549 unregister_trace_sys_enter(perf_syscall_enter, NULL);
525 mutex_unlock(&syscall_trace_lock); 550 mutex_unlock(&syscall_trace_lock);
526} 551}
527 552
528static void prof_syscall_exit(struct pt_regs *regs, long ret) 553static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
529{ 554{
530 struct syscall_metadata *sys_data; 555 struct syscall_metadata *sys_data;
531 struct syscall_trace_exit *rec; 556 struct syscall_trace_exit *rec;
532 unsigned long flags; 557 struct hlist_head *head;
533 int syscall_nr; 558 int syscall_nr;
534 char *trace_buf;
535 char *raw_data;
536 int rctx; 559 int rctx;
537 int size; 560 int size;
538 int cpu;
539 561
540 syscall_nr = syscall_get_nr(current, regs); 562 syscall_nr = syscall_get_nr(current, regs);
541 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 563 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
542 return; 564 return;
543 565
544 sys_data = syscall_nr_to_meta(syscall_nr); 566 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -553,45 +575,23 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
553 * Impossible, but be paranoid with the future 575 * Impossible, but be paranoid with the future
554 * How to put this check outside runtime? 576 * How to put this check outside runtime?
555 */ 577 */
556 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 578 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
557 "exit event has grown above profile buffer size")) 579 "exit event has grown above perf buffer size"))
558 return; 580 return;
559 581
560 /* Protect the per cpu buffer, begin the rcu read side */ 582 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
561 local_irq_save(flags); 583 sys_data->exit_event->event.type, regs, &rctx);
562 584 if (!rec)
563 rctx = perf_swevent_get_recursion_context(); 585 return;
564 if (rctx < 0)
565 goto end_recursion;
566
567 cpu = smp_processor_id();
568
569 trace_buf = rcu_dereference(perf_trace_buf);
570
571 if (!trace_buf)
572 goto end;
573
574 raw_data = per_cpu_ptr(trace_buf, cpu);
575
576 /* zero the dead bytes from align to not leak stack to user */
577 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
578
579 rec = (struct syscall_trace_exit *)raw_data;
580 586
581 tracing_generic_entry_update(&rec->ent, 0, 0);
582 rec->ent.type = sys_data->exit_event->id;
583 rec->nr = syscall_nr; 587 rec->nr = syscall_nr;
584 rec->ret = syscall_get_return_value(current, regs); 588 rec->ret = syscall_get_return_value(current, regs);
585 589
586 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 590 head = this_cpu_ptr(sys_data->exit_event->perf_events);
587 591 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
588end:
589 perf_swevent_put_recursion_context(rctx);
590end_recursion:
591 local_irq_restore(flags);
592} 592}
593 593
594int prof_sysexit_enable(struct ftrace_event_call *call) 594int perf_sysexit_enable(struct ftrace_event_call *call)
595{ 595{
596 int ret = 0; 596 int ret = 0;
597 int num; 597 int num;
@@ -599,33 +599,73 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
599 num = ((struct syscall_metadata *)call->data)->syscall_nr; 599 num = ((struct syscall_metadata *)call->data)->syscall_nr;
600 600
601 mutex_lock(&syscall_trace_lock); 601 mutex_lock(&syscall_trace_lock);
602 if (!sys_prof_refcount_exit) 602 if (!sys_perf_refcount_exit)
603 ret = register_trace_sys_exit(prof_syscall_exit); 603 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
604 if (ret) { 604 if (ret) {
605 pr_info("event trace: Could not activate" 605 pr_info("event trace: Could not activate"
606 "syscall entry trace point"); 606 "syscall exit trace point");
607 } else { 607 } else {
608 set_bit(num, enabled_prof_exit_syscalls); 608 set_bit(num, enabled_perf_exit_syscalls);
609 sys_prof_refcount_exit++; 609 sys_perf_refcount_exit++;
610 } 610 }
611 mutex_unlock(&syscall_trace_lock); 611 mutex_unlock(&syscall_trace_lock);
612 return ret; 612 return ret;
613} 613}
614 614
615void prof_sysexit_disable(struct ftrace_event_call *call) 615void perf_sysexit_disable(struct ftrace_event_call *call)
616{ 616{
617 int num; 617 int num;
618 618
619 num = ((struct syscall_metadata *)call->data)->syscall_nr; 619 num = ((struct syscall_metadata *)call->data)->syscall_nr;
620 620
621 mutex_lock(&syscall_trace_lock); 621 mutex_lock(&syscall_trace_lock);
622 sys_prof_refcount_exit--; 622 sys_perf_refcount_exit--;
623 clear_bit(num, enabled_prof_exit_syscalls); 623 clear_bit(num, enabled_perf_exit_syscalls);
624 if (!sys_prof_refcount_exit) 624 if (!sys_perf_refcount_exit)
625 unregister_trace_sys_exit(prof_syscall_exit); 625 unregister_trace_sys_exit(perf_syscall_exit, NULL);
626 mutex_unlock(&syscall_trace_lock); 626 mutex_unlock(&syscall_trace_lock);
627} 627}
628 628
629#endif /* CONFIG_PERF_EVENTS */
630
631static int syscall_enter_register(struct ftrace_event_call *event,
632 enum trace_reg type)
633{
634 switch (type) {
635 case TRACE_REG_REGISTER:
636 return reg_event_syscall_enter(event);
637 case TRACE_REG_UNREGISTER:
638 unreg_event_syscall_enter(event);
639 return 0;
640
641#ifdef CONFIG_PERF_EVENTS
642 case TRACE_REG_PERF_REGISTER:
643 return perf_sysenter_enable(event);
644 case TRACE_REG_PERF_UNREGISTER:
645 perf_sysenter_disable(event);
646 return 0;
629#endif 647#endif
648 }
649 return 0;
650}
630 651
652static int syscall_exit_register(struct ftrace_event_call *event,
653 enum trace_reg type)
654{
655 switch (type) {
656 case TRACE_REG_REGISTER:
657 return reg_event_syscall_exit(event);
658 case TRACE_REG_UNREGISTER:
659 unreg_event_syscall_exit(event);
660 return 0;
631 661
662#ifdef CONFIG_PERF_EVENTS
663 case TRACE_REG_PERF_REGISTER:
664 return perf_sysexit_enable(event);
665 case TRACE_REG_PERF_UNREGISTER:
666 perf_sysexit_disable(event);
667 return 0;
668#endif
669 }
670 return 0;
671}
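
The syscall_enter_register()/syscall_exit_register() callbacks above fold the old prof_* enable/disable exports into a single registration hook per direction, dispatching on the registration type so the ftrace and perf paths share one entry point. A minimal userspace sketch of that dispatch shape follows; the enum, struct and counters are illustrative stand-ins, not the kernel API:

/*
 * Sketch of the "one callback, dispatch on registration type" pattern.
 */
#include <stdio.h>

enum reg_type {
	REG_REGISTER,		/* stand-in for TRACE_REG_REGISTER        */
	REG_UNREGISTER,		/* stand-in for TRACE_REG_UNREGISTER      */
	REG_PERF_REGISTER,	/* stand-in for TRACE_REG_PERF_REGISTER   */
	REG_PERF_UNREGISTER,	/* stand-in for TRACE_REG_PERF_UNREGISTER */
};

struct event {
	const char *name;
	int ftrace_users;
	int perf_users;
};

/* Single entry point, mirroring the switch shape above. */
static int event_reg(struct event *ev, enum reg_type type)
{
	switch (type) {
	case REG_REGISTER:
		ev->ftrace_users++;
		return 0;
	case REG_UNREGISTER:
		ev->ftrace_users--;
		return 0;
	case REG_PERF_REGISTER:
		ev->perf_users++;
		return 0;
	case REG_PERF_UNREGISTER:
		ev->perf_users--;
		return 0;
	}
	return 0;
}

int main(void)
{
	struct event ev = { .name = "sys_exit", .ftrace_users = 0, .perf_users = 0 };

	event_reg(&ev, REG_REGISTER);
	event_reg(&ev, REG_PERF_REGISTER);
	printf("%s: %d ftrace user(s), %d perf user(s)\n",
	       ev.name, ev.ftrace_users, ev.perf_users);
	event_reg(&ev, REG_PERF_UNREGISTER);
	event_reg(&ev, REG_UNREGISTER);
	return 0;
}
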
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
deleted file mode 100644
index a7974a552ca9..000000000000
--- a/kernel/trace/trace_sysprof.c
+++ /dev/null
@@ -1,329 +0,0 @@
1/*
2 * trace stack traces
3 *
4 * Copyright (C) 2004-2008, Soeren Sandmann
5 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/kallsyms.h>
9#include <linux/debugfs.h>
10#include <linux/hrtimer.h>
11#include <linux/uaccess.h>
12#include <linux/ftrace.h>
13#include <linux/module.h>
14#include <linux/irq.h>
15#include <linux/fs.h>
16
17#include <asm/stacktrace.h>
18
19#include "trace.h"
20
21static struct trace_array *sysprof_trace;
22static int __read_mostly tracer_enabled;
23
24/*
25 * 1 msec sample interval by default:
26 */
27static unsigned long sample_period = 1000000;
28static const unsigned int sample_max_depth = 512;
29
30static DEFINE_MUTEX(sample_timer_lock);
31/*
32 * Per CPU hrtimers that do the profiling:
33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35
36struct stack_frame {
37 const void __user *next_fp;
38 unsigned long return_address;
39};
40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
42{
43 int ret;
44
45 if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
46 return 0;
47
48 ret = 1;
49 pagefault_disable();
50 if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
51 ret = 0;
52 pagefault_enable();
53
54 return ret;
55}
56
57struct backtrace_info {
58 struct trace_array_cpu *data;
59 struct trace_array *tr;
60 int pos;
61};
62
63static void
64backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
65{
66 /* Ignore warnings */
67}
68
69static void backtrace_warning(void *data, char *msg)
70{
71 /* Ignore warnings */
72}
73
74static int backtrace_stack(void *data, char *name)
75{
76 /* Don't bother with IRQ stacks for now */
77 return -1;
78}
79
80static void backtrace_address(void *data, unsigned long addr, int reliable)
81{
82 struct backtrace_info *info = data;
83
84 if (info->pos < sample_max_depth && reliable) {
85 __trace_special(info->tr, info->data, 1, addr, 0);
86
87 info->pos++;
88 }
89}
90
91static const struct stacktrace_ops backtrace_ops = {
92 .warning = backtrace_warning,
93 .warning_symbol = backtrace_warning_symbol,
94 .stack = backtrace_stack,
95 .address = backtrace_address,
96 .walk_stack = print_context_stack,
97};
98
99static int
100trace_kernel(struct pt_regs *regs, struct trace_array *tr,
101 struct trace_array_cpu *data)
102{
103 struct backtrace_info info;
104 unsigned long bp;
105 char *stack;
106
107 info.tr = tr;
108 info.data = data;
109 info.pos = 1;
110
111 __trace_special(info.tr, info.data, 1, regs->ip, 0);
112
113 stack = ((char *)regs + sizeof(struct pt_regs));
114#ifdef CONFIG_FRAME_POINTER
115 bp = regs->bp;
116#else
117 bp = 0;
118#endif
119
120 dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
121
122 return info.pos;
123}
124
125static void timer_notify(struct pt_regs *regs, int cpu)
126{
127 struct trace_array_cpu *data;
128 struct stack_frame frame;
129 struct trace_array *tr;
130 const void __user *fp;
131 int is_user;
132 int i;
133
134 if (!regs)
135 return;
136
137 tr = sysprof_trace;
138 data = tr->data[cpu];
139 is_user = user_mode(regs);
140
141 if (!current || current->pid == 0)
142 return;
143
144 if (is_user && current->state != TASK_RUNNING)
145 return;
146
147 __trace_special(tr, data, 0, 0, current->pid);
148
149 if (!is_user)
150 i = trace_kernel(regs, tr, data);
151 else
152 i = 0;
153
154 /*
155 * Trace user stack if we are not a kernel thread
156 */
157 if (current->mm && i < sample_max_depth) {
158 regs = (struct pt_regs *)current->thread.sp0 - 1;
159
160 fp = (void __user *)regs->bp;
161
162 __trace_special(tr, data, 2, regs->ip, 0);
163
164 while (i < sample_max_depth) {
165 frame.next_fp = NULL;
166 frame.return_address = 0;
167 if (!copy_stack_frame(fp, &frame))
168 break;
169 if ((unsigned long)fp < regs->sp)
170 break;
171
172 __trace_special(tr, data, 2, frame.return_address,
173 (unsigned long)fp);
174 fp = frame.next_fp;
175
176 i++;
177 }
178
179 }
180
181 /*
182 * Special trace entry if we overflow the max depth:
183 */
184 if (i == sample_max_depth)
185 __trace_special(tr, data, -1, -1, -1);
186
187 __trace_special(tr, data, 3, current->pid, i);
188}
189
190static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
191{
192 /* trace here */
193 timer_notify(get_irq_regs(), smp_processor_id());
194
195 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
196
197 return HRTIMER_RESTART;
198}
199
200static void start_stack_timer(void *unused)
201{
202 struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
203
204 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
205 hrtimer->function = stack_trace_timer_fn;
206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
208 HRTIMER_MODE_REL_PINNED);
209}
210
211static void start_stack_timers(void)
212{
213 on_each_cpu(start_stack_timer, NULL, 1);
214}
215
216static void stop_stack_timer(int cpu)
217{
218 struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
219
220 hrtimer_cancel(hrtimer);
221}
222
223static void stop_stack_timers(void)
224{
225 int cpu;
226
227 for_each_online_cpu(cpu)
228 stop_stack_timer(cpu);
229}
230
231static void stop_stack_trace(struct trace_array *tr)
232{
233 mutex_lock(&sample_timer_lock);
234 stop_stack_timers();
235 tracer_enabled = 0;
236 mutex_unlock(&sample_timer_lock);
237}
238
239static int stack_trace_init(struct trace_array *tr)
240{
241 sysprof_trace = tr;
242
243 tracing_start_cmdline_record();
244
245 mutex_lock(&sample_timer_lock);
246 start_stack_timers();
247 tracer_enabled = 1;
248 mutex_unlock(&sample_timer_lock);
249 return 0;
250}
251
252static void stack_trace_reset(struct trace_array *tr)
253{
254 tracing_stop_cmdline_record();
255 stop_stack_trace(tr);
256}
257
258static struct tracer stack_trace __read_mostly =
259{
260 .name = "sysprof",
261 .init = stack_trace_init,
262 .reset = stack_trace_reset,
263#ifdef CONFIG_FTRACE_SELFTEST
264 .selftest = trace_selftest_startup_sysprof,
265#endif
266};
267
268__init static int init_stack_trace(void)
269{
270 return register_tracer(&stack_trace);
271}
272device_initcall(init_stack_trace);
273
274#define MAX_LONG_DIGITS 22
275
276static ssize_t
277sysprof_sample_read(struct file *filp, char __user *ubuf,
278 size_t cnt, loff_t *ppos)
279{
280 char buf[MAX_LONG_DIGITS];
281 int r;
282
283 r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
284
285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
286}
287
288static ssize_t
289sysprof_sample_write(struct file *filp, const char __user *ubuf,
290 size_t cnt, loff_t *ppos)
291{
292 char buf[MAX_LONG_DIGITS];
293 unsigned long val;
294
295 if (cnt > MAX_LONG_DIGITS-1)
296 cnt = MAX_LONG_DIGITS-1;
297
298 if (copy_from_user(&buf, ubuf, cnt))
299 return -EFAULT;
300
301 buf[cnt] = 0;
302
303 val = simple_strtoul(buf, NULL, 10);
304 /*
305 * Enforce a minimum sample period of 100 usecs:
306 */
307 if (val < 100)
308 val = 100;
309
310 mutex_lock(&sample_timer_lock);
311 stop_stack_timers();
312 sample_period = val * 1000;
313 start_stack_timers();
314 mutex_unlock(&sample_timer_lock);
315
316 return cnt;
317}
318
319static const struct file_operations sysprof_sample_fops = {
320 .read = sysprof_sample_read,
321 .write = sysprof_sample_write,
322};
323
324void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
325{
326
327 trace_create_file("sysprof_sample_period", 0644,
328 d_tracer, NULL, &sysprof_sample_fops);
329}
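
The deleted sysprof tracer sampled user stacks by following saved frame pointers, copying one two-word struct stack_frame at a time (copy_stack_frame() and the loop in timer_notify() above). A rough userspace sketch of the same walk follows; it inspects the current process's own frames rather than another task's user stack, and assumes an x86-64 frame layout with frame pointers kept (compile with -O0 -fno-omit-frame-pointer), so it illustrates only the loop shape:

/*
 * Frame-pointer walk sketch: stop when the chain stops moving toward
 * older (higher) stack addresses or the depth limit is reached.
 */
#include <stdio.h>
#include <stdint.h>

struct stack_frame {
	const void *next_fp;		/* saved caller frame pointer */
	unsigned long return_address;	/* saved return address       */
};

static void walk_own_stack(unsigned int max_depth)
{
	const struct stack_frame *fp = __builtin_frame_address(0);
	unsigned int i;

	for (i = 0; fp && i < max_depth; i++) {
		printf("frame %2u: return address %#lx\n",
		       i, fp->return_address);
		/* The stack grows down, so a caller frame must sit at a
		 * strictly higher address; anything else ends the walk. */
		if ((uintptr_t)fp->next_fp <= (uintptr_t)fp)
			break;
		fp = fp->next_fp;
	}
}

int main(void)
{
	walk_own_stack(16);
	return 0;
}
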
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..209b379a4721 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
12#include <linux/kref.h> 13#include <linux/kref.h>
13#include "trace_stat.h" 14#include "trace_stat.h"
14#include "trace.h" 15#include "trace.h"
@@ -48,7 +49,8 @@ static void cpu_workqueue_stat_free(struct kref *kref)
48 49
49/* Insertion of a work */ 50/* Insertion of a work */
50static void 51static void
51probe_workqueue_insertion(struct task_struct *wq_thread, 52probe_workqueue_insertion(void *ignore,
53 struct task_struct *wq_thread,
52 struct work_struct *work) 54 struct work_struct *work)
53{ 55{
54 int cpu = cpumask_first(&wq_thread->cpus_allowed); 56 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -69,7 +71,8 @@ found:
69 71
70/* Execution of a work */ 72/* Execution of a work */
71static void 73static void
72probe_workqueue_execution(struct task_struct *wq_thread, 74probe_workqueue_execution(void *ignore,
75 struct task_struct *wq_thread,
73 struct work_struct *work) 76 struct work_struct *work)
74{ 77{
75 int cpu = cpumask_first(&wq_thread->cpus_allowed); 78 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -89,7 +92,8 @@ found:
89} 92}
90 93
91/* Creation of a cpu workqueue thread */ 94/* Creation of a cpu workqueue thread */
92static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) 95static void probe_workqueue_creation(void *ignore,
96 struct task_struct *wq_thread, int cpu)
93{ 97{
94 struct cpu_workqueue_stats *cws; 98 struct cpu_workqueue_stats *cws;
95 unsigned long flags; 99 unsigned long flags;
@@ -113,7 +117,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
113} 117}
114 118
115/* Destruction of a cpu workqueue thread */ 119/* Destruction of a cpu workqueue thread */
116static void probe_workqueue_destruction(struct task_struct *wq_thread) 120static void
121probe_workqueue_destruction(void *ignore, struct task_struct *wq_thread)
117{ 122{
118 /* Workqueue only execute on one cpu */ 123 /* Workqueue only execute on one cpu */
119 int cpu = cpumask_first(&wq_thread->cpus_allowed); 124 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -258,35 +263,35 @@ int __init trace_workqueue_early_init(void)
258{ 263{
259 int ret, cpu; 264 int ret, cpu;
260 265
261 ret = register_trace_workqueue_insertion(probe_workqueue_insertion); 266 for_each_possible_cpu(cpu) {
267 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
268 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
269 }
270
271 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
262 if (ret) 272 if (ret)
263 goto out; 273 goto out;
264 274
265 ret = register_trace_workqueue_execution(probe_workqueue_execution); 275 ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL);
266 if (ret) 276 if (ret)
267 goto no_insertion; 277 goto no_insertion;
268 278
269 ret = register_trace_workqueue_creation(probe_workqueue_creation); 279 ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL);
270 if (ret) 280 if (ret)
271 goto no_execution; 281 goto no_execution;
272 282
273 ret = register_trace_workqueue_destruction(probe_workqueue_destruction); 283 ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL);
274 if (ret) 284 if (ret)
275 goto no_creation; 285 goto no_creation;
276 286
277 for_each_possible_cpu(cpu) {
278 spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
279 INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
280 }
281
282 return 0; 287 return 0;
283 288
284no_creation: 289no_creation:
285 unregister_trace_workqueue_creation(probe_workqueue_creation); 290 unregister_trace_workqueue_creation(probe_workqueue_creation, NULL);
286no_execution: 291no_execution:
287 unregister_trace_workqueue_execution(probe_workqueue_execution); 292 unregister_trace_workqueue_execution(probe_workqueue_execution, NULL);
288no_insertion: 293no_insertion:
289 unregister_trace_workqueue_insertion(probe_workqueue_insertion); 294 unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
290out: 295out:
291 pr_warning("trace_workqueue: unable to trace workqueues\n"); 296 pr_warning("trace_workqueue: unable to trace workqueues\n");
292 297
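
trace_workqueue_early_init() above registers its probes in sequence and, if a later registration fails, unregisters the ones that succeeded in reverse order via the goto labels. A small self-contained sketch of that register-or-unwind pattern follows; the probe names and the forced failure are illustrative, not the tracepoint API:

/*
 * Register-or-unwind sketch: each successful registration is rolled back,
 * in reverse order, when a later one fails.
 */
#include <stdio.h>

static int register_probe(const char *name, int fail)
{
	if (fail)
		return -1;
	printf("registered %s probe\n", name);
	return 0;
}

static void unregister_probe(const char *name)
{
	printf("unregistered %s probe\n", name);
}

static int init_probes(void)
{
	int ret;

	ret = register_probe("insertion", 0);
	if (ret)
		goto out;

	ret = register_probe("execution", 0);
	if (ret)
		goto no_insertion;

	ret = register_probe("creation", 1);	/* simulate a failure here */
	if (ret)
		goto no_execution;

	return 0;

no_execution:
	unregister_probe("execution");
no_insertion:
	unregister_probe("insertion");
out:
	printf("unable to trace workqueues\n");
	return ret;
}

int main(void)
{
	return init_probes() ? 1 : 0;
}

Note also that the hunk above moves the per-CPU stat initialization ahead of the registrations, so a probe can no longer fire before the lock and list it touches have been set up.
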