author    Michal Marek <mmarek@suse.cz>	2010-08-04 07:59:13 -0400
committer Michal Marek <mmarek@suse.cz>	2010-08-04 07:59:13 -0400
commit    772320e84588dcbe1600ffb83e5f328f2209ac2a (patch)
tree      a7de21b79340aeaa17c58126f6b801b82c77b53a /kernel/trace
parent    1ce53adf13a54375d2a5c7cdbe341b2558389615 (diff)
parent    9fe6206f400646a2322096b56c59891d530e8d51 (diff)
Merge commit 'v2.6.35' into kbuild/kbuild
Conflicts:
	arch/powerpc/Makefile
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                    26
-rw-r--r--  kernel/trace/Makefile                    5
-rw-r--r--  kernel/trace/blktrace.c                146
-rw-r--r--  kernel/trace/ftrace.c                  172
-rw-r--r--  kernel/trace/kmemtrace.c                70
-rw-r--r--  kernel/trace/power-traces.c              1
-rw-r--r--  kernel/trace/ring_buffer.c             258
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c     6
-rw-r--r--  kernel/trace/trace.c                   408
-rw-r--r--  kernel/trace/trace.h                    67
-rw-r--r--  kernel/trace/trace_branch.c             27
-rw-r--r--  kernel/trace/trace_clock.c               5
-rw-r--r--  kernel/trace/trace_entries.h            12
-rw-r--r--  kernel/trace/trace_event_perf.c        195
-rw-r--r--  kernel/trace/trace_event_profile.c     122
-rw-r--r--  kernel/trace/trace_events.c            219
-rw-r--r--  kernel/trace/trace_events_filter.c      35
-rw-r--r--  kernel/trace/trace_export.c            103
-rw-r--r--  kernel/trace/trace_functions_graph.c   284
-rw-r--r--  kernel/trace/trace_hw_branches.c       312
-rw-r--r--  kernel/trace/trace_irqsoff.c           271
-rw-r--r--  kernel/trace/trace_kprobe.c            947
-rw-r--r--  kernel/trace/trace_ksym.c               27
-rw-r--r--  kernel/trace/trace_mmiotrace.c           1
-rw-r--r--  kernel/trace/trace_output.c            155
-rw-r--r--  kernel/trace/trace_output.h              2
-rw-r--r--  kernel/trace/trace_sched_switch.c       21
-rw-r--r--  kernel/trace/trace_sched_wakeup.c       29
-rw-r--r--  kernel/trace/trace_selftest.c           65
-rw-r--r--  kernel/trace/trace_stack.c              24
-rw-r--r--  kernel/trace/trace_stat.c                1
-rw-r--r--  kernel/trace/trace_syscalls.c          374
-rw-r--r--  kernel/trace/trace_workqueue.c          27
33 files changed, 2532 insertions(+), 1885 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..8b1797c4545b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER
27config HAVE_FUNCTION_GRAPH_FP_TEST 27config HAVE_FUNCTION_GRAPH_FP_TEST
28 bool 28 bool
29 help 29 help
30 An arch may pass in a unique value (frame pointer) to both the 30 See Documentation/trace/ftrace-design.txt
31 entering and exiting of a function. On exit, the value is compared
32 and if it does not match, then it will panic the kernel.
33 31
34config HAVE_FUNCTION_TRACE_MCOUNT_TEST 32config HAVE_FUNCTION_TRACE_MCOUNT_TEST
35 bool 33 bool
@@ -46,9 +44,6 @@ config HAVE_FTRACE_MCOUNT_RECORD
46 help 44 help
47 See Documentation/trace/ftrace-design.txt 45 See Documentation/trace/ftrace-design.txt
48 46
49config HAVE_HW_BRANCH_TRACER
50 bool
51
52config HAVE_SYSCALL_TRACEPOINTS 47config HAVE_SYSCALL_TRACEPOINTS
53 bool 48 bool
54 help 49 help
@@ -330,15 +325,6 @@ config BRANCH_TRACER
330 325
331 Say N if unsure. 326 Say N if unsure.
332 327
333config POWER_TRACER
334 bool "Trace power consumption behavior"
335 depends on X86
336 select GENERIC_TRACER
337 help
338 This tracer helps developers to analyze and optimize the kernel's
339 power management decisions, specifically the C-state and P-state
340 behavior.
341
342config KSYM_TRACER 328config KSYM_TRACER
343 bool "Trace read and write access on kernel memory locations" 329 bool "Trace read and write access on kernel memory locations"
344 depends on HAVE_HW_BREAKPOINT 330 depends on HAVE_HW_BREAKPOINT
@@ -385,14 +371,6 @@ config STACK_TRACER
385 371
386 Say N if unsure. 372 Say N if unsure.
387 373
388config HW_BRANCH_TRACER
389 depends on HAVE_HW_BRANCH_TRACER
390 bool "Trace hw branches"
391 select GENERIC_TRACER
392 help
393 This tracer records all branches on the system in a circular
394 buffer, giving access to the last N branches for each cpu.
395
396config KMEMTRACE 374config KMEMTRACE
397 bool "Trace SLAB allocations" 375 bool "Trace SLAB allocations"
398 select GENERIC_TRACER 376 select GENERIC_TRACER
@@ -451,7 +429,7 @@ config BLK_DEV_IO_TRACE
451 429
452config KPROBE_EVENT 430config KPROBE_EVENT
453 depends on KPROBES 431 depends on KPROBES
454 depends on X86 432 depends on HAVE_REGS_AND_STACK_ACCESS_API
455 bool "Enable kprobes-based dynamic events" 433 bool "Enable kprobes-based dynamic events"
456 select TRACING 434 select TRACING
457 default y 435 default y
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..ffb1a5b0550e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 44obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 45obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 46obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
@@ -51,7 +50,9 @@ endif
51obj-$(CONFIG_EVENT_TRACING) += trace_events.o 50obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 51obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 52obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 53ifeq ($(CONFIG_PERF_EVENTS),y)
54obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
55endif
55obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 56obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
56obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 57obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
57obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o 58obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..638711c17504 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h>
24#include <linux/debugfs.h> 25#include <linux/debugfs.h>
25#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
26#include <linux/time.h> 27#include <linux/time.h>
@@ -540,9 +541,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
540 if (ret) 541 if (ret)
541 return ret; 542 return ret;
542 543
543 if (copy_to_user(arg, &buts, sizeof(buts))) 544 if (copy_to_user(arg, &buts, sizeof(buts))) {
545 blk_trace_remove(q);
544 return -EFAULT; 546 return -EFAULT;
545 547 }
546 return 0; 548 return 0;
547} 549}
548EXPORT_SYMBOL_GPL(blk_trace_setup); 550EXPORT_SYMBOL_GPL(blk_trace_setup);
@@ -673,28 +675,33 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
673 } 675 }
674} 676}
675 677
676static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) 678static void blk_add_trace_rq_abort(void *ignore,
679 struct request_queue *q, struct request *rq)
677{ 680{
678 blk_add_trace_rq(q, rq, BLK_TA_ABORT); 681 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
679} 682}
680 683
681static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) 684static void blk_add_trace_rq_insert(void *ignore,
685 struct request_queue *q, struct request *rq)
682{ 686{
683 blk_add_trace_rq(q, rq, BLK_TA_INSERT); 687 blk_add_trace_rq(q, rq, BLK_TA_INSERT);
684} 688}
685 689
686static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) 690static void blk_add_trace_rq_issue(void *ignore,
691 struct request_queue *q, struct request *rq)
687{ 692{
688 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 693 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
689} 694}
690 695
691static void blk_add_trace_rq_requeue(struct request_queue *q, 696static void blk_add_trace_rq_requeue(void *ignore,
697 struct request_queue *q,
692 struct request *rq) 698 struct request *rq)
693{ 699{
694 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); 700 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
695} 701}
696 702
697static void blk_add_trace_rq_complete(struct request_queue *q, 703static void blk_add_trace_rq_complete(void *ignore,
704 struct request_queue *q,
698 struct request *rq) 705 struct request *rq)
699{ 706{
700 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); 707 blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
@@ -722,34 +729,40 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
722 !bio_flagged(bio, BIO_UPTODATE), 0, NULL); 729 !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
723} 730}
724 731
725static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) 732static void blk_add_trace_bio_bounce(void *ignore,
733 struct request_queue *q, struct bio *bio)
726{ 734{
727 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); 735 blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
728} 736}
729 737
730static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) 738static void blk_add_trace_bio_complete(void *ignore,
739 struct request_queue *q, struct bio *bio)
731{ 740{
732 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); 741 blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
733} 742}
734 743
735static void blk_add_trace_bio_backmerge(struct request_queue *q, 744static void blk_add_trace_bio_backmerge(void *ignore,
745 struct request_queue *q,
736 struct bio *bio) 746 struct bio *bio)
737{ 747{
738 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); 748 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
739} 749}
740 750
741static void blk_add_trace_bio_frontmerge(struct request_queue *q, 751static void blk_add_trace_bio_frontmerge(void *ignore,
752 struct request_queue *q,
742 struct bio *bio) 753 struct bio *bio)
743{ 754{
744 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); 755 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
745} 756}
746 757
747static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) 758static void blk_add_trace_bio_queue(void *ignore,
759 struct request_queue *q, struct bio *bio)
748{ 760{
749 blk_add_trace_bio(q, bio, BLK_TA_QUEUE); 761 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
750} 762}
751 763
752static void blk_add_trace_getrq(struct request_queue *q, 764static void blk_add_trace_getrq(void *ignore,
765 struct request_queue *q,
753 struct bio *bio, int rw) 766 struct bio *bio, int rw)
754{ 767{
755 if (bio) 768 if (bio)
@@ -763,7 +776,8 @@ static void blk_add_trace_getrq(struct request_queue *q,
763} 776}
764 777
765 778
766static void blk_add_trace_sleeprq(struct request_queue *q, 779static void blk_add_trace_sleeprq(void *ignore,
780 struct request_queue *q,
767 struct bio *bio, int rw) 781 struct bio *bio, int rw)
768{ 782{
769 if (bio) 783 if (bio)
@@ -777,7 +791,7 @@ static void blk_add_trace_sleeprq(struct request_queue *q,
777 } 791 }
778} 792}
779 793
780static void blk_add_trace_plug(struct request_queue *q) 794static void blk_add_trace_plug(void *ignore, struct request_queue *q)
781{ 795{
782 struct blk_trace *bt = q->blk_trace; 796 struct blk_trace *bt = q->blk_trace;
783 797
@@ -785,7 +799,7 @@ static void blk_add_trace_plug(struct request_queue *q)
785 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); 799 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
786} 800}
787 801
788static void blk_add_trace_unplug_io(struct request_queue *q) 802static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q)
789{ 803{
790 struct blk_trace *bt = q->blk_trace; 804 struct blk_trace *bt = q->blk_trace;
791 805
@@ -798,7 +812,7 @@ static void blk_add_trace_unplug_io(struct request_queue *q)
798 } 812 }
799} 813}
800 814
801static void blk_add_trace_unplug_timer(struct request_queue *q) 815static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
802{ 816{
803 struct blk_trace *bt = q->blk_trace; 817 struct blk_trace *bt = q->blk_trace;
804 818
@@ -811,7 +825,8 @@ static void blk_add_trace_unplug_timer(struct request_queue *q)
811 } 825 }
812} 826}
813 827
814static void blk_add_trace_split(struct request_queue *q, struct bio *bio, 828static void blk_add_trace_split(void *ignore,
829 struct request_queue *q, struct bio *bio,
815 unsigned int pdu) 830 unsigned int pdu)
816{ 831{
817 struct blk_trace *bt = q->blk_trace; 832 struct blk_trace *bt = q->blk_trace;
@@ -827,6 +842,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
827 842
828/** 843/**
829 * blk_add_trace_remap - Add a trace for a remap operation 844 * blk_add_trace_remap - Add a trace for a remap operation
845 * @ignore: trace callback data parameter (not used)
830 * @q: queue the io is for 846 * @q: queue the io is for
831 * @bio: the source bio 847 * @bio: the source bio
832 * @dev: target device 848 * @dev: target device
@@ -837,8 +853,9 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
837 * it spans a stripe (or similar). Add a trace for that action. 853 * it spans a stripe (or similar). Add a trace for that action.
838 * 854 *
839 **/ 855 **/
840static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, 856static void blk_add_trace_remap(void *ignore,
841 dev_t dev, sector_t from) 857 struct request_queue *q, struct bio *bio,
858 dev_t dev, sector_t from)
842{ 859{
843 struct blk_trace *bt = q->blk_trace; 860 struct blk_trace *bt = q->blk_trace;
844 struct blk_io_trace_remap r; 861 struct blk_io_trace_remap r;
@@ -857,6 +874,7 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
857 874
858/** 875/**
859 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 876 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
877 * @ignore: trace callback data parameter (not used)
860 * @q: queue the io is for 878 * @q: queue the io is for
861 * @rq: the source request 879 * @rq: the source request
862 * @dev: target device 880 * @dev: target device
@@ -867,7 +885,8 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
867 * Add a trace for that action. 885 * Add a trace for that action.
868 * 886 *
869 **/ 887 **/
870static void blk_add_trace_rq_remap(struct request_queue *q, 888static void blk_add_trace_rq_remap(void *ignore,
889 struct request_queue *q,
871 struct request *rq, dev_t dev, 890 struct request *rq, dev_t dev,
872 sector_t from) 891 sector_t from)
873{ 892{
@@ -919,64 +938,64 @@ static void blk_register_tracepoints(void)
919{ 938{
920 int ret; 939 int ret;
921 940
922 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); 941 ret = register_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
923 WARN_ON(ret); 942 WARN_ON(ret);
924 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); 943 ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
925 WARN_ON(ret); 944 WARN_ON(ret);
926 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); 945 ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
927 WARN_ON(ret); 946 WARN_ON(ret);
928 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); 947 ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
929 WARN_ON(ret); 948 WARN_ON(ret);
930 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); 949 ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
931 WARN_ON(ret); 950 WARN_ON(ret);
932 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); 951 ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
933 WARN_ON(ret); 952 WARN_ON(ret);
934 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); 953 ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
935 WARN_ON(ret); 954 WARN_ON(ret);
936 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 955 ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
937 WARN_ON(ret); 956 WARN_ON(ret);
938 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 957 ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
939 WARN_ON(ret); 958 WARN_ON(ret);
940 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); 959 ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
941 WARN_ON(ret); 960 WARN_ON(ret);
942 ret = register_trace_block_getrq(blk_add_trace_getrq); 961 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
943 WARN_ON(ret); 962 WARN_ON(ret);
944 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); 963 ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
945 WARN_ON(ret); 964 WARN_ON(ret);
946 ret = register_trace_block_plug(blk_add_trace_plug); 965 ret = register_trace_block_plug(blk_add_trace_plug, NULL);
947 WARN_ON(ret); 966 WARN_ON(ret);
948 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); 967 ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
949 WARN_ON(ret); 968 WARN_ON(ret);
950 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); 969 ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
951 WARN_ON(ret); 970 WARN_ON(ret);
952 ret = register_trace_block_split(blk_add_trace_split); 971 ret = register_trace_block_split(blk_add_trace_split, NULL);
953 WARN_ON(ret); 972 WARN_ON(ret);
954 ret = register_trace_block_remap(blk_add_trace_remap); 973 ret = register_trace_block_remap(blk_add_trace_remap, NULL);
955 WARN_ON(ret); 974 WARN_ON(ret);
956 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap); 975 ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
957 WARN_ON(ret); 976 WARN_ON(ret);
958} 977}
959 978
960static void blk_unregister_tracepoints(void) 979static void blk_unregister_tracepoints(void)
961{ 980{
962 unregister_trace_block_rq_remap(blk_add_trace_rq_remap); 981 unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL);
963 unregister_trace_block_remap(blk_add_trace_remap); 982 unregister_trace_block_remap(blk_add_trace_remap, NULL);
964 unregister_trace_block_split(blk_add_trace_split); 983 unregister_trace_block_split(blk_add_trace_split, NULL);
965 unregister_trace_block_unplug_io(blk_add_trace_unplug_io); 984 unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
966 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); 985 unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
967 unregister_trace_block_plug(blk_add_trace_plug); 986 unregister_trace_block_plug(blk_add_trace_plug, NULL);
968 unregister_trace_block_sleeprq(blk_add_trace_sleeprq); 987 unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
969 unregister_trace_block_getrq(blk_add_trace_getrq); 988 unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
970 unregister_trace_block_bio_queue(blk_add_trace_bio_queue); 989 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
971 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); 990 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
972 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); 991 unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL);
973 unregister_trace_block_bio_complete(blk_add_trace_bio_complete); 992 unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL);
974 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); 993 unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
975 unregister_trace_block_rq_complete(blk_add_trace_rq_complete); 994 unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
976 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); 995 unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
977 unregister_trace_block_rq_issue(blk_add_trace_rq_issue); 996 unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
978 unregister_trace_block_rq_insert(blk_add_trace_rq_insert); 997 unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
979 unregister_trace_block_rq_abort(blk_add_trace_rq_abort); 998 unregister_trace_block_rq_abort(blk_add_trace_rq_abort, NULL);
980 999
981 tracepoint_synchronize_unregister(); 1000 tracepoint_synchronize_unregister();
982} 1001}
@@ -1319,7 +1338,7 @@ out:
1319} 1338}
1320 1339
1321static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1340static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1322 int flags) 1341 int flags, struct trace_event *event)
1323{ 1342{
1324 return print_one_line(iter, false); 1343 return print_one_line(iter, false);
1325} 1344}
@@ -1341,7 +1360,8 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter)
1341} 1360}
1342 1361
1343static enum print_line_t 1362static enum print_line_t
1344blk_trace_event_print_binary(struct trace_iterator *iter, int flags) 1363blk_trace_event_print_binary(struct trace_iterator *iter, int flags,
1364 struct trace_event *event)
1345{ 1365{
1346 return blk_trace_synthesize_old_trace(iter) ? 1366 return blk_trace_synthesize_old_trace(iter) ?
1347 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; 1367 TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
@@ -1379,12 +1399,16 @@ static struct tracer blk_tracer __read_mostly = {
1379 .set_flag = blk_tracer_set_flag, 1399 .set_flag = blk_tracer_set_flag,
1380}; 1400};
1381 1401
1382static struct trace_event trace_blk_event = { 1402static struct trace_event_functions trace_blk_event_funcs = {
1383 .type = TRACE_BLK,
1384 .trace = blk_trace_event_print, 1403 .trace = blk_trace_event_print,
1385 .binary = blk_trace_event_print_binary, 1404 .binary = blk_trace_event_print_binary,
1386}; 1405};
1387 1406
1407static struct trace_event trace_blk_event = {
1408 .type = TRACE_BLK,
1409 .funcs = &trace_blk_event_funcs,
1410};
1411
1388static int __init init_blk_tracer(void) 1412static int __init init_blk_tracer(void)
1389{ 1413{
1390 if (!register_ftrace_event(&trace_blk_event)) { 1414 if (!register_ftrace_event(&trace_blk_event)) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1e6640f80454..6d2cb14f9449 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -22,12 +22,13 @@
22#include <linux/hardirq.h> 22#include <linux/hardirq.h>
23#include <linux/kthread.h> 23#include <linux/kthread.h>
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/kprobes.h>
26#include <linux/ftrace.h> 25#include <linux/ftrace.h>
27#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/slab.h>
28#include <linux/ctype.h> 28#include <linux/ctype.h>
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31#include <linux/rcupdate.h>
31 32
32#include <trace/events/sched.h> 33#include <trace/events/sched.h>
33 34
@@ -85,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
85ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
86ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
87 88
88#ifdef CONFIG_FUNCTION_GRAPH_TRACER 89/*
89static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); 90 * Traverse the ftrace_list, invoking all entries. The reason that we
90#endif 91 * can use rcu_dereference_raw() is that elements removed from this list
91 92 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list.
95 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */
92static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
93{ 99{
94 struct ftrace_ops *op = ftrace_list; 100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
95
96 /* in case someone actually ports this to alpha! */
97 read_barrier_depends();
98 101
99 while (op != &ftrace_list_end) { 102 while (op != &ftrace_list_end) {
100 /* silly alpha */
101 read_barrier_depends();
102 op->func(ip, parent_ip); 103 op->func(ip, parent_ip);
103 op = op->next; 104 op = rcu_dereference_raw(op->next); /*see above*/
104 }; 105 };
105} 106}
106 107
@@ -155,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
155 * the ops->next pointer is valid before another CPU sees 156 * the ops->next pointer is valid before another CPU sees
156 * the ops pointer included into the ftrace_list. 157 * the ops pointer included into the ftrace_list.
157 */ 158 */
158 smp_wmb(); 159 rcu_assign_pointer(ftrace_list, ops);
159 ftrace_list = ops;
160 160
161 if (ftrace_enabled) { 161 if (ftrace_enabled) {
162 ftrace_func_t func; 162 ftrace_func_t func;
@@ -264,6 +264,7 @@ struct ftrace_profile {
264 unsigned long counter; 264 unsigned long counter;
265#ifdef CONFIG_FUNCTION_GRAPH_TRACER 265#ifdef CONFIG_FUNCTION_GRAPH_TRACER
266 unsigned long long time; 266 unsigned long long time;
267 unsigned long long time_squared;
267#endif 268#endif
268}; 269};
269 270
@@ -366,9 +367,9 @@ static int function_stat_headers(struct seq_file *m)
366{ 367{
367#ifdef CONFIG_FUNCTION_GRAPH_TRACER 368#ifdef CONFIG_FUNCTION_GRAPH_TRACER
368 seq_printf(m, " Function " 369 seq_printf(m, " Function "
369 "Hit Time Avg\n" 370 "Hit Time Avg s^2\n"
370 " -------- " 371 " -------- "
371 "--- ---- ---\n"); 372 "--- ---- --- ---\n");
372#else 373#else
373 seq_printf(m, " Function Hit\n" 374 seq_printf(m, " Function Hit\n"
374 " -------- ---\n"); 375 " -------- ---\n");
@@ -384,6 +385,7 @@ static int function_stat_show(struct seq_file *m, void *v)
384 static DEFINE_MUTEX(mutex); 385 static DEFINE_MUTEX(mutex);
385 static struct trace_seq s; 386 static struct trace_seq s;
386 unsigned long long avg; 387 unsigned long long avg;
388 unsigned long long stddev;
387#endif 389#endif
388 390
389 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 391 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -394,11 +396,25 @@ static int function_stat_show(struct seq_file *m, void *v)
394 avg = rec->time; 396 avg = rec->time;
395 do_div(avg, rec->counter); 397 do_div(avg, rec->counter);
396 398
399 /* Sample standard deviation (s^2) */
400 if (rec->counter <= 1)
401 stddev = 0;
402 else {
403 stddev = rec->time_squared - rec->counter * avg * avg;
404 /*
405 * Divide only 1000 for ns^2 -> us^2 conversion.
406 * trace_print_graph_duration will divide 1000 again.
407 */
408 do_div(stddev, (rec->counter - 1) * 1000);
409 }
410
397 mutex_lock(&mutex); 411 mutex_lock(&mutex);
398 trace_seq_init(&s); 412 trace_seq_init(&s);
399 trace_print_graph_duration(rec->time, &s); 413 trace_print_graph_duration(rec->time, &s);
400 trace_seq_puts(&s, " "); 414 trace_seq_puts(&s, " ");
401 trace_print_graph_duration(avg, &s); 415 trace_print_graph_duration(avg, &s);
416 trace_seq_puts(&s, " ");
417 trace_print_graph_duration(stddev, &s);
402 trace_print_seq(m, &s); 418 trace_print_seq(m, &s);
403 mutex_unlock(&mutex); 419 mutex_unlock(&mutex);
404#endif 420#endif
@@ -650,6 +666,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
650 if (!stat->hash || !ftrace_profile_enabled) 666 if (!stat->hash || !ftrace_profile_enabled)
651 goto out; 667 goto out;
652 668
669 /* If the calltime was zero'd ignore it */
670 if (!trace->calltime)
671 goto out;
672
653 calltime = trace->rettime - trace->calltime; 673 calltime = trace->rettime - trace->calltime;
654 674
655 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { 675 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
@@ -668,8 +688,10 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
668 } 688 }
669 689
670 rec = ftrace_find_profiled_func(stat, trace->func); 690 rec = ftrace_find_profiled_func(stat, trace->func);
671 if (rec) 691 if (rec) {
672 rec->time += calltime; 692 rec->time += calltime;
693 rec->time_squared += calltime * calltime;
694 }
673 695
674 out: 696 out:
675 local_irq_restore(flags); 697 local_irq_restore(flags);
@@ -898,36 +920,6 @@ static struct dyn_ftrace *ftrace_free_records;
898 } \ 920 } \
899 } 921 }
900 922
901#ifdef CONFIG_KPROBES
902
903static int frozen_record_count;
904
905static inline void freeze_record(struct dyn_ftrace *rec)
906{
907 if (!(rec->flags & FTRACE_FL_FROZEN)) {
908 rec->flags |= FTRACE_FL_FROZEN;
909 frozen_record_count++;
910 }
911}
912
913static inline void unfreeze_record(struct dyn_ftrace *rec)
914{
915 if (rec->flags & FTRACE_FL_FROZEN) {
916 rec->flags &= ~FTRACE_FL_FROZEN;
917 frozen_record_count--;
918 }
919}
920
921static inline int record_frozen(struct dyn_ftrace *rec)
922{
923 return rec->flags & FTRACE_FL_FROZEN;
924}
925#else
926# define freeze_record(rec) ({ 0; })
927# define unfreeze_record(rec) ({ 0; })
928# define record_frozen(rec) ({ 0; })
929#endif /* CONFIG_KPROBES */
930
931static void ftrace_free_rec(struct dyn_ftrace *rec) 923static void ftrace_free_rec(struct dyn_ftrace *rec)
932{ 924{
933 rec->freelist = ftrace_free_records; 925 rec->freelist = ftrace_free_records;
@@ -1025,6 +1017,21 @@ static void ftrace_bug(int failed, unsigned long ip)
1025} 1017}
1026 1018
1027 1019
1020/* Return 1 if the address range is reserved for ftrace */
1021int ftrace_text_reserved(void *start, void *end)
1022{
1023 struct dyn_ftrace *rec;
1024 struct ftrace_page *pg;
1025
1026 do_for_each_ftrace_rec(pg, rec) {
1027 if (rec->ip <= (unsigned long)end &&
1028 rec->ip + MCOUNT_INSN_SIZE > (unsigned long)start)
1029 return 1;
1030 } while_for_each_ftrace_rec();
1031 return 0;
1032}
1033
1034
1028static int 1035static int
1029__ftrace_replace_code(struct dyn_ftrace *rec, int enable) 1036__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
1030{ 1037{
@@ -1076,14 +1083,6 @@ static void ftrace_replace_code(int enable)
1076 !(rec->flags & FTRACE_FL_CONVERTED)) 1083 !(rec->flags & FTRACE_FL_CONVERTED))
1077 continue; 1084 continue;
1078 1085
1079 /* ignore updates to this record's mcount site */
1080 if (get_kprobe((void *)rec->ip)) {
1081 freeze_record(rec);
1082 continue;
1083 } else {
1084 unfreeze_record(rec);
1085 }
1086
1087 failed = __ftrace_replace_code(rec, enable); 1086 failed = __ftrace_replace_code(rec, enable);
1088 if (failed) { 1087 if (failed) {
1089 rec->flags |= FTRACE_FL_FAILED; 1088 rec->flags |= FTRACE_FL_FAILED;
@@ -2300,6 +2299,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
2300 2299
2301#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2300#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2302static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; 2301static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2302static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2303
2303static int __init set_graph_function(char *str) 2304static int __init set_graph_function(char *str)
2304{ 2305{
2305 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); 2306 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -2426,6 +2427,7 @@ static const struct file_operations ftrace_notrace_fops = {
2426static DEFINE_MUTEX(graph_lock); 2427static DEFINE_MUTEX(graph_lock);
2427 2428
2428int ftrace_graph_count; 2429int ftrace_graph_count;
2430int ftrace_graph_filter_enabled;
2429unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2431unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2430 2432
2431static void * 2433static void *
@@ -2448,7 +2450,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
2448 mutex_lock(&graph_lock); 2450 mutex_lock(&graph_lock);
2449 2451
2450 /* Nothing, tell g_show to print all functions are enabled */ 2452 /* Nothing, tell g_show to print all functions are enabled */
2451 if (!ftrace_graph_count && !*pos) 2453 if (!ftrace_graph_filter_enabled && !*pos)
2452 return (void *)1; 2454 return (void *)1;
2453 2455
2454 return __g_next(m, pos); 2456 return __g_next(m, pos);
@@ -2494,6 +2496,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2494 mutex_lock(&graph_lock); 2496 mutex_lock(&graph_lock);
2495 if ((file->f_mode & FMODE_WRITE) && 2497 if ((file->f_mode & FMODE_WRITE) &&
2496 (file->f_flags & O_TRUNC)) { 2498 (file->f_flags & O_TRUNC)) {
2499 ftrace_graph_filter_enabled = 0;
2497 ftrace_graph_count = 0; 2500 ftrace_graph_count = 0;
2498 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2501 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2499 } 2502 }
@@ -2519,7 +2522,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2519 struct dyn_ftrace *rec; 2522 struct dyn_ftrace *rec;
2520 struct ftrace_page *pg; 2523 struct ftrace_page *pg;
2521 int search_len; 2524 int search_len;
2522 int found = 0; 2525 int fail = 1;
2523 int type, not; 2526 int type, not;
2524 char *search; 2527 char *search;
2525 bool exists; 2528 bool exists;
@@ -2530,37 +2533,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2530 2533
2531 /* decode regex */ 2534 /* decode regex */
2532 type = filter_parse_regex(buffer, strlen(buffer), &search, &not); 2535 type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
2533 if (not) 2536 if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
2534 return -EINVAL; 2537 return -EBUSY;
2535 2538
2536 search_len = strlen(search); 2539 search_len = strlen(search);
2537 2540
2538 mutex_lock(&ftrace_lock); 2541 mutex_lock(&ftrace_lock);
2539 do_for_each_ftrace_rec(pg, rec) { 2542 do_for_each_ftrace_rec(pg, rec) {
2540 2543
2541 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2542 break;
2543
2544 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) 2544 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
2545 continue; 2545 continue;
2546 2546
2547 if (ftrace_match_record(rec, search, search_len, type)) { 2547 if (ftrace_match_record(rec, search, search_len, type)) {
2548 /* ensure it is not already in the array */ 2548 /* if it is in the array */
2549 exists = false; 2549 exists = false;
2550 for (i = 0; i < *idx; i++) 2550 for (i = 0; i < *idx; i++) {
2551 if (array[i] == rec->ip) { 2551 if (array[i] == rec->ip) {
2552 exists = true; 2552 exists = true;
2553 break; 2553 break;
2554 } 2554 }
2555 if (!exists) 2555 }
2556 array[(*idx)++] = rec->ip; 2556
2557 found = 1; 2557 if (!not) {
2558 fail = 0;
2559 if (!exists) {
2560 array[(*idx)++] = rec->ip;
2561 if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
2562 goto out;
2563 }
2564 } else {
2565 if (exists) {
2566 array[i] = array[--(*idx)];
2567 array[*idx] = 0;
2568 fail = 0;
2569 }
2570 }
2558 } 2571 }
2559 } while_for_each_ftrace_rec(); 2572 } while_for_each_ftrace_rec();
2560 2573out:
2561 mutex_unlock(&ftrace_lock); 2574 mutex_unlock(&ftrace_lock);
2562 2575
2563 return found ? 0 : -EINVAL; 2576 if (fail)
2577 return -EINVAL;
2578
2579 ftrace_graph_filter_enabled = 1;
2580 return 0;
2564} 2581}
2565 2582
2566static ssize_t 2583static ssize_t
@@ -2570,16 +2587,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2570 struct trace_parser parser; 2587 struct trace_parser parser;
2571 ssize_t read, ret; 2588 ssize_t read, ret;
2572 2589
2573 if (!cnt || cnt < 0) 2590 if (!cnt)
2574 return 0; 2591 return 0;
2575 2592
2576 mutex_lock(&graph_lock); 2593 mutex_lock(&graph_lock);
2577 2594
2578 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
2579 ret = -EBUSY;
2580 goto out_unlock;
2581 }
2582
2583 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) { 2595 if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
2584 ret = -ENOMEM; 2596 ret = -ENOMEM;
2585 goto out_unlock; 2597 goto out_unlock;
@@ -3222,8 +3234,8 @@ free:
3222} 3234}
3223 3235
3224static void 3236static void
3225ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, 3237ftrace_graph_probe_sched_switch(void *ignore,
3226 struct task_struct *next) 3238 struct task_struct *prev, struct task_struct *next)
3227{ 3239{
3228 unsigned long long timestamp; 3240 unsigned long long timestamp;
3229 int index; 3241 int index;
@@ -3277,7 +3289,7 @@ static int start_graph_tracing(void)
3277 } while (ret == -EAGAIN); 3289 } while (ret == -EAGAIN);
3278 3290
3279 if (!ret) { 3291 if (!ret) {
3280 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); 3292 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3281 if (ret) 3293 if (ret)
3282 pr_info("ftrace_graph: Couldn't activate tracepoint" 3294 pr_info("ftrace_graph: Couldn't activate tracepoint"
3283 " probe to kernel_sched_switch\n"); 3295 " probe to kernel_sched_switch\n");
@@ -3349,11 +3361,11 @@ void unregister_ftrace_graph(void)
3349 goto out; 3361 goto out;
3350 3362
3351 ftrace_graph_active--; 3363 ftrace_graph_active--;
3352 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
3353 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 3364 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
3354 ftrace_graph_entry = ftrace_graph_entry_stub; 3365 ftrace_graph_entry = ftrace_graph_entry_stub;
3355 ftrace_shutdown(FTRACE_STOP_FUNC_RET); 3366 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
3356 unregister_pm_notifier(&ftrace_suspend_notifier); 3367 unregister_pm_notifier(&ftrace_suspend_notifier);
3368 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
3357 3369
3358 out: 3370 out:
3359 mutex_unlock(&ftrace_lock); 3371 mutex_unlock(&ftrace_lock);
@@ -3364,6 +3376,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3364{ 3376{
3365 /* Make sure we do not use the parent ret_stack */ 3377 /* Make sure we do not use the parent ret_stack */
3366 t->ret_stack = NULL; 3378 t->ret_stack = NULL;
3379 t->curr_ret_stack = -1;
3367 3380
3368 if (ftrace_graph_active) { 3381 if (ftrace_graph_active) {
3369 struct ftrace_ret_stack *ret_stack; 3382 struct ftrace_ret_stack *ret_stack;
@@ -3373,7 +3386,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3373 GFP_KERNEL); 3386 GFP_KERNEL);
3374 if (!ret_stack) 3387 if (!ret_stack)
3375 return; 3388 return;
3376 t->curr_ret_stack = -1;
3377 atomic_set(&t->tracing_graph_pause, 0); 3389 atomic_set(&t->tracing_graph_pause, 0);
3378 atomic_set(&t->trace_overrun, 0); 3390 atomic_set(&t->trace_overrun, 0);
3379 t->ftrace_timestamp = 0; 3391 t->ftrace_timestamp = 0;
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index a91da69f153a..bbfc1bb1660b 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -95,7 +95,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
95 trace_wake_up(); 95 trace_wake_up();
96} 96}
97 97
98static void kmemtrace_kmalloc(unsigned long call_site, 98static void kmemtrace_kmalloc(void *ignore,
99 unsigned long call_site,
99 const void *ptr, 100 const void *ptr,
100 size_t bytes_req, 101 size_t bytes_req,
101 size_t bytes_alloc, 102 size_t bytes_alloc,
@@ -105,7 +106,8 @@ static void kmemtrace_kmalloc(unsigned long call_site,
105 bytes_req, bytes_alloc, gfp_flags, -1); 106 bytes_req, bytes_alloc, gfp_flags, -1);
106} 107}
107 108
108static void kmemtrace_kmem_cache_alloc(unsigned long call_site, 109static void kmemtrace_kmem_cache_alloc(void *ignore,
110 unsigned long call_site,
109 const void *ptr, 111 const void *ptr,
110 size_t bytes_req, 112 size_t bytes_req,
111 size_t bytes_alloc, 113 size_t bytes_alloc,
@@ -115,7 +117,8 @@ static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
115 bytes_req, bytes_alloc, gfp_flags, -1); 117 bytes_req, bytes_alloc, gfp_flags, -1);
116} 118}
117 119
118static void kmemtrace_kmalloc_node(unsigned long call_site, 120static void kmemtrace_kmalloc_node(void *ignore,
121 unsigned long call_site,
119 const void *ptr, 122 const void *ptr,
120 size_t bytes_req, 123 size_t bytes_req,
121 size_t bytes_alloc, 124 size_t bytes_alloc,
@@ -126,7 +129,8 @@ static void kmemtrace_kmalloc_node(unsigned long call_site,
126 bytes_req, bytes_alloc, gfp_flags, node); 129 bytes_req, bytes_alloc, gfp_flags, node);
127} 130}
128 131
129static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, 132static void kmemtrace_kmem_cache_alloc_node(void *ignore,
133 unsigned long call_site,
130 const void *ptr, 134 const void *ptr,
131 size_t bytes_req, 135 size_t bytes_req,
132 size_t bytes_alloc, 136 size_t bytes_alloc,
@@ -137,12 +141,14 @@ static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
137 bytes_req, bytes_alloc, gfp_flags, node); 141 bytes_req, bytes_alloc, gfp_flags, node);
138} 142}
139 143
140static void kmemtrace_kfree(unsigned long call_site, const void *ptr) 144static void
145kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
141{ 146{
142 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); 147 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
143} 148}
144 149
145static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) 150static void kmemtrace_kmem_cache_free(void *ignore,
151 unsigned long call_site, const void *ptr)
146{ 152{
147 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); 153 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
148} 154}
@@ -151,34 +157,34 @@ static int kmemtrace_start_probes(void)
151{ 157{
152 int err; 158 int err;
153 159
154 err = register_trace_kmalloc(kmemtrace_kmalloc); 160 err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
155 if (err) 161 if (err)
156 return err; 162 return err;
157 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); 163 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
158 if (err) 164 if (err)
159 return err; 165 return err;
160 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); 166 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
161 if (err) 167 if (err)
162 return err; 168 return err;
163 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); 169 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
164 if (err) 170 if (err)
165 return err; 171 return err;
166 err = register_trace_kfree(kmemtrace_kfree); 172 err = register_trace_kfree(kmemtrace_kfree, NULL);
167 if (err) 173 if (err)
168 return err; 174 return err;
169 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); 175 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
170 176
171 return err; 177 return err;
172} 178}
173 179
174static void kmemtrace_stop_probes(void) 180static void kmemtrace_stop_probes(void)
175{ 181{
176 unregister_trace_kmalloc(kmemtrace_kmalloc); 182 unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
177 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); 183 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
178 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); 184 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
179 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); 185 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
180 unregister_trace_kfree(kmemtrace_kfree); 186 unregister_trace_kfree(kmemtrace_kfree, NULL);
181 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); 187 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
182} 188}
183 189
184static int kmem_trace_init(struct trace_array *tr) 190static int kmem_trace_init(struct trace_array *tr)
@@ -237,7 +243,8 @@ struct kmemtrace_user_event_alloc {
237}; 243};
238 244
239static enum print_line_t 245static enum print_line_t
240kmemtrace_print_alloc(struct trace_iterator *iter, int flags) 246kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
247 struct trace_event *event)
241{ 248{
242 struct trace_seq *s = &iter->seq; 249 struct trace_seq *s = &iter->seq;
243 struct kmemtrace_alloc_entry *entry; 250 struct kmemtrace_alloc_entry *entry;
@@ -257,7 +264,8 @@ kmemtrace_print_alloc(struct trace_iterator *iter, int flags)
257} 264}
258 265
259static enum print_line_t 266static enum print_line_t
260kmemtrace_print_free(struct trace_iterator *iter, int flags) 267kmemtrace_print_free(struct trace_iterator *iter, int flags,
268 struct trace_event *event)
261{ 269{
262 struct trace_seq *s = &iter->seq; 270 struct trace_seq *s = &iter->seq;
263 struct kmemtrace_free_entry *entry; 271 struct kmemtrace_free_entry *entry;
@@ -275,7 +283,8 @@ kmemtrace_print_free(struct trace_iterator *iter, int flags)
275} 283}
276 284
277static enum print_line_t 285static enum print_line_t
278kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags) 286kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
287 struct trace_event *event)
279{ 288{
280 struct trace_seq *s = &iter->seq; 289 struct trace_seq *s = &iter->seq;
281 struct kmemtrace_alloc_entry *entry; 290 struct kmemtrace_alloc_entry *entry;
@@ -309,7 +318,8 @@ kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags)
309} 318}
310 319
311static enum print_line_t 320static enum print_line_t
312kmemtrace_print_free_user(struct trace_iterator *iter, int flags) 321kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
322 struct trace_event *event)
313{ 323{
314 struct trace_seq *s = &iter->seq; 324 struct trace_seq *s = &iter->seq;
315 struct kmemtrace_free_entry *entry; 325 struct kmemtrace_free_entry *entry;
@@ -463,18 +473,26 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
463 } 473 }
464} 474}
465 475
466static struct trace_event kmem_trace_alloc = { 476static struct trace_event_functions kmem_trace_alloc_funcs = {
467 .type = TRACE_KMEM_ALLOC,
468 .trace = kmemtrace_print_alloc, 477 .trace = kmemtrace_print_alloc,
469 .binary = kmemtrace_print_alloc_user, 478 .binary = kmemtrace_print_alloc_user,
470}; 479};
471 480
472static struct trace_event kmem_trace_free = { 481static struct trace_event kmem_trace_alloc = {
473 .type = TRACE_KMEM_FREE, 482 .type = TRACE_KMEM_ALLOC,
483 .funcs = &kmem_trace_alloc_funcs,
484};
485
486static struct trace_event_functions kmem_trace_free_funcs = {
474 .trace = kmemtrace_print_free, 487 .trace = kmemtrace_print_free,
475 .binary = kmemtrace_print_free_user, 488 .binary = kmemtrace_print_free_user,
476}; 489};
477 490
491static struct trace_event kmem_trace_free = {
492 .type = TRACE_KMEM_FREE,
493 .funcs = &kmem_trace_free_funcs,
494};
495
478static struct tracer kmem_tracer __read_mostly = { 496static struct tracer kmem_tracer __read_mostly = {
479 .name = "kmemtrace", 497 .name = "kmemtrace",
480 .init = kmem_trace_init, 498 .init = kmem_trace_init,
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e6..a22582a06161 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,7 +9,6 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
13 12
14#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 14#include <trace/events/power.h>
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index edefe3b2801b..1da7b6ea8b85 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,12 +14,14 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/cpu.h> 21#include <linux/cpu.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22 23
24#include <asm/local.h>
23#include "trace.h" 25#include "trace.h"
24 26
25/* 27/*
@@ -206,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
206#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
207#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
208 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
211 221
@@ -309,6 +319,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
309#define TS_MASK ((1ULL << TS_SHIFT) - 1) 319#define TS_MASK ((1ULL << TS_SHIFT) - 1)
310#define TS_DELTA_TEST (~TS_MASK) 320#define TS_DELTA_TEST (~TS_MASK)
311 321
322/* Flag when events were overwritten */
323#define RB_MISSED_EVENTS (1 << 31)
324/* Missed count stored at end */
325#define RB_MISSED_STORED (1 << 30)
326
312struct buffer_data_page { 327struct buffer_data_page {
313 u64 time_stamp; /* page time stamp */ 328 u64 time_stamp; /* page time stamp */
314 local_t commit; /* write committed index */ 329 local_t commit; /* write committed index */
@@ -328,6 +343,7 @@ struct buffer_page {
328 local_t write; /* index for next write */ 343 local_t write; /* index for next write */
329 unsigned read; /* index for next read */ 344 unsigned read; /* index for next read */
330 local_t entries; /* entries on this page */ 345 local_t entries; /* entries on this page */
346 unsigned long real_end; /* real end of data */
331 struct buffer_data_page *page; /* Actual data page */ 347 struct buffer_data_page *page; /* Actual data page */
332}; 348};
333 349
@@ -407,6 +423,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
407 (unsigned int)sizeof(field.commit), 423 (unsigned int)sizeof(field.commit),
408 (unsigned int)is_signed_type(long)); 424 (unsigned int)is_signed_type(long));
409 425
426 ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
427 "offset:%u;\tsize:%u;\tsigned:%u;\n",
428 (unsigned int)offsetof(typeof(field), commit),
429 1,
430 (unsigned int)is_signed_type(long));
431
410 ret = trace_seq_printf(s, "\tfield: char data;\t" 432 ret = trace_seq_printf(s, "\tfield: char data;\t"
411 "offset:%u;\tsize:%u;\tsigned:%u;\n", 433 "offset:%u;\tsize:%u;\tsigned:%u;\n",
412 (unsigned int)offsetof(typeof(field), data), 434 (unsigned int)offsetof(typeof(field), data),
@@ -430,6 +452,8 @@ struct ring_buffer_per_cpu {
430 struct buffer_page *tail_page; /* write to tail */ 452 struct buffer_page *tail_page; /* write to tail */
431 struct buffer_page *commit_page; /* committed pages */ 453 struct buffer_page *commit_page; /* committed pages */
432 struct buffer_page *reader_page; 454 struct buffer_page *reader_page;
455 unsigned long lost_events;
456 unsigned long last_overrun;
433 local_t commit_overrun; 457 local_t commit_overrun;
434 local_t overrun; 458 local_t overrun;
435 local_t entries; 459 local_t entries;
@@ -464,6 +488,8 @@ struct ring_buffer_iter {
464 struct ring_buffer_per_cpu *cpu_buffer; 488 struct ring_buffer_per_cpu *cpu_buffer;
465 unsigned long head; 489 unsigned long head;
466 struct buffer_page *head_page; 490 struct buffer_page *head_page;
491 struct buffer_page *cache_reader_page;
492 unsigned long cache_read;
467 u64 read_stamp; 493 u64 read_stamp;
468}; 494};
469 495
@@ -1198,18 +1224,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1198 1224
1199 for (i = 0; i < nr_pages; i++) { 1225 for (i = 0; i < nr_pages; i++) {
1200 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1226 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1201 return; 1227 goto out;
1202 p = cpu_buffer->pages->next; 1228 p = cpu_buffer->pages->next;
1203 bpage = list_entry(p, struct buffer_page, list); 1229 bpage = list_entry(p, struct buffer_page, list);
1204 list_del_init(&bpage->list); 1230 list_del_init(&bpage->list);
1205 free_buffer_page(bpage); 1231 free_buffer_page(bpage);
1206 } 1232 }
1207 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1233 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1208 return; 1234 goto out;
1209 1235
1210 rb_reset_cpu(cpu_buffer); 1236 rb_reset_cpu(cpu_buffer);
1211 rb_check_pages(cpu_buffer); 1237 rb_check_pages(cpu_buffer);
1212 1238
1239out:
1213 spin_unlock_irq(&cpu_buffer->reader_lock); 1240 spin_unlock_irq(&cpu_buffer->reader_lock);
1214} 1241}
1215 1242
@@ -1226,7 +1253,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1226 1253
1227 for (i = 0; i < nr_pages; i++) { 1254 for (i = 0; i < nr_pages; i++) {
1228 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1255 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1229 return; 1256 goto out;
1230 p = pages->next; 1257 p = pages->next;
1231 bpage = list_entry(p, struct buffer_page, list); 1258 bpage = list_entry(p, struct buffer_page, list);
1232 list_del_init(&bpage->list); 1259 list_del_init(&bpage->list);
@@ -1235,6 +1262,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1235 rb_reset_cpu(cpu_buffer); 1262 rb_reset_cpu(cpu_buffer);
1236 rb_check_pages(cpu_buffer); 1263 rb_check_pages(cpu_buffer);
1237 1264
1265out:
1238 spin_unlock_irq(&cpu_buffer->reader_lock); 1266 spin_unlock_irq(&cpu_buffer->reader_lock);
1239} 1267}
1240 1268
@@ -1544,7 +1572,7 @@ rb_update_event(struct ring_buffer_event *event,
1544 1572
1545 case 0: 1573 case 0:
1546 length -= RB_EVNT_HDR_SIZE; 1574 length -= RB_EVNT_HDR_SIZE;
1547 if (length > RB_MAX_SMALL_DATA) 1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1548 event->array[0] = length; 1576 event->array[0] = length;
1549 else 1577 else
1550 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1719,11 +1747,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1719 if (!length) 1747 if (!length)
1720 length = 1; 1748 length = 1;
1721 1749
1722 if (length > RB_MAX_SMALL_DATA) 1750 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1723 length += sizeof(event.array[0]); 1751 length += sizeof(event.array[0]);
1724 1752
1725 length += RB_EVNT_HDR_SIZE; 1753 length += RB_EVNT_HDR_SIZE;
1726 length = ALIGN(length, RB_ALIGNMENT); 1754 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1727 1755
1728 return length; 1756 return length;
1729} 1757}
@@ -1740,6 +1768,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1740 * must fill the old tail_page with padding. 1768 * must fill the old tail_page with padding.
1741 */ 1769 */
1742 if (tail >= BUF_PAGE_SIZE) { 1770 if (tail >= BUF_PAGE_SIZE) {
1771 /*
1772 * If the page was filled, then we still need
1773 * to update the real_end. Reset it to zero
1774 * and the reader will ignore it.
1775 */
1776 if (tail == BUF_PAGE_SIZE)
1777 tail_page->real_end = 0;
1778
1743 local_sub(length, &tail_page->write); 1779 local_sub(length, &tail_page->write);
1744 return; 1780 return;
1745 } 1781 }
@@ -1748,6 +1784,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1748 kmemcheck_annotate_bitfield(event, bitfield); 1784 kmemcheck_annotate_bitfield(event, bitfield);
1749 1785
1750 /* 1786 /*
1787 * Save the original length to the meta data.
1788 * This will be used by the reader to add lost event
1789 * counter.
1790 */
1791 tail_page->real_end = tail;
1792
1793 /*
1751 * If this event is bigger than the minimum size, then 1794 * If this event is bigger than the minimum size, then
1752 * we need to be careful that we don't subtract the 1795 * we need to be careful that we don't subtract the
1753 * write counter enough to allow another writer to slip 1796 * write counter enough to allow another writer to slip
@@ -1965,17 +2008,13 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1965 u64 *ts, u64 *delta) 2008 u64 *ts, u64 *delta)
1966{ 2009{
1967 struct ring_buffer_event *event; 2010 struct ring_buffer_event *event;
1968 static int once;
1969 int ret; 2011 int ret;
1970 2012
1971 if (unlikely(*delta > (1ULL << 59) && !once++)) { 2013 WARN_ONCE(*delta > (1ULL << 59),
1972 printk(KERN_WARNING "Delta way too big! %llu" 2014 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
1973 " ts=%llu write stamp = %llu\n", 2015 (unsigned long long)*delta,
1974 (unsigned long long)*delta, 2016 (unsigned long long)*ts,
1975 (unsigned long long)*ts, 2017 (unsigned long long)cpu_buffer->write_stamp);
1976 (unsigned long long)cpu_buffer->write_stamp);
1977 WARN_ON(1);
1978 }
1979 2018
1980 /* 2019 /*
1981 * The delta is too big, we to add a 2020 * The delta is too big, we to add a
@@ -2230,12 +2269,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2230 if (ring_buffer_flags != RB_BUFFERS_ON) 2269 if (ring_buffer_flags != RB_BUFFERS_ON)
2231 return NULL; 2270 return NULL;
2232 2271
2233 if (atomic_read(&buffer->record_disabled))
2234 return NULL;
2235
2236 /* If we are tracing schedule, we don't want to recurse */ 2272 /* If we are tracing schedule, we don't want to recurse */
2237 resched = ftrace_preempt_disable(); 2273 resched = ftrace_preempt_disable();
2238 2274
2275 if (atomic_read(&buffer->record_disabled))
2276 goto out_nocheck;
2277
2239 if (trace_recursive_lock()) 2278 if (trace_recursive_lock())
2240 goto out_nocheck; 2279 goto out_nocheck;
2241 2280
@@ -2467,11 +2506,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2467 if (ring_buffer_flags != RB_BUFFERS_ON) 2506 if (ring_buffer_flags != RB_BUFFERS_ON)
2468 return -EBUSY; 2507 return -EBUSY;
2469 2508
2470 if (atomic_read(&buffer->record_disabled))
2471 return -EBUSY;
2472
2473 resched = ftrace_preempt_disable(); 2509 resched = ftrace_preempt_disable();
2474 2510
2511 if (atomic_read(&buffer->record_disabled))
2512 goto out;
2513
2475 cpu = raw_smp_processor_id(); 2514 cpu = raw_smp_processor_id();
2476 2515
2477 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2516 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2539,7 +2578,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2539 * @buffer: The ring buffer to enable writes 2578 * @buffer: The ring buffer to enable writes
2540 * 2579 *
2541 * Note, multiple disables will need the same number of enables 2580 * Note, multiple disables will need the same number of enables
2542 * to truely enable the writing (much like preempt_disable). 2581 * to truly enable the writing (much like preempt_disable).
2543 */ 2582 */
2544void ring_buffer_record_enable(struct ring_buffer *buffer) 2583void ring_buffer_record_enable(struct ring_buffer *buffer)
2545{ 2584{
@@ -2575,7 +2614,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2575 * @cpu: The CPU to enable. 2614 * @cpu: The CPU to enable.
2576 * 2615 *
2577 * Note, multiple disables will need the same number of enables 2616 * Note, multiple disables will need the same number of enables
2578 * to truely enable the writing (much like preempt_disable). 2617 * to truly enable the writing (much like preempt_disable).
2579 */ 2618 */
2580void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2619void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2581{ 2620{
@@ -2716,6 +2755,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
2716 iter->read_stamp = cpu_buffer->read_stamp; 2755 iter->read_stamp = cpu_buffer->read_stamp;
2717 else 2756 else
2718 iter->read_stamp = iter->head_page->page->time_stamp; 2757 iter->read_stamp = iter->head_page->page->time_stamp;
2758 iter->cache_reader_page = cpu_buffer->reader_page;
2759 iter->cache_read = cpu_buffer->read;
2719} 2760}
2720 2761
2721/** 2762/**
@@ -2822,6 +2863,7 @@ static struct buffer_page *
2822rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 2863rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2823{ 2864{
2824 struct buffer_page *reader = NULL; 2865 struct buffer_page *reader = NULL;
2866 unsigned long overwrite;
2825 unsigned long flags; 2867 unsigned long flags;
2826 int nr_loops = 0; 2868 int nr_loops = 0;
2827 int ret; 2869 int ret;
@@ -2863,6 +2905,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2863 local_set(&cpu_buffer->reader_page->write, 0); 2905 local_set(&cpu_buffer->reader_page->write, 0);
2864 local_set(&cpu_buffer->reader_page->entries, 0); 2906 local_set(&cpu_buffer->reader_page->entries, 0);
2865 local_set(&cpu_buffer->reader_page->page->commit, 0); 2907 local_set(&cpu_buffer->reader_page->page->commit, 0);
2908 cpu_buffer->reader_page->real_end = 0;
2866 2909
2867 spin: 2910 spin:
2868 /* 2911 /*
@@ -2883,6 +2926,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2883 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); 2926 rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
2884 2927
2885 /* 2928 /*
2929 * We want to make sure we read the overruns after we set up our
2930 * pointers to the next object. The writer side does a
2931 * cmpxchg to cross pages which acts as the mb on the writer
2932 * side. Note, the reader will constantly fail the swap
2933 * while the writer is updating the pointers, so this
2934 * guarantees that the overwrite recorded here is the one we
2935 * want to compare with the last_overrun.
2936 */
2937 smp_mb();
2938 overwrite = local_read(&(cpu_buffer->overrun));
2939
2940 /*
2886 * Here's the tricky part. 2941 * Here's the tricky part.
2887 * 2942 *
2888 * We need to move the pointer past the header page. 2943 * We need to move the pointer past the header page.
@@ -2913,6 +2968,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
2913 cpu_buffer->reader_page = reader; 2968 cpu_buffer->reader_page = reader;
2914 rb_reset_reader_page(cpu_buffer); 2969 rb_reset_reader_page(cpu_buffer);
2915 2970
2971 if (overwrite != cpu_buffer->last_overrun) {
2972 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
2973 cpu_buffer->last_overrun = overwrite;
2974 }
2975
2916 goto again; 2976 goto again;
2917 2977
2918 out: 2978 out:
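A simplified user-space model of the bookkeeping introduced by the reader-page hunks above: after swapping in a new reader page, the reader samples the per-cpu overrun counter (behind the smp_mb() in the real code) and records the delta since the previous swap as the number of lost events. This is only an illustration of the arithmetic, not the kernel implementation.

struct reader_state {
	unsigned long last_overrun;   /* overrun count seen at the previous swap */
	unsigned long lost_events;    /* events dropped since that swap */
};

/* 'overwrite' is the overrun counter sampled after the page swap. */
static void account_lost_events(struct reader_state *r, unsigned long overwrite)
{
	if (overwrite != r->last_overrun) {
		r->lost_events = overwrite - r->last_overrun;
		r->last_overrun = overwrite;
	}
}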
@@ -2989,8 +3049,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
2989 rb_advance_iter(iter); 3049 rb_advance_iter(iter);
2990} 3050}
2991 3051
3052static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
3053{
3054 return cpu_buffer->lost_events;
3055}
3056
2992static struct ring_buffer_event * 3057static struct ring_buffer_event *
2993rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts) 3058rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3059 unsigned long *lost_events)
2994{ 3060{
2995 struct ring_buffer_event *event; 3061 struct ring_buffer_event *event;
2996 struct buffer_page *reader; 3062 struct buffer_page *reader;
@@ -3042,6 +3108,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
3042 ring_buffer_normalize_time_stamp(cpu_buffer->buffer, 3108 ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
3043 cpu_buffer->cpu, ts); 3109 cpu_buffer->cpu, ts);
3044 } 3110 }
3111 if (lost_events)
3112 *lost_events = rb_lost_events(cpu_buffer);
3045 return event; 3113 return event;
3046 3114
3047 default: 3115 default:
@@ -3060,13 +3128,22 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3060 struct ring_buffer_event *event; 3128 struct ring_buffer_event *event;
3061 int nr_loops = 0; 3129 int nr_loops = 0;
3062 3130
3063 if (ring_buffer_iter_empty(iter))
3064 return NULL;
3065
3066 cpu_buffer = iter->cpu_buffer; 3131 cpu_buffer = iter->cpu_buffer;
3067 buffer = cpu_buffer->buffer; 3132 buffer = cpu_buffer->buffer;
3068 3133
3134 /*
3135 * Check if someone performed a consuming read to
3136 * the buffer. A consuming read invalidates the iterator
3137 * and we need to reset the iterator in this case.
3138 */
3139 if (unlikely(iter->cache_read != cpu_buffer->read ||
3140 iter->cache_reader_page != cpu_buffer->reader_page))
3141 rb_iter_reset(iter);
3142
3069 again: 3143 again:
3144 if (ring_buffer_iter_empty(iter))
3145 return NULL;
3146
3070 /* 3147 /*
3071 * We repeat when a timestamp is encountered. 3148 * We repeat when a timestamp is encountered.
3072 * We can get multiple timestamps by nested interrupts or also 3149 * We can get multiple timestamps by nested interrupts or also
@@ -3081,6 +3158,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3081 if (rb_per_cpu_empty(cpu_buffer)) 3158 if (rb_per_cpu_empty(cpu_buffer))
3082 return NULL; 3159 return NULL;
3083 3160
3161 if (iter->head >= local_read(&iter->head_page->page->commit)) {
3162 rb_inc_iter(iter);
3163 goto again;
3164 }
3165
3084 event = rb_iter_head_event(iter); 3166 event = rb_iter_head_event(iter);
3085 3167
3086 switch (event->type_len) { 3168 switch (event->type_len) {
@@ -3138,12 +3220,14 @@ static inline int rb_ok_to_lock(void)
3138 * @buffer: The ring buffer to read 3220 * @buffer: The ring buffer to read
3139 * @cpu: The cpu to peak at 3221 * @cpu: The cpu to peak at
3140 * @ts: The timestamp counter of this event. 3222 * @ts: The timestamp counter of this event.
3223 * @lost_events: a variable to store if events were lost (may be NULL)
3141 * 3224 *
3142 * This will return the event that will be read next, but does 3225 * This will return the event that will be read next, but does
3143 * not consume the data. 3226 * not consume the data.
3144 */ 3227 */
3145struct ring_buffer_event * 3228struct ring_buffer_event *
3146ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) 3229ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
3230 unsigned long *lost_events)
3147{ 3231{
3148 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 3232 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
3149 struct ring_buffer_event *event; 3233 struct ring_buffer_event *event;
@@ -3158,7 +3242,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
3158 local_irq_save(flags); 3242 local_irq_save(flags);
3159 if (dolock) 3243 if (dolock)
3160 spin_lock(&cpu_buffer->reader_lock); 3244 spin_lock(&cpu_buffer->reader_lock);
3161 event = rb_buffer_peek(cpu_buffer, ts); 3245 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3162 if (event && event->type_len == RINGBUF_TYPE_PADDING) 3246 if (event && event->type_len == RINGBUF_TYPE_PADDING)
3163 rb_advance_reader(cpu_buffer); 3247 rb_advance_reader(cpu_buffer);
3164 if (dolock) 3248 if (dolock)
@@ -3200,13 +3284,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3200/** 3284/**
3201 * ring_buffer_consume - return an event and consume it 3285 * ring_buffer_consume - return an event and consume it
3202 * @buffer: The ring buffer to get the next event from 3286 * @buffer: The ring buffer to get the next event from
3287 * @cpu: the cpu to read the buffer from
3288 * @ts: a variable to store the timestamp (may be NULL)
3289 * @lost_events: a variable to store if events were lost (may be NULL)
3203 * 3290 *
3204 * Returns the next event in the ring buffer, and that event is consumed. 3291 * Returns the next event in the ring buffer, and that event is consumed.
3205 * Meaning, that sequential reads will keep returning a different event, 3292 * Meaning, that sequential reads will keep returning a different event,
3206 * and eventually empty the ring buffer if the producer is slower. 3293 * and eventually empty the ring buffer if the producer is slower.
3207 */ 3294 */
3208struct ring_buffer_event * 3295struct ring_buffer_event *
3209ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 3296ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
3297 unsigned long *lost_events)
3210{ 3298{
3211 struct ring_buffer_per_cpu *cpu_buffer; 3299 struct ring_buffer_per_cpu *cpu_buffer;
3212 struct ring_buffer_event *event = NULL; 3300 struct ring_buffer_event *event = NULL;
@@ -3227,9 +3315,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3227 if (dolock) 3315 if (dolock)
3228 spin_lock(&cpu_buffer->reader_lock); 3316 spin_lock(&cpu_buffer->reader_lock);
3229 3317
3230 event = rb_buffer_peek(cpu_buffer, ts); 3318 event = rb_buffer_peek(cpu_buffer, ts, lost_events);
3231 if (event) 3319 if (event) {
3320 cpu_buffer->lost_events = 0;
3232 rb_advance_reader(cpu_buffer); 3321 rb_advance_reader(cpu_buffer);
3322 }
3233 3323
3234 if (dolock) 3324 if (dolock)
3235 spin_unlock(&cpu_buffer->reader_lock); 3325 spin_unlock(&cpu_buffer->reader_lock);
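The consuming-read path now reports drops to its callers. Below is a minimal sketch of how a caller might drain one CPU with the extended ring_buffer_consume() signature; the handle() callback and the caller-supplied buffer pointer are assumptions made for the example.

#include <linux/ring_buffer.h>

/* Drain everything currently readable on one CPU, counting dropped events. */
static unsigned long drain_cpu(struct ring_buffer *buffer, int cpu,
			       void (*handle)(struct ring_buffer_event *ev, u64 ts))
{
	struct ring_buffer_event *event;
	unsigned long lost, total_lost = 0;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts, &lost))) {
		total_lost += lost;     /* events overwritten before we got to them */
		handle(event, ts);      /* hypothetical per-event callback */
	}
	return total_lost;
}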
@@ -3246,23 +3336,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
3246EXPORT_SYMBOL_GPL(ring_buffer_consume); 3336EXPORT_SYMBOL_GPL(ring_buffer_consume);
3247 3337
3248/** 3338/**
3249 * ring_buffer_read_start - start a non consuming read of the buffer 3339 * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
3250 * @buffer: The ring buffer to read from 3340 * @buffer: The ring buffer to read from
3251 * @cpu: The cpu buffer to iterate over 3341 * @cpu: The cpu buffer to iterate over
3252 * 3342 *
3253 * This starts up an iteration through the buffer. It also disables 3343 * This performs the initial preparations necessary to iterate
3254 * the recording to the buffer until the reading is finished. 3344 * through the buffer. Memory is allocated, buffer recording
3255 * This prevents the reading from being corrupted. This is not 3345 * is disabled, and the iterator pointer is returned to the caller.
3256 * a consuming read, so a producer is not expected.
3257 * 3346 *
3258 * Must be paired with ring_buffer_finish. 3347 * Disabling buffer recording prevents the reading from being
3348 * corrupted. This is not a consuming read, so a producer is not
3349 * expected.
3350 *
3351 * After a sequence of ring_buffer_read_prepare calls, the user is
3352 * expected to make at least one call to ring_buffer_prepare_sync.
3353 * Afterwards, ring_buffer_read_start is invoked to get things going
3354 * for real.
3355 *
3356 * This overall must be paired with ring_buffer_finish.
3259 */ 3357 */
3260struct ring_buffer_iter * 3358struct ring_buffer_iter *
3261ring_buffer_read_start(struct ring_buffer *buffer, int cpu) 3359ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
3262{ 3360{
3263 struct ring_buffer_per_cpu *cpu_buffer; 3361 struct ring_buffer_per_cpu *cpu_buffer;
3264 struct ring_buffer_iter *iter; 3362 struct ring_buffer_iter *iter;
3265 unsigned long flags;
3266 3363
3267 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 3364 if (!cpumask_test_cpu(cpu, buffer->cpumask))
3268 return NULL; 3365 return NULL;
@@ -3276,15 +3373,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
3276 iter->cpu_buffer = cpu_buffer; 3373 iter->cpu_buffer = cpu_buffer;
3277 3374
3278 atomic_inc(&cpu_buffer->record_disabled); 3375 atomic_inc(&cpu_buffer->record_disabled);
3376
3377 return iter;
3378}
3379EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
3380
3381/**
3382 * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
3383 *
3384 * All previously invoked ring_buffer_read_prepare calls to prepare
3385 * iterators will be synchronized. Afterwards, ring_buffer_read_start
3386 * calls on those iterators are allowed.
3387 */
3388void
3389ring_buffer_read_prepare_sync(void)
3390{
3279 synchronize_sched(); 3391 synchronize_sched();
3392}
3393EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
3394
3395/**
3396 * ring_buffer_read_start - start a non consuming read of the buffer
3397 * @iter: The iterator returned by ring_buffer_read_prepare
3398 *
3399 * This finalizes the startup of an iteration through the buffer.
3400 * The iterator comes from a call to ring_buffer_read_prepare and
3401 * an intervening ring_buffer_read_prepare_sync must have been
3402 * performed.
3403 *
3404 * Must be paired with ring_buffer_finish.
3405 */
3406void
3407ring_buffer_read_start(struct ring_buffer_iter *iter)
3408{
3409 struct ring_buffer_per_cpu *cpu_buffer;
3410 unsigned long flags;
3411
3412 if (!iter)
3413 return;
3414
3415 cpu_buffer = iter->cpu_buffer;
3280 3416
3281 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 3417 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
3282 arch_spin_lock(&cpu_buffer->lock); 3418 arch_spin_lock(&cpu_buffer->lock);
3283 rb_iter_reset(iter); 3419 rb_iter_reset(iter);
3284 arch_spin_unlock(&cpu_buffer->lock); 3420 arch_spin_unlock(&cpu_buffer->lock);
3285 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3421 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3286
3287 return iter;
3288} 3422}
3289EXPORT_SYMBOL_GPL(ring_buffer_read_start); 3423EXPORT_SYMBOL_GPL(ring_buffer_read_start);
3290 3424
@@ -3378,6 +3512,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
3378 cpu_buffer->write_stamp = 0; 3512 cpu_buffer->write_stamp = 0;
3379 cpu_buffer->read_stamp = 0; 3513 cpu_buffer->read_stamp = 0;
3380 3514
3515 cpu_buffer->lost_events = 0;
3516 cpu_buffer->last_overrun = 0;
3517
3381 rb_head_page_activate(cpu_buffer); 3518 rb_head_page_activate(cpu_buffer);
3382} 3519}
3383 3520
@@ -3653,6 +3790,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3653 struct ring_buffer_event *event; 3790 struct ring_buffer_event *event;
3654 struct buffer_data_page *bpage; 3791 struct buffer_data_page *bpage;
3655 struct buffer_page *reader; 3792 struct buffer_page *reader;
3793 unsigned long missed_events;
3656 unsigned long flags; 3794 unsigned long flags;
3657 unsigned int commit; 3795 unsigned int commit;
3658 unsigned int read; 3796 unsigned int read;
@@ -3689,6 +3827,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3689 read = reader->read; 3827 read = reader->read;
3690 commit = rb_page_commit(reader); 3828 commit = rb_page_commit(reader);
3691 3829
3830 /* Check if any events were dropped */
3831 missed_events = cpu_buffer->lost_events;
3832
3692 /* 3833 /*
3693 * If this page has been partially read or 3834 * If this page has been partially read or
3694 * if len is not big enough to read the rest of the page or 3835 * if len is not big enough to read the rest of the page or
@@ -3749,9 +3890,42 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3749 local_set(&reader->entries, 0); 3890 local_set(&reader->entries, 0);
3750 reader->read = 0; 3891 reader->read = 0;
3751 *data_page = bpage; 3892 *data_page = bpage;
3893
3894 /*
3895 * Use the real_end for the data size,
3896 * This gives us a chance to store the lost events
3897 * on the page.
3898 */
3899 if (reader->real_end)
3900 local_set(&bpage->commit, reader->real_end);
3752 } 3901 }
3753 ret = read; 3902 ret = read;
3754 3903
3904 cpu_buffer->lost_events = 0;
3905
3906 commit = local_read(&bpage->commit);
3907 /*
3908 * Set a flag in the commit field if we lost events
3909 */
3910 if (missed_events) {
3911 /* If there is room at the end of the page to save the
3912 * missed events, then record it there.
3913 */
3914 if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
3915 memcpy(&bpage->data[commit], &missed_events,
3916 sizeof(missed_events));
3917 local_add(RB_MISSED_STORED, &bpage->commit);
3918 commit += sizeof(missed_events);
3919 }
3920 local_add(RB_MISSED_EVENTS, &bpage->commit);
3921 }
3922
3923 /*
3924 * This page may be off to user land. Zero it out here.
3925 */
3926 if (commit < BUF_PAGE_SIZE)
3927 memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
3928
3755 out_unlock: 3929 out_unlock:
3756 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 3930 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
3757 3931
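With this hunk, a page handed out by ring_buffer_read_page() can carry the number of missed events: flag bits are OR-ed into the page's commit field and, when there is room, the count itself is appended after the data (the benchmark change below simply masks the flags off with 0xfffff). The decode sketch that follows is a user-space illustration only; the RB_MISSED_* bit values, the data-size mask, and the page_view layout are assumptions, not a public API.

#include <stdint.h>
#include <string.h>

#define RB_MISSED_EVENTS   (1UL << 31)   /* assumed flag: events were dropped */
#define RB_MISSED_STORED   (1UL << 30)   /* assumed flag: the count is on the page */
#define RB_DATA_SIZE_MASK  0x000fffffUL  /* assumed mask for the real data length */

struct page_view {                        /* loose mirror of buffer_data_page */
	uint64_t      ts;                 /* page timestamp */
	unsigned long commit;             /* data size plus the RB_MISSED_* flags */
	char          data[];             /* events, optionally followed by the count */
};

static unsigned long missed_events(const struct page_view *p)
{
	unsigned long commit = p->commit & RB_DATA_SIZE_MASK;
	unsigned long missed = 0;

	if (!(p->commit & RB_MISSED_EVENTS))
		return 0;                         /* nothing was dropped */

	if (p->commit & RB_MISSED_STORED)         /* count stored after the data */
		memcpy(&missed, p->data + commit, sizeof(missed));
	else
		missed = 1;                       /* dropped, but the count is unknown */

	return missed;
}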
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index b2477caf09c2..302f8a614635 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -8,6 +8,7 @@
8#include <linux/kthread.h> 8#include <linux/kthread.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <asm/local.h>
11 12
12struct rb_page { 13struct rb_page {
13 u64 ts; 14 u64 ts;
@@ -80,7 +81,7 @@ static enum event_status read_event(int cpu)
80 int *entry; 81 int *entry;
81 u64 ts; 82 u64 ts;
82 83
83 event = ring_buffer_consume(buffer, cpu, &ts); 84 event = ring_buffer_consume(buffer, cpu, &ts, NULL);
84 if (!event) 85 if (!event)
85 return EVENT_DROPPED; 86 return EVENT_DROPPED;
86 87
@@ -112,7 +113,8 @@ static enum event_status read_page(int cpu)
112 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1); 113 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
113 if (ret >= 0) { 114 if (ret >= 0) {
114 rpage = bpage; 115 rpage = bpage;
115 commit = local_read(&rpage->commit); 116 /* The commit may have missed event flags set, clear them */
117 commit = local_read(&rpage->commit) & 0xfffff;
116 for (i = 0; i < commit && !kill_test; i += inc) { 118 for (i = 0; i < commit && !kill_test; i += inc) {
117 119
118 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) { 120 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..086d36316805 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,10 +32,11 @@
32#include <linux/splice.h> 32#include <linux/splice.h>
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h>
36#include <linux/slab.h>
35#include <linux/ctype.h> 37#include <linux/ctype.h>
36#include <linux/init.h> 38#include <linux/init.h>
37#include <linux/poll.h> 39#include <linux/poll.h>
38#include <linux/gfp.h>
39#include <linux/fs.h> 40#include <linux/fs.h>
40 41
41#include "trace.h" 42#include "trace.h"
@@ -91,20 +92,17 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled);
91static inline void ftrace_disable_cpu(void) 92static inline void ftrace_disable_cpu(void)
92{ 93{
93 preempt_disable(); 94 preempt_disable();
94 __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); 95 __this_cpu_inc(ftrace_cpu_disabled);
95} 96}
96 97
97static inline void ftrace_enable_cpu(void) 98static inline void ftrace_enable_cpu(void)
98{ 99{
99 __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); 100 __this_cpu_dec(ftrace_cpu_disabled);
100 preempt_enable(); 101 preempt_enable();
101} 102}
102 103
103static cpumask_var_t __read_mostly tracing_buffer_mask; 104static cpumask_var_t __read_mostly tracing_buffer_mask;
104 105
105/* Define which cpu buffers are currently read in trace_pipe */
106static cpumask_var_t tracing_reader_cpumask;
107
108#define for_each_tracing_cpu(cpu) \ 106#define for_each_tracing_cpu(cpu) \
109 for_each_cpu(cpu, tracing_buffer_mask) 107 for_each_cpu(cpu, tracing_buffer_mask)
110 108
@@ -119,9 +117,12 @@ static cpumask_var_t tracing_reader_cpumask;
119 * 117 *
120 * It is default off, but you can enable it with either specifying 118 * It is default off, but you can enable it with either specifying
121 * "ftrace_dump_on_oops" in the kernel command line, or setting 119 * "ftrace_dump_on_oops" in the kernel command line, or setting
122 * /proc/sys/kernel/ftrace_dump_on_oops to true. 120 * /proc/sys/kernel/ftrace_dump_on_oops
121 * Set 1 if you want to dump buffers of all CPUs
122 * Set 2 if you want to dump the buffer of the CPU that triggered oops
123 */ 123 */
124int ftrace_dump_on_oops; 124
125enum ftrace_dump_mode ftrace_dump_on_oops;
125 126
126static int tracing_set_tracer(const char *buf); 127static int tracing_set_tracer(const char *buf);
127 128
@@ -141,8 +142,17 @@ __setup("ftrace=", set_cmdline_ftrace);
141 142
142static int __init set_ftrace_dump_on_oops(char *str) 143static int __init set_ftrace_dump_on_oops(char *str)
143{ 144{
144 ftrace_dump_on_oops = 1; 145 if (*str++ != '=' || !*str) {
145 return 1; 146 ftrace_dump_on_oops = DUMP_ALL;
147 return 1;
148 }
149
150 if (!strcmp("orig_cpu", str)) {
151 ftrace_dump_on_oops = DUMP_ORIG;
152 return 1;
153 }
154
155 return 0;
146} 156}
147__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 157__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
148 158
@@ -243,12 +253,91 @@ static struct tracer *current_trace __read_mostly;
243 253
244/* 254/*
245 * trace_types_lock is used to protect the trace_types list. 255 * trace_types_lock is used to protect the trace_types list.
246 * This lock is also used to keep user access serialized.
247 * Accesses from userspace will grab this lock while userspace
248 * activities happen inside the kernel.
249 */ 256 */
250static DEFINE_MUTEX(trace_types_lock); 257static DEFINE_MUTEX(trace_types_lock);
251 258
259/*
260 * serialize the access of the ring buffer
261 *
262 * The ring buffer serializes readers, but that is only low level protection.
263 * The validity of the events (returned by ring_buffer_peek() etc.)
264 * is not protected by the ring buffer.
265 *
266 * The content of events may become garbage if we allow other processes to
267 * consume these events concurrently:
268 * A) the page holding the consumed events may become a normal page
269 * (not a reader page) in the ring buffer, and this page will be rewritten
270 * by the event producer.
271 * B) The page holding the consumed events may become a page for splice_read,
272 * and this page will be returned to the system.
273 *
274 * These primitives allow multiple processes to access different cpu ring
275 * buffers concurrently.
276 *
277 * These primitives don't distinguish read-only and read-consume access.
278 * Multiple read-only accesses are also serialized.
279 */
280
281#ifdef CONFIG_SMP
282static DECLARE_RWSEM(all_cpu_access_lock);
283static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
284
285static inline void trace_access_lock(int cpu)
286{
287 if (cpu == TRACE_PIPE_ALL_CPU) {
288 /* gain it for accessing the whole ring buffer. */
289 down_write(&all_cpu_access_lock);
290 } else {
291 /* gain it for accessing a cpu ring buffer. */
292
293 /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
294 down_read(&all_cpu_access_lock);
295
296 /* Secondly block other access to this @cpu ring buffer. */
297 mutex_lock(&per_cpu(cpu_access_lock, cpu));
298 }
299}
300
301static inline void trace_access_unlock(int cpu)
302{
303 if (cpu == TRACE_PIPE_ALL_CPU) {
304 up_write(&all_cpu_access_lock);
305 } else {
306 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
307 up_read(&all_cpu_access_lock);
308 }
309}
310
311static inline void trace_access_lock_init(void)
312{
313 int cpu;
314
315 for_each_possible_cpu(cpu)
316 mutex_init(&per_cpu(cpu_access_lock, cpu));
317}
318
319#else
320
321static DEFINE_MUTEX(access_lock);
322
323static inline void trace_access_lock(int cpu)
324{
325 (void)cpu;
326 mutex_lock(&access_lock);
327}
328
329static inline void trace_access_unlock(int cpu)
330{
331 (void)cpu;
332 mutex_unlock(&access_lock);
333}
334
335static inline void trace_access_lock_init(void)
336{
337}
338
339#endif
340
252/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 341/* trace_wait is a waitqueue for tasks blocked on trace_poll */
253static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 342static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
254 343
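The helpers added above replace the old one-reader-per-cpu restriction removed later in this patch: a per-cpu mutex serializes readers of a single CPU buffer, while the all_cpu_access_lock rwsem lets a TRACE_PIPE_ALL_CPU reader exclude them all. The snippet below is a small user-space analogue of that scheme, using a pthread rwlock and an array of mutexes purely to show why per-cpu readers take the outer lock for read while the all-CPU reader takes it for write; it is not the kernel code.

#include <pthread.h>

#define NR_CPUS_DEMO 4
#define ALL_CPUS     (-1)   /* stand-in for TRACE_PIPE_ALL_CPU */

static pthread_rwlock_t all_cpu_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t  cpu_lock[NR_CPUS_DEMO] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};

static void access_lock(int cpu)
{
	if (cpu == ALL_CPUS) {
		pthread_rwlock_wrlock(&all_cpu_lock);   /* exclude every reader */
	} else {
		pthread_rwlock_rdlock(&all_cpu_lock);   /* block an ALL_CPUS reader */
		pthread_mutex_lock(&cpu_lock[cpu]);     /* serialize this cpu's readers */
	}
}

static void access_unlock(int cpu)
{
	if (cpu == ALL_CPUS) {
		pthread_rwlock_unlock(&all_cpu_lock);
	} else {
		pthread_mutex_unlock(&cpu_lock[cpu]);
		pthread_rwlock_unlock(&all_cpu_lock);
	}
}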
@@ -297,6 +386,21 @@ static int __init set_buf_size(char *str)
297} 386}
298__setup("trace_buf_size=", set_buf_size); 387__setup("trace_buf_size=", set_buf_size);
299 388
389static int __init set_tracing_thresh(char *str)
390{
391 unsigned long threshhold;
392 int ret;
393
394 if (!str)
395 return 0;
396 ret = strict_strtoul(str, 0, &threshhold);
397 if (ret < 0)
398 return 0;
399 tracing_thresh = threshhold * 1000;
400 return 1;
401}
402__setup("tracing_thresh=", set_tracing_thresh);
403
300unsigned long nsecs_to_usecs(unsigned long nsecs) 404unsigned long nsecs_to_usecs(unsigned long nsecs)
301{ 405{
302 return nsecs / 1000; 406 return nsecs / 1000;
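set_tracing_thresh() above takes the threshold from the tracing_thresh= boot parameter, treats it as microseconds, and stores it in nanoseconds (multiplied by 1000), which nsecs_to_usecs() converts back for display. A trivial user-space check of that conversion; the helper name here is invented for the example.

#include <assert.h>
#include <stdlib.h>

/* Mirror of the conversion done by set_tracing_thresh(): usecs -> nsecs. */
static unsigned long thresh_ns_from_boot_arg(const char *arg)
{
	return strtoul(arg, NULL, 0) * 1000UL;
}

int main(void)
{
	/* Booting with tracing_thresh=100 stores 100000 ns. */
	assert(thresh_ns_from_boot_arg("100") == 100000UL);
	return 0;
}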
@@ -502,9 +606,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
502static arch_spinlock_t ftrace_max_lock = 606static arch_spinlock_t ftrace_max_lock =
503 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 607 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
504 608
609unsigned long __read_mostly tracing_thresh;
610
505#ifdef CONFIG_TRACER_MAX_TRACE 611#ifdef CONFIG_TRACER_MAX_TRACE
506unsigned long __read_mostly tracing_max_latency; 612unsigned long __read_mostly tracing_max_latency;
507unsigned long __read_mostly tracing_thresh;
508 613
509/* 614/*
510 * Copy the new maximum trace into the separate maximum-trace 615 * Copy the new maximum trace into the separate maximum-trace
@@ -515,7 +620,7 @@ static void
515__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 620__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
516{ 621{
517 struct trace_array_cpu *data = tr->data[cpu]; 622 struct trace_array_cpu *data = tr->data[cpu];
518 struct trace_array_cpu *max_data = tr->data[cpu]; 623 struct trace_array_cpu *max_data;
519 624
520 max_tr.cpu = cpu; 625 max_tr.cpu = cpu;
521 max_tr.time_start = data->preempt_timestamp; 626 max_tr.time_start = data->preempt_timestamp;
@@ -525,7 +630,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
525 max_data->critical_start = data->critical_start; 630 max_data->critical_start = data->critical_start;
526 max_data->critical_end = data->critical_end; 631 max_data->critical_end = data->critical_end;
527 632
528 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 633 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
529 max_data->pid = tsk->pid; 634 max_data->pid = tsk->pid;
530 max_data->uid = task_uid(tsk); 635 max_data->uid = task_uid(tsk);
531 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 636 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -747,10 +852,10 @@ out:
747 mutex_unlock(&trace_types_lock); 852 mutex_unlock(&trace_types_lock);
748} 853}
749 854
750static void __tracing_reset(struct trace_array *tr, int cpu) 855static void __tracing_reset(struct ring_buffer *buffer, int cpu)
751{ 856{
752 ftrace_disable_cpu(); 857 ftrace_disable_cpu();
753 ring_buffer_reset_cpu(tr->buffer, cpu); 858 ring_buffer_reset_cpu(buffer, cpu);
754 ftrace_enable_cpu(); 859 ftrace_enable_cpu();
755} 860}
756 861
@@ -762,7 +867,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
762 867
763 /* Make sure all commits have finished */ 868 /* Make sure all commits have finished */
764 synchronize_sched(); 869 synchronize_sched();
765 __tracing_reset(tr, cpu); 870 __tracing_reset(buffer, cpu);
766 871
767 ring_buffer_record_enable(buffer); 872 ring_buffer_record_enable(buffer);
768} 873}
@@ -780,7 +885,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
780 tr->time_start = ftrace_now(tr->cpu); 885 tr->time_start = ftrace_now(tr->cpu);
781 886
782 for_each_online_cpu(cpu) 887 for_each_online_cpu(cpu)
783 __tracing_reset(tr, cpu); 888 __tracing_reset(buffer, cpu);
784 889
785 ring_buffer_record_enable(buffer); 890 ring_buffer_record_enable(buffer);
786} 891}
@@ -857,6 +962,8 @@ void tracing_start(void)
857 goto out; 962 goto out;
858 } 963 }
859 964
965 /* Prevent the buffers from switching */
966 arch_spin_lock(&ftrace_max_lock);
860 967
861 buffer = global_trace.buffer; 968 buffer = global_trace.buffer;
862 if (buffer) 969 if (buffer)
@@ -866,6 +973,8 @@ void tracing_start(void)
866 if (buffer) 973 if (buffer)
867 ring_buffer_record_enable(buffer); 974 ring_buffer_record_enable(buffer);
868 975
976 arch_spin_unlock(&ftrace_max_lock);
977
869 ftrace_start(); 978 ftrace_start();
870 out: 979 out:
871 spin_unlock_irqrestore(&tracing_start_lock, flags); 980 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -887,6 +996,9 @@ void tracing_stop(void)
887 if (trace_stop_count++) 996 if (trace_stop_count++)
888 goto out; 997 goto out;
889 998
999 /* Prevent the buffers from switching */
1000 arch_spin_lock(&ftrace_max_lock);
1001
890 buffer = global_trace.buffer; 1002 buffer = global_trace.buffer;
891 if (buffer) 1003 if (buffer)
892 ring_buffer_record_disable(buffer); 1004 ring_buffer_record_disable(buffer);
@@ -895,6 +1007,8 @@ void tracing_stop(void)
895 if (buffer) 1007 if (buffer)
896 ring_buffer_record_disable(buffer); 1008 ring_buffer_record_disable(buffer);
897 1009
1010 arch_spin_unlock(&ftrace_max_lock);
1011
898 out: 1012 out:
899 spin_unlock_irqrestore(&tracing_start_lock, flags); 1013 spin_unlock_irqrestore(&tracing_start_lock, flags);
900} 1014}
@@ -951,6 +1065,11 @@ void trace_find_cmdline(int pid, char comm[])
951 return; 1065 return;
952 } 1066 }
953 1067
1068 if (WARN_ON_ONCE(pid < 0)) {
1069 strcpy(comm, "<XXX>");
1070 return;
1071 }
1072
954 if (pid > PID_MAX_DEFAULT) { 1073 if (pid > PID_MAX_DEFAULT) {
955 strcpy(comm, "<...>"); 1074 strcpy(comm, "<...>");
956 return; 1075 return;
@@ -1084,7 +1203,7 @@ trace_function(struct trace_array *tr,
1084 struct ftrace_entry *entry; 1203 struct ftrace_entry *entry;
1085 1204
1086 /* If we are reading the ring buffer, don't trace */ 1205 /* If we are reading the ring buffer, don't trace */
1087 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 1206 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1088 return; 1207 return;
1089 1208
1090 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 1209 event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
@@ -1177,6 +1296,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1177 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1296 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1178 return; 1297 return;
1179 1298
1299 /*
1300 * NMIs can not handle page faults, even with fix ups.
1301 * The save user stack can (and often does) fault.
1302 */
1303 if (unlikely(in_nmi()))
1304 return;
1305
1180 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1306 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1181 sizeof(*entry), flags, pc); 1307 sizeof(*entry), flags, pc);
1182 if (!event) 1308 if (!event)
@@ -1315,8 +1441,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1315 entry->fmt = fmt; 1441 entry->fmt = fmt;
1316 1442
1317 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1443 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1318 if (!filter_check_discard(call, entry, buffer, event)) 1444 if (!filter_check_discard(call, entry, buffer, event)) {
1319 ring_buffer_unlock_commit(buffer, event); 1445 ring_buffer_unlock_commit(buffer, event);
1446 ftrace_trace_stack(buffer, flags, 6, pc);
1447 }
1320 1448
1321out_unlock: 1449out_unlock:
1322 arch_spin_unlock(&trace_buf_lock); 1450 arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1517,10 @@ int trace_array_vprintk(struct trace_array *tr,
1389 1517
1390 memcpy(&entry->buf, trace_buf, len); 1518 memcpy(&entry->buf, trace_buf, len);
1391 entry->buf[len] = '\0'; 1519 entry->buf[len] = '\0';
1392 if (!filter_check_discard(call, entry, buffer, event)) 1520 if (!filter_check_discard(call, entry, buffer, event)) {
1393 ring_buffer_unlock_commit(buffer, event); 1521 ring_buffer_unlock_commit(buffer, event);
1522 ftrace_trace_stack(buffer, irq_flags, 6, pc);
1523 }
1394 1524
1395 out_unlock: 1525 out_unlock:
1396 arch_spin_unlock(&trace_buf_lock); 1526 arch_spin_unlock(&trace_buf_lock);
@@ -1427,7 +1557,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
1427} 1557}
1428 1558
1429static struct trace_entry * 1559static struct trace_entry *
1430peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts) 1560peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1561 unsigned long *lost_events)
1431{ 1562{
1432 struct ring_buffer_event *event; 1563 struct ring_buffer_event *event;
1433 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu]; 1564 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1438,7 +1569,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1438 if (buf_iter) 1569 if (buf_iter)
1439 event = ring_buffer_iter_peek(buf_iter, ts); 1570 event = ring_buffer_iter_peek(buf_iter, ts);
1440 else 1571 else
1441 event = ring_buffer_peek(iter->tr->buffer, cpu, ts); 1572 event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
1573 lost_events);
1442 1574
1443 ftrace_enable_cpu(); 1575 ftrace_enable_cpu();
1444 1576
@@ -1446,10 +1578,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1446} 1578}
1447 1579
1448static struct trace_entry * 1580static struct trace_entry *
1449__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) 1581__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
1582 unsigned long *missing_events, u64 *ent_ts)
1450{ 1583{
1451 struct ring_buffer *buffer = iter->tr->buffer; 1584 struct ring_buffer *buffer = iter->tr->buffer;
1452 struct trace_entry *ent, *next = NULL; 1585 struct trace_entry *ent, *next = NULL;
1586 unsigned long lost_events = 0, next_lost = 0;
1453 int cpu_file = iter->cpu_file; 1587 int cpu_file = iter->cpu_file;
1454 u64 next_ts = 0, ts; 1588 u64 next_ts = 0, ts;
1455 int next_cpu = -1; 1589 int next_cpu = -1;
@@ -1462,7 +1596,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1462 if (cpu_file > TRACE_PIPE_ALL_CPU) { 1596 if (cpu_file > TRACE_PIPE_ALL_CPU) {
1463 if (ring_buffer_empty_cpu(buffer, cpu_file)) 1597 if (ring_buffer_empty_cpu(buffer, cpu_file))
1464 return NULL; 1598 return NULL;
1465 ent = peek_next_entry(iter, cpu_file, ent_ts); 1599 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
1466 if (ent_cpu) 1600 if (ent_cpu)
1467 *ent_cpu = cpu_file; 1601 *ent_cpu = cpu_file;
1468 1602
@@ -1474,7 +1608,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1474 if (ring_buffer_empty_cpu(buffer, cpu)) 1608 if (ring_buffer_empty_cpu(buffer, cpu))
1475 continue; 1609 continue;
1476 1610
1477 ent = peek_next_entry(iter, cpu, &ts); 1611 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
1478 1612
1479 /* 1613 /*
1480 * Pick the entry with the smallest timestamp: 1614 * Pick the entry with the smallest timestamp:
@@ -1483,6 +1617,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1483 next = ent; 1617 next = ent;
1484 next_cpu = cpu; 1618 next_cpu = cpu;
1485 next_ts = ts; 1619 next_ts = ts;
1620 next_lost = lost_events;
1486 } 1621 }
1487 } 1622 }
1488 1623
@@ -1492,6 +1627,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1492 if (ent_ts) 1627 if (ent_ts)
1493 *ent_ts = next_ts; 1628 *ent_ts = next_ts;
1494 1629
1630 if (missing_events)
1631 *missing_events = next_lost;
1632
1495 return next; 1633 return next;
1496} 1634}
1497 1635
@@ -1499,13 +1637,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1499struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 1637struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1500 int *ent_cpu, u64 *ent_ts) 1638 int *ent_cpu, u64 *ent_ts)
1501{ 1639{
1502 return __find_next_entry(iter, ent_cpu, ent_ts); 1640 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
1503} 1641}
1504 1642
1505/* Find the next real entry, and increment the iterator to the next entry */ 1643/* Find the next real entry, and increment the iterator to the next entry */
1506static void *find_next_entry_inc(struct trace_iterator *iter) 1644static void *find_next_entry_inc(struct trace_iterator *iter)
1507{ 1645{
1508 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); 1646 iter->ent = __find_next_entry(iter, &iter->cpu,
1647 &iter->lost_events, &iter->ts);
1509 1648
1510 if (iter->ent) 1649 if (iter->ent)
1511 trace_iterator_increment(iter); 1650 trace_iterator_increment(iter);
@@ -1517,7 +1656,8 @@ static void trace_consume(struct trace_iterator *iter)
1517{ 1656{
1518 /* Don't allow ftrace to trace into the ring buffers */ 1657 /* Don't allow ftrace to trace into the ring buffers */
1519 ftrace_disable_cpu(); 1658 ftrace_disable_cpu();
1520 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts); 1659 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
1660 &iter->lost_events);
1521 ftrace_enable_cpu(); 1661 ftrace_enable_cpu();
1522} 1662}
1523 1663
@@ -1580,12 +1720,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
1580} 1720}
1581 1721
1582/* 1722/*
1583 * No necessary locking here. The worst thing which can
1584 * happen is loosing events consumed at the same time
1585 * by a trace_pipe reader.
1586 * Other than that, we don't risk to crash the ring buffer
1587 * because it serializes the readers.
1588 *
1589 * The current tracer is copied to avoid a global locking 1723 * The current tracer is copied to avoid a global locking
1590 * all around. 1724 * all around.
1591 */ 1725 */
@@ -1623,6 +1757,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1623 1757
1624 ftrace_enable_cpu(); 1758 ftrace_enable_cpu();
1625 1759
1760 iter->leftover = 0;
1626 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1761 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1627 ; 1762 ;
1628 1763
@@ -1640,12 +1775,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1640 } 1775 }
1641 1776
1642 trace_event_read_lock(); 1777 trace_event_read_lock();
1778 trace_access_lock(cpu_file);
1643 return p; 1779 return p;
1644} 1780}
1645 1781
1646static void s_stop(struct seq_file *m, void *p) 1782static void s_stop(struct seq_file *m, void *p)
1647{ 1783{
1784 struct trace_iterator *iter = m->private;
1785
1648 atomic_dec(&trace_record_cmdline_disabled); 1786 atomic_dec(&trace_record_cmdline_disabled);
1787 trace_access_unlock(iter->cpu_file);
1649 trace_event_read_unlock(); 1788 trace_event_read_unlock();
1650} 1789}
1651 1790
@@ -1669,7 +1808,7 @@ static void print_func_help_header(struct seq_file *m)
1669} 1808}
1670 1809
1671 1810
1672static void 1811void
1673print_trace_header(struct seq_file *m, struct trace_iterator *iter) 1812print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1674{ 1813{
1675 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1814 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1797,7 +1936,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1797 } 1936 }
1798 1937
1799 if (event) 1938 if (event)
1800 return event->trace(iter, sym_flags); 1939 return event->funcs->trace(iter, sym_flags, event);
1801 1940
1802 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) 1941 if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1803 goto partial; 1942 goto partial;
@@ -1823,7 +1962,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1823 1962
1824 event = ftrace_find_event(entry->type); 1963 event = ftrace_find_event(entry->type);
1825 if (event) 1964 if (event)
1826 return event->raw(iter, 0); 1965 return event->funcs->raw(iter, 0, event);
1827 1966
1828 if (!trace_seq_printf(s, "%d ?\n", entry->type)) 1967 if (!trace_seq_printf(s, "%d ?\n", entry->type))
1829 goto partial; 1968 goto partial;
@@ -1850,7 +1989,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1850 1989
1851 event = ftrace_find_event(entry->type); 1990 event = ftrace_find_event(entry->type);
1852 if (event) { 1991 if (event) {
1853 enum print_line_t ret = event->hex(iter, 0); 1992 enum print_line_t ret = event->funcs->hex(iter, 0, event);
1854 if (ret != TRACE_TYPE_HANDLED) 1993 if (ret != TRACE_TYPE_HANDLED)
1855 return ret; 1994 return ret;
1856 } 1995 }
@@ -1875,10 +2014,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1875 } 2014 }
1876 2015
1877 event = ftrace_find_event(entry->type); 2016 event = ftrace_find_event(entry->type);
1878 return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; 2017 return event ? event->funcs->binary(iter, 0, event) :
2018 TRACE_TYPE_HANDLED;
1879} 2019}
1880 2020
1881static int trace_empty(struct trace_iterator *iter) 2021int trace_empty(struct trace_iterator *iter)
1882{ 2022{
1883 int cpu; 2023 int cpu;
1884 2024
@@ -1913,6 +2053,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1913{ 2053{
1914 enum print_line_t ret; 2054 enum print_line_t ret;
1915 2055
2056 if (iter->lost_events)
2057 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2058 iter->cpu, iter->lost_events);
2059
1916 if (iter->trace && iter->trace->print_line) { 2060 if (iter->trace && iter->trace->print_line) {
1917 ret = iter->trace->print_line(iter); 2061 ret = iter->trace->print_line(iter);
1918 if (ret != TRACE_TYPE_UNHANDLED) 2062 if (ret != TRACE_TYPE_UNHANDLED)
@@ -1941,6 +2085,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1941 return print_trace_fmt(iter); 2085 return print_trace_fmt(iter);
1942} 2086}
1943 2087
2088void trace_default_header(struct seq_file *m)
2089{
2090 struct trace_iterator *iter = m->private;
2091
2092 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2093 /* print nothing if the buffers are empty */
2094 if (trace_empty(iter))
2095 return;
2096 print_trace_header(m, iter);
2097 if (!(trace_flags & TRACE_ITER_VERBOSE))
2098 print_lat_help_header(m);
2099 } else {
2100 if (!(trace_flags & TRACE_ITER_VERBOSE))
2101 print_func_help_header(m);
2102 }
2103}
2104
1944static int s_show(struct seq_file *m, void *v) 2105static int s_show(struct seq_file *m, void *v)
1945{ 2106{
1946 struct trace_iterator *iter = v; 2107 struct trace_iterator *iter = v;
@@ -1953,17 +2114,9 @@ static int s_show(struct seq_file *m, void *v)
1953 } 2114 }
1954 if (iter->trace && iter->trace->print_header) 2115 if (iter->trace && iter->trace->print_header)
1955 iter->trace->print_header(m); 2116 iter->trace->print_header(m);
1956 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2117 else
1957 /* print nothing if the buffers are empty */ 2118 trace_default_header(m);
1958 if (trace_empty(iter)) 2119
1959 return 0;
1960 print_trace_header(m, iter);
1961 if (!(trace_flags & TRACE_ITER_VERBOSE))
1962 print_lat_help_header(m);
1963 } else {
1964 if (!(trace_flags & TRACE_ITER_VERBOSE))
1965 print_func_help_header(m);
1966 }
1967 } else if (iter->leftover) { 2120 } else if (iter->leftover) {
1968 /* 2121 /*
1969 * If we filled the seq_file buffer earlier, we 2122 * If we filled the seq_file buffer earlier, we
@@ -2049,15 +2202,20 @@ __tracing_open(struct inode *inode, struct file *file)
2049 2202
2050 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { 2203 if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
2051 for_each_tracing_cpu(cpu) { 2204 for_each_tracing_cpu(cpu) {
2052
2053 iter->buffer_iter[cpu] = 2205 iter->buffer_iter[cpu] =
2054 ring_buffer_read_start(iter->tr->buffer, cpu); 2206 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2207 }
2208 ring_buffer_read_prepare_sync();
2209 for_each_tracing_cpu(cpu) {
2210 ring_buffer_read_start(iter->buffer_iter[cpu]);
2055 tracing_iter_reset(iter, cpu); 2211 tracing_iter_reset(iter, cpu);
2056 } 2212 }
2057 } else { 2213 } else {
2058 cpu = iter->cpu_file; 2214 cpu = iter->cpu_file;
2059 iter->buffer_iter[cpu] = 2215 iter->buffer_iter[cpu] =
2060 ring_buffer_read_start(iter->tr->buffer, cpu); 2216 ring_buffer_read_prepare(iter->tr->buffer, cpu);
2217 ring_buffer_read_prepare_sync();
2218 ring_buffer_read_start(iter->buffer_iter[cpu]);
2061 tracing_iter_reset(iter, cpu); 2219 tracing_iter_reset(iter, cpu);
2062 } 2220 }
2063 2221
@@ -2836,22 +2994,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2836 2994
2837 mutex_lock(&trace_types_lock); 2995 mutex_lock(&trace_types_lock);
2838 2996
2839 /* We only allow one reader per cpu */
2840 if (cpu_file == TRACE_PIPE_ALL_CPU) {
2841 if (!cpumask_empty(tracing_reader_cpumask)) {
2842 ret = -EBUSY;
2843 goto out;
2844 }
2845 cpumask_setall(tracing_reader_cpumask);
2846 } else {
2847 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2848 cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2849 else {
2850 ret = -EBUSY;
2851 goto out;
2852 }
2853 }
2854
2855 /* create a buffer to store the information to pass to userspace */ 2997 /* create a buffer to store the information to pass to userspace */
2856 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 2998 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2857 if (!iter) { 2999 if (!iter) {
@@ -2907,12 +3049,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
2907 3049
2908 mutex_lock(&trace_types_lock); 3050 mutex_lock(&trace_types_lock);
2909 3051
2910 if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2911 cpumask_clear(tracing_reader_cpumask);
2912 else
2913 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2914
2915
2916 if (iter->trace->pipe_close) 3052 if (iter->trace->pipe_close)
2917 iter->trace->pipe_close(iter); 3053 iter->trace->pipe_close(iter);
2918 3054
@@ -3074,6 +3210,7 @@ waitagain:
3074 iter->pos = -1; 3210 iter->pos = -1;
3075 3211
3076 trace_event_read_lock(); 3212 trace_event_read_lock();
3213 trace_access_lock(iter->cpu_file);
3077 while (find_next_entry_inc(iter) != NULL) { 3214 while (find_next_entry_inc(iter) != NULL) {
3078 enum print_line_t ret; 3215 enum print_line_t ret;
3079 int len = iter->seq.len; 3216 int len = iter->seq.len;
@@ -3090,6 +3227,7 @@ waitagain:
3090 if (iter->seq.len >= cnt) 3227 if (iter->seq.len >= cnt)
3091 break; 3228 break;
3092 } 3229 }
3230 trace_access_unlock(iter->cpu_file);
3093 trace_event_read_unlock(); 3231 trace_event_read_unlock();
3094 3232
3095 /* Now copy what we have to the user */ 3233 /* Now copy what we have to the user */
@@ -3172,12 +3310,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3172 size_t len, 3310 size_t len,
3173 unsigned int flags) 3311 unsigned int flags)
3174{ 3312{
3175 struct page *pages[PIPE_BUFFERS]; 3313 struct page *pages_def[PIPE_DEF_BUFFERS];
3176 struct partial_page partial[PIPE_BUFFERS]; 3314 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3177 struct trace_iterator *iter = filp->private_data; 3315 struct trace_iterator *iter = filp->private_data;
3178 struct splice_pipe_desc spd = { 3316 struct splice_pipe_desc spd = {
3179 .pages = pages, 3317 .pages = pages_def,
3180 .partial = partial, 3318 .partial = partial_def,
3181 .nr_pages = 0, /* This gets updated below. */ 3319 .nr_pages = 0, /* This gets updated below. */
3182 .flags = flags, 3320 .flags = flags,
3183 .ops = &tracing_pipe_buf_ops, 3321 .ops = &tracing_pipe_buf_ops,
@@ -3188,6 +3326,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3188 size_t rem; 3326 size_t rem;
3189 unsigned int i; 3327 unsigned int i;
3190 3328
3329 if (splice_grow_spd(pipe, &spd))
3330 return -ENOMEM;
3331
3191 /* copy the tracer to avoid using a global lock all around */ 3332 /* copy the tracer to avoid using a global lock all around */
3192 mutex_lock(&trace_types_lock); 3333 mutex_lock(&trace_types_lock);
3193 if (unlikely(old_tracer != current_trace && current_trace)) { 3334 if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3215,40 +3356,44 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
3215 } 3356 }
3216 3357
3217 trace_event_read_lock(); 3358 trace_event_read_lock();
3359 trace_access_lock(iter->cpu_file);
3218 3360
3219 /* Fill as many pages as possible. */ 3361 /* Fill as many pages as possible. */
3220 for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { 3362 for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
3221 pages[i] = alloc_page(GFP_KERNEL); 3363 spd.pages[i] = alloc_page(GFP_KERNEL);
3222 if (!pages[i]) 3364 if (!spd.pages[i])
3223 break; 3365 break;
3224 3366
3225 rem = tracing_fill_pipe_page(rem, iter); 3367 rem = tracing_fill_pipe_page(rem, iter);
3226 3368
3227 /* Copy the data into the page, so we can start over. */ 3369 /* Copy the data into the page, so we can start over. */
3228 ret = trace_seq_to_buffer(&iter->seq, 3370 ret = trace_seq_to_buffer(&iter->seq,
3229 page_address(pages[i]), 3371 page_address(spd.pages[i]),
3230 iter->seq.len); 3372 iter->seq.len);
3231 if (ret < 0) { 3373 if (ret < 0) {
3232 __free_page(pages[i]); 3374 __free_page(spd.pages[i]);
3233 break; 3375 break;
3234 } 3376 }
3235 partial[i].offset = 0; 3377 spd.partial[i].offset = 0;
3236 partial[i].len = iter->seq.len; 3378 spd.partial[i].len = iter->seq.len;
3237 3379
3238 trace_seq_init(&iter->seq); 3380 trace_seq_init(&iter->seq);
3239 } 3381 }
3240 3382
3383 trace_access_unlock(iter->cpu_file);
3241 trace_event_read_unlock(); 3384 trace_event_read_unlock();
3242 mutex_unlock(&iter->mutex); 3385 mutex_unlock(&iter->mutex);
3243 3386
3244 spd.nr_pages = i; 3387 spd.nr_pages = i;
3245 3388
3246 return splice_to_pipe(pipe, &spd); 3389 ret = splice_to_pipe(pipe, &spd);
3390out:
3391 splice_shrink_spd(pipe, &spd);
3392 return ret;
3247 3393
3248out_err: 3394out_err:
3249 mutex_unlock(&iter->mutex); 3395 mutex_unlock(&iter->mutex);
3250 3396 goto out;
3251 return ret;
3252} 3397}
3253 3398
3254static ssize_t 3399static ssize_t
@@ -3521,7 +3666,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3521 size_t count, loff_t *ppos) 3666 size_t count, loff_t *ppos)
3522{ 3667{
3523 struct ftrace_buffer_info *info = filp->private_data; 3668 struct ftrace_buffer_info *info = filp->private_data;
3524 unsigned int pos;
3525 ssize_t ret; 3669 ssize_t ret;
3526 size_t size; 3670 size_t size;
3527 3671
@@ -3539,18 +3683,15 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3539 3683
3540 info->read = 0; 3684 info->read = 0;
3541 3685
3686 trace_access_lock(info->cpu);
3542 ret = ring_buffer_read_page(info->tr->buffer, 3687 ret = ring_buffer_read_page(info->tr->buffer,
3543 &info->spare, 3688 &info->spare,
3544 count, 3689 count,
3545 info->cpu, 0); 3690 info->cpu, 0);
3691 trace_access_unlock(info->cpu);
3546 if (ret < 0) 3692 if (ret < 0)
3547 return 0; 3693 return 0;
3548 3694
3549 pos = ring_buffer_page_len(info->spare);
3550
3551 if (pos < PAGE_SIZE)
3552 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3553
3554read: 3695read:
3555 size = PAGE_SIZE - info->read; 3696 size = PAGE_SIZE - info->read;
3556 if (size > count) 3697 if (size > count)
@@ -3645,11 +3786,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3645 unsigned int flags) 3786 unsigned int flags)
3646{ 3787{
3647 struct ftrace_buffer_info *info = file->private_data; 3788 struct ftrace_buffer_info *info = file->private_data;
3648 struct partial_page partial[PIPE_BUFFERS]; 3789 struct partial_page partial_def[PIPE_DEF_BUFFERS];
3649 struct page *pages[PIPE_BUFFERS]; 3790 struct page *pages_def[PIPE_DEF_BUFFERS];
3650 struct splice_pipe_desc spd = { 3791 struct splice_pipe_desc spd = {
3651 .pages = pages, 3792 .pages = pages_def,
3652 .partial = partial, 3793 .partial = partial_def,
3653 .flags = flags, 3794 .flags = flags,
3654 .ops = &buffer_pipe_buf_ops, 3795 .ops = &buffer_pipe_buf_ops,
3655 .spd_release = buffer_spd_release, 3796 .spd_release = buffer_spd_release,
@@ -3658,21 +3799,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3658 int entries, size, i; 3799 int entries, size, i;
3659 size_t ret; 3800 size_t ret;
3660 3801
3802 if (splice_grow_spd(pipe, &spd))
3803 return -ENOMEM;
3804
3661 if (*ppos & (PAGE_SIZE - 1)) { 3805 if (*ppos & (PAGE_SIZE - 1)) {
3662 WARN_ONCE(1, "Ftrace: previous read must page-align\n"); 3806 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3663 return -EINVAL; 3807 ret = -EINVAL;
3808 goto out;
3664 } 3809 }
3665 3810
3666 if (len & (PAGE_SIZE - 1)) { 3811 if (len & (PAGE_SIZE - 1)) {
3667 WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); 3812 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3668 if (len < PAGE_SIZE) 3813 if (len < PAGE_SIZE) {
3669 return -EINVAL; 3814 ret = -EINVAL;
3815 goto out;
3816 }
3670 len &= PAGE_MASK; 3817 len &= PAGE_MASK;
3671 } 3818 }
3672 3819
3820 trace_access_lock(info->cpu);
3673 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3821 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3674 3822
3675 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { 3823 for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
3676 struct page *page; 3824 struct page *page;
3677 int r; 3825 int r;
3678 3826
@@ -3717,6 +3865,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3717 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); 3865 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3718 } 3866 }
3719 3867
3868 trace_access_unlock(info->cpu);
3720 spd.nr_pages = i; 3869 spd.nr_pages = i;
3721 3870
3722 /* did we read anything? */ 3871 /* did we read anything? */
@@ -3726,11 +3875,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3726 else 3875 else
3727 ret = 0; 3876 ret = 0;
3728 /* TODO: block */ 3877 /* TODO: block */
3729 return ret; 3878 goto out;
3730 } 3879 }
3731 3880
3732 ret = splice_to_pipe(pipe, &spd); 3881 ret = splice_to_pipe(pipe, &spd);
3733 3882 splice_shrink_spd(pipe, &spd);
3883out:
3734 return ret; 3884 return ret;
3735} 3885}
3736 3886
@@ -4153,6 +4303,8 @@ static __init int tracer_init_debugfs(void)
4153 struct dentry *d_tracer; 4303 struct dentry *d_tracer;
4154 int cpu; 4304 int cpu;
4155 4305
4306 trace_access_lock_init();
4307
4156 d_tracer = tracing_init_dentry(); 4308 d_tracer = tracing_init_dentry();
4157 4309
4158 trace_create_file("tracing_enabled", 0644, d_tracer, 4310 trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4176,10 +4328,10 @@ static __init int tracer_init_debugfs(void)
4176#ifdef CONFIG_TRACER_MAX_TRACE 4328#ifdef CONFIG_TRACER_MAX_TRACE
4177 trace_create_file("tracing_max_latency", 0644, d_tracer, 4329 trace_create_file("tracing_max_latency", 0644, d_tracer,
4178 &tracing_max_latency, &tracing_max_lat_fops); 4330 &tracing_max_latency, &tracing_max_lat_fops);
4331#endif
4179 4332
4180 trace_create_file("tracing_thresh", 0644, d_tracer, 4333 trace_create_file("tracing_thresh", 0644, d_tracer,
4181 &tracing_thresh, &tracing_max_lat_fops); 4334 &tracing_thresh, &tracing_max_lat_fops);
4182#endif
4183 4335
4184 trace_create_file("README", 0444, d_tracer, 4336 trace_create_file("README", 0444, d_tracer,
4185 NULL, &tracing_readme_fops); 4337 NULL, &tracing_readme_fops);
@@ -4219,7 +4371,7 @@ static int trace_panic_handler(struct notifier_block *this,
4219 unsigned long event, void *unused) 4371 unsigned long event, void *unused)
4220{ 4372{
4221 if (ftrace_dump_on_oops) 4373 if (ftrace_dump_on_oops)
4222 ftrace_dump(); 4374 ftrace_dump(ftrace_dump_on_oops);
4223 return NOTIFY_OK; 4375 return NOTIFY_OK;
4224} 4376}
4225 4377
@@ -4236,7 +4388,7 @@ static int trace_die_handler(struct notifier_block *self,
4236 switch (val) { 4388 switch (val) {
4237 case DIE_OOPS: 4389 case DIE_OOPS:
4238 if (ftrace_dump_on_oops) 4390 if (ftrace_dump_on_oops)
4239 ftrace_dump(); 4391 ftrace_dump(ftrace_dump_on_oops);
4240 break; 4392 break;
4241 default: 4393 default:
4242 break; 4394 break;
@@ -4277,7 +4429,8 @@ trace_printk_seq(struct trace_seq *s)
4277 trace_seq_init(s); 4429 trace_seq_init(s);
4278} 4430}
4279 4431
4280static void __ftrace_dump(bool disable_tracing) 4432static void
4433__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
4281{ 4434{
4282 static arch_spinlock_t ftrace_dump_lock = 4435 static arch_spinlock_t ftrace_dump_lock =
4283 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 4436 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -4310,12 +4463,25 @@ static void __ftrace_dump(bool disable_tracing)
4310 /* don't look at user memory in panic mode */ 4463 /* don't look at user memory in panic mode */
4311 trace_flags &= ~TRACE_ITER_SYM_USEROBJ; 4464 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
4312 4465
4313 printk(KERN_TRACE "Dumping ftrace buffer:\n");
4314
4315 /* Simulate the iterator */ 4466 /* Simulate the iterator */
4316 iter.tr = &global_trace; 4467 iter.tr = &global_trace;
4317 iter.trace = current_trace; 4468 iter.trace = current_trace;
4318 iter.cpu_file = TRACE_PIPE_ALL_CPU; 4469
4470 switch (oops_dump_mode) {
4471 case DUMP_ALL:
4472 iter.cpu_file = TRACE_PIPE_ALL_CPU;
4473 break;
4474 case DUMP_ORIG:
4475 iter.cpu_file = raw_smp_processor_id();
4476 break;
4477 case DUMP_NONE:
4478 goto out_enable;
4479 default:
4480 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
4481 iter.cpu_file = TRACE_PIPE_ALL_CPU;
4482 }
4483
4484 printk(KERN_TRACE "Dumping ftrace buffer:\n");
4319 4485
4320 /* 4486 /*
4321 * We need to stop all tracing on all CPUS to read the 4487 * We need to stop all tracing on all CPUS to read the
@@ -4354,6 +4520,7 @@ static void __ftrace_dump(bool disable_tracing)
4354 else 4520 else
4355 printk(KERN_TRACE "---------------------------------\n"); 4521 printk(KERN_TRACE "---------------------------------\n");
4356 4522
4523 out_enable:
4357 /* Re-enable tracing if requested */ 4524 /* Re-enable tracing if requested */
4358 if (!disable_tracing) { 4525 if (!disable_tracing) {
4359 trace_flags |= old_userobj; 4526 trace_flags |= old_userobj;
@@ -4370,9 +4537,9 @@ static void __ftrace_dump(bool disable_tracing)
4370} 4537}
4371 4538
4372/* By default: disable tracing after the dump */ 4539/* By default: disable tracing after the dump */
4373void ftrace_dump(void) 4540void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
4374{ 4541{
4375 __ftrace_dump(true); 4542 __ftrace_dump(true, oops_dump_mode);
4376} 4543}
4377 4544
4378__init static int tracer_alloc_buffers(void) 4545__init static int tracer_alloc_buffers(void)
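With the hunks above, ftrace_dump() takes an enum ftrace_dump_mode and the panic/die notifiers pass ftrace_dump_on_oops through unchanged, so the boot parameter now selects between dumping every CPU's buffer and only the CPU that triggered the oops. A hedged sketch of a caller, assuming the DUMP_* values used in the switch above come from the core headers:

#include <linux/kernel.h>

static void example_dump_trace(bool only_this_cpu)
{
	/*
	 * DUMP_ORIG dumps only the originating CPU's buffer,
	 * DUMP_ALL walks every CPU (TRACE_PIPE_ALL_CPU in the switch above).
	 */
	ftrace_dump(only_this_cpu ? DUMP_ORIG : DUMP_ALL);
}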
@@ -4387,9 +4554,6 @@ __init static int tracer_alloc_buffers(void)
4387 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) 4554 if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
4388 goto out_free_buffer_mask; 4555 goto out_free_buffer_mask;
4389 4556
4390 if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
4391 goto out_free_tracing_cpumask;
4392
4393 /* To save memory, keep the ring buffer size to its minimum */ 4557 /* To save memory, keep the ring buffer size to its minimum */
4394 if (ring_buffer_expanded) 4558 if (ring_buffer_expanded)
4395 ring_buf_size = trace_buf_size; 4559 ring_buf_size = trace_buf_size;
@@ -4447,8 +4611,6 @@ __init static int tracer_alloc_buffers(void)
4447 return 0; 4611 return 0;
4448 4612
4449out_free_cpumask: 4613out_free_cpumask:
4450 free_cpumask_var(tracing_reader_cpumask);
4451out_free_tracing_cpumask:
4452 free_cpumask_var(tracing_cpumask); 4614 free_cpumask_var(tracing_cpumask);
4453out_free_buffer_mask: 4615out_free_buffer_mask:
4454 free_cpumask_var(tracing_buffer_mask); 4616 free_cpumask_var(tracing_buffer_mask);
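The removal of tracing_reader_cpumask also drops one rung from the allocate/unwind ladder in tracer_alloc_buffers(). For reference, the surviving pattern in isolation (names here are illustrative):

#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/errno.h>

static cpumask_var_t example_buffer_mask;
static cpumask_var_t example_tracing_mask;

static int example_alloc_masks(void)
{
	if (!alloc_cpumask_var(&example_buffer_mask, GFP_KERNEL))
		return -ENOMEM;
	if (!alloc_cpumask_var(&example_tracing_mask, GFP_KERNEL))
		goto out_free_buffer_mask;
	return 0;

out_free_buffer_mask:
	free_cpumask_var(example_buffer_mask);
	return -ENOMEM;
}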
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4df6a77eb196..2cd96399463f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -34,7 +34,6 @@ enum trace_type {
34 TRACE_GRAPH_RET, 34 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 35 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 36 TRACE_USER_STACK,
37 TRACE_HW_BRANCHES,
38 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
39 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
40 TRACE_BLK, 39 TRACE_BLK,
@@ -103,29 +102,17 @@ struct syscall_trace_exit {
103 long ret; 102 long ret;
104}; 103};
105 104
106struct kprobe_trace_entry { 105struct kprobe_trace_entry_head {
107 struct trace_entry ent; 106 struct trace_entry ent;
108 unsigned long ip; 107 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111}; 108};
112 109
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ 110struct kretprobe_trace_entry_head {
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent; 111 struct trace_entry ent;
119 unsigned long func; 112 unsigned long func;
120 unsigned long ret_ip; 113 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123}; 114};
124 115
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
129/* 116/*
130 * trace_flag_type is an enumeration that holds different 117 * trace_flag_type is an enumeration that holds different
131 * states when a trace occurs. These are: 118 * states when a trace occurs. These are:
@@ -229,7 +216,6 @@ extern void __ftrace_bad_type(void);
229 TRACE_GRAPH_ENT); \ 216 TRACE_GRAPH_ENT); \
230 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 217 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
231 TRACE_GRAPH_RET); \ 218 TRACE_GRAPH_RET); \
232 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
233 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ 219 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
234 TRACE_KMEM_ALLOC); \ 220 TRACE_KMEM_ALLOC); \
235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 221 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
@@ -378,6 +364,9 @@ void trace_function(struct trace_array *tr,
378 unsigned long ip, 364 unsigned long ip,
379 unsigned long parent_ip, 365 unsigned long parent_ip,
380 unsigned long flags, int pc); 366 unsigned long flags, int pc);
367void trace_default_header(struct seq_file *m);
368void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
369int trace_empty(struct trace_iterator *iter);
381 370
382void trace_graph_return(struct ftrace_graph_ret *trace); 371void trace_graph_return(struct ftrace_graph_ret *trace);
383int trace_graph_entry(struct ftrace_graph_ent *trace); 372int trace_graph_entry(struct ftrace_graph_ent *trace);
@@ -396,9 +385,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396 385
397extern unsigned long nsecs_to_usecs(unsigned long nsecs); 386extern unsigned long nsecs_to_usecs(unsigned long nsecs);
398 387
388extern unsigned long tracing_thresh;
389
399#ifdef CONFIG_TRACER_MAX_TRACE 390#ifdef CONFIG_TRACER_MAX_TRACE
400extern unsigned long tracing_max_latency; 391extern unsigned long tracing_max_latency;
401extern unsigned long tracing_thresh;
402 392
403void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 393void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
404void update_max_tr_single(struct trace_array *tr, 394void update_max_tr_single(struct trace_array *tr,
@@ -415,12 +405,12 @@ void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
415void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, 405void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
416 int pc); 406 int pc);
417#else 407#else
418static inline void ftrace_trace_stack(struct trace_array *tr, 408static inline void ftrace_trace_stack(struct ring_buffer *buffer,
419 unsigned long flags, int skip, int pc) 409 unsigned long flags, int skip, int pc)
420{ 410{
421} 411}
422 412
423static inline void ftrace_trace_userstack(struct trace_array *tr, 413static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
424 unsigned long flags, int pc) 414 unsigned long flags, int pc)
425{ 415{
426} 416}
@@ -466,8 +456,6 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
466 struct trace_array *tr); 456 struct trace_array *tr);
467extern int trace_selftest_startup_branch(struct tracer *trace, 457extern int trace_selftest_startup_branch(struct tracer *trace,
468 struct trace_array *tr); 458 struct trace_array *tr);
469extern int trace_selftest_startup_hw_branches(struct tracer *trace,
470 struct trace_array *tr);
471extern int trace_selftest_startup_ksym(struct tracer *trace, 459extern int trace_selftest_startup_ksym(struct tracer *trace,
472 struct trace_array *tr); 460 struct trace_array *tr);
473#endif /* CONFIG_FTRACE_STARTUP_TEST */ 461#endif /* CONFIG_FTRACE_STARTUP_TEST */
@@ -490,13 +478,34 @@ extern int trace_clock_id;
490 478
491/* Standard output formatting function used for function return traces */ 479/* Standard output formatting function used for function return traces */
492#ifdef CONFIG_FUNCTION_GRAPH_TRACER 480#ifdef CONFIG_FUNCTION_GRAPH_TRACER
493extern enum print_line_t print_graph_function(struct trace_iterator *iter); 481
482/* Flag options */
483#define TRACE_GRAPH_PRINT_OVERRUN 0x1
484#define TRACE_GRAPH_PRINT_CPU 0x2
485#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
486#define TRACE_GRAPH_PRINT_PROC 0x8
487#define TRACE_GRAPH_PRINT_DURATION 0x10
488#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
489
490extern enum print_line_t
491print_graph_function_flags(struct trace_iterator *iter, u32 flags);
492extern void print_graph_headers_flags(struct seq_file *s, u32 flags);
494extern enum print_line_t 493extern enum print_line_t
495trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); 494trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
495extern void graph_trace_open(struct trace_iterator *iter);
496extern void graph_trace_close(struct trace_iterator *iter);
497extern int __trace_graph_entry(struct trace_array *tr,
498 struct ftrace_graph_ent *trace,
499 unsigned long flags, int pc);
500extern void __trace_graph_return(struct trace_array *tr,
501 struct ftrace_graph_ret *trace,
502 unsigned long flags, int pc);
503
496 504
497#ifdef CONFIG_DYNAMIC_FTRACE 505#ifdef CONFIG_DYNAMIC_FTRACE
498/* TODO: make this variable */ 506/* TODO: make this variable */
499#define FTRACE_GRAPH_MAX_FUNCS 32 507#define FTRACE_GRAPH_MAX_FUNCS 32
508extern int ftrace_graph_filter_enabled;
500extern int ftrace_graph_count; 509extern int ftrace_graph_count;
501extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; 510extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
502 511
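The block above also exports the graph tracer's building blocks through trace.h — the TRACE_GRAPH_PRINT_* flag bits, print_graph_function_flags(), print_graph_headers_flags(), graph_trace_open()/close() and the __trace_graph_entry/return helpers — so other tracers can reuse the function-graph output path. An illustrative sketch of a tracer wiring them up (the flag combination is arbitrary):

/* Illustrative only: combine the flag bits declared above and hand the
 * iterator to the shared graph output code. */
#define EXAMPLE_GRAPH_FLAGS	(TRACE_GRAPH_PRINT_CPU | \
				 TRACE_GRAPH_PRINT_PROC | \
				 TRACE_GRAPH_PRINT_DURATION)

static enum print_line_t example_print_line(struct trace_iterator *iter)
{
	return print_graph_function_flags(iter, EXAMPLE_GRAPH_FLAGS);
}

static void example_print_header(struct seq_file *s)
{
	print_graph_headers_flags(s, EXAMPLE_GRAPH_FLAGS);
}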
@@ -504,7 +513,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
504{ 513{
505 int i; 514 int i;
506 515
507 if (!ftrace_graph_count || test_tsk_trace_graph(current)) 516 if (!ftrace_graph_filter_enabled)
508 return 1; 517 return 1;
509 518
510 for (i = 0; i < ftrace_graph_count; i++) { 519 for (i = 0; i < ftrace_graph_count; i++) {
@@ -522,7 +531,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
522#endif /* CONFIG_DYNAMIC_FTRACE */ 531#endif /* CONFIG_DYNAMIC_FTRACE */
523#else /* CONFIG_FUNCTION_GRAPH_TRACER */ 532#else /* CONFIG_FUNCTION_GRAPH_TRACER */
524static inline enum print_line_t 533static inline enum print_line_t
525print_graph_function(struct trace_iterator *iter) 534print_graph_function_flags(struct trace_iterator *iter, u32 flags)
526{ 535{
527 return TRACE_TYPE_UNHANDLED; 536 return TRACE_TYPE_UNHANDLED;
528} 537}
@@ -549,7 +558,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
549 * struct trace_parser - servers for reading the user input separated by spaces 558 * struct trace_parser - servers for reading the user input separated by spaces
550 * @cont: set if the input is not complete - no final space char was found 559 * @cont: set if the input is not complete - no final space char was found
551 * @buffer: holds the parsed user input 560 * @buffer: holds the parsed user input
552 * @idx: user input lenght 561 * @idx: user input length
553 * @size: buffer size 562 * @size: buffer size
554 */ 563 */
555struct trace_parser { 564struct trace_parser {
@@ -769,12 +778,15 @@ extern void print_subsystem_event_filter(struct event_subsystem *system,
769 struct trace_seq *s); 778 struct trace_seq *s);
770extern int filter_assign_type(const char *type); 779extern int filter_assign_type(const char *type);
771 780
781struct list_head *
782trace_get_fields(struct ftrace_event_call *event_call);
783
772static inline int 784static inline int
773filter_check_discard(struct ftrace_event_call *call, void *rec, 785filter_check_discard(struct ftrace_event_call *call, void *rec,
774 struct ring_buffer *buffer, 786 struct ring_buffer *buffer,
775 struct ring_buffer_event *event) 787 struct ring_buffer_event *event)
776{ 788{
777 if (unlikely(call->filter_active) && 789 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
778 !filter_match_preds(call->filter, rec)) { 790 !filter_match_preds(call->filter, rec)) {
779 ring_buffer_discard_commit(buffer, event); 791 ring_buffer_discard_commit(buffer, event);
780 return 1; 792 return 1;
@@ -791,7 +803,8 @@ extern const char *__stop___trace_bprintk_fmt[];
791 803
792#undef FTRACE_ENTRY 804#undef FTRACE_ENTRY
793#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \ 805#define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
794 extern struct ftrace_event_call event_##call; 806 extern struct ftrace_event_call \
807 __attribute__((__aligned__(4))) event_##call;
795#undef FTRACE_ENTRY_DUP 808#undef FTRACE_ENTRY_DUP
796#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \ 809#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
797 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 810 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..8d3538b4ea5f 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -143,7 +143,7 @@ static void branch_trace_reset(struct trace_array *tr)
143} 143}
144 144
145static enum print_line_t trace_branch_print(struct trace_iterator *iter, 145static enum print_line_t trace_branch_print(struct trace_iterator *iter,
146 int flags) 146 int flags, struct trace_event *event)
147{ 147{
148 struct trace_branch *field; 148 struct trace_branch *field;
149 149
@@ -167,9 +167,13 @@ static void branch_print_header(struct seq_file *s)
167 " |\n"); 167 " |\n");
168} 168}
169 169
170static struct trace_event_functions trace_branch_funcs = {
171 .trace = trace_branch_print,
172};
173
170static struct trace_event trace_branch_event = { 174static struct trace_event trace_branch_event = {
171 .type = TRACE_BRANCH, 175 .type = TRACE_BRANCH,
172 .trace = trace_branch_print, 176 .funcs = &trace_branch_funcs,
173}; 177};
174 178
175static struct tracer branch_trace __read_mostly = 179static struct tracer branch_trace __read_mostly =
@@ -307,8 +311,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
307 return -1; 311 return -1;
308 if (percent_a > percent_b) 312 if (percent_a > percent_b)
309 return 1; 313 return 1;
310 else 314
311 return 0; 315 if (a->incorrect < b->incorrect)
316 return -1;
317 if (a->incorrect > b->incorrect)
318 return 1;
319
320 /*
321 * Since the above shows worse (incorrect) cases
322 * first, we continue that by showing best (correct)
323 * cases last.
324 */
325 if (a->correct > b->correct)
326 return -1;
327 if (a->correct < b->correct)
328 return 1;
329
330 return 0;
312} 331}
313 332
314static struct tracer_stat annotated_branch_stats = { 333static struct tracer_stat annotated_branch_stats = {
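The comparator change above turns a single-key sort (incorrect percentage) into a three-key one: equal percentages fall back to the raw incorrect count, and then to the correct count in the opposite direction, so the worst predictions still sort first and ties no longer land in arbitrary order. Consolidated into one piece for readability (a sketch, assuming the same ftrace_branch_data type and get_incorrect_percent() helper this file already uses):

static int example_branch_stat_cmp(struct ftrace_branch_data *a,
				   struct ftrace_branch_data *b)
{
	long percent_a = get_incorrect_percent(a);
	long percent_b = get_incorrect_percent(b);

	if (percent_a < percent_b)
		return -1;
	if (percent_a > percent_b)
		return 1;

	if (a->incorrect < b->incorrect)
		return -1;
	if (a->incorrect > b->incorrect)
		return 1;

	/* Worst (incorrect) cases first, best (correct) cases last. */
	if (a->correct > b->correct)
		return -1;
	if (a->correct < b->correct)
		return 1;

	return 0;
}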
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -83,7 +84,7 @@ u64 notrace trace_clock_global(void)
83 int this_cpu; 84 int this_cpu;
84 u64 now; 85 u64 now;
85 86
86 raw_local_irq_save(flags); 87 local_irq_save(flags);
87 88
88 this_cpu = raw_smp_processor_id(); 89 this_cpu = raw_smp_processor_id();
89 now = cpu_clock(this_cpu); 90 now = cpu_clock(this_cpu);
@@ -109,7 +110,7 @@ u64 notrace trace_clock_global(void)
109 arch_spin_unlock(&trace_clock_struct.lock); 110 arch_spin_unlock(&trace_clock_struct.lock);
110 111
111 out: 112 out:
112 raw_local_irq_restore(flags); 113 local_irq_restore(flags);
113 114
114 return now; 115 return now;
115} 116}
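trace_clock_global() above switches from the raw_ irq helpers to local_irq_save()/local_irq_restore() (hence the new <linux/irqflags.h> include), so this section goes through the ordinary irq-flags handling path instead of bypassing it. The basic pattern, for reference:

#include <linux/irqflags.h>

static unsigned long long example_read_atomically(void)
{
	unsigned long flags;
	unsigned long long val;

	local_irq_save(flags);		/* disable local interrupts */
	val = 42;			/* ... read clock/per-cpu state here ... */
	local_irq_restore(flags);

	return val;
}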
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c16a08f399df..dc008c1240da 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -318,18 +318,6 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 318 __entry->func, __entry->file, __entry->correct)
319); 319);
320 320
321FTRACE_ENTRY(hw_branch, hw_branch_entry,
322
323 TRACE_HW_BRANCHES,
324
325 F_STRUCT(
326 __field( u64, from )
327 __field( u64, to )
328 ),
329
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331);
332
333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, 321FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
334 322
335 TRACE_KMEM_ALLOC, 323 TRACE_KMEM_ALLOC,
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
new file mode 100644
index 000000000000..8a2b73f7c068
--- /dev/null
+++ b/kernel/trace/trace_event_perf.c
@@ -0,0 +1,195 @@
1/*
2 * trace event based perf event profiling/tracing
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */
7
8#include <linux/module.h>
9#include <linux/kprobes.h>
10#include "trace.h"
11
12EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
13
14static char *perf_trace_buf[4];
15
16/*
17 * Force it to be aligned to unsigned long to avoid misaligned accesses
18 * suprises
19 */
20typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
21 perf_trace_t;
22
23/* Count the events in use (per event id, not per instance) */
24static int total_ref_count;
25
26static int perf_trace_event_init(struct ftrace_event_call *tp_event,
27 struct perf_event *p_event)
28{
29 struct hlist_head *list;
30 int ret = -ENOMEM;
31 int cpu;
32
33 p_event->tp_event = tp_event;
34 if (tp_event->perf_refcount++ > 0)
35 return 0;
36
37 list = alloc_percpu(struct hlist_head);
38 if (!list)
39 goto fail;
40
41 for_each_possible_cpu(cpu)
42 INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
43
44 tp_event->perf_events = list;
45
46 if (!total_ref_count) {
47 char *buf;
48 int i;
49
50 for (i = 0; i < 4; i++) {
51 buf = (char *)alloc_percpu(perf_trace_t);
52 if (!buf)
53 goto fail;
54
55 perf_trace_buf[i] = buf;
56 }
57 }
58
59 if (tp_event->class->reg)
60 ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
61 else
62 ret = tracepoint_probe_register(tp_event->name,
63 tp_event->class->perf_probe,
64 tp_event);
65
66 if (ret)
67 goto fail;
68
69 total_ref_count++;
70 return 0;
71
72fail:
73 if (!total_ref_count) {
74 int i;
75
76 for (i = 0; i < 4; i++) {
77 free_percpu(perf_trace_buf[i]);
78 perf_trace_buf[i] = NULL;
79 }
80 }
81
82 if (!--tp_event->perf_refcount) {
83 free_percpu(tp_event->perf_events);
84 tp_event->perf_events = NULL;
85 }
86
87 return ret;
88}
89
90int perf_trace_init(struct perf_event *p_event)
91{
92 struct ftrace_event_call *tp_event;
93 int event_id = p_event->attr.config;
94 int ret = -EINVAL;
95
96 mutex_lock(&event_mutex);
97 list_for_each_entry(tp_event, &ftrace_events, list) {
98 if (tp_event->event.type == event_id &&
99 tp_event->class &&
100 (tp_event->class->perf_probe ||
101 tp_event->class->reg) &&
102 try_module_get(tp_event->mod)) {
103 ret = perf_trace_event_init(tp_event, p_event);
104 break;
105 }
106 }
107 mutex_unlock(&event_mutex);
108
109 return ret;
110}
111
112int perf_trace_enable(struct perf_event *p_event)
113{
114 struct ftrace_event_call *tp_event = p_event->tp_event;
115 struct hlist_head *list;
116
117 list = tp_event->perf_events;
118 if (WARN_ON_ONCE(!list))
119 return -EINVAL;
120
121 list = this_cpu_ptr(list);
122 hlist_add_head_rcu(&p_event->hlist_entry, list);
123
124 return 0;
125}
126
127void perf_trace_disable(struct perf_event *p_event)
128{
129 hlist_del_rcu(&p_event->hlist_entry);
130}
131
132void perf_trace_destroy(struct perf_event *p_event)
133{
134 struct ftrace_event_call *tp_event = p_event->tp_event;
135 int i;
136
137 mutex_lock(&event_mutex);
138 if (--tp_event->perf_refcount > 0)
139 goto out;
140
141 if (tp_event->class->reg)
142 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
143 else
144 tracepoint_probe_unregister(tp_event->name,
145 tp_event->class->perf_probe,
146 tp_event);
147
148 /*
149 * Ensure our callback won't be called anymore. See
150 * tracepoint_probe_unregister() and __DO_TRACE().
151 */
152 synchronize_sched();
153
154 free_percpu(tp_event->perf_events);
155 tp_event->perf_events = NULL;
156
157 if (!--total_ref_count) {
158 for (i = 0; i < 4; i++) {
159 free_percpu(perf_trace_buf[i]);
160 perf_trace_buf[i] = NULL;
161 }
162 }
163out:
164 mutex_unlock(&event_mutex);
165}
166
167__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
168 struct pt_regs *regs, int *rctxp)
169{
170 struct trace_entry *entry;
171 unsigned long flags;
172 char *raw_data;
173 int pc;
174
175 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
176
177 pc = preempt_count();
178
179 *rctxp = perf_swevent_get_recursion_context();
180 if (*rctxp < 0)
181 return NULL;
182
183 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
184
185 /* zero the dead bytes from align to not leak stack to user */
186 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
187
188 entry = (struct trace_entry *)raw_data;
189 local_save_flags(flags);
190 tracing_generic_entry_update(entry, flags, pc);
191 entry->type = type;
192
193 return raw_data;
194}
195EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
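The new trace_event_perf.c replaces the profiling file deleted below: the buffers become four per-cpu perf_trace_t areas indexed by the recursion context, and events hang off per-cpu hlists in tp_event->perf_events. A hedged sketch of how a probe typically uses perf_trace_buf_prepare(); the event id, entry layout and headers are assumptions, and the final submit step is only indicated since its helper is not shown in this file:

#include <linux/kernel.h>
#include <linux/ftrace_event.h>
#include <linux/ptrace.h>

#define EXAMPLE_EVENT_TYPE	1234	/* hypothetical event id */

struct example_entry {			/* hypothetical event layout */
	struct trace_entry	ent;
	u64			payload;
};

static void example_perf_probe(struct pt_regs *regs, u64 payload)
{
	struct example_entry *entry;
	int size = ALIGN(sizeof(*entry), sizeof(u64));
	int rctx;

	entry = perf_trace_buf_prepare(size, EXAMPLE_EVENT_TYPE, regs, &rctx);
	if (!entry)
		return;			/* no recursion context available */

	entry->payload = payload;

	/*
	 * ... hand (entry, size, rctx, regs) to the perf submit helper,
	 * which writes the sample and releases the recursion context ...
	 */
}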
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
deleted file mode 100644
index 9e25573242cf..000000000000
--- a/kernel/trace/trace_event_profile.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * trace event based perf counter profiling
3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 *
6 */
7
8#include <linux/module.h>
9#include "trace.h"
10
11
12char *perf_trace_buf;
13EXPORT_SYMBOL_GPL(perf_trace_buf);
14
15char *perf_trace_buf_nmi;
16EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
17
18typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ;
19
20/* Count the events in use (per event id, not per instance) */
21static int total_profile_count;
22
23static int ftrace_profile_enable_event(struct ftrace_event_call *event)
24{
25 char *buf;
26 int ret = -ENOMEM;
27
28 if (event->profile_count++ > 0)
29 return 0;
30
31 if (!total_profile_count) {
32 buf = (char *)alloc_percpu(perf_trace_t);
33 if (!buf)
34 goto fail_buf;
35
36 rcu_assign_pointer(perf_trace_buf, buf);
37
38 buf = (char *)alloc_percpu(perf_trace_t);
39 if (!buf)
40 goto fail_buf_nmi;
41
42 rcu_assign_pointer(perf_trace_buf_nmi, buf);
43 }
44
45 ret = event->profile_enable(event);
46 if (!ret) {
47 total_profile_count++;
48 return 0;
49 }
50
51fail_buf_nmi:
52 if (!total_profile_count) {
53 free_percpu(perf_trace_buf_nmi);
54 free_percpu(perf_trace_buf);
55 perf_trace_buf_nmi = NULL;
56 perf_trace_buf = NULL;
57 }
58fail_buf:
59 event->profile_count--;
60
61 return ret;
62}
63
64int ftrace_profile_enable(int event_id)
65{
66 struct ftrace_event_call *event;
67 int ret = -EINVAL;
68
69 mutex_lock(&event_mutex);
70 list_for_each_entry(event, &ftrace_events, list) {
71 if (event->id == event_id && event->profile_enable &&
72 try_module_get(event->mod)) {
73 ret = ftrace_profile_enable_event(event);
74 break;
75 }
76 }
77 mutex_unlock(&event_mutex);
78
79 return ret;
80}
81
82static void ftrace_profile_disable_event(struct ftrace_event_call *event)
83{
84 char *buf, *nmi_buf;
85
86 if (--event->profile_count > 0)
87 return;
88
89 event->profile_disable(event);
90
91 if (!--total_profile_count) {
92 buf = perf_trace_buf;
93 rcu_assign_pointer(perf_trace_buf, NULL);
94
95 nmi_buf = perf_trace_buf_nmi;
96 rcu_assign_pointer(perf_trace_buf_nmi, NULL);
97
98 /*
99 * Ensure every events in profiling have finished before
100 * releasing the buffers
101 */
102 synchronize_sched();
103
104 free_percpu(buf);
105 free_percpu(nmi_buf);
106 }
107}
108
109void ftrace_profile_disable(int event_id)
110{
111 struct ftrace_event_call *event;
112
113 mutex_lock(&event_mutex);
114 list_for_each_entry(event, &ftrace_events, list) {
115 if (event->id == event_id) {
116 ftrace_profile_disable_event(event);
117 module_put(event->mod);
118 break;
119 }
120 }
121 mutex_unlock(&event_mutex);
122}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..53cffc0b0801 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
20#include <asm/setup.h> 21#include <asm/setup.h>
@@ -28,11 +29,23 @@ DEFINE_MUTEX(event_mutex);
28 29
29LIST_HEAD(ftrace_events); 30LIST_HEAD(ftrace_events);
30 31
32struct list_head *
33trace_get_fields(struct ftrace_event_call *event_call)
34{
35 if (!event_call->class->get_fields)
36 return &event_call->class->fields;
37 return event_call->class->get_fields(event_call);
38}
39
31int trace_define_field(struct ftrace_event_call *call, const char *type, 40int trace_define_field(struct ftrace_event_call *call, const char *type,
32 const char *name, int offset, int size, int is_signed, 41 const char *name, int offset, int size, int is_signed,
33 int filter_type) 42 int filter_type)
34{ 43{
35 struct ftrace_event_field *field; 44 struct ftrace_event_field *field;
45 struct list_head *head;
46
47 if (WARN_ON(!call->class))
48 return 0;
36 49
37 field = kzalloc(sizeof(*field), GFP_KERNEL); 50 field = kzalloc(sizeof(*field), GFP_KERNEL);
38 if (!field) 51 if (!field)
@@ -55,15 +68,14 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
55 field->size = size; 68 field->size = size;
56 field->is_signed = is_signed; 69 field->is_signed = is_signed;
57 70
58 list_add(&field->link, &call->fields); 71 head = trace_get_fields(call);
72 list_add(&field->link, head);
59 73
60 return 0; 74 return 0;
61 75
62err: 76err:
63 if (field) { 77 if (field)
64 kfree(field->name); 78 kfree(field->name);
65 kfree(field->type);
66 }
67 kfree(field); 79 kfree(field);
68 80
69 return -ENOMEM; 81 return -ENOMEM;
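trace_get_fields() introduced above is the accessor the rest of this patch converts callers to: events whose class supplies get_fields() share one field list, everything else falls back to class->fields, and code such as trace_define_field()/trace_destroy_fields() here and find_event_field() in trace_events_filter.c further down iterates whatever list comes back. A minimal sketch of a caller:

#include "trace.h"	/* for ftrace_event_field and trace_get_fields() */

/* Sketch: iterate an event's fields through the new accessor. */
static void example_walk_fields(struct ftrace_event_call *call)
{
	struct ftrace_event_field *field;
	struct list_head *head;

	head = trace_get_fields(call);
	list_for_each_entry(field, head, link)
		pr_info("field %s %s (size %d)\n",
			field->type, field->name, field->size);
}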
@@ -95,8 +107,10 @@ static int trace_define_common_fields(struct ftrace_event_call *call)
95void trace_destroy_fields(struct ftrace_event_call *call) 107void trace_destroy_fields(struct ftrace_event_call *call)
96{ 108{
97 struct ftrace_event_field *field, *next; 109 struct ftrace_event_field *field, *next;
110 struct list_head *head;
98 111
99 list_for_each_entry_safe(field, next, &call->fields, link) { 112 head = trace_get_fields(call);
113 list_for_each_entry_safe(field, next, head, link) {
100 list_del(&field->link); 114 list_del(&field->link);
101 kfree(field->type); 115 kfree(field->type);
102 kfree(field->name); 116 kfree(field->name);
@@ -108,11 +122,9 @@ int trace_event_raw_init(struct ftrace_event_call *call)
108{ 122{
109 int id; 123 int id;
110 124
111 id = register_ftrace_event(call->event); 125 id = register_ftrace_event(&call->event);
112 if (!id) 126 if (!id)
113 return -ENODEV; 127 return -ENODEV;
114 call->id = id;
115 INIT_LIST_HEAD(&call->fields);
116 128
117 return 0; 129 return 0;
118} 130}
@@ -125,23 +137,33 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
125 137
126 switch (enable) { 138 switch (enable) {
127 case 0: 139 case 0:
128 if (call->enabled) { 140 if (call->flags & TRACE_EVENT_FL_ENABLED) {
129 call->enabled = 0; 141 call->flags &= ~TRACE_EVENT_FL_ENABLED;
130 tracing_stop_cmdline_record(); 142 tracing_stop_cmdline_record();
131 call->unregfunc(call); 143 if (call->class->reg)
144 call->class->reg(call, TRACE_REG_UNREGISTER);
145 else
146 tracepoint_probe_unregister(call->name,
147 call->class->probe,
148 call);
132 } 149 }
133 break; 150 break;
134 case 1: 151 case 1:
135 if (!call->enabled) { 152 if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
136 tracing_start_cmdline_record(); 153 tracing_start_cmdline_record();
137 ret = call->regfunc(call); 154 if (call->class->reg)
155 ret = call->class->reg(call, TRACE_REG_REGISTER);
156 else
157 ret = tracepoint_probe_register(call->name,
158 call->class->probe,
159 call);
138 if (ret) { 160 if (ret) {
139 tracing_stop_cmdline_record(); 161 tracing_stop_cmdline_record();
140 pr_info("event trace: Could not enable event " 162 pr_info("event trace: Could not enable event "
141 "%s\n", call->name); 163 "%s\n", call->name);
142 break; 164 break;
143 } 165 }
144 call->enabled = 1; 166 call->flags |= TRACE_EVENT_FL_ENABLED;
145 } 167 }
146 break; 168 break;
147 } 169 }
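In the hunk above the per-event enabled int becomes the TRACE_EVENT_FL_ENABLED bit in call->flags, and registration is routed through call->class->reg() when the class provides it, falling back to the raw tracepoint_probe_register()/unregister() calls. The flag handling itself is the usual bitmask pattern, sketched in isolation:

/* Sketch of the flag handling used above (flag name from this patch). */
static void example_mark_enabled(struct ftrace_event_call *call, int enable)
{
	if (enable)
		call->flags |= TRACE_EVENT_FL_ENABLED;
	else
		call->flags &= ~TRACE_EVENT_FL_ENABLED;
}

static int example_is_enabled(struct ftrace_event_call *call)
{
	return !!(call->flags & TRACE_EVENT_FL_ENABLED);
}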
@@ -172,15 +194,16 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
172 mutex_lock(&event_mutex); 194 mutex_lock(&event_mutex);
173 list_for_each_entry(call, &ftrace_events, list) { 195 list_for_each_entry(call, &ftrace_events, list) {
174 196
175 if (!call->name || !call->regfunc) 197 if (!call->name || !call->class ||
198 (!call->class->probe && !call->class->reg))
176 continue; 199 continue;
177 200
178 if (match && 201 if (match &&
179 strcmp(match, call->name) != 0 && 202 strcmp(match, call->name) != 0 &&
180 strcmp(match, call->system) != 0) 203 strcmp(match, call->class->system) != 0)
181 continue; 204 continue;
182 205
183 if (sub && strcmp(sub, call->system) != 0) 206 if (sub && strcmp(sub, call->class->system) != 0)
184 continue; 207 continue;
185 208
186 if (event && strcmp(event, call->name) != 0) 209 if (event && strcmp(event, call->name) != 0)
@@ -298,7 +321,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
298 * The ftrace subsystem is for showing formats only. 321 * The ftrace subsystem is for showing formats only.
299 * They can not be enabled or disabled via the event files. 322 * They can not be enabled or disabled via the event files.
300 */ 323 */
301 if (call->regfunc) 324 if (call->class && (call->class->probe || call->class->reg))
302 return call; 325 return call;
303 } 326 }
304 327
@@ -329,7 +352,7 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
329 (*pos)++; 352 (*pos)++;
330 353
331 list_for_each_entry_continue(call, &ftrace_events, list) { 354 list_for_each_entry_continue(call, &ftrace_events, list) {
332 if (call->enabled) 355 if (call->flags & TRACE_EVENT_FL_ENABLED)
333 return call; 356 return call;
334 } 357 }
335 358
@@ -356,8 +379,8 @@ static int t_show(struct seq_file *m, void *v)
356{ 379{
357 struct ftrace_event_call *call = v; 380 struct ftrace_event_call *call = v;
358 381
359 if (strcmp(call->system, TRACE_SYSTEM) != 0) 382 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
360 seq_printf(m, "%s:", call->system); 383 seq_printf(m, "%s:", call->class->system);
361 seq_printf(m, "%s\n", call->name); 384 seq_printf(m, "%s\n", call->name);
362 385
363 return 0; 386 return 0;
@@ -388,7 +411,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
388 struct ftrace_event_call *call = filp->private_data; 411 struct ftrace_event_call *call = filp->private_data;
389 char *buf; 412 char *buf;
390 413
391 if (call->enabled) 414 if (call->flags & TRACE_EVENT_FL_ENABLED)
392 buf = "1\n"; 415 buf = "1\n";
393 else 416 else
394 buf = "0\n"; 417 buf = "0\n";
@@ -451,10 +474,11 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
451 474
452 mutex_lock(&event_mutex); 475 mutex_lock(&event_mutex);
453 list_for_each_entry(call, &ftrace_events, list) { 476 list_for_each_entry(call, &ftrace_events, list) {
454 if (!call->name || !call->regfunc) 477 if (!call->name || !call->class ||
478 (!call->class->probe && !call->class->reg))
455 continue; 479 continue;
456 480
457 if (system && strcmp(call->system, system) != 0) 481 if (system && strcmp(call->class->system, system) != 0)
458 continue; 482 continue;
459 483
460 /* 484 /*
@@ -462,7 +486,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
462 * or if all events or cleared, or if we have 486 * or if all events or cleared, or if we have
463 * a mixture. 487 * a mixture.
464 */ 488 */
465 set |= (1 << !!call->enabled); 489 set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
466 490
467 /* 491 /*
468 * If we have a mixture, no need to look further. 492 * If we have a mixture, no need to look further.
@@ -520,41 +544,17 @@ out:
520 return ret; 544 return ret;
521} 545}
522 546
523extern char *__bad_type_size(void);
524
525#undef FIELD
526#define FIELD(type, name) \
527 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
528 #type, "common_" #name, offsetof(typeof(field), name), \
529 sizeof(field.name), is_signed_type(type)
530
531static int trace_write_header(struct trace_seq *s)
532{
533 struct trace_entry field;
534
535 /* struct trace_entry */
536 return trace_seq_printf(s,
537 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
538 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
539 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
540 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
541 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
542 "\n",
543 FIELD(unsigned short, type),
544 FIELD(unsigned char, flags),
545 FIELD(unsigned char, preempt_count),
546 FIELD(int, pid),
547 FIELD(int, lock_depth));
548}
549
550static ssize_t 547static ssize_t
551event_format_read(struct file *filp, char __user *ubuf, size_t cnt, 548event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
552 loff_t *ppos) 549 loff_t *ppos)
553{ 550{
554 struct ftrace_event_call *call = filp->private_data; 551 struct ftrace_event_call *call = filp->private_data;
552 struct ftrace_event_field *field;
553 struct list_head *head;
555 struct trace_seq *s; 554 struct trace_seq *s;
555 int common_field_count = 5;
556 char *buf; 556 char *buf;
557 int r; 557 int r = 0;
558 558
559 if (*ppos) 559 if (*ppos)
560 return 0; 560 return 0;
@@ -565,14 +565,49 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
565 565
566 trace_seq_init(s); 566 trace_seq_init(s);
567 567
568 /* If any of the first writes fail, so will the show_format. */
569
570 trace_seq_printf(s, "name: %s\n", call->name); 568 trace_seq_printf(s, "name: %s\n", call->name);
571 trace_seq_printf(s, "ID: %d\n", call->id); 569 trace_seq_printf(s, "ID: %d\n", call->event.type);
572 trace_seq_printf(s, "format:\n"); 570 trace_seq_printf(s, "format:\n");
573 trace_write_header(s);
574 571
575 r = call->show_format(call, s); 572 head = trace_get_fields(call);
573 list_for_each_entry_reverse(field, head, link) {
574 /*
575 * Smartly shows the array type(except dynamic array).
576 * Normal:
577 * field:TYPE VAR
578 * If TYPE := TYPE[LEN], it is shown:
579 * field:TYPE VAR[LEN]
580 */
581 const char *array_descriptor = strchr(field->type, '[');
582
583 if (!strncmp(field->type, "__data_loc", 10))
584 array_descriptor = NULL;
585
586 if (!array_descriptor) {
587 r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
588 "\tsize:%u;\tsigned:%d;\n",
589 field->type, field->name, field->offset,
590 field->size, !!field->is_signed);
591 } else {
592 r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
593 "\tsize:%u;\tsigned:%d;\n",
594 (int)(array_descriptor - field->type),
595 field->type, field->name,
596 array_descriptor, field->offset,
597 field->size, !!field->is_signed);
598 }
599
600 if (--common_field_count == 0)
601 r = trace_seq_printf(s, "\n");
602
603 if (!r)
604 break;
605 }
606
607 if (r)
608 r = trace_seq_printf(s, "\nprint fmt: %s\n",
609 call->print_fmt);
610
576 if (!r) { 611 if (!r) {
577 /* 612 /*
578 * ug! The format output is bigger than a PAGE!! 613 * ug! The format output is bigger than a PAGE!!
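The rewritten event_format_read() above drops the per-event show_format()/trace_write_header() callbacks: it walks the field list in reverse, emits the five common fields first (tracked by common_field_count), splits array types at the '[' so the length is printed after the field name, treats __data_loc dynamic arrays as plain fields, and closes with the print_fmt line. A small sketch of the array-type split, so a type like "char[16]" named "comm" comes out as "field:char comm[16];":

#include <linux/string.h>
#include <linux/trace_seq.h>

/* Sketch: split "TYPE[LEN]" so the length lands after the field name. */
static void example_print_field(struct trace_seq *s,
				const char *type, const char *name)
{
	const char *array_descriptor = strchr(type, '[');

	if (!array_descriptor)
		trace_seq_printf(s, "\tfield:%s %s;\n", type, name);
	else
		trace_seq_printf(s, "\tfield:%.*s %s%s;\n",
				 (int)(array_descriptor - type),
				 type, name, array_descriptor);
}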
@@ -605,7 +640,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
605 return -ENOMEM; 640 return -ENOMEM;
606 641
607 trace_seq_init(s); 642 trace_seq_init(s);
608 trace_seq_printf(s, "%d\n", call->id); 643 trace_seq_printf(s, "%d\n", call->event.type);
609 644
610 r = simple_read_from_buffer(ubuf, cnt, ppos, 645 r = simple_read_from_buffer(ubuf, cnt, ppos,
611 s->buffer, s->len); 646 s->buffer, s->len);
@@ -911,14 +946,15 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
911 const struct file_operations *filter, 946 const struct file_operations *filter,
912 const struct file_operations *format) 947 const struct file_operations *format)
913{ 948{
949 struct list_head *head;
914 int ret; 950 int ret;
915 951
916 /* 952 /*
917 * If the trace point header did not define TRACE_SYSTEM 953 * If the trace point header did not define TRACE_SYSTEM
918 * then the system would be called "TRACE_SYSTEM". 954 * then the system would be called "TRACE_SYSTEM".
919 */ 955 */
920 if (strcmp(call->system, TRACE_SYSTEM) != 0) 956 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
921 d_events = event_subsystem_dir(call->system, d_events); 957 d_events = event_subsystem_dir(call->class->system, d_events);
922 958
923 call->dir = debugfs_create_dir(call->name, d_events); 959 call->dir = debugfs_create_dir(call->name, d_events);
924 if (!call->dir) { 960 if (!call->dir) {
@@ -927,31 +963,36 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
927 return -1; 963 return -1;
928 } 964 }
929 965
930 if (call->regfunc) 966 if (call->class->probe || call->class->reg)
931 trace_create_file("enable", 0644, call->dir, call, 967 trace_create_file("enable", 0644, call->dir, call,
932 enable); 968 enable);
933 969
934 if (call->id && call->profile_enable) 970#ifdef CONFIG_PERF_EVENTS
971 if (call->event.type && (call->class->perf_probe || call->class->reg))
935 trace_create_file("id", 0444, call->dir, call, 972 trace_create_file("id", 0444, call->dir, call,
936 id); 973 id);
974#endif
937 975
938 if (call->define_fields) { 976 if (call->class->define_fields) {
939 ret = trace_define_common_fields(call); 977 /*
940 if (!ret) 978 * Other events may have the same class. Only update
941 ret = call->define_fields(call); 979 * the fields if they are not already defined.
942 if (ret < 0) { 980 */
943 pr_warning("Could not initialize trace point" 981 head = trace_get_fields(call);
944 " events/%s\n", call->name); 982 if (list_empty(head)) {
945 return ret; 983 ret = trace_define_common_fields(call);
984 if (!ret)
985 ret = call->class->define_fields(call);
986 if (ret < 0) {
987 pr_warning("Could not initialize trace point"
988 " events/%s\n", call->name);
989 return ret;
990 }
946 } 991 }
947 trace_create_file("filter", 0644, call->dir, call, 992 trace_create_file("filter", 0644, call->dir, call,
948 filter); 993 filter);
949 } 994 }
950 995
951 /* A trace may not want to export its format */
952 if (!call->show_format)
953 return 0;
954
955 trace_create_file("format", 0444, call->dir, call, 996 trace_create_file("format", 0444, call->dir, call,
956 format); 997 format);
957 998
@@ -966,8 +1007,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
966 if (!call->name) 1007 if (!call->name)
967 return -EINVAL; 1008 return -EINVAL;
968 1009
969 if (call->raw_init) { 1010 if (call->class->raw_init) {
970 ret = call->raw_init(call); 1011 ret = call->class->raw_init(call);
971 if (ret < 0) { 1012 if (ret < 0) {
972 if (ret != -ENOSYS) 1013 if (ret != -ENOSYS)
973 pr_warning("Could not initialize trace " 1014 pr_warning("Could not initialize trace "
@@ -1031,13 +1072,13 @@ static void remove_subsystem_dir(const char *name)
1031static void __trace_remove_event_call(struct ftrace_event_call *call) 1072static void __trace_remove_event_call(struct ftrace_event_call *call)
1032{ 1073{
1033 ftrace_event_enable_disable(call, 0); 1074 ftrace_event_enable_disable(call, 0);
1034 if (call->event) 1075 if (call->event.funcs)
1035 __unregister_ftrace_event(call->event); 1076 __unregister_ftrace_event(&call->event);
1036 debugfs_remove_recursive(call->dir); 1077 debugfs_remove_recursive(call->dir);
1037 list_del(&call->list); 1078 list_del(&call->list);
1038 trace_destroy_fields(call); 1079 trace_destroy_fields(call);
1039 destroy_preds(call); 1080 destroy_preds(call);
1040 remove_subsystem_dir(call->system); 1081 remove_subsystem_dir(call->class->system);
1041} 1082}
1042 1083
1043/* Remove an event_call */ 1084/* Remove an event_call */
@@ -1128,8 +1169,8 @@ static void trace_module_add_events(struct module *mod)
1128 /* The linker may leave blanks */ 1169 /* The linker may leave blanks */
1129 if (!call->name) 1170 if (!call->name)
1130 continue; 1171 continue;
1131 if (call->raw_init) { 1172 if (call->class->raw_init) {
1132 ret = call->raw_init(call); 1173 ret = call->class->raw_init(call);
1133 if (ret < 0) { 1174 if (ret < 0) {
1134 if (ret != -ENOSYS) 1175 if (ret != -ENOSYS)
1135 pr_warning("Could not initialize trace " 1176 pr_warning("Could not initialize trace "
@@ -1282,8 +1323,8 @@ static __init int event_trace_init(void)
1282 /* The linker may leave blanks */ 1323 /* The linker may leave blanks */
1283 if (!call->name) 1324 if (!call->name)
1284 continue; 1325 continue;
1285 if (call->raw_init) { 1326 if (call->class->raw_init) {
1286 ret = call->raw_init(call); 1327 ret = call->class->raw_init(call);
1287 if (ret < 0) { 1328 if (ret < 0) {
1288 if (ret != -ENOSYS) 1329 if (ret != -ENOSYS)
1289 pr_warning("Could not initialize trace " 1330 pr_warning("Could not initialize trace "
@@ -1384,8 +1425,8 @@ static __init void event_trace_self_tests(void)
1384 1425
1385 list_for_each_entry(call, &ftrace_events, list) { 1426 list_for_each_entry(call, &ftrace_events, list) {
1386 1427
1387 /* Only test those that have a regfunc */ 1428 /* Only test those that have a probe */
1388 if (!call->regfunc) 1429 if (!call->class || !call->class->probe)
1389 continue; 1430 continue;
1390 1431
1391/* 1432/*
@@ -1395,8 +1436,8 @@ static __init void event_trace_self_tests(void)
1395 * syscalls as we test. 1436 * syscalls as we test.
1396 */ 1437 */
1397#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS 1438#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1398 if (call->system && 1439 if (call->class->system &&
1399 strcmp(call->system, "syscalls") == 0) 1440 strcmp(call->class->system, "syscalls") == 0)
1400 continue; 1441 continue;
1401#endif 1442#endif
1402 1443
@@ -1406,7 +1447,7 @@ static __init void event_trace_self_tests(void)
1406 * If an event is already enabled, someone is using 1447 * If an event is already enabled, someone is using
1407 * it and the self test should not be on. 1448 * it and the self test should not be on.
1408 */ 1449 */
1409 if (call->enabled) { 1450 if (call->flags & TRACE_EVENT_FL_ENABLED) {
1410 pr_warning("Enabled event during self test!\n"); 1451 pr_warning("Enabled event during self test!\n");
1411 WARN_ON_ONCE(1); 1452 WARN_ON_ONCE(1);
1412 continue; 1453 continue;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e42af9aad69f..57bb1bb32999 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h> 24#include <linux/perf_event.h>
25#include <linux/slab.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
@@ -499,8 +500,10 @@ static struct ftrace_event_field *
499find_event_field(struct ftrace_event_call *call, char *name) 500find_event_field(struct ftrace_event_call *call, char *name)
500{ 501{
501 struct ftrace_event_field *field; 502 struct ftrace_event_field *field;
503 struct list_head *head;
502 504
503 list_for_each_entry(field, &call->fields, link) { 505 head = trace_get_fields(call);
506 list_for_each_entry(field, head, link) {
504 if (!strcmp(field->name, name)) 507 if (!strcmp(field->name, name))
505 return field; 508 return field;
506 } 509 }
@@ -544,7 +547,7 @@ static void filter_disable_preds(struct ftrace_event_call *call)
544 struct event_filter *filter = call->filter; 547 struct event_filter *filter = call->filter;
545 int i; 548 int i;
546 549
547 call->filter_active = 0; 550 call->flags &= ~TRACE_EVENT_FL_FILTERED;
548 filter->n_preds = 0; 551 filter->n_preds = 0;
549 552
550 for (i = 0; i < MAX_FILTER_PRED; i++) 553 for (i = 0; i < MAX_FILTER_PRED; i++)
@@ -571,7 +574,7 @@ void destroy_preds(struct ftrace_event_call *call)
571{ 574{
572 __free_preds(call->filter); 575 __free_preds(call->filter);
573 call->filter = NULL; 576 call->filter = NULL;
574 call->filter_active = 0; 577 call->flags &= ~TRACE_EVENT_FL_FILTERED;
575} 578}
576 579
577static struct event_filter *__alloc_preds(void) 580static struct event_filter *__alloc_preds(void)
@@ -610,7 +613,7 @@ static int init_preds(struct ftrace_event_call *call)
610 if (call->filter) 613 if (call->filter)
611 return 0; 614 return 0;
612 615
613 call->filter_active = 0; 616 call->flags &= ~TRACE_EVENT_FL_FILTERED;
614 call->filter = __alloc_preds(); 617 call->filter = __alloc_preds();
615 if (IS_ERR(call->filter)) 618 if (IS_ERR(call->filter))
616 return PTR_ERR(call->filter); 619 return PTR_ERR(call->filter);
@@ -624,10 +627,10 @@ static int init_subsystem_preds(struct event_subsystem *system)
624 int err; 627 int err;
625 628
626 list_for_each_entry(call, &ftrace_events, list) { 629 list_for_each_entry(call, &ftrace_events, list) {
627 if (!call->define_fields) 630 if (!call->class || !call->class->define_fields)
628 continue; 631 continue;
629 632
630 if (strcmp(call->system, system->name) != 0) 633 if (strcmp(call->class->system, system->name) != 0)
631 continue; 634 continue;
632 635
633 err = init_preds(call); 636 err = init_preds(call);
@@ -643,10 +646,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
643 struct ftrace_event_call *call; 646 struct ftrace_event_call *call;
644 647
645 list_for_each_entry(call, &ftrace_events, list) { 648 list_for_each_entry(call, &ftrace_events, list) {
646 if (!call->define_fields) 649 if (!call->class || !call->class->define_fields)
647 continue; 650 continue;
648 651
649 if (strcmp(call->system, system->name) != 0) 652 if (strcmp(call->class->system, system->name) != 0)
650 continue; 653 continue;
651 654
652 filter_disable_preds(call); 655 filter_disable_preds(call);
@@ -1248,10 +1251,10 @@ static int replace_system_preds(struct event_subsystem *system,
1248 list_for_each_entry(call, &ftrace_events, list) { 1251 list_for_each_entry(call, &ftrace_events, list) {
1249 struct event_filter *filter = call->filter; 1252 struct event_filter *filter = call->filter;
1250 1253
1251 if (!call->define_fields) 1254 if (!call->class || !call->class->define_fields)
1252 continue; 1255 continue;
1253 1256
1254 if (strcmp(call->system, system->name) != 0) 1257 if (strcmp(call->class->system, system->name) != 0)
1255 continue; 1258 continue;
1256 1259
1257 /* try to see if the filter can be applied */ 1260 /* try to see if the filter can be applied */
@@ -1265,7 +1268,7 @@ static int replace_system_preds(struct event_subsystem *system,
1265 if (err) 1268 if (err)
1266 filter_disable_preds(call); 1269 filter_disable_preds(call);
1267 else { 1270 else {
1268 call->filter_active = 1; 1271 call->flags |= TRACE_EVENT_FL_FILTERED;
1269 replace_filter_string(filter, filter_string); 1272 replace_filter_string(filter, filter_string);
1270 } 1273 }
1271 fail = false; 1274 fail = false;
@@ -1314,7 +1317,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1314 if (err) 1317 if (err)
1315 append_filter_err(ps, call->filter); 1318 append_filter_err(ps, call->filter);
1316 else 1319 else
1317 call->filter_active = 1; 1320 call->flags |= TRACE_EVENT_FL_FILTERED;
1318out: 1321out:
1319 filter_opstack_clear(ps); 1322 filter_opstack_clear(ps);
1320 postfix_clear(ps); 1323 postfix_clear(ps);
@@ -1371,7 +1374,7 @@ out_unlock:
1371 return err; 1374 return err;
1372} 1375}
1373 1376
1374#ifdef CONFIG_EVENT_PROFILE 1377#ifdef CONFIG_PERF_EVENTS
1375 1378
1376void ftrace_profile_free_filter(struct perf_event *event) 1379void ftrace_profile_free_filter(struct perf_event *event)
1377{ 1380{
@@ -1392,12 +1395,12 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
1392 mutex_lock(&event_mutex); 1395 mutex_lock(&event_mutex);
1393 1396
1394 list_for_each_entry(call, &ftrace_events, list) { 1397 list_for_each_entry(call, &ftrace_events, list) {
1395 if (call->id == event_id) 1398 if (call->event.type == event_id)
1396 break; 1399 break;
1397 } 1400 }
1398 1401
1399 err = -EINVAL; 1402 err = -EINVAL;
1400 if (!call) 1403 if (&call->list == &ftrace_events)
1401 goto out_unlock; 1404 goto out_unlock;
1402 1405
1403 err = -EEXIST; 1406 err = -EEXIST;
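The one-line fix above is worth spelling out: after list_for_each_entry() finishes without a break, the cursor is never NULL; it points at the bogus entry computed from the list head itself, so "if (!call)" could never catch the missing-event-id case. Comparing &call->list against &ftrace_events does. The idiom in isolation:

#include <linux/list.h>

/* Sketch: detecting "not found" after list_for_each_entry().
 * When the loop finishes without a break, the cursor's list member
 * is the head itself, so compare addresses instead of testing NULL. */
struct example_item {
	struct list_head list;
	int id;
};

static struct example_item *example_find(struct list_head *head, int id)
{
	struct example_item *item;

	list_for_each_entry(item, head, list) {
		if (item->id == id)
			break;
	}

	if (&item->list == head)
		return NULL;	/* no match: cursor wrapped to the head */

	return item;
}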
@@ -1439,5 +1442,5 @@ out_unlock:
1439 return err; 1442 return err;
1440} 1443}
1441 1444
1442#endif /* CONFIG_EVENT_PROFILE */ 1445#endif /* CONFIG_PERF_EVENTS */
1443 1446
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4fa5dc1ee4e..8536e2a65969 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
62 62
63#include "trace_entries.h" 63#include "trace_entries.h"
64 64
65
66#undef __field
67#define __field(type, item) \
68 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
69 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
70 offsetof(typeof(field), item), \
71 sizeof(field.item), is_signed_type(type)); \
72 if (!ret) \
73 return 0;
74
75#undef __field_desc
76#define __field_desc(type, container, item) \
77 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
78 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
79 offsetof(typeof(field), container.item), \
80 sizeof(field.container.item), \
81 is_signed_type(type)); \
82 if (!ret) \
83 return 0;
84
85#undef __array
86#define __array(type, item, len) \
87 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
88 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
89 offsetof(typeof(field), item), \
90 sizeof(field.item), is_signed_type(type)); \
91 if (!ret) \
92 return 0;
93
94#undef __array_desc
95#define __array_desc(type, container, item, len) \
96 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
97 "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
98 offsetof(typeof(field), container.item), \
99 sizeof(field.container.item), \
100 is_signed_type(type)); \
101 if (!ret) \
102 return 0;
103
104#undef __dynamic_array
105#define __dynamic_array(type, item) \
106 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
107 "offset:%zu;\tsize:0;\tsigned:%u;\n", \
108 offsetof(typeof(field), item), \
109 is_signed_type(type)); \
110 if (!ret) \
111 return 0;
112
113#undef F_printk
114#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
115
116#undef __entry
117#define __entry REC
118
119#undef FTRACE_ENTRY
120#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
121static int \
122ftrace_format_##name(struct ftrace_event_call *unused, \
123 struct trace_seq *s) \
124{ \
125 struct struct_name field __attribute__((unused)); \
126 int ret = 0; \
127 \
128 tstruct; \
129 \
130 trace_seq_printf(s, "\nprint fmt: " print); \
131 \
132 return ret; \
133}
134
135#include "trace_entries.h"
136
137#undef __field 65#undef __field
138#define __field(type, item) \ 66#define __field(type, item) \
139 ret = trace_define_field(event_call, #type, #item, \ 67 ret = trace_define_field(event_call, #type, #item, \
@@ -175,7 +103,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
175 return ret; 103 return ret;
176 104
177#undef __dynamic_array 105#undef __dynamic_array
178#define __dynamic_array(type, item) 106#define __dynamic_array(type, item) \
107 ret = trace_define_field(event_call, #type, #item, \
108 offsetof(typeof(field), item), \
109 0, is_signed_type(type), FILTER_OTHER);\
110 if (ret) \
111 return ret;
179 112
180#undef FTRACE_ENTRY 113#undef FTRACE_ENTRY
181#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ 114#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -194,10 +127,13 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
194 127
195static int ftrace_raw_init_event(struct ftrace_event_call *call) 128static int ftrace_raw_init_event(struct ftrace_event_call *call)
196{ 129{
197 INIT_LIST_HEAD(&call->fields); 130 INIT_LIST_HEAD(&call->class->fields);
198 return 0; 131 return 0;
199} 132}
200 133
134#undef __entry
135#define __entry REC
136
201#undef __field 137#undef __field
202#define __field(type, item) 138#define __field(type, item)
203 139
@@ -213,18 +149,25 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
213#undef __dynamic_array 149#undef __dynamic_array
214#define __dynamic_array(type, item) 150#define __dynamic_array(type, item)
215 151
152#undef F_printk
153#define F_printk(fmt, args...) #fmt ", " __stringify(args)
154
216#undef FTRACE_ENTRY 155#undef FTRACE_ENTRY
217#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ 156#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print) \
157 \
158struct ftrace_event_class event_class_ftrace_##call = { \
159 .system = __stringify(TRACE_SYSTEM), \
160 .define_fields = ftrace_define_fields_##call, \
161 .raw_init = ftrace_raw_init_event, \
162}; \
218 \ 163 \
219struct ftrace_event_call __used \ 164struct ftrace_event_call __used \
220__attribute__((__aligned__(4))) \ 165__attribute__((__aligned__(4))) \
221__attribute__((section("_ftrace_events"))) event_##call = { \ 166__attribute__((section("_ftrace_events"))) event_##call = { \
222 .name = #call, \ 167 .name = #call, \
223 .id = type, \ 168 .event.type = etype, \
224 .system = __stringify(TRACE_SYSTEM), \ 169 .class = &event_class_ftrace_##call, \
225 .raw_init = ftrace_raw_init_event, \ 170 .print_fmt = print, \
226 .show_format = ftrace_format_##call, \
227 .define_fields = ftrace_define_fields_##call, \
228}; \ 171}; \
229 172
230#include "trace_entries.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..79f4bac99a94 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
@@ -18,6 +19,7 @@ struct fgraph_cpu_data {
18 pid_t last_pid; 19 pid_t last_pid;
19 int depth; 20 int depth;
20 int ignore; 21 int ignore;
22 unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH];
21}; 23};
22 24
23struct fgraph_data { 25struct fgraph_data {
@@ -38,7 +40,7 @@ struct fgraph_data {
38#define TRACE_GRAPH_PRINT_OVERHEAD 0x4 40#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
39#define TRACE_GRAPH_PRINT_PROC 0x8 41#define TRACE_GRAPH_PRINT_PROC 0x8
40#define TRACE_GRAPH_PRINT_DURATION 0x10 42#define TRACE_GRAPH_PRINT_DURATION 0x10
41#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 43#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
42 44
43static struct tracer_opt trace_opts[] = { 45static struct tracer_opt trace_opts[] = {
44 /* Display overruns? (for self-debug purpose) */ 46 /* Display overruns? (for self-debug purpose) */
@@ -177,7 +179,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
177 return ret; 179 return ret;
178} 180}
179 181
180static int __trace_graph_entry(struct trace_array *tr, 182int __trace_graph_entry(struct trace_array *tr,
181 struct ftrace_graph_ent *trace, 183 struct ftrace_graph_ent *trace,
182 unsigned long flags, 184 unsigned long flags,
183 int pc) 185 int pc)
@@ -187,7 +189,7 @@ static int __trace_graph_entry(struct trace_array *tr,
187 struct ring_buffer *buffer = tr->buffer; 189 struct ring_buffer *buffer = tr->buffer;
188 struct ftrace_graph_ent_entry *entry; 190 struct ftrace_graph_ent_entry *entry;
189 191
190 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 192 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
191 return 0; 193 return 0;
192 194
193 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, 195 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
@@ -212,13 +214,11 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
212 int cpu; 214 int cpu;
213 int pc; 215 int pc;
214 216
215 if (unlikely(!tr))
216 return 0;
217
218 if (!ftrace_trace_task(current)) 217 if (!ftrace_trace_task(current))
219 return 0; 218 return 0;
220 219
221 if (!ftrace_graph_addr(trace->func)) 220 /* trace it when it is-nested-in or is a function enabled. */
221 if (!(trace->depth || ftrace_graph_addr(trace->func)))
222 return 0; 222 return 0;
223 223
224 local_irq_save(flags); 224 local_irq_save(flags);
@@ -231,9 +231,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
231 } else { 231 } else {
232 ret = 0; 232 ret = 0;
233 } 233 }
234 /* Only do the atomic if it is not already set */
235 if (!test_tsk_trace_graph(current))
236 set_tsk_trace_graph(current);
237 234
238 atomic_dec(&data->disabled); 235 atomic_dec(&data->disabled);
239 local_irq_restore(flags); 236 local_irq_restore(flags);
@@ -241,7 +238,15 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
241 return ret; 238 return ret;
242} 239}
243 240
244static void __trace_graph_return(struct trace_array *tr, 241int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
242{
243 if (tracing_thresh)
244 return 1;
245 else
246 return trace_graph_entry(trace);
247}
248
249void __trace_graph_return(struct trace_array *tr,
245 struct ftrace_graph_ret *trace, 250 struct ftrace_graph_ret *trace,
246 unsigned long flags, 251 unsigned long flags,
247 int pc) 252 int pc)
@@ -251,7 +256,7 @@ static void __trace_graph_return(struct trace_array *tr,
251 struct ring_buffer *buffer = tr->buffer; 256 struct ring_buffer *buffer = tr->buffer;
252 struct ftrace_graph_ret_entry *entry; 257 struct ftrace_graph_ret_entry *entry;
253 258
254 if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) 259 if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
255 return; 260 return;
256 261
257 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, 262 event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
@@ -281,19 +286,39 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
281 pc = preempt_count(); 286 pc = preempt_count();
282 __trace_graph_return(tr, trace, flags, pc); 287 __trace_graph_return(tr, trace, flags, pc);
283 } 288 }
284 if (!trace->depth)
285 clear_tsk_trace_graph(current);
286 atomic_dec(&data->disabled); 289 atomic_dec(&data->disabled);
287 local_irq_restore(flags); 290 local_irq_restore(flags);
288} 291}
289 292
293void set_graph_array(struct trace_array *tr)
294{
295 graph_array = tr;
296
297 /* Make graph_array visible before we start tracing */
298
299 smp_mb();
300}
301
302void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
303{
304 if (tracing_thresh &&
305 (trace->rettime - trace->calltime < tracing_thresh))
306 return;
307 else
308 trace_graph_return(trace);
309}
310
290static int graph_trace_init(struct trace_array *tr) 311static int graph_trace_init(struct trace_array *tr)
291{ 312{
292 int ret; 313 int ret;
293 314
294 graph_array = tr; 315 set_graph_array(tr);
295 ret = register_ftrace_graph(&trace_graph_return, 316 if (tracing_thresh)
296 &trace_graph_entry); 317 ret = register_ftrace_graph(&trace_graph_thresh_return,
318 &trace_graph_thresh_entry);
319 else
320 ret = register_ftrace_graph(&trace_graph_return,
321 &trace_graph_entry);
297 if (ret) 322 if (ret)
298 return ret; 323 return ret;
299 tracing_start_cmdline_record(); 324 tracing_start_cmdline_record();
@@ -301,11 +326,6 @@ static int graph_trace_init(struct trace_array *tr)
301 return 0; 326 return 0;
302} 327}
303 328
304void set_graph_array(struct trace_array *tr)
305{
306 graph_array = tr;
307}
308
309static void graph_trace_reset(struct trace_array *tr) 329static void graph_trace_reset(struct trace_array *tr)
310{ 330{
311 tracing_stop_cmdline_record(); 331 tracing_stop_cmdline_record();
@@ -470,9 +490,10 @@ get_return_for_leaf(struct trace_iterator *iter,
470 * We need to consume the current entry to see 490 * We need to consume the current entry to see
471 * the next one. 491 * the next one.
472 */ 492 */
473 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); 493 ring_buffer_consume(iter->tr->buffer, iter->cpu,
494 NULL, NULL);
474 event = ring_buffer_peek(iter->tr->buffer, iter->cpu, 495 event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
475 NULL); 496 NULL, NULL);
476 } 497 }
477 498
478 if (!event) 499 if (!event)
@@ -506,17 +527,18 @@ get_return_for_leaf(struct trace_iterator *iter,
506 527
507/* Signal a overhead of time execution to the output */ 528/* Signal a overhead of time execution to the output */
508static int 529static int
509print_graph_overhead(unsigned long long duration, struct trace_seq *s) 530print_graph_overhead(unsigned long long duration, struct trace_seq *s,
531 u32 flags)
510{ 532{
511 /* If duration disappear, we don't need anything */ 533 /* If duration disappear, we don't need anything */
512 if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) 534 if (!(flags & TRACE_GRAPH_PRINT_DURATION))
513 return 1; 535 return 1;
514 536
515 /* Non nested entry or return */ 537 /* Non nested entry or return */
516 if (duration == -1) 538 if (duration == -1)
517 return trace_seq_printf(s, " "); 539 return trace_seq_printf(s, " ");
518 540
519 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { 541 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
520 /* Duration exceeded 100 msecs */ 542 /* Duration exceeded 100 msecs */
521 if (duration > 100000ULL) 543 if (duration > 100000ULL)
522 return trace_seq_printf(s, "! "); 544 return trace_seq_printf(s, "! ");
@@ -542,7 +564,7 @@ static int print_graph_abs_time(u64 t, struct trace_seq *s)
542 564
543static enum print_line_t 565static enum print_line_t
544print_graph_irq(struct trace_iterator *iter, unsigned long addr, 566print_graph_irq(struct trace_iterator *iter, unsigned long addr,
545 enum trace_type type, int cpu, pid_t pid) 567 enum trace_type type, int cpu, pid_t pid, u32 flags)
546{ 568{
547 int ret; 569 int ret;
548 struct trace_seq *s = &iter->seq; 570 struct trace_seq *s = &iter->seq;
@@ -552,21 +574,21 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
552 return TRACE_TYPE_UNHANDLED; 574 return TRACE_TYPE_UNHANDLED;
553 575
554 /* Absolute time */ 576 /* Absolute time */
555 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 577 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
556 ret = print_graph_abs_time(iter->ts, s); 578 ret = print_graph_abs_time(iter->ts, s);
557 if (!ret) 579 if (!ret)
558 return TRACE_TYPE_PARTIAL_LINE; 580 return TRACE_TYPE_PARTIAL_LINE;
559 } 581 }
560 582
561 /* Cpu */ 583 /* Cpu */
562 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 584 if (flags & TRACE_GRAPH_PRINT_CPU) {
563 ret = print_graph_cpu(s, cpu); 585 ret = print_graph_cpu(s, cpu);
564 if (ret == TRACE_TYPE_PARTIAL_LINE) 586 if (ret == TRACE_TYPE_PARTIAL_LINE)
565 return TRACE_TYPE_PARTIAL_LINE; 587 return TRACE_TYPE_PARTIAL_LINE;
566 } 588 }
567 589
568 /* Proc */ 590 /* Proc */
569 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 591 if (flags & TRACE_GRAPH_PRINT_PROC) {
570 ret = print_graph_proc(s, pid); 592 ret = print_graph_proc(s, pid);
571 if (ret == TRACE_TYPE_PARTIAL_LINE) 593 if (ret == TRACE_TYPE_PARTIAL_LINE)
572 return TRACE_TYPE_PARTIAL_LINE; 594 return TRACE_TYPE_PARTIAL_LINE;
@@ -576,7 +598,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
576 } 598 }
577 599
578 /* No overhead */ 600 /* No overhead */
579 ret = print_graph_overhead(-1, s); 601 ret = print_graph_overhead(-1, s, flags);
580 if (!ret) 602 if (!ret)
581 return TRACE_TYPE_PARTIAL_LINE; 603 return TRACE_TYPE_PARTIAL_LINE;
582 604
@@ -589,7 +611,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
589 return TRACE_TYPE_PARTIAL_LINE; 611 return TRACE_TYPE_PARTIAL_LINE;
590 612
591 /* Don't close the duration column if haven't one */ 613 /* Don't close the duration column if haven't one */
592 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 614 if (flags & TRACE_GRAPH_PRINT_DURATION)
593 trace_seq_printf(s, " |"); 615 trace_seq_printf(s, " |");
594 ret = trace_seq_printf(s, "\n"); 616 ret = trace_seq_printf(s, "\n");
595 617
@@ -659,7 +681,8 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
659static enum print_line_t 681static enum print_line_t
660print_graph_entry_leaf(struct trace_iterator *iter, 682print_graph_entry_leaf(struct trace_iterator *iter,
661 struct ftrace_graph_ent_entry *entry, 683 struct ftrace_graph_ent_entry *entry,
662 struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) 684 struct ftrace_graph_ret_entry *ret_entry,
685 struct trace_seq *s, u32 flags)
663{ 686{
664 struct fgraph_data *data = iter->private; 687 struct fgraph_data *data = iter->private;
665 struct ftrace_graph_ret *graph_ret; 688 struct ftrace_graph_ret *graph_ret;
@@ -673,24 +696,30 @@ print_graph_entry_leaf(struct trace_iterator *iter,
673 duration = graph_ret->rettime - graph_ret->calltime; 696 duration = graph_ret->rettime - graph_ret->calltime;
674 697
675 if (data) { 698 if (data) {
699 struct fgraph_cpu_data *cpu_data;
676 int cpu = iter->cpu; 700 int cpu = iter->cpu;
677 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 701
702 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
678 703
679 /* 704 /*
680 * Comments display at + 1 to depth. Since 705 * Comments display at + 1 to depth. Since
681 * this is a leaf function, keep the comments 706 * this is a leaf function, keep the comments
682 * equal to this depth. 707 * equal to this depth.
683 */ 708 */
684 *depth = call->depth - 1; 709 cpu_data->depth = call->depth - 1;
710
711 /* No need to keep this function around for this depth */
712 if (call->depth < FTRACE_RETFUNC_DEPTH)
713 cpu_data->enter_funcs[call->depth] = 0;
685 } 714 }
686 715
687 /* Overhead */ 716 /* Overhead */
688 ret = print_graph_overhead(duration, s); 717 ret = print_graph_overhead(duration, s, flags);
689 if (!ret) 718 if (!ret)
690 return TRACE_TYPE_PARTIAL_LINE; 719 return TRACE_TYPE_PARTIAL_LINE;
691 720
692 /* Duration */ 721 /* Duration */
693 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 722 if (flags & TRACE_GRAPH_PRINT_DURATION) {
694 ret = print_graph_duration(duration, s); 723 ret = print_graph_duration(duration, s);
695 if (ret == TRACE_TYPE_PARTIAL_LINE) 724 if (ret == TRACE_TYPE_PARTIAL_LINE)
696 return TRACE_TYPE_PARTIAL_LINE; 725 return TRACE_TYPE_PARTIAL_LINE;
@@ -713,7 +742,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
713static enum print_line_t 742static enum print_line_t
714print_graph_entry_nested(struct trace_iterator *iter, 743print_graph_entry_nested(struct trace_iterator *iter,
715 struct ftrace_graph_ent_entry *entry, 744 struct ftrace_graph_ent_entry *entry,
716 struct trace_seq *s, int cpu) 745 struct trace_seq *s, int cpu, u32 flags)
717{ 746{
718 struct ftrace_graph_ent *call = &entry->graph_ent; 747 struct ftrace_graph_ent *call = &entry->graph_ent;
719 struct fgraph_data *data = iter->private; 748 struct fgraph_data *data = iter->private;
@@ -721,19 +750,24 @@ print_graph_entry_nested(struct trace_iterator *iter,
721 int i; 750 int i;
722 751
723 if (data) { 752 if (data) {
753 struct fgraph_cpu_data *cpu_data;
724 int cpu = iter->cpu; 754 int cpu = iter->cpu;
725 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
726 755
727 *depth = call->depth; 756 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
757 cpu_data->depth = call->depth;
758
759 /* Save this function pointer to see if the exit matches */
760 if (call->depth < FTRACE_RETFUNC_DEPTH)
761 cpu_data->enter_funcs[call->depth] = call->func;
728 } 762 }
729 763
730 /* No overhead */ 764 /* No overhead */
731 ret = print_graph_overhead(-1, s); 765 ret = print_graph_overhead(-1, s, flags);
732 if (!ret) 766 if (!ret)
733 return TRACE_TYPE_PARTIAL_LINE; 767 return TRACE_TYPE_PARTIAL_LINE;
734 768
735 /* No time */ 769 /* No time */
736 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 770 if (flags & TRACE_GRAPH_PRINT_DURATION) {
737 ret = trace_seq_printf(s, " | "); 771 ret = trace_seq_printf(s, " | ");
738 if (!ret) 772 if (!ret)
739 return TRACE_TYPE_PARTIAL_LINE; 773 return TRACE_TYPE_PARTIAL_LINE;
@@ -759,7 +793,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
759 793
760static enum print_line_t 794static enum print_line_t
761print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, 795print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
762 int type, unsigned long addr) 796 int type, unsigned long addr, u32 flags)
763{ 797{
764 struct fgraph_data *data = iter->private; 798 struct fgraph_data *data = iter->private;
765 struct trace_entry *ent = iter->ent; 799 struct trace_entry *ent = iter->ent;
@@ -772,27 +806,27 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
772 806
773 if (type) { 807 if (type) {
774 /* Interrupt */ 808 /* Interrupt */
775 ret = print_graph_irq(iter, addr, type, cpu, ent->pid); 809 ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags);
776 if (ret == TRACE_TYPE_PARTIAL_LINE) 810 if (ret == TRACE_TYPE_PARTIAL_LINE)
777 return TRACE_TYPE_PARTIAL_LINE; 811 return TRACE_TYPE_PARTIAL_LINE;
778 } 812 }
779 813
780 /* Absolute time */ 814 /* Absolute time */
781 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { 815 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
782 ret = print_graph_abs_time(iter->ts, s); 816 ret = print_graph_abs_time(iter->ts, s);
783 if (!ret) 817 if (!ret)
784 return TRACE_TYPE_PARTIAL_LINE; 818 return TRACE_TYPE_PARTIAL_LINE;
785 } 819 }
786 820
787 /* Cpu */ 821 /* Cpu */
788 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { 822 if (flags & TRACE_GRAPH_PRINT_CPU) {
789 ret = print_graph_cpu(s, cpu); 823 ret = print_graph_cpu(s, cpu);
790 if (ret == TRACE_TYPE_PARTIAL_LINE) 824 if (ret == TRACE_TYPE_PARTIAL_LINE)
791 return TRACE_TYPE_PARTIAL_LINE; 825 return TRACE_TYPE_PARTIAL_LINE;
792 } 826 }
793 827
794 /* Proc */ 828 /* Proc */
795 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { 829 if (flags & TRACE_GRAPH_PRINT_PROC) {
796 ret = print_graph_proc(s, ent->pid); 830 ret = print_graph_proc(s, ent->pid);
797 if (ret == TRACE_TYPE_PARTIAL_LINE) 831 if (ret == TRACE_TYPE_PARTIAL_LINE)
798 return TRACE_TYPE_PARTIAL_LINE; 832 return TRACE_TYPE_PARTIAL_LINE;
@@ -814,7 +848,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
814 848
815static enum print_line_t 849static enum print_line_t
816print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, 850print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
817 struct trace_iterator *iter) 851 struct trace_iterator *iter, u32 flags)
818{ 852{
819 struct fgraph_data *data = iter->private; 853 struct fgraph_data *data = iter->private;
820 struct ftrace_graph_ent *call = &field->graph_ent; 854 struct ftrace_graph_ent *call = &field->graph_ent;
@@ -822,14 +856,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
822 static enum print_line_t ret; 856 static enum print_line_t ret;
823 int cpu = iter->cpu; 857 int cpu = iter->cpu;
824 858
825 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) 859 if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags))
826 return TRACE_TYPE_PARTIAL_LINE; 860 return TRACE_TYPE_PARTIAL_LINE;
827 861
828 leaf_ret = get_return_for_leaf(iter, field); 862 leaf_ret = get_return_for_leaf(iter, field);
829 if (leaf_ret) 863 if (leaf_ret)
830 ret = print_graph_entry_leaf(iter, field, leaf_ret, s); 864 ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags);
831 else 865 else
832 ret = print_graph_entry_nested(iter, field, s, cpu); 866 ret = print_graph_entry_nested(iter, field, s, cpu, flags);
833 867
834 if (data) { 868 if (data) {
835 /* 869 /*
@@ -848,37 +882,47 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
848 882
849static enum print_line_t 883static enum print_line_t
850print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, 884print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
851 struct trace_entry *ent, struct trace_iterator *iter) 885 struct trace_entry *ent, struct trace_iterator *iter,
886 u32 flags)
852{ 887{
853 unsigned long long duration = trace->rettime - trace->calltime; 888 unsigned long long duration = trace->rettime - trace->calltime;
854 struct fgraph_data *data = iter->private; 889 struct fgraph_data *data = iter->private;
855 pid_t pid = ent->pid; 890 pid_t pid = ent->pid;
856 int cpu = iter->cpu; 891 int cpu = iter->cpu;
892 int func_match = 1;
857 int ret; 893 int ret;
858 int i; 894 int i;
859 895
860 if (data) { 896 if (data) {
897 struct fgraph_cpu_data *cpu_data;
861 int cpu = iter->cpu; 898 int cpu = iter->cpu;
862 int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); 899
900 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
863 901
864 /* 902 /*
865 * Comments display at + 1 to depth. This is the 903 * Comments display at + 1 to depth. This is the
866 * return from a function, we now want the comments 904 * return from a function, we now want the comments
867 * to display at the same level of the bracket. 905 * to display at the same level of the bracket.
868 */ 906 */
869 *depth = trace->depth - 1; 907 cpu_data->depth = trace->depth - 1;
908
909 if (trace->depth < FTRACE_RETFUNC_DEPTH) {
910 if (cpu_data->enter_funcs[trace->depth] != trace->func)
911 func_match = 0;
912 cpu_data->enter_funcs[trace->depth] = 0;
913 }
870 } 914 }
871 915
872 if (print_graph_prologue(iter, s, 0, 0)) 916 if (print_graph_prologue(iter, s, 0, 0, flags))
873 return TRACE_TYPE_PARTIAL_LINE; 917 return TRACE_TYPE_PARTIAL_LINE;
874 918
875 /* Overhead */ 919 /* Overhead */
876 ret = print_graph_overhead(duration, s); 920 ret = print_graph_overhead(duration, s, flags);
877 if (!ret) 921 if (!ret)
878 return TRACE_TYPE_PARTIAL_LINE; 922 return TRACE_TYPE_PARTIAL_LINE;
879 923
880 /* Duration */ 924 /* Duration */
881 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 925 if (flags & TRACE_GRAPH_PRINT_DURATION) {
882 ret = print_graph_duration(duration, s); 926 ret = print_graph_duration(duration, s);
883 if (ret == TRACE_TYPE_PARTIAL_LINE) 927 if (ret == TRACE_TYPE_PARTIAL_LINE)
884 return TRACE_TYPE_PARTIAL_LINE; 928 return TRACE_TYPE_PARTIAL_LINE;
@@ -891,19 +935,32 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
891 return TRACE_TYPE_PARTIAL_LINE; 935 return TRACE_TYPE_PARTIAL_LINE;
892 } 936 }
893 937
894 ret = trace_seq_printf(s, "}\n"); 938 /*
895 if (!ret) 939 * If the return function does not have a matching entry,
896 return TRACE_TYPE_PARTIAL_LINE; 940 * then the entry was lost. Instead of just printing
941 * the '}' and letting the user guess what function this
942 * belongs to, write out the function name.
943 */
944 if (func_match) {
945 ret = trace_seq_printf(s, "}\n");
946 if (!ret)
947 return TRACE_TYPE_PARTIAL_LINE;
948 } else {
949 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
950 if (!ret)
951 return TRACE_TYPE_PARTIAL_LINE;
952 }
897 953
898 /* Overrun */ 954 /* Overrun */
899 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { 955 if (flags & TRACE_GRAPH_PRINT_OVERRUN) {
900 ret = trace_seq_printf(s, " (Overruns: %lu)\n", 956 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
901 trace->overrun); 957 trace->overrun);
902 if (!ret) 958 if (!ret)
903 return TRACE_TYPE_PARTIAL_LINE; 959 return TRACE_TYPE_PARTIAL_LINE;
904 } 960 }
905 961
906 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); 962 ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET,
963 cpu, pid, flags);
907 if (ret == TRACE_TYPE_PARTIAL_LINE) 964 if (ret == TRACE_TYPE_PARTIAL_LINE)
908 return TRACE_TYPE_PARTIAL_LINE; 965 return TRACE_TYPE_PARTIAL_LINE;
909 966
@@ -911,8 +968,8 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
911} 968}
912 969
913static enum print_line_t 970static enum print_line_t
914print_graph_comment(struct trace_seq *s, struct trace_entry *ent, 971print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
915 struct trace_iterator *iter) 972 struct trace_iterator *iter, u32 flags)
916{ 973{
917 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 974 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
918 struct fgraph_data *data = iter->private; 975 struct fgraph_data *data = iter->private;
@@ -924,16 +981,16 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
924 if (data) 981 if (data)
925 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; 982 depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth;
926 983
927 if (print_graph_prologue(iter, s, 0, 0)) 984 if (print_graph_prologue(iter, s, 0, 0, flags))
928 return TRACE_TYPE_PARTIAL_LINE; 985 return TRACE_TYPE_PARTIAL_LINE;
929 986
930 /* No overhead */ 987 /* No overhead */
931 ret = print_graph_overhead(-1, s); 988 ret = print_graph_overhead(-1, s, flags);
932 if (!ret) 989 if (!ret)
933 return TRACE_TYPE_PARTIAL_LINE; 990 return TRACE_TYPE_PARTIAL_LINE;
934 991
935 /* No time */ 992 /* No time */
936 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { 993 if (flags & TRACE_GRAPH_PRINT_DURATION) {
937 ret = trace_seq_printf(s, " | "); 994 ret = trace_seq_printf(s, " | ");
938 if (!ret) 995 if (!ret)
939 return TRACE_TYPE_PARTIAL_LINE; 996 return TRACE_TYPE_PARTIAL_LINE;
@@ -968,7 +1025,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
968 if (!event) 1025 if (!event)
969 return TRACE_TYPE_UNHANDLED; 1026 return TRACE_TYPE_UNHANDLED;
970 1027
971 ret = event->trace(iter, sym_flags); 1028 ret = event->funcs->trace(iter, sym_flags, event);
972 if (ret != TRACE_TYPE_HANDLED) 1029 if (ret != TRACE_TYPE_HANDLED)
973 return ret; 1030 return ret;
974 } 1031 }
@@ -988,7 +1045,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
988 1045
989 1046
990enum print_line_t 1047enum print_line_t
991print_graph_function(struct trace_iterator *iter) 1048print_graph_function_flags(struct trace_iterator *iter, u32 flags)
992{ 1049{
993 struct ftrace_graph_ent_entry *field; 1050 struct ftrace_graph_ent_entry *field;
994 struct fgraph_data *data = iter->private; 1051 struct fgraph_data *data = iter->private;
@@ -1009,7 +1066,7 @@ print_graph_function(struct trace_iterator *iter)
1009 if (data && data->failed) { 1066 if (data && data->failed) {
1010 field = &data->ent; 1067 field = &data->ent;
1011 iter->cpu = data->cpu; 1068 iter->cpu = data->cpu;
1012 ret = print_graph_entry(field, s, iter); 1069 ret = print_graph_entry(field, s, iter, flags);
1013 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) { 1070 if (ret == TRACE_TYPE_HANDLED && iter->cpu != cpu) {
1014 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1; 1071 per_cpu_ptr(data->cpu_data, iter->cpu)->ignore = 1;
1015 ret = TRACE_TYPE_NO_CONSUME; 1072 ret = TRACE_TYPE_NO_CONSUME;
@@ -1029,32 +1086,50 @@ print_graph_function(struct trace_iterator *iter)
1029 struct ftrace_graph_ent_entry saved; 1086 struct ftrace_graph_ent_entry saved;
1030 trace_assign_type(field, entry); 1087 trace_assign_type(field, entry);
1031 saved = *field; 1088 saved = *field;
1032 return print_graph_entry(&saved, s, iter); 1089 return print_graph_entry(&saved, s, iter, flags);
1033 } 1090 }
1034 case TRACE_GRAPH_RET: { 1091 case TRACE_GRAPH_RET: {
1035 struct ftrace_graph_ret_entry *field; 1092 struct ftrace_graph_ret_entry *field;
1036 trace_assign_type(field, entry); 1093 trace_assign_type(field, entry);
1037 return print_graph_return(&field->ret, s, entry, iter); 1094 return print_graph_return(&field->ret, s, entry, iter, flags);
1038 } 1095 }
1096 case TRACE_STACK:
1097 case TRACE_FN:
1098 /* dont trace stack and functions as comments */
1099 return TRACE_TYPE_UNHANDLED;
1100
1039 default: 1101 default:
1040 return print_graph_comment(s, entry, iter); 1102 return print_graph_comment(s, entry, iter, flags);
1041 } 1103 }
1042 1104
1043 return TRACE_TYPE_HANDLED; 1105 return TRACE_TYPE_HANDLED;
1044} 1106}
1045 1107
1046static void print_lat_header(struct seq_file *s) 1108static enum print_line_t
1109print_graph_function(struct trace_iterator *iter)
1110{
1111 return print_graph_function_flags(iter, tracer_flags.val);
1112}
1113
1114static enum print_line_t
1115print_graph_function_event(struct trace_iterator *iter, int flags,
1116 struct trace_event *event)
1117{
1118 return print_graph_function(iter);
1119}
1120
1121static void print_lat_header(struct seq_file *s, u32 flags)
1047{ 1122{
1048 static const char spaces[] = " " /* 16 spaces */ 1123 static const char spaces[] = " " /* 16 spaces */
1049 " " /* 4 spaces */ 1124 " " /* 4 spaces */
1050 " "; /* 17 spaces */ 1125 " "; /* 17 spaces */
1051 int size = 0; 1126 int size = 0;
1052 1127
1053 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1128 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1054 size += 16; 1129 size += 16;
1055 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1130 if (flags & TRACE_GRAPH_PRINT_CPU)
1056 size += 4; 1131 size += 4;
1057 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1132 if (flags & TRACE_GRAPH_PRINT_PROC)
1058 size += 17; 1133 size += 17;
1059 1134
1060 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces); 1135 seq_printf(s, "#%.*s _-----=> irqs-off \n", size, spaces);
@@ -1065,43 +1140,48 @@ static void print_lat_header(struct seq_file *s)
1065 seq_printf(s, "#%.*s|||| / \n", size, spaces); 1140 seq_printf(s, "#%.*s|||| / \n", size, spaces);
1066} 1141}
1067 1142
1068static void print_graph_headers(struct seq_file *s) 1143void print_graph_headers_flags(struct seq_file *s, u32 flags)
1069{ 1144{
1070 int lat = trace_flags & TRACE_ITER_LATENCY_FMT; 1145 int lat = trace_flags & TRACE_ITER_LATENCY_FMT;
1071 1146
1072 if (lat) 1147 if (lat)
1073 print_lat_header(s); 1148 print_lat_header(s, flags);
1074 1149
1075 /* 1st line */ 1150 /* 1st line */
1076 seq_printf(s, "#"); 1151 seq_printf(s, "#");
1077 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1152 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1078 seq_printf(s, " TIME "); 1153 seq_printf(s, " TIME ");
1079 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1154 if (flags & TRACE_GRAPH_PRINT_CPU)
1080 seq_printf(s, " CPU"); 1155 seq_printf(s, " CPU");
1081 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1156 if (flags & TRACE_GRAPH_PRINT_PROC)
1082 seq_printf(s, " TASK/PID "); 1157 seq_printf(s, " TASK/PID ");
1083 if (lat) 1158 if (lat)
1084 seq_printf(s, "|||||"); 1159 seq_printf(s, "|||||");
1085 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1160 if (flags & TRACE_GRAPH_PRINT_DURATION)
1086 seq_printf(s, " DURATION "); 1161 seq_printf(s, " DURATION ");
1087 seq_printf(s, " FUNCTION CALLS\n"); 1162 seq_printf(s, " FUNCTION CALLS\n");
1088 1163
1089 /* 2nd line */ 1164 /* 2nd line */
1090 seq_printf(s, "#"); 1165 seq_printf(s, "#");
1091 if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) 1166 if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
1092 seq_printf(s, " | "); 1167 seq_printf(s, " | ");
1093 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) 1168 if (flags & TRACE_GRAPH_PRINT_CPU)
1094 seq_printf(s, " | "); 1169 seq_printf(s, " | ");
1095 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) 1170 if (flags & TRACE_GRAPH_PRINT_PROC)
1096 seq_printf(s, " | | "); 1171 seq_printf(s, " | | ");
1097 if (lat) 1172 if (lat)
1098 seq_printf(s, "|||||"); 1173 seq_printf(s, "|||||");
1099 if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) 1174 if (flags & TRACE_GRAPH_PRINT_DURATION)
1100 seq_printf(s, " | | "); 1175 seq_printf(s, " | | ");
1101 seq_printf(s, " | | | |\n"); 1176 seq_printf(s, " | | | |\n");
1102} 1177}
1103 1178
1104static void graph_trace_open(struct trace_iterator *iter) 1179void print_graph_headers(struct seq_file *s)
1180{
1181 print_graph_headers_flags(s, tracer_flags.val);
1182}
1183
1184void graph_trace_open(struct trace_iterator *iter)
1105{ 1185{
1106 /* pid and depth on the last trace processed */ 1186 /* pid and depth on the last trace processed */
1107 struct fgraph_data *data; 1187 struct fgraph_data *data;
@@ -1136,7 +1216,7 @@ static void graph_trace_open(struct trace_iterator *iter)
1136 pr_warning("function graph tracer: not enough memory\n"); 1216 pr_warning("function graph tracer: not enough memory\n");
1137} 1217}
1138 1218
1139static void graph_trace_close(struct trace_iterator *iter) 1219void graph_trace_close(struct trace_iterator *iter)
1140{ 1220{
1141 struct fgraph_data *data = iter->private; 1221 struct fgraph_data *data = iter->private;
1142 1222
@@ -1146,6 +1226,20 @@ static void graph_trace_close(struct trace_iterator *iter)
1146 } 1226 }
1147} 1227}
1148 1228
1229static struct trace_event_functions graph_functions = {
1230 .trace = print_graph_function_event,
1231};
1232
1233static struct trace_event graph_trace_entry_event = {
1234 .type = TRACE_GRAPH_ENT,
1235 .funcs = &graph_functions,
1236};
1237
1238static struct trace_event graph_trace_ret_event = {
1239 .type = TRACE_GRAPH_RET,
1240 .funcs = &graph_functions
1241};
1242
1149static struct tracer graph_trace __read_mostly = { 1243static struct tracer graph_trace __read_mostly = {
1150 .name = "function_graph", 1244 .name = "function_graph",
1151 .open = graph_trace_open, 1245 .open = graph_trace_open,
@@ -1167,6 +1261,16 @@ static __init int init_graph_trace(void)
1167{ 1261{
1168 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); 1262 max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
1169 1263
1264 if (!register_ftrace_event(&graph_trace_entry_event)) {
1265 pr_warning("Warning: could not register graph trace events\n");
1266 return 1;
1267 }
1268
1269 if (!register_ftrace_event(&graph_trace_ret_event)) {
1270 pr_warning("Warning: could not register graph trace events\n");
1271 return 1;
1272 }
1273
1170 return register_tracer(&graph_trace); 1274 return register_tracer(&graph_trace);
1171} 1275}
1172 1276
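Two themes run through the trace_functions_graph.c changes above: the print helpers now take an explicit u32 flags argument instead of reading the global tracer_flags, and each CPU remembers which function entered at a given depth so that a return without a matching entry is annotated with the function name rather than a bare closing brace. The stand-alone sketch below models only the second idea; the array size, names, and printf output are illustrative, not the kernel's per-CPU implementation.

/* Remember the function that entered at each depth; if the recorded entry
 * does not match at return time, print the function after the "}" so the
 * reader is not left guessing which call the brace closes. */
#include <stdio.h>

#define MAX_DEPTH 50

static unsigned long enter_funcs[MAX_DEPTH];   /* per-CPU array in the kernel */

static void graph_entry(unsigned long func, int depth)
{
	if (depth < MAX_DEPTH)
		enter_funcs[depth] = func;     /* save to compare on exit */
	printf("%*s%p() {\n", depth * 2, "", (void *)func);
}

static void graph_return(unsigned long func, int depth)
{
	int func_match = 1;

	if (depth < MAX_DEPTH) {
		if (enter_funcs[depth] != func)
			func_match = 0;        /* the entry event was lost */
		enter_funcs[depth] = 0;
	}

	if (func_match)
		printf("%*s}\n", depth * 2, "");
	else
		printf("%*s} (return of %p)\n", depth * 2, "", (void *)func);
}

int main(void)
{
	graph_entry(0x1000, 0);
	graph_return(0x1000, 0);   /* matching pair: bare closing brace */
	graph_return(0x2000, 0);   /* lost entry: brace plus function address */
	return 0;
}

In the real tracer the annotation is rendered with %ps, so the trace shows the symbol name after the brace; the sketch just prints the raw address.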
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
deleted file mode 100644
index 7b97000745f5..000000000000
--- a/kernel/trace/trace_hw_branches.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * h/w branch tracer for x86 based on BTS
3 *
4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */
7#include <linux/kallsyms.h>
8#include <linux/debugfs.h>
9#include <linux/ftrace.h>
10#include <linux/module.h>
11#include <linux/cpu.h>
12#include <linux/smp.h>
13#include <linux/fs.h>
14
15#include <asm/ds.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20
21#define BTS_BUFFER_SIZE (1 << 13)
22
23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25
26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27
28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly;
30static struct trace_array *hw_branch_trace __read_mostly;
31
32
33static void bts_trace_init_cpu(int cpu)
34{
35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
39
40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
41 per_cpu(hwb_tracer, cpu) = NULL;
42}
43
44static int bts_trace_init(struct trace_array *tr)
45{
46 int cpu;
47
48 hw_branch_trace = tr;
49 trace_hw_branches_enabled = 0;
50
51 get_online_cpus();
52 for_each_online_cpu(cpu) {
53 bts_trace_init_cpu(cpu);
54
55 if (likely(per_cpu(hwb_tracer, cpu)))
56 trace_hw_branches_enabled = 1;
57 }
58 trace_hw_branches_suspended = 0;
59 put_online_cpus();
60
61 /* If we could not enable tracing on a single cpu, we fail. */
62 return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
63}
64
65static void bts_trace_reset(struct trace_array *tr)
66{
67 int cpu;
68
69 get_online_cpus();
70 for_each_online_cpu(cpu) {
71 if (likely(per_cpu(hwb_tracer, cpu))) {
72 ds_release_bts(per_cpu(hwb_tracer, cpu));
73 per_cpu(hwb_tracer, cpu) = NULL;
74 }
75 }
76 trace_hw_branches_enabled = 0;
77 trace_hw_branches_suspended = 0;
78 put_online_cpus();
79}
80
81static void bts_trace_start(struct trace_array *tr)
82{
83 int cpu;
84
85 get_online_cpus();
86 for_each_online_cpu(cpu)
87 if (likely(per_cpu(hwb_tracer, cpu)))
88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
89 trace_hw_branches_suspended = 0;
90 put_online_cpus();
91}
92
93static void bts_trace_stop(struct trace_array *tr)
94{
95 int cpu;
96
97 get_online_cpus();
98 for_each_online_cpu(cpu)
99 if (likely(per_cpu(hwb_tracer, cpu)))
100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
101 trace_hw_branches_suspended = 1;
102 put_online_cpus();
103}
104
105static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
106 unsigned long action, void *hcpu)
107{
108 int cpu = (long)hcpu;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 /* The notification is sent with interrupts enabled. */
114 if (trace_hw_branches_enabled) {
115 bts_trace_init_cpu(cpu);
116
117 if (trace_hw_branches_suspended &&
118 likely(per_cpu(hwb_tracer, cpu)))
119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
120 }
121 break;
122
123 case CPU_DOWN_PREPARE:
124 /* The notification is sent with interrupts enabled. */
125 if (likely(per_cpu(hwb_tracer, cpu))) {
126 ds_release_bts(per_cpu(hwb_tracer, cpu));
127 per_cpu(hwb_tracer, cpu) = NULL;
128 }
129 }
130
131 return NOTIFY_DONE;
132}
133
134static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
135 .notifier_call = bts_hotcpu_handler
136};
137
138static void bts_trace_print_header(struct seq_file *m)
139{
140 seq_puts(m, "# CPU# TO <- FROM\n");
141}
142
143static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
144{
145 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
146 struct trace_entry *entry = iter->ent;
147 struct trace_seq *seq = &iter->seq;
148 struct hw_branch_entry *it;
149
150 trace_assign_type(it, entry);
151
152 if (entry->type == TRACE_HW_BRANCHES) {
153 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
154 seq_print_ip_sym(seq, it->to, symflags) &&
155 trace_seq_printf(seq, "\t <- ") &&
156 seq_print_ip_sym(seq, it->from, symflags) &&
157 trace_seq_printf(seq, "\n"))
158 return TRACE_TYPE_HANDLED;
159 return TRACE_TYPE_PARTIAL_LINE;
160 }
161 return TRACE_TYPE_UNHANDLED;
162}
163
164void trace_hw_branch(u64 from, u64 to)
165{
166 struct ftrace_event_call *call = &event_hw_branch;
167 struct trace_array *tr = hw_branch_trace;
168 struct ring_buffer_event *event;
169 struct ring_buffer *buf;
170 struct hw_branch_entry *entry;
171 unsigned long irq1;
172 int cpu;
173
174 if (unlikely(!tr))
175 return;
176
177 if (unlikely(!trace_hw_branches_enabled))
178 return;
179
180 local_irq_save(irq1);
181 cpu = raw_smp_processor_id();
182 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
183 goto out;
184
185 buf = tr->buffer;
186 event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
187 sizeof(*entry), 0, 0);
188 if (!event)
189 goto out;
190 entry = ring_buffer_event_data(event);
191 tracing_generic_entry_update(&entry->ent, 0, from);
192 entry->ent.type = TRACE_HW_BRANCHES;
193 entry->from = from;
194 entry->to = to;
195 if (!filter_check_discard(call, entry, buf, event))
196 trace_buffer_unlock_commit(buf, event, 0, 0);
197
198 out:
199 atomic_dec(&tr->data[cpu]->disabled);
200 local_irq_restore(irq1);
201}
202
203static void trace_bts_at(const struct bts_trace *trace, void *at)
204{
205 struct bts_struct bts;
206 int err = 0;
207
208 WARN_ON_ONCE(!trace->read);
209 if (!trace->read)
210 return;
211
212 err = trace->read(this_tracer, at, &bts);
213 if (err < 0)
214 return;
215
216 switch (bts.qualifier) {
217 case BTS_BRANCH:
218 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
219 break;
220 }
221}
222
223/*
224 * Collect the trace on the current cpu and write it into the ftrace buffer.
225 *
226 * pre: tracing must be suspended on the current cpu
227 */
228static void trace_bts_cpu(void *arg)
229{
230 struct trace_array *tr = (struct trace_array *)arg;
231 const struct bts_trace *trace;
232 unsigned char *at;
233
234 if (unlikely(!tr))
235 return;
236
237 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
238 return;
239
240 if (unlikely(!this_tracer))
241 return;
242
243 trace = ds_read_bts(this_tracer);
244 if (!trace)
245 return;
246
247 for (at = trace->ds.top; (void *)at < trace->ds.end;
248 at += trace->ds.size)
249 trace_bts_at(trace, at);
250
251 for (at = trace->ds.begin; (void *)at < trace->ds.top;
252 at += trace->ds.size)
253 trace_bts_at(trace, at);
254}
255
256static void trace_bts_prepare(struct trace_iterator *iter)
257{
258 int cpu;
259
260 get_online_cpus();
261 for_each_online_cpu(cpu)
262 if (likely(per_cpu(hwb_tracer, cpu)))
263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
264 /*
265 * We need to collect the trace on the respective cpu since ftrace
266 * implicitly adds the record for the current cpu.
267 * Once that is more flexible, we could collect the data from any cpu.
268 */
269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
270
271 for_each_online_cpu(cpu)
272 if (likely(per_cpu(hwb_tracer, cpu)))
273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
274 put_online_cpus();
275}
276
277static void trace_bts_close(struct trace_iterator *iter)
278{
279 tracing_reset_online_cpus(iter->tr);
280}
281
282void trace_hw_branch_oops(void)
283{
284 if (this_tracer) {
285 ds_suspend_bts_noirq(this_tracer);
286 trace_bts_cpu(hw_branch_trace);
287 ds_resume_bts_noirq(this_tracer);
288 }
289}
290
291struct tracer bts_tracer __read_mostly =
292{
293 .name = "hw-branch-tracer",
294 .init = bts_trace_init,
295 .reset = bts_trace_reset,
296 .print_header = bts_trace_print_header,
297 .print_line = bts_trace_print_line,
298 .start = bts_trace_start,
299 .stop = bts_trace_stop,
300 .open = trace_bts_prepare,
301 .close = trace_bts_close,
302#ifdef CONFIG_FTRACE_SELFTEST
303 .selftest = trace_selftest_startup_hw_branches,
304#endif /* CONFIG_FTRACE_SELFTEST */
305};
306
307__init static int init_bts_trace(void)
308{
309 register_hotcpu_notifier(&bts_hotcpu_notifier);
310 return register_tracer(&bts_tracer);
311}
312device_initcall(init_bts_trace);
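The whole BTS-based hw-branch tracer is deleted above. Like the other tracers in this directory it was built around a struct tracer callback table handed to register_tracer(); the stand-alone sketch below models that registration pattern with hypothetical names (demo_tracer, demo_init, and so on), purely to show the shape of the interface the deleted file implemented.

/* Illustrative model only: a tracer is a named table of hooks that a
 * registration call records, and the core then drives those hooks. */
#include <stdio.h>

struct tracer {
	const char *name;
	int  (*init)(void);
	void (*reset)(void);
	void (*start)(void);
	void (*stop)(void);
};

static int  demo_init(void)  { printf("init\n");  return 0; }
static void demo_reset(void) { printf("reset\n"); }
static void demo_start(void) { printf("start\n"); }
static void demo_stop(void)  { printf("stop\n");  }

static struct tracer demo_tracer = {
	.name  = "demo-branch-tracer",
	.init  = demo_init,
	.reset = demo_reset,
	.start = demo_start,
	.stop  = demo_stop,
};

/* Stand-in for register_tracer(): the core just remembers the table. */
static struct tracer *registered;
static int register_tracer(struct tracer *t)
{
	registered = t;
	return 0;
}

int main(void)
{
	register_tracer(&demo_tracer);
	registered->init();
	registered->start();
	registered->stop();
	registered->reset();
	return 0;
}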
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 2974bc7538c7..6fd486e0cef4 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -34,6 +34,9 @@ static int trace_type __read_mostly;
34 34
35static int save_lat_flag; 35static int save_lat_flag;
36 36
37static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
38static int start_irqsoff_tracer(struct trace_array *tr, int graph);
39
37#ifdef CONFIG_PREEMPT_TRACER 40#ifdef CONFIG_PREEMPT_TRACER
38static inline int 41static inline int
39preempt_trace(void) 42preempt_trace(void)
@@ -55,6 +58,23 @@ irq_trace(void)
55# define irq_trace() (0) 58# define irq_trace() (0)
56#endif 59#endif
57 60
61#define TRACE_DISPLAY_GRAPH 1
62
63static struct tracer_opt trace_opts[] = {
64#ifdef CONFIG_FUNCTION_GRAPH_TRACER
65 /* display latency trace as call graph */
66 { TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
67#endif
68 { } /* Empty entry */
69};
70
71static struct tracer_flags tracer_flags = {
72 .val = 0,
73 .opts = trace_opts,
74};
75
76#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
77
58/* 78/*
59 * Sequence count - we record it when starting a measurement and 79 * Sequence count - we record it when starting a measurement and
60 * skip the latency if the sequence has changed - some other section 80 * skip the latency if the sequence has changed - some other section
@@ -108,6 +128,202 @@ static struct ftrace_ops trace_ops __read_mostly =
108}; 128};
109#endif /* CONFIG_FUNCTION_TRACER */ 129#endif /* CONFIG_FUNCTION_TRACER */
110 130
131#ifdef CONFIG_FUNCTION_GRAPH_TRACER
132static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
133{
134 int cpu;
135
136 if (!(bit & TRACE_DISPLAY_GRAPH))
137 return -EINVAL;
138
139 if (!(is_graph() ^ set))
140 return 0;
141
142 stop_irqsoff_tracer(irqsoff_trace, !set);
143
144 for_each_possible_cpu(cpu)
145 per_cpu(tracing_cpu, cpu) = 0;
146
147 tracing_max_latency = 0;
148 tracing_reset_online_cpus(irqsoff_trace);
149
150 return start_irqsoff_tracer(irqsoff_trace, set);
151}
152
153static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
154{
155 struct trace_array *tr = irqsoff_trace;
156 struct trace_array_cpu *data;
157 unsigned long flags;
158 long disabled;
159 int ret;
160 int cpu;
161 int pc;
162
163 cpu = raw_smp_processor_id();
164 if (likely(!per_cpu(tracing_cpu, cpu)))
165 return 0;
166
167 local_save_flags(flags);
168 /* slight chance to get a false positive on tracing_cpu */
169 if (!irqs_disabled_flags(flags))
170 return 0;
171
172 data = tr->data[cpu];
173 disabled = atomic_inc_return(&data->disabled);
174
175 if (likely(disabled == 1)) {
176 pc = preempt_count();
177 ret = __trace_graph_entry(tr, trace, flags, pc);
178 } else
179 ret = 0;
180
181 atomic_dec(&data->disabled);
182 return ret;
183}
184
185static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
186{
187 struct trace_array *tr = irqsoff_trace;
188 struct trace_array_cpu *data;
189 unsigned long flags;
190 long disabled;
191 int cpu;
192 int pc;
193
194 cpu = raw_smp_processor_id();
195 if (likely(!per_cpu(tracing_cpu, cpu)))
196 return;
197
198 local_save_flags(flags);
199 /* slight chance to get a false positive on tracing_cpu */
200 if (!irqs_disabled_flags(flags))
201 return;
202
203 data = tr->data[cpu];
204 disabled = atomic_inc_return(&data->disabled);
205
206 if (likely(disabled == 1)) {
207 pc = preempt_count();
208 __trace_graph_return(tr, trace, flags, pc);
209 }
210
211 atomic_dec(&data->disabled);
212}
213
214static void irqsoff_trace_open(struct trace_iterator *iter)
215{
216 if (is_graph())
217 graph_trace_open(iter);
218
219}
220
221static void irqsoff_trace_close(struct trace_iterator *iter)
222{
223 if (iter->private)
224 graph_trace_close(iter);
225}
226
227#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
228 TRACE_GRAPH_PRINT_PROC)
229
230static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
231{
232 u32 flags = GRAPH_TRACER_FLAGS;
233
234 if (trace_flags & TRACE_ITER_LATENCY_FMT)
235 flags |= TRACE_GRAPH_PRINT_DURATION;
236 else
237 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
238
239 /*
240 * In graph mode call the graph tracer output function,
241 * otherwise go with the TRACE_FN event handler
242 */
243 if (is_graph())
244 return print_graph_function_flags(iter, flags);
245
246 return TRACE_TYPE_UNHANDLED;
247}
248
249static void irqsoff_print_header(struct seq_file *s)
250{
251 if (is_graph()) {
252 struct trace_iterator *iter = s->private;
253 u32 flags = GRAPH_TRACER_FLAGS;
254
255 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
256 /* print nothing if the buffers are empty */
257 if (trace_empty(iter))
258 return;
259
260 print_trace_header(s, iter);
261 flags |= TRACE_GRAPH_PRINT_DURATION;
262 } else
263 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
264
265 print_graph_headers_flags(s, flags);
266 } else
267 trace_default_header(s);
268}
269
270static void
271trace_graph_function(struct trace_array *tr,
272 unsigned long ip, unsigned long flags, int pc)
273{
274 u64 time = trace_clock_local();
275 struct ftrace_graph_ent ent = {
276 .func = ip,
277 .depth = 0,
278 };
279 struct ftrace_graph_ret ret = {
280 .func = ip,
281 .depth = 0,
282 .calltime = time,
283 .rettime = time,
284 };
285
286 __trace_graph_entry(tr, &ent, flags, pc);
287 __trace_graph_return(tr, &ret, flags, pc);
288}
289
290static void
291__trace_function(struct trace_array *tr,
292 unsigned long ip, unsigned long parent_ip,
293 unsigned long flags, int pc)
294{
295 if (!is_graph())
296 trace_function(tr, ip, parent_ip, flags, pc);
297 else {
298 trace_graph_function(tr, parent_ip, flags, pc);
299 trace_graph_function(tr, ip, flags, pc);
300 }
301}
302
303#else
304#define __trace_function trace_function
305
306static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
307{
308 return -EINVAL;
309}
310
311static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
312{
313 return -1;
314}
315
316static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
317{
318 return TRACE_TYPE_UNHANDLED;
319}
320
321static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
322static void irqsoff_print_header(struct seq_file *s) { }
323static void irqsoff_trace_open(struct trace_iterator *iter) { }
324static void irqsoff_trace_close(struct trace_iterator *iter) { }
325#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
326
111/* 327/*
112 * Should this new latency be reported/recorded? 328 * Should this new latency be reported/recorded?
113 */ 329 */
@@ -150,7 +366,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 366 if (!report_latency(delta))
151 goto out_unlock; 367 goto out_unlock;
152 368
153 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 369 __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
154 /* Skip 5 functions to get to the irq/preempt enable function */ 370 /* Skip 5 functions to get to the irq/preempt enable function */
155 __trace_stack(tr, flags, 5, pc); 371 __trace_stack(tr, flags, 5, pc);
156 372
@@ -172,7 +388,7 @@ out_unlock:
172out: 388out:
173 data->critical_sequence = max_sequence; 389 data->critical_sequence = max_sequence;
174 data->preempt_timestamp = ftrace_now(cpu); 390 data->preempt_timestamp = ftrace_now(cpu);
175 trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); 391 __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
176} 392}
177 393
178static inline void 394static inline void
@@ -204,7 +420,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
204 420
205 local_save_flags(flags); 421 local_save_flags(flags);
206 422
207 trace_function(tr, ip, parent_ip, flags, preempt_count()); 423 __trace_function(tr, ip, parent_ip, flags, preempt_count());
208 424
209 per_cpu(tracing_cpu, cpu) = 1; 425 per_cpu(tracing_cpu, cpu) = 1;
210 426
@@ -238,7 +454,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
238 atomic_inc(&data->disabled); 454 atomic_inc(&data->disabled);
239 455
240 local_save_flags(flags); 456 local_save_flags(flags);
241 trace_function(tr, ip, parent_ip, flags, preempt_count()); 457 __trace_function(tr, ip, parent_ip, flags, preempt_count());
242 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 458 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
243 data->critical_start = 0; 459 data->critical_start = 0;
244 atomic_dec(&data->disabled); 460 atomic_dec(&data->disabled);
@@ -347,19 +563,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
347} 563}
348#endif /* CONFIG_PREEMPT_TRACER */ 564#endif /* CONFIG_PREEMPT_TRACER */
349 565
350static void start_irqsoff_tracer(struct trace_array *tr) 566static int start_irqsoff_tracer(struct trace_array *tr, int graph)
351{ 567{
352 register_ftrace_function(&trace_ops); 568 int ret = 0;
353 if (tracing_is_enabled()) 569
570 if (!graph)
571 ret = register_ftrace_function(&trace_ops);
572 else
573 ret = register_ftrace_graph(&irqsoff_graph_return,
574 &irqsoff_graph_entry);
575
576 if (!ret && tracing_is_enabled())
354 tracer_enabled = 1; 577 tracer_enabled = 1;
355 else 578 else
356 tracer_enabled = 0; 579 tracer_enabled = 0;
580
581 return ret;
357} 582}
358 583
359static void stop_irqsoff_tracer(struct trace_array *tr) 584static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
360{ 585{
361 tracer_enabled = 0; 586 tracer_enabled = 0;
362 unregister_ftrace_function(&trace_ops); 587
588 if (!graph)
589 unregister_ftrace_function(&trace_ops);
590 else
591 unregister_ftrace_graph();
363} 592}
364 593
365static void __irqsoff_tracer_init(struct trace_array *tr) 594static void __irqsoff_tracer_init(struct trace_array *tr)
@@ -372,12 +601,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
372 /* make sure that the tracer is visible */ 601 /* make sure that the tracer is visible */
373 smp_wmb(); 602 smp_wmb();
374 tracing_reset_online_cpus(tr); 603 tracing_reset_online_cpus(tr);
375 start_irqsoff_tracer(tr); 604
605 if (start_irqsoff_tracer(tr, is_graph()))
606 printk(KERN_ERR "failed to start irqsoff tracer\n");
376} 607}
377 608
378static void irqsoff_tracer_reset(struct trace_array *tr) 609static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 610{
380 stop_irqsoff_tracer(tr); 611 stop_irqsoff_tracer(tr, is_graph());
381 612
382 if (!save_lat_flag) 613 if (!save_lat_flag)
383 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 614 trace_flags &= ~TRACE_ITER_LATENCY_FMT;
@@ -409,9 +640,15 @@ static struct tracer irqsoff_tracer __read_mostly =
409 .start = irqsoff_tracer_start, 640 .start = irqsoff_tracer_start,
410 .stop = irqsoff_tracer_stop, 641 .stop = irqsoff_tracer_stop,
411 .print_max = 1, 642 .print_max = 1,
643 .print_header = irqsoff_print_header,
644 .print_line = irqsoff_print_line,
645 .flags = &tracer_flags,
646 .set_flag = irqsoff_set_flag,
412#ifdef CONFIG_FTRACE_SELFTEST 647#ifdef CONFIG_FTRACE_SELFTEST
413 .selftest = trace_selftest_startup_irqsoff, 648 .selftest = trace_selftest_startup_irqsoff,
414#endif 649#endif
650 .open = irqsoff_trace_open,
651 .close = irqsoff_trace_close,
415}; 652};
416# define register_irqsoff(trace) register_tracer(&trace) 653# define register_irqsoff(trace) register_tracer(&trace)
417#else 654#else
@@ -435,9 +672,15 @@ static struct tracer preemptoff_tracer __read_mostly =
435 .start = irqsoff_tracer_start, 672 .start = irqsoff_tracer_start,
436 .stop = irqsoff_tracer_stop, 673 .stop = irqsoff_tracer_stop,
437 .print_max = 1, 674 .print_max = 1,
675 .print_header = irqsoff_print_header,
676 .print_line = irqsoff_print_line,
677 .flags = &tracer_flags,
678 .set_flag = irqsoff_set_flag,
438#ifdef CONFIG_FTRACE_SELFTEST 679#ifdef CONFIG_FTRACE_SELFTEST
439 .selftest = trace_selftest_startup_preemptoff, 680 .selftest = trace_selftest_startup_preemptoff,
440#endif 681#endif
682 .open = irqsoff_trace_open,
683 .close = irqsoff_trace_close,
441}; 684};
442# define register_preemptoff(trace) register_tracer(&trace) 685# define register_preemptoff(trace) register_tracer(&trace)
443#else 686#else
@@ -463,9 +706,15 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
463 .start = irqsoff_tracer_start, 706 .start = irqsoff_tracer_start,
464 .stop = irqsoff_tracer_stop, 707 .stop = irqsoff_tracer_stop,
465 .print_max = 1, 708 .print_max = 1,
709 .print_header = irqsoff_print_header,
710 .print_line = irqsoff_print_line,
711 .flags = &tracer_flags,
712 .set_flag = irqsoff_set_flag,
466#ifdef CONFIG_FTRACE_SELFTEST 713#ifdef CONFIG_FTRACE_SELFTEST
467 .selftest = trace_selftest_startup_preemptirqsoff, 714 .selftest = trace_selftest_startup_preemptirqsoff,
468#endif 715#endif
716 .open = irqsoff_trace_open,
717 .close = irqsoff_trace_close,
469}; 718};
470 719
471# define register_preemptirqsoff(trace) register_tracer(&trace) 720# define register_preemptirqsoff(trace) register_tracer(&trace)
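The trace_irqsoff.c changes above add a display-graph tracer option: when it is set, __trace_function() records a function hit as a zero-depth graph entry/return pair sharing one timestamp instead of a flat function event, and the output path switches to print_graph_function_flags(). Below is a minimal stand-alone model of that dispatch, with made-up names and printf output standing in for the ring-buffer writes.

/* Illustrative sketch: one flag selects between a flat function record and
 * a zero-duration graph entry/return pair for the same hit. */
#include <stdio.h>

#define DISPLAY_GRAPH 0x1
static unsigned int tracer_flags;          /* toggled by the set_flag hook */

static int is_graph(void) { return tracer_flags & DISPLAY_GRAPH; }

static void trace_function(unsigned long ip)
{
	printf("fn  %p\n", (void *)ip);
}

static void trace_graph_pair(unsigned long ip)
{
	/* entry and return share one timestamp, so the duration is ~0 */
	printf("ent %p depth=0\n", (void *)ip);
	printf("ret %p depth=0\n", (void *)ip);
}

static void __trace_function(unsigned long ip, unsigned long parent_ip)
{
	if (!is_graph()) {
		trace_function(ip);
	} else {
		trace_graph_pair(parent_ip);
		trace_graph_pair(ip);
	}
}

int main(void)
{
	__trace_function(0x1234, 0x5678);      /* flat function record */
	tracer_flags |= DISPLAY_GRAPH;
	__trace_function(0x1234, 0x5678);      /* two graph pairs */
	return 0;
}

Recording an ordinary function hit as an entry/return pair with identical timestamps lets the graph output code render it without special-casing, which is why the latency tracers can reuse the graph formatting path.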
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0e2c96..f52b5f50299d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -29,6 +29,8 @@
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <asm/bitsperlong.h>
32 34
33#include "trace.h" 35#include "trace.h"
34#include "trace_output.h" 36#include "trace_output.h"
@@ -40,7 +42,6 @@
40 42
41/* Reserved field names */ 43/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip" 44#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip" 45#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func" 46#define FIELD_STRING_FUNC "__probe_func"
46 47
@@ -52,61 +53,102 @@ const char *reserved_field_names[] = {
52 "common_tgid", 53 "common_tgid",
53 "common_lock_depth", 54 "common_lock_depth",
54 FIELD_STRING_IP, 55 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP, 56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC, 57 FIELD_STRING_FUNC,
58}; 58};
59 59
60struct fetch_func { 60/* Printing function type */
61 unsigned long (*func)(struct pt_regs *, void *); 61typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *);
62#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
63#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
64
65/* Printing in basic type function template */
66#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
67static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
68 const char *name, void *data)\
69{ \
70 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
71} \
72static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
73
74DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
75DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
76DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
77DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
78DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
82
83/* Data fetch function type */
84typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
85
86struct fetch_param {
87 fetch_func_t fn;
62 void *data; 88 void *data;
63}; 89};
64 90
65static __kprobes unsigned long call_fetch(struct fetch_func *f, 91static __kprobes void call_fetch(struct fetch_param *fprm,
66 struct pt_regs *regs) 92 struct pt_regs *regs, void *dest)
67{
68 return f->func(regs, f->data);
69}
70
71/* fetch handlers */
72static __kprobes unsigned long fetch_register(struct pt_regs *regs,
73 void *offset)
74{
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset));
76}
77
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
79 void *num)
80{ 93{
81 return regs_get_kernel_stack_nth(regs, 94 return fprm->fn(regs, fprm->data, dest);
82 (unsigned int)((unsigned long)num));
83} 95}
84 96
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) 97#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type
86{ 98/*
87 unsigned long retval; 99 * Define macro for basic types - we don't need to define s* types, because
88 100 * we have to care only about bitwidth at recording time.
89 if (probe_kernel_address(addr, retval)) 101 */
90 return 0; 102#define DEFINE_BASIC_FETCH_FUNCS(kind) \
91 return retval; 103DEFINE_FETCH_##kind(u8) \
104DEFINE_FETCH_##kind(u16) \
105DEFINE_FETCH_##kind(u32) \
106DEFINE_FETCH_##kind(u64)
107
108#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \
109 ((FETCH_FUNC_NAME(kind, u8) == fn) || \
110 (FETCH_FUNC_NAME(kind, u16) == fn) || \
111 (FETCH_FUNC_NAME(kind, u32) == fn) || \
112 (FETCH_FUNC_NAME(kind, u64) == fn))
113
114/* Data fetch function templates */
115#define DEFINE_FETCH_reg(type) \
116static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
117 void *offset, void *dest) \
118{ \
119 *(type *)dest = (type)regs_get_register(regs, \
120 (unsigned int)((unsigned long)offset)); \
92} 121}
93 122DEFINE_BASIC_FETCH_FUNCS(reg)
94static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) 123
95{ 124#define DEFINE_FETCH_stack(type) \
96 return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); 125static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
126 void *offset, void *dest) \
127{ \
128 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
129 (unsigned int)((unsigned long)offset)); \
97} 130}
131DEFINE_BASIC_FETCH_FUNCS(stack)
98 132
99static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 133#define DEFINE_FETCH_retval(type) \
100 void *dummy) 134static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
101{ 135 void *dummy, void *dest) \
102 return regs_return_value(regs); 136{ \
137 *(type *)dest = (type)regs_return_value(regs); \
103} 138}
104 139DEFINE_BASIC_FETCH_FUNCS(retval)
105static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, 140
106 void *dummy) 141#define DEFINE_FETCH_memory(type) \
107{ 142static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
108 return kernel_stack_pointer(regs); 143 void *addr, void *dest) \
144{ \
145 type retval; \
146 if (probe_kernel_address(addr, retval)) \
147 *(type *)dest = 0; \
148 else \
149 *(type *)dest = retval; \
109} 150}
151DEFINE_BASIC_FETCH_FUNCS(memory)
110 152
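
To make the fetch_param indirection above concrete, here is a self-contained sketch of the function DEFINE_FETCH_reg(u32) would generate, dispatched through call_fetch(). struct pt_regs and regs_get_register() are reduced to a one-array fake purely so the example compiles outside the kernel; those stand-ins are assumptions, not kernel definitions.

/* Minimal model of fetch_param / call_fetch: the register fetch for u32,
 * as DEFINE_FETCH_reg(u32) would generate it, dispatched through a
 * fetch_param.  pt_regs is faked with one small array for illustration.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;

struct pt_regs { unsigned long regs[4]; };

static unsigned long regs_get_register(struct pt_regs *regs, unsigned int off)
{
	return regs->regs[off];
}

typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);

struct fetch_param {
	fetch_func_t fn;
	void *data;
};

static void call_fetch(struct fetch_param *fprm, struct pt_regs *regs, void *dest)
{
	fprm->fn(regs, fprm->data, dest);
}

/* What FETCH_FUNC_NAME(reg, u32) expands to */
static void fetch_reg_u32(struct pt_regs *regs, void *offset, void *dest)
{
	*(u32 *)dest = (u32)regs_get_register(regs,
			(unsigned int)((unsigned long)offset));
}

int main(void)
{
	struct pt_regs regs = { .regs = { 0, 0xdeadbeefUL, 0, 0 } };
	struct fetch_param p = { .fn = fetch_reg_u32, .data = (void *)1UL };
	u32 dest;

	call_fetch(&p, &regs, &dest);
	printf("fetched 0x%x\n", (unsigned int)dest);	/* 0xdeadbeef */
	return 0;
}
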
111/* Memory fetching by symbol */ 153/* Memory fetching by symbol */
112struct symbol_cache { 154struct symbol_cache {
@@ -150,51 +192,126 @@ static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
150 return sc; 192 return sc;
151} 193}
152 194
153static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) 195#define DEFINE_FETCH_symbol(type) \
154{ 196static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
155 struct symbol_cache *sc = data; 197 void *data, void *dest) \
156 198{ \
157 if (sc->addr) 199 struct symbol_cache *sc = data; \
158 return fetch_memory(regs, (void *)sc->addr); 200 if (sc->addr) \
159 else 201 fetch_memory_##type(regs, (void *)sc->addr, dest); \
160 return 0; 202 else \
203 *(type *)dest = 0; \
161} 204}
205DEFINE_BASIC_FETCH_FUNCS(symbol)
162 206
163/* Special indirect memory access interface */ 207/* Dereference memory access function */
164struct indirect_fetch_data { 208struct deref_fetch_param {
165 struct fetch_func orig; 209 struct fetch_param orig;
166 long offset; 210 long offset;
167}; 211};
168 212
169static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) 213#define DEFINE_FETCH_deref(type) \
170{ 214static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
171 struct indirect_fetch_data *ind = data; 215 void *data, void *dest) \
172 unsigned long addr; 216{ \
173 217 struct deref_fetch_param *dprm = data; \
174 addr = call_fetch(&ind->orig, regs); 218 unsigned long addr; \
175 if (addr) { 219 call_fetch(&dprm->orig, regs, &addr); \
176 addr += ind->offset; 220 if (addr) { \
177 return fetch_memory(regs, (void *)addr); 221 addr += dprm->offset; \
178 } else 222 fetch_memory_##type(regs, (void *)addr, dest); \
179 return 0; 223 } else \
224 *(type *)dest = 0; \
180} 225}
226DEFINE_BASIC_FETCH_FUNCS(deref)
181 227
182static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) 228static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
183{ 229{
184 if (data->orig.func == fetch_indirect) 230 if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn))
185 free_indirect_fetch_data(data->orig.data); 231 free_deref_fetch_param(data->orig.data);
186 else if (data->orig.func == fetch_symbol) 232 else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))
187 free_symbol_cache(data->orig.data); 233 free_symbol_cache(data->orig.data);
188 kfree(data); 234 kfree(data);
189} 235}
190 236
237/* Default (unsigned long) fetch type */
238#define __DEFAULT_FETCH_TYPE(t) u##t
239#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
240#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
241#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
242
243#define ASSIGN_FETCH_FUNC(kind, type) \
244 .kind = FETCH_FUNC_NAME(kind, type)
245
246#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
247 {.name = #ptype, \
248 .size = sizeof(ftype), \
249 .is_signed = sign, \
250 .print = PRINT_TYPE_FUNC_NAME(ptype), \
251 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
252ASSIGN_FETCH_FUNC(reg, ftype), \
253ASSIGN_FETCH_FUNC(stack, ftype), \
254ASSIGN_FETCH_FUNC(retval, ftype), \
255ASSIGN_FETCH_FUNC(memory, ftype), \
256ASSIGN_FETCH_FUNC(symbol, ftype), \
257ASSIGN_FETCH_FUNC(deref, ftype), \
258 }
259
260/* Fetch type information table */
261static const struct fetch_type {
262 const char *name; /* Name of type */
263 size_t size; /* Byte size of type */
264 int is_signed; /* Signed flag */
265 print_type_func_t print; /* Print functions */
266 const char *fmt; /* Format string */
267 /* Fetch functions */
268 fetch_func_t reg;
269 fetch_func_t stack;
270 fetch_func_t retval;
271 fetch_func_t memory;
272 fetch_func_t symbol;
273 fetch_func_t deref;
274} fetch_type_table[] = {
275 ASSIGN_FETCH_TYPE(u8, u8, 0),
276 ASSIGN_FETCH_TYPE(u16, u16, 0),
277 ASSIGN_FETCH_TYPE(u32, u32, 0),
278 ASSIGN_FETCH_TYPE(u64, u64, 0),
279 ASSIGN_FETCH_TYPE(s8, u8, 1),
280 ASSIGN_FETCH_TYPE(s16, u16, 1),
281 ASSIGN_FETCH_TYPE(s32, u32, 1),
282 ASSIGN_FETCH_TYPE(s64, u64, 1),
283};
284
285static const struct fetch_type *find_fetch_type(const char *type)
286{
287 int i;
288
289 if (!type)
290 type = DEFAULT_FETCH_TYPE_STR;
291
292 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
293 if (strcmp(type, fetch_type_table[i].name) == 0)
294 return &fetch_type_table[i];
295 return NULL;
296}
297
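
find_fetch_type() above is a plain linear scan keyed by the type name that follows ':' in a probe definition, defaulting to the unsigned type of BITS_PER_LONG width when no type is given. A cut-down, compilable model of the same lookup; only two table entries are kept and the 64-bit default is an assumption for the demo.

/* Reduced model of fetch_type_table / find_fetch_type(): a linear scan
 * over named type descriptors with a BITS_PER_LONG-sized default.
 */
#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <stdint.h>

struct fetch_type {
	const char *name;	/* name used in the probe definition */
	size_t size;		/* bytes recorded for this type */
	int is_signed;
};

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define DEFAULT_FETCH_TYPE_STR "u64"	/* assumes BITS_PER_LONG == 64 */

static const struct fetch_type fetch_type_table[] = {
	{ "u32", sizeof(uint32_t), 0 },
	{ "u64", sizeof(uint64_t), 0 },
};

static const struct fetch_type *find_fetch_type(const char *type)
{
	size_t i;

	if (!type)
		type = DEFAULT_FETCH_TYPE_STR;

	for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
		if (strcmp(type, fetch_type_table[i].name) == 0)
			return &fetch_type_table[i];
	return NULL;
}

int main(void)
{
	const struct fetch_type *t = find_fetch_type("u32");
	const struct fetch_type *d = find_fetch_type(NULL);

	printf("u32 -> %zu bytes, default -> %s\n", t->size, d->name);
	return 0;
}
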
298/* Special function: only accepts unsigned long */
299static __kprobes void fetch_stack_address(struct pt_regs *regs,
300 void *dummy, void *dest)
301{
302 *(unsigned long *)dest = kernel_stack_pointer(regs);
303}
304
191/** 305/**
192 * Kprobe event core functions 306 * Kprobe event core functions
193 */ 307 */
194 308
195struct probe_arg { 309struct probe_arg {
196 struct fetch_func fetch; 310 struct fetch_param fetch;
197 const char *name; 311 unsigned int offset; /* Offset from argument entry */
312 const char *name; /* Name of this argument */
313 const char *comm; /* Command of this argument */
314 const struct fetch_type *type; /* Type of this argument */
198}; 315};
199 316
200/* Flags for trace_probe */ 317/* Flags for trace_probe */
@@ -207,8 +324,9 @@ struct trace_probe {
207 unsigned long nhit; 324 unsigned long nhit;
208 unsigned int flags; /* For TP_FLAG_* */ 325 unsigned int flags; /* For TP_FLAG_* */
209 const char *symbol; /* symbol name */ 326 const char *symbol; /* symbol name */
327 struct ftrace_event_class class;
210 struct ftrace_event_call call; 328 struct ftrace_event_call call;
211 struct trace_event event; 329 ssize_t size; /* trace entry size */
212 unsigned int nr_args; 330 unsigned int nr_args;
213 struct probe_arg args[]; 331 struct probe_arg args[];
214}; 332};
@@ -217,6 +335,7 @@ struct trace_probe {
217 (offsetof(struct trace_probe, args) + \ 335 (offsetof(struct trace_probe, args) + \
218 (sizeof(struct probe_arg) * (n))) 336 (sizeof(struct probe_arg) * (n)))
219 337
338
220static __kprobes int probe_is_return(struct trace_probe *tp) 339static __kprobes int probe_is_return(struct trace_probe *tp)
221{ 340{
222 return tp->rp.handler != NULL; 341 return tp->rp.handler != NULL;
@@ -227,51 +346,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
227 return tp->symbol ? tp->symbol : "unknown"; 346 return tp->symbol ? tp->symbol : "unknown";
228} 347}
229 348
230static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
231{
232 int ret = -EINVAL;
233
234 if (ff->func == fetch_argument)
235 ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
236 else if (ff->func == fetch_register) {
237 const char *name;
238 name = regs_query_register_name((unsigned int)((long)ff->data));
239 ret = snprintf(buf, n, "%%%s", name);
240 } else if (ff->func == fetch_stack)
241 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
242 else if (ff->func == fetch_memory)
243 ret = snprintf(buf, n, "@0x%p", ff->data);
244 else if (ff->func == fetch_symbol) {
245 struct symbol_cache *sc = ff->data;
246 if (sc->offset)
247 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
248 sc->offset);
249 else
250 ret = snprintf(buf, n, "@%s", sc->symbol);
251 } else if (ff->func == fetch_retvalue)
252 ret = snprintf(buf, n, "$retval");
253 else if (ff->func == fetch_stack_address)
254 ret = snprintf(buf, n, "$stack");
255 else if (ff->func == fetch_indirect) {
256 struct indirect_fetch_data *id = ff->data;
257 size_t l = 0;
258 ret = snprintf(buf, n, "%+ld(", id->offset);
259 if (ret >= n)
260 goto end;
261 l += ret;
262 ret = probe_arg_string(buf + l, n - l, &id->orig);
263 if (ret < 0)
264 goto end;
265 l += ret;
266 ret = snprintf(buf + l, n - l, ")");
267 ret += l;
268 }
269end:
270 if (ret >= n)
271 return -ENOSPC;
272 return ret;
273}
274
275static int register_probe_event(struct trace_probe *tp); 349static int register_probe_event(struct trace_probe *tp);
276static void unregister_probe_event(struct trace_probe *tp); 350static void unregister_probe_event(struct trace_probe *tp);
277 351
@@ -330,6 +404,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
330 goto error; 404 goto error;
331 } 405 }
332 406
407 tp->call.class = &tp->class;
333 tp->call.name = kstrdup(event, GFP_KERNEL); 408 tp->call.name = kstrdup(event, GFP_KERNEL);
334 if (!tp->call.name) 409 if (!tp->call.name)
335 goto error; 410 goto error;
@@ -339,8 +414,8 @@ static struct trace_probe *alloc_trace_probe(const char *group,
339 goto error; 414 goto error;
340 } 415 }
341 416
342 tp->call.system = kstrdup(group, GFP_KERNEL); 417 tp->class.system = kstrdup(group, GFP_KERNEL);
343 if (!tp->call.system) 418 if (!tp->class.system)
344 goto error; 419 goto error;
345 420
346 INIT_LIST_HEAD(&tp->list); 421 INIT_LIST_HEAD(&tp->list);
@@ -354,11 +429,12 @@ error:
354 429
355static void free_probe_arg(struct probe_arg *arg) 430static void free_probe_arg(struct probe_arg *arg)
356{ 431{
357 if (arg->fetch.func == fetch_symbol) 432 if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn))
433 free_deref_fetch_param(arg->fetch.data);
434 else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))
358 free_symbol_cache(arg->fetch.data); 435 free_symbol_cache(arg->fetch.data);
359 else if (arg->fetch.func == fetch_indirect)
360 free_indirect_fetch_data(arg->fetch.data);
361 kfree(arg->name); 436 kfree(arg->name);
437 kfree(arg->comm);
362} 438}
363 439
364static void free_trace_probe(struct trace_probe *tp) 440static void free_trace_probe(struct trace_probe *tp)
@@ -368,7 +444,7 @@ static void free_trace_probe(struct trace_probe *tp)
368 for (i = 0; i < tp->nr_args; i++) 444 for (i = 0; i < tp->nr_args; i++)
369 free_probe_arg(&tp->args[i]); 445 free_probe_arg(&tp->args[i]);
370 446
371 kfree(tp->call.system); 447 kfree(tp->call.class->system);
372 kfree(tp->call.name); 448 kfree(tp->call.name);
373 kfree(tp->symbol); 449 kfree(tp->symbol);
374 kfree(tp); 450 kfree(tp);
@@ -381,7 +457,7 @@ static struct trace_probe *find_probe_event(const char *event,
381 457
382 list_for_each_entry(tp, &probe_list, list) 458 list_for_each_entry(tp, &probe_list, list)
383 if (strcmp(tp->call.name, event) == 0 && 459 if (strcmp(tp->call.name, event) == 0 &&
384 strcmp(tp->call.system, group) == 0) 460 strcmp(tp->call.class->system, group) == 0)
385 return tp; 461 return tp;
386 return NULL; 462 return NULL;
387} 463}
@@ -406,7 +482,7 @@ static int register_trace_probe(struct trace_probe *tp)
406 mutex_lock(&probe_lock); 482 mutex_lock(&probe_lock);
407 483
408 /* register as an event */ 484 /* register as an event */
409 old_tp = find_probe_event(tp->call.name, tp->call.system); 485 old_tp = find_probe_event(tp->call.name, tp->call.class->system);
410 if (old_tp) { 486 if (old_tp) {
411 /* delete old event */ 487 /* delete old event */
412 unregister_trace_probe(old_tp); 488 unregister_trace_probe(old_tp);
@@ -464,46 +540,41 @@ static int split_symbol_offset(char *symbol, unsigned long *offset)
464#define PARAM_MAX_ARGS 16 540#define PARAM_MAX_ARGS 16
465#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) 541#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
466 542
467static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) 543static int parse_probe_vars(char *arg, const struct fetch_type *t,
544 struct fetch_param *f, int is_return)
468{ 545{
469 int ret = 0; 546 int ret = 0;
470 unsigned long param; 547 unsigned long param;
471 548
472 if (strcmp(arg, "retval") == 0) { 549 if (strcmp(arg, "retval") == 0) {
473 if (is_return) { 550 if (is_return)
474 ff->func = fetch_retvalue; 551 f->fn = t->retval;
475 ff->data = NULL; 552 else
476 } else
477 ret = -EINVAL; 553 ret = -EINVAL;
478 } else if (strncmp(arg, "stack", 5) == 0) { 554 } else if (strncmp(arg, "stack", 5) == 0) {
479 if (arg[5] == '\0') { 555 if (arg[5] == '\0') {
480 ff->func = fetch_stack_address; 556 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
481 ff->data = NULL; 557 f->fn = fetch_stack_address;
558 else
559 ret = -EINVAL;
482 } else if (isdigit(arg[5])) { 560 } else if (isdigit(arg[5])) {
483 ret = strict_strtoul(arg + 5, 10, &param); 561 ret = strict_strtoul(arg + 5, 10, &param);
484 if (ret || param > PARAM_MAX_STACK) 562 if (ret || param > PARAM_MAX_STACK)
485 ret = -EINVAL; 563 ret = -EINVAL;
486 else { 564 else {
487 ff->func = fetch_stack; 565 f->fn = t->stack;
488 ff->data = (void *)param; 566 f->data = (void *)param;
489 } 567 }
490 } else 568 } else
491 ret = -EINVAL; 569 ret = -EINVAL;
492 } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
493 ret = strict_strtoul(arg + 3, 10, &param);
494 if (ret || param > PARAM_MAX_ARGS)
495 ret = -EINVAL;
496 else {
497 ff->func = fetch_argument;
498 ff->data = (void *)param;
499 }
500 } else 570 } else
501 ret = -EINVAL; 571 ret = -EINVAL;
502 return ret; 572 return ret;
503} 573}
504 574
505/* Recursive argument parser */ 575/* Recursive argument parser */
506static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 576static int __parse_probe_arg(char *arg, const struct fetch_type *t,
577 struct fetch_param *f, int is_return)
507{ 578{
508 int ret = 0; 579 int ret = 0;
509 unsigned long param; 580 unsigned long param;
@@ -512,13 +583,13 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
512 583
513 switch (arg[0]) { 584 switch (arg[0]) {
514 case '$': 585 case '$':
515 ret = parse_probe_vars(arg + 1, ff, is_return); 586 ret = parse_probe_vars(arg + 1, t, f, is_return);
516 break; 587 break;
517 case '%': /* named register */ 588 case '%': /* named register */
518 ret = regs_query_register_offset(arg + 1); 589 ret = regs_query_register_offset(arg + 1);
519 if (ret >= 0) { 590 if (ret >= 0) {
520 ff->func = fetch_register; 591 f->fn = t->reg;
521 ff->data = (void *)(unsigned long)ret; 592 f->data = (void *)(unsigned long)ret;
522 ret = 0; 593 ret = 0;
523 } 594 }
524 break; 595 break;
@@ -527,26 +598,22 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
527 ret = strict_strtoul(arg + 1, 0, &param); 598 ret = strict_strtoul(arg + 1, 0, &param);
528 if (ret) 599 if (ret)
529 break; 600 break;
530 ff->func = fetch_memory; 601 f->fn = t->memory;
531 ff->data = (void *)param; 602 f->data = (void *)param;
532 } else { 603 } else {
533 ret = split_symbol_offset(arg + 1, &offset); 604 ret = split_symbol_offset(arg + 1, &offset);
534 if (ret) 605 if (ret)
535 break; 606 break;
536 ff->data = alloc_symbol_cache(arg + 1, offset); 607 f->data = alloc_symbol_cache(arg + 1, offset);
537 if (ff->data) 608 if (f->data)
538 ff->func = fetch_symbol; 609 f->fn = t->symbol;
539 else
540 ret = -EINVAL;
541 } 610 }
542 break; 611 break;
543 case '+': /* indirect memory */ 612 case '+': /* deref memory */
544 case '-': 613 case '-':
545 tmp = strchr(arg, '('); 614 tmp = strchr(arg, '(');
546 if (!tmp) { 615 if (!tmp)
547 ret = -EINVAL;
548 break; 616 break;
549 }
550 *tmp = '\0'; 617 *tmp = '\0';
551 ret = strict_strtol(arg + 1, 0, &offset); 618 ret = strict_strtol(arg + 1, 0, &offset);
552 if (ret) 619 if (ret)
@@ -556,38 +623,58 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
556 arg = tmp + 1; 623 arg = tmp + 1;
557 tmp = strrchr(arg, ')'); 624 tmp = strrchr(arg, ')');
558 if (tmp) { 625 if (tmp) {
559 struct indirect_fetch_data *id; 626 struct deref_fetch_param *dprm;
627 const struct fetch_type *t2 = find_fetch_type(NULL);
560 *tmp = '\0'; 628 *tmp = '\0';
561 id = kzalloc(sizeof(struct indirect_fetch_data), 629 dprm = kzalloc(sizeof(struct deref_fetch_param),
562 GFP_KERNEL); 630 GFP_KERNEL);
563 if (!id) 631 if (!dprm)
564 return -ENOMEM; 632 return -ENOMEM;
565 id->offset = offset; 633 dprm->offset = offset;
566 ret = __parse_probe_arg(arg, &id->orig, is_return); 634 ret = __parse_probe_arg(arg, t2, &dprm->orig,
635 is_return);
567 if (ret) 636 if (ret)
568 kfree(id); 637 kfree(dprm);
569 else { 638 else {
570 ff->func = fetch_indirect; 639 f->fn = t->deref;
571 ff->data = (void *)id; 640 f->data = (void *)dprm;
572 } 641 }
573 } else 642 }
574 ret = -EINVAL;
575 break; 643 break;
576 default:
577 /* TODO: support custom handler */
578 ret = -EINVAL;
579 } 644 }
645 if (!ret && !f->fn)
646 ret = -EINVAL;
580 return ret; 647 return ret;
581} 648}
582 649
583/* String length checking wrapper */ 650/* String length checking wrapper */
584static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 651static int parse_probe_arg(char *arg, struct trace_probe *tp,
652 struct probe_arg *parg, int is_return)
585{ 653{
654 const char *t;
655
586 if (strlen(arg) > MAX_ARGSTR_LEN) { 656 if (strlen(arg) > MAX_ARGSTR_LEN) {
587 pr_info("Argument is too long.: %s\n", arg); 657 pr_info("Argument is too long.: %s\n", arg);
588 return -ENOSPC; 658 return -ENOSPC;
589 } 659 }
590 return __parse_probe_arg(arg, ff, is_return); 660 parg->comm = kstrdup(arg, GFP_KERNEL);
661 if (!parg->comm) {
662 pr_info("Failed to allocate memory for command '%s'.\n", arg);
663 return -ENOMEM;
664 }
665 t = strchr(parg->comm, ':');
666 if (t) {
667 arg[t - parg->comm] = '\0';
668 t++;
669 }
670 parg->type = find_fetch_type(t);
671 if (!parg->type) {
672 pr_info("Unsupported type: %s\n", t);
673 return -EINVAL;
674 }
675 parg->offset = tp->size;
676 tp->size += parg->type->size;
677 return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
591} 678}
592 679
593/* Return 1 if name is reserved or already used by another argument */ 680/* Return 1 if name is reserved or already used by another argument */
@@ -611,22 +698,24 @@ static int create_trace_probe(int argc, char **argv)
611 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 698 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
612 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 699 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
613 * Fetch args: 700 * Fetch args:
614 * $argN : fetch Nth of function argument. (N:0-)
615 * $retval : fetch return value 701 * $retval : fetch return value
616 * $stack : fetch stack address 702 * $stack : fetch stack address
617 * $stackN : fetch Nth of stack (N:0-) 703 * $stackN : fetch Nth of stack (N:0-)
618 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 704 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
619 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 705 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
620 * %REG : fetch register REG 706 * %REG : fetch register REG
621 * Indirect memory fetch: 707 * Dereferencing memory fetch:
622 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 708 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
623 * Alias name of args: 709 * Alias name of args:
624 * NAME=FETCHARG : set NAME as alias of FETCHARG. 710 * NAME=FETCHARG : set NAME as alias of FETCHARG.
711 * Type of args:
712 * FETCHARG:TYPE : use TYPE instead of unsigned long.
625 */ 713 */
626 struct trace_probe *tp; 714 struct trace_probe *tp;
627 int i, ret = 0; 715 int i, ret = 0;
628 int is_return = 0, is_delete = 0; 716 int is_return = 0, is_delete = 0;
629 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 717 char *symbol = NULL, *event = NULL, *group = NULL;
718 char *arg, *tmp;
630 unsigned long offset = 0; 719 unsigned long offset = 0;
631 void *addr = NULL; 720 void *addr = NULL;
632 char buf[MAX_EVENT_NAME_LEN]; 721 char buf[MAX_EVENT_NAME_LEN];
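
The comment block in this hunk documents the command syntax accepted via the kprobe_events control file, including the new FETCHARG:TYPE suffix. As a usage sketch only: the debugfs mount point, the probed symbol, and the chosen registers below are assumptions for illustration, not taken from this patch.

/* Illustrative only: writes a kprobe-event definition using the syntax
 * documented above.  The tracing path, symbol and fetch args are
 * assumptions for the example.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	const char *ctrl = "/sys/kernel/debug/tracing/kprobe_events";
	/* NAME=FETCHARG, FETCHARG:TYPE and +offs(ARG) forms from the comment */
	const char *cmd =
		"p:myprobe do_sys_open dfd=%di:s32 sp0=+0($stack):u64\n";
	int fd = open(ctrl, O_WRONLY | O_APPEND);

	if (fd < 0) {
		perror("open kprobe_events");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write probe definition");
	close(fd);
	return 0;
}
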
@@ -651,12 +740,12 @@ static int create_trace_probe(int argc, char **argv)
651 event = strchr(group, '/') + 1; 740 event = strchr(group, '/') + 1;
652 event[-1] = '\0'; 741 event[-1] = '\0';
653 if (strlen(group) == 0) { 742 if (strlen(group) == 0) {
654 pr_info("Group name is not specifiled\n"); 743 pr_info("Group name is not specified\n");
655 return -EINVAL; 744 return -EINVAL;
656 } 745 }
657 } 746 }
658 if (strlen(event) == 0) { 747 if (strlen(event) == 0) {
659 pr_info("Event name is not specifiled\n"); 748 pr_info("Event name is not specified\n");
660 return -EINVAL; 749 return -EINVAL;
661 } 750 }
662 } 751 }
@@ -689,7 +778,7 @@ static int create_trace_probe(int argc, char **argv)
689 return -EINVAL; 778 return -EINVAL;
690 } 779 }
691 /* an address specified */ 780 /* an address specified */
692 ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); 781 ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
693 if (ret) { 782 if (ret) {
694 pr_info("Failed to parse address.\n"); 783 pr_info("Failed to parse address.\n");
695 return ret; 784 return ret;
@@ -739,13 +828,6 @@ static int create_trace_probe(int argc, char **argv)
739 else 828 else
740 arg = argv[i]; 829 arg = argv[i];
741 830
742 if (conflict_field_name(argv[i], tp->args, i)) {
743 pr_info("Argument%d name '%s' conflicts with "
744 "another field.\n", i, argv[i]);
745 ret = -EINVAL;
746 goto error;
747 }
748
749 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 831 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
750 if (!tp->args[i].name) { 832 if (!tp->args[i].name) {
751 pr_info("Failed to allocate argument%d name '%s'.\n", 833 pr_info("Failed to allocate argument%d name '%s'.\n",
@@ -753,9 +835,19 @@ static int create_trace_probe(int argc, char **argv)
753 ret = -ENOMEM; 835 ret = -ENOMEM;
754 goto error; 836 goto error;
755 } 837 }
838 tmp = strchr(tp->args[i].name, ':');
839 if (tmp)
840 *tmp = '_'; /* convert : to _ */
841
842 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
843 pr_info("Argument%d name '%s' conflicts with "
844 "another field.\n", i, argv[i]);
845 ret = -EINVAL;
846 goto error;
847 }
756 848
757 /* Parse fetch argument */ 849 /* Parse fetch argument */
758 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); 850 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
759 if (ret) { 851 if (ret) {
760 pr_info("Parse error at argument%d. (%d)\n", i, ret); 852 pr_info("Parse error at argument%d. (%d)\n", i, ret);
761 kfree(tp->args[i].name); 853 kfree(tp->args[i].name);
@@ -810,11 +902,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
810static int probes_seq_show(struct seq_file *m, void *v) 902static int probes_seq_show(struct seq_file *m, void *v)
811{ 903{
812 struct trace_probe *tp = v; 904 struct trace_probe *tp = v;
813 int i, ret; 905 int i;
814 char buf[MAX_ARGSTR_LEN + 1];
815 906
816 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 907 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
817 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); 908 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
818 909
819 if (!tp->symbol) 910 if (!tp->symbol)
820 seq_printf(m, " 0x%p", tp->rp.kp.addr); 911 seq_printf(m, " 0x%p", tp->rp.kp.addr);
@@ -823,15 +914,10 @@ static int probes_seq_show(struct seq_file *m, void *v)
823 else 914 else
824 seq_printf(m, " %s", probe_symbol(tp)); 915 seq_printf(m, " %s", probe_symbol(tp));
825 916
826 for (i = 0; i < tp->nr_args; i++) { 917 for (i = 0; i < tp->nr_args; i++)
827 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); 918 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
828 if (ret < 0) {
829 pr_warning("Argument%d decoding error(%d).\n", i, ret);
830 return ret;
831 }
832 seq_printf(m, " %s=%s", tp->args[i].name, buf);
833 }
834 seq_printf(m, "\n"); 919 seq_printf(m, "\n");
920
835 return 0; 921 return 0;
836} 922}
837 923
@@ -958,12 +1044,13 @@ static const struct file_operations kprobe_profile_ops = {
958}; 1044};
959 1045
960/* Kprobe handler */ 1046/* Kprobe handler */
961static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1047static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
962{ 1048{
963 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1049 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
964 struct kprobe_trace_entry *entry; 1050 struct kprobe_trace_entry_head *entry;
965 struct ring_buffer_event *event; 1051 struct ring_buffer_event *event;
966 struct ring_buffer *buffer; 1052 struct ring_buffer *buffer;
1053 u8 *data;
967 int size, i, pc; 1054 int size, i, pc;
968 unsigned long irq_flags; 1055 unsigned long irq_flags;
969 struct ftrace_event_call *call = &tp->call; 1056 struct ftrace_event_call *call = &tp->call;
@@ -973,32 +1060,32 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
973 local_save_flags(irq_flags); 1060 local_save_flags(irq_flags);
974 pc = preempt_count(); 1061 pc = preempt_count();
975 1062
976 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1063 size = sizeof(*entry) + tp->size;
977 1064
978 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1065 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
979 irq_flags, pc); 1066 size, irq_flags, pc);
980 if (!event) 1067 if (!event)
981 return 0; 1068 return;
982 1069
983 entry = ring_buffer_event_data(event); 1070 entry = ring_buffer_event_data(event);
984 entry->nargs = tp->nr_args;
985 entry->ip = (unsigned long)kp->addr; 1071 entry->ip = (unsigned long)kp->addr;
1072 data = (u8 *)&entry[1];
986 for (i = 0; i < tp->nr_args; i++) 1073 for (i = 0; i < tp->nr_args; i++)
987 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1074 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
988 1075
989 if (!filter_current_check_discard(buffer, call, entry, event)) 1076 if (!filter_current_check_discard(buffer, call, entry, event))
990 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1077 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
991 return 0;
992} 1078}
993 1079
994/* Kretprobe handler */ 1080/* Kretprobe handler */
995static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, 1081static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
996 struct pt_regs *regs) 1082 struct pt_regs *regs)
997{ 1083{
998 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1084 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
999 struct kretprobe_trace_entry *entry; 1085 struct kretprobe_trace_entry_head *entry;
1000 struct ring_buffer_event *event; 1086 struct ring_buffer_event *event;
1001 struct ring_buffer *buffer; 1087 struct ring_buffer *buffer;
1088 u8 *data;
1002 int size, i, pc; 1089 int size, i, pc;
1003 unsigned long irq_flags; 1090 unsigned long irq_flags;
1004 struct ftrace_event_call *call = &tp->call; 1091 struct ftrace_event_call *call = &tp->call;
@@ -1006,39 +1093,37 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
1006 local_save_flags(irq_flags); 1093 local_save_flags(irq_flags);
1007 pc = preempt_count(); 1094 pc = preempt_count();
1008 1095
1009 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1096 size = sizeof(*entry) + tp->size;
1010 1097
1011 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1098 event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1012 irq_flags, pc); 1099 size, irq_flags, pc);
1013 if (!event) 1100 if (!event)
1014 return 0; 1101 return;
1015 1102
1016 entry = ring_buffer_event_data(event); 1103 entry = ring_buffer_event_data(event);
1017 entry->nargs = tp->nr_args;
1018 entry->func = (unsigned long)tp->rp.kp.addr; 1104 entry->func = (unsigned long)tp->rp.kp.addr;
1019 entry->ret_ip = (unsigned long)ri->ret_addr; 1105 entry->ret_ip = (unsigned long)ri->ret_addr;
1106 data = (u8 *)&entry[1];
1020 for (i = 0; i < tp->nr_args; i++) 1107 for (i = 0; i < tp->nr_args; i++)
1021 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1108 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1022 1109
1023 if (!filter_current_check_discard(buffer, call, entry, event)) 1110 if (!filter_current_check_discard(buffer, call, entry, event))
1024 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1111 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1025
1026 return 0;
1027} 1112}
1028 1113
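
With the nargs field gone, both trace handlers above copy each argument to data + args[i].offset immediately behind the fixed entry head, using the tp->size accumulated at parse time. A standalone sketch of that layout arithmetic follows; the head struct and the two argument types are invented for the demo, not copied from the kernel headers.

/* Models the record layout used by kprobe_trace_func(): a fixed head
 * followed by per-argument payloads at precomputed offsets.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct entry_head {
	unsigned long ip;
};

struct arg_desc {
	const char *name;
	size_t size;
	unsigned int offset;	/* offset from the end of the head */
};

int main(void)
{
	struct arg_desc args[] = {
		{ "flags", sizeof(uint32_t), 0 },
		{ "count", sizeof(uint64_t), 0 },
	};
	union {
		struct entry_head head;
		unsigned char bytes[64];
	} record;
	unsigned char *data = record.bytes + sizeof(record.head);
	size_t tp_size = 0, i;

	/* Parse time: assign offsets the way parse_probe_arg() does */
	for (i = 0; i < 2; i++) {
		args[i].offset = tp_size;
		tp_size += args[i].size;
	}

	/* Probe hit: size = sizeof(*entry) + tp->size, args behind the head */
	record.head.ip = 0xc0de;
	memset(data, 0, tp_size);

	printf("record size = %zu bytes\n", sizeof(record.head) + tp_size);
	for (i = 0; i < 2; i++)
		printf("  %s at data+%u (%zu bytes)\n",
		       args[i].name, args[i].offset, args[i].size);
	return 0;
}
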
1029/* Event entry printers */ 1114/* Event entry printers */
1030enum print_line_t 1115enum print_line_t
1031print_kprobe_event(struct trace_iterator *iter, int flags) 1116print_kprobe_event(struct trace_iterator *iter, int flags,
1117 struct trace_event *event)
1032{ 1118{
1033 struct kprobe_trace_entry *field; 1119 struct kprobe_trace_entry_head *field;
1034 struct trace_seq *s = &iter->seq; 1120 struct trace_seq *s = &iter->seq;
1035 struct trace_event *event;
1036 struct trace_probe *tp; 1121 struct trace_probe *tp;
1122 u8 *data;
1037 int i; 1123 int i;
1038 1124
1039 field = (struct kprobe_trace_entry *)iter->ent; 1125 field = (struct kprobe_trace_entry_head *)iter->ent;
1040 event = ftrace_find_event(field->ent.type); 1126 tp = container_of(event, struct trace_probe, call.event);
1041 tp = container_of(event, struct trace_probe, event);
1042 1127
1043 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1128 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1044 goto partial; 1129 goto partial;
@@ -1049,9 +1134,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
1049 if (!trace_seq_puts(s, ")")) 1134 if (!trace_seq_puts(s, ")"))
1050 goto partial; 1135 goto partial;
1051 1136
1052 for (i = 0; i < field->nargs; i++) 1137 data = (u8 *)&field[1];
1053 if (!trace_seq_printf(s, " %s=%lx", 1138 for (i = 0; i < tp->nr_args; i++)
1054 tp->args[i].name, field->args[i])) 1139 if (!tp->args[i].type->print(s, tp->args[i].name,
1140 data + tp->args[i].offset))
1055 goto partial; 1141 goto partial;
1056 1142
1057 if (!trace_seq_puts(s, "\n")) 1143 if (!trace_seq_puts(s, "\n"))
@@ -1063,17 +1149,17 @@ partial:
1063} 1149}
1064 1150
1065enum print_line_t 1151enum print_line_t
1066print_kretprobe_event(struct trace_iterator *iter, int flags) 1152print_kretprobe_event(struct trace_iterator *iter, int flags,
1153 struct trace_event *event)
1067{ 1154{
1068 struct kretprobe_trace_entry *field; 1155 struct kretprobe_trace_entry_head *field;
1069 struct trace_seq *s = &iter->seq; 1156 struct trace_seq *s = &iter->seq;
1070 struct trace_event *event;
1071 struct trace_probe *tp; 1157 struct trace_probe *tp;
1158 u8 *data;
1072 int i; 1159 int i;
1073 1160
1074 field = (struct kretprobe_trace_entry *)iter->ent; 1161 field = (struct kretprobe_trace_entry_head *)iter->ent;
1075 event = ftrace_find_event(field->ent.type); 1162 tp = container_of(event, struct trace_probe, call.event);
1076 tp = container_of(event, struct trace_probe, event);
1077 1163
1078 if (!trace_seq_printf(s, "%s: (", tp->call.name)) 1164 if (!trace_seq_printf(s, "%s: (", tp->call.name))
1079 goto partial; 1165 goto partial;
@@ -1090,9 +1176,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
1090 if (!trace_seq_puts(s, ")")) 1176 if (!trace_seq_puts(s, ")"))
1091 goto partial; 1177 goto partial;
1092 1178
1093 for (i = 0; i < field->nargs; i++) 1179 data = (u8 *)&field[1];
1094 if (!trace_seq_printf(s, " %s=%lx", 1180 for (i = 0; i < tp->nr_args; i++)
1095 tp->args[i].name, field->args[i])) 1181 if (!tp->args[i].type->print(s, tp->args[i].name,
1182 data + tp->args[i].offset))
1096 goto partial; 1183 goto partial;
1097 1184
1098 if (!trace_seq_puts(s, "\n")) 1185 if (!trace_seq_puts(s, "\n"))
@@ -1129,8 +1216,6 @@ static void probe_event_disable(struct ftrace_event_call *call)
1129 1216
1130static int probe_event_raw_init(struct ftrace_event_call *event_call) 1217static int probe_event_raw_init(struct ftrace_event_call *event_call)
1131{ 1218{
1132 INIT_LIST_HEAD(&event_call->fields);
1133
1134 return 0; 1219 return 0;
1135} 1220}
1136 1221
@@ -1148,242 +1233,170 @@ static int probe_event_raw_init(struct ftrace_event_call *event_call)
1148static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1233static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1149{ 1234{
1150 int ret, i; 1235 int ret, i;
1151 struct kprobe_trace_entry field; 1236 struct kprobe_trace_entry_head field;
1152 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1237 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1153 1238
1154 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1239 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1155 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1156 /* Set argument names as fields */ 1240 /* Set argument names as fields */
1157 for (i = 0; i < tp->nr_args; i++) 1241 for (i = 0; i < tp->nr_args; i++) {
1158 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1242 ret = trace_define_field(event_call, tp->args[i].type->name,
1243 tp->args[i].name,
1244 sizeof(field) + tp->args[i].offset,
1245 tp->args[i].type->size,
1246 tp->args[i].type->is_signed,
1247 FILTER_OTHER);
1248 if (ret)
1249 return ret;
1250 }
1159 return 0; 1251 return 0;
1160} 1252}
1161 1253
1162static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 1254static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1163{ 1255{
1164 int ret, i; 1256 int ret, i;
1165 struct kretprobe_trace_entry field; 1257 struct kretprobe_trace_entry_head field;
1166 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1258 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1167 1259
1168 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1260 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1169 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1261 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1170 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1171 /* Set argument names as fields */ 1262 /* Set argument names as fields */
1172 for (i = 0; i < tp->nr_args; i++) 1263 for (i = 0; i < tp->nr_args; i++) {
1173 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1264 ret = trace_define_field(event_call, tp->args[i].type->name,
1265 tp->args[i].name,
1266 sizeof(field) + tp->args[i].offset,
1267 tp->args[i].type->size,
1268 tp->args[i].type->is_signed,
1269 FILTER_OTHER);
1270 if (ret)
1271 return ret;
1272 }
1174 return 0; 1273 return 0;
1175} 1274}
1176 1275
1177static int __probe_event_show_format(struct trace_seq *s, 1276static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1178 struct trace_probe *tp, const char *fmt,
1179 const char *arg)
1180{ 1277{
1181 int i; 1278 int i;
1279 int pos = 0;
1182 1280
1183 /* Show format */ 1281 const char *fmt, *arg;
1184 if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
1185 return 0;
1186 1282
1187 for (i = 0; i < tp->nr_args; i++) 1283 if (!probe_is_return(tp)) {
1188 if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) 1284 fmt = "(%lx)";
1189 return 0; 1285 arg = "REC->" FIELD_STRING_IP;
1286 } else {
1287 fmt = "(%lx <- %lx)";
1288 arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1289 }
1190 1290
1191 if (!trace_seq_printf(s, "\", %s", arg)) 1291 /* When len=0, we just calculate the needed length */
1192 return 0; 1292#define LEN_OR_ZERO (len ? len - pos : 0)
1193 1293
1194 for (i = 0; i < tp->nr_args; i++) 1294 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1195 if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
1196 return 0;
1197 1295
1198 return trace_seq_puts(s, "\n"); 1296 for (i = 0; i < tp->nr_args; i++) {
1199} 1297 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1298 tp->args[i].name, tp->args[i].type->fmt);
1299 }
1200 1300
1201#undef SHOW_FIELD 1301 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1202#define SHOW_FIELD(type, item, name) \
1203 do { \
1204 ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
1205 "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
1206 (unsigned int)offsetof(typeof(field), item),\
1207 (unsigned int)sizeof(type), \
1208 is_signed_type(type)); \
1209 if (!ret) \
1210 return 0; \
1211 } while (0)
1212 1302
1213static int kprobe_event_show_format(struct ftrace_event_call *call, 1303 for (i = 0; i < tp->nr_args; i++) {
1214 struct trace_seq *s) 1304 pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1215{ 1305 tp->args[i].name);
1216 struct kprobe_trace_entry field __attribute__((unused)); 1306 }
1217 int ret, i;
1218 struct trace_probe *tp = (struct trace_probe *)call->data;
1219 1307
1220 SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); 1308#undef LEN_OR_ZERO
1221 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
1222 1309
1223 /* Show fields */ 1310 /* return the length of print_fmt */
1224 for (i = 0; i < tp->nr_args; i++) 1311 return pos;
1225 SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
1226 trace_seq_puts(s, "\n");
1227
1228 return __probe_event_show_format(s, tp, "(%lx)",
1229 "REC->" FIELD_STRING_IP);
1230} 1312}
1231 1313
1232static int kretprobe_event_show_format(struct ftrace_event_call *call, 1314static int set_print_fmt(struct trace_probe *tp)
1233 struct trace_seq *s)
1234{ 1315{
1235 struct kretprobe_trace_entry field __attribute__((unused)); 1316 int len;
1236 int ret, i; 1317 char *print_fmt;
1237 struct trace_probe *tp = (struct trace_probe *)call->data;
1238 1318
1239 SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); 1319 /* First: called with 0 length to calculate the needed length */
1240 SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); 1320 len = __set_print_fmt(tp, NULL, 0);
1241 SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); 1321 print_fmt = kmalloc(len + 1, GFP_KERNEL);
1322 if (!print_fmt)
1323 return -ENOMEM;
1242 1324
1243 /* Show fields */ 1325 /* Second: actually write the @print_fmt */
1244 for (i = 0; i < tp->nr_args; i++) 1326 __set_print_fmt(tp, print_fmt, len + 1);
1245 SHOW_FIELD(unsigned long, args[i], tp->args[i].name); 1327 tp->call.print_fmt = print_fmt;
1246 trace_seq_puts(s, "\n");
1247 1328
1248 return __probe_event_show_format(s, tp, "(%lx <- %lx)", 1329 return 0;
1249 "REC->" FIELD_STRING_FUNC
1250 ", REC->" FIELD_STRING_RETIP);
1251} 1330}
1252 1331
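
__set_print_fmt() uses the common two-pass snprintf idiom: a first call with length 0 only measures, then the buffer is allocated and the same code writes into it. A compact standalone version of the pattern; the argument names are made up, and the measuring pass passes NULL explicitly to stay well-defined in userspace.

/* Two-pass snprintf sizing as used by __set_print_fmt()/set_print_fmt():
 * pass 1 with a zero length only measures, pass 2 writes for real.
 */
#include <stdio.h>
#include <stdlib.h>

static int build_fmt(char *buf, int len, const char *args[], int nr_args)
{
	int i, pos = 0;

#define DST         (len ? buf + pos : NULL)
#define LEN_OR_ZERO (len ? len - pos : 0)
	pos += snprintf(DST, LEN_OR_ZERO, "\"(%%lx)");
	for (i = 0; i < nr_args; i++)
		pos += snprintf(DST, LEN_OR_ZERO, " %s=%%lx", args[i]);
	pos += snprintf(DST, LEN_OR_ZERO, "\"");
#undef DST
#undef LEN_OR_ZERO

	return pos;	/* bytes needed, excluding the trailing NUL */
}

int main(void)
{
	const char *args[] = { "dfd", "flags" };
	int len = build_fmt(NULL, 0, args, 2);
	char *print_fmt = malloc(len + 1);

	if (!print_fmt)
		return 1;
	build_fmt(print_fmt, len + 1, args, 2);
	printf("print fmt: %s\n", print_fmt);	/* "(%lx) dfd=%lx flags=%lx" */
	free(print_fmt);
	return 0;
}
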
1253#ifdef CONFIG_EVENT_PROFILE 1332#ifdef CONFIG_PERF_EVENTS
1254 1333
1255/* Kprobe profile handler */ 1334/* Kprobe profile handler */
1256static __kprobes int kprobe_profile_func(struct kprobe *kp, 1335static __kprobes void kprobe_perf_func(struct kprobe *kp,
1257 struct pt_regs *regs) 1336 struct pt_regs *regs)
1258{ 1337{
1259 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1338 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1260 struct ftrace_event_call *call = &tp->call; 1339 struct ftrace_event_call *call = &tp->call;
1261 struct kprobe_trace_entry *entry; 1340 struct kprobe_trace_entry_head *entry;
1262 struct trace_entry *ent; 1341 struct hlist_head *head;
1263 int size, __size, i, pc, __cpu; 1342 u8 *data;
1264 unsigned long irq_flags; 1343 int size, __size, i;
1265 char *trace_buf;
1266 char *raw_data;
1267 int rctx; 1344 int rctx;
1268 1345
1269 pc = preempt_count(); 1346 __size = sizeof(*entry) + tp->size;
1270 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1271 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1347 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1272 size -= sizeof(u32); 1348 size -= sizeof(u32);
1273 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1349 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1274 "profile buffer not large enough")) 1350 "profile buffer not large enough"))
1275 return 0; 1351 return;
1276
1277 /*
1278 * Protect the non nmi buffer
1279 * This also protects the rcu read side
1280 */
1281 local_irq_save(irq_flags);
1282
1283 rctx = perf_swevent_get_recursion_context();
1284 if (rctx < 0)
1285 goto end_recursion;
1286 1352
1287 __cpu = smp_processor_id(); 1353 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1288 1354 if (!entry)
1289 if (in_nmi()) 1355 return;
1290 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1291 else
1292 trace_buf = rcu_dereference(perf_trace_buf);
1293
1294 if (!trace_buf)
1295 goto end;
1296
1297 raw_data = per_cpu_ptr(trace_buf, __cpu);
1298
1299 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1300 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1301 entry = (struct kprobe_trace_entry *)raw_data;
1302 ent = &entry->ent;
1303 1356
1304 tracing_generic_entry_update(ent, irq_flags, pc);
1305 ent->type = call->id;
1306 entry->nargs = tp->nr_args;
1307 entry->ip = (unsigned long)kp->addr; 1357 entry->ip = (unsigned long)kp->addr;
1358 data = (u8 *)&entry[1];
1308 for (i = 0; i < tp->nr_args; i++) 1359 for (i = 0; i < tp->nr_args; i++)
1309 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1310 perf_tp_event(call->id, entry->ip, 1, entry, size);
1311
1312end:
1313 perf_swevent_put_recursion_context(rctx);
1314end_recursion:
1315 local_irq_restore(irq_flags);
1316 1361
1317 return 0; 1362 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1318} 1364}
1319 1365
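
The size arithmetic in kprobe_perf_func() above, ALIGN(__size + sizeof(u32), sizeof(u64)) - sizeof(u32), keeps the record plus the u32 length header that perf prepends a multiple of 8 bytes. A quick standalone check of that arithmetic, with ALIGN() re-spelled here only so the snippet builds on its own.

/* Checks the perf buffer sizing used by the perf handlers: the reserved
 * size is chosen so that record + 4-byte header stays u64 aligned.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	size_t head = sizeof(unsigned long);	/* stand-in for the entry head */
	size_t payload;

	for (payload = 0; payload <= 20; payload += 4) {
		size_t entry_size = head + payload;
		size_t reserved = ALIGN(entry_size + sizeof(uint32_t),
					sizeof(uint64_t)) - sizeof(uint32_t);

		printf("entry %2zu -> reserve %2zu (+4 byte header = %2zu)\n",
		       entry_size, reserved, reserved + sizeof(uint32_t));
	}
	return 0;
}
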
1320/* Kretprobe profile handler */ 1366/* Kretprobe profile handler */
1321static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, 1367static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1322 struct pt_regs *regs) 1368 struct pt_regs *regs)
1323{ 1369{
1324 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1370 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1325 struct ftrace_event_call *call = &tp->call; 1371 struct ftrace_event_call *call = &tp->call;
1326 struct kretprobe_trace_entry *entry; 1372 struct kretprobe_trace_entry_head *entry;
1327 struct trace_entry *ent; 1373 struct hlist_head *head;
1328 int size, __size, i, pc, __cpu; 1374 u8 *data;
1329 unsigned long irq_flags; 1375 int size, __size, i;
1330 char *trace_buf;
1331 char *raw_data;
1332 int rctx; 1376 int rctx;
1333 1377
1334 pc = preempt_count(); 1378 __size = sizeof(*entry) + tp->size;
1335 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1336 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1379 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1337 size -= sizeof(u32); 1380 size -= sizeof(u32);
1338 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1381 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1339 "profile buffer not large enough")) 1382 "profile buffer not large enough"))
1340 return 0; 1383 return;
1341
1342 /*
1343 * Protect the non nmi buffer
1344 * This also protects the rcu read side
1345 */
1346 local_irq_save(irq_flags);
1347 1384
1348 rctx = perf_swevent_get_recursion_context(); 1385 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1349 if (rctx < 0) 1386 if (!entry)
1350 goto end_recursion; 1387 return;
1351
1352 __cpu = smp_processor_id();
1353
1354 if (in_nmi())
1355 trace_buf = rcu_dereference(perf_trace_buf_nmi);
1356 else
1357 trace_buf = rcu_dereference(perf_trace_buf);
1358
1359 if (!trace_buf)
1360 goto end;
1361
1362 raw_data = per_cpu_ptr(trace_buf, __cpu);
1363
1364 /* Zero dead bytes from alignment to avoid buffer leak to userspace */
1365 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
1366 entry = (struct kretprobe_trace_entry *)raw_data;
1367 ent = &entry->ent;
1368 1388
1369 tracing_generic_entry_update(ent, irq_flags, pc);
1370 ent->type = call->id;
1371 entry->nargs = tp->nr_args;
1372 entry->func = (unsigned long)tp->rp.kp.addr; 1389 entry->func = (unsigned long)tp->rp.kp.addr;
1373 entry->ret_ip = (unsigned long)ri->ret_addr; 1390 entry->ret_ip = (unsigned long)ri->ret_addr;
1391 data = (u8 *)&entry[1];
1374 for (i = 0; i < tp->nr_args; i++) 1392 for (i = 0; i < tp->nr_args; i++)
1375 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1376 perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
1377
1378end:
1379 perf_swevent_put_recursion_context(rctx);
1380end_recursion:
1381 local_irq_restore(irq_flags);
1382 1394
1383 return 0; 1395 head = this_cpu_ptr(call->perf_events);
1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1384} 1397}
1385 1398
1386static int probe_profile_enable(struct ftrace_event_call *call) 1399static int probe_perf_enable(struct ftrace_event_call *call)
1387{ 1400{
1388 struct trace_probe *tp = (struct trace_probe *)call->data; 1401 struct trace_probe *tp = (struct trace_probe *)call->data;
1389 1402
@@ -1395,7 +1408,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
1395 return enable_kprobe(&tp->rp.kp); 1408 return enable_kprobe(&tp->rp.kp);
1396} 1409}
1397 1410
1398static void probe_profile_disable(struct ftrace_event_call *call) 1411static void probe_perf_disable(struct ftrace_event_call *call)
1399{ 1412{
1400 struct trace_probe *tp = (struct trace_probe *)call->data; 1413 struct trace_probe *tp = (struct trace_probe *)call->data;
1401 1414
@@ -1408,8 +1421,28 @@ static void probe_profile_disable(struct ftrace_event_call *call)
1408 disable_kprobe(&tp->rp.kp); 1421 disable_kprobe(&tp->rp.kp);
1409 } 1422 }
1410} 1423}
1411#endif /* CONFIG_EVENT_PROFILE */ 1424#endif /* CONFIG_PERF_EVENTS */
1412 1425
1426static __kprobes
1427int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1428{
1429 switch (type) {
1430 case TRACE_REG_REGISTER:
1431 return probe_event_enable(event);
1432 case TRACE_REG_UNREGISTER:
1433 probe_event_disable(event);
1434 return 0;
1435
1436#ifdef CONFIG_PERF_EVENTS
1437 case TRACE_REG_PERF_REGISTER:
1438 return probe_perf_enable(event);
1439 case TRACE_REG_PERF_UNREGISTER:
1440 probe_perf_disable(event);
1441 return 0;
1442#endif
1443 }
1444 return 0;
1445}
1413 1446
1414static __kprobes 1447static __kprobes
1415int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1448int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
@@ -1418,10 +1451,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1418 1451
1419 if (tp->flags & TP_FLAG_TRACE) 1452 if (tp->flags & TP_FLAG_TRACE)
1420 kprobe_trace_func(kp, regs); 1453 kprobe_trace_func(kp, regs);
1421#ifdef CONFIG_EVENT_PROFILE 1454#ifdef CONFIG_PERF_EVENTS
1422 if (tp->flags & TP_FLAG_PROFILE) 1455 if (tp->flags & TP_FLAG_PROFILE)
1423 kprobe_profile_func(kp, regs); 1456 kprobe_perf_func(kp, regs);
1424#endif /* CONFIG_EVENT_PROFILE */ 1457#endif
1425 return 0; /* We don't tweak the kernel, so just return 0 */ 1458 return 0; /* We don't tweak the kernel, so just return 0 */
1426} 1459}
1427 1460
@@ -1432,13 +1465,21 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1432 1465
1433 if (tp->flags & TP_FLAG_TRACE) 1466 if (tp->flags & TP_FLAG_TRACE)
1434 kretprobe_trace_func(ri, regs); 1467 kretprobe_trace_func(ri, regs);
1435#ifdef CONFIG_EVENT_PROFILE 1468#ifdef CONFIG_PERF_EVENTS
1436 if (tp->flags & TP_FLAG_PROFILE) 1469 if (tp->flags & TP_FLAG_PROFILE)
1437 kretprobe_profile_func(ri, regs); 1470 kretprobe_perf_func(ri, regs);
1438#endif /* CONFIG_EVENT_PROFILE */ 1471#endif
1439 return 0; /* We don't tweak the kernel, so just return 0 */ 1472 return 0; /* We don't tweak the kernel, so just return 0 */
1440} 1473}
1441 1474
1475static struct trace_event_functions kretprobe_funcs = {
1476 .trace = print_kretprobe_event
1477};
1478
1479static struct trace_event_functions kprobe_funcs = {
1480 .trace = print_kprobe_event
1481};
1482
1442static int register_probe_event(struct trace_probe *tp) 1483static int register_probe_event(struct trace_probe *tp)
1443{ 1484{
1444 struct ftrace_event_call *call = &tp->call; 1485 struct ftrace_event_call *call = &tp->call;
@@ -1446,33 +1487,31 @@ static int register_probe_event(struct trace_probe *tp)
1446 1487
1447 /* Initialize ftrace_event_call */ 1488 /* Initialize ftrace_event_call */
1448 if (probe_is_return(tp)) { 1489 if (probe_is_return(tp)) {
1449 tp->event.trace = print_kretprobe_event; 1490 INIT_LIST_HEAD(&call->class->fields);
1450 call->raw_init = probe_event_raw_init; 1491 call->event.funcs = &kretprobe_funcs;
1451 call->show_format = kretprobe_event_show_format; 1492 call->class->raw_init = probe_event_raw_init;
1452 call->define_fields = kretprobe_event_define_fields; 1493 call->class->define_fields = kretprobe_event_define_fields;
1453 } else { 1494 } else {
1454 tp->event.trace = print_kprobe_event; 1495 INIT_LIST_HEAD(&call->class->fields);
1455 call->raw_init = probe_event_raw_init; 1496 call->event.funcs = &kprobe_funcs;
1456 call->show_format = kprobe_event_show_format; 1497 call->class->raw_init = probe_event_raw_init;
1457 call->define_fields = kprobe_event_define_fields; 1498 call->class->define_fields = kprobe_event_define_fields;
1458 } 1499 }
1459 call->event = &tp->event; 1500 if (set_print_fmt(tp) < 0)
1460 call->id = register_ftrace_event(&tp->event); 1501 return -ENOMEM;
1461 if (!call->id) 1502 ret = register_ftrace_event(&call->event);
1503 if (!ret) {
1504 kfree(call->print_fmt);
1462 return -ENODEV; 1505 return -ENODEV;
1463 call->enabled = 0; 1506 }
1464 call->regfunc = probe_event_enable; 1507 call->flags = 0;
1465 call->unregfunc = probe_event_disable; 1508 call->class->reg = kprobe_register;
1466
1467#ifdef CONFIG_EVENT_PROFILE
1468 call->profile_enable = probe_profile_enable;
1469 call->profile_disable = probe_profile_disable;
1470#endif
1471 call->data = tp; 1509 call->data = tp;
1472 ret = trace_add_event_call(call); 1510 ret = trace_add_event_call(call);
1473 if (ret) { 1511 if (ret) {
1474 pr_info("Failed to register kprobe event: %s\n", call->name); 1512 pr_info("Failed to register kprobe event: %s\n", call->name);
1475 unregister_ftrace_event(&tp->event); 1513 kfree(call->print_fmt);
1514 unregister_ftrace_event(&call->event);
1476 } 1515 }
1477 return ret; 1516 return ret;
1478} 1517}
@@ -1481,6 +1520,7 @@ static void unregister_probe_event(struct trace_probe *tp)
1481{ 1520{
1482 /* tp->event is unregistered in trace_remove_event_call() */ 1521 /* tp->event is unregistered in trace_remove_event_call() */
1483 trace_remove_event_call(&tp->call); 1522 trace_remove_event_call(&tp->call);
1523 kfree(tp->call.print_fmt);
1484} 1524}
1485 1525
1486/* Make a debugfs interface for controling probe points */ 1526/* Make a debugfs interface for controling probe points */
@@ -1523,28 +1563,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
1523 1563
1524static __init int kprobe_trace_self_tests_init(void) 1564static __init int kprobe_trace_self_tests_init(void)
1525{ 1565{
1526 int ret; 1566 int ret, warn = 0;
1527 int (*target)(int, int, int, int, int, int); 1567 int (*target)(int, int, int, int, int, int);
1568 struct trace_probe *tp;
1528 1569
1529 target = kprobe_trace_selftest_target; 1570 target = kprobe_trace_selftest_target;
1530 1571
1531 pr_info("Testing kprobe tracing: "); 1572 pr_info("Testing kprobe tracing: ");
1532 1573
1533 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " 1574 ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1534 "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); 1575 "$stack $stack0 +0($stack)");
1535 if (WARN_ON_ONCE(ret)) 1576 if (WARN_ON_ONCE(ret)) {
1536 pr_warning("error enabling function entry\n"); 1577 pr_warning("error on probing function entry.\n");
1578 warn++;
1579 } else {
1580 /* Enable trace point */
1581 tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1582 if (WARN_ON_ONCE(tp == NULL)) {
1583 pr_warning("error on getting new probe.\n");
1584 warn++;
1585 } else
1586 probe_event_enable(&tp->call);
1587 }
1537 1588
1538 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 1589 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1539 "$retval"); 1590 "$retval");
1540 if (WARN_ON_ONCE(ret)) 1591 if (WARN_ON_ONCE(ret)) {
1541 pr_warning("error enabling function return\n"); 1592 pr_warning("error on probing function return.\n");
1593 warn++;
1594 } else {
1595 /* Enable trace point */
1596 tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1597 if (WARN_ON_ONCE(tp == NULL)) {
1598 pr_warning("error on getting new probe.\n");
1599 warn++;
1600 } else
1601 probe_event_enable(&tp->call);
1602 }
1603
1604 if (warn)
1605 goto end;
1542 1606
1543 ret = target(1, 2, 3, 4, 5, 6); 1607 ret = target(1, 2, 3, 4, 5, 6);
1544 1608
1545 cleanup_all_probes(); 1609 ret = command_trace_probe("-:testprobe");
1610 if (WARN_ON_ONCE(ret)) {
1611 pr_warning("error on deleting a probe.\n");
1612 warn++;
1613 }
1614
1615 ret = command_trace_probe("-:testprobe2");
1616 if (WARN_ON_ONCE(ret)) {
1617 pr_warning("error on deleting a probe.\n");
1618 warn++;
1619 }
1546 1620
1547 pr_cont("OK\n"); 1621end:
1622 cleanup_all_probes();
1623 if (warn)
1624 pr_cont("NG: Some tests are failed. Please check them.\n");
1625 else
1626 pr_cont("OK\n");
1548 return 0; 1627 return 0;
1549} 1628}
1550 1629
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 94103cdcf9d8..8eaf00749b65 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -23,6 +23,7 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/slab.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27 28
28#include "trace_output.h" 29#include "trace_output.h"
@@ -33,12 +34,6 @@
33 34
34#include <asm/atomic.h> 35#include <asm/atomic.h>
35 36
36/*
37 * For now, let us restrict the no. of symbols traced simultaneously to number
38 * of available hardware breakpoint registers.
39 */
40#define KSYM_TRACER_MAX HBP_NUM
41
42#define KSYM_TRACER_OP_LEN 3 /* rw- */ 37#define KSYM_TRACER_OP_LEN 3 /* rw- */
43 38
44struct trace_ksym { 39struct trace_ksym {
@@ -52,7 +47,6 @@ struct trace_ksym {
52 47
53static struct trace_array *ksym_trace_array; 48static struct trace_array *ksym_trace_array;
54 49
55static unsigned int ksym_filter_entry_count;
56static unsigned int ksym_tracing_enabled; 50static unsigned int ksym_tracing_enabled;
57 51
58static HLIST_HEAD(ksym_filter_head); 52static HLIST_HEAD(ksym_filter_head);
@@ -180,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
180 struct trace_ksym *entry; 174 struct trace_ksym *entry;
181 int ret = -ENOMEM; 175 int ret = -ENOMEM;
182 176
183 if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
184 printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
185 " new requests for tracing can be accepted now.\n",
186 KSYM_TRACER_MAX);
187 return -ENOSPC;
188 }
189
190 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); 177 entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
191 if (!entry) 178 if (!entry)
192 return -ENOMEM; 179 return -ENOMEM;
@@ -202,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
202 189
203 if (IS_ERR(entry->ksym_hbp)) { 190 if (IS_ERR(entry->ksym_hbp)) {
204 ret = PTR_ERR(entry->ksym_hbp); 191 ret = PTR_ERR(entry->ksym_hbp);
205 printk(KERN_INFO "ksym_tracer request failed. Try again" 192 if (ret == -ENOSPC) {
206 " later!!\n"); 193 printk(KERN_ERR "ksym_tracer: Maximum limit reached."
194 " No new requests for tracing can be accepted now.\n");
195 } else {
196 printk(KERN_INFO "ksym_tracer request failed. Try again"
197 " later!!\n");
198 }
207 goto err; 199 goto err;
208 } 200 }
209 201
210 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); 202 hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
211 ksym_filter_entry_count++;
212 203
213 return 0; 204 return 0;
214 205
@@ -264,7 +255,6 @@ static void __ksym_trace_reset(void)
264 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, 255 hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
265 ksym_hlist) { 256 ksym_hlist) {
266 unregister_wide_hw_breakpoint(entry->ksym_hbp); 257 unregister_wide_hw_breakpoint(entry->ksym_hbp);
267 ksym_filter_entry_count--;
268 hlist_del_rcu(&(entry->ksym_hlist)); 258 hlist_del_rcu(&(entry->ksym_hlist));
269 synchronize_rcu(); 259 synchronize_rcu();
270 kfree(entry); 260 kfree(entry);
@@ -337,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
337 goto out_unlock; 327 goto out_unlock;
338 } 328 }
339 /* Error or "symbol:---" case: drop it */ 329 /* Error or "symbol:---" case: drop it */
340 ksym_filter_entry_count--;
341 hlist_del_rcu(&(entry->ksym_hlist)); 330 hlist_del_rcu(&(entry->ksym_hlist));
342 synchronize_rcu(); 331 synchronize_rcu();
343 kfree(entry); 332 kfree(entry);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8e46b3323cdc..57c1b4596470 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -209,6 +209,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
209 209
210 return 1; 210 return 1;
211} 211}
212EXPORT_SYMBOL(trace_seq_putc);
212 213
213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 214int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
214{ 215{
@@ -253,7 +254,7 @@ void *trace_seq_reserve(struct trace_seq *s, size_t len)
253 void *ret; 254 void *ret;
254 255
255 if (s->full) 256 if (s->full)
256 return 0; 257 return NULL;
257 258
258 if (len > ((PAGE_SIZE - 1) - s->len)) { 259 if (len > ((PAGE_SIZE - 1) - s->len)) {
259 s->full = 1; 260 s->full = 1;
@@ -355,6 +356,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
355} 356}
356EXPORT_SYMBOL(ftrace_print_symbols_seq); 357EXPORT_SYMBOL(ftrace_print_symbols_seq);
357 358
359const char *
360ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
361{
362 int i;
363 const char *ret = p->buffer + p->len;
364
365 for (i = 0; i < buf_len; i++)
366 trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
367
368 trace_seq_putc(p, 0);
369
370 return ret;
371}
372EXPORT_SYMBOL(ftrace_print_hex_seq);
373
358#ifdef CONFIG_KRETPROBES 374#ifdef CONFIG_KRETPROBES
359static inline const char *kretprobed(const char *name) 375static inline const char *kretprobed(const char *name)
360{ 376{
@@ -726,6 +742,9 @@ int register_ftrace_event(struct trace_event *event)
726 if (WARN_ON(!event)) 742 if (WARN_ON(!event))
727 goto out; 743 goto out;
728 744
745 if (WARN_ON(!event->funcs))
746 goto out;
747
729 INIT_LIST_HEAD(&event->list); 748 INIT_LIST_HEAD(&event->list);
730 749
731 if (!event->type) { 750 if (!event->type) {
@@ -758,14 +777,14 @@ int register_ftrace_event(struct trace_event *event)
758 goto out; 777 goto out;
759 } 778 }
760 779
761 if (event->trace == NULL) 780 if (event->funcs->trace == NULL)
762 event->trace = trace_nop_print; 781 event->funcs->trace = trace_nop_print;
763 if (event->raw == NULL) 782 if (event->funcs->raw == NULL)
764 event->raw = trace_nop_print; 783 event->funcs->raw = trace_nop_print;
765 if (event->hex == NULL) 784 if (event->funcs->hex == NULL)
766 event->hex = trace_nop_print; 785 event->funcs->hex = trace_nop_print;
767 if (event->binary == NULL) 786 if (event->funcs->binary == NULL)
768 event->binary = trace_nop_print; 787 event->funcs->binary = trace_nop_print;
769 788
770 key = event->type & (EVENT_HASHSIZE - 1); 789 key = event->type & (EVENT_HASHSIZE - 1);
771 790
@@ -807,13 +826,15 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event);
807 * Standard events 826 * Standard events
808 */ 827 */
809 828
810enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) 829enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
830 struct trace_event *event)
811{ 831{
812 return TRACE_TYPE_HANDLED; 832 return TRACE_TYPE_HANDLED;
813} 833}
814 834
815/* TRACE_FN */ 835/* TRACE_FN */
816static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) 836static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
837 struct trace_event *event)
817{ 838{
818 struct ftrace_entry *field; 839 struct ftrace_entry *field;
819 struct trace_seq *s = &iter->seq; 840 struct trace_seq *s = &iter->seq;
@@ -840,7 +861,8 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags)
840 return TRACE_TYPE_PARTIAL_LINE; 861 return TRACE_TYPE_PARTIAL_LINE;
841} 862}
842 863
843static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) 864static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags,
865 struct trace_event *event)
844{ 866{
845 struct ftrace_entry *field; 867 struct ftrace_entry *field;
846 868
@@ -854,7 +876,8 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags)
854 return TRACE_TYPE_HANDLED; 876 return TRACE_TYPE_HANDLED;
855} 877}
856 878
857static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) 879static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags,
880 struct trace_event *event)
858{ 881{
859 struct ftrace_entry *field; 882 struct ftrace_entry *field;
860 struct trace_seq *s = &iter->seq; 883 struct trace_seq *s = &iter->seq;
@@ -867,7 +890,8 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags)
867 return TRACE_TYPE_HANDLED; 890 return TRACE_TYPE_HANDLED;
868} 891}
869 892
870static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) 893static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags,
894 struct trace_event *event)
871{ 895{
872 struct ftrace_entry *field; 896 struct ftrace_entry *field;
873 struct trace_seq *s = &iter->seq; 897 struct trace_seq *s = &iter->seq;
@@ -880,14 +904,18 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags)
880 return TRACE_TYPE_HANDLED; 904 return TRACE_TYPE_HANDLED;
881} 905}
882 906
883static struct trace_event trace_fn_event = { 907static struct trace_event_functions trace_fn_funcs = {
884 .type = TRACE_FN,
885 .trace = trace_fn_trace, 908 .trace = trace_fn_trace,
886 .raw = trace_fn_raw, 909 .raw = trace_fn_raw,
887 .hex = trace_fn_hex, 910 .hex = trace_fn_hex,
888 .binary = trace_fn_bin, 911 .binary = trace_fn_bin,
889}; 912};
890 913
914static struct trace_event trace_fn_event = {
915 .type = TRACE_FN,
916 .funcs = &trace_fn_funcs,
917};
918
891/* TRACE_CTX an TRACE_WAKE */ 919/* TRACE_CTX an TRACE_WAKE */
892static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, 920static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
893 char *delim) 921 char *delim)
@@ -916,13 +944,14 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
916 return TRACE_TYPE_HANDLED; 944 return TRACE_TYPE_HANDLED;
917} 945}
918 946
919static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) 947static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags,
948 struct trace_event *event)
920{ 949{
921 return trace_ctxwake_print(iter, "==>"); 950 return trace_ctxwake_print(iter, "==>");
922} 951}
923 952
924static enum print_line_t trace_wake_print(struct trace_iterator *iter, 953static enum print_line_t trace_wake_print(struct trace_iterator *iter,
925 int flags) 954 int flags, struct trace_event *event)
926{ 955{
927 return trace_ctxwake_print(iter, " +"); 956 return trace_ctxwake_print(iter, " +");
928} 957}
@@ -950,12 +979,14 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
950 return TRACE_TYPE_HANDLED; 979 return TRACE_TYPE_HANDLED;
951} 980}
952 981
953static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) 982static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags,
983 struct trace_event *event)
954{ 984{
955 return trace_ctxwake_raw(iter, 0); 985 return trace_ctxwake_raw(iter, 0);
956} 986}
957 987
958static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) 988static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags,
989 struct trace_event *event)
959{ 990{
960 return trace_ctxwake_raw(iter, '+'); 991 return trace_ctxwake_raw(iter, '+');
961} 992}
@@ -984,18 +1015,20 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
984 return TRACE_TYPE_HANDLED; 1015 return TRACE_TYPE_HANDLED;
985} 1016}
986 1017
987static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) 1018static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags,
1019 struct trace_event *event)
988{ 1020{
989 return trace_ctxwake_hex(iter, 0); 1021 return trace_ctxwake_hex(iter, 0);
990} 1022}
991 1023
992static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) 1024static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags,
1025 struct trace_event *event)
993{ 1026{
994 return trace_ctxwake_hex(iter, '+'); 1027 return trace_ctxwake_hex(iter, '+');
995} 1028}
996 1029
997static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, 1030static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
998 int flags) 1031 int flags, struct trace_event *event)
999{ 1032{
1000 struct ctx_switch_entry *field; 1033 struct ctx_switch_entry *field;
1001 struct trace_seq *s = &iter->seq; 1034 struct trace_seq *s = &iter->seq;
@@ -1012,25 +1045,33 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter,
1012 return TRACE_TYPE_HANDLED; 1045 return TRACE_TYPE_HANDLED;
1013} 1046}
1014 1047
1015static struct trace_event trace_ctx_event = { 1048static struct trace_event_functions trace_ctx_funcs = {
1016 .type = TRACE_CTX,
1017 .trace = trace_ctx_print, 1049 .trace = trace_ctx_print,
1018 .raw = trace_ctx_raw, 1050 .raw = trace_ctx_raw,
1019 .hex = trace_ctx_hex, 1051 .hex = trace_ctx_hex,
1020 .binary = trace_ctxwake_bin, 1052 .binary = trace_ctxwake_bin,
1021}; 1053};
1022 1054
1023static struct trace_event trace_wake_event = { 1055static struct trace_event trace_ctx_event = {
1024 .type = TRACE_WAKE, 1056 .type = TRACE_CTX,
1057 .funcs = &trace_ctx_funcs,
1058};
1059
1060static struct trace_event_functions trace_wake_funcs = {
1025 .trace = trace_wake_print, 1061 .trace = trace_wake_print,
1026 .raw = trace_wake_raw, 1062 .raw = trace_wake_raw,
1027 .hex = trace_wake_hex, 1063 .hex = trace_wake_hex,
1028 .binary = trace_ctxwake_bin, 1064 .binary = trace_ctxwake_bin,
1029}; 1065};
1030 1066
1067static struct trace_event trace_wake_event = {
1068 .type = TRACE_WAKE,
1069 .funcs = &trace_wake_funcs,
1070};
1071
1031/* TRACE_SPECIAL */ 1072/* TRACE_SPECIAL */
1032static enum print_line_t trace_special_print(struct trace_iterator *iter, 1073static enum print_line_t trace_special_print(struct trace_iterator *iter,
1033 int flags) 1074 int flags, struct trace_event *event)
1034{ 1075{
1035 struct special_entry *field; 1076 struct special_entry *field;
1036 1077
@@ -1046,7 +1087,7 @@ static enum print_line_t trace_special_print(struct trace_iterator *iter,
1046} 1087}
1047 1088
1048static enum print_line_t trace_special_hex(struct trace_iterator *iter, 1089static enum print_line_t trace_special_hex(struct trace_iterator *iter,
1049 int flags) 1090 int flags, struct trace_event *event)
1050{ 1091{
1051 struct special_entry *field; 1092 struct special_entry *field;
1052 struct trace_seq *s = &iter->seq; 1093 struct trace_seq *s = &iter->seq;
@@ -1061,7 +1102,7 @@ static enum print_line_t trace_special_hex(struct trace_iterator *iter,
1061} 1102}
1062 1103
1063static enum print_line_t trace_special_bin(struct trace_iterator *iter, 1104static enum print_line_t trace_special_bin(struct trace_iterator *iter,
1064 int flags) 1105 int flags, struct trace_event *event)
1065{ 1106{
1066 struct special_entry *field; 1107 struct special_entry *field;
1067 struct trace_seq *s = &iter->seq; 1108 struct trace_seq *s = &iter->seq;
@@ -1075,18 +1116,22 @@ static enum print_line_t trace_special_bin(struct trace_iterator *iter,
1075 return TRACE_TYPE_HANDLED; 1116 return TRACE_TYPE_HANDLED;
1076} 1117}
1077 1118
1078static struct trace_event trace_special_event = { 1119static struct trace_event_functions trace_special_funcs = {
1079 .type = TRACE_SPECIAL,
1080 .trace = trace_special_print, 1120 .trace = trace_special_print,
1081 .raw = trace_special_print, 1121 .raw = trace_special_print,
1082 .hex = trace_special_hex, 1122 .hex = trace_special_hex,
1083 .binary = trace_special_bin, 1123 .binary = trace_special_bin,
1084}; 1124};
1085 1125
1126static struct trace_event trace_special_event = {
1127 .type = TRACE_SPECIAL,
1128 .funcs = &trace_special_funcs,
1129};
1130
1086/* TRACE_STACK */ 1131/* TRACE_STACK */
1087 1132
1088static enum print_line_t trace_stack_print(struct trace_iterator *iter, 1133static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1089 int flags) 1134 int flags, struct trace_event *event)
1090{ 1135{
1091 struct stack_entry *field; 1136 struct stack_entry *field;
1092 struct trace_seq *s = &iter->seq; 1137 struct trace_seq *s = &iter->seq;
@@ -1114,17 +1159,21 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1114 return TRACE_TYPE_PARTIAL_LINE; 1159 return TRACE_TYPE_PARTIAL_LINE;
1115} 1160}
1116 1161
1117static struct trace_event trace_stack_event = { 1162static struct trace_event_functions trace_stack_funcs = {
1118 .type = TRACE_STACK,
1119 .trace = trace_stack_print, 1163 .trace = trace_stack_print,
1120 .raw = trace_special_print, 1164 .raw = trace_special_print,
1121 .hex = trace_special_hex, 1165 .hex = trace_special_hex,
1122 .binary = trace_special_bin, 1166 .binary = trace_special_bin,
1123}; 1167};
1124 1168
1169static struct trace_event trace_stack_event = {
1170 .type = TRACE_STACK,
1171 .funcs = &trace_stack_funcs,
1172};
1173
1125/* TRACE_USER_STACK */ 1174/* TRACE_USER_STACK */
1126static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, 1175static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1127 int flags) 1176 int flags, struct trace_event *event)
1128{ 1177{
1129 struct userstack_entry *field; 1178 struct userstack_entry *field;
1130 struct trace_seq *s = &iter->seq; 1179 struct trace_seq *s = &iter->seq;
@@ -1143,17 +1192,22 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
1143 return TRACE_TYPE_PARTIAL_LINE; 1192 return TRACE_TYPE_PARTIAL_LINE;
1144} 1193}
1145 1194
1146static struct trace_event trace_user_stack_event = { 1195static struct trace_event_functions trace_user_stack_funcs = {
1147 .type = TRACE_USER_STACK,
1148 .trace = trace_user_stack_print, 1196 .trace = trace_user_stack_print,
1149 .raw = trace_special_print, 1197 .raw = trace_special_print,
1150 .hex = trace_special_hex, 1198 .hex = trace_special_hex,
1151 .binary = trace_special_bin, 1199 .binary = trace_special_bin,
1152}; 1200};
1153 1201
1202static struct trace_event trace_user_stack_event = {
1203 .type = TRACE_USER_STACK,
1204 .funcs = &trace_user_stack_funcs,
1205};
1206
1154/* TRACE_BPRINT */ 1207/* TRACE_BPRINT */
1155static enum print_line_t 1208static enum print_line_t
1156trace_bprint_print(struct trace_iterator *iter, int flags) 1209trace_bprint_print(struct trace_iterator *iter, int flags,
1210 struct trace_event *event)
1157{ 1211{
1158 struct trace_entry *entry = iter->ent; 1212 struct trace_entry *entry = iter->ent;
1159 struct trace_seq *s = &iter->seq; 1213 struct trace_seq *s = &iter->seq;
@@ -1178,7 +1232,8 @@ trace_bprint_print(struct trace_iterator *iter, int flags)
1178 1232
1179 1233
1180static enum print_line_t 1234static enum print_line_t
1181trace_bprint_raw(struct trace_iterator *iter, int flags) 1235trace_bprint_raw(struct trace_iterator *iter, int flags,
1236 struct trace_event *event)
1182{ 1237{
1183 struct bprint_entry *field; 1238 struct bprint_entry *field;
1184 struct trace_seq *s = &iter->seq; 1239 struct trace_seq *s = &iter->seq;
@@ -1197,16 +1252,19 @@ trace_bprint_raw(struct trace_iterator *iter, int flags)
1197 return TRACE_TYPE_PARTIAL_LINE; 1252 return TRACE_TYPE_PARTIAL_LINE;
1198} 1253}
1199 1254
1255static struct trace_event_functions trace_bprint_funcs = {
1256 .trace = trace_bprint_print,
1257 .raw = trace_bprint_raw,
1258};
1200 1259
1201static struct trace_event trace_bprint_event = { 1260static struct trace_event trace_bprint_event = {
1202 .type = TRACE_BPRINT, 1261 .type = TRACE_BPRINT,
1203 .trace = trace_bprint_print, 1262 .funcs = &trace_bprint_funcs,
1204 .raw = trace_bprint_raw,
1205}; 1263};
1206 1264
1207/* TRACE_PRINT */ 1265/* TRACE_PRINT */
1208static enum print_line_t trace_print_print(struct trace_iterator *iter, 1266static enum print_line_t trace_print_print(struct trace_iterator *iter,
1209 int flags) 1267 int flags, struct trace_event *event)
1210{ 1268{
1211 struct print_entry *field; 1269 struct print_entry *field;
1212 struct trace_seq *s = &iter->seq; 1270 struct trace_seq *s = &iter->seq;
@@ -1225,7 +1283,8 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
1225 return TRACE_TYPE_PARTIAL_LINE; 1283 return TRACE_TYPE_PARTIAL_LINE;
1226} 1284}
1227 1285
1228static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) 1286static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
1287 struct trace_event *event)
1229{ 1288{
1230 struct print_entry *field; 1289 struct print_entry *field;
1231 1290
@@ -1240,12 +1299,16 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
1240 return TRACE_TYPE_PARTIAL_LINE; 1299 return TRACE_TYPE_PARTIAL_LINE;
1241} 1300}
1242 1301
1243static struct trace_event trace_print_event = { 1302static struct trace_event_functions trace_print_funcs = {
1244 .type = TRACE_PRINT,
1245 .trace = trace_print_print, 1303 .trace = trace_print_print,
1246 .raw = trace_print_raw, 1304 .raw = trace_print_raw,
1247}; 1305};
1248 1306
1307static struct trace_event trace_print_event = {
1308 .type = TRACE_PRINT,
1309 .funcs = &trace_print_funcs,
1310};
1311
1249 1312
1250static struct trace_event *events[] __initdata = { 1313static struct trace_event *events[] __initdata = {
1251 &trace_fn_event, 1314 &trace_fn_event,
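
The trace_output.c changes above split the per-event print callbacks out of struct trace_event into a separate struct trace_event_functions, and every callback gains a third struct trace_event *event argument. A hedged sketch of how an output event would be wired up under the new layout; TRACE_EXAMPLE and the trace_example_* names are invented for illustration:

/*
 * Sketch only: one printer plus the two-struct registration layout.
 * TRACE_EXAMPLE and trace_example_* are hypothetical names.
 */
static enum print_line_t trace_example_print(struct trace_iterator *iter,
					     int flags,
					     struct trace_event *event)
{
	if (!trace_seq_printf(&iter->seq, "example event\n"))
		return TRACE_TYPE_PARTIAL_LINE;
	return TRACE_TYPE_HANDLED;
}

static struct trace_event_functions trace_example_funcs = {
	.trace	= trace_example_print,
	/*
	 * .raw, .hex and .binary may stay NULL: as the hunk above shows,
	 * register_ftrace_event() fills missing callbacks with
	 * trace_nop_print().
	 */
};

static struct trace_event trace_example_event = {
	.type	= TRACE_EXAMPLE,	/* hypothetical entry type */
	.funcs	= &trace_example_funcs,
};

Registration itself is unchanged, register_ftrace_event(&trace_example_event), which per the hunk above now also warns if .funcs is missing.
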
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 9d91c72ba38b..c038eba0492b 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -25,7 +25,7 @@ extern void trace_event_read_unlock(void);
25extern struct trace_event *ftrace_find_event(int type); 25extern struct trace_event *ftrace_find_event(int type);
26 26
27extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 27extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
28 int flags); 28 int flags, struct trace_event *event);
29extern int 29extern int
30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); 30trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
31 31
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 5fca0f51fde4..8f758d070c43 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -50,8 +50,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
50} 50}
51 51
52static void 52static void
53probe_sched_switch(struct rq *__rq, struct task_struct *prev, 53probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next)
54 struct task_struct *next)
55{ 54{
56 struct trace_array_cpu *data; 55 struct trace_array_cpu *data;
57 unsigned long flags; 56 unsigned long flags;
@@ -109,7 +108,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
109} 108}
110 109
111static void 110static void
112probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) 111probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
113{ 112{
114 struct trace_array_cpu *data; 113 struct trace_array_cpu *data;
115 unsigned long flags; 114 unsigned long flags;
@@ -139,21 +138,21 @@ static int tracing_sched_register(void)
139{ 138{
140 int ret; 139 int ret;
141 140
142 ret = register_trace_sched_wakeup(probe_sched_wakeup); 141 ret = register_trace_sched_wakeup(probe_sched_wakeup, NULL);
143 if (ret) { 142 if (ret) {
144 pr_info("wakeup trace: Couldn't activate tracepoint" 143 pr_info("wakeup trace: Couldn't activate tracepoint"
145 " probe to kernel_sched_wakeup\n"); 144 " probe to kernel_sched_wakeup\n");
146 return ret; 145 return ret;
147 } 146 }
148 147
149 ret = register_trace_sched_wakeup_new(probe_sched_wakeup); 148 ret = register_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
150 if (ret) { 149 if (ret) {
151 pr_info("wakeup trace: Couldn't activate tracepoint" 150 pr_info("wakeup trace: Couldn't activate tracepoint"
152 " probe to kernel_sched_wakeup_new\n"); 151 " probe to kernel_sched_wakeup_new\n");
153 goto fail_deprobe; 152 goto fail_deprobe;
154 } 153 }
155 154
156 ret = register_trace_sched_switch(probe_sched_switch); 155 ret = register_trace_sched_switch(probe_sched_switch, NULL);
157 if (ret) { 156 if (ret) {
158 pr_info("sched trace: Couldn't activate tracepoint" 157 pr_info("sched trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_switch\n"); 158 " probe to kernel_sched_switch\n");
@@ -162,17 +161,17 @@ static int tracing_sched_register(void)
162 161
163 return ret; 162 return ret;
164fail_deprobe_wake_new: 163fail_deprobe_wake_new:
165 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 164 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
166fail_deprobe: 165fail_deprobe:
167 unregister_trace_sched_wakeup(probe_sched_wakeup); 166 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
168 return ret; 167 return ret;
169} 168}
170 169
171static void tracing_sched_unregister(void) 170static void tracing_sched_unregister(void)
172{ 171{
173 unregister_trace_sched_switch(probe_sched_switch); 172 unregister_trace_sched_switch(probe_sched_switch, NULL);
174 unregister_trace_sched_wakeup_new(probe_sched_wakeup); 173 unregister_trace_sched_wakeup_new(probe_sched_wakeup, NULL);
175 unregister_trace_sched_wakeup(probe_sched_wakeup); 174 unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
176} 175}
177 176
178static void tracing_start_sched_switch(void) 177static void tracing_start_sched_switch(void)
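
Both scheduler tracers here follow the updated tracepoint convention: a probe now receives a private-data pointer as its first argument, and register_/unregister_trace_*() take a (probe, data) pair. A minimal sketch under that convention; probe_my_switch is an invented name and, like the tracers in this diff, it registers with NULL data:

/*
 * Sketch of the (probe, data) tracepoint API shown above.
 * probe_my_switch() is hypothetical; needs <trace/events/sched.h>.
 */
static void
probe_my_switch(void *data, struct task_struct *prev, struct task_struct *next)
{
	/* 'data' is whatever was passed at registration time (NULL here) */
	trace_printk("switch %d -> %d\n", prev->pid, next->pid);
}

static int my_probe_register(void)
{
	return register_trace_sched_switch(probe_my_switch, NULL);
}

static void my_probe_unregister(void)
{
	unregister_trace_sched_switch(probe_my_switch, NULL);
}
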
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0271742abb8d..0e73bc2ef8c5 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -98,7 +98,8 @@ static int report_latency(cycle_t delta)
98 return 1; 98 return 1;
99} 99}
100 100
101static void probe_wakeup_migrate_task(struct task_struct *task, int cpu) 101static void
102probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu)
102{ 103{
103 if (task != wakeup_task) 104 if (task != wakeup_task)
104 return; 105 return;
@@ -107,8 +108,8 @@ static void probe_wakeup_migrate_task(struct task_struct *task, int cpu)
107} 108}
108 109
109static void notrace 110static void notrace
110probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, 111probe_wakeup_sched_switch(void *ignore,
111 struct task_struct *next) 112 struct task_struct *prev, struct task_struct *next)
112{ 113{
113 struct trace_array_cpu *data; 114 struct trace_array_cpu *data;
114 cycle_t T0, T1, delta; 115 cycle_t T0, T1, delta;
@@ -200,7 +201,7 @@ static void wakeup_reset(struct trace_array *tr)
200} 201}
201 202
202static void 203static void
203probe_wakeup(struct rq *rq, struct task_struct *p, int success) 204probe_wakeup(void *ignore, struct task_struct *p, int success)
204{ 205{
205 struct trace_array_cpu *data; 206 struct trace_array_cpu *data;
206 int cpu = smp_processor_id(); 207 int cpu = smp_processor_id();
@@ -264,28 +265,28 @@ static void start_wakeup_tracer(struct trace_array *tr)
264{ 265{
265 int ret; 266 int ret;
266 267
267 ret = register_trace_sched_wakeup(probe_wakeup); 268 ret = register_trace_sched_wakeup(probe_wakeup, NULL);
268 if (ret) { 269 if (ret) {
269 pr_info("wakeup trace: Couldn't activate tracepoint" 270 pr_info("wakeup trace: Couldn't activate tracepoint"
270 " probe to kernel_sched_wakeup\n"); 271 " probe to kernel_sched_wakeup\n");
271 return; 272 return;
272 } 273 }
273 274
274 ret = register_trace_sched_wakeup_new(probe_wakeup); 275 ret = register_trace_sched_wakeup_new(probe_wakeup, NULL);
275 if (ret) { 276 if (ret) {
276 pr_info("wakeup trace: Couldn't activate tracepoint" 277 pr_info("wakeup trace: Couldn't activate tracepoint"
277 " probe to kernel_sched_wakeup_new\n"); 278 " probe to kernel_sched_wakeup_new\n");
278 goto fail_deprobe; 279 goto fail_deprobe;
279 } 280 }
280 281
281 ret = register_trace_sched_switch(probe_wakeup_sched_switch); 282 ret = register_trace_sched_switch(probe_wakeup_sched_switch, NULL);
282 if (ret) { 283 if (ret) {
283 pr_info("sched trace: Couldn't activate tracepoint" 284 pr_info("sched trace: Couldn't activate tracepoint"
284 " probe to kernel_sched_switch\n"); 285 " probe to kernel_sched_switch\n");
285 goto fail_deprobe_wake_new; 286 goto fail_deprobe_wake_new;
286 } 287 }
287 288
288 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task); 289 ret = register_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
289 if (ret) { 290 if (ret) {
290 pr_info("wakeup trace: Couldn't activate tracepoint" 291 pr_info("wakeup trace: Couldn't activate tracepoint"
291 " probe to kernel_sched_migrate_task\n"); 292 " probe to kernel_sched_migrate_task\n");
@@ -312,19 +313,19 @@ static void start_wakeup_tracer(struct trace_array *tr)
312 313
313 return; 314 return;
314fail_deprobe_wake_new: 315fail_deprobe_wake_new:
315 unregister_trace_sched_wakeup_new(probe_wakeup); 316 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
316fail_deprobe: 317fail_deprobe:
317 unregister_trace_sched_wakeup(probe_wakeup); 318 unregister_trace_sched_wakeup(probe_wakeup, NULL);
318} 319}
319 320
320static void stop_wakeup_tracer(struct trace_array *tr) 321static void stop_wakeup_tracer(struct trace_array *tr)
321{ 322{
322 tracer_enabled = 0; 323 tracer_enabled = 0;
323 unregister_ftrace_function(&trace_ops); 324 unregister_ftrace_function(&trace_ops);
324 unregister_trace_sched_switch(probe_wakeup_sched_switch); 325 unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
325 unregister_trace_sched_wakeup_new(probe_wakeup); 326 unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
326 unregister_trace_sched_wakeup(probe_wakeup); 327 unregister_trace_sched_wakeup(probe_wakeup, NULL);
327 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task); 328 unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
328} 329}
329 330
330static int __wakeup_tracer_init(struct trace_array *tr) 331static int __wakeup_tracer_init(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..250e7f9bd2f0 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
@@ -16,7 +17,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
16 case TRACE_BRANCH: 17 case TRACE_BRANCH:
17 case TRACE_GRAPH_ENT: 18 case TRACE_GRAPH_ENT:
18 case TRACE_GRAPH_RET: 19 case TRACE_GRAPH_RET:
19 case TRACE_HW_BRANCHES:
20 case TRACE_KSYM: 20 case TRACE_KSYM:
21 return 1; 21 return 1;
22 } 22 }
@@ -29,7 +29,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
29 struct trace_entry *entry; 29 struct trace_entry *entry;
30 unsigned int loops = 0; 30 unsigned int loops = 0;
31 31
32 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { 32 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
33 entry = ring_buffer_event_data(event); 33 entry = ring_buffer_event_data(event);
34 34
35 /* 35 /*
@@ -255,7 +255,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
255/* Maximum number of functions to trace before diagnosing a hang */ 255/* Maximum number of functions to trace before diagnosing a hang */
256#define GRAPH_MAX_FUNC_TEST 100000000 256#define GRAPH_MAX_FUNC_TEST 100000000
257 257
258static void __ftrace_dump(bool disable_tracing); 258static void
259__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
259static unsigned int graph_hang_thresh; 260static unsigned int graph_hang_thresh;
260 261
261/* Wrap the real function entry probe to avoid possible hanging */ 262/* Wrap the real function entry probe to avoid possible hanging */
@@ -266,7 +267,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
266 ftrace_graph_stop(); 267 ftrace_graph_stop();
267 printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); 268 printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
268 if (ftrace_dump_on_oops) 269 if (ftrace_dump_on_oops)
269 __ftrace_dump(false); 270 __ftrace_dump(false, DUMP_ALL);
270 return 0; 271 return 0;
271 } 272 }
272 273
@@ -754,62 +755,6 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
754} 755}
755#endif /* CONFIG_BRANCH_TRACER */ 756#endif /* CONFIG_BRANCH_TRACER */
756 757
757#ifdef CONFIG_HW_BRANCH_TRACER
758int
759trace_selftest_startup_hw_branches(struct tracer *trace,
760 struct trace_array *tr)
761{
762 struct trace_iterator *iter;
763 struct tracer tracer;
764 unsigned long count;
765 int ret;
766
767 if (!trace->open) {
768 printk(KERN_CONT "missing open function...");
769 return -1;
770 }
771
772 ret = tracer_init(trace, tr);
773 if (ret) {
774 warn_failed_init_tracer(trace, ret);
775 return ret;
776 }
777
778 /*
779 * The hw-branch tracer needs to collect the trace from the various
780 * cpu trace buffers - before tracing is stopped.
781 */
782 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
783 if (!iter)
784 return -ENOMEM;
785
786 memcpy(&tracer, trace, sizeof(tracer));
787
788 iter->trace = &tracer;
789 iter->tr = tr;
790 iter->pos = -1;
791 mutex_init(&iter->mutex);
792
793 trace->open(iter);
794
795 mutex_destroy(&iter->mutex);
796 kfree(iter);
797
798 tracing_stop();
799
800 ret = trace_test_buffer(tr, &count);
801 trace->reset(tr);
802 tracing_start();
803
804 if (!ret && !count) {
805 printk(KERN_CONT "no entries found..");
806 ret = -1;
807 }
808
809 return ret;
810}
811#endif /* CONFIG_HW_BRANCH_TRACER */
812
813#ifdef CONFIG_KSYM_TRACER 758#ifdef CONFIG_KSYM_TRACER
814static int ksym_selftest_dummy; 759static int ksym_selftest_dummy;
815 760
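
In the trace_selftest.c hunk above, ring_buffer_consume() gains a fourth argument; the selftest passes NULL for both trailing parameters, which appear to be optional out-parameters for the event timestamp and the count of events lost to overwrites. A hedged sketch, under that assumption, of a consumer that does collect them:

/*
 * Assumed semantics of the extra ring_buffer_consume() parameters:
 * drain one CPU's buffer, reporting timestamp and lost-event count.
 */
static void drain_cpu(struct trace_array *tr, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost = 0;
	u64 ts = 0;

	while ((event = ring_buffer_consume(tr->buffer, cpu, &ts, &lost))) {
		struct trace_entry *entry = ring_buffer_event_data(event);

		pr_info("type=%u ts=%llu lost=%lu\n",
			entry->type, (unsigned long long)ts, lost);
	}
}
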
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 678a5120ee30..f4bc9b27de5f 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
157 unsigned long val, flags; 157 unsigned long val, flags;
158 char buf[64]; 158 char buf[64];
159 int ret; 159 int ret;
160 int cpu;
160 161
161 if (count >= sizeof(buf)) 162 if (count >= sizeof(buf))
162 return -EINVAL; 163 return -EINVAL;
@@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
171 return ret; 172 return ret;
172 173
173 local_irq_save(flags); 174 local_irq_save(flags);
175
176 /*
177 * In case we trace inside arch_spin_lock() or after (NMI),
178 * we will cause circular lock, so we also need to increase
179 * the percpu trace_active here.
180 */
181 cpu = smp_processor_id();
182 per_cpu(trace_active, cpu)++;
183
174 arch_spin_lock(&max_stack_lock); 184 arch_spin_lock(&max_stack_lock);
175 *ptr = val; 185 *ptr = val;
176 arch_spin_unlock(&max_stack_lock); 186 arch_spin_unlock(&max_stack_lock);
187
188 per_cpu(trace_active, cpu)--;
177 local_irq_restore(flags); 189 local_irq_restore(flags);
178 190
179 return count; 191 return count;
@@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
206 218
207static void *t_start(struct seq_file *m, loff_t *pos) 219static void *t_start(struct seq_file *m, loff_t *pos)
208{ 220{
221 int cpu;
222
209 local_irq_disable(); 223 local_irq_disable();
224
225 cpu = smp_processor_id();
226 per_cpu(trace_active, cpu)++;
227
210 arch_spin_lock(&max_stack_lock); 228 arch_spin_lock(&max_stack_lock);
211 229
212 if (*pos == 0) 230 if (*pos == 0)
@@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
217 235
218static void t_stop(struct seq_file *m, void *p) 236static void t_stop(struct seq_file *m, void *p)
219{ 237{
238 int cpu;
239
220 arch_spin_unlock(&max_stack_lock); 240 arch_spin_unlock(&max_stack_lock);
241
242 cpu = smp_processor_id();
243 per_cpu(trace_active, cpu)--;
244
221 local_irq_enable(); 245 local_irq_enable();
222} 246}
223 247
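
The trace_stack.c hunks bump the per-cpu trace_active counter before taking max_stack_lock and drop it afterwards; trace_active is the recursion guard the stack tracer's function probe checks, so raising it keeps the probe from re-entering the spinlock the writer already holds (the in-diff comment spells this out). Condensed, the guard is roughly the pattern below; the wrapper function is purely illustrative:

/*
 * Rough shape of the recursion guard added above. trace_active and
 * max_stack_lock are the file's own symbols; the wrapper is a sketch.
 */
static void with_max_stack_lock(void (*critical)(void))
{
	unsigned long flags;
	int cpu;

	local_irq_save(flags);

	/* make the stack-trace probe bail out early on this CPU */
	cpu = smp_processor_id();
	per_cpu(trace_active, cpu)++;

	arch_spin_lock(&max_stack_lock);
	critical();			/* e.g. update *ptr or walk the entries */
	arch_spin_unlock(&max_stack_lock);

	per_cpu(trace_active, cpu)--;
	local_irq_restore(flags);
}
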
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..34e35804304b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
4#include <linux/ftrace.h> 5#include <linux/ftrace.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
@@ -14,6 +15,54 @@ static int sys_refcount_exit;
14static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); 15static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
15static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); 16static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
16 17
18static int syscall_enter_register(struct ftrace_event_call *event,
19 enum trace_reg type);
20static int syscall_exit_register(struct ftrace_event_call *event,
21 enum trace_reg type);
22
23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25
26static struct list_head *
27syscall_get_enter_fields(struct ftrace_event_call *call)
28{
29 struct syscall_metadata *entry = call->data;
30
31 return &entry->enter_fields;
32}
33
34static struct list_head *
35syscall_get_exit_fields(struct ftrace_event_call *call)
36{
37 struct syscall_metadata *entry = call->data;
38
39 return &entry->exit_fields;
40}
41
42struct trace_event_functions enter_syscall_print_funcs = {
43 .trace = print_syscall_enter,
44};
45
46struct trace_event_functions exit_syscall_print_funcs = {
47 .trace = print_syscall_exit,
48};
49
50struct ftrace_event_class event_class_syscall_enter = {
51 .system = "syscalls",
52 .reg = syscall_enter_register,
53 .define_fields = syscall_enter_define_fields,
54 .get_fields = syscall_get_enter_fields,
55 .raw_init = init_syscall_trace,
56};
57
58struct ftrace_event_class event_class_syscall_exit = {
59 .system = "syscalls",
60 .reg = syscall_exit_register,
61 .define_fields = syscall_exit_define_fields,
62 .get_fields = syscall_get_exit_fields,
63 .raw_init = init_syscall_trace,
64};
65
17extern unsigned long __start_syscalls_metadata[]; 66extern unsigned long __start_syscalls_metadata[];
18extern unsigned long __stop_syscalls_metadata[]; 67extern unsigned long __stop_syscalls_metadata[];
19 68
@@ -52,7 +101,8 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr)
52} 101}
53 102
54enum print_line_t 103enum print_line_t
55print_syscall_enter(struct trace_iterator *iter, int flags) 104print_syscall_enter(struct trace_iterator *iter, int flags,
105 struct trace_event *event)
56{ 106{
57 struct trace_seq *s = &iter->seq; 107 struct trace_seq *s = &iter->seq;
58 struct trace_entry *ent = iter->ent; 108 struct trace_entry *ent = iter->ent;
@@ -67,7 +117,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags)
67 if (!entry) 117 if (!entry)
68 goto end; 118 goto end;
69 119
70 if (entry->enter_event->id != ent->type) { 120 if (entry->enter_event->event.type != ent->type) {
71 WARN_ON_ONCE(1); 121 WARN_ON_ONCE(1);
72 goto end; 122 goto end;
73 } 123 }
@@ -104,7 +154,8 @@ end:
104} 154}
105 155
106enum print_line_t 156enum print_line_t
107print_syscall_exit(struct trace_iterator *iter, int flags) 157print_syscall_exit(struct trace_iterator *iter, int flags,
158 struct trace_event *event)
108{ 159{
109 struct trace_seq *s = &iter->seq; 160 struct trace_seq *s = &iter->seq;
110 struct trace_entry *ent = iter->ent; 161 struct trace_entry *ent = iter->ent;
@@ -122,7 +173,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
122 return TRACE_TYPE_HANDLED; 173 return TRACE_TYPE_HANDLED;
123 } 174 }
124 175
125 if (entry->exit_event->id != ent->type) { 176 if (entry->exit_event->event.type != ent->type) {
126 WARN_ON_ONCE(1); 177 WARN_ON_ONCE(1);
127 return TRACE_TYPE_UNHANDLED; 178 return TRACE_TYPE_UNHANDLED;
128 } 179 }
@@ -143,73 +194,68 @@ extern char *__bad_type_size(void);
143 #type, #name, offsetof(typeof(trace), name), \ 194 #type, #name, offsetof(typeof(trace), name), \
144 sizeof(trace.name), is_signed_type(type) 195 sizeof(trace.name), is_signed_type(type)
145 196
146int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) 197static
198int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
147{ 199{
148 int i; 200 int i;
149 int ret; 201 int pos = 0;
150 struct syscall_metadata *entry = call->data;
151 struct syscall_trace_enter trace;
152 int offset = offsetof(struct syscall_trace_enter, args);
153 202
154 ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 203 /* When len=0, we just calculate the needed length */
155 "\tsigned:%u;\n", 204#define LEN_OR_ZERO (len ? len - pos : 0)
156 SYSCALL_FIELD(int, nr));
157 if (!ret)
158 return 0;
159 205
206 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
160 for (i = 0; i < entry->nb_args; i++) { 207 for (i = 0; i < entry->nb_args; i++) {
161 ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], 208 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
162 entry->args[i]); 209 entry->args[i], sizeof(unsigned long),
163 if (!ret) 210 i == entry->nb_args - 1 ? "" : ", ");
164 return 0;
165 ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
166 "\tsigned:%u;\n", offset,
167 sizeof(unsigned long),
168 is_signed_type(unsigned long));
169 if (!ret)
170 return 0;
171 offset += sizeof(unsigned long);
172 } 211 }
212 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
173 213
174 trace_seq_puts(s, "\nprint fmt: \"");
175 for (i = 0; i < entry->nb_args; i++) { 214 for (i = 0; i < entry->nb_args; i++) {
176 ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], 215 pos += snprintf(buf + pos, LEN_OR_ZERO,
177 sizeof(unsigned long), 216 ", ((unsigned long)(REC->%s))", entry->args[i]);
178 i == entry->nb_args - 1 ? "" : ", ");
179 if (!ret)
180 return 0;
181 } 217 }
182 trace_seq_putc(s, '"');
183 218
184 for (i = 0; i < entry->nb_args; i++) { 219#undef LEN_OR_ZERO
185 ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
186 entry->args[i]);
187 if (!ret)
188 return 0;
189 }
190 220
191 return trace_seq_putc(s, '\n'); 221 /* return the length of print_fmt */
222 return pos;
192} 223}
193 224
194int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) 225static int set_syscall_print_fmt(struct ftrace_event_call *call)
195{ 226{
196 int ret; 227 char *print_fmt;
197 struct syscall_trace_exit trace; 228 int len;
229 struct syscall_metadata *entry = call->data;
198 230
199 ret = trace_seq_printf(s, 231 if (entry->enter_event != call) {
200 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" 232 call->print_fmt = "\"0x%lx\", REC->ret";
201 "\tsigned:%u;\n"
202 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
203 "\tsigned:%u;\n",
204 SYSCALL_FIELD(int, nr),
205 SYSCALL_FIELD(long, ret));
206 if (!ret)
207 return 0; 233 return 0;
234 }
235
236 /* First: called with 0 length to calculate the needed length */
237 len = __set_enter_print_fmt(entry, NULL, 0);
238
239 print_fmt = kmalloc(len + 1, GFP_KERNEL);
240 if (!print_fmt)
241 return -ENOMEM;
208 242
209 return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); 243 /* Second: actually write the @print_fmt */
244 __set_enter_print_fmt(entry, print_fmt, len + 1);
245 call->print_fmt = print_fmt;
246
247 return 0;
248}
249
250static void free_syscall_print_fmt(struct ftrace_event_call *call)
251{
252 struct syscall_metadata *entry = call->data;
253
254 if (entry->enter_event == call)
255 kfree(call->print_fmt);
210} 256}
211 257
212int syscall_enter_define_fields(struct ftrace_event_call *call) 258static int syscall_enter_define_fields(struct ftrace_event_call *call)
213{ 259{
214 struct syscall_trace_enter trace; 260 struct syscall_trace_enter trace;
215 struct syscall_metadata *meta = call->data; 261 struct syscall_metadata *meta = call->data;
@@ -232,7 +278,7 @@ int syscall_enter_define_fields(struct ftrace_event_call *call)
232 return ret; 278 return ret;
233} 279}
234 280
235int syscall_exit_define_fields(struct ftrace_event_call *call) 281static int syscall_exit_define_fields(struct ftrace_event_call *call)
236{ 282{
237 struct syscall_trace_exit trace; 283 struct syscall_trace_exit trace;
238 int ret; 284 int ret;
@@ -247,7 +293,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call)
247 return ret; 293 return ret;
248} 294}
249 295
250void ftrace_syscall_enter(struct pt_regs *regs, long id) 296void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
251{ 297{
252 struct syscall_trace_enter *entry; 298 struct syscall_trace_enter *entry;
253 struct syscall_metadata *sys_data; 299 struct syscall_metadata *sys_data;
@@ -269,7 +315,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
269 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; 315 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
270 316
271 event = trace_current_buffer_lock_reserve(&buffer, 317 event = trace_current_buffer_lock_reserve(&buffer,
272 sys_data->enter_event->id, size, 0, 0); 318 sys_data->enter_event->event.type, size, 0, 0);
273 if (!event) 319 if (!event)
274 return; 320 return;
275 321
@@ -282,7 +328,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id)
282 trace_current_buffer_unlock_commit(buffer, event, 0, 0); 328 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
283} 329}
284 330
285void ftrace_syscall_exit(struct pt_regs *regs, long ret) 331void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
286{ 332{
287 struct syscall_trace_exit *entry; 333 struct syscall_trace_exit *entry;
288 struct syscall_metadata *sys_data; 334 struct syscall_metadata *sys_data;
@@ -301,7 +347,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret)
301 return; 347 return;
302 348
303 event = trace_current_buffer_lock_reserve(&buffer, 349 event = trace_current_buffer_lock_reserve(&buffer,
304 sys_data->exit_event->id, sizeof(*entry), 0, 0); 350 sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
305 if (!event) 351 if (!event)
306 return; 352 return;
307 353
@@ -324,7 +370,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
324 return -ENOSYS; 370 return -ENOSYS;
325 mutex_lock(&syscall_trace_lock); 371 mutex_lock(&syscall_trace_lock);
326 if (!sys_refcount_enter) 372 if (!sys_refcount_enter)
327 ret = register_trace_sys_enter(ftrace_syscall_enter); 373 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
328 if (!ret) { 374 if (!ret) {
329 set_bit(num, enabled_enter_syscalls); 375 set_bit(num, enabled_enter_syscalls);
330 sys_refcount_enter++; 376 sys_refcount_enter++;
@@ -344,7 +390,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
344 sys_refcount_enter--; 390 sys_refcount_enter--;
345 clear_bit(num, enabled_enter_syscalls); 391 clear_bit(num, enabled_enter_syscalls);
346 if (!sys_refcount_enter) 392 if (!sys_refcount_enter)
347 unregister_trace_sys_enter(ftrace_syscall_enter); 393 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
348 mutex_unlock(&syscall_trace_lock); 394 mutex_unlock(&syscall_trace_lock);
349} 395}
350 396
@@ -358,7 +404,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
358 return -ENOSYS; 404 return -ENOSYS;
359 mutex_lock(&syscall_trace_lock); 405 mutex_lock(&syscall_trace_lock);
360 if (!sys_refcount_exit) 406 if (!sys_refcount_exit)
361 ret = register_trace_sys_exit(ftrace_syscall_exit); 407 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
362 if (!ret) { 408 if (!ret) {
363 set_bit(num, enabled_exit_syscalls); 409 set_bit(num, enabled_exit_syscalls);
364 sys_refcount_exit++; 410 sys_refcount_exit++;
@@ -378,7 +424,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
378 sys_refcount_exit--; 424 sys_refcount_exit--;
379 clear_bit(num, enabled_exit_syscalls); 425 clear_bit(num, enabled_exit_syscalls);
380 if (!sys_refcount_exit) 426 if (!sys_refcount_exit)
381 unregister_trace_sys_exit(ftrace_syscall_exit); 427 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
382 mutex_unlock(&syscall_trace_lock); 428 mutex_unlock(&syscall_trace_lock);
383} 429}
384 430
@@ -386,12 +432,22 @@ int init_syscall_trace(struct ftrace_event_call *call)
386{ 432{
387 int id; 433 int id;
388 434
389 id = register_ftrace_event(call->event); 435 if (set_syscall_print_fmt(call) < 0)
390 if (!id) 436 return -ENOMEM;
391 return -ENODEV; 437
392 call->id = id; 438 id = trace_event_raw_init(call);
393 INIT_LIST_HEAD(&call->fields); 439
394 return 0; 440 if (id < 0) {
441 free_syscall_print_fmt(call);
442 return id;
443 }
444
445 return id;
446}
447
448unsigned long __init arch_syscall_addr(int nr)
449{
450 return (unsigned long)sys_call_table[nr];
395} 451}
396 452
397int __init init_ftrace_syscalls(void) 453int __init init_ftrace_syscalls(void)
@@ -421,27 +477,24 @@ int __init init_ftrace_syscalls(void)
421} 477}
422core_initcall(init_ftrace_syscalls); 478core_initcall(init_ftrace_syscalls);
423 479
424#ifdef CONFIG_EVENT_PROFILE 480#ifdef CONFIG_PERF_EVENTS
425 481
426static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 482static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
427static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 483static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
428static int sys_prof_refcount_enter; 484static int sys_perf_refcount_enter;
429static int sys_prof_refcount_exit; 485static int sys_perf_refcount_exit;
430 486
431static void prof_syscall_enter(struct pt_regs *regs, long id) 487static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
432{ 488{
433 struct syscall_metadata *sys_data; 489 struct syscall_metadata *sys_data;
434 struct syscall_trace_enter *rec; 490 struct syscall_trace_enter *rec;
435 unsigned long flags; 491 struct hlist_head *head;
436 char *trace_buf;
437 char *raw_data;
438 int syscall_nr; 492 int syscall_nr;
439 int rctx; 493 int rctx;
440 int size; 494 int size;
441 int cpu;
442 495
443 syscall_nr = syscall_get_nr(current, regs); 496 syscall_nr = syscall_get_nr(current, regs);
444 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 497 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
445 return; 498 return;
446 499
447 sys_data = syscall_nr_to_meta(syscall_nr); 500 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -453,44 +506,24 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
453 size = ALIGN(size + sizeof(u32), sizeof(u64)); 506 size = ALIGN(size + sizeof(u32), sizeof(u64));
454 size -= sizeof(u32); 507 size -= sizeof(u32);
455 508
456 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 509 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
457 "profile buffer not large enough")) 510 "perf buffer not large enough"))
458 return; 511 return;
459 512
460 /* Protect the per cpu buffer, begin the rcu read side */ 513 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
461 local_irq_save(flags); 514 sys_data->enter_event->event.type, regs, &rctx);
462 515 if (!rec)
463 rctx = perf_swevent_get_recursion_context(); 516 return;
464 if (rctx < 0)
465 goto end_recursion;
466
467 cpu = smp_processor_id();
468
469 trace_buf = rcu_dereference(perf_trace_buf);
470
471 if (!trace_buf)
472 goto end;
473
474 raw_data = per_cpu_ptr(trace_buf, cpu);
475
476 /* zero the dead bytes from align to not leak stack to user */
477 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
478 517
479 rec = (struct syscall_trace_enter *) raw_data;
480 tracing_generic_entry_update(&rec->ent, 0, 0);
481 rec->ent.type = sys_data->enter_event->id;
482 rec->nr = syscall_nr; 518 rec->nr = syscall_nr;
483 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 519 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
484 (unsigned long *)&rec->args); 520 (unsigned long *)&rec->args);
485 perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);
486 521
487end: 522 head = this_cpu_ptr(sys_data->enter_event->perf_events);
488 perf_swevent_put_recursion_context(rctx); 523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
489end_recursion:
490 local_irq_restore(flags);
491} 524}
492 525
493int prof_sysenter_enable(struct ftrace_event_call *call) 526int perf_sysenter_enable(struct ftrace_event_call *call)
494{ 527{
495 int ret = 0; 528 int ret = 0;
496 int num; 529 int num;
@@ -498,47 +531,44 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
498 num = ((struct syscall_metadata *)call->data)->syscall_nr; 531 num = ((struct syscall_metadata *)call->data)->syscall_nr;
499 532
500 mutex_lock(&syscall_trace_lock); 533 mutex_lock(&syscall_trace_lock);
501 if (!sys_prof_refcount_enter) 534 if (!sys_perf_refcount_enter)
502 ret = register_trace_sys_enter(prof_syscall_enter); 535 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
503 if (ret) { 536 if (ret) {
504 pr_info("event trace: Could not activate" 537 pr_info("event trace: Could not activate"
505 "syscall entry trace point"); 538 "syscall entry trace point");
506 } else { 539 } else {
507 set_bit(num, enabled_prof_enter_syscalls); 540 set_bit(num, enabled_perf_enter_syscalls);
508 sys_prof_refcount_enter++; 541 sys_perf_refcount_enter++;
509 } 542 }
510 mutex_unlock(&syscall_trace_lock); 543 mutex_unlock(&syscall_trace_lock);
511 return ret; 544 return ret;
512} 545}
513 546
514void prof_sysenter_disable(struct ftrace_event_call *call) 547void perf_sysenter_disable(struct ftrace_event_call *call)
515{ 548{
516 int num; 549 int num;
517 550
518 num = ((struct syscall_metadata *)call->data)->syscall_nr; 551 num = ((struct syscall_metadata *)call->data)->syscall_nr;
519 552
520 mutex_lock(&syscall_trace_lock); 553 mutex_lock(&syscall_trace_lock);
521 sys_prof_refcount_enter--; 554 sys_perf_refcount_enter--;
522 clear_bit(num, enabled_prof_enter_syscalls); 555 clear_bit(num, enabled_perf_enter_syscalls);
523 if (!sys_prof_refcount_enter) 556 if (!sys_perf_refcount_enter)
524 unregister_trace_sys_enter(prof_syscall_enter); 557 unregister_trace_sys_enter(perf_syscall_enter, NULL);
525 mutex_unlock(&syscall_trace_lock); 558 mutex_unlock(&syscall_trace_lock);
526} 559}
527 560
528static void prof_syscall_exit(struct pt_regs *regs, long ret) 561static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
529{ 562{
530 struct syscall_metadata *sys_data; 563 struct syscall_metadata *sys_data;
531 struct syscall_trace_exit *rec; 564 struct syscall_trace_exit *rec;
532 unsigned long flags; 565 struct hlist_head *head;
533 int syscall_nr; 566 int syscall_nr;
534 char *trace_buf;
535 char *raw_data;
536 int rctx; 567 int rctx;
537 int size; 568 int size;
538 int cpu;
539 569
540 syscall_nr = syscall_get_nr(current, regs); 570 syscall_nr = syscall_get_nr(current, regs);
541 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 571 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
542 return; 572 return;
543 573
544 sys_data = syscall_nr_to_meta(syscall_nr); 574 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -553,45 +583,23 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
553 * Impossible, but be paranoid with the future 583 * Impossible, but be paranoid with the future
554 * How to put this check outside runtime? 584 * How to put this check outside runtime?
555 */ 585 */
556 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 586 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
557 "exit event has grown above profile buffer size")) 587 "exit event has grown above perf buffer size"))
558 return; 588 return;
559 589
560 /* Protect the per cpu buffer, begin the rcu read side */ 590 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
561 local_irq_save(flags); 591 sys_data->exit_event->event.type, regs, &rctx);
562 592 if (!rec)
563 rctx = perf_swevent_get_recursion_context(); 593 return;
564 if (rctx < 0)
565 goto end_recursion;
566
567 cpu = smp_processor_id();
568
569 trace_buf = rcu_dereference(perf_trace_buf);
570
571 if (!trace_buf)
572 goto end;
573
574 raw_data = per_cpu_ptr(trace_buf, cpu);
575
576 /* zero the dead bytes from align to not leak stack to user */
577 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
578
579 rec = (struct syscall_trace_exit *)raw_data;
580 594
581 tracing_generic_entry_update(&rec->ent, 0, 0);
582 rec->ent.type = sys_data->exit_event->id;
583 rec->nr = syscall_nr; 595 rec->nr = syscall_nr;
584 rec->ret = syscall_get_return_value(current, regs); 596 rec->ret = syscall_get_return_value(current, regs);
585 597
586 perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); 598 head = this_cpu_ptr(sys_data->exit_event->perf_events);
587 599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
588end:
589 perf_swevent_put_recursion_context(rctx);
590end_recursion:
591 local_irq_restore(flags);
592} 600}
593 601
594int prof_sysexit_enable(struct ftrace_event_call *call) 602int perf_sysexit_enable(struct ftrace_event_call *call)
595{ 603{
596 int ret = 0; 604 int ret = 0;
597 int num; 605 int num;
@@ -599,33 +607,73 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
599 num = ((struct syscall_metadata *)call->data)->syscall_nr; 607 num = ((struct syscall_metadata *)call->data)->syscall_nr;
600 608
601 mutex_lock(&syscall_trace_lock); 609 mutex_lock(&syscall_trace_lock);
602 if (!sys_prof_refcount_exit) 610 if (!sys_perf_refcount_exit)
603 ret = register_trace_sys_exit(prof_syscall_exit); 611 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
604 if (ret) { 612 if (ret) {
605 pr_info("event trace: Could not activate" 613 pr_info("event trace: Could not activate"
606 "syscall entry trace point"); 614 "syscall exit trace point");
607 } else { 615 } else {
608 set_bit(num, enabled_prof_exit_syscalls); 616 set_bit(num, enabled_perf_exit_syscalls);
609 sys_prof_refcount_exit++; 617 sys_perf_refcount_exit++;
610 } 618 }
611 mutex_unlock(&syscall_trace_lock); 619 mutex_unlock(&syscall_trace_lock);
612 return ret; 620 return ret;
613} 621}
614 622
615void prof_sysexit_disable(struct ftrace_event_call *call) 623void perf_sysexit_disable(struct ftrace_event_call *call)
616{ 624{
617 int num; 625 int num;
618 626
619 num = ((struct syscall_metadata *)call->data)->syscall_nr; 627 num = ((struct syscall_metadata *)call->data)->syscall_nr;
620 628
621 mutex_lock(&syscall_trace_lock); 629 mutex_lock(&syscall_trace_lock);
622 sys_prof_refcount_exit--; 630 sys_perf_refcount_exit--;
623 clear_bit(num, enabled_prof_exit_syscalls); 631 clear_bit(num, enabled_perf_exit_syscalls);
624 if (!sys_prof_refcount_exit) 632 if (!sys_perf_refcount_exit)
625 unregister_trace_sys_exit(prof_syscall_exit); 633 unregister_trace_sys_exit(perf_syscall_exit, NULL);
626 mutex_unlock(&syscall_trace_lock); 634 mutex_unlock(&syscall_trace_lock);
627} 635}
628 636
637#endif /* CONFIG_PERF_EVENTS */
638
639static int syscall_enter_register(struct ftrace_event_call *event,
640 enum trace_reg type)
641{
642 switch (type) {
643 case TRACE_REG_REGISTER:
644 return reg_event_syscall_enter(event);
645 case TRACE_REG_UNREGISTER:
646 unreg_event_syscall_enter(event);
647 return 0;
648
649#ifdef CONFIG_PERF_EVENTS
650 case TRACE_REG_PERF_REGISTER:
651 return perf_sysenter_enable(event);
652 case TRACE_REG_PERF_UNREGISTER:
653 perf_sysenter_disable(event);
654 return 0;
629#endif 655#endif
656 }
657 return 0;
658}
630 659
660static int syscall_exit_register(struct ftrace_event_call *event,
661 enum trace_reg type)
662{
663 switch (type) {
664 case TRACE_REG_REGISTER:
665 return reg_event_syscall_exit(event);
666 case TRACE_REG_UNREGISTER:
667 unreg_event_syscall_exit(event);
668 return 0;
631 669
670#ifdef CONFIG_PERF_EVENTS
671 case TRACE_REG_PERF_REGISTER:
672 return perf_sysexit_enable(event);
673 case TRACE_REG_PERF_UNREGISTER:
674 perf_sysexit_disable(event);
675 return 0;
676#endif
677 }
678 return 0;
679}
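
The trace_syscalls.c changes above rename the prof_* syscall perf hooks to perf_* and funnel both ftrace and perf registration through a single per-event callback that switches on enum trace_reg; the apparent intent is that one registration hook can cover both paths. Below is a minimal, user-space sketch of that dispatch pattern only, not the kernel implementation: struct my_event and the my_* helpers are hypothetical stand-ins, and only the four TRACE_REG_* cases visible in the diff are modelled (in the real code the two perf cases sit under #ifdef CONFIG_PERF_EVENTS).

#include <stdio.h>

/*
 * Sketch of the trace_reg dispatch used by syscall_enter_register()
 * and syscall_exit_register() above.  my_event and the my_* helpers
 * are stand-ins, not kernel APIs; only the switch mirrors the diff.
 */
enum trace_reg {
	TRACE_REG_REGISTER,
	TRACE_REG_UNREGISTER,
	TRACE_REG_PERF_REGISTER,	/* under CONFIG_PERF_EVENTS in the kernel */
	TRACE_REG_PERF_UNREGISTER,
};

struct my_event { const char *name; };	/* stand-in for struct ftrace_event_call */

static int  my_reg_ftrace(struct my_event *ev)   { printf("ftrace on: %s\n", ev->name); return 0; }
static void my_unreg_ftrace(struct my_event *ev) { printf("ftrace off: %s\n", ev->name); }
static int  my_reg_perf(struct my_event *ev)     { printf("perf on: %s\n", ev->name); return 0; }
static void my_unreg_perf(struct my_event *ev)   { printf("perf off: %s\n", ev->name); }

static int my_event_register(struct my_event *ev, enum trace_reg type)
{
	switch (type) {
	case TRACE_REG_REGISTER:
		return my_reg_ftrace(ev);
	case TRACE_REG_UNREGISTER:
		my_unreg_ftrace(ev);
		return 0;
	case TRACE_REG_PERF_REGISTER:
		return my_reg_perf(ev);
	case TRACE_REG_PERF_UNREGISTER:
		my_unreg_perf(ev);
		return 0;
	}
	return 0;
}

int main(void)
{
	struct my_event ev = { "sys_enter_open" };	/* illustrative event name */

	my_event_register(&ev, TRACE_REG_REGISTER);
	my_event_register(&ev, TRACE_REG_UNREGISTER);
	return 0;
}
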
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..a7cc3793baf6 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
12#include <linux/kref.h> 13#include <linux/kref.h>
13#include "trace_stat.h" 14#include "trace_stat.h"
14#include "trace.h" 15#include "trace.h"
@@ -48,7 +49,8 @@ static void cpu_workqueue_stat_free(struct kref *kref)
48 49
49/* Insertion of a work */ 50/* Insertion of a work */
50static void 51static void
51probe_workqueue_insertion(struct task_struct *wq_thread, 52probe_workqueue_insertion(void *ignore,
53 struct task_struct *wq_thread,
52 struct work_struct *work) 54 struct work_struct *work)
53{ 55{
54 int cpu = cpumask_first(&wq_thread->cpus_allowed); 56 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -69,7 +71,8 @@ found:
69 71
70/* Execution of a work */ 72/* Execution of a work */
71static void 73static void
72probe_workqueue_execution(struct task_struct *wq_thread, 74probe_workqueue_execution(void *ignore,
75 struct task_struct *wq_thread,
73 struct work_struct *work) 76 struct work_struct *work)
74{ 77{
75 int cpu = cpumask_first(&wq_thread->cpus_allowed); 78 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -89,7 +92,8 @@ found:
89} 92}
90 93
91/* Creation of a cpu workqueue thread */ 94/* Creation of a cpu workqueue thread */
92static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) 95static void probe_workqueue_creation(void *ignore,
96 struct task_struct *wq_thread, int cpu)
93{ 97{
94 struct cpu_workqueue_stats *cws; 98 struct cpu_workqueue_stats *cws;
95 unsigned long flags; 99 unsigned long flags;
@@ -113,7 +117,8 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
113} 117}
114 118
115/* Destruction of a cpu workqueue thread */ 119/* Destruction of a cpu workqueue thread */
116static void probe_workqueue_destruction(struct task_struct *wq_thread) 120static void
121probe_workqueue_destruction(void *ignore, struct task_struct *wq_thread)
117{ 122{
118 /* Workqueue only execute on one cpu */ 123 /* Workqueue only execute on one cpu */
119 int cpu = cpumask_first(&wq_thread->cpus_allowed); 124 int cpu = cpumask_first(&wq_thread->cpus_allowed);
@@ -258,19 +263,19 @@ int __init trace_workqueue_early_init(void)
258{ 263{
259 int ret, cpu; 264 int ret, cpu;
260 265
261 ret = register_trace_workqueue_insertion(probe_workqueue_insertion); 266 ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
262 if (ret) 267 if (ret)
263 goto out; 268 goto out;
264 269
265 ret = register_trace_workqueue_execution(probe_workqueue_execution); 270 ret = register_trace_workqueue_execution(probe_workqueue_execution, NULL);
266 if (ret) 271 if (ret)
267 goto no_insertion; 272 goto no_insertion;
268 273
269 ret = register_trace_workqueue_creation(probe_workqueue_creation); 274 ret = register_trace_workqueue_creation(probe_workqueue_creation, NULL);
270 if (ret) 275 if (ret)
271 goto no_execution; 276 goto no_execution;
272 277
273 ret = register_trace_workqueue_destruction(probe_workqueue_destruction); 278 ret = register_trace_workqueue_destruction(probe_workqueue_destruction, NULL);
274 if (ret) 279 if (ret)
275 goto no_creation; 280 goto no_creation;
276 281
@@ -282,11 +287,11 @@ int __init trace_workqueue_early_init(void)
282 return 0; 287 return 0;
283 288
284no_creation: 289no_creation:
285 unregister_trace_workqueue_creation(probe_workqueue_creation); 290 unregister_trace_workqueue_creation(probe_workqueue_creation, NULL);
286no_execution: 291no_execution:
287 unregister_trace_workqueue_execution(probe_workqueue_execution); 292 unregister_trace_workqueue_execution(probe_workqueue_execution, NULL);
288no_insertion: 293no_insertion:
289 unregister_trace_workqueue_insertion(probe_workqueue_insertion); 294 unregister_trace_workqueue_insertion(probe_workqueue_insertion, NULL);
290out: 295out:
291 pr_warning("trace_workqueue: unable to trace workqueues\n"); 296 pr_warning("trace_workqueue: unable to trace workqueues\n");
292 297
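
The trace_workqueue.c hunk tracks the updated tracepoint probe convention shown in the diff: each probe now receives, as its first argument, the private-data pointer it was registered with, and register_trace_*()/unregister_trace_*() take that pointer as a second parameter (NULL here, hence the "void *ignore" naming). The sketch below illustrates only that calling shape; the struct definitions, register_probe() and fire_tracepoint() are hypothetical stand-ins, not the real tracepoint machinery.

#include <stdio.h>

/*
 * Stand-in demo of "probe gets its registration data back first".
 * None of these names are kernel APIs; register_probe() mirrors the
 * register_trace_workqueue_*(probe, NULL) calls in the diff.
 */
struct task_struct { int pid; };	/* stand-in */
struct work_struct { int id; };		/* stand-in */

typedef void (*wq_probe_t)(void *data, struct task_struct *wq_thread,
			   struct work_struct *work);

static wq_probe_t registered_probe;
static void *registered_data;

static int register_probe(wq_probe_t probe, void *data)
{
	registered_probe = probe;
	registered_data = data;		/* handed back to the probe on every hit */
	return 0;
}

static void fire_tracepoint(struct task_struct *t, struct work_struct *w)
{
	if (registered_probe)
		registered_probe(registered_data, t, w);
}

/* New-style probe: leading data pointer, unused here, so named "ignore". */
static void probe_insertion(void *ignore, struct task_struct *wq_thread,
			    struct work_struct *work)
{
	printf("work %d queued on thread %d\n", work->id, wq_thread->pid);
}

int main(void)
{
	struct task_struct t = { 42 };
	struct work_struct w = { 7 };

	register_probe(probe_insertion, NULL);
	fire_tracepoint(&t, &w);
	return 0;
}
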