aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/DocBook/Makefile3
-rw-r--r--Documentation/DocBook/tracepoint.tmpl89
-rw-r--r--Documentation/trace/events.txt135
-rw-r--r--Documentation/trace/power.txt17
-rw-r--r--arch/x86/include/asm/tlbflush.h8
-rw-r--r--arch/x86/kernel/entry_64.S19
-rw-r--r--arch/x86/mm/kmmio.c104
-rw-r--r--arch/x86/mm/mmio-mod.c2
-rw-r--r--block/blk-core.c5
-rw-r--r--block/blk-sysfs.c7
-rw-r--r--block/compat_ioctl.c2
-rw-r--r--drivers/md/dm.c3
-rw-r--r--drivers/scsi/sg.c1
-rw-r--r--include/asm-generic/vmlinux.lds.h2
-rw-r--r--include/linux/blktrace_api.h32
-rw-r--r--include/linux/ftrace.h17
-rw-r--r--include/linux/ftrace_event.h156
-rw-r--r--include/linux/init_task.h1
-rw-r--r--include/linux/kmemtrace.h25
-rw-r--r--include/linux/mmiotrace.h2
-rw-r--r--include/linux/module.h8
-rw-r--r--include/linux/ring_buffer.h52
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/slab_def.h2
-rw-r--r--include/linux/slub_def.h2
-rw-r--r--include/linux/trace_seq.h90
-rw-r--r--include/linux/tracepoint.h8
-rw-r--r--include/trace/block.h4
-rw-r--r--include/trace/define_trace.h75
-rw-r--r--include/trace/events/irq.h134
-rw-r--r--include/trace/events/kmem.h194
-rw-r--r--include/trace/events/lockdep.h96
-rw-r--r--include/trace/events/sched.h (renamed from include/trace/sched_event_types.h)20
-rw-r--r--include/trace/events/skb.h40
-rw-r--r--include/trace/ftrace.h509
-rw-r--r--include/trace/irq.h9
-rw-r--r--include/trace/irq_event_types.h55
-rw-r--r--include/trace/kmemtrace.h63
-rw-r--r--include/trace/lockdep.h9
-rw-r--r--include/trace/lockdep_event_types.h44
-rw-r--r--include/trace/sched.h9
-rw-r--r--include/trace/skb.h11
-rw-r--r--include/trace/trace_event_types.h5
-rw-r--r--include/trace/trace_events.h5
-rw-r--r--init/main.c2
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/irq/handle.c5
-rw-r--r--kernel/kthread.c5
-rw-r--r--kernel/lockdep.c16
-rw-r--r--kernel/module.c26
-rw-r--r--kernel/sched.c12
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/softirq.c7
-rw-r--r--kernel/trace/Kconfig89
-rw-r--r--kernel/trace/Makefile8
-rw-r--r--kernel/trace/blktrace.c193
-rw-r--r--kernel/trace/events.c14
-rw-r--r--kernel/trace/ftrace.c726
-rw-r--r--kernel/trace/kmemtrace.c10
-rw-r--r--kernel/trace/ring_buffer.c657
-rw-r--r--kernel/trace/ring_buffer_benchmark.c413
-rw-r--r--kernel/trace/trace.c370
-rw-r--r--kernel/trace/trace.h241
-rw-r--r--kernel/trace/trace_boot.c5
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_event_profile.c24
-rw-r--r--kernel/trace/trace_event_types.h12
-rw-r--r--kernel/trace/trace_events.c830
-rw-r--r--kernel/trace/trace_events_filter.c1200
-rw-r--r--kernel/trace/trace_events_stage_1.h39
-rw-r--r--kernel/trace/trace_events_stage_2.h176
-rw-r--r--kernel/trace/trace_events_stage_3.h281
-rw-r--r--kernel/trace/trace_export.c110
-rw-r--r--kernel/trace/trace_functions_graph.c25
-rw-r--r--kernel/trace/trace_hw_branches.c4
-rw-r--r--kernel/trace/trace_mmiotrace.c6
-rw-r--r--kernel/trace/trace_output.c82
-rw-r--r--kernel/trace/trace_output.h28
-rw-r--r--kernel/trace/trace_power.c8
-rw-r--r--kernel/trace/trace_printk.c6
-rw-r--r--kernel/trace/trace_sched_switch.c12
-rw-r--r--kernel/trace/trace_sched_wakeup.c8
-rw-r--r--kernel/trace/trace_stack.c13
-rw-r--r--kernel/trace/trace_stat.c2
-rw-r--r--kernel/trace/trace_stat.h2
-rw-r--r--kernel/trace/trace_sysprof.c6
-rw-r--r--kernel/trace/trace_workqueue.c2
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c2
-rw-r--r--mm/util.c11
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/net-traces.c4
-rw-r--r--net/core/skbuff.c2
-rw-r--r--samples/Kconfig6
-rw-r--r--samples/Makefile2
-rw-r--r--samples/trace_events/Makefile6
-rw-r--r--samples/trace_events/trace-events-sample.c52
-rw-r--r--samples/trace_events/trace-events-sample.h129
-rwxr-xr-xscripts/kernel-doc22
-rwxr-xr-xscripts/recordmcount.pl6
102 files changed, 6035 insertions, 1989 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index b1eb661e6302..9632444f6c62 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \ 14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
15 mac80211.xml debugobjects.xml sh.xml regulator.xml \ 15 mac80211.xml debugobjects.xml sh.xml regulator.xml \
16 alsa-driver-api.xml writing-an-alsa-driver.xml 16 alsa-driver-api.xml writing-an-alsa-driver.xml \
17 tracepoint.xml
17 18
18### 19###
19# The build process is as follows (targets): 20# The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 000000000000..b0756d0fd579
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,89 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="Tracepoints">
6 <bookinfo>
7 <title>The Linux Kernel Tracepoint API</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Jason</firstname>
12 <surname>Baron</surname>
13 <affiliation>
14 <address>
15 <email>jbaron@redhat.com</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <legalnotice>
22 <para>
23 This documentation is free software; you can redistribute
24 it and/or modify it under the terms of the GNU General Public
25 License as published by the Free Software Foundation; either
26 version 2 of the License, or (at your option) any later
27 version.
28 </para>
29
30 <para>
31 This program is distributed in the hope that it will be
32 useful, but WITHOUT ANY WARRANTY; without even the implied
33 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
34 See the GNU General Public License for more details.
35 </para>
36
37 <para>
38 You should have received a copy of the GNU General Public
39 License along with this program; if not, write to the Free
40 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
41 MA 02111-1307 USA
42 </para>
43
44 <para>
45 For more details see the file COPYING in the source
46 distribution of Linux.
47 </para>
48 </legalnotice>
49 </bookinfo>
50
51 <toc></toc>
52 <chapter id="intro">
53 <title>Introduction</title>
54 <para>
55 Tracepoints are static probe points that are located in strategic points
56 throughout the kernel. 'Probes' register/unregister with tracepoints
57 via a callback mechanism. The 'probes' are strictly typed functions that
58 are passed a unique set of parameters defined by each tracepoint.
59 </para>
60
61 <para>
62 From this simple callback mechanism, 'probes' can be used to profile, debug,
63 and understand kernel behavior. There are a number of tools that provide a
64 framework for using 'probes'. These tools include Systemtap, ftrace, and
65 LTTng.
66 </para>
67
68 <para>
69 Tracepoints are defined in a number of header files via various macros. Thus,
70 the purpose of this document is to provide a clear accounting of the available
71 tracepoints. The intention is to understand not only what tracepoints are
72 available but also to understand where future tracepoints might be added.
73 </para>
74
75 <para>
76 The API presented has functions of the form:
77 <function>trace_tracepointname(function parameters)</function>. These are the
78 tracepoint callbacks that are found throughout the code. Registering and
79 unregistering probes with these callback sites is covered in the
80 <filename>Documentation/trace/*</filename> directory.
81 </para>
82 </chapter>
83
84 <chapter id="irq">
85 <title>IRQ</title>
86!Iinclude/trace/events/irq.h
87 </chapter>
88
89</book>
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 000000000000..abdee664c0f6
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,135 @@
1 Event Tracing
2
3 Documentation written by Theodore Ts'o
4
5Introduction
6============
7
8Tracepoints (see Documentation/trace/tracepoints.txt) can be used
9without creating custom kernel modules to register probe functions
10using the event tracing infrastructure.
11
12Not all tracepoints can be traced using the event tracing system;
13the kernel developer must provide code snippets which define how the
14tracing information is saved into the tracing buffer, and how the
15tracing information should be printed.
16
17Using Event Tracing
18===================
19
20The events which are available for tracing can be found in the file
21/sys/kernel/debug/tracing/available_events.
22
23To enable a particular event, such as 'sched_wakeup', simply echo it
24to /sys/kernel/debug/tracing/set_event. For example:
25
26 # echo sched_wakeup > /sys/kernel/debug/tracing/set_event
27
28[ Note: events can also be enabled/disabled via the 'enabled' toggle
29 found in the /sys/kernel/debug/tracing/events/ hierarchy of directories. ]
30
31To disable an event, echo the event name to the set_event file prefixed
32with an exclamation point:
33
34 # echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event
35
36To disable all events, echo an empty line to the set_event file:
37
38 # echo > /sys/kernel/debug/tracing/set_event
39
40The events are organized into subsystems, such as ext4, irq, sched,
41etc., and a full event name looks like this: <subsystem>:<event>. The
42subsystem name is optional, but it is displayed in the available_events
43file. All of the events in a subsystem can be specified via the syntax
44"<subsystem>:*"; for example, to enable all irq events, you can use the
45command:
46
47 # echo 'irq:*' > /sys/kernel/debug/tracing/set_event
48
49Defining an event-enabled tracepoint
50------------------------------------
51
52A kernel developer who wishes to define an event-enabled tracepoint
53must declare the tracepoint using TRACE_EVENT instead of DECLARE_TRACE.
54This is done via two header files in include/trace. For example, to
55event-enable the jbd2 subsystem, we must create two files,
56include/trace/jbd2.h and include/trace/jbd2_event_types.h. The
57include/trace/jbd2.h file should be included by kernel source files that
58will have a tracepoint inserted, and might look like this:
59
60#ifndef _TRACE_JBD2_H
61#define _TRACE_JBD2_H
62
63#include <linux/jbd2.h>
64#include <linux/tracepoint.h>
65
66#include <trace/jbd2_event_types.h>
67
68#endif
69
70In a file that utilizes a jbd2 tracepoint, this header file would be
71included. Note that you still have to use DEFINE_TRACE(). So for
72example, if fs/jbd2/commit.c planned to use the jbd2_start_commit
73tracepoint, it would have the following near the beginning of the file:
74
75#include <trace/jbd2.h>
76
77DEFINE_TRACE(jbd2_start_commit);
78
79Then in the function that would call the tracepoint, it would call the
80tracepoint function. (For more information, please see the tracepoint
81documentation in Documentation/trace/tracepoints.txt):
82
83 trace_jbd2_start_commit(journal, commit_transaction);
84
85The code snippets which allow jbd2_start_commit to be an event-enabled
86tracepoint are placed in the file include/trace/jbd2_event_types.h:
87
88/* use <trace/jbd2.h> instead */
89#ifndef TRACE_EVENT
90# error Do not include this file directly.
91# error Unless you know what you are doing.
92#endif
93
94#undef TRACE_SYSTEM
95#define TRACE_SYSTEM jbd2
96
97#include <linux/jbd2.h>
98
99TRACE_EVENT(jbd2_start_commit,
100 TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
101 TP_ARGS(journal, commit_transaction),
102 TP_STRUCT__entry(
103 __array( char, devname, BDEVNAME_SIZE+24 )
104 __field( int, transaction )
105 ),
106 TP_fast_assign(
107 memcpy(__entry->devname, journal->j_devname, BDEVNAME_SIZE+24);
108 __entry->transaction = commit_transaction->t_tid;
109 ),
110 TP_printk("dev %s transaction %d",
111 __entry->devname, __entry->transaction)
112);
113
114The TP_PROTO and TP_ARGS are unchanged from DECLARE_TRACE. The new
115arguments to TRACE_EVENT are TP_STRUCT__entry, TP_fast_assign, and
116TP_printk.
117
118TP_STRUCT__entry defines the data structure which will be stored in the
119trace buffer. Normally, fields in __entry will be arrays or simple
120types. It is possible to place data structures in __entry --- however,
121pointers in the data structure can not be trusted, since they will be
122accessed sometime later by TP_printk, and if the data structure contains
123fields that will not or cannot be used by TP_printk, this will waste
124space in the trace buffer. In general, data structures should be
125avoided, unless they contain only non-pointer types and all of the
126fields will be used by TP_printk.
127
128TP_fast_assign defines the code snippet which saves information into the
129__entry data structure, using the passed-in arguments defined in
130TP_PROTO and TP_ARGS.
131
132Finally, TP_printk will print the __entry data structure. At the time
133when the code snippet defined by TP_printk is executed, it will not have
134access to the TP_ARGS arguments; it can only use the information saved
135in the __entry data structure.
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644
index 000000000000..cd805e16dc27
--- /dev/null
+++ b/Documentation/trace/power.txt
@@ -0,0 +1,17 @@
1The power tracer collects detailed information about C-state and P-state
2transitions, instead of just looking at the high-level "average"
3information.
4
5There is a helper script found in scripts/tracing/power.pl in the kernel
6sources which can be used to parse this information and create a
7Scalable Vector Graphics (SVG) picture from the trace data.
8
9To use this tracer:
10
11 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
12 echo power > /sys/kernel/debug/tracing/current_tracer
13 echo 1 > /sys/kernel/debug/tracing/tracing_enabled
14 sleep 1
15 echo 0 > /sys/kernel/debug/tracing/tracing_enabled
16 cat /sys/kernel/debug/tracing/trace | \
17 perl scripts/tracing/power.pl > out.svg
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 16a5c84b0329..a5ecc9c33e92 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,7 +17,7 @@
17 17
18static inline void __native_flush_tlb(void) 18static inline void __native_flush_tlb(void)
19{ 19{
20 write_cr3(read_cr3()); 20 native_write_cr3(native_read_cr3());
21} 21}
22 22
23static inline void __native_flush_tlb_global(void) 23static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
32 */ 32 */
33 raw_local_irq_save(flags); 33 raw_local_irq_save(flags);
34 34
35 cr4 = read_cr4(); 35 cr4 = native_read_cr4();
36 /* clear PGE */ 36 /* clear PGE */
37 write_cr4(cr4 & ~X86_CR4_PGE); 37 native_write_cr4(cr4 & ~X86_CR4_PGE);
38 /* write old PGE again and flush TLBs */ 38 /* write old PGE again and flush TLBs */
39 write_cr4(cr4); 39 native_write_cr4(cr4);
40 40
41 raw_local_irq_restore(flags); 41 raw_local_irq_restore(flags);
42} 42}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 38946c6e8433..987f91f0f755 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -147,27 +147,14 @@ END(ftrace_graph_caller)
147GLOBAL(return_to_handler) 147GLOBAL(return_to_handler)
148 subq $80, %rsp 148 subq $80, %rsp
149 149
150 /* Save the return values */
150 movq %rax, (%rsp) 151 movq %rax, (%rsp)
151 movq %rcx, 8(%rsp) 152 movq %rdx, 8(%rsp)
152 movq %rdx, 16(%rsp)
153 movq %rsi, 24(%rsp)
154 movq %rdi, 32(%rsp)
155 movq %r8, 40(%rsp)
156 movq %r9, 48(%rsp)
157 movq %r10, 56(%rsp)
158 movq %r11, 64(%rsp)
159 153
160 call ftrace_return_to_handler 154 call ftrace_return_to_handler
161 155
162 movq %rax, 72(%rsp) 156 movq %rax, 72(%rsp)
163 movq 64(%rsp), %r11 157 movq 8(%rsp), %rdx
164 movq 56(%rsp), %r10
165 movq 48(%rsp), %r9
166 movq 40(%rsp), %r8
167 movq 32(%rsp), %rdi
168 movq 24(%rsp), %rsi
169 movq 16(%rsp), %rdx
170 movq 8(%rsp), %rcx
171 movq (%rsp), %rax 158 movq (%rsp), %rax
172 addq $72, %rsp 159 addq $72, %rsp
173 retq 160 retq
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 50dc802a1c46..16ccbd77917f 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -32,7 +32,7 @@ struct kmmio_fault_page {
32 struct list_head list; 32 struct list_head list;
33 struct kmmio_fault_page *release_next; 33 struct kmmio_fault_page *release_next;
34 unsigned long page; /* location of the fault page */ 34 unsigned long page; /* location of the fault page */
35 bool old_presence; /* page presence prior to arming */ 35 pteval_t old_presence; /* page presence prior to arming */
36 bool armed; 36 bool armed;
37 37
38 /* 38 /*
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
97static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) 97static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
98{ 98{
99 struct list_head *head; 99 struct list_head *head;
100 struct kmmio_fault_page *p; 100 struct kmmio_fault_page *f;
101 101
102 page &= PAGE_MASK; 102 page &= PAGE_MASK;
103 head = kmmio_page_list(page); 103 head = kmmio_page_list(page);
104 list_for_each_entry_rcu(p, head, list) { 104 list_for_each_entry_rcu(f, head, list) {
105 if (p->page == page) 105 if (f->page == page)
106 return p; 106 return f;
107 } 107 }
108 return NULL; 108 return NULL;
109} 109}
110 110
111static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) 111static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
112{ 112{
113 pmdval_t v = pmd_val(*pmd); 113 pmdval_t v = pmd_val(*pmd);
114 *old = !!(v & _PAGE_PRESENT); 114 if (clear) {
115 v &= ~_PAGE_PRESENT; 115 *old = v & _PAGE_PRESENT;
116 if (present) 116 v &= ~_PAGE_PRESENT;
117 v |= _PAGE_PRESENT; 117 } else /* presume this has been called with clear==true previously */
118 v |= *old;
118 set_pmd(pmd, __pmd(v)); 119 set_pmd(pmd, __pmd(v));
119} 120}
120 121
121static void set_pte_presence(pte_t *pte, bool present, bool *old) 122static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
122{ 123{
123 pteval_t v = pte_val(*pte); 124 pteval_t v = pte_val(*pte);
124 *old = !!(v & _PAGE_PRESENT); 125 if (clear) {
125 v &= ~_PAGE_PRESENT; 126 *old = v & _PAGE_PRESENT;
126 if (present) 127 v &= ~_PAGE_PRESENT;
127 v |= _PAGE_PRESENT; 128 } else /* presume this has been called with clear==true previously */
129 v |= *old;
128 set_pte_atomic(pte, __pte(v)); 130 set_pte_atomic(pte, __pte(v));
129} 131}
130 132
131static int set_page_presence(unsigned long addr, bool present, bool *old) 133static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
132{ 134{
133 unsigned int level; 135 unsigned int level;
134 pte_t *pte = lookup_address(addr, &level); 136 pte_t *pte = lookup_address(f->page, &level);
135 137
136 if (!pte) { 138 if (!pte) {
137 pr_err("kmmio: no pte for page 0x%08lx\n", addr); 139 pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
138 return -1; 140 return -1;
139 } 141 }
140 142
141 switch (level) { 143 switch (level) {
142 case PG_LEVEL_2M: 144 case PG_LEVEL_2M:
143 set_pmd_presence((pmd_t *)pte, present, old); 145 clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
144 break; 146 break;
145 case PG_LEVEL_4K: 147 case PG_LEVEL_4K:
146 set_pte_presence(pte, present, old); 148 clear_pte_presence(pte, clear, &f->old_presence);
147 break; 149 break;
148 default: 150 default:
149 pr_err("kmmio: unexpected page level 0x%x.\n", level); 151 pr_err("kmmio: unexpected page level 0x%x.\n", level);
150 return -1; 152 return -1;
151 } 153 }
152 154
153 __flush_tlb_one(addr); 155 __flush_tlb_one(f->page);
154 return 0; 156 return 0;
155} 157}
156 158
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
171 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); 173 WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
172 if (f->armed) { 174 if (f->armed) {
173 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", 175 pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
174 f->page, f->count, f->old_presence); 176 f->page, f->count, !!f->old_presence);
175 } 177 }
176 ret = set_page_presence(f->page, false, &f->old_presence); 178 ret = clear_page_presence(f, true);
177 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); 179 WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
178 f->armed = true; 180 f->armed = true;
179 return ret; 181 return ret;
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
182/** Restore the given page to saved presence state. */ 184/** Restore the given page to saved presence state. */
183static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) 185static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
184{ 186{
185 bool tmp; 187 int ret = clear_page_presence(f, false);
186 int ret = set_page_presence(f->page, f->old_presence, &tmp);
187 WARN_ONCE(ret < 0, 188 WARN_ONCE(ret < 0,
188 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); 189 KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
189 f->armed = false; 190 f->armed = false;
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
310 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); 311 struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
311 312
312 if (!ctx->active) { 313 if (!ctx->active) {
313 pr_debug("kmmio: spurious debug trap on CPU %d.\n", 314 /*
315 * debug traps without an active context are due to either
316 * something external causing them (f.e. using a debugger while
317 * mmio tracing enabled), or erroneous behaviour
318 */
319 pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
314 smp_processor_id()); 320 smp_processor_id());
315 goto out; 321 goto out;
316 } 322 }
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
439 head, 445 head,
440 struct kmmio_delayed_release, 446 struct kmmio_delayed_release,
441 rcu); 447 rcu);
442 struct kmmio_fault_page *p = dr->release_list; 448 struct kmmio_fault_page *f = dr->release_list;
443 while (p) { 449 while (f) {
444 struct kmmio_fault_page *next = p->release_next; 450 struct kmmio_fault_page *next = f->release_next;
445 BUG_ON(p->count); 451 BUG_ON(f->count);
446 kfree(p); 452 kfree(f);
447 p = next; 453 f = next;
448 } 454 }
449 kfree(dr); 455 kfree(dr);
450} 456}
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
453{ 459{
454 struct kmmio_delayed_release *dr = 460 struct kmmio_delayed_release *dr =
455 container_of(head, struct kmmio_delayed_release, rcu); 461 container_of(head, struct kmmio_delayed_release, rcu);
456 struct kmmio_fault_page *p = dr->release_list; 462 struct kmmio_fault_page *f = dr->release_list;
457 struct kmmio_fault_page **prevp = &dr->release_list; 463 struct kmmio_fault_page **prevp = &dr->release_list;
458 unsigned long flags; 464 unsigned long flags;
459 465
460 spin_lock_irqsave(&kmmio_lock, flags); 466 spin_lock_irqsave(&kmmio_lock, flags);
461 while (p) { 467 while (f) {
462 if (!p->count) { 468 if (!f->count) {
463 list_del_rcu(&p->list); 469 list_del_rcu(&f->list);
464 prevp = &p->release_next; 470 prevp = &f->release_next;
465 } else { 471 } else {
466 *prevp = p->release_next; 472 *prevp = f->release_next;
467 } 473 }
468 p = p->release_next; 474 f = f->release_next;
469 } 475 }
470 spin_unlock_irqrestore(&kmmio_lock, flags); 476 spin_unlock_irqrestore(&kmmio_lock, flags);
471 477
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
528} 534}
529EXPORT_SYMBOL(unregister_kmmio_probe); 535EXPORT_SYMBOL(unregister_kmmio_probe);
530 536
531static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, 537static int
532 void *args) 538kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
533{ 539{
534 struct die_args *arg = args; 540 struct die_args *arg = args;
535 541
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = {
544 .notifier_call = kmmio_die_notifier 550 .notifier_call = kmmio_die_notifier
545}; 551};
546 552
547static int __init init_kmmio(void) 553int kmmio_init(void)
548{ 554{
549 int i; 555 int i;
556
550 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) 557 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
551 INIT_LIST_HEAD(&kmmio_page_table[i]); 558 INIT_LIST_HEAD(&kmmio_page_table[i]);
559
552 return register_die_notifier(&nb_die); 560 return register_die_notifier(&nb_die);
553} 561}
554fs_initcall(init_kmmio); /* should be before device_initcall() */ 562
563void kmmio_cleanup(void)
564{
565 int i;
566
567 unregister_die_notifier(&nb_die);
568 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
569 WARN_ONCE(!list_empty(&kmmio_page_table[i]),
570 KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
571 }
572}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index c9342ed8b402..132772a8ec57 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -451,6 +451,7 @@ void enable_mmiotrace(void)
451 451
452 if (nommiotrace) 452 if (nommiotrace)
453 pr_info(NAME "MMIO tracing disabled.\n"); 453 pr_info(NAME "MMIO tracing disabled.\n");
454 kmmio_init();
454 enter_uniprocessor(); 455 enter_uniprocessor();
455 spin_lock_irq(&trace_lock); 456 spin_lock_irq(&trace_lock);
456 atomic_inc(&mmiotrace_enabled); 457 atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@ void disable_mmiotrace(void)
473 474
474 clear_trace_list(); /* guarantees: no more kmmio callbacks */ 475 clear_trace_list(); /* guarantees: no more kmmio callbacks */
475 leave_uniprocessor(); 476 leave_uniprocessor();
477 kmmio_cleanup();
476 pr_info(NAME "disabled.\n"); 478 pr_info(NAME "disabled.\n");
477out: 479out:
478 mutex_unlock(&mmiotrace_mutex); 480 mutex_unlock(&mmiotrace_mutex);
diff --git a/block/blk-core.c b/block/blk-core.c
index 2998fe3a2377..d028baf946a3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1277,7 +1277,7 @@ static inline void blk_partition_remap(struct bio *bio)
1277 bio->bi_bdev = bdev->bd_contains; 1277 bio->bi_bdev = bdev->bd_contains;
1278 1278
1279 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, 1279 trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
1280 bdev->bd_dev, bio->bi_sector, 1280 bdev->bd_dev,
1281 bio->bi_sector - p->start_sect); 1281 bio->bi_sector - p->start_sect);
1282 } 1282 }
1283} 1283}
@@ -1446,8 +1446,7 @@ static inline void __generic_make_request(struct bio *bio)
1446 goto end_io; 1446 goto end_io;
1447 1447
1448 if (old_sector != -1) 1448 if (old_sector != -1)
1449 trace_block_remap(q, bio, old_dev, bio->bi_sector, 1449 trace_block_remap(q, bio, old_dev, old_sector);
1450 old_sector);
1451 1450
1452 trace_block_bio_queue(q, bio); 1451 trace_block_bio_queue(q, bio);
1453 1452
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3ff9bba3379a..26f9ec28f56c 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -383,16 +383,21 @@ struct kobj_type blk_queue_ktype = {
383int blk_register_queue(struct gendisk *disk) 383int blk_register_queue(struct gendisk *disk)
384{ 384{
385 int ret; 385 int ret;
386 struct device *dev = disk_to_dev(disk);
386 387
387 struct request_queue *q = disk->queue; 388 struct request_queue *q = disk->queue;
388 389
389 if (WARN_ON(!q)) 390 if (WARN_ON(!q))
390 return -ENXIO; 391 return -ENXIO;
391 392
393 ret = blk_trace_init_sysfs(dev);
394 if (ret)
395 return ret;
396
392 if (!q->request_fn) 397 if (!q->request_fn)
393 return 0; 398 return 0;
394 399
395 ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), 400 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
396 "%s", "queue"); 401 "%s", "queue");
397 if (ret < 0) 402 if (ret < 0)
398 return ret; 403 return ret;
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f87615dea46b..f8c218cd08e1 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
568 memcpy(&buts.name, &cbuts.name, 32); 568 memcpy(&buts.name, &cbuts.name, 32);
569 569
570 mutex_lock(&bdev->bd_mutex); 570 mutex_lock(&bdev->bd_mutex);
571 ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); 571 ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
572 mutex_unlock(&bdev->bd_mutex); 572 mutex_unlock(&bdev->bd_mutex);
573 if (ret) 573 if (ret)
574 return ret; 574 return ret;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 424f7b048c30..e2ee4a79ea2c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -656,8 +656,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
656 /* the bio has been remapped so dispatch it */ 656 /* the bio has been remapped so dispatch it */
657 657
658 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, 658 trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
659 tio->io->bio->bi_bdev->bd_dev, 659 tio->io->bio->bi_bdev->bd_dev, sector);
660 clone->bi_sector, sector);
661 660
662 generic_make_request(clone); 661 generic_make_request(clone);
663 } else if (r < 0 || r == DM_MAPIO_REQUEUE) { 662 } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e1716f14cd47..91e316fe6522 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
1065 return blk_trace_setup(sdp->device->request_queue, 1065 return blk_trace_setup(sdp->device->request_queue,
1066 sdp->disk->disk_name, 1066 sdp->disk->disk_name,
1067 MKDEV(SCSI_GENERIC_MAJOR, sdp->index), 1067 MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
1068 NULL,
1068 (char *)arg); 1069 (char *)arg);
1069 case BLKTRACESTART: 1070 case BLKTRACESTART:
1070 return blk_trace_startstop(sdp->device->request_queue, 1); 1071 return blk_trace_startstop(sdp->device->request_queue, 1);
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 89853bcd27a6..f1736ca7922c 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -63,7 +63,7 @@
63#define BRANCH_PROFILE() 63#define BRANCH_PROFILE()
64#endif 64#endif
65 65
66#ifdef CONFIG_EVENT_TRACER 66#ifdef CONFIG_EVENT_TRACING
67#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ 67#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \
68 *(_ftrace_events) \ 68 *(_ftrace_events) \
69 VMLINUX_SYMBOL(__stop_ftrace_events) = .; 69 VMLINUX_SYMBOL(__stop_ftrace_events) = .;
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d960889e92ef..82b4636030e9 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -116,9 +116,9 @@ struct blk_io_trace {
116 * The remap event 116 * The remap event
117 */ 117 */
118struct blk_io_trace_remap { 118struct blk_io_trace_remap {
119 __be32 device;
120 __be32 device_from; 119 __be32 device_from;
121 __be64 sector; 120 __be32 device_to;
121 __be64 sector_from;
122}; 122};
123 123
124enum { 124enum {
@@ -165,8 +165,9 @@ struct blk_trace {
165 165
166extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 166extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
167extern void blk_trace_shutdown(struct request_queue *); 167extern void blk_trace_shutdown(struct request_queue *);
168extern int do_blk_trace_setup(struct request_queue *q, 168extern int do_blk_trace_setup(struct request_queue *q, char *name,
169 char *name, dev_t dev, struct blk_user_trace_setup *buts); 169 dev_t dev, struct block_device *bdev,
170 struct blk_user_trace_setup *buts);
170extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); 171extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
171 172
172/** 173/**
@@ -193,22 +194,29 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
193extern void blk_add_driver_data(struct request_queue *q, struct request *rq, 194extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
194 void *data, size_t len); 195 void *data, size_t len);
195extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 196extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
197 struct block_device *bdev,
196 char __user *arg); 198 char __user *arg);
197extern int blk_trace_startstop(struct request_queue *q, int start); 199extern int blk_trace_startstop(struct request_queue *q, int start);
198extern int blk_trace_remove(struct request_queue *q); 200extern int blk_trace_remove(struct request_queue *q);
201extern int blk_trace_init_sysfs(struct device *dev);
199 202
200extern struct attribute_group blk_trace_attr_group; 203extern struct attribute_group blk_trace_attr_group;
201 204
202#else /* !CONFIG_BLK_DEV_IO_TRACE */ 205#else /* !CONFIG_BLK_DEV_IO_TRACE */
203#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 206# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
204#define blk_trace_shutdown(q) do { } while (0) 207# define blk_trace_shutdown(q) do { } while (0)
205#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) 208# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY)
206#define blk_add_driver_data(q, rq, data, len) do {} while (0) 209# define blk_add_driver_data(q, rq, data, len) do {} while (0)
207#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) 210# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY)
208#define blk_trace_startstop(q, start) (-ENOTTY) 211# define blk_trace_startstop(q, start) (-ENOTTY)
209#define blk_trace_remove(q) (-ENOTTY) 212# define blk_trace_remove(q) (-ENOTTY)
210#define blk_add_trace_msg(q, fmt, ...) do { } while (0) 213# define blk_add_trace_msg(q, fmt, ...) do { } while (0)
214static inline int blk_trace_init_sysfs(struct device *dev)
215{
216 return 0;
217}
211 218
212#endif /* CONFIG_BLK_DEV_IO_TRACE */ 219#endif /* CONFIG_BLK_DEV_IO_TRACE */
220
213#endif /* __KERNEL__ */ 221#endif /* __KERNEL__ */
214#endif 222#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8a0c2f221e6b..39b95c56587e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
233 233
234extern int skip_trace(unsigned long ip); 234extern int skip_trace(unsigned long ip);
235 235
236extern void ftrace_release(void *start, unsigned long size);
237
238extern void ftrace_disable_daemon(void); 236extern void ftrace_disable_daemon(void);
239extern void ftrace_enable_daemon(void); 237extern void ftrace_enable_daemon(void);
240#else 238#else
@@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
325 323
326#ifdef CONFIG_FTRACE_MCOUNT_RECORD 324#ifdef CONFIG_FTRACE_MCOUNT_RECORD
327extern void ftrace_init(void); 325extern void ftrace_init(void);
328extern void ftrace_init_module(struct module *mod,
329 unsigned long *start, unsigned long *end);
330#else 326#else
331static inline void ftrace_init(void) { } 327static inline void ftrace_init(void) { }
332static inline void
333ftrace_init_module(struct module *mod,
334 unsigned long *start, unsigned long *end) { }
335#endif 328#endif
336 329
337/* 330/*
@@ -368,6 +361,7 @@ struct ftrace_ret_stack {
368 unsigned long ret; 361 unsigned long ret;
369 unsigned long func; 362 unsigned long func;
370 unsigned long long calltime; 363 unsigned long long calltime;
364 unsigned long long subtime;
371}; 365};
372 366
373/* 367/*
@@ -379,8 +373,6 @@ extern void return_to_handler(void);
379 373
380extern int 374extern int
381ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); 375ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
382extern void
383ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
384 376
385/* 377/*
386 * Sometimes we don't want to trace a function with the function 378 * Sometimes we don't want to trace a function with the function
@@ -496,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
496 488
497extern int ftrace_dump_on_oops; 489extern int ftrace_dump_on_oops;
498 490
491#ifdef CONFIG_PREEMPT
492#define INIT_TRACE_RECURSION .trace_recursion = 0,
493#endif
494
499#endif /* CONFIG_TRACING */ 495#endif /* CONFIG_TRACING */
500 496
497#ifndef INIT_TRACE_RECURSION
498#define INIT_TRACE_RECURSION
499#endif
501 500
502#ifdef CONFIG_HW_BRANCH_TRACER 501#ifdef CONFIG_HW_BRANCH_TRACER
503 502
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644
index 000000000000..662c1becf367
--- /dev/null
+++ b/include/linux/ftrace_event.h
@@ -0,0 +1,156 @@
1#ifndef _LINUX_FTRACE_EVENT_H
2#define _LINUX_FTRACE_EVENT_H
3
4#include <linux/trace_seq.h>
5#include <linux/ring_buffer.h>
6
7
8struct trace_array;
9struct tracer;
10struct dentry;
11
12/*
13 * The trace entry - the most basic unit of tracing. This is what
14 * is printed in the end as a single line in the trace output, such as:
15 *
16 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
17 */
18struct trace_entry {
19 unsigned short type;
20 unsigned char flags;
21 unsigned char preempt_count;
22 int pid;
23 int tgid;
24};
25
26#define FTRACE_MAX_EVENT \
27 ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
28
29/*
30 * Trace iterator - used by printout routines who present trace
31 * results to users and which routines might sleep, etc:
32 */
33struct trace_iterator {
34 struct trace_array *tr;
35 struct tracer *trace;
36 void *private;
37 int cpu_file;
38 struct mutex mutex;
39 struct ring_buffer_iter *buffer_iter[NR_CPUS];
40
41 /* The below is zeroed out in pipe_read */
42 struct trace_seq seq;
43 struct trace_entry *ent;
44 int cpu;
45 u64 ts;
46
47 unsigned long iter_flags;
48 loff_t pos;
49 long idx;
50
51 cpumask_var_t started;
52};
53
54
55typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
56 int flags);
57struct trace_event {
58 struct hlist_node node;
59 struct list_head list;
60 int type;
61 trace_print_func trace;
62 trace_print_func raw;
63 trace_print_func hex;
64 trace_print_func binary;
65};
66
67extern int register_ftrace_event(struct trace_event *event);
68extern int unregister_ftrace_event(struct trace_event *event);
69
70/* Return values for print_line callback */
71enum print_line_t {
72 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
73 TRACE_TYPE_HANDLED = 1,
74 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
75 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
76};
77
78
79struct ring_buffer_event *
80trace_current_buffer_lock_reserve(int type, unsigned long len,
81 unsigned long flags, int pc);
82void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
83 unsigned long flags, int pc);
84void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
85 unsigned long flags, int pc);
86void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
87
88void tracing_record_cmdline(struct task_struct *tsk);
89
90struct ftrace_event_call {
91 struct list_head list;
92 char *name;
93 char *system;
94 struct dentry *dir;
95 struct trace_event *event;
96 int enabled;
97 int (*regfunc)(void);
98 void (*unregfunc)(void);
99 int id;
100 int (*raw_init)(void);
101 int (*show_format)(struct trace_seq *s);
102 int (*define_fields)(void);
103 struct list_head fields;
104 int filter_active;
105 void *filter;
106 void *mod;
107
108#ifdef CONFIG_EVENT_PROFILE
109 atomic_t profile_count;
110 int (*profile_enable)(struct ftrace_event_call *);
111 void (*profile_disable)(struct ftrace_event_call *);
112#endif
113};
114
115#define MAX_FILTER_PRED 32
116#define MAX_FILTER_STR_VAL 128
117
118extern int init_preds(struct ftrace_event_call *call);
119extern void destroy_preds(struct ftrace_event_call *call);
120extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
121extern int filter_current_check_discard(struct ftrace_event_call *call,
122 void *rec,
123 struct ring_buffer_event *event);
124
125extern int trace_define_field(struct ftrace_event_call *call, char *type,
126 char *name, int offset, int size, int is_signed);
127
128#define is_signed_type(type) (((type)(-1)) < 0)
129
130/*
131 * The double __builtin_constant_p is because gcc will give us an error
132 * if we try to allocate the static variable to fmt if it is not a
133 * constant. Even with the outer if statement optimizing out.
134 */
135#define event_trace_printk(ip, fmt, args...) \
136do { \
137 __trace_printk_check_format(fmt, ##args); \
138 tracing_record_cmdline(current); \
139 if (__builtin_constant_p(fmt)) { \
140 static const char *trace_printk_fmt \
141 __attribute__((section("__trace_printk_fmt"))) = \
142 __builtin_constant_p(fmt) ? fmt : NULL; \
143 \
144 __trace_bprintk(ip, trace_printk_fmt, ##args); \
145 } else \
146 __trace_printk(ip, fmt, ##args); \
147} while (0)
148
149#define __common_field(type, item, is_signed) \
150 ret = trace_define_field(event_call, #type, "common_" #item, \
151 offsetof(typeof(field.ent), item), \
152 sizeof(field.ent.item), is_signed); \
153 if (ret) \
154 return ret;
155
156#endif /* _LINUX_FTRACE_EVENT_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d87247d2641f..889bf99eca6d 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -174,6 +174,7 @@ extern struct cred init_cred;
174 INIT_TRACE_IRQFLAGS \ 174 INIT_TRACE_IRQFLAGS \
175 INIT_LOCKDEP \ 175 INIT_LOCKDEP \
176 INIT_FTRACE_GRAPH \ 176 INIT_FTRACE_GRAPH \
177 INIT_TRACE_RECURSION \
177} 178}
178 179
179 180
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 000000000000..b616d3930c3b
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,25 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <trace/events/kmem.h>
13
14#ifdef CONFIG_KMEMTRACE
15extern void kmemtrace_init(void);
16#else
17static inline void kmemtrace_init(void)
18{
19}
20#endif
21
22#endif /* __KERNEL__ */
23
24#endif /* _LINUX_KMEMTRACE_H */
25
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 3d1b7bde1283..97491f78b08c 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -30,6 +30,8 @@ extern unsigned int kmmio_count;
30 30
31extern int register_kmmio_probe(struct kmmio_probe *p); 31extern int register_kmmio_probe(struct kmmio_probe *p);
32extern void unregister_kmmio_probe(struct kmmio_probe *p); 32extern void unregister_kmmio_probe(struct kmmio_probe *p);
33extern int kmmio_init(void);
34extern void kmmio_cleanup(void);
33 35
34#ifdef CONFIG_MMIOTRACE 36#ifdef CONFIG_MMIOTRACE
35/* kmmio is active by some kmmio_probes? */ 37/* kmmio is active by some kmmio_probes? */
diff --git a/include/linux/module.h b/include/linux/module.h
index 627ac082e2a6..a8f2c0aa4c32 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -337,6 +337,14 @@ struct module
337 const char **trace_bprintk_fmt_start; 337 const char **trace_bprintk_fmt_start;
338 unsigned int num_trace_bprintk_fmt; 338 unsigned int num_trace_bprintk_fmt;
339#endif 339#endif
340#ifdef CONFIG_EVENT_TRACING
341 struct ftrace_event_call *trace_events;
342 unsigned int num_trace_events;
343#endif
344#ifdef CONFIG_FTRACE_MCOUNT_RECORD
345 unsigned long *ftrace_callsites;
346 unsigned int num_ftrace_callsites;
347#endif
340 348
341#ifdef CONFIG_MODULE_UNLOAD 349#ifdef CONFIG_MODULE_UNLOAD
342 /* What modules depend on me? */ 350 /* What modules depend on me? */
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index e1b7b2173885..f1345828c7c5 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -11,7 +11,7 @@ struct ring_buffer_iter;
11 * Don't refer to this struct directly, use functions below. 11 * Don't refer to this struct directly, use functions below.
12 */ 12 */
13struct ring_buffer_event { 13struct ring_buffer_event {
14 u32 type:2, len:3, time_delta:27; 14 u32 type_len:5, time_delta:27;
15 u32 array[]; 15 u32 array[];
16}; 16};
17 17
@@ -24,7 +24,8 @@ struct ring_buffer_event {
24 * size is variable depending on how much 24 * size is variable depending on how much
25 * padding is needed 25 * padding is needed
26 * If time_delta is non zero: 26 * If time_delta is non zero:
27 * everything else same as RINGBUF_TYPE_DATA 27 * array[0] holds the actual length
28 * size = 4 + length (bytes)
28 * 29 *
29 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta 30 * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta
30 * array[0] = time delta (28 .. 59) 31 * array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@ struct ring_buffer_event {
35 * array[1..2] = tv_sec 36 * array[1..2] = tv_sec
36 * size = 16 bytes 37 * size = 16 bytes
37 * 38 *
38 * @RINGBUF_TYPE_DATA: Data record 39 * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
39 * If len is zero: 40 * Data record
41 * If type_len is zero:
40 * array[0] holds the actual length 42 * array[0] holds the actual length
41 * array[1..(length+3)/4] holds data 43 * array[1..(length+3)/4] holds data
42 * size = 4 + 4 + length (bytes) 44 * size = 4 + length (bytes)
43 * else 45 * else
44 * length = len << 2 46 * length = type_len << 2
45 * array[0..(length+3)/4-1] holds data 47 * array[0..(length+3)/4-1] holds data
46 * size = 4 + length (bytes) 48 * size = 4 + length (bytes)
47 */ 49 */
48enum ring_buffer_type { 50enum ring_buffer_type {
51 RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
49 RINGBUF_TYPE_PADDING, 52 RINGBUF_TYPE_PADDING,
50 RINGBUF_TYPE_TIME_EXTEND, 53 RINGBUF_TYPE_TIME_EXTEND,
51 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ 54 /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
52 RINGBUF_TYPE_TIME_STAMP, 55 RINGBUF_TYPE_TIME_STAMP,
53 RINGBUF_TYPE_DATA,
54}; 56};
55 57
56unsigned ring_buffer_event_length(struct ring_buffer_event *event); 58unsigned ring_buffer_event_length(struct ring_buffer_event *event);
@@ -68,9 +70,38 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
68 return event->time_delta; 70 return event->time_delta;
69} 71}
70 72
73/*
74 * ring_buffer_event_discard can discard any event in the ring buffer.
75 * It is up to the caller to protect against a reader from
76 * consuming it or a writer from wrapping and replacing it.
77 *
78 * No external protection is needed if this is called before
79 * the event is committed. But in that case it would be better to
80 * use ring_buffer_discard_commit.
81 *
82 * Note, if an event that has not been committed is discarded
83 * with ring_buffer_event_discard, it must still be committed.
84 */
71void ring_buffer_event_discard(struct ring_buffer_event *event); 85void ring_buffer_event_discard(struct ring_buffer_event *event);
72 86
73/* 87/*
88 * ring_buffer_discard_commit will remove an event that has not
89 * been committed yet. If this is used, then ring_buffer_unlock_commit
90 * must not be called on the discarded event. This function
91 * will try to remove the event from the ring buffer completely
92 * if another event has not been written after it.
93 *
94 * Example use:
95 *
96 * if (some_condition)
97 * ring_buffer_discard_commit(buffer, event);
98 * else
99 * ring_buffer_unlock_commit(buffer, event);
100 */
101void ring_buffer_discard_commit(struct ring_buffer *buffer,
102 struct ring_buffer_event *event);
103
104/*
74 * size is in bytes for each per CPU buffer. 105 * size is in bytes for each per CPU buffer.
75 */ 106 */
76struct ring_buffer * 107struct ring_buffer *
@@ -122,6 +153,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer);
122unsigned long ring_buffer_overruns(struct ring_buffer *buffer); 153unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
123unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); 154unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
124unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); 155unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
156unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
157unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
125 158
126u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); 159u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
127void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, 160void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
@@ -137,6 +170,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
137int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, 170int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
138 size_t len, int cpu, int full); 171 size_t len, int cpu, int full);
139 172
173struct trace_seq;
174
175int ring_buffer_print_entry_header(struct trace_seq *s);
176int ring_buffer_print_page_header(struct trace_seq *s);
177
140enum ring_buffer_flags { 178enum ring_buffer_flags {
141 RB_FL_OVERWRITE = 1 << 0, 179 RB_FL_OVERWRITE = 1 << 0,
142}; 180};
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc8049c..7ede5e490913 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1428,7 +1428,9 @@ struct task_struct {
1428#ifdef CONFIG_TRACING 1428#ifdef CONFIG_TRACING
1429 /* state flags for use by tracers */ 1429 /* state flags for use by tracers */
1430 unsigned long trace; 1430 unsigned long trace;
1431#endif 1431 /* bitmask of trace recursion */
1432 unsigned long trace_recursion;
1433#endif /* CONFIG_TRACING */
1432}; 1434};
1433 1435
1434/* Future-safe accessor for struct task_struct's cpus_allowed. */ 1436/* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 5ac9b0bcaf9a..713f841ecaa9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,7 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <trace/kmemtrace.h> 17#include <linux/kmemtrace.h>
18 18
19/* Size description struct for general caches. */ 19/* Size description struct for general caches. */
20struct cache_sizes { 20struct cache_sizes {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5046f90c1171..be5d40c43bd2 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,7 +10,7 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <trace/kmemtrace.h> 13#include <linux/kmemtrace.h>
14 14
15enum stat_item { 15enum stat_item {
16 ALLOC_FASTPATH, /* Allocation from cpu slab */ 16 ALLOC_FASTPATH, /* Allocation from cpu slab */
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644
index 000000000000..ba9627f00d3f
--- /dev/null
+++ b/include/linux/trace_seq.h
@@ -0,0 +1,90 @@
1#ifndef _LINUX_TRACE_SEQ_H
2#define _LINUX_TRACE_SEQ_H
3
4#include <linux/fs.h>
5
6/*
7 * Trace sequences are used to allow a function to call several other functions
8 * to create a string of data to use (up to a max of PAGE_SIZE).
9 */
10
11struct trace_seq {
12 unsigned char buffer[PAGE_SIZE];
13 unsigned int len;
14 unsigned int readpos;
15};
16
17static inline void
18trace_seq_init(struct trace_seq *s)
19{
20 s->len = 0;
21 s->readpos = 0;
22}
23
24/*
25 * Currently only defined when tracing is enabled.
26 */
27#ifdef CONFIG_TRACING
28extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3)));
30extern int
31trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
32extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
33extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
34 size_t cnt);
35extern int trace_seq_puts(struct trace_seq *s, const char *str);
36extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
37extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
38extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
39 size_t len);
40extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
41extern int trace_seq_path(struct trace_seq *s, struct path *path);
42
43#else /* CONFIG_TRACING */
44static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
45{
46 return 0;
47}
48static inline int
49trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
50{
51 return 0;
52}
53
54static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
55{
56}
57static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
58 size_t cnt)
59{
60 return 0;
61}
62static inline int trace_seq_puts(struct trace_seq *s, const char *str)
63{
64 return 0;
65}
66static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
67{
68 return 0;
69}
70static inline int
71trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
72{
73 return 0;
74}
75static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
76 size_t len)
77{
78 return 0;
79}
80static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
81{
82 return NULL;
83}
84static inline int trace_seq_path(struct trace_seq *s, struct path *path)
85{
86 return 0;
87}
88#endif /* CONFIG_TRACING */
89
90#endif /* _LINUX_TRACE_SEQ_H */
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d35a7ee7611f..14df7e635d43 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -31,6 +31,8 @@ struct tracepoint {
31 * Keep in sync with vmlinux.lds.h. 31 * Keep in sync with vmlinux.lds.h.
32 */ 32 */
33 33
34#ifndef DECLARE_TRACE
35
34#define TP_PROTO(args...) args 36#define TP_PROTO(args...) args
35#define TP_ARGS(args...) args 37#define TP_ARGS(args...) args
36 38
@@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
114 struct tracepoint *end) 116 struct tracepoint *end)
115{ } 117{ }
116#endif /* CONFIG_TRACEPOINTS */ 118#endif /* CONFIG_TRACEPOINTS */
119#endif /* DECLARE_TRACE */
117 120
118/* 121/*
119 * Connect a probe to a tracepoint. 122 * Connect a probe to a tracepoint.
@@ -154,10 +157,8 @@ static inline void tracepoint_synchronize_unregister(void)
154} 157}
155 158
156#define PARAMS(args...) args 159#define PARAMS(args...) args
157#define TRACE_FORMAT(name, proto, args, fmt) \
158 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
159
160 160
161#ifndef TRACE_EVENT
161/* 162/*
162 * For use with the TRACE_EVENT macro: 163 * For use with the TRACE_EVENT macro:
163 * 164 *
@@ -262,5 +263,6 @@ static inline void tracepoint_synchronize_unregister(void)
262 263
263#define TRACE_EVENT(name, proto, args, struct, assign, print) \ 264#define TRACE_EVENT(name, proto, args, struct, assign, print) \
264 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) 265 DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
266#endif
265 267
266#endif 268#endif
diff --git a/include/trace/block.h b/include/trace/block.h
index 25b7068b819e..8ac945b7746e 100644
--- a/include/trace/block.h
+++ b/include/trace/block.h
@@ -70,7 +70,7 @@ DECLARE_TRACE(block_split,
70 70
71DECLARE_TRACE(block_remap, 71DECLARE_TRACE(block_remap,
72 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, 72 TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
73 sector_t from, sector_t to), 73 sector_t to),
74 TP_ARGS(q, bio, dev, from, to)); 74 TP_ARGS(q, bio, dev, to));
75 75
76#endif 76#endif
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644
index 000000000000..f7a7ae1e8f90
--- /dev/null
+++ b/include/trace/define_trace.h
@@ -0,0 +1,75 @@
1/*
2 * Trace files that want to automate creation of all tracepoints defined
3 * in their file should include this file. The following are macros that the
4 * trace file may define:
5 *
6 * TRACE_SYSTEM defines the system the tracepoint is for
7 *
8 * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
9 * This macro may be defined to tell define_trace.h what file to include.
10 * Note, leave off the ".h".
11 *
12 * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
13 * then this macro can define the path to use. Note, the path is relative to
14 * define_trace.h, not the file including it. Full path names for out of tree
15 * modules must be used.
16 */
17
18#ifdef CREATE_TRACE_POINTS
19
20/* Prevent recursion */
21#undef CREATE_TRACE_POINTS
22
23#include <linux/stringify.h>
24
25#undef TRACE_EVENT
26#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
27 DEFINE_TRACE(name)
28
29#undef DECLARE_TRACE
30#define DECLARE_TRACE(name, proto, args) \
31 DEFINE_TRACE(name)
32
33#undef TRACE_INCLUDE
34#undef __TRACE_INCLUDE
35
36#ifndef TRACE_INCLUDE_FILE
37# define TRACE_INCLUDE_FILE TRACE_SYSTEM
38# define UNDEF_TRACE_INCLUDE_FILE
39#endif
40
41#ifndef TRACE_INCLUDE_PATH
42# define __TRACE_INCLUDE(system) <trace/events/system.h>
43# define UNDEF_TRACE_INCLUDE_PATH
44#else
45# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
46#endif
47
48# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
49
50/* Let the trace headers be reread */
51#define TRACE_HEADER_MULTI_READ
52
53#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
54
55#ifdef CONFIG_EVENT_TRACING
56#include <trace/ftrace.h>
57#endif
58
59#undef TRACE_HEADER_MULTI_READ
60
61/* Only undef what we defined in this file */
62#ifdef UNDEF_TRACE_INCLUDE_FILE
63# undef TRACE_INCLUDE_FILE
64# undef UNDEF_TRACE_INCLUDE_FILE
65#endif
66
67#ifdef UNDEF_TRACE_INCLUDE_PATH
68# undef TRACE_INCLUDE_PATH
69# undef UNDEF_TRACE_INCLUDE_PATH
70#endif
71
72/* We may be processing more files */
73#define CREATE_TRACE_POINTS
74
75#endif /* CREATE_TRACE_POINTS */
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644
index 000000000000..32a9f7ef432b
--- /dev/null
+++ b/include/trace/events/irq.h
@@ -0,0 +1,134 @@
1#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_IRQ_H
3
4#include <linux/tracepoint.h>
5#include <linux/interrupt.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM irq
9
10/**
11 * irq_handler_entry - called immediately before the irq action handler
12 * @irq: irq number
13 * @action: pointer to struct irqaction
14 *
15 * The struct irqaction pointed to by @action contains various
16 * information about the handler, including the device name,
17 * @action->name, and the device id, @action->dev_id. When used in
18 * conjunction with the irq_handler_exit tracepoint, we can figure
19 * out irq handler latencies.
20 */
21TRACE_EVENT(irq_handler_entry,
22
23 TP_PROTO(int irq, struct irqaction *action),
24
25 TP_ARGS(irq, action),
26
27 TP_STRUCT__entry(
28 __field( int, irq )
29 __string( name, action->name )
30 ),
31
32 TP_fast_assign(
33 __entry->irq = irq;
34 __assign_str(name, action->name);
35 ),
36
37 TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
38);
39
40/**
41 * irq_handler_exit - called immediately after the irq action handler returns
42 * @irq: irq number
43 * @action: pointer to struct irqaction
44 * @ret: return value
45 *
46 * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
47 * @action->handler successfully handled this irq. Otherwise, the irq might be
48 * a shared irq line, or the irq was not handled successfully. Can be used in
49 * conjunction with the irq_handler_entry to understand irq handler latencies.
50 */
51TRACE_EVENT(irq_handler_exit,
52
53 TP_PROTO(int irq, struct irqaction *action, int ret),
54
55 TP_ARGS(irq, action, ret),
56
57 TP_STRUCT__entry(
58 __field( int, irq )
59 __field( int, ret )
60 ),
61
62 TP_fast_assign(
63 __entry->irq = irq;
64 __entry->ret = ret;
65 ),
66
67 TP_printk("irq=%d return=%s",
68 __entry->irq, __entry->ret ? "handled" : "unhandled")
69);
70
71/**
72 * softirq_entry - called immediately before the softirq handler
73 * @h: pointer to struct softirq_action
74 * @vec: pointer to first struct softirq_action in softirq_vec array
75 *
76 * The @h parameter contains a pointer to the struct softirq_action
77 * which has a pointer to the action handler that is called. By subtracting
78 * the @vec pointer from the @h pointer, we can determine the softirq
79 * number. Also, when used in combination with the softirq_exit tracepoint
80 * we can determine the softirq latency.
81 */
82TRACE_EVENT(softirq_entry,
83
84 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
85
86 TP_ARGS(h, vec),
87
88 TP_STRUCT__entry(
89 __field( int, vec )
90 __string( name, softirq_to_name[h-vec] )
91 ),
92
93 TP_fast_assign(
94 __entry->vec = (int)(h - vec);
95 __assign_str(name, softirq_to_name[h-vec]);
96 ),
97
98 TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
99);
100
101/**
102 * softirq_exit - called immediately after the softirq handler returns
103 * @h: pointer to struct softirq_action
104 * @vec: pointer to first struct softirq_action in softirq_vec array
105 *
106 * The @h parameter contains a pointer to the struct softirq_action
107 * that has handled the softirq. By subtracting the @vec pointer from
108 * the @h pointer, we can determine the softirq number. Also, when used in
109 * combination with the softirq_entry tracepoint we can determine the softirq
110 * latency.
111 */
112TRACE_EVENT(softirq_exit,
113
114 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
115
116 TP_ARGS(h, vec),
117
118 TP_STRUCT__entry(
119 __field( int, vec )
120 __string( name, softirq_to_name[h-vec] )
121 ),
122
123 TP_fast_assign(
124 __entry->vec = (int)(h - vec);
125 __assign_str(name, softirq_to_name[h-vec]);
126 ),
127
128 TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name))
129);
130
131#endif /* _TRACE_IRQ_H */
132
133/* This part must be outside protection */
134#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644
index 000000000000..c22c42f980b5
--- /dev/null
+++ b/include/trace/events/kmem.h
@@ -0,0 +1,194 @@
1#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_KMEM_H
3
4#include <linux/types.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM kmem
9
/*
 * kmalloc - trace event for the kmalloc tracepoint
 *
 * All five tracepoint arguments (@call_site, @ptr, @bytes_req,
 * @bytes_alloc, @gfp_flags) are copied unchanged into the event record
 * and printed in that order.
 * NOTE(review): presumably @call_site is the caller's address and
 * @bytes_req/@bytes_alloc are the requested vs. actually allocated
 * sizes -- confirm against the allocator call sites.
 */
10TRACE_EVENT(kmalloc,
11
12 TP_PROTO(unsigned long call_site,
13 const void *ptr,
14 size_t bytes_req,
15 size_t bytes_alloc,
16 gfp_t gfp_flags),
17
18 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
19
20 TP_STRUCT__entry(
21 __field( unsigned long, call_site )
22 __field( const void *, ptr )
23 __field( size_t, bytes_req )
24 __field( size_t, bytes_alloc )
25 __field( gfp_t, gfp_flags )
26 ),
27
28 TP_fast_assign(
29 __entry->call_site = call_site;
30 __entry->ptr = ptr;
31 __entry->bytes_req = bytes_req;
32 __entry->bytes_alloc = bytes_alloc;
33 __entry->gfp_flags = gfp_flags;
34 ),
35
36 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
37 __entry->call_site,
38 __entry->ptr,
39 __entry->bytes_req,
40 __entry->bytes_alloc,
41 __entry->gfp_flags)
42);
43
/*
 * kmem_cache_alloc - trace event for the kmem_cache_alloc tracepoint
 *
 * Takes the same arguments and uses the same record layout and print
 * format as the kmalloc event: @call_site, @ptr, @bytes_req,
 * @bytes_alloc and @gfp_flags are stored unchanged.
 */
44TRACE_EVENT(kmem_cache_alloc,
45
46 TP_PROTO(unsigned long call_site,
47 const void *ptr,
48 size_t bytes_req,
49 size_t bytes_alloc,
50 gfp_t gfp_flags),
51
52 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
53
54 TP_STRUCT__entry(
55 __field( unsigned long, call_site )
56 __field( const void *, ptr )
57 __field( size_t, bytes_req )
58 __field( size_t, bytes_alloc )
59 __field( gfp_t, gfp_flags )
60 ),
61
62 TP_fast_assign(
63 __entry->call_site = call_site;
64 __entry->ptr = ptr;
65 __entry->bytes_req = bytes_req;
66 __entry->bytes_alloc = bytes_alloc;
67 __entry->gfp_flags = gfp_flags;
68 ),
69
70 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x",
71 __entry->call_site,
72 __entry->ptr,
73 __entry->bytes_req,
74 __entry->bytes_alloc,
75 __entry->gfp_flags)
76);
77
/*
 * kmalloc_node - trace event for the kmalloc_node tracepoint
 *
 * Same fields as the kmalloc event plus @node, which is stored as an
 * int and printed last as "node=%d".
 * NOTE(review): @node is presumably the NUMA node id -- confirm.
 */
78TRACE_EVENT(kmalloc_node,
79
80 TP_PROTO(unsigned long call_site,
81 const void *ptr,
82 size_t bytes_req,
83 size_t bytes_alloc,
84 gfp_t gfp_flags,
85 int node),
86
87 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
88
89 TP_STRUCT__entry(
90 __field( unsigned long, call_site )
91 __field( const void *, ptr )
92 __field( size_t, bytes_req )
93 __field( size_t, bytes_alloc )
94 __field( gfp_t, gfp_flags )
95 __field( int, node )
96 ),
97
98 TP_fast_assign(
99 __entry->call_site = call_site;
100 __entry->ptr = ptr;
101 __entry->bytes_req = bytes_req;
102 __entry->bytes_alloc = bytes_alloc;
103 __entry->gfp_flags = gfp_flags;
104 __entry->node = node;
105 ),
106
107 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
108 __entry->call_site,
109 __entry->ptr,
110 __entry->bytes_req,
111 __entry->bytes_alloc,
112 __entry->gfp_flags,
113 __entry->node)
114);
115
/*
 * kmem_cache_alloc_node - trace event for the kmem_cache_alloc_node
 * tracepoint
 *
 * Takes the same arguments and uses the same record layout and print
 * format as the kmalloc_node event, including the trailing @node field.
 */
116TRACE_EVENT(kmem_cache_alloc_node,
117
118 TP_PROTO(unsigned long call_site,
119 const void *ptr,
120 size_t bytes_req,
121 size_t bytes_alloc,
122 gfp_t gfp_flags,
123 int node),
124
125 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
126
127 TP_STRUCT__entry(
128 __field( unsigned long, call_site )
129 __field( const void *, ptr )
130 __field( size_t, bytes_req )
131 __field( size_t, bytes_alloc )
132 __field( gfp_t, gfp_flags )
133 __field( int, node )
134 ),
135
136 TP_fast_assign(
137 __entry->call_site = call_site;
138 __entry->ptr = ptr;
139 __entry->bytes_req = bytes_req;
140 __entry->bytes_alloc = bytes_alloc;
141 __entry->gfp_flags = gfp_flags;
142 __entry->node = node;
143 ),
144
145 TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d",
146 __entry->call_site,
147 __entry->ptr,
148 __entry->bytes_req,
149 __entry->bytes_alloc,
150 __entry->gfp_flags,
151 __entry->node)
152);
153
/*
 * kfree - trace event for the kfree tracepoint
 *
 * Stores @call_site and @ptr unchanged and prints them as
 * "call_site=%lx ptr=%p".
 */
154TRACE_EVENT(kfree,
155
156 TP_PROTO(unsigned long call_site, const void *ptr),
157
158 TP_ARGS(call_site, ptr),
159
160 TP_STRUCT__entry(
161 __field( unsigned long, call_site )
162 __field( const void *, ptr )
163 ),
164
165 TP_fast_assign(
166 __entry->call_site = call_site;
167 __entry->ptr = ptr;
168 ),
169
170 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
171);
172
/*
 * kmem_cache_free - trace event for the kmem_cache_free tracepoint
 *
 * Takes the same arguments and uses the same record layout and print
 * format as the kfree event: @call_site and @ptr are stored unchanged.
 */
173TRACE_EVENT(kmem_cache_free,
174
175 TP_PROTO(unsigned long call_site, const void *ptr),
176
177 TP_ARGS(call_site, ptr),
178
179 TP_STRUCT__entry(
180 __field( unsigned long, call_site )
181 __field( const void *, ptr )
182 ),
183
184 TP_fast_assign(
185 __entry->call_site = call_site;
186 __entry->ptr = ptr;
187 ),
188
189 TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
190);
191#endif /* _TRACE_KMEM_H */
192
193/* This part must be outside protection */
194#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644
index 000000000000..0e956c9dfd7e
--- /dev/null
+++ b/include/trace/events/lockdep.h
@@ -0,0 +1,96 @@
1#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_LOCKDEP_H
3
4#include <linux/lockdep.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM lockdep
9
10#ifdef CONFIG_LOCKDEP
11
/*
 * lock_acquire - trace event for the lock_acquire tracepoint
 *
 * Only @trylock, @read and lock->name are recorded; @subclass, @check,
 * @next_lock and @ip are accepted but not stored in the event.
 * @trylock and @read are packed into the flags field as bit 0 and
 * bit 1, and printed as the prefixes "try " and "read ".
 */
12TRACE_EVENT(lock_acquire,
13
14 TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
15 int trylock, int read, int check,
16 struct lockdep_map *next_lock, unsigned long ip),
17
18 TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
19
20 TP_STRUCT__entry(
21 __field(unsigned int, flags)
22 __string(name, lock->name)
23 ),
24
25 TP_fast_assign(
26 __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
27 __assign_str(name, lock->name);
28 ),
29
30 TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
31 (__entry->flags & 2) ? "read " : "",
32 __get_str(name))
33);
34
/*
 * lock_release - trace event for the lock_release tracepoint
 *
 * Records and prints only a copy of lock->name; @nested and @ip are
 * accepted but not stored in the event.
 */
35TRACE_EVENT(lock_release,
36
37 TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
38
39 TP_ARGS(lock, nested, ip),
40
41 TP_STRUCT__entry(
42 __string(name, lock->name)
43 ),
44
45 TP_fast_assign(
46 __assign_str(name, lock->name);
47 ),
48
49 TP_printk("%s", __get_str(name))
50);
51
52#ifdef CONFIG_LOCK_STAT
53
/*
 * lock_contended - trace event for the lock_contended tracepoint
 *
 * Records and prints only a copy of lock->name; @ip is accepted but
 * not stored in the event.
 */
54TRACE_EVENT(lock_contended,
55
56 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
57
58 TP_ARGS(lock, ip),
59
60 TP_STRUCT__entry(
61 __string(name, lock->name)
62 ),
63
64 TP_fast_assign(
65 __assign_str(name, lock->name);
66 ),
67
68 TP_printk("%s", __get_str(name))
69);
70
/*
 * lock_acquired - trace event for the lock_acquired tracepoint
 *
 * Records a copy of lock->name and @waittime split into two fields:
 * wait_usec (whole microseconds) and wait_nsec_rem (the nanosecond
 * remainder). @ip is accepted but not stored in the event.
 * Printed as "<name> (<usec>.<rem, 3 digits> us)".
 */
71TRACE_EVENT(lock_acquired,
72 TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
73
74 TP_ARGS(lock, ip, waittime),
75
76 TP_STRUCT__entry(
77 __string(name, lock->name)
78 __field(unsigned long, wait_usec)
79 __field(unsigned long, wait_nsec_rem)
80 ),
81 TP_fast_assign(
82 __assign_str(name, lock->name);
 /*
  * As used here, do_div() leaves the quotient in waittime and
  * yields the remainder, so the order of these two lines matters.
  */
83 __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
84 __entry->wait_usec = (unsigned long) waittime;
85 ),
86 TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
87 __entry->wait_nsec_rem)
88);
89
90#endif
91#endif
92
93#endif /* _TRACE_LOCKDEP_H */
94
95/* This part must be outside protection */
96#include <trace/define_trace.h>
diff --git a/include/trace/sched_event_types.h b/include/trace/events/sched.h
index 63547dc1125f..dd4033cf5b09 100644
--- a/include/trace/sched_event_types.h
+++ b/include/trace/events/sched.h
@@ -1,9 +1,8 @@
1#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_SCHED_H
1 3
2/* use <trace/sched.h> instead */ 4#include <linux/sched.h>
3#ifndef TRACE_EVENT 5#include <linux/tracepoint.h>
4# error Do not include this file directly.
5# error Unless you know what you are doing.
6#endif
7 6
8#undef TRACE_SYSTEM 7#undef TRACE_SYSTEM
9#define TRACE_SYSTEM sched 8#define TRACE_SYSTEM sched
@@ -181,9 +180,9 @@ TRACE_EVENT(sched_switch,
181 */ 180 */
182TRACE_EVENT(sched_migrate_task, 181TRACE_EVENT(sched_migrate_task,
183 182
184 TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), 183 TP_PROTO(struct task_struct *p, int dest_cpu),
185 184
186 TP_ARGS(p, orig_cpu, dest_cpu), 185 TP_ARGS(p, dest_cpu),
187 186
188 TP_STRUCT__entry( 187 TP_STRUCT__entry(
189 __array( char, comm, TASK_COMM_LEN ) 188 __array( char, comm, TASK_COMM_LEN )
@@ -197,7 +196,7 @@ TRACE_EVENT(sched_migrate_task,
197 memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 196 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
198 __entry->pid = p->pid; 197 __entry->pid = p->pid;
199 __entry->prio = p->prio; 198 __entry->prio = p->prio;
200 __entry->orig_cpu = orig_cpu; 199 __entry->orig_cpu = task_cpu(p);
201 __entry->dest_cpu = dest_cpu; 200 __entry->dest_cpu = dest_cpu;
202 ), 201 ),
203 202
@@ -334,4 +333,7 @@ TRACE_EVENT(sched_signal_send,
334 __entry->sig, __entry->comm, __entry->pid) 333 __entry->sig, __entry->comm, __entry->pid)
335); 334);
336 335
337#undef TRACE_SYSTEM 336#endif /* _TRACE_SCHED_H */
337
338/* This part must be outside protection */
339#include <trace/define_trace.h>
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644
index 000000000000..1e8fabb57c06
--- /dev/null
+++ b/include/trace/events/skb.h
@@ -0,0 +1,40 @@
1#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_SKB_H
3
4#include <linux/skbuff.h>
5#include <linux/tracepoint.h>
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM skb
9
10/*
11 * Tracepoint for freeing an sk_buff:
12 */
13TRACE_EVENT(kfree_skb,
14
15 TP_PROTO(struct sk_buff *skb, void *location),
16
17 TP_ARGS(skb, location),
18
 /* Record layout: the skb pointer, its protocol, and @location. */
19 TP_STRUCT__entry(
20 __field( void *, skbaddr )
21 __field( unsigned short, protocol )
22 __field( void *, location )
23 ),
24
25 TP_fast_assign(
26 __entry->skbaddr = skb;
 /*
  * TP_printk always prints protocol, so give it a defined value
  * even when @skb is NULL instead of leaving the field unwritten.
  */
27 __entry->protocol = 0;
28 if (skb)
29 __entry->protocol = ntohs(skb->protocol);
30 __entry->location = location;
31 ),
32
33 TP_printk("skbaddr=%p protocol=%u location=%p",
34 __entry->skbaddr, __entry->protocol, __entry->location)
35);
36
37#endif /* _TRACE_SKB_H */
38
39/* This part must be outside protection */
40#include <trace/define_trace.h>
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644
index 000000000000..edb02bc9f8ff
--- /dev/null
+++ b/include/trace/ftrace.h
@@ -0,0 +1,509 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_events.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * <type2> <item2>[<len>];
10 * [...]
11 * };
12 *
13 * The <type> <item> is created by the __field(type, item) macro or
14 * the __array(type2, item2, len) macro.
15 * We simply do "type item;", and that will create the fields
16 * in the structure.
17 */
18
19#include <linux/ftrace_event.h>
20
/* In stage 1 each field macro expands to a plain struct member. */
21#undef __array
22#define __array(type, item, len) type item[len];
23
24#undef __field
25#define __field(type, item) type item;
26
/*
 * A string is represented in the record by an int named
 * __str_loc_<item>; the source expression is not used in this stage.
 */
27#undef __string
28#define __string(item, src) int __str_loc_##item;
29
30#undef TP_STRUCT__entry
31#define TP_STRUCT__entry(args...) args
32
/*
 * Emit struct ftrace_raw_<name>: the common trace_entry header, the
 * declared fields, then a zero-length __str_data[] array at the end.
 * Also forward-declare event_<name> so later stages can refer to it.
 */
33#undef TRACE_EVENT
34#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
35 struct ftrace_raw_##name { \
36 struct trace_entry ent; \
37 tstruct \
38 char __str_data[0]; \
39 }; \
40 static struct ftrace_event_call event_##name
41
/* Re-read the event header with the stage 1 definitions in effect. */
42#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
43
44
45/*
46 * Stage 2 of the trace events.
47 *
48 * Include the following:
49 *
50 * struct ftrace_str_offsets_<call> {
51 * int <str1>;
52 * int <str2>;
53 * [...]
54 * };
55 *
56 * The __string() macro will create each int <str>, this is to
57 * keep the offset of each string from the beginning of the event
58 * once we perform the strlen() of the src strings.
59 *
60 */
61
/*
 * In stage 2 only __string() contributes a member; __array() and
 * __field() expand to nothing.
 */
62#undef __array
63#define __array(type, item, len)
64
65#undef __field
66#define __field(type, item)
67
68#undef __string
69#define __string(item, src) int item;
70
/* Emit struct ftrace_str_offsets_<call> with one int per __string(). */
71#undef TRACE_EVENT
72#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
73 struct ftrace_str_offsets_##call { \
74 tstruct; \
75 };
76
/* Re-read the event header with the stage 2 definitions in effect. */
77#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
78
79/*
80 * Stage 3 of the trace events.
81 *
82 * Override the macros in <trace/trace_events.h> to include the following:
83 *
84 * enum print_line_t
85 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
86 * {
87 * struct trace_seq *s = &iter->seq;
88 * struct ftrace_raw_<call> *field; <-- defined in stage 1
89 * struct trace_entry *entry;
90 * int ret;
91 *
92 * entry = iter->ent;
93 *
94 * if (entry->type != event_<call>.id) {
95 * WARN_ON_ONCE(1);
96 * return TRACE_TYPE_UNHANDLED;
97 * }
98 *
99 * field = (typeof(field))entry;
100 *
101 * ret = trace_seq_printf(s, <TP_printk> "\n");
102 * if (!ret)
103 * return TRACE_TYPE_PARTIAL_LINE;
104 *
105 * return TRACE_TYPE_HANDLED;
106 * }
107 *
108 * This is the method used to print the raw event to the trace
109 * output format. Note, this is not needed if the data is read
110 * in binary.
111 */
112
/* In the output function the record pointer is the local "field". */
113#undef __entry
114#define __entry field
115
/*
 * "##args" drops the preceding comma when TP_printk() is given a format
 * string and no arguments, matching TP_FMT() later in this file.
 */
116#undef TP_printk
117#define TP_printk(fmt, args...) fmt "\n", ##args
118
/* A string lives at byte offset __str_loc_<field> from the record start. */
119#undef __get_str
120#define __get_str(field) ((char *)__entry + __entry->__str_loc_##field)
121
/*
 * Emit ftrace_raw_output_<call>(): check that the entry's type matches
 * event_<call>.id, then print "<call>: " followed by the event's
 * TP_printk() text into iter->seq.
 * Returns TRACE_TYPE_UNHANDLED on a type mismatch,
 * TRACE_TYPE_PARTIAL_LINE when trace_seq_printf() returns 0, and
 * TRACE_TYPE_HANDLED otherwise.
 */
122#undef TRACE_EVENT
123#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
124enum print_line_t \
125ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
126{ \
127 struct trace_seq *s = &iter->seq; \
128 struct ftrace_raw_##call *field; \
129 struct trace_entry *entry; \
130 int ret; \
131 \
132 entry = iter->ent; \
133 \
134 if (entry->type != event_##call.id) { \
135 WARN_ON_ONCE(1); \
136 return TRACE_TYPE_UNHANDLED; \
137 } \
138 \
139 field = (typeof(field))entry; \
140 \
141 ret = trace_seq_printf(s, #call ": " print); \
142 if (!ret) \
143 return TRACE_TYPE_PARTIAL_LINE; \
144 \
145 return TRACE_TYPE_HANDLED; \
146}
147
/* Re-read the event header with the stage 3 definitions in effect. */
148#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
149
150/*
151 * Setup the showing format of trace point.
152 *
153 * int
154 * ftrace_format_##call(struct trace_seq *s)
155 * {
156 * struct ftrace_raw_##call field;
157 * int ret;
158 *
159 * ret = trace_seq_printf(s, #type " " #item ";"
160 * " offset:%u; size:%u;\n",
161 * offsetof(struct ftrace_raw_##call, item),
162 * sizeof(field.type));
163 *
164 * }
165 */
166
167#undef TP_STRUCT__entry
168#define TP_STRUCT__entry(args...) args
169
/*
 * For the format description, each __field() prints one
 * "field:<type> <item>; offset:..; size:..;" line and makes the
 * enclosing function return 0 if trace_seq_printf() returns 0.
 */
170#undef __field
171#define __field(type, item) \
172 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
173 "offset:%u;\tsize:%u;\n", \
174 (unsigned int)offsetof(typeof(field), item), \
175 (unsigned int)sizeof(field.item)); \
176 if (!ret) \
177 return 0;
178
/* Same as __field(), with "[<len>]" appended to the item name. */
179#undef __array
180#define __array(type, item, len) \
181 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
182 "offset:%u;\tsize:%u;\n", \
183 (unsigned int)offsetof(typeof(field), item), \
184 (unsigned int)sizeof(field.item)); \
185 if (!ret) \
186 return 0;
187
/*
 * Describe a string field through its __str_loc_<item> slot, using the
 * same tab-separated "offset:..;<tab>size:..;" layout as __field() and
 * __array().
 */
188#undef __string
189#define __string(item, src) \
190 ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t" \
191 "offset:%u;\tsize:%u;\n", \
192 (unsigned int)offsetof(typeof(field), \
193 __str_loc_##item), \
194 (unsigned int)sizeof(field.__str_loc_##item)); \
195 if (!ret) \
196 return 0;
197
/*
 * In the format description the print arguments are stringified, not
 * evaluated, so __entry becomes the literal token REC.
 */
198#undef __entry
199#define __entry REC
200
/* Print the format string and its argument list as text. */
201#undef TP_printk
202#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
203
204#undef TP_fast_assign
205#define TP_fast_assign(args...) args
206
/*
 * Emit ftrace_format_<call>(): write one line per declared field, then
 * the "print fmt:" line. Returns the result of the last field's
 * trace_seq_printf(), or 0 as soon as one of them returns 0; the result
 * of the final "print fmt" write is not checked.
 */
207#undef TRACE_EVENT
208#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
209static int \
210ftrace_format_##call(struct trace_seq *s) \
211{ \
212 struct ftrace_raw_##call field __attribute__((unused)); \
213 int ret = 0; \
214 \
215 tstruct; \
216 \
217 trace_seq_printf(s, "\nprint fmt: " print); \
218 \
219 return ret; \
220}
221
/* Re-read the event header to generate the format functions. */
222#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
223
/*
 * For field registration, each __field() calls trace_define_field()
 * with the item's type name, offset, size and signedness, and returns
 * from the enclosing function on a non-zero result.
 */
224#undef __field
225#define __field(type, item) \
226 ret = trace_define_field(event_call, #type, #item, \
227 offsetof(typeof(field), item), \
228 sizeof(field.item), is_signed_type(type)); \
229 if (ret) \
230 return ret;
231
/*
 * Arrays are registered as "<type>[<len>]" with the signed flag 0.
 * The build fails if len exceeds MAX_FILTER_STR_VAL.
 */
232#undef __array
233#define __array(type, item, len) \
234 BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \
235 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
236 offsetof(typeof(field), item), \
237 sizeof(field.item), 0); \
238 if (ret) \
239 return ret;
240
241#undef __string
242#define __string(item, src) \
243 ret = trace_define_field(event_call, "__str_loc", #item, \
244 offsetof(typeof(field), __str_loc_##item), \
245 sizeof(field.__str_loc_##item), 0);
246
/*
 * Emit ftrace_define_fields_<call>(): register the five common fields
 * (type, flags, preempt_count, pid, tgid) and then every field declared
 * in the event's TP_STRUCT__entry().
 * NOTE(review): ret is not initialised here, so this relies on
 * __common_field() (defined elsewhere) assigning it -- confirm.
 */
247#undef TRACE_EVENT
248#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
249int \
250ftrace_define_fields_##call(void) \
251{ \
252 struct ftrace_raw_##call field; \
253 struct ftrace_event_call *event_call = &event_##call; \
254 int ret; \
255 \
256 __common_field(int, type, 1); \
257 __common_field(unsigned char, flags, 0); \
258 __common_field(unsigned char, preempt_count, 0); \
259 __common_field(int, pid, 1); \
260 __common_field(int, tgid, 1); \
261 \
262 tstruct; \
263 \
264 return ret; \
265}
266
/* Re-read the event header to generate the define_fields functions. */
267#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
268
269/*
270 * Stage 4 of the trace events.
271 *
272 * Override the macros in <trace/trace_events.h> to include the following:
273 *
274 * static void ftrace_event_<call>(proto)
275 * {
276 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
277 * }
278 *
279 * static int ftrace_reg_event_<call>(void)
280 * {
281 * int ret;
282 *
283 * ret = register_trace_<call>(ftrace_event_<call>);
284 * if (!ret)
285 * pr_info("event trace: Could not activate trace point "
286 * "probe to <call>");
287 * return ret;
288 * }
289 *
290 * static void ftrace_unreg_event_<call>(void)
291 * {
292 * unregister_trace_<call>(ftrace_event_<call>);
293 * }
294 *
295 *
296 * For those macros defined with TRACE_EVENT:
297 *
298 * static struct ftrace_event_call event_<call>;
299 *
300 * static void ftrace_raw_event_<call>(proto)
301 * {
302 * struct ring_buffer_event *event;
303 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
304 * unsigned long irq_flags;
305 * int pc;
306 *
307 * local_save_flags(irq_flags);
308 * pc = preempt_count();
309 *
310 * event = trace_current_buffer_lock_reserve(event_<call>.id,
311 * sizeof(struct ftrace_raw_<call>),
312 * irq_flags, pc);
313 * if (!event)
314 * return;
315 * entry = ring_buffer_event_data(event);
316 *
317 * <assign>; <-- Here we assign the entries by the __field and
318 * __array macros.
319 *
320 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
321 * }
322 *
323 * static int ftrace_raw_reg_event_<call>(void)
324 * {
325 * int ret;
326 *
327 * ret = register_trace_<call>(ftrace_raw_event_<call>);
328 * if (ret)
329 * pr_info("event trace: Could not activate trace point "
330 * "probe to <call>");
331 * return ret;
332 * }
333 *
334 * static void ftrace_raw_unreg_event_<call>(void)
335 * {
336 * unregister_trace_<call>(ftrace_raw_event_<call>);
337 * }
338 *
339 * static struct trace_event ftrace_event_type_<call> = {
340 * .trace = ftrace_raw_output_<call>, <-- stage 3
341 * };
342 *
343 * static int ftrace_raw_init_event_<call>(void)
344 * {
345 * int id;
346 *
347 * id = register_ftrace_event(&ftrace_event_type_<call>);
348 * if (!id)
349 * return -ENODEV;
350 * event_<call>.id = id;
351 * return 0;
352 * }
353 *
354 * static struct ftrace_event_call __used
355 * __attribute__((__aligned__(4)))
356 * __attribute__((section("_ftrace_events"))) event_<call> = {
357 * .name = "<call>",
358 * .system = "<system>",
359 * .raw_init = ftrace_raw_init_event_<call>,
360 * .regfunc = ftrace_reg_event_<call>,
361 * .unregfunc = ftrace_unreg_event_<call>,
362 * .show_format = ftrace_format_<call>,
363 * }
364 *
365 */
366
/*
 * Append a newline to the format string; "##args" drops the comma when
 * no arguments are given.
 */
367#undef TP_FMT
368#define TP_FMT(fmt, args...) fmt "\n", ##args
369
/*
 * Optional per-event profiling hooks, built only with
 * CONFIG_EVENT_PROFILE.
 *
 * ftrace_profile_<call>() is the probe: it passes the event id to
 * perf_tpcounter_event(). profile_count starts at -1 (see
 * _TRACE_PROFILE_INIT), so the probe is registered when an enable
 * brings the count to 0 and unregistered when a disable makes it
 * negative again.
 */
370#ifdef CONFIG_EVENT_PROFILE
371#define _TRACE_PROFILE(call, proto, args) \
372static void ftrace_profile_##call(proto) \
373{ \
374 extern void perf_tpcounter_event(int); \
375 perf_tpcounter_event(event_##call.id); \
376} \
377 \
378static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
379{ \
380 int ret = 0; \
381 \
382 if (!atomic_inc_return(&call->profile_count)) \
383 ret = register_trace_##call(ftrace_profile_##call); \
384 \
385 return ret; \
386} \
387 \
388static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
389{ \
390 if (atomic_add_negative(-1, &call->profile_count)) \
391 unregister_trace_##call(ftrace_profile_##call); \
392}
393
/* Initialisers for the profiling members of struct ftrace_event_call. */
394#define _TRACE_PROFILE_INIT(call) \
395 .profile_count = ATOMIC_INIT(-1), \
396 .profile_enable = ftrace_profile_enable_##call, \
397 .profile_disable = ftrace_profile_disable_##call,
398
/* Without CONFIG_EVENT_PROFILE both macros expand to nothing. */
399#else
400#define _TRACE_PROFILE(call, proto, args)
401#define _TRACE_PROFILE_INIT(call)
402#endif
403
/* In the probe function the record pointer is the local "entry". */
404#undef __entry
405#define __entry entry
406
/* Fixed-size fields need no work before the record is reserved. */
407#undef __field
408#define __field(type, item)
409
410#undef __array
411#define __array(type, item, len)
412
/*
 * For each string, store the offset it will have in the record (the
 * offset of __str_data plus the bytes used so far) and add its length,
 * including the terminating NUL, to __str_size.
 */
413#undef __string
414#define __string(item, src) \
415 __str_offsets.item = __str_size + \
416 offsetof(typeof(*entry), __str_data); \
417 __str_size += strlen(src) + 1;
418
/*
 * Store the precomputed offset in the record, then copy the string to
 * that location. This expands to two statements, so it is only safe
 * where a statement list is allowed.
 */
419#undef __assign_str
420#define __assign_str(dst, src) \
421 __entry->__str_loc_##dst = __str_offsets.dst; \
422 strcpy(__get_str(dst), src);
423
/*
 * Emit the runtime pieces for one event:
 *
 *  - the optional profiling hooks (_TRACE_PROFILE);
 *  - ftrace_raw_event_<call>(): the probe. It evaluates tstruct to
 *    compute string offsets and __str_size, reserves
 *    sizeof(struct ftrace_raw_<call>) + __str_size bytes, returns
 *    silently if the reservation fails, runs the event's assignments,
 *    and commits the event unless filter_current_check_discard()
 *    returns non-zero;
 *  - ftrace_raw_reg_event_<call>() and ftrace_raw_unreg_event_<call>():
 *    attach and detach the probe; a registration failure is logged
 *    with pr_info();
 *  - ftrace_raw_init_event_<call>(): register the output handler, store
 *    the returned id in event_<call>, and initialise its field list and
 *    filter predicates; returns -ENODEV if no id was assigned;
 *  - the event_<call> descriptor, placed in the "_ftrace_events"
 *    section.
 */
424#undef TRACE_EVENT
425#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
426_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
427 \
428static struct ftrace_event_call event_##call; \
429 \
430static void ftrace_raw_event_##call(proto) \
431{ \
432 struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \
433 struct ftrace_event_call *call = &event_##call; \
434 struct ring_buffer_event *event; \
435 struct ftrace_raw_##call *entry; \
436 unsigned long irq_flags; \
437 int __str_size = 0; \
438 int pc; \
439 \
440 local_save_flags(irq_flags); \
441 pc = preempt_count(); \
442 \
443 tstruct; \
444 \
445 event = trace_current_buffer_lock_reserve(event_##call.id, \
446 sizeof(struct ftrace_raw_##call) + __str_size,\
447 irq_flags, pc); \
448 if (!event) \
449 return; \
450 entry = ring_buffer_event_data(event); \
451 \
452 assign; \
453 \
454 if (!filter_current_check_discard(call, entry, event)) \
455 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
456} \
457 \
458static int ftrace_raw_reg_event_##call(void) \
459{ \
460 int ret; \
461 \
462 ret = register_trace_##call(ftrace_raw_event_##call); \
463 if (ret) \
464 pr_info("event trace: Could not activate trace point " \
465 "probe to " #call "\n"); \
466 return ret; \
467} \
468 \
469static void ftrace_raw_unreg_event_##call(void) \
470{ \
471 unregister_trace_##call(ftrace_raw_event_##call); \
472} \
473 \
474static struct trace_event ftrace_event_type_##call = { \
475 .trace = ftrace_raw_output_##call, \
476}; \
477 \
478static int ftrace_raw_init_event_##call(void) \
479{ \
480 int id; \
481 \
482 id = register_ftrace_event(&ftrace_event_type_##call); \
483 if (!id) \
484 return -ENODEV; \
485 event_##call.id = id; \
486 INIT_LIST_HEAD(&event_##call.fields); \
487 init_preds(&event_##call); \
488 return 0; \
489} \
490 \
491static struct ftrace_event_call __used \
492__attribute__((__aligned__(4))) \
493__attribute__((section("_ftrace_events"))) event_##call = { \
494 .name = #call, \
495 .system = __stringify(TRACE_SYSTEM), \
496 .event = &ftrace_event_type_##call, \
497 .raw_init = ftrace_raw_init_event_##call, \
498 .regfunc = ftrace_raw_reg_event_##call, \
499 .unregfunc = ftrace_raw_unreg_event_##call, \
500 .show_format = ftrace_format_##call, \
501 .define_fields = ftrace_define_fields_##call, \
502 _TRACE_PROFILE_INIT(call) \
503}
504
/* Re-read the event header with the stage 4 definitions in effect. */
505#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
506
/* The profiling helpers are private to this header. */
507#undef _TRACE_PROFILE
508#undef _TRACE_PROFILE_INIT
509
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644
index ff5d4495dc37..000000000000
--- a/include/trace/irq.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _TRACE_IRQ_H
2#define _TRACE_IRQ_H
3
4#include <linux/interrupt.h>
5#include <linux/tracepoint.h>
6
7#include <trace/irq_event_types.h>
8
9#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644
index 85964ebd47ec..000000000000
--- a/include/trace/irq_event_types.h
+++ /dev/null
@@ -1,55 +0,0 @@
1
2/* use <trace/irq.h> instead */
3#ifndef TRACE_FORMAT
4# error Do not include this file directly.
5# error Unless you know what you are doing.
6#endif
7
8#undef TRACE_SYSTEM
9#define TRACE_SYSTEM irq
10
11/*
12 * Tracepoint for entry of interrupt handler:
13 */
14TRACE_FORMAT(irq_handler_entry,
15 TP_PROTO(int irq, struct irqaction *action),
16 TP_ARGS(irq, action),
17 TP_FMT("irq=%d handler=%s", irq, action->name)
18 );
19
20/*
21 * Tracepoint for return of an interrupt handler:
22 */
23TRACE_EVENT(irq_handler_exit,
24
25 TP_PROTO(int irq, struct irqaction *action, int ret),
26
27 TP_ARGS(irq, action, ret),
28
29 TP_STRUCT__entry(
30 __field( int, irq )
31 __field( int, ret )
32 ),
33
34 TP_fast_assign(
35 __entry->irq = irq;
36 __entry->ret = ret;
37 ),
38
39 TP_printk("irq=%d return=%s",
40 __entry->irq, __entry->ret ? "handled" : "unhandled")
41);
42
43TRACE_FORMAT(softirq_entry,
44 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
45 TP_ARGS(h, vec),
46 TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
47 );
48
49TRACE_FORMAT(softirq_exit,
50 TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
51 TP_ARGS(h, vec),
52 TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
53 );
54
55#undef TRACE_SYSTEM
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644
index 28ee69f9cd46..000000000000
--- a/include/trace/kmemtrace.h
+++ /dev/null
@@ -1,63 +0,0 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <linux/tracepoint.h>
13#include <linux/types.h>
14
15#ifdef CONFIG_KMEMTRACE
16extern void kmemtrace_init(void);
17#else
18static inline void kmemtrace_init(void)
19{
20}
21#endif
22
23DECLARE_TRACE(kmalloc,
24 TP_PROTO(unsigned long call_site,
25 const void *ptr,
26 size_t bytes_req,
27 size_t bytes_alloc,
28 gfp_t gfp_flags),
29 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
30DECLARE_TRACE(kmem_cache_alloc,
31 TP_PROTO(unsigned long call_site,
32 const void *ptr,
33 size_t bytes_req,
34 size_t bytes_alloc,
35 gfp_t gfp_flags),
36 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
37DECLARE_TRACE(kmalloc_node,
38 TP_PROTO(unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node),
44 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
45DECLARE_TRACE(kmem_cache_alloc_node,
46 TP_PROTO(unsigned long call_site,
47 const void *ptr,
48 size_t bytes_req,
49 size_t bytes_alloc,
50 gfp_t gfp_flags,
51 int node),
52 TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
53DECLARE_TRACE(kfree,
54 TP_PROTO(unsigned long call_site, const void *ptr),
55 TP_ARGS(call_site, ptr));
56DECLARE_TRACE(kmem_cache_free,
57 TP_PROTO(unsigned long call_site, const void *ptr),
58 TP_ARGS(call_site, ptr));
59
60#endif /* __KERNEL__ */
61
62#endif /* _LINUX_KMEMTRACE_H */
63
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644
index 5ca67df87f2a..000000000000
--- a/include/trace/lockdep.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _TRACE_LOCKDEP_H
2#define _TRACE_LOCKDEP_H
3
4#include <linux/lockdep.h>
5#include <linux/tracepoint.h>
6
7#include <trace/lockdep_event_types.h>
8
9#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644
index adccfcd2ec8f..000000000000
--- a/include/trace/lockdep_event_types.h
+++ /dev/null
@@ -1,44 +0,0 @@
1
2#ifndef TRACE_FORMAT
3# error Do not include this file directly.
4# error Unless you know what you are doing.
5#endif
6
7#undef TRACE_SYSTEM
8#define TRACE_SYSTEM lock
9
10#ifdef CONFIG_LOCKDEP
11
12TRACE_FORMAT(lock_acquire,
13 TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
14 int trylock, int read, int check,
15 struct lockdep_map *next_lock, unsigned long ip),
16 TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
17 TP_FMT("%s%s%s", trylock ? "try " : "",
18 read ? "read " : "", lock->name)
19 );
20
21TRACE_FORMAT(lock_release,
22 TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
23 TP_ARGS(lock, nested, ip),
24 TP_FMT("%s", lock->name)
25 );
26
27#ifdef CONFIG_LOCK_STAT
28
29TRACE_FORMAT(lock_contended,
30 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
31 TP_ARGS(lock, ip),
32 TP_FMT("%s", lock->name)
33 );
34
35TRACE_FORMAT(lock_acquired,
36 TP_PROTO(struct lockdep_map *lock, unsigned long ip),
37 TP_ARGS(lock, ip),
38 TP_FMT("%s", lock->name)
39 );
40
41#endif
42#endif
43
44#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644
index 4e372a1a29bf..000000000000
--- a/include/trace/sched.h
+++ /dev/null
@@ -1,9 +0,0 @@
1#ifndef _TRACE_SCHED_H
2#define _TRACE_SCHED_H
3
4#include <linux/sched.h>
5#include <linux/tracepoint.h>
6
7#include <trace/sched_event_types.h>
8
9#endif
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644
index b66206d9be72..000000000000
--- a/include/trace/skb.h
+++ /dev/null
@@ -1,11 +0,0 @@
1#ifndef _TRACE_SKB_H_
2#define _TRACE_SKB_H_
3
4#include <linux/skbuff.h>
5#include <linux/tracepoint.h>
6
7DECLARE_TRACE(kfree_skb,
8 TP_PROTO(struct sk_buff *skb, void *location),
9 TP_ARGS(skb, location));
10
11#endif
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644
index df56f5694be6..000000000000
--- a/include/trace/trace_event_types.h
+++ /dev/null
@@ -1,5 +0,0 @@
1/* trace/<type>_event_types.h here */
2
3#include <trace/sched_event_types.h>
4#include <trace/irq_event_types.h>
5#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644
index fd13750ca4ba..000000000000
--- a/include/trace/trace_events.h
+++ /dev/null
@@ -1,5 +0,0 @@
1/* trace/<type>.h here */
2
3#include <trace/sched.h>
4#include <trace/irq.h>
5#include <trace/lockdep.h>
diff --git a/init/main.c b/init/main.c
index 3bbf93be744c..7c6a652d3d78 100644
--- a/init/main.c
+++ b/init/main.c
@@ -64,6 +64,7 @@
64#include <linux/idr.h> 64#include <linux/idr.h>
65#include <linux/ftrace.h> 65#include <linux/ftrace.h>
66#include <linux/async.h> 66#include <linux/async.h>
67#include <linux/kmemtrace.h>
67#include <trace/boot.h> 68#include <trace/boot.h>
68 69
69#include <asm/io.h> 70#include <asm/io.h>
@@ -71,7 +72,6 @@
71#include <asm/setup.h> 72#include <asm/setup.h>
72#include <asm/sections.h> 73#include <asm/sections.h>
73#include <asm/cacheflush.h> 74#include <asm/cacheflush.h>
74#include <trace/kmemtrace.h>
75 75
76#ifdef CONFIG_X86_LOCAL_APIC 76#ifdef CONFIG_X86_LOCAL_APIC
77#include <asm/smp.h> 77#include <asm/smp.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index abf9cf3b95c6..cab535c427b8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -48,7 +48,7 @@
48#include <linux/tracehook.h> 48#include <linux/tracehook.h>
49#include <linux/fs_struct.h> 49#include <linux/fs_struct.h>
50#include <linux/init_task.h> 50#include <linux/init_task.h>
51#include <trace/sched.h> 51#include <trace/events/sched.h>
52 52
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/unistd.h> 54#include <asm/unistd.h>
@@ -56,10 +56,6 @@
56#include <asm/mmu_context.h> 56#include <asm/mmu_context.h>
57#include "cred-internals.h" 57#include "cred-internals.h"
58 58
59DEFINE_TRACE(sched_process_free);
60DEFINE_TRACE(sched_process_exit);
61DEFINE_TRACE(sched_process_wait);
62
63static void exit_mm(struct task_struct * tsk); 59static void exit_mm(struct task_struct * tsk);
64 60
65static void __unhash_process(struct task_struct *p) 61static void __unhash_process(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd00726..085f73ebcea6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,7 +61,6 @@
61#include <linux/proc_fs.h> 61#include <linux/proc_fs.h>
62#include <linux/blkdev.h> 62#include <linux/blkdev.h>
63#include <linux/fs_struct.h> 63#include <linux/fs_struct.h>
64#include <trace/sched.h>
65#include <linux/magic.h> 64#include <linux/magic.h>
66 65
67#include <asm/pgtable.h> 66#include <asm/pgtable.h>
@@ -71,6 +70,8 @@
71#include <asm/cacheflush.h> 70#include <asm/cacheflush.h>
72#include <asm/tlbflush.h> 71#include <asm/tlbflush.h>
73 72
73#include <trace/events/sched.h>
74
74/* 75/*
75 * Protected counters by write_lock_irq(&tasklist_lock) 76 * Protected counters by write_lock_irq(&tasklist_lock)
76 */ 77 */
@@ -83,8 +84,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
83 84
84__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 85__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
85 86
86DEFINE_TRACE(sched_process_fork);
87
88int nr_processes(void) 87int nr_processes(void)
89{ 88{
90 int cpu; 89 int cpu;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 26e08754744f..5dd2572993cf 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,8 +17,8 @@
17#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
18#include <linux/rculist.h> 18#include <linux/rculist.h>
19#include <linux/hash.h> 19#include <linux/hash.h>
20#include <trace/irq.h>
21#include <linux/bootmem.h> 20#include <linux/bootmem.h>
21#include <trace/events/irq.h>
22 22
23#include "internals.h" 23#include "internals.h"
24 24
@@ -348,9 +348,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
348 "but no thread function available.", irq, action->name); 348 "but no thread function available.", irq, action->name);
349} 349}
350 350
351DEFINE_TRACE(irq_handler_entry);
352DEFINE_TRACE(irq_handler_exit);
353
354/** 351/**
355 * handle_IRQ_event - irq action chain handler 352 * handle_IRQ_event - irq action chain handler
356 * @irq: the interrupt number 353 * @irq: the interrupt number
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 4ebaf8519abf..41c88fe40500 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,7 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/mutex.h> 15#include <linux/mutex.h>
16#include <trace/sched.h> 16#include <trace/events/sched.h>
17 17
18#define KTHREAD_NICE_LEVEL (-5) 18#define KTHREAD_NICE_LEVEL (-5)
19 19
@@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock);
21static LIST_HEAD(kthread_create_list); 21static LIST_HEAD(kthread_create_list);
22struct task_struct *kthreadd_task; 22struct task_struct *kthreadd_task;
23 23
24DEFINE_TRACE(sched_kthread_stop);
25DEFINE_TRACE(sched_kthread_stop_ret);
26
27struct kthread_create_info 24struct kthread_create_info
28{ 25{
29 /* Information passed to kthread() from kthreadd. */ 26 /* Information passed to kthread() from kthreadd. */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index accb40cdb12a..8bbeef996c76 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -42,12 +42,14 @@
42#include <linux/hash.h> 42#include <linux/hash.h>
43#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/stringify.h> 44#include <linux/stringify.h>
45#include <trace/lockdep.h>
46 45
47#include <asm/sections.h> 46#include <asm/sections.h>
48 47
49#include "lockdep_internals.h" 48#include "lockdep_internals.h"
50 49
50#define CREATE_TRACE_POINTS
51#include <trace/events/lockdep.h>
52
51#ifdef CONFIG_PROVE_LOCKING 53#ifdef CONFIG_PROVE_LOCKING
52int prove_locking = 1; 54int prove_locking = 1;
53module_param(prove_locking, int, 0644); 55module_param(prove_locking, int, 0644);
@@ -2935,8 +2937,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
2935} 2937}
2936EXPORT_SYMBOL_GPL(lock_set_class); 2938EXPORT_SYMBOL_GPL(lock_set_class);
2937 2939
2938DEFINE_TRACE(lock_acquire);
2939
2940/* 2940/*
2941 * We are not always called with irqs disabled - do that here, 2941 * We are not always called with irqs disabled - do that here,
2942 * and also avoid lockdep recursion: 2942 * and also avoid lockdep recursion:
@@ -2963,8 +2963,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2963} 2963}
2964EXPORT_SYMBOL_GPL(lock_acquire); 2964EXPORT_SYMBOL_GPL(lock_acquire);
2965 2965
2966DEFINE_TRACE(lock_release);
2967
2968void lock_release(struct lockdep_map *lock, int nested, 2966void lock_release(struct lockdep_map *lock, int nested,
2969 unsigned long ip) 2967 unsigned long ip)
2970{ 2968{
@@ -3105,6 +3103,8 @@ found_it:
3105 hlock->holdtime_stamp = now; 3103 hlock->holdtime_stamp = now;
3106 } 3104 }
3107 3105
3106 trace_lock_acquired(lock, ip, waittime);
3107
3108 stats = get_lock_stats(hlock_class(hlock)); 3108 stats = get_lock_stats(hlock_class(hlock));
3109 if (waittime) { 3109 if (waittime) {
3110 if (hlock->read) 3110 if (hlock->read)
@@ -3120,8 +3120,6 @@ found_it:
3120 lock->ip = ip; 3120 lock->ip = ip;
3121} 3121}
3122 3122
3123DEFINE_TRACE(lock_contended);
3124
3125void lock_contended(struct lockdep_map *lock, unsigned long ip) 3123void lock_contended(struct lockdep_map *lock, unsigned long ip)
3126{ 3124{
3127 unsigned long flags; 3125 unsigned long flags;
@@ -3143,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3143} 3141}
3144EXPORT_SYMBOL_GPL(lock_contended); 3142EXPORT_SYMBOL_GPL(lock_contended);
3145 3143
3146DEFINE_TRACE(lock_acquired);
3147
3148void lock_acquired(struct lockdep_map *lock, unsigned long ip) 3144void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3149{ 3145{
3150 unsigned long flags; 3146 unsigned long flags;
3151 3147
3152 trace_lock_acquired(lock, ip);
3153
3154 if (unlikely(!lock_stat)) 3148 if (unlikely(!lock_stat))
3155 return; 3149 return;
3156 3150
diff --git a/kernel/module.c b/kernel/module.c
index e797812a4d95..2383e60fcf3f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,6 +18,7 @@
18*/ 18*/
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/ftrace_event.h>
21#include <linux/init.h> 22#include <linux/init.h>
22#include <linux/kallsyms.h> 23#include <linux/kallsyms.h>
23#include <linux/fs.h> 24#include <linux/fs.h>
@@ -1489,9 +1490,6 @@ static void free_module(struct module *mod)
1489 /* Free any allocated parameters. */ 1490 /* Free any allocated parameters. */
1490 destroy_params(mod->kp, mod->num_kp); 1491 destroy_params(mod->kp, mod->num_kp);
1491 1492
1492 /* release any pointers to mcount in this module */
1493 ftrace_release(mod->module_core, mod->core_size);
1494
1495 /* This may be NULL, but that's OK */ 1493 /* This may be NULL, but that's OK */
1496 module_free(mod, mod->module_init); 1494 module_free(mod, mod->module_init);
1497 kfree(mod->args); 1495 kfree(mod->args);
@@ -1892,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
1892 unsigned int symindex = 0; 1890 unsigned int symindex = 0;
1893 unsigned int strindex = 0; 1891 unsigned int strindex = 0;
1894 unsigned int modindex, versindex, infoindex, pcpuindex; 1892 unsigned int modindex, versindex, infoindex, pcpuindex;
1895 unsigned int num_mcount;
1896 struct module *mod; 1893 struct module *mod;
1897 long err = 0; 1894 long err = 0;
1898 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 1895 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
1899 unsigned long *mseg;
1900 mm_segment_t old_fs; 1896 mm_segment_t old_fs;
1901 1897
1902 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", 1898 DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2172,7 +2168,19 @@ static noinline struct module *load_module(void __user *umod,
2172 sizeof(*mod->tracepoints), 2168 sizeof(*mod->tracepoints),
2173 &mod->num_tracepoints); 2169 &mod->num_tracepoints);
2174#endif 2170#endif
2175 2171#ifdef CONFIG_EVENT_TRACING
2172 mod->trace_events = section_objs(hdr, sechdrs, secstrings,
2173 "_ftrace_events",
2174 sizeof(*mod->trace_events),
2175 &mod->num_trace_events);
2176#endif
2177#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2178 /* sechdrs[0].sh_size is always zero */
2179 mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
2180 "__mcount_loc",
2181 sizeof(*mod->ftrace_callsites),
2182 &mod->num_ftrace_callsites);
2183#endif
2176#ifdef CONFIG_MODVERSIONS 2184#ifdef CONFIG_MODVERSIONS
2177 if ((mod->num_syms && !mod->crcs) 2185 if ((mod->num_syms && !mod->crcs)
2178 || (mod->num_gpl_syms && !mod->gpl_crcs) 2186 || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2237,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
2237 dynamic_debug_setup(debug, num_debug); 2245 dynamic_debug_setup(debug, num_debug);
2238 } 2246 }
2239 2247
2240 /* sechdrs[0].sh_size is always zero */
2241 mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
2242 sizeof(*mseg), &num_mcount);
2243 ftrace_init_module(mod, mseg, mseg + num_mcount);
2244
2245 err = module_finalize(hdr, sechdrs, mod); 2248 err = module_finalize(hdr, sechdrs, mod);
2246 if (err < 0) 2249 if (err < 0)
2247 goto cleanup; 2250 goto cleanup;
@@ -2302,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
2302 cleanup: 2305 cleanup:
2303 kobject_del(&mod->mkobj.kobj); 2306 kobject_del(&mod->mkobj.kobj);
2304 kobject_put(&mod->mkobj.kobj); 2307 kobject_put(&mod->mkobj.kobj);
2305 ftrace_release(mod->module_core, mod->core_size);
2306 free_unload: 2308 free_unload:
2307 module_unload_free(mod); 2309 module_unload_free(mod);
2308#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) 2310#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
diff --git a/kernel/sched.c b/kernel/sched.c
index 26efa475bdc1..14a19b17674e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -72,13 +72,15 @@
72#include <linux/debugfs.h> 72#include <linux/debugfs.h>
73#include <linux/ctype.h> 73#include <linux/ctype.h>
74#include <linux/ftrace.h> 74#include <linux/ftrace.h>
75#include <trace/sched.h>
76 75
77#include <asm/tlb.h> 76#include <asm/tlb.h>
78#include <asm/irq_regs.h> 77#include <asm/irq_regs.h>
79 78
80#include "sched_cpupri.h" 79#include "sched_cpupri.h"
81 80
81#define CREATE_TRACE_POINTS
82#include <trace/events/sched.h>
83
82/* 84/*
83 * Convert user-nice values [ -20 ... 0 ... 19 ] 85 * Convert user-nice values [ -20 ... 0 ... 19 ]
84 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], 86 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
118 */ 120 */
119#define RUNTIME_INF ((u64)~0ULL) 121#define RUNTIME_INF ((u64)~0ULL)
120 122
121DEFINE_TRACE(sched_wait_task);
122DEFINE_TRACE(sched_wakeup);
123DEFINE_TRACE(sched_wakeup_new);
124DEFINE_TRACE(sched_switch);
125DEFINE_TRACE(sched_migrate_task);
126
127#ifdef CONFIG_SMP 123#ifdef CONFIG_SMP
128 124
129static void double_rq_lock(struct rq *rq1, struct rq *rq2); 125static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -1958,7 +1954,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1958 1954
1959 clock_offset = old_rq->clock - new_rq->clock; 1955 clock_offset = old_rq->clock - new_rq->clock;
1960 1956
1961 trace_sched_migrate_task(p, task_cpu(p), new_cpu); 1957 trace_sched_migrate_task(p, new_cpu);
1962 1958
1963#ifdef CONFIG_SCHEDSTATS 1959#ifdef CONFIG_SCHEDSTATS
1964 if (p->se.wait_start) 1960 if (p->se.wait_start)
diff --git a/kernel/signal.c b/kernel/signal.c
index d8034737db4c..94ec0a4dde0f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,7 +27,7 @@
27#include <linux/freezer.h> 27#include <linux/freezer.h>
28#include <linux/pid_namespace.h> 28#include <linux/pid_namespace.h>
29#include <linux/nsproxy.h> 29#include <linux/nsproxy.h>
30#include <trace/sched.h> 30#include <trace/events/sched.h>
31 31
32#include <asm/param.h> 32#include <asm/param.h>
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
@@ -41,8 +41,6 @@
41 41
42static struct kmem_cache *sigqueue_cachep; 42static struct kmem_cache *sigqueue_cachep;
43 43
44DEFINE_TRACE(sched_signal_send);
45
46static void __user *sig_handler(struct task_struct *t, int sig) 44static void __user *sig_handler(struct task_struct *t, int sig)
47{ 45{
48 return t->sighand->action[sig - 1].sa.sa_handler; 46 return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b525dd348511..dc4d0cfdcb2d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -24,7 +24,9 @@
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/smp.h> 25#include <linux/smp.h>
26#include <linux/tick.h> 26#include <linux/tick.h>
27#include <trace/irq.h> 27
28#define CREATE_TRACE_POINTS
29#include <trace/events/irq.h>
28 30
29#include <asm/irq.h> 31#include <asm/irq.h>
30/* 32/*
@@ -186,9 +188,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
186 */ 188 */
187#define MAX_SOFTIRQ_RESTART 10 189#define MAX_SOFTIRQ_RESTART 10
188 190
189DEFINE_TRACE(softirq_entry);
190DEFINE_TRACE(softirq_exit);
191
192asmlinkage void __do_softirq(void) 191asmlinkage void __do_softirq(void)
193{ 192{
194 struct softirq_action *h; 193 struct softirq_action *h;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 417d1985e299..50f62a296e1d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -48,6 +48,9 @@ config FTRACE_NMI_ENTER
48 depends on HAVE_FTRACE_NMI_ENTER 48 depends on HAVE_FTRACE_NMI_ENTER
49 default y 49 default y
50 50
51config EVENT_TRACING
52 bool
53
51config TRACING 54config TRACING
52 bool 55 bool
53 select DEBUG_FS 56 select DEBUG_FS
@@ -56,6 +59,7 @@ config TRACING
56 select TRACEPOINTS 59 select TRACEPOINTS
57 select NOP_TRACER 60 select NOP_TRACER
58 select BINARY_PRINTF 61 select BINARY_PRINTF
62 select EVENT_TRACING
59 63
60# 64#
61# Minimum requirements an architecture has to meet for us to 65# Minimum requirements an architecture has to meet for us to
@@ -73,7 +77,12 @@ config TRACING_SUPPORT
73 77
74if TRACING_SUPPORT 78if TRACING_SUPPORT
75 79
76menu "Tracers" 80menuconfig FTRACE
81 bool "Tracers"
82 help
83 Enable the kernel tracing infrastructure.
84
85if FTRACE
77 86
78config FUNCTION_TRACER 87config FUNCTION_TRACER
79 bool "Kernel Function Tracer" 88 bool "Kernel Function Tracer"
@@ -104,6 +113,7 @@ config FUNCTION_GRAPH_TRACER
104 the return value. This is done by setting the current return 113 the return value. This is done by setting the current return
105 address on the current task structure into a stack of calls. 114 address on the current task structure into a stack of calls.
106 115
116
107config IRQSOFF_TRACER 117config IRQSOFF_TRACER
108 bool "Interrupts-off Latency Tracer" 118 bool "Interrupts-off Latency Tracer"
109 default n 119 default n
@@ -173,7 +183,7 @@ config CONTEXT_SWITCH_TRACER
173 This tracer gets called from the context switch and records 183 This tracer gets called from the context switch and records
174 all switching of tasks. 184 all switching of tasks.
175 185
176config EVENT_TRACER 186config ENABLE_EVENT_TRACING
177 bool "Trace various events in the kernel" 187 bool "Trace various events in the kernel"
178 select TRACING 188 select TRACING
179 help 189 help
@@ -181,6 +191,10 @@ config EVENT_TRACER
181 allowing the user to pick and choose which trace point they 191 allowing the user to pick and choose which trace point they
182 want to trace. 192 want to trace.
183 193
194 Note, all tracers enable event tracing. This option is
195 only a convenience to enable event tracing when no other
196 tracers are selected.
197
184config FTRACE_SYSCALLS 198config FTRACE_SYSCALLS
185 bool "Trace syscalls" 199 bool "Trace syscalls"
186 depends on HAVE_FTRACE_SYSCALLS 200 depends on HAVE_FTRACE_SYSCALLS
@@ -207,8 +221,36 @@ config BOOT_TRACER
207 to enable this on bootup. 221 to enable this on bootup.
208 222
209config TRACE_BRANCH_PROFILING 223config TRACE_BRANCH_PROFILING
210 bool "Trace likely/unlikely profiler" 224 bool
211 select TRACING 225 select TRACING
226
227choice
228 prompt "Branch Profiling"
229 default BRANCH_PROFILE_NONE
230 help
231 The branch profiling is a software profiler. It will add hooks
232 into the C conditionals to test which path a branch takes.
233
234 The likely/unlikely profiler only looks at the conditions that
235 are annotated with a likely or unlikely macro.
236
237 The "all branch" profiler will profile every if statement in the
238 kernel. This profiler will also enable the likely/unlikely
239 profiler as well.
240
241 Either of the above profilers adds a bit of overhead to the system.
242 If unsure choose "No branch profiling".
243
244config BRANCH_PROFILE_NONE
245 bool "No branch profiling"
246 help
247 No branch profiling. Branch profiling adds a bit of overhead.
248 Only enable it if you want to analyse the branching behavior.
249 Otherwise keep it disabled.
250
251config PROFILE_ANNOTATED_BRANCHES
252 bool "Trace likely/unlikely profiler"
253 select TRACE_BRANCH_PROFILING
212 help 254 help
213 This tracer profiles all the likely and unlikely macros 255 This tracer profiles all the likely and unlikely macros
214 in the kernel. It will display the results in: 256 in the kernel. It will display the results in:
@@ -218,11 +260,9 @@ config TRACE_BRANCH_PROFILING
218 Note: this will add a significant overhead, only turn this 260 Note: this will add a significant overhead, only turn this
219 on if you need to profile the system's use of these macros. 261 on if you need to profile the system's use of these macros.
220 262
221 Say N if unsure.
222
223config PROFILE_ALL_BRANCHES 263config PROFILE_ALL_BRANCHES
224 bool "Profile all if conditionals" 264 bool "Profile all if conditionals"
225 depends on TRACE_BRANCH_PROFILING 265 select TRACE_BRANCH_PROFILING
226 help 266 help
227 This tracer profiles all branch conditions. Every if () 267 This tracer profiles all branch conditions. Every if ()
228 taken in the kernel is recorded whether it hit or miss. 268 taken in the kernel is recorded whether it hit or miss.
@@ -230,11 +270,12 @@ config PROFILE_ALL_BRANCHES
230 270
231 /debugfs/tracing/profile_branch 271 /debugfs/tracing/profile_branch
232 272
273 This option also enables the likely/unlikely profiler.
274
233 This configuration, when enabled, will impose a great overhead 275 This configuration, when enabled, will impose a great overhead
234 on the system. This should only be enabled when the system 276 on the system. This should only be enabled when the system
235 is to be analyzed. 277 is to be analyzed.
236 278endchoice
237 Say N if unsure.
238 279
239config TRACING_BRANCHES 280config TRACING_BRANCHES
240 bool 281 bool
@@ -375,6 +416,20 @@ config DYNAMIC_FTRACE
375 were made. If so, it runs stop_machine (stops all CPUS) 416 were made. If so, it runs stop_machine (stops all CPUS)
376 and modifies the code to jump over the call to ftrace. 417 and modifies the code to jump over the call to ftrace.
377 418
419config FUNCTION_PROFILER
420 bool "Kernel function profiler"
421 depends on FUNCTION_TRACER
422 default n
423 help
424 This option enables the kernel function profiler. A file is created
425 in debugfs called function_profile_enabled which defaults to zero.
426 When a 1 is echoed into this file profiling begins, and when a
427 zero is entered, profiling stops. A file in the trace_stats
428 directory called functions shows the list of functions that
429 have been hit and their counters.
430
431 If in doubt, say N
432
378config FTRACE_MCOUNT_RECORD 433config FTRACE_MCOUNT_RECORD
379 def_bool y 434 def_bool y
380 depends on DYNAMIC_FTRACE 435 depends on DYNAMIC_FTRACE
@@ -416,7 +471,23 @@ config MMIOTRACE_TEST
416 471
417 Say N, unless you absolutely know what you are doing. 472 Say N, unless you absolutely know what you are doing.
418 473
419endmenu 474config RING_BUFFER_BENCHMARK
475 tristate "Ring buffer benchmark stress tester"
476 depends on RING_BUFFER
477 help
478 This option creates a test to stress the ring buffer and benchmark it.
479 It creates its own ring buffer such that it will not interfere with
480 any other users of the ring buffer (such as ftrace). It then creates
481 a producer and consumer that will run for 10 seconds and sleep for
482 10 seconds. Each interval it will print out the number of events
483 it recorded and give a rough estimate of how long each iteration took.
484
485 It does not disable interrupts or raise its priority, so it may be
486 affected by processes that are running.
487
488 If unsure, say N
489
490endif # FTRACE
420 491
421endif # TRACING_SUPPORT 492endif # TRACING_SUPPORT
422 493
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 2630f5121ec1..7c34cbfff96e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -17,6 +17,7 @@ endif
17 17
18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
20obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
20 21
21obj-$(CONFIG_TRACING) += trace.o 22obj-$(CONFIG_TRACING) += trace.o
22obj-$(CONFIG_TRACING) += trace_clock.o 23obj-$(CONFIG_TRACING) += trace_clock.o
@@ -40,11 +41,10 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
40obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 41obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
41obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 42obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
42obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 43obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
43obj-$(CONFIG_EVENT_TRACER) += trace_events.o 44obj-$(CONFIG_EVENT_TRACING) += trace_events.o
44obj-$(CONFIG_EVENT_TRACER) += events.o 45obj-$(CONFIG_EVENT_TRACING) += trace_export.o
45obj-$(CONFIG_EVENT_TRACER) += trace_export.o
46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 46obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o 47obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
48obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o 48obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
49 49
50libftrace-y := ftrace.o 50libftrace-y := ftrace.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 921ef5d1f0ba..e099f8cc1d1c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -147,7 +147,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
147{ 147{
148 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) 148 if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
149 return 1; 149 return 1;
150 if (sector < bt->start_lba || sector > bt->end_lba) 150 if (sector && (sector < bt->start_lba || sector > bt->end_lba))
151 return 1; 151 return 1;
152 if (bt->pid && pid != bt->pid) 152 if (bt->pid && pid != bt->pid)
153 return 1; 153 return 1;
@@ -192,7 +192,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
192 what |= MASK_TC_BIT(rw, DISCARD); 192 what |= MASK_TC_BIT(rw, DISCARD);
193 193
194 pid = tsk->pid; 194 pid = tsk->pid;
195 if (unlikely(act_log_check(bt, what, sector, pid))) 195 if (act_log_check(bt, what, sector, pid))
196 return; 196 return;
197 cpu = raw_smp_processor_id(); 197 cpu = raw_smp_processor_id();
198 198
@@ -403,11 +403,29 @@ static struct rchan_callbacks blk_relay_callbacks = {
403 .remove_buf_file = blk_remove_buf_file_callback, 403 .remove_buf_file = blk_remove_buf_file_callback,
404}; 404};
405 405
406static void blk_trace_setup_lba(struct blk_trace *bt,
407 struct block_device *bdev)
408{
409 struct hd_struct *part = NULL;
410
411 if (bdev)
412 part = bdev->bd_part;
413
414 if (part) {
415 bt->start_lba = part->start_sect;
416 bt->end_lba = part->start_sect + part->nr_sects;
417 } else {
418 bt->start_lba = 0;
419 bt->end_lba = -1ULL;
420 }
421}
422
406/* 423/*
407 * Setup everything required to start tracing 424 * Setup everything required to start tracing
408 */ 425 */
409int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 426int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
410 struct blk_user_trace_setup *buts) 427 struct block_device *bdev,
428 struct blk_user_trace_setup *buts)
411{ 429{
412 struct blk_trace *old_bt, *bt = NULL; 430 struct blk_trace *old_bt, *bt = NULL;
413 struct dentry *dir = NULL; 431 struct dentry *dir = NULL;
@@ -480,10 +498,13 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
480 if (!bt->act_mask) 498 if (!bt->act_mask)
481 bt->act_mask = (u16) -1; 499 bt->act_mask = (u16) -1;
482 500
483 bt->start_lba = buts->start_lba; 501 blk_trace_setup_lba(bt, bdev);
484 bt->end_lba = buts->end_lba; 502
485 if (!bt->end_lba) 503 /* overwrite with user settings */
486 bt->end_lba = -1ULL; 504 if (buts->start_lba)
505 bt->start_lba = buts->start_lba;
506 if (buts->end_lba)
507 bt->end_lba = buts->end_lba;
487 508
488 bt->pid = buts->pid; 509 bt->pid = buts->pid;
489 bt->trace_state = Blktrace_setup; 510 bt->trace_state = Blktrace_setup;
@@ -505,6 +526,7 @@ err:
505} 526}
506 527
507int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 528int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
529 struct block_device *bdev,
508 char __user *arg) 530 char __user *arg)
509{ 531{
510 struct blk_user_trace_setup buts; 532 struct blk_user_trace_setup buts;
@@ -514,7 +536,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
514 if (ret) 536 if (ret)
515 return -EFAULT; 537 return -EFAULT;
516 538
517 ret = do_blk_trace_setup(q, name, dev, &buts); 539 ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
518 if (ret) 540 if (ret)
519 return ret; 541 return ret;
520 542
@@ -582,7 +604,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
582 switch (cmd) { 604 switch (cmd) {
583 case BLKTRACESETUP: 605 case BLKTRACESETUP:
584 bdevname(bdev, b); 606 bdevname(bdev, b);
585 ret = blk_trace_setup(q, b, bdev->bd_dev, arg); 607 ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
586 break; 608 break;
587 case BLKTRACESTART: 609 case BLKTRACESTART:
588 start = 1; 610 start = 1;
@@ -809,7 +831,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
809 * @bio: the source bio 831 * @bio: the source bio
810 * @dev: target device 832 * @dev: target device
811 * @from: source sector 833 * @from: source sector
812 * @to: target sector
813 * 834 *
814 * Description: 835 * Description:
815 * Device mapper or raid target sometimes need to split a bio because 836 * Device mapper or raid target sometimes need to split a bio because
@@ -817,7 +838,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
817 * 838 *
818 **/ 839 **/
819static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, 840static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
820 dev_t dev, sector_t from, sector_t to) 841 dev_t dev, sector_t from)
821{ 842{
822 struct blk_trace *bt = q->blk_trace; 843 struct blk_trace *bt = q->blk_trace;
823 struct blk_io_trace_remap r; 844 struct blk_io_trace_remap r;
@@ -825,12 +846,13 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
825 if (likely(!bt)) 846 if (likely(!bt))
826 return; 847 return;
827 848
828 r.device = cpu_to_be32(dev); 849 r.device_from = cpu_to_be32(dev);
829 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); 850 r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev);
830 r.sector = cpu_to_be64(to); 851 r.sector_from = cpu_to_be64(from);
831 852
832 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, 853 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
833 !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); 854 BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
855 sizeof(r), &r);
834} 856}
835 857
836/** 858/**
@@ -971,6 +993,16 @@ static inline const void *pdu_start(const struct trace_entry *ent)
971 return te_blk_io_trace(ent) + 1; 993 return te_blk_io_trace(ent) + 1;
972} 994}
973 995
996static inline u32 t_action(const struct trace_entry *ent)
997{
998 return te_blk_io_trace(ent)->action;
999}
1000
1001static inline u32 t_bytes(const struct trace_entry *ent)
1002{
1003 return te_blk_io_trace(ent)->bytes;
1004}
1005
974static inline u32 t_sec(const struct trace_entry *ent) 1006static inline u32 t_sec(const struct trace_entry *ent)
975{ 1007{
976 return te_blk_io_trace(ent)->bytes >> 9; 1008 return te_blk_io_trace(ent)->bytes >> 9;
@@ -996,11 +1028,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
996 struct blk_io_trace_remap *r) 1028 struct blk_io_trace_remap *r)
997{ 1029{
998 const struct blk_io_trace_remap *__r = pdu_start(ent); 1030 const struct blk_io_trace_remap *__r = pdu_start(ent);
999 __u64 sector = __r->sector; 1031 __u64 sector_from = __r->sector_from;
1000 1032
1001 r->device = be32_to_cpu(__r->device);
1002 r->device_from = be32_to_cpu(__r->device_from); 1033 r->device_from = be32_to_cpu(__r->device_from);
1003 r->sector = be64_to_cpu(sector); 1034 r->device_to = be32_to_cpu(__r->device_to);
1035 r->sector_from = be64_to_cpu(sector_from);
1004} 1036}
1005 1037
1006typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); 1038typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1031,36 +1063,98 @@ static int blk_log_action(struct trace_iterator *iter, const char *act)
1031 MAJOR(t->device), MINOR(t->device), act, rwbs); 1063 MAJOR(t->device), MINOR(t->device), act, rwbs);
1032} 1064}
1033 1065
1066static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
1067{
1068 const char *pdu_buf;
1069 int pdu_len;
1070 int i, end, ret;
1071
1072 pdu_buf = pdu_start(ent);
1073 pdu_len = te_blk_io_trace(ent)->pdu_len;
1074
1075 if (!pdu_len)
1076 return 1;
1077
1078 /* find the last zero that needs to be printed */
1079 for (end = pdu_len - 1; end >= 0; end--)
1080 if (pdu_buf[end])
1081 break;
1082 end++;
1083
1084 if (!trace_seq_putc(s, '('))
1085 return 0;
1086
1087 for (i = 0; i < pdu_len; i++) {
1088
1089 ret = trace_seq_printf(s, "%s%02x",
1090 i == 0 ? "" : " ", pdu_buf[i]);
1091 if (!ret)
1092 return ret;
1093
1094 /*
1095 * stop when the rest is just zeroes and indicate so
1096 * with a ".." appended
1097 */
1098 if (i == end && end != pdu_len - 1)
1099 return trace_seq_puts(s, " ..) ");
1100 }
1101
1102 return trace_seq_puts(s, ") ");
1103}
1104
1034static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) 1105static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
1035{ 1106{
1036 char cmd[TASK_COMM_LEN]; 1107 char cmd[TASK_COMM_LEN];
1037 1108
1038 trace_find_cmdline(ent->pid, cmd); 1109 trace_find_cmdline(ent->pid, cmd);
1039 1110
1040 if (t_sec(ent)) 1111 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
1041 return trace_seq_printf(s, "%llu + %u [%s]\n", 1112 int ret;
1042 t_sector(ent), t_sec(ent), cmd); 1113
1043 return trace_seq_printf(s, "[%s]\n", cmd); 1114 ret = trace_seq_printf(s, "%u ", t_bytes(ent));
1115 if (!ret)
1116 return 0;
1117 ret = blk_log_dump_pdu(s, ent);
1118 if (!ret)
1119 return 0;
1120 return trace_seq_printf(s, "[%s]\n", cmd);
1121 } else {
1122 if (t_sec(ent))
1123 return trace_seq_printf(s, "%llu + %u [%s]\n",
1124 t_sector(ent), t_sec(ent), cmd);
1125 return trace_seq_printf(s, "[%s]\n", cmd);
1126 }
1044} 1127}
1045 1128
1046static int blk_log_with_error(struct trace_seq *s, 1129static int blk_log_with_error(struct trace_seq *s,
1047 const struct trace_entry *ent) 1130 const struct trace_entry *ent)
1048{ 1131{
1049 if (t_sec(ent)) 1132 if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
1050 return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), 1133 int ret;
1051 t_sec(ent), t_error(ent)); 1134
1052 return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); 1135 ret = blk_log_dump_pdu(s, ent);
1136 if (ret)
1137 return trace_seq_printf(s, "[%d]\n", t_error(ent));
1138 return 0;
1139 } else {
1140 if (t_sec(ent))
1141 return trace_seq_printf(s, "%llu + %u [%d]\n",
1142 t_sector(ent),
1143 t_sec(ent), t_error(ent));
1144 return trace_seq_printf(s, "%llu [%d]\n",
1145 t_sector(ent), t_error(ent));
1146 }
1053} 1147}
1054 1148
1055static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) 1149static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
1056{ 1150{
1057 struct blk_io_trace_remap r = { .device = 0, }; 1151 struct blk_io_trace_remap r = { .device_from = 0, };
1058 1152
1059 get_pdu_remap(ent, &r); 1153 get_pdu_remap(ent, &r);
1060 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", 1154 return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
1061 t_sector(ent), 1155 t_sector(ent), t_sec(ent),
1062 t_sec(ent), MAJOR(r.device), MINOR(r.device), 1156 MAJOR(r.device_from), MINOR(r.device_from),
1063 (unsigned long long)r.sector); 1157 (unsigned long long)r.sector_from);
1064} 1158}
1065 1159
1066static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) 1160static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
@@ -1117,7 +1211,6 @@ static void blk_tracer_print_header(struct seq_file *m)
1117static void blk_tracer_start(struct trace_array *tr) 1211static void blk_tracer_start(struct trace_array *tr)
1118{ 1212{
1119 blk_tracer_enabled = true; 1213 blk_tracer_enabled = true;
1120 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1121} 1214}
1122 1215
1123static int blk_tracer_init(struct trace_array *tr) 1216static int blk_tracer_init(struct trace_array *tr)
@@ -1130,7 +1223,6 @@ static int blk_tracer_init(struct trace_array *tr)
1130static void blk_tracer_stop(struct trace_array *tr) 1223static void blk_tracer_stop(struct trace_array *tr)
1131{ 1224{
1132 blk_tracer_enabled = false; 1225 blk_tracer_enabled = false;
1133 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1134} 1226}
1135 1227
1136static void blk_tracer_reset(struct trace_array *tr) 1228static void blk_tracer_reset(struct trace_array *tr)
@@ -1182,7 +1274,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
1182 } 1274 }
1183 1275
1184 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) 1276 if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
1185 ret = trace_seq_printf(s, "Bad pc action %x\n", what); 1277 ret = trace_seq_printf(s, "Unknown action %x\n", what);
1186 else { 1278 else {
1187 ret = log_action(iter, what2act[what].act[long_act]); 1279 ret = log_action(iter, what2act[what].act[long_act]);
1188 if (ret) 1280 if (ret)
@@ -1195,9 +1287,6 @@ out:
1195static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, 1287static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
1196 int flags) 1288 int flags)
1197{ 1289{
1198 if (!trace_print_context(iter))
1199 return TRACE_TYPE_PARTIAL_LINE;
1200
1201 return print_one_line(iter, false); 1290 return print_one_line(iter, false);
1202} 1291}
1203 1292
@@ -1232,6 +1321,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
1232 return print_one_line(iter, true); 1321 return print_one_line(iter, true);
1233} 1322}
1234 1323
1324static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
1325{
1326 /* don't output context-info for blk_classic output */
1327 if (bit == TRACE_BLK_OPT_CLASSIC) {
1328 if (set)
1329 trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
1330 else
1331 trace_flags |= TRACE_ITER_CONTEXT_INFO;
1332 }
1333 return 0;
1334}
1335
1235static struct tracer blk_tracer __read_mostly = { 1336static struct tracer blk_tracer __read_mostly = {
1236 .name = "blk", 1337 .name = "blk",
1237 .init = blk_tracer_init, 1338 .init = blk_tracer_init,
@@ -1241,6 +1342,7 @@ static struct tracer blk_tracer __read_mostly = {
1241 .print_header = blk_tracer_print_header, 1342 .print_header = blk_tracer_print_header,
1242 .print_line = blk_tracer_print_line, 1343 .print_line = blk_tracer_print_line,
1243 .flags = &blk_tracer_flags, 1344 .flags = &blk_tracer_flags,
1345 .set_flag = blk_tracer_set_flag,
1244}; 1346};
1245 1347
1246static struct trace_event trace_blk_event = { 1348static struct trace_event trace_blk_event = {
@@ -1285,7 +1387,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
1285/* 1387/*
1286 * Setup everything required to start tracing 1388 * Setup everything required to start tracing
1287 */ 1389 */
1288static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) 1390static int blk_trace_setup_queue(struct request_queue *q,
1391 struct block_device *bdev)
1289{ 1392{
1290 struct blk_trace *old_bt, *bt = NULL; 1393 struct blk_trace *old_bt, *bt = NULL;
1291 int ret = -ENOMEM; 1394 int ret = -ENOMEM;
@@ -1298,9 +1401,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
1298 if (!bt->msg_data) 1401 if (!bt->msg_data)
1299 goto free_bt; 1402 goto free_bt;
1300 1403
1301 bt->dev = dev; 1404 bt->dev = bdev->bd_dev;
1302 bt->act_mask = (u16)-1; 1405 bt->act_mask = (u16)-1;
1303 bt->end_lba = -1ULL; 1406
1407 blk_trace_setup_lba(bt, bdev);
1304 1408
1305 old_bt = xchg(&q->blk_trace, bt); 1409 old_bt = xchg(&q->blk_trace, bt);
1306 if (old_bt != NULL) { 1410 if (old_bt != NULL) {
@@ -1517,7 +1621,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1517 1621
1518 if (attr == &dev_attr_enable) { 1622 if (attr == &dev_attr_enable) {
1519 if (value) 1623 if (value)
1520 ret = blk_trace_setup_queue(q, bdev->bd_dev); 1624 ret = blk_trace_setup_queue(q, bdev);
1521 else 1625 else
1522 ret = blk_trace_remove_queue(q); 1626 ret = blk_trace_remove_queue(q);
1523 goto out_unlock_bdev; 1627 goto out_unlock_bdev;
@@ -1525,7 +1629,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
1525 1629
1526 ret = 0; 1630 ret = 0;
1527 if (q->blk_trace == NULL) 1631 if (q->blk_trace == NULL)
1528 ret = blk_trace_setup_queue(q, bdev->bd_dev); 1632 ret = blk_trace_setup_queue(q, bdev);
1529 1633
1530 if (ret == 0) { 1634 if (ret == 0) {
1531 if (attr == &dev_attr_act_mask) 1635 if (attr == &dev_attr_act_mask)
@@ -1548,3 +1652,8 @@ out:
1548 return ret ? ret : count; 1652 return ret ? ret : count;
1549} 1653}
1550 1654
/*
 * Create the blk_trace_attr_group sysfs attribute group under @dev's
 * kobject. Returns the result of sysfs_create_group() unchanged.
 */
1655int blk_trace_init_sysfs(struct device *dev)
1656{
1657 return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
1658}
1659
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644
index 246f2aa6dc46..000000000000
--- a/kernel/trace/events.c
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * This is the place to register all trace points as events.
3 */
4
5#include <linux/stringify.h>
6
7#include <trace/trace_events.h>
8
9#include "trace_output.h"
10
11#include "trace_events_stage_1.h"
12#include "trace_events_stage_2.h"
13#include "trace_events_stage_3.h"
14
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f1ed080406c3..5b606f45b6c4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -29,11 +29,12 @@
29#include <linux/list.h> 29#include <linux/list.h>
30#include <linux/hash.h> 30#include <linux/hash.h>
31 31
32#include <trace/sched.h> 32#include <trace/events/sched.h>
33 33
34#include <asm/ftrace.h> 34#include <asm/ftrace.h>
35 35
36#include "trace.h" 36#include "trace_output.h"
37#include "trace_stat.h"
37 38
38#define FTRACE_WARN_ON(cond) \ 39#define FTRACE_WARN_ON(cond) \
39 do { \ 40 do { \
@@ -68,7 +69,7 @@ static DEFINE_MUTEX(ftrace_lock);
68 69
69static struct ftrace_ops ftrace_list_end __read_mostly = 70static struct ftrace_ops ftrace_list_end __read_mostly =
70{ 71{
71 .func = ftrace_stub, 72 .func = ftrace_stub,
72}; 73};
73 74
74static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 75static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -240,6 +241,576 @@ static void ftrace_update_pid_func(void)
240#endif 241#endif
241} 242}
242 243
244#ifdef CONFIG_FUNCTION_PROFILER
/*
 * One profiled function. Records are carved out of ftrace_profile_page
 * arrays and chained into a per-cpu hash keyed on @ip.
 */
245struct ftrace_profile {
246 struct hlist_node node; /* link in the ftrace_profile_stat hash bucket */
247 unsigned long ip; /* address of the profiled function (hash key) */
248 unsigned long counter; /* hits, bumped by function_profile_call() */
249#ifdef CONFIG_FUNCTION_GRAPH_TRACER
250 unsigned long long time; /* time accumulated by profile_graph_return() */
251#endif
252};
253
/*
 * Header of one page of profile records. Pages come from
 * get_zeroed_page() and form a singly linked list.
 */
254struct ftrace_profile_page {
255 struct ftrace_profile_page *next; /* next page in the list, or NULL */
256 unsigned long index; /* number of records[] slots handed out so far */
257 struct ftrace_profile records[]; /* fills the rest of the page */
258};
259
/* Per-cpu profiler state (one instance per cpu in ftrace_profile_stats). */
260struct ftrace_profile_stat {
261 atomic_t disabled; /* recursion guard taken in ftrace_profile_alloc() */
262 struct hlist_head *hash; /* FTRACE_PROFILE_HASH_SIZE buckets, NULL until set up */
263 struct ftrace_profile_page *pages; /* page currently being filled */
264 struct ftrace_profile_page *start; /* first page of the list */
265 struct tracer_stat stat; /* stat-tracer handle registered for this cpu */
266};
267
/* Bytes left for records[] in a page once the page header is accounted for */
268#define PROFILE_RECORDS_SIZE \
269 (PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
270
/* Number of whole ftrace_profile records that fit in one page */
271#define PROFILES_PER_PAGE \
272 (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
273
/* Bit count passed to hash_long(); computed once in ftrace_profile_init_cpu() */
274static int ftrace_profile_bits __read_mostly;
/* Non-zero while profiling is on; toggled by ftrace_profile_write() */
275static int ftrace_profile_enabled __read_mostly;
276
277/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
278static DEFINE_MUTEX(ftrace_profile_lock);
279
/* One ftrace_profile_stat per cpu */
280static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
281
/* Number of buckets in each per-cpu hash table */
282#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
283
284static void *
285function_stat_next(void *v, int idx)
286{
287 struct ftrace_profile *rec = v;
288 struct ftrace_profile_page *pg;
289
290 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
291
292 again:
293 rec++;
294 if ((void *)rec >= (void *)&pg->records[pg->index]) {
295 pg = pg->next;
296 if (!pg)
297 return NULL;
298 rec = &pg->records[0];
299 if (!rec->counter)
300 goto again;
301 }
302
303 return rec;
304}
305
306static void *function_stat_start(struct tracer_stat *trace)
307{
308 struct ftrace_profile_stat *stat =
309 container_of(trace, struct ftrace_profile_stat, stat);
310
311 if (!stat || !stat->start)
312 return NULL;
313
314 return function_stat_next(&stat->start->records[0], 0);
315}
316
317#ifdef CONFIG_FUNCTION_GRAPH_TRACER
318/* function graph compares on total time */
319static int function_stat_cmp(void *p1, void *p2)
320{
321 struct ftrace_profile *a = p1;
322 struct ftrace_profile *b = p2;
323
324 if (a->time < b->time)
325 return -1;
326 if (a->time > b->time)
327 return 1;
328 else
329 return 0;
330}
331#else
332/* not function graph compares against hits */
333static int function_stat_cmp(void *p1, void *p2)
334{
335 struct ftrace_profile *a = p1;
336 struct ftrace_profile *b = p2;
337
338 if (a->counter < b->counter)
339 return -1;
340 if (a->counter > b->counter)
341 return 1;
342 else
343 return 0;
344}
345#endif
346
/*
 * Stat callback: emit the column headers for the function profile.
 * The graph tracer build adds the Time and Avg columns. Always returns 0.
 */
static int function_stat_headers(struct seq_file *m)
{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	static const char header[] =
		" Function "
		"Hit Time Avg\n"
		" -------- "
		"--- ---- ---\n";
#else
	static const char header[] =
		" Function Hit\n"
		" -------- ---\n";
#endif

	/* the header text contains no conversion specifiers */
	seq_printf(m, header);

	return 0;
}
360
361static int function_stat_show(struct seq_file *m, void *v)
362{
363 struct ftrace_profile *rec = v;
364 char str[KSYM_SYMBOL_LEN];
365#ifdef CONFIG_FUNCTION_GRAPH_TRACER
366 static DEFINE_MUTEX(mutex);
367 static struct trace_seq s;
368 unsigned long long avg;
369#endif
370
371 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
372 seq_printf(m, " %-30.30s %10lu", str, rec->counter);
373
374#ifdef CONFIG_FUNCTION_GRAPH_TRACER
375 seq_printf(m, " ");
376 avg = rec->time;
377 do_div(avg, rec->counter);
378
379 mutex_lock(&mutex);
380 trace_seq_init(&s);
381 trace_print_graph_duration(rec->time, &s);
382 trace_seq_puts(&s, " ");
383 trace_print_graph_duration(avg, &s);
384 trace_print_seq(m, &s);
385 mutex_unlock(&mutex);
386#endif
387 seq_putc(m, '\n');
388
389 return 0;
390}
391
392static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
393{
394 struct ftrace_profile_page *pg;
395
396 pg = stat->pages = stat->start;
397
398 while (pg) {
399 memset(pg->records, 0, PROFILE_RECORDS_SIZE);
400 pg->index = 0;
401 pg = pg->next;
402 }
403
404 memset(stat->hash, 0,
405 FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
406}
407
408int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
409{
410 struct ftrace_profile_page *pg;
411 int functions;
412 int pages;
413 int i;
414
415 /* If we already allocated, do nothing */
416 if (stat->pages)
417 return 0;
418
419 stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
420 if (!stat->pages)
421 return -ENOMEM;
422
423#ifdef CONFIG_DYNAMIC_FTRACE
424 functions = ftrace_update_tot_cnt;
425#else
426 /*
427 * We do not know the number of functions that exist because
428 * dynamic tracing is what counts them. With past experience
429 * we have around 20K functions. That should be more than enough.
430 * It is highly unlikely we will execute every function in
431 * the kernel.
432 */
433 functions = 20000;
434#endif
435
436 pg = stat->start = stat->pages;
437
438 pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
439
440 for (i = 0; i < pages; i++) {
441 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
442 if (!pg->next)
443 goto out_free;
444 pg = pg->next;
445 }
446
447 return 0;
448
449 out_free:
450 pg = stat->start;
451 while (pg) {
452 unsigned long tmp = (unsigned long)pg;
453
454 pg = pg->next;
455 free_page(tmp);
456 }
457
458 free_page((unsigned long)stat->pages);
459 stat->pages = NULL;
460 stat->start = NULL;
461
462 return -ENOMEM;
463}
464
465static int ftrace_profile_init_cpu(int cpu)
466{
467 struct ftrace_profile_stat *stat;
468 int size;
469
470 stat = &per_cpu(ftrace_profile_stats, cpu);
471
472 if (stat->hash) {
473 /* If the profile is already created, simply reset it */
474 ftrace_profile_reset(stat);
475 return 0;
476 }
477
478 /*
479 * We are profiling all functions, but usually only a few thousand
480 * functions are hit. We'll make a hash of 1024 items.
481 */
482 size = FTRACE_PROFILE_HASH_SIZE;
483
484 stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL);
485
486 if (!stat->hash)
487 return -ENOMEM;
488
489 if (!ftrace_profile_bits) {
490 size--;
491
492 for (; size; size >>= 1)
493 ftrace_profile_bits++;
494 }
495
496 /* Preallocate the function profiling pages */
497 if (ftrace_profile_pages_init(stat) < 0) {
498 kfree(stat->hash);
499 stat->hash = NULL;
500 return -ENOMEM;
501 }
502
503 return 0;
504}
505
506static int ftrace_profile_init(void)
507{
508 int cpu;
509 int ret = 0;
510
511 for_each_online_cpu(cpu) {
512 ret = ftrace_profile_init_cpu(cpu);
513 if (ret)
514 break;
515 }
516
517 return ret;
518}
519
520/* interrupts must be disabled */
521static struct ftrace_profile *
522ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
523{
524 struct ftrace_profile *rec;
525 struct hlist_head *hhd;
526 struct hlist_node *n;
527 unsigned long key;
528
529 key = hash_long(ip, ftrace_profile_bits);
530 hhd = &stat->hash[key];
531
532 if (hlist_empty(hhd))
533 return NULL;
534
535 hlist_for_each_entry_rcu(rec, n, hhd, node) {
536 if (rec->ip == ip)
537 return rec;
538 }
539
540 return NULL;
541}
542
543static void ftrace_add_profile(struct ftrace_profile_stat *stat,
544 struct ftrace_profile *rec)
545{
546 unsigned long key;
547
548 key = hash_long(rec->ip, ftrace_profile_bits);
549 hlist_add_head_rcu(&rec->node, &stat->hash[key]);
550}
551
552/*
553 * The memory is already allocated, this simply finds a new record to use.
554 */
555static struct ftrace_profile *
556ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
557{
558 struct ftrace_profile *rec = NULL;
559
560 /* prevent recursion (from NMIs) */
561 if (atomic_inc_return(&stat->disabled) != 1)
562 goto out;
563
564 /*
565 * Try to find the function again since an NMI
566 * could have added it
567 */
568 rec = ftrace_find_profiled_func(stat, ip);
569 if (rec)
570 goto out;
571
572 if (stat->pages->index == PROFILES_PER_PAGE) {
573 if (!stat->pages->next)
574 goto out;
575 stat->pages = stat->pages->next;
576 }
577
578 rec = &stat->pages->records[stat->pages->index++];
579 rec->ip = ip;
580 ftrace_add_profile(stat, rec);
581
582 out:
583 atomic_dec(&stat->disabled);
584
585 return rec;
586}
587
588static void
589function_profile_call(unsigned long ip, unsigned long parent_ip)
590{
591 struct ftrace_profile_stat *stat;
592 struct ftrace_profile *rec;
593 unsigned long flags;
594
595 if (!ftrace_profile_enabled)
596 return;
597
598 local_irq_save(flags);
599
600 stat = &__get_cpu_var(ftrace_profile_stats);
601 if (!stat->hash)
602 goto out;
603
604 rec = ftrace_find_profiled_func(stat, ip);
605 if (!rec) {
606 rec = ftrace_profile_alloc(stat, ip);
607 if (!rec)
608 goto out;
609 }
610
611 rec->counter++;
612 out:
613 local_irq_restore(flags);
614}
615
616#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * Graph-tracer entry hook: count a hit for trace->func via
 * function_profile_call() (no parent ip is passed). Always returns 1.
 */
617static int profile_graph_entry(struct ftrace_graph_ent *trace)
618{
619 function_profile_call(trace->func, 0);
620 return 1;
621}
622
623static void profile_graph_return(struct ftrace_graph_ret *trace)
624{
625 struct ftrace_profile_stat *stat;
626 unsigned long long calltime;
627 struct ftrace_profile *rec;
628 unsigned long flags;
629
630 local_irq_save(flags);
631 stat = &__get_cpu_var(ftrace_profile_stats);
632 if (!stat->hash)
633 goto out;
634
635 calltime = trace->rettime - trace->calltime;
636
637 if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
638 int index;
639
640 index = trace->depth;
641
642 /* Append this call time to the parent time to subtract */
643 if (index)
644 current->ret_stack[index - 1].subtime += calltime;
645
646 if (current->ret_stack[index].subtime < calltime)
647 calltime -= current->ret_stack[index].subtime;
648 else
649 calltime = 0;
650 }
651
652 rec = ftrace_find_profiled_func(stat, trace->func);
653 if (rec)
654 rec->time += calltime;
655
656 out:
657 local_irq_restore(flags);
658}
659
/*
 * Graph-tracer build: hook the profiler in through
 * register_ftrace_graph() and return its result.
 */
660static int register_ftrace_profiler(void)
661{
662 return register_ftrace_graph(&profile_graph_return,
663 &profile_graph_entry);
664}
665
/* Graph-tracer build: unhook the profiler via unregister_ftrace_graph(). */
666static void unregister_ftrace_profiler(void)
667{
668 unregister_ftrace_graph();
669}
670#else
/* Non-graph build: ftrace_ops whose callback is function_profile_call(). */
671static struct ftrace_ops ftrace_profile_ops __read_mostly =
672{
673 .func = function_profile_call,
674};
675
/*
 * Non-graph build: register ftrace_profile_ops with
 * register_ftrace_function() and return its result.
 */
676static int register_ftrace_profiler(void)
677{
678 return register_ftrace_function(&ftrace_profile_ops);
679}
680
/* Non-graph build: unregister ftrace_profile_ops. */
681static void unregister_ftrace_profiler(void)
682{
683 unregister_ftrace_function(&ftrace_profile_ops);
684}
685#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
686
687static ssize_t
688ftrace_profile_write(struct file *filp, const char __user *ubuf,
689 size_t cnt, loff_t *ppos)
690{
691 unsigned long val;
692 char buf[64]; /* big enough to hold a number */
693 int ret;
694
695 if (cnt >= sizeof(buf))
696 return -EINVAL;
697
698 if (copy_from_user(&buf, ubuf, cnt))
699 return -EFAULT;
700
701 buf[cnt] = 0;
702
703 ret = strict_strtoul(buf, 10, &val);
704 if (ret < 0)
705 return ret;
706
707 val = !!val;
708
709 mutex_lock(&ftrace_profile_lock);
710 if (ftrace_profile_enabled ^ val) {
711 if (val) {
712 ret = ftrace_profile_init();
713 if (ret < 0) {
714 cnt = ret;
715 goto out;
716 }
717
718 ret = register_ftrace_profiler();
719 if (ret < 0) {
720 cnt = ret;
721 goto out;
722 }
723 ftrace_profile_enabled = 1;
724 } else {
725 ftrace_profile_enabled = 0;
726 unregister_ftrace_profiler();
727 }
728 }
729 out:
730 mutex_unlock(&ftrace_profile_lock);
731
732 filp->f_pos += cnt;
733
734 return cnt;
735}
736
737static ssize_t
738ftrace_profile_read(struct file *filp, char __user *ubuf,
739 size_t cnt, loff_t *ppos)
740{
741 char buf[64]; /* big enough to hold a number */
742 int r;
743
744 r = sprintf(buf, "%u\n", ftrace_profile_enabled);
745 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
746}
747
/* File operations of the "function_profile_enabled" debugfs file. */
748static const struct file_operations ftrace_profile_fops = {
749 .open = tracing_open_generic,
750 .read = ftrace_profile_read,
751 .write = ftrace_profile_write,
752};
753
754/* used to initialize the real stat files */
/*
 * Template only: ftrace_profile_debugfs() copies it into each cpu's
 * ftrace_profile_stat and then overrides .name with a per-cpu name.
 */
755static struct tracer_stat function_stats __initdata = {
756 .name = "functions",
757 .stat_start = function_stat_start,
758 .stat_next = function_stat_next,
759 .stat_cmp = function_stat_cmp,
760 .stat_headers = function_stat_headers,
761 .stat_show = function_stat_show
762};
763
764static void ftrace_profile_debugfs(struct dentry *d_tracer)
765{
766 struct ftrace_profile_stat *stat;
767 struct dentry *entry;
768 char *name;
769 int ret;
770 int cpu;
771
772 for_each_possible_cpu(cpu) {
773 stat = &per_cpu(ftrace_profile_stats, cpu);
774
775 /* allocate enough for function name + cpu number */
776 name = kmalloc(32, GFP_KERNEL);
777 if (!name) {
778 /*
779 * The files created are permanent, if something happens
780 * we still do not free memory.
781 */
782 kfree(stat);
783 WARN(1,
784 "Could not allocate stat file for cpu %d\n",
785 cpu);
786 return;
787 }
788 stat->stat = function_stats;
789 snprintf(name, 32, "function%d", cpu);
790 stat->stat.name = name;
791 ret = register_stat_tracer(&stat->stat);
792 if (ret) {
793 WARN(1,
794 "Could not register function stat for cpu %d\n",
795 cpu);
796 kfree(name);
797 return;
798 }
799 }
800
801 entry = debugfs_create_file("function_profile_enabled", 0644,
802 d_tracer, NULL, &ftrace_profile_fops);
803 if (!entry)
804 pr_warning("Could not create debugfs "
805 "'function_profile_enabled' entry\n");
806}
807
808#else /* CONFIG_FUNCTION_PROFILER */
/* CONFIG_FUNCTION_PROFILER disabled: there are no profiler files to create. */
809static void ftrace_profile_debugfs(struct dentry *d_tracer)
810{
811}
812#endif /* CONFIG_FUNCTION_PROFILER */
813
243/* set when tracing only a pid */ 814/* set when tracing only a pid */
244struct pid *ftrace_pid_trace; 815struct pid *ftrace_pid_trace;
245static struct pid * const ftrace_swapper_pid = &init_struct_pid; 816static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@@ -261,7 +832,6 @@ struct ftrace_func_probe {
261 struct rcu_head rcu; 832 struct rcu_head rcu;
262}; 833};
263 834
264
265enum { 835enum {
266 FTRACE_ENABLE_CALLS = (1 << 0), 836 FTRACE_ENABLE_CALLS = (1 << 0),
267 FTRACE_DISABLE_CALLS = (1 << 1), 837 FTRACE_DISABLE_CALLS = (1 << 1),
@@ -346,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
346 rec->flags |= FTRACE_FL_FREE; 916 rec->flags |= FTRACE_FL_FREE;
347} 917}
348 918
349void ftrace_release(void *start, unsigned long size)
350{
351 struct dyn_ftrace *rec;
352 struct ftrace_page *pg;
353 unsigned long s = (unsigned long)start;
354 unsigned long e = s + size;
355
356 if (ftrace_disabled || !start)
357 return;
358
359 mutex_lock(&ftrace_lock);
360 do_for_each_ftrace_rec(pg, rec) {
361 if ((rec->ip >= s) && (rec->ip < e)) {
362 /*
363 * rec->ip is changed in ftrace_free_rec()
364 * It should not between s and e if record was freed.
365 */
366 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
367 ftrace_free_rec(rec);
368 }
369 } while_for_each_ftrace_rec();
370 mutex_unlock(&ftrace_lock);
371}
372
373static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 919static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
374{ 920{
375 struct dyn_ftrace *rec; 921 struct dyn_ftrace *rec;
@@ -1408,7 +1954,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1408 1954
1409static struct ftrace_ops trace_probe_ops __read_mostly = 1955static struct ftrace_ops trace_probe_ops __read_mostly =
1410{ 1956{
1411 .func = function_trace_probe_call, 1957 .func = function_trace_probe_call,
1412}; 1958};
1413 1959
1414static int ftrace_probe_registered; 1960static int ftrace_probe_registered;
@@ -2128,38 +2674,23 @@ static const struct file_operations ftrace_graph_fops = {
2128 2674
2129static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) 2675static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
2130{ 2676{
2131 struct dentry *entry;
2132 2677
2133 entry = debugfs_create_file("available_filter_functions", 0444, 2678 trace_create_file("available_filter_functions", 0444,
2134 d_tracer, NULL, &ftrace_avail_fops); 2679 d_tracer, NULL, &ftrace_avail_fops);
2135 if (!entry)
2136 pr_warning("Could not create debugfs "
2137 "'available_filter_functions' entry\n");
2138 2680
2139 entry = debugfs_create_file("failures", 0444, 2681 trace_create_file("failures", 0444,
2140 d_tracer, NULL, &ftrace_failures_fops); 2682 d_tracer, NULL, &ftrace_failures_fops);
2141 if (!entry)
2142 pr_warning("Could not create debugfs 'failures' entry\n");
2143 2683
2144 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, 2684 trace_create_file("set_ftrace_filter", 0644, d_tracer,
2145 NULL, &ftrace_filter_fops); 2685 NULL, &ftrace_filter_fops);
2146 if (!entry)
2147 pr_warning("Could not create debugfs "
2148 "'set_ftrace_filter' entry\n");
2149 2686
2150 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, 2687 trace_create_file("set_ftrace_notrace", 0644, d_tracer,
2151 NULL, &ftrace_notrace_fops); 2688 NULL, &ftrace_notrace_fops);
2152 if (!entry)
2153 pr_warning("Could not create debugfs "
2154 "'set_ftrace_notrace' entry\n");
2155 2689
2156#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2690#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2157 entry = debugfs_create_file("set_graph_function", 0444, d_tracer, 2691 trace_create_file("set_graph_function", 0444, d_tracer,
2158 NULL, 2692 NULL,
2159 &ftrace_graph_fops); 2693 &ftrace_graph_fops);
2160 if (!entry)
2161 pr_warning("Could not create debugfs "
2162 "'set_graph_function' entry\n");
2163#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2694#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2164 2695
2165 return 0; 2696 return 0;
@@ -2197,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod,
2197 return 0; 2728 return 0;
2198} 2729}
2199 2730
2200void ftrace_init_module(struct module *mod, 2731#ifdef CONFIG_MODULES
2201 unsigned long *start, unsigned long *end) 2732void ftrace_release(void *start, void *end)
2733{
2734 struct dyn_ftrace *rec;
2735 struct ftrace_page *pg;
2736 unsigned long s = (unsigned long)start;
2737 unsigned long e = (unsigned long)end;
2738
2739 if (ftrace_disabled || !start || start == end)
2740 return;
2741
2742 mutex_lock(&ftrace_lock);
2743 do_for_each_ftrace_rec(pg, rec) {
2744 if ((rec->ip >= s) && (rec->ip < e)) {
2745 /*
2746 * rec->ip is changed in ftrace_free_rec()
2747 * It should not between s and e if record was freed.
2748 */
2749 FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
2750 ftrace_free_rec(rec);
2751 }
2752 } while_for_each_ftrace_rec();
2753 mutex_unlock(&ftrace_lock);
2754}
2755
2756static void ftrace_init_module(struct module *mod,
2757 unsigned long *start, unsigned long *end)
2202{ 2758{
2203 if (ftrace_disabled || start == end) 2759 if (ftrace_disabled || start == end)
2204 return; 2760 return;
2205 ftrace_convert_nops(mod, start, end); 2761 ftrace_convert_nops(mod, start, end);
2206} 2762}
2207 2763
2764static int ftrace_module_notify(struct notifier_block *self,
2765 unsigned long val, void *data)
2766{
2767 struct module *mod = data;
2768
2769 switch (val) {
2770 case MODULE_STATE_COMING:
2771 ftrace_init_module(mod, mod->ftrace_callsites,
2772 mod->ftrace_callsites +
2773 mod->num_ftrace_callsites);
2774 break;
2775 case MODULE_STATE_GOING:
2776 ftrace_release(mod->ftrace_callsites,
2777 mod->ftrace_callsites +
2778 mod->num_ftrace_callsites);
2779 break;
2780 }
2781
2782 return 0;
2783}
2784#else
/* CONFIG_MODULES disabled: nothing to do on module events; returns 0. */
2785static int ftrace_module_notify(struct notifier_block *self,
2786 unsigned long val, void *data)
2787{
2788 return 0;
2789}
2790#endif /* CONFIG_MODULES */
2791
/* Notifier block that routes module state changes to ftrace_module_notify(). */
2792struct notifier_block ftrace_module_nb = {
2793 .notifier_call = ftrace_module_notify,
2794 .priority = 0,
2795};
2796
2208extern unsigned long __start_mcount_loc[]; 2797extern unsigned long __start_mcount_loc[];
2209extern unsigned long __stop_mcount_loc[]; 2798extern unsigned long __stop_mcount_loc[];
2210 2799
@@ -2236,6 +2825,10 @@ void __init ftrace_init(void)
2236 __start_mcount_loc, 2825 __start_mcount_loc,
2237 __stop_mcount_loc); 2826 __stop_mcount_loc);
2238 2827
2828 ret = register_module_notifier(&ftrace_module_nb);
2829 if (!ret)
2830 pr_warning("Failed to register trace ftrace module notifier\n");
2831
2239 return; 2832 return;
2240 failed: 2833 failed:
2241 ftrace_disabled = 1; 2834 ftrace_disabled = 1;
@@ -2417,7 +3010,6 @@ static const struct file_operations ftrace_pid_fops = {
2417static __init int ftrace_init_debugfs(void) 3010static __init int ftrace_init_debugfs(void)
2418{ 3011{
2419 struct dentry *d_tracer; 3012 struct dentry *d_tracer;
2420 struct dentry *entry;
2421 3013
2422 d_tracer = tracing_init_dentry(); 3014 d_tracer = tracing_init_dentry();
2423 if (!d_tracer) 3015 if (!d_tracer)
@@ -2425,11 +3017,11 @@ static __init int ftrace_init_debugfs(void)
2425 3017
2426 ftrace_init_dyn_debugfs(d_tracer); 3018 ftrace_init_dyn_debugfs(d_tracer);
2427 3019
2428 entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, 3020 trace_create_file("set_ftrace_pid", 0644, d_tracer,
2429 NULL, &ftrace_pid_fops); 3021 NULL, &ftrace_pid_fops);
2430 if (!entry) 3022
2431 pr_warning("Could not create debugfs " 3023 ftrace_profile_debugfs(d_tracer);
2432 "'set_ftrace_pid' entry\n"); 3024
2433 return 0; 3025 return 0;
2434} 3026}
2435fs_initcall(ftrace_init_debugfs); 3027fs_initcall(ftrace_init_debugfs);
@@ -2538,7 +3130,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
2538 3130
2539#ifdef CONFIG_FUNCTION_GRAPH_TRACER 3131#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2540 3132
2541static atomic_t ftrace_graph_active; 3133static int ftrace_graph_active;
2542static struct notifier_block ftrace_suspend_notifier; 3134static struct notifier_block ftrace_suspend_notifier;
2543 3135
2544int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) 3136int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -2690,7 +3282,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2690 mutex_lock(&ftrace_lock); 3282 mutex_lock(&ftrace_lock);
2691 3283
2692 /* we currently allow only one tracer registered at a time */ 3284 /* we currently allow only one tracer registered at a time */
2693 if (atomic_read(&ftrace_graph_active)) { 3285 if (ftrace_graph_active) {
2694 ret = -EBUSY; 3286 ret = -EBUSY;
2695 goto out; 3287 goto out;
2696 } 3288 }
@@ -2698,10 +3290,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2698 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; 3290 ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
2699 register_pm_notifier(&ftrace_suspend_notifier); 3291 register_pm_notifier(&ftrace_suspend_notifier);
2700 3292
2701 atomic_inc(&ftrace_graph_active); 3293 ftrace_graph_active++;
2702 ret = start_graph_tracing(); 3294 ret = start_graph_tracing();
2703 if (ret) { 3295 if (ret) {
2704 atomic_dec(&ftrace_graph_active); 3296 ftrace_graph_active--;
2705 goto out; 3297 goto out;
2706 } 3298 }
2707 3299
@@ -2719,10 +3311,10 @@ void unregister_ftrace_graph(void)
2719{ 3311{
2720 mutex_lock(&ftrace_lock); 3312 mutex_lock(&ftrace_lock);
2721 3313
2722 if (!unlikely(atomic_read(&ftrace_graph_active))) 3314 if (unlikely(!ftrace_graph_active))
2723 goto out; 3315 goto out;
2724 3316
2725 atomic_dec(&ftrace_graph_active); 3317 ftrace_graph_active--;
2726 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); 3318 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
2727 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; 3319 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2728 ftrace_graph_entry = ftrace_graph_entry_stub; 3320 ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -2736,7 +3328,7 @@ void unregister_ftrace_graph(void)
2736/* Allocate a return stack for newly created task */ 3328/* Allocate a return stack for newly created task */
2737void ftrace_graph_init_task(struct task_struct *t) 3329void ftrace_graph_init_task(struct task_struct *t)
2738{ 3330{
2739 if (atomic_read(&ftrace_graph_active)) { 3331 if (ftrace_graph_active) {
2740 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH 3332 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
2741 * sizeof(struct ftrace_ret_stack), 3333 * sizeof(struct ftrace_ret_stack),
2742 GFP_KERNEL); 3334 GFP_KERNEL);
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index 5011f4d91e37..86cdf671d7e2 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -12,7 +12,7 @@
12#include <linux/dcache.h> 12#include <linux/dcache.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14 14
15#include <trace/kmemtrace.h> 15#include <linux/kmemtrace.h>
16 16
17#include "trace_output.h" 17#include "trace_output.h"
18#include "trace.h" 18#include "trace.h"
@@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
42 gfp_t gfp_flags, 42 gfp_t gfp_flags,
43 int node) 43 int node)
44{ 44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
45 struct trace_array *tr = kmemtrace_array; 46 struct trace_array *tr = kmemtrace_array;
46 struct kmemtrace_alloc_entry *entry; 47 struct kmemtrace_alloc_entry *entry;
47 struct ring_buffer_event *event; 48 struct ring_buffer_event *event;
@@ -62,7 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
62 entry->gfp_flags = gfp_flags; 63 entry->gfp_flags = gfp_flags;
63 entry->node = node; 64 entry->node = node;
64 65
65 ring_buffer_unlock_commit(tr->buffer, event); 66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
66 68
67 trace_wake_up(); 69 trace_wake_up();
68} 70}
@@ -71,6 +73,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
71 unsigned long call_site, 73 unsigned long call_site,
72 const void *ptr) 74 const void *ptr)
73{ 75{
76 struct ftrace_event_call *call = &event_kmem_free;
74 struct trace_array *tr = kmemtrace_array; 77 struct trace_array *tr = kmemtrace_array;
75 struct kmemtrace_free_entry *entry; 78 struct kmemtrace_free_entry *entry;
76 struct ring_buffer_event *event; 79 struct ring_buffer_event *event;
@@ -86,7 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
86 entry->call_site = call_site; 89 entry->call_site = call_site;
87 entry->ptr = ptr; 90 entry->ptr = ptr;
88 91
89 ring_buffer_unlock_commit(tr->buffer, event); 92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
90 94
91 trace_wake_up(); 95 trace_wake_up();
92} 96}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 960cbf44c844..3ae5ccf2c0fc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,28 @@
22#include "trace.h" 22#include "trace.h"
23 23
24/* 24/*
25 * The ring buffer header is special. We must manually up keep it.
26 */
27int ring_buffer_print_entry_header(struct trace_seq *s)
28{
29 int ret;
30
31 ret = trace_seq_printf(s, "# compressed entry header\n");
32 ret = trace_seq_printf(s, "\ttype_len : 5 bits\n");
33 ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n");
34 ret = trace_seq_printf(s, "\tarray : 32 bits\n");
35 ret = trace_seq_printf(s, "\n");
36 ret = trace_seq_printf(s, "\tpadding : type == %d\n",
37 RINGBUF_TYPE_PADDING);
38 ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
39 RINGBUF_TYPE_TIME_EXTEND);
40 ret = trace_seq_printf(s, "\tdata max type_len == %d\n",
41 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
42
43 return ret;
44}
45
46/*
25 * The ring buffer is made up of a list of pages. A separate list of pages is 47 * The ring buffer is made up of a list of pages. A separate list of pages is
26 * allocated for each CPU. A writer may only write to a buffer that is 48 * allocated for each CPU. A writer may only write to a buffer that is
27 * associated with the CPU it is currently executing on. A reader may read 49 * associated with the CPU it is currently executing on. A reader may read
@@ -182,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
182 204
183#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) 205#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
184#define RB_ALIGNMENT 4U 206#define RB_ALIGNMENT 4U
185#define RB_MAX_SMALL_DATA 28 207#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
208
209/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
210#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
186 211
187enum { 212enum {
188 RB_LEN_TIME_EXTEND = 8, 213 RB_LEN_TIME_EXTEND = 8,
@@ -191,48 +216,28 @@ enum {
191 216
192static inline int rb_null_event(struct ring_buffer_event *event) 217static inline int rb_null_event(struct ring_buffer_event *event)
193{ 218{
194 return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; 219 return event->type_len == RINGBUF_TYPE_PADDING
220 && event->time_delta == 0;
195} 221}
196 222
197static inline int rb_discarded_event(struct ring_buffer_event *event) 223static inline int rb_discarded_event(struct ring_buffer_event *event)
198{ 224{
199 return event->type == RINGBUF_TYPE_PADDING && event->time_delta; 225 return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
200} 226}
201 227
202static void rb_event_set_padding(struct ring_buffer_event *event) 228static void rb_event_set_padding(struct ring_buffer_event *event)
203{ 229{
204 event->type = RINGBUF_TYPE_PADDING; 230 event->type_len = RINGBUF_TYPE_PADDING;
205 event->time_delta = 0; 231 event->time_delta = 0;
206} 232}
207 233
208/**
209 * ring_buffer_event_discard - discard an event in the ring buffer
210 * @buffer: the ring buffer
211 * @event: the event to discard
212 *
213 * Sometimes a event that is in the ring buffer needs to be ignored.
214 * This function lets the user discard an event in the ring buffer
215 * and then that event will not be read later.
216 *
217 * Note, it is up to the user to be careful with this, and protect
218 * against races. If the user discards an event that has been consumed
219 * it is possible that it could corrupt the ring buffer.
220 */
221void ring_buffer_event_discard(struct ring_buffer_event *event)
222{
223 event->type = RINGBUF_TYPE_PADDING;
224 /* time delta must be non zero */
225 if (!event->time_delta)
226 event->time_delta = 1;
227}
228
229static unsigned 234static unsigned
230rb_event_data_length(struct ring_buffer_event *event) 235rb_event_data_length(struct ring_buffer_event *event)
231{ 236{
232 unsigned length; 237 unsigned length;
233 238
234 if (event->len) 239 if (event->type_len)
235 length = event->len * RB_ALIGNMENT; 240 length = event->type_len * RB_ALIGNMENT;
236 else 241 else
237 length = event->array[0]; 242 length = event->array[0];
238 return length + RB_EVNT_HDR_SIZE; 243 return length + RB_EVNT_HDR_SIZE;
@@ -242,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event)
242static unsigned 247static unsigned
243rb_event_length(struct ring_buffer_event *event) 248rb_event_length(struct ring_buffer_event *event)
244{ 249{
245 switch (event->type) { 250 switch (event->type_len) {
246 case RINGBUF_TYPE_PADDING: 251 case RINGBUF_TYPE_PADDING:
247 if (rb_null_event(event)) 252 if (rb_null_event(event))
248 /* undefined */ 253 /* undefined */
249 return -1; 254 return -1;
250 return rb_event_data_length(event); 255 return event->array[0] + RB_EVNT_HDR_SIZE;
251 256
252 case RINGBUF_TYPE_TIME_EXTEND: 257 case RINGBUF_TYPE_TIME_EXTEND:
253 return RB_LEN_TIME_EXTEND; 258 return RB_LEN_TIME_EXTEND;
@@ -271,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event)
271unsigned ring_buffer_event_length(struct ring_buffer_event *event) 276unsigned ring_buffer_event_length(struct ring_buffer_event *event)
272{ 277{
273 unsigned length = rb_event_length(event); 278 unsigned length = rb_event_length(event);
274 if (event->type != RINGBUF_TYPE_DATA) 279 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
275 return length; 280 return length;
276 length -= RB_EVNT_HDR_SIZE; 281 length -= RB_EVNT_HDR_SIZE;
277 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) 282 if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
284static void * 289static void *
285rb_event_data(struct ring_buffer_event *event) 290rb_event_data(struct ring_buffer_event *event)
286{ 291{
287 BUG_ON(event->type != RINGBUF_TYPE_DATA); 292 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
288 /* If length is in len field, then array[0] has the data */ 293 /* If length is in len field, then array[0] has the data */
289 if (event->len) 294 if (event->type_len)
290 return (void *)&event->array[0]; 295 return (void *)&event->array[0];
291 /* Otherwise length is in array[0] and array[1] has the data */ 296 /* Otherwise length is in array[0] and array[1] has the data */
292 return (void *)&event->array[1]; 297 return (void *)&event->array[1];
@@ -316,9 +321,10 @@ struct buffer_data_page {
316}; 321};
317 322
318struct buffer_page { 323struct buffer_page {
324 struct list_head list; /* list of buffer pages */
319 local_t write; /* index for next write */ 325 local_t write; /* index for next write */
320 unsigned read; /* index for next read */ 326 unsigned read; /* index for next read */
321 struct list_head list; /* list of free pages */ 327 local_t entries; /* entries on this page */
322 struct buffer_data_page *page; /* Actual data page */ 328 struct buffer_data_page *page; /* Actual data page */
323}; 329};
324 330
@@ -361,6 +367,28 @@ static inline int test_time_stamp(u64 delta)
361 367
362#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) 368#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
363 369
370int ring_buffer_print_page_header(struct trace_seq *s)
371{
372 struct buffer_data_page field;
373 int ret;
374
375 ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
376 "offset:0;\tsize:%u;\n",
377 (unsigned int)sizeof(field.time_stamp));
378
379 ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
380 "offset:%u;\tsize:%u;\n",
381 (unsigned int)offsetof(typeof(field), commit),
382 (unsigned int)sizeof(field.commit));
383
384 ret = trace_seq_printf(s, "\tfield: char data;\t"
385 "offset:%u;\tsize:%u;\n",
386 (unsigned int)offsetof(typeof(field), data),
387 (unsigned int)BUF_PAGE_SIZE);
388
389 return ret;
390}
391
364/* 392/*
365 * head_page == tail_page && head == tail then buffer is empty. 393 * head_page == tail_page && head == tail then buffer is empty.
366 */ 394 */
@@ -375,8 +403,11 @@ struct ring_buffer_per_cpu {
375 struct buffer_page *tail_page; /* write to tail */ 403 struct buffer_page *tail_page; /* write to tail */
376 struct buffer_page *commit_page; /* committed pages */ 404 struct buffer_page *commit_page; /* committed pages */
377 struct buffer_page *reader_page; 405 struct buffer_page *reader_page;
406 unsigned long nmi_dropped;
407 unsigned long commit_overrun;
378 unsigned long overrun; 408 unsigned long overrun;
379 unsigned long entries; 409 unsigned long read;
410 local_t entries;
380 u64 write_stamp; 411 u64 write_stamp;
381 u64 read_stamp; 412 u64 read_stamp;
382 atomic_t record_disabled; 413 atomic_t record_disabled;
@@ -947,31 +978,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
947 return rb_page_commit(cpu_buffer->head_page); 978 return rb_page_commit(cpu_buffer->head_page);
948} 979}
949 980
950/*
951 * When the tail hits the head and the buffer is in overwrite mode,
952 * the head jumps to the next page and all content on the previous
953 * page is discarded. But before doing so, we update the overrun
954 * variable of the buffer.
955 */
956static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
957{
958 struct ring_buffer_event *event;
959 unsigned long head;
960
961 for (head = 0; head < rb_head_size(cpu_buffer);
962 head += rb_event_length(event)) {
963
964 event = __rb_page_index(cpu_buffer->head_page, head);
965 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
966 return;
967 /* Only count data entries */
968 if (event->type != RINGBUF_TYPE_DATA)
969 continue;
970 cpu_buffer->overrun++;
971 cpu_buffer->entries--;
972 }
973}
974
975static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 981static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
976 struct buffer_page **bpage) 982 struct buffer_page **bpage)
977{ 983{
@@ -1110,28 +1116,21 @@ static void
1110rb_update_event(struct ring_buffer_event *event, 1116rb_update_event(struct ring_buffer_event *event,
1111 unsigned type, unsigned length) 1117 unsigned type, unsigned length)
1112{ 1118{
1113 event->type = type; 1119 event->type_len = type;
1114 1120
1115 switch (type) { 1121 switch (type) {
1116 1122
1117 case RINGBUF_TYPE_PADDING: 1123 case RINGBUF_TYPE_PADDING:
1118 break;
1119
1120 case RINGBUF_TYPE_TIME_EXTEND: 1124 case RINGBUF_TYPE_TIME_EXTEND:
1121 event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
1122 break;
1123
1124 case RINGBUF_TYPE_TIME_STAMP: 1125 case RINGBUF_TYPE_TIME_STAMP:
1125 event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
1126 break; 1126 break;
1127 1127
1128 case RINGBUF_TYPE_DATA: 1128 case 0:
1129 length -= RB_EVNT_HDR_SIZE; 1129 length -= RB_EVNT_HDR_SIZE;
1130 if (length > RB_MAX_SMALL_DATA) { 1130 if (length > RB_MAX_SMALL_DATA)
1131 event->len = 0;
1132 event->array[0] = length; 1131 event->array[0] = length;
1133 } else 1132 else
1134 event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1133 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1135 break; 1134 break;
1136 default: 1135 default:
1137 BUG(); 1136 BUG();
@@ -1155,131 +1154,156 @@ static unsigned rb_calculate_event_length(unsigned length)
1155 return length; 1154 return length;
1156} 1155}
1157 1156
1157
1158static struct ring_buffer_event * 1158static struct ring_buffer_event *
1159__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1159rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1160 unsigned type, unsigned long length, u64 *ts) 1160 unsigned long length, unsigned long tail,
1161 struct buffer_page *commit_page,
1162 struct buffer_page *tail_page, u64 *ts)
1161{ 1163{
1162 struct buffer_page *tail_page, *head_page, *reader_page, *commit_page; 1164 struct buffer_page *next_page, *head_page, *reader_page;
1163 unsigned long tail, write;
1164 struct ring_buffer *buffer = cpu_buffer->buffer; 1165 struct ring_buffer *buffer = cpu_buffer->buffer;
1165 struct ring_buffer_event *event; 1166 struct ring_buffer_event *event;
1166 unsigned long flags;
1167 bool lock_taken = false; 1167 bool lock_taken = false;
1168 unsigned long flags;
1168 1169
1169 commit_page = cpu_buffer->commit_page; 1170 next_page = tail_page;
1170 /* we just need to protect against interrupts */
1171 barrier();
1172 tail_page = cpu_buffer->tail_page;
1173 write = local_add_return(length, &tail_page->write);
1174 tail = write - length;
1175 1171
1176 /* See if we shot pass the end of this buffer page */ 1172 local_irq_save(flags);
1177 if (write > BUF_PAGE_SIZE) { 1173 /*
1178 struct buffer_page *next_page = tail_page; 1174 * Since the write to the buffer is still not
1175 * fully lockless, we must be careful with NMIs.
1176 * The locks in the writers are taken when a write
1177 * crosses to a new page. The locks protect against
1178 * races with the readers (this will soon be fixed
1179 * with a lockless solution).
1180 *
1181 * Because we can not protect against NMIs, and we
1182 * want to keep traces reentrant, we need to manage
1183 * what happens when we are in an NMI.
1184 *
1185 * NMIs can happen after we take the lock.
1186 * If we are in an NMI, only take the lock
1187 * if it is not already taken. Otherwise
1188 * simply fail.
1189 */
1190 if (unlikely(in_nmi())) {
1191 if (!__raw_spin_trylock(&cpu_buffer->lock)) {
1192 cpu_buffer->nmi_dropped++;
1193 goto out_reset;
1194 }
1195 } else
1196 __raw_spin_lock(&cpu_buffer->lock);
1179 1197
1180 local_irq_save(flags); 1198 lock_taken = true;
1181 /*
1182 * Since the write to the buffer is still not
1183 * fully lockless, we must be careful with NMIs.
1184 * The locks in the writers are taken when a write
1185 * crosses to a new page. The locks protect against
1186 * races with the readers (this will soon be fixed
1187 * with a lockless solution).
1188 *
1189 * Because we can not protect against NMIs, and we
1190 * want to keep traces reentrant, we need to manage
1191 * what happens when we are in an NMI.
1192 *
1193 * NMIs can happen after we take the lock.
1194 * If we are in an NMI, only take the lock
1195 * if it is not already taken. Otherwise
1196 * simply fail.
1197 */
1198 if (unlikely(in_nmi())) {
1199 if (!__raw_spin_trylock(&cpu_buffer->lock))
1200 goto out_reset;
1201 } else
1202 __raw_spin_lock(&cpu_buffer->lock);
1203 1199
1204 lock_taken = true; 1200 rb_inc_page(cpu_buffer, &next_page);
1205 1201
1206 rb_inc_page(cpu_buffer, &next_page); 1202 head_page = cpu_buffer->head_page;
1203 reader_page = cpu_buffer->reader_page;
1207 1204
1208 head_page = cpu_buffer->head_page; 1205 /* we grabbed the lock before incrementing */
1209 reader_page = cpu_buffer->reader_page; 1206 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1207 goto out_reset;
1210 1208
1211 /* we grabbed the lock before incrementing */ 1209 /*
1212 if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) 1210 * If for some reason, we had an interrupt storm that made
1213 goto out_reset; 1211 * it all the way around the buffer, bail, and warn
1212 * about it.
1213 */
1214 if (unlikely(next_page == commit_page)) {
1215 cpu_buffer->commit_overrun++;
1216 goto out_reset;
1217 }
1214 1218
1215 /* 1219 if (next_page == head_page) {
1216 * If for some reason, we had an interrupt storm that made 1220 if (!(buffer->flags & RB_FL_OVERWRITE))
1217 * it all the way around the buffer, bail, and warn
1218 * about it.
1219 */
1220 if (unlikely(next_page == commit_page)) {
1221 WARN_ON_ONCE(1);
1222 goto out_reset; 1221 goto out_reset;
1223 }
1224
1225 if (next_page == head_page) {
1226 if (!(buffer->flags & RB_FL_OVERWRITE))
1227 goto out_reset;
1228 1222
1229 /* tail_page has not moved yet? */ 1223 /* tail_page has not moved yet? */
1230 if (tail_page == cpu_buffer->tail_page) { 1224 if (tail_page == cpu_buffer->tail_page) {
1231 /* count overflows */ 1225 /* count overflows */
1232 rb_update_overflow(cpu_buffer); 1226 cpu_buffer->overrun +=
1227 local_read(&head_page->entries);
1233 1228
1234 rb_inc_page(cpu_buffer, &head_page); 1229 rb_inc_page(cpu_buffer, &head_page);
1235 cpu_buffer->head_page = head_page; 1230 cpu_buffer->head_page = head_page;
1236 cpu_buffer->head_page->read = 0; 1231 cpu_buffer->head_page->read = 0;
1237 }
1238 } 1232 }
1233 }
1239 1234
1240 /* 1235 /*
1241 * If the tail page is still the same as what we think 1236 * If the tail page is still the same as what we think
1242 * it is, then it is up to us to update the tail 1237 * it is, then it is up to us to update the tail
1243 * pointer. 1238 * pointer.
1244 */ 1239 */
1245 if (tail_page == cpu_buffer->tail_page) { 1240 if (tail_page == cpu_buffer->tail_page) {
1246 local_set(&next_page->write, 0); 1241 local_set(&next_page->write, 0);
1247 local_set(&next_page->page->commit, 0); 1242 local_set(&next_page->entries, 0);
1248 cpu_buffer->tail_page = next_page; 1243 local_set(&next_page->page->commit, 0);
1244 cpu_buffer->tail_page = next_page;
1245
1246 /* reread the time stamp */
1247 *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
1248 cpu_buffer->tail_page->page->time_stamp = *ts;
1249 }
1249 1250
1250 /* reread the time stamp */ 1251 /*
1251 *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); 1252 * The actual tail page has moved forward.
1252 cpu_buffer->tail_page->page->time_stamp = *ts; 1253 */
1253 } 1254 if (tail < BUF_PAGE_SIZE) {
1255 /* Mark the rest of the page with padding */
1256 event = __rb_page_index(tail_page, tail);
1257 rb_event_set_padding(event);
1258 }
1254 1259
1255 /* 1260 /* Set the write back to the previous setting */
1256 * The actual tail page has moved forward. 1261 local_sub(length, &tail_page->write);
1257 */
1258 if (tail < BUF_PAGE_SIZE) {
1259 /* Mark the rest of the page with padding */
1260 event = __rb_page_index(tail_page, tail);
1261 rb_event_set_padding(event);
1262 }
1263 1262
1264 if (tail <= BUF_PAGE_SIZE) 1263 /*
1265 /* Set the write back to the previous setting */ 1264 * If this was a commit entry that failed,
1266 local_set(&tail_page->write, tail); 1265 * increment that too
1266 */
1267 if (tail_page == cpu_buffer->commit_page &&
1268 tail == rb_commit_index(cpu_buffer)) {
1269 rb_set_commit_to_write(cpu_buffer);
1270 }
1267 1271
1268 /* 1272 __raw_spin_unlock(&cpu_buffer->lock);
1269 * If this was a commit entry that failed, 1273 local_irq_restore(flags);
1270 * increment that too 1274
1271 */ 1275 /* fail and let the caller try again */
1272 if (tail_page == cpu_buffer->commit_page && 1276 return ERR_PTR(-EAGAIN);
1273 tail == rb_commit_index(cpu_buffer)) { 1277
1274 rb_set_commit_to_write(cpu_buffer); 1278 out_reset:
1275 } 1279 /* reset write */
1280 local_sub(length, &tail_page->write);
1276 1281
1282 if (likely(lock_taken))
1277 __raw_spin_unlock(&cpu_buffer->lock); 1283 __raw_spin_unlock(&cpu_buffer->lock);
1278 local_irq_restore(flags); 1284 local_irq_restore(flags);
1285 return NULL;
1286}
1279 1287
1280 /* fail and let the caller try again */ 1288static struct ring_buffer_event *
1281 return ERR_PTR(-EAGAIN); 1289__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1282 } 1290 unsigned type, unsigned long length, u64 *ts)
1291{
1292 struct buffer_page *tail_page, *commit_page;
1293 struct ring_buffer_event *event;
1294 unsigned long tail, write;
1295
1296 commit_page = cpu_buffer->commit_page;
1297 /* we just need to protect against interrupts */
1298 barrier();
1299 tail_page = cpu_buffer->tail_page;
1300 write = local_add_return(length, &tail_page->write);
1301 tail = write - length;
1302
1303 /* See if we shot pass the end of this buffer page */
1304 if (write > BUF_PAGE_SIZE)
1305 return rb_move_tail(cpu_buffer, length, tail,
1306 commit_page, tail_page, ts);
1283 1307
1284 /* We reserved something on the buffer */ 1308 /* We reserved something on the buffer */
1285 1309
@@ -1289,6 +1313,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1289 event = __rb_page_index(tail_page, tail); 1313 event = __rb_page_index(tail_page, tail);
1290 rb_update_event(event, type, length); 1314 rb_update_event(event, type, length);
1291 1315
1316 /* The passed in type is zero for DATA */
1317 if (likely(!type))
1318 local_inc(&tail_page->entries);
1319
1292 /* 1320 /*
1293 * If this is a commit and the tail is zero, then update 1321 * If this is a commit and the tail is zero, then update
1294 * this page's time stamp. 1322 * this page's time stamp.
@@ -1297,16 +1325,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1297 cpu_buffer->commit_page->page->time_stamp = *ts; 1325 cpu_buffer->commit_page->page->time_stamp = *ts;
1298 1326
1299 return event; 1327 return event;
1300
1301 out_reset:
1302 /* reset write */
1303 if (tail <= BUF_PAGE_SIZE)
1304 local_set(&tail_page->write, tail);
1305
1306 if (likely(lock_taken))
1307 __raw_spin_unlock(&cpu_buffer->lock);
1308 local_irq_restore(flags);
1309 return NULL;
1310} 1328}
1311 1329
1312static int 1330static int
@@ -1458,6 +1476,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1458 return event; 1476 return event;
1459} 1477}
1460 1478
1479#define TRACE_RECURSIVE_DEPTH 16
1480
1481static int trace_recursive_lock(void)
1482{
1483 current->trace_recursion++;
1484
1485 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
1486 return 0;
1487
1488 /* Disable all tracing before we do anything else */
1489 tracing_off_permanent();
1490
1491 printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
1492 "HC[%lu]:SC[%lu]:NMI[%lu]\n",
1493 current->trace_recursion,
1494 hardirq_count() >> HARDIRQ_SHIFT,
1495 softirq_count() >> SOFTIRQ_SHIFT,
1496 in_nmi());
1497
1498 WARN_ON_ONCE(1);
1499 return -1;
1500}
1501
1502static void trace_recursive_unlock(void)
1503{
1504 WARN_ON_ONCE(!current->trace_recursion);
1505
1506 current->trace_recursion--;
1507}
1508
1461static DEFINE_PER_CPU(int, rb_need_resched); 1509static DEFINE_PER_CPU(int, rb_need_resched);
1462 1510
1463/** 1511/**
@@ -1491,6 +1539,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1491 /* If we are tracing schedule, we don't want to recurse */ 1539 /* If we are tracing schedule, we don't want to recurse */
1492 resched = ftrace_preempt_disable(); 1540 resched = ftrace_preempt_disable();
1493 1541
1542 if (trace_recursive_lock())
1543 goto out_nocheck;
1544
1494 cpu = raw_smp_processor_id(); 1545 cpu = raw_smp_processor_id();
1495 1546
1496 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 1547 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1505,7 +1556,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1505 if (length > BUF_PAGE_SIZE) 1556 if (length > BUF_PAGE_SIZE)
1506 goto out; 1557 goto out;
1507 1558
1508 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); 1559 event = rb_reserve_next_event(cpu_buffer, 0, length);
1509 if (!event) 1560 if (!event)
1510 goto out; 1561 goto out;
1511 1562
@@ -1520,6 +1571,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
1520 return event; 1571 return event;
1521 1572
1522 out: 1573 out:
1574 trace_recursive_unlock();
1575
1576 out_nocheck:
1523 ftrace_preempt_enable(resched); 1577 ftrace_preempt_enable(resched);
1524 return NULL; 1578 return NULL;
1525} 1579}
@@ -1528,7 +1582,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1528static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 1582static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1529 struct ring_buffer_event *event) 1583 struct ring_buffer_event *event)
1530{ 1584{
1531 cpu_buffer->entries++; 1585 local_inc(&cpu_buffer->entries);
1532 1586
1533 /* Only process further if we own the commit */ 1587 /* Only process further if we own the commit */
1534 if (!rb_is_commit(cpu_buffer, event)) 1588 if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1612,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1558 1612
1559 rb_commit(cpu_buffer, event); 1613 rb_commit(cpu_buffer, event);
1560 1614
1615 trace_recursive_unlock();
1616
1561 /* 1617 /*
1562 * Only the last preempt count needs to restore preemption. 1618 * Only the last preempt count needs to restore preemption.
1563 */ 1619 */
@@ -1570,6 +1626,119 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1570} 1626}
1571EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); 1627EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
1572 1628
1629static inline void rb_event_discard(struct ring_buffer_event *event)
1630{
1631 /* array[0] holds the actual length for the discarded event */
1632 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
1633 event->type_len = RINGBUF_TYPE_PADDING;
1634 /* time delta must be non zero */
1635 if (!event->time_delta)
1636 event->time_delta = 1;
1637}
1638
1639/**
1640 * ring_buffer_event_discard - discard any event in the ring buffer
1641 * @event: the event to discard
1642 *
1643 * Sometimes a event that is in the ring buffer needs to be ignored.
1644 * This function lets the user discard an event in the ring buffer
1645 * and then that event will not be read later.
1646 *
1647 * Note, it is up to the user to be careful with this, and protect
1648 * against races. If the user discards an event that has been consumed
1649 * it is possible that it could corrupt the ring buffer.
1650 */
1651void ring_buffer_event_discard(struct ring_buffer_event *event)
1652{
1653 rb_event_discard(event);
1654}
1655EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
1656
1657/**
1658 * ring_buffer_commit_discard - discard an event that has not been committed
1659 * @buffer: the ring buffer
1660 * @event: non committed event to discard
1661 *
1662 * This is similar to ring_buffer_event_discard but must only be
1663 * performed on an event that has not been committed yet. The difference
1664 * is that this will also try to free the event from the ring buffer
1665 * if another event has not been added behind it.
1666 *
1667 * If another event has been added behind it, it will set the event
1668 * up as discarded, and perform the commit.
1669 *
1670 * If this function is called, do not call ring_buffer_unlock_commit on
1671 * the event.
1672 */
1673void ring_buffer_discard_commit(struct ring_buffer *buffer,
1674 struct ring_buffer_event *event)
1675{
1676 struct ring_buffer_per_cpu *cpu_buffer;
1677 unsigned long new_index, old_index;
1678 struct buffer_page *bpage;
1679 unsigned long index;
1680 unsigned long addr;
1681 int cpu;
1682
1683 /* The event is discarded regardless */
1684 rb_event_discard(event);
1685
1686 /*
1687 * This must only be called if the event has not been
1688 * committed yet. Thus we can assume that preemption
1689 * is still disabled.
1690 */
1691 RB_WARN_ON(buffer, !preempt_count());
1692
1693 cpu = smp_processor_id();
1694 cpu_buffer = buffer->buffers[cpu];
1695
1696 new_index = rb_event_index(event);
1697 old_index = new_index + rb_event_length(event);
1698 addr = (unsigned long)event;
1699 addr &= PAGE_MASK;
1700
1701 bpage = cpu_buffer->tail_page;
1702
1703 if (bpage == (void *)addr && rb_page_write(bpage) == old_index) {
1704 /*
1705 * This is on the tail page. It is possible that
1706 * a write could come in and move the tail page
1707 * and write to the next page. That is fine
1708 * because we just shorten what is on this page.
1709 */
1710 index = local_cmpxchg(&bpage->write, old_index, new_index);
1711 if (index == old_index)
1712 goto out;
1713 }
1714
1715 /*
1716 * The commit is still visible by the reader, so we
1717 * must increment entries.
1718 */
1719 local_inc(&cpu_buffer->entries);
1720 out:
1721 /*
1722 * If a write came in and pushed the tail page
1723 * we still need to update the commit pointer
1724 * if we were the commit.
1725 */
1726 if (rb_is_commit(cpu_buffer, event))
1727 rb_set_commit_to_write(cpu_buffer);
1728
1729 trace_recursive_unlock();
1730
1731 /*
1732 * Only the last preempt count needs to restore preemption.
1733 */
1734 if (preempt_count() == 1)
1735 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1736 else
1737 preempt_enable_no_resched_notrace();
1738
1739}
1740EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
1741
1573/** 1742/**
1574 * ring_buffer_write - write data to the buffer without reserving 1743 * ring_buffer_write - write data to the buffer without reserving
1575 * @buffer: The ring buffer to write to. 1744 * @buffer: The ring buffer to write to.
@@ -1613,8 +1782,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1613 goto out; 1782 goto out;
1614 1783
1615 event_length = rb_calculate_event_length(length); 1784 event_length = rb_calculate_event_length(length);
1616 event = rb_reserve_next_event(cpu_buffer, 1785 event = rb_reserve_next_event(cpu_buffer, 0, event_length);
1617 RINGBUF_TYPE_DATA, event_length);
1618 if (!event) 1786 if (!event)
1619 goto out; 1787 goto out;
1620 1788
@@ -1728,7 +1896,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1728 return 0; 1896 return 0;
1729 1897
1730 cpu_buffer = buffer->buffers[cpu]; 1898 cpu_buffer = buffer->buffers[cpu];
1731 ret = cpu_buffer->entries; 1899 ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
1900 - cpu_buffer->read;
1732 1901
1733 return ret; 1902 return ret;
1734} 1903}
@@ -1755,6 +1924,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1755EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 1924EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1756 1925
1757/** 1926/**
1927 * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
1928 * @buffer: The ring buffer
1929 * @cpu: The per CPU buffer to get the number of overruns from
1930 */
1931unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
1932{
1933 struct ring_buffer_per_cpu *cpu_buffer;
1934 unsigned long ret;
1935
1936 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1937 return 0;
1938
1939 cpu_buffer = buffer->buffers[cpu];
1940 ret = cpu_buffer->nmi_dropped;
1941
1942 return ret;
1943}
1944EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
1945
1946/**
1947 * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
1948 * @buffer: The ring buffer
1949 * @cpu: The per CPU buffer to get the number of overruns from
1950 */
1951unsigned long
1952ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
1953{
1954 struct ring_buffer_per_cpu *cpu_buffer;
1955 unsigned long ret;
1956
1957 if (!cpumask_test_cpu(cpu, buffer->cpumask))
1958 return 0;
1959
1960 cpu_buffer = buffer->buffers[cpu];
1961 ret = cpu_buffer->commit_overrun;
1962
1963 return ret;
1964}
1965EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
1966
1967/**
1758 * ring_buffer_entries - get the number of entries in a buffer 1968 * ring_buffer_entries - get the number of entries in a buffer
1759 * @buffer: The ring buffer 1969 * @buffer: The ring buffer
1760 * 1970 *
@@ -1770,7 +1980,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1770 /* if you care about this being correct, lock the buffer */ 1980 /* if you care about this being correct, lock the buffer */
1771 for_each_buffer_cpu(buffer, cpu) { 1981 for_each_buffer_cpu(buffer, cpu) {
1772 cpu_buffer = buffer->buffers[cpu]; 1982 cpu_buffer = buffer->buffers[cpu];
1773 entries += cpu_buffer->entries; 1983 entries += (local_read(&cpu_buffer->entries) -
1984 cpu_buffer->overrun) - cpu_buffer->read;
1774 } 1985 }
1775 1986
1776 return entries; 1987 return entries;
@@ -1862,7 +2073,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1862{ 2073{
1863 u64 delta; 2074 u64 delta;
1864 2075
1865 switch (event->type) { 2076 switch (event->type_len) {
1866 case RINGBUF_TYPE_PADDING: 2077 case RINGBUF_TYPE_PADDING:
1867 return; 2078 return;
1868 2079
@@ -1893,7 +2104,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1893{ 2104{
1894 u64 delta; 2105 u64 delta;
1895 2106
1896 switch (event->type) { 2107 switch (event->type_len) {
1897 case RINGBUF_TYPE_PADDING: 2108 case RINGBUF_TYPE_PADDING:
1898 return; 2109 return;
1899 2110
@@ -1966,6 +2177,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1966 cpu_buffer->reader_page->list.prev = reader->list.prev; 2177 cpu_buffer->reader_page->list.prev = reader->list.prev;
1967 2178
1968 local_set(&cpu_buffer->reader_page->write, 0); 2179 local_set(&cpu_buffer->reader_page->write, 0);
2180 local_set(&cpu_buffer->reader_page->entries, 0);
1969 local_set(&cpu_buffer->reader_page->page->commit, 0); 2181 local_set(&cpu_buffer->reader_page->page->commit, 0);
1970 2182
1971 /* Make the reader page now replace the head */ 2183 /* Make the reader page now replace the head */
@@ -2008,8 +2220,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
2008 2220
2009 event = rb_reader_event(cpu_buffer); 2221 event = rb_reader_event(cpu_buffer);
2010 2222
2011 if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) 2223 if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
2012 cpu_buffer->entries--; 2224 || rb_discarded_event(event))
2225 cpu_buffer->read++;
2013 2226
2014 rb_update_read_stamp(cpu_buffer, event); 2227 rb_update_read_stamp(cpu_buffer, event);
2015 2228
@@ -2089,7 +2302,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2089 2302
2090 event = rb_reader_event(cpu_buffer); 2303 event = rb_reader_event(cpu_buffer);
2091 2304
2092 switch (event->type) { 2305 switch (event->type_len) {
2093 case RINGBUF_TYPE_PADDING: 2306 case RINGBUF_TYPE_PADDING:
2094 if (rb_null_event(event)) 2307 if (rb_null_event(event))
2095 RB_WARN_ON(cpu_buffer, 1); 2308 RB_WARN_ON(cpu_buffer, 1);
@@ -2161,7 +2374,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2161 2374
2162 event = rb_iter_head_event(iter); 2375 event = rb_iter_head_event(iter);
2163 2376
2164 switch (event->type) { 2377 switch (event->type_len) {
2165 case RINGBUF_TYPE_PADDING: 2378 case RINGBUF_TYPE_PADDING:
2166 if (rb_null_event(event)) { 2379 if (rb_null_event(event)) {
2167 rb_inc_iter(iter); 2380 rb_inc_iter(iter);
@@ -2220,7 +2433,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2220 event = rb_buffer_peek(buffer, cpu, ts); 2433 event = rb_buffer_peek(buffer, cpu, ts);
2221 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2434 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2222 2435
2223 if (event && event->type == RINGBUF_TYPE_PADDING) { 2436 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2224 cpu_relax(); 2437 cpu_relax();
2225 goto again; 2438 goto again;
2226 } 2439 }
@@ -2248,7 +2461,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2248 event = rb_iter_peek(iter, ts); 2461 event = rb_iter_peek(iter, ts);
2249 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2462 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2250 2463
2251 if (event && event->type == RINGBUF_TYPE_PADDING) { 2464 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2252 cpu_relax(); 2465 cpu_relax();
2253 goto again; 2466 goto again;
2254 } 2467 }
@@ -2293,7 +2506,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2293 out: 2506 out:
2294 preempt_enable(); 2507 preempt_enable();
2295 2508
2296 if (event && event->type == RINGBUF_TYPE_PADDING) { 2509 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2297 cpu_relax(); 2510 cpu_relax();
2298 goto again; 2511 goto again;
2299 } 2512 }
@@ -2386,7 +2599,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2386 out: 2599 out:
2387 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2600 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2388 2601
2389 if (event && event->type == RINGBUF_TYPE_PADDING) { 2602 if (event && event->type_len == RINGBUF_TYPE_PADDING) {
2390 cpu_relax(); 2603 cpu_relax();
2391 goto again; 2604 goto again;
2392 } 2605 }
@@ -2411,6 +2624,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2411 cpu_buffer->head_page 2624 cpu_buffer->head_page
2412 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2625 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
2413 local_set(&cpu_buffer->head_page->write, 0); 2626 local_set(&cpu_buffer->head_page->write, 0);
2627 local_set(&cpu_buffer->head_page->entries, 0);
2414 local_set(&cpu_buffer->head_page->page->commit, 0); 2628 local_set(&cpu_buffer->head_page->page->commit, 0);
2415 2629
2416 cpu_buffer->head_page->read = 0; 2630 cpu_buffer->head_page->read = 0;
@@ -2420,11 +2634,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2420 2634
2421 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2635 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
2422 local_set(&cpu_buffer->reader_page->write, 0); 2636 local_set(&cpu_buffer->reader_page->write, 0);
2637 local_set(&cpu_buffer->reader_page->entries, 0);
2423 local_set(&cpu_buffer->reader_page->page->commit, 0); 2638 local_set(&cpu_buffer->reader_page->page->commit, 0);
2424 cpu_buffer->reader_page->read = 0; 2639 cpu_buffer->reader_page->read = 0;
2425 2640
2641 cpu_buffer->nmi_dropped = 0;
2642 cpu_buffer->commit_overrun = 0;
2426 cpu_buffer->overrun = 0; 2643 cpu_buffer->overrun = 0;
2427 cpu_buffer->entries = 0; 2644 cpu_buffer->read = 0;
2645 local_set(&cpu_buffer->entries, 0);
2428 2646
2429 cpu_buffer->write_stamp = 0; 2647 cpu_buffer->write_stamp = 0;
2430 cpu_buffer->read_stamp = 0; 2648 cpu_buffer->read_stamp = 0;
@@ -2443,6 +2661,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2443 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2661 if (!cpumask_test_cpu(cpu, buffer->cpumask))
2444 return; 2662 return;
2445 2663
2664 atomic_inc(&cpu_buffer->record_disabled);
2665
2446 spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2666 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2447 2667
2448 __raw_spin_lock(&cpu_buffer->lock); 2668 __raw_spin_lock(&cpu_buffer->lock);
@@ -2452,6 +2672,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2452 __raw_spin_unlock(&cpu_buffer->lock); 2672 __raw_spin_unlock(&cpu_buffer->lock);
2453 2673
2454 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2674 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2675
2676 atomic_dec(&cpu_buffer->record_disabled);
2455} 2677}
2456EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 2678EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
2457 2679
@@ -2578,28 +2800,6 @@ out:
2578} 2800}
2579EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2801EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
2580 2802
2581static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2582 struct buffer_data_page *bpage,
2583 unsigned int offset)
2584{
2585 struct ring_buffer_event *event;
2586 unsigned long head;
2587
2588 __raw_spin_lock(&cpu_buffer->lock);
2589 for (head = offset; head < local_read(&bpage->commit);
2590 head += rb_event_length(event)) {
2591
2592 event = __rb_data_page_index(bpage, head);
2593 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2594 return;
2595 /* Only count data entries */
2596 if (event->type != RINGBUF_TYPE_DATA)
2597 continue;
2598 cpu_buffer->entries--;
2599 }
2600 __raw_spin_unlock(&cpu_buffer->lock);
2601}
2602
2603/** 2803/**
2604 * ring_buffer_alloc_read_page - allocate a page to read from buffer 2804 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2605 * @buffer: the buffer to allocate for. 2805 * @buffer: the buffer to allocate for.
@@ -2630,6 +2830,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2630 2830
2631 return bpage; 2831 return bpage;
2632} 2832}
2833EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
2633 2834
2634/** 2835/**
2635 * ring_buffer_free_read_page - free an allocated read page 2836 * ring_buffer_free_read_page - free an allocated read page
@@ -2642,6 +2843,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2642{ 2843{
2643 free_page((unsigned long)data); 2844 free_page((unsigned long)data);
2644} 2845}
2846EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
2645 2847
2646/** 2848/**
2647 * ring_buffer_read_page - extract a page from the ring buffer 2849 * ring_buffer_read_page - extract a page from the ring buffer
@@ -2768,16 +2970,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2768 /* we copied everything to the beginning */ 2970 /* we copied everything to the beginning */
2769 read = 0; 2971 read = 0;
2770 } else { 2972 } else {
2973 /* update the entry counter */
2974 cpu_buffer->read += local_read(&reader->entries);
2975
2771 /* swap the pages */ 2976 /* swap the pages */
2772 rb_init_page(bpage); 2977 rb_init_page(bpage);
2773 bpage = reader->page; 2978 bpage = reader->page;
2774 reader->page = *data_page; 2979 reader->page = *data_page;
2775 local_set(&reader->write, 0); 2980 local_set(&reader->write, 0);
2981 local_set(&reader->entries, 0);
2776 reader->read = 0; 2982 reader->read = 0;
2777 *data_page = bpage; 2983 *data_page = bpage;
2778
2779 /* update the entry counter */
2780 rb_remove_entries(cpu_buffer, bpage, read);
2781 } 2984 }
2782 ret = read; 2985 ret = read;
2783 2986
@@ -2787,6 +2990,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
2787 out: 2990 out:
2788 return ret; 2991 return ret;
2789} 2992}
2993EXPORT_SYMBOL_GPL(ring_buffer_read_page);
2790 2994
2791static ssize_t 2995static ssize_t
2792rb_simple_read(struct file *filp, char __user *ubuf, 2996rb_simple_read(struct file *filp, char __user *ubuf,
@@ -2845,14 +3049,11 @@ static const struct file_operations rb_simple_fops = {
2845static __init int rb_init_debugfs(void) 3049static __init int rb_init_debugfs(void)
2846{ 3050{
2847 struct dentry *d_tracer; 3051 struct dentry *d_tracer;
2848 struct dentry *entry;
2849 3052
2850 d_tracer = tracing_init_dentry(); 3053 d_tracer = tracing_init_dentry();
2851 3054
2852 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 3055 trace_create_file("tracing_on", 0644, d_tracer,
2853 &ring_buffer_flags, &rb_simple_fops); 3056 &ring_buffer_flags, &rb_simple_fops);
2854 if (!entry)
2855 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2856 3057
2857 return 0; 3058 return 0;
2858} 3059}
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
new file mode 100644
index 000000000000..a26fc67b63bb
--- /dev/null
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -0,0 +1,413 @@
1/*
2 * ring buffer tester and benchmark
3 *
4 * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/ring_buffer.h>
7#include <linux/completion.h>
8#include <linux/kthread.h>
9#include <linux/module.h>
10#include <linux/time.h>
11
12struct rb_page {
13 u64 ts;
14 local_t commit;
15 char data[4080];
16};
17
18/* run time and sleep time in seconds */
19#define RUN_TIME 10
20#define SLEEP_TIME 10
21
22/* number of events for writer to wake up the reader */
23static int wakeup_interval = 100;
24
25static int reader_finish;
26static struct completion read_start;
27static struct completion read_done;
28
29static struct ring_buffer *buffer;
30static struct task_struct *producer;
31static struct task_struct *consumer;
32static unsigned long read;
33
34static int disable_reader;
35module_param(disable_reader, uint, 0644);
36MODULE_PARM_DESC(disable_reader, "only run producer");
37
38static int read_events;
39
40static int kill_test;
41
42#define KILL_TEST() \
43 do { \
44 if (!kill_test) { \
45 kill_test = 1; \
46 WARN_ON(1); \
47 } \
48 } while (0)
49
50enum event_status {
51 EVENT_FOUND,
52 EVENT_DROPPED,
53};
54
55static enum event_status read_event(int cpu)
56{
57 struct ring_buffer_event *event;
58 int *entry;
59 u64 ts;
60
61 event = ring_buffer_consume(buffer, cpu, &ts);
62 if (!event)
63 return EVENT_DROPPED;
64
65 entry = ring_buffer_event_data(event);
66 if (*entry != cpu) {
67 KILL_TEST();
68 return EVENT_DROPPED;
69 }
70
71 read++;
72 return EVENT_FOUND;
73}
74
75static enum event_status read_page(int cpu)
76{
77 struct ring_buffer_event *event;
78 struct rb_page *rpage;
79 unsigned long commit;
80 void *bpage;
81 int *entry;
82 int ret;
83 int inc;
84 int i;
85
86 bpage = ring_buffer_alloc_read_page(buffer);
87 if (!bpage)
88 return EVENT_DROPPED;
89
90 ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
91 if (ret >= 0) {
92 rpage = bpage;
93 commit = local_read(&rpage->commit);
94 for (i = 0; i < commit && !kill_test; i += inc) {
95
96 if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
97 KILL_TEST();
98 break;
99 }
100
101 inc = -1;
102 event = (void *)&rpage->data[i];
103 switch (event->type_len) {
104 case RINGBUF_TYPE_PADDING:
105 /* We don't expect any padding */
106 KILL_TEST();
107 break;
108 case RINGBUF_TYPE_TIME_EXTEND:
109 inc = 8;
110 break;
111 case 0:
112 entry = ring_buffer_event_data(event);
113 if (*entry != cpu) {
114 KILL_TEST();
115 break;
116 }
117 read++;
118 if (!event->array[0]) {
119 KILL_TEST();
120 break;
121 }
122 inc = event->array[0];
123 break;
124 default:
125 entry = ring_buffer_event_data(event);
126 if (*entry != cpu) {
127 KILL_TEST();
128 break;
129 }
130 read++;
131 inc = ((event->type_len + 1) * 4);
132 }
133 if (kill_test)
134 break;
135
136 if (inc <= 0) {
137 KILL_TEST();
138 break;
139 }
140 }
141 }
142 ring_buffer_free_read_page(buffer, bpage);
143
144 if (ret < 0)
145 return EVENT_DROPPED;
146 return EVENT_FOUND;
147}
148
149static void ring_buffer_consumer(void)
150{
151 /* toggle between reading pages and events */
152 read_events ^= 1;
153
154 read = 0;
155 while (!reader_finish && !kill_test) {
156 int found;
157
158 do {
159 int cpu;
160
161 found = 0;
162 for_each_online_cpu(cpu) {
163 enum event_status stat;
164
165 if (read_events)
166 stat = read_event(cpu);
167 else
168 stat = read_page(cpu);
169
170 if (kill_test)
171 break;
172 if (stat == EVENT_FOUND)
173 found = 1;
174 }
175 } while (found && !kill_test);
176
177 set_current_state(TASK_INTERRUPTIBLE);
178 if (reader_finish)
179 break;
180
181 schedule();
182 __set_current_state(TASK_RUNNING);
183 }
184 reader_finish = 0;
185 complete(&read_done);
186}
187
188/*
189 * If we are a non preempt kernel, the 10 second run will
190 * stop everything while it runs. Instead, we will call cond_resched
191 * and also add any time that was lost by a rescedule.
192 */
193#ifdef CONFIG_PREEMPT
194static void sched_if_needed(struct timeval *start_tv, struct timeval *end_tv)
195{
196}
197#else
198static void sched_if_needed(struct timeval *start_tv, struct timeval *end_tv)
199{
200 struct timeval tv;
201
202 cond_resched();
203 do_gettimeofday(&tv);
204 if (tv.tv_usec < end_tv->tv_usec) {
205 tv.tv_usec += 1000000;
206 tv.tv_sec--;
207 }
208 start_tv->tv_sec += tv.tv_sec - end_tv->tv_sec;
209 start_tv->tv_usec += tv.tv_usec - end_tv->tv_usec;
210 if (start_tv->tv_usec > 1000000) {
211 start_tv->tv_usec -= 1000000;
212 start_tv->tv_sec++;
213 }
214}
215#endif
216
217static void ring_buffer_producer(void)
218{
219 struct timeval start_tv;
220 struct timeval end_tv;
221 unsigned long long time;
222 unsigned long long entries;
223 unsigned long long overruns;
224 unsigned long missed = 0;
225 unsigned long hit = 0;
226 unsigned long avg;
227 int cnt = 0;
228
229 /*
230 * Hammer the buffer for 10 secs (this may
231 * make the system stall)
232 */
233 pr_info("Starting ring buffer hammer\n");
234 do_gettimeofday(&start_tv);
235 do {
236 struct ring_buffer_event *event;
237 int *entry;
238
239 event = ring_buffer_lock_reserve(buffer, 10);
240 if (!event) {
241 missed++;
242 } else {
243 hit++;
244 entry = ring_buffer_event_data(event);
245 *entry = smp_processor_id();
246 ring_buffer_unlock_commit(buffer, event);
247 }
248 do_gettimeofday(&end_tv);
249
250 if (consumer && !(++cnt % wakeup_interval))
251 wake_up_process(consumer);
252
253 sched_if_needed(&start_tv, &end_tv);
254
255 } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
256 pr_info("End ring buffer hammer\n");
257
258 if (consumer) {
259 /* Init both completions here to avoid races */
260 init_completion(&read_start);
261 init_completion(&read_done);
262 /* the completions must be visible before the finish var */
263 smp_wmb();
264 reader_finish = 1;
265 /* finish var visible before waking up the consumer */
266 smp_wmb();
267 wake_up_process(consumer);
268 wait_for_completion(&read_done);
269 }
270
271 time = end_tv.tv_sec - start_tv.tv_sec;
272 time *= 1000000;
273 time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
274
275 entries = ring_buffer_entries(buffer);
276 overruns = ring_buffer_overruns(buffer);
277
278 if (kill_test)
279 pr_info("ERROR!\n");
280 pr_info("Time: %lld (usecs)\n", time);
281 pr_info("Overruns: %lld\n", overruns);
282 if (disable_reader)
283 pr_info("Read: (reader disabled)\n");
284 else
285 pr_info("Read: %ld (by %s)\n", read,
286 read_events ? "events" : "pages");
287 pr_info("Entries: %lld\n", entries);
288 pr_info("Total: %lld\n", entries + overruns + read);
289 pr_info("Missed: %ld\n", missed);
290 pr_info("Hit: %ld\n", hit);
291
292 do_div(time, 1000);
293 if (time)
294 hit /= (long)time;
295 else
296 pr_info("TIME IS ZERO??\n");
297
298 pr_info("Entries per millisec: %ld\n", hit);
299
300 if (hit) {
301 avg = 1000000 / hit;
302 pr_info("%ld ns per entry\n", avg);
303 }
304}
305
306static void wait_to_die(void)
307{
308 set_current_state(TASK_INTERRUPTIBLE);
309 while (!kthread_should_stop()) {
310 schedule();
311 set_current_state(TASK_INTERRUPTIBLE);
312 }
313 __set_current_state(TASK_RUNNING);
314}
315
316static int ring_buffer_consumer_thread(void *arg)
317{
318 while (!kthread_should_stop() && !kill_test) {
319 complete(&read_start);
320
321 ring_buffer_consumer();
322
323 set_current_state(TASK_INTERRUPTIBLE);
324 if (kthread_should_stop() || kill_test)
325 break;
326
327 schedule();
328 __set_current_state(TASK_RUNNING);
329 }
330 __set_current_state(TASK_RUNNING);
331
332 if (kill_test)
333 wait_to_die();
334
335 return 0;
336}
337
338static int ring_buffer_producer_thread(void *arg)
339{
340 init_completion(&read_start);
341
342 while (!kthread_should_stop() && !kill_test) {
343 ring_buffer_reset(buffer);
344
345 if (consumer) {
346 smp_wmb();
347 wake_up_process(consumer);
348 wait_for_completion(&read_start);
349 }
350
351 ring_buffer_producer();
352
353 pr_info("Sleeping for 10 secs\n");
354 set_current_state(TASK_INTERRUPTIBLE);
355 schedule_timeout(HZ * SLEEP_TIME);
356 __set_current_state(TASK_RUNNING);
357 }
358
359 if (kill_test)
360 wait_to_die();
361
362 return 0;
363}
364
365static int __init ring_buffer_benchmark_init(void)
366{
367 int ret;
368
369 /* make a one meg buffer in overwite mode */
370 buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
371 if (!buffer)
372 return -ENOMEM;
373
374 if (!disable_reader) {
375 consumer = kthread_create(ring_buffer_consumer_thread,
376 NULL, "rb_consumer");
377 ret = PTR_ERR(consumer);
378 if (IS_ERR(consumer))
379 goto out_fail;
380 }
381
382 producer = kthread_run(ring_buffer_producer_thread,
383 NULL, "rb_producer");
384 ret = PTR_ERR(producer);
385
386 if (IS_ERR(producer))
387 goto out_kill;
388
389 return 0;
390
391 out_kill:
392 if (consumer)
393 kthread_stop(consumer);
394
395 out_fail:
396 ring_buffer_free(buffer);
397 return ret;
398}
399
400static void __exit ring_buffer_benchmark_exit(void)
401{
402 kthread_stop(producer);
403 if (consumer)
404 kthread_stop(consumer);
405 ring_buffer_free(buffer);
406}
407
408module_init(ring_buffer_benchmark_init);
409module_exit(ring_buffer_benchmark_exit);
410
411MODULE_AUTHOR("Steven Rostedt");
412MODULE_DESCRIPTION("ring_buffer_benchmark");
413MODULE_LICENSE("GPL");
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a884c09006c4..dd40d2320346 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -171,6 +171,13 @@ static struct trace_array global_trace;
171 171
172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); 172static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
173 173
174int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
175 struct ring_buffer_event *event)
176{
177 return filter_check_discard(call, rec, global_trace.buffer, event);
178}
179EXPORT_SYMBOL_GPL(filter_current_check_discard);
180
174cycle_t ftrace_now(int cpu) 181cycle_t ftrace_now(int cpu)
175{ 182{
176 u64 ts; 183 u64 ts;
@@ -255,7 +262,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
255 262
256/* trace_flags holds trace_options default values */ 263/* trace_flags holds trace_options default values */
257unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | 264unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
258 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; 265 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
266 TRACE_ITER_GRAPH_TIME;
259 267
260/** 268/**
261 * trace_wake_up - wake up tasks waiting for trace input 269 * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +325,7 @@ static const char *trace_options[] = {
317 "latency-format", 325 "latency-format",
318 "global-clock", 326 "global-clock",
319 "sleep-time", 327 "sleep-time",
328 "graph-time",
320 NULL 329 NULL
321}; 330};
322 331
@@ -402,17 +411,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
402 return cnt; 411 return cnt;
403} 412}
404 413
405static void
406trace_print_seq(struct seq_file *m, struct trace_seq *s)
407{
408 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
409
410 s->buffer[len] = 0;
411 seq_puts(m, s->buffer);
412
413 trace_seq_init(s);
414}
415
416/** 414/**
417 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 415 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
418 * @tr: tracer 416 * @tr: tracer
@@ -641,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr)
641 tracing_reset(tr, cpu); 639 tracing_reset(tr, cpu);
642} 640}
643 641
642void tracing_reset_current(int cpu)
643{
644 tracing_reset(&global_trace, cpu);
645}
646
647void tracing_reset_current_online_cpus(void)
648{
649 tracing_reset_online_cpus(&global_trace);
650}
651
644#define SAVED_CMDLINES 128 652#define SAVED_CMDLINES 128
645#define NO_CMDLINE_MAP UINT_MAX 653#define NO_CMDLINE_MAP UINT_MAX
646static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; 654static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -840,7 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
840} 848}
841 849
842struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 850struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
843 unsigned char type, 851 int type,
844 unsigned long len, 852 unsigned long len,
845 unsigned long flags, int pc) 853 unsigned long flags, int pc)
846{ 854{
@@ -883,30 +891,40 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
883} 891}
884 892
885struct ring_buffer_event * 893struct ring_buffer_event *
886trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, 894trace_current_buffer_lock_reserve(int type, unsigned long len,
887 unsigned long flags, int pc) 895 unsigned long flags, int pc)
888{ 896{
889 return trace_buffer_lock_reserve(&global_trace, 897 return trace_buffer_lock_reserve(&global_trace,
890 type, len, flags, pc); 898 type, len, flags, pc);
891} 899}
900EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
892 901
893void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, 902void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
894 unsigned long flags, int pc) 903 unsigned long flags, int pc)
895{ 904{
896 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); 905 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
897} 906}
907EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
898 908
899void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, 909void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
900 unsigned long flags, int pc) 910 unsigned long flags, int pc)
901{ 911{
902 return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); 912 __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
903} 913}
914EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
915
916void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
917{
918 ring_buffer_discard_commit(global_trace.buffer, event);
919}
920EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
904 921
905void 922void
906trace_function(struct trace_array *tr, 923trace_function(struct trace_array *tr,
907 unsigned long ip, unsigned long parent_ip, unsigned long flags, 924 unsigned long ip, unsigned long parent_ip, unsigned long flags,
908 int pc) 925 int pc)
909{ 926{
927 struct ftrace_event_call *call = &event_function;
910 struct ring_buffer_event *event; 928 struct ring_buffer_event *event;
911 struct ftrace_entry *entry; 929 struct ftrace_entry *entry;
912 930
@@ -921,7 +939,9 @@ trace_function(struct trace_array *tr,
921 entry = ring_buffer_event_data(event); 939 entry = ring_buffer_event_data(event);
922 entry->ip = ip; 940 entry->ip = ip;
923 entry->parent_ip = parent_ip; 941 entry->parent_ip = parent_ip;
924 ring_buffer_unlock_commit(tr->buffer, event); 942
943 if (!filter_check_discard(call, entry, tr->buffer, event))
944 ring_buffer_unlock_commit(tr->buffer, event);
925} 945}
926 946
927#ifdef CONFIG_FUNCTION_GRAPH_TRACER 947#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -930,6 +950,7 @@ static int __trace_graph_entry(struct trace_array *tr,
930 unsigned long flags, 950 unsigned long flags,
931 int pc) 951 int pc)
932{ 952{
953 struct ftrace_event_call *call = &event_funcgraph_entry;
933 struct ring_buffer_event *event; 954 struct ring_buffer_event *event;
934 struct ftrace_graph_ent_entry *entry; 955 struct ftrace_graph_ent_entry *entry;
935 956
@@ -942,7 +963,8 @@ static int __trace_graph_entry(struct trace_array *tr,
942 return 0; 963 return 0;
943 entry = ring_buffer_event_data(event); 964 entry = ring_buffer_event_data(event);
944 entry->graph_ent = *trace; 965 entry->graph_ent = *trace;
945 ring_buffer_unlock_commit(global_trace.buffer, event); 966 if (!filter_current_check_discard(call, entry, event))
967 ring_buffer_unlock_commit(global_trace.buffer, event);
946 968
947 return 1; 969 return 1;
948} 970}
@@ -952,6 +974,7 @@ static void __trace_graph_return(struct trace_array *tr,
952 unsigned long flags, 974 unsigned long flags,
953 int pc) 975 int pc)
954{ 976{
977 struct ftrace_event_call *call = &event_funcgraph_exit;
955 struct ring_buffer_event *event; 978 struct ring_buffer_event *event;
956 struct ftrace_graph_ret_entry *entry; 979 struct ftrace_graph_ret_entry *entry;
957 980
@@ -964,7 +987,8 @@ static void __trace_graph_return(struct trace_array *tr,
964 return; 987 return;
965 entry = ring_buffer_event_data(event); 988 entry = ring_buffer_event_data(event);
966 entry->ret = *trace; 989 entry->ret = *trace;
967 ring_buffer_unlock_commit(global_trace.buffer, event); 990 if (!filter_current_check_discard(call, entry, event))
991 ring_buffer_unlock_commit(global_trace.buffer, event);
968} 992}
969#endif 993#endif
970 994
@@ -982,6 +1006,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
982 int skip, int pc) 1006 int skip, int pc)
983{ 1007{
984#ifdef CONFIG_STACKTRACE 1008#ifdef CONFIG_STACKTRACE
1009 struct ftrace_event_call *call = &event_kernel_stack;
985 struct ring_buffer_event *event; 1010 struct ring_buffer_event *event;
986 struct stack_entry *entry; 1011 struct stack_entry *entry;
987 struct stack_trace trace; 1012 struct stack_trace trace;
@@ -999,7 +1024,8 @@ static void __ftrace_trace_stack(struct trace_array *tr,
999 trace.entries = entry->caller; 1024 trace.entries = entry->caller;
1000 1025
1001 save_stack_trace(&trace); 1026 save_stack_trace(&trace);
1002 ring_buffer_unlock_commit(tr->buffer, event); 1027 if (!filter_check_discard(call, entry, tr->buffer, event))
1028 ring_buffer_unlock_commit(tr->buffer, event);
1003#endif 1029#endif
1004} 1030}
1005 1031
@@ -1024,6 +1050,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1024 unsigned long flags, int pc) 1050 unsigned long flags, int pc)
1025{ 1051{
1026#ifdef CONFIG_STACKTRACE 1052#ifdef CONFIG_STACKTRACE
1053 struct ftrace_event_call *call = &event_user_stack;
1027 struct ring_buffer_event *event; 1054 struct ring_buffer_event *event;
1028 struct userstack_entry *entry; 1055 struct userstack_entry *entry;
1029 struct stack_trace trace; 1056 struct stack_trace trace;
@@ -1045,7 +1072,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
1045 trace.entries = entry->caller; 1072 trace.entries = entry->caller;
1046 1073
1047 save_stack_trace_user(&trace); 1074 save_stack_trace_user(&trace);
1048 ring_buffer_unlock_commit(tr->buffer, event); 1075 if (!filter_check_discard(call, entry, tr->buffer, event))
1076 ring_buffer_unlock_commit(tr->buffer, event);
1049#endif 1077#endif
1050} 1078}
1051 1079
@@ -1089,6 +1117,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
1089 struct task_struct *next, 1117 struct task_struct *next,
1090 unsigned long flags, int pc) 1118 unsigned long flags, int pc)
1091{ 1119{
1120 struct ftrace_event_call *call = &event_context_switch;
1092 struct ring_buffer_event *event; 1121 struct ring_buffer_event *event;
1093 struct ctx_switch_entry *entry; 1122 struct ctx_switch_entry *entry;
1094 1123
@@ -1104,7 +1133,9 @@ tracing_sched_switch_trace(struct trace_array *tr,
1104 entry->next_prio = next->prio; 1133 entry->next_prio = next->prio;
1105 entry->next_state = next->state; 1134 entry->next_state = next->state;
1106 entry->next_cpu = task_cpu(next); 1135 entry->next_cpu = task_cpu(next);
1107 trace_buffer_unlock_commit(tr, event, flags, pc); 1136
1137 if (!filter_check_discard(call, entry, tr->buffer, event))
1138 trace_buffer_unlock_commit(tr, event, flags, pc);
1108} 1139}
1109 1140
1110void 1141void
@@ -1113,6 +1144,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1113 struct task_struct *curr, 1144 struct task_struct *curr,
1114 unsigned long flags, int pc) 1145 unsigned long flags, int pc)
1115{ 1146{
1147 struct ftrace_event_call *call = &event_wakeup;
1116 struct ring_buffer_event *event; 1148 struct ring_buffer_event *event;
1117 struct ctx_switch_entry *entry; 1149 struct ctx_switch_entry *entry;
1118 1150
@@ -1129,7 +1161,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
1129 entry->next_state = wakee->state; 1161 entry->next_state = wakee->state;
1130 entry->next_cpu = task_cpu(wakee); 1162 entry->next_cpu = task_cpu(wakee);
1131 1163
1132 ring_buffer_unlock_commit(tr->buffer, event); 1164 if (!filter_check_discard(call, entry, tr->buffer, event))
1165 ring_buffer_unlock_commit(tr->buffer, event);
1133 ftrace_trace_stack(tr, flags, 6, pc); 1166 ftrace_trace_stack(tr, flags, 6, pc);
1134 ftrace_trace_userstack(tr, flags, pc); 1167 ftrace_trace_userstack(tr, flags, pc);
1135} 1168}
@@ -1230,11 +1263,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1230 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 1263 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
1231 static u32 trace_buf[TRACE_BUF_SIZE]; 1264 static u32 trace_buf[TRACE_BUF_SIZE];
1232 1265
1266 struct ftrace_event_call *call = &event_bprint;
1233 struct ring_buffer_event *event; 1267 struct ring_buffer_event *event;
1234 struct trace_array *tr = &global_trace; 1268 struct trace_array *tr = &global_trace;
1235 struct trace_array_cpu *data; 1269 struct trace_array_cpu *data;
1236 struct bprint_entry *entry; 1270 struct bprint_entry *entry;
1237 unsigned long flags; 1271 unsigned long flags;
1272 int disable;
1238 int resched; 1273 int resched;
1239 int cpu, len = 0, size, pc; 1274 int cpu, len = 0, size, pc;
1240 1275
@@ -1249,7 +1284,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1249 cpu = raw_smp_processor_id(); 1284 cpu = raw_smp_processor_id();
1250 data = tr->data[cpu]; 1285 data = tr->data[cpu];
1251 1286
1252 if (unlikely(atomic_read(&data->disabled))) 1287 disable = atomic_inc_return(&data->disabled);
1288 if (unlikely(disable != 1))
1253 goto out; 1289 goto out;
1254 1290
1255 /* Lockdep uses trace_printk for lock tracing */ 1291 /* Lockdep uses trace_printk for lock tracing */
@@ -1269,13 +1305,15 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1269 entry->fmt = fmt; 1305 entry->fmt = fmt;
1270 1306
1271 memcpy(entry->buf, trace_buf, sizeof(u32) * len); 1307 memcpy(entry->buf, trace_buf, sizeof(u32) * len);
1272 ring_buffer_unlock_commit(tr->buffer, event); 1308 if (!filter_check_discard(call, entry, tr->buffer, event))
1309 ring_buffer_unlock_commit(tr->buffer, event);
1273 1310
1274out_unlock: 1311out_unlock:
1275 __raw_spin_unlock(&trace_buf_lock); 1312 __raw_spin_unlock(&trace_buf_lock);
1276 local_irq_restore(flags); 1313 local_irq_restore(flags);
1277 1314
1278out: 1315out:
1316 atomic_dec_return(&data->disabled);
1279 ftrace_preempt_enable(resched); 1317 ftrace_preempt_enable(resched);
1280 unpause_graph_tracing(); 1318 unpause_graph_tracing();
1281 1319
@@ -1288,12 +1326,14 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1288 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; 1326 static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
1289 static char trace_buf[TRACE_BUF_SIZE]; 1327 static char trace_buf[TRACE_BUF_SIZE];
1290 1328
1329 struct ftrace_event_call *call = &event_print;
1291 struct ring_buffer_event *event; 1330 struct ring_buffer_event *event;
1292 struct trace_array *tr = &global_trace; 1331 struct trace_array *tr = &global_trace;
1293 struct trace_array_cpu *data; 1332 struct trace_array_cpu *data;
1294 int cpu, len = 0, size, pc; 1333 int cpu, len = 0, size, pc;
1295 struct print_entry *entry; 1334 struct print_entry *entry;
1296 unsigned long irq_flags; 1335 unsigned long irq_flags;
1336 int disable;
1297 1337
1298 if (tracing_disabled || tracing_selftest_running) 1338 if (tracing_disabled || tracing_selftest_running)
1299 return 0; 1339 return 0;
@@ -1303,7 +1343,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1303 cpu = raw_smp_processor_id(); 1343 cpu = raw_smp_processor_id();
1304 data = tr->data[cpu]; 1344 data = tr->data[cpu];
1305 1345
1306 if (unlikely(atomic_read(&data->disabled))) 1346 disable = atomic_inc_return(&data->disabled);
1347 if (unlikely(disable != 1))
1307 goto out; 1348 goto out;
1308 1349
1309 pause_graph_tracing(); 1350 pause_graph_tracing();
@@ -1323,13 +1364,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
1323 1364
1324 memcpy(&entry->buf, trace_buf, len); 1365 memcpy(&entry->buf, trace_buf, len);
1325 entry->buf[len] = 0; 1366 entry->buf[len] = 0;
1326 ring_buffer_unlock_commit(tr->buffer, event); 1367 if (!filter_check_discard(call, entry, tr->buffer, event))
1368 ring_buffer_unlock_commit(tr->buffer, event);
1327 1369
1328 out_unlock: 1370 out_unlock:
1329 __raw_spin_unlock(&trace_buf_lock); 1371 __raw_spin_unlock(&trace_buf_lock);
1330 raw_local_irq_restore(irq_flags); 1372 raw_local_irq_restore(irq_flags);
1331 unpause_graph_tracing(); 1373 unpause_graph_tracing();
1332 out: 1374 out:
1375 atomic_dec_return(&data->disabled);
1333 preempt_enable_notrace(); 1376 preempt_enable_notrace();
1334 1377
1335 return len; 1378 return len;
@@ -2397,6 +2440,56 @@ static const struct file_operations tracing_readme_fops = {
2397}; 2440};
2398 2441
2399static ssize_t 2442static ssize_t
2443tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
2444 size_t cnt, loff_t *ppos)
2445{
2446 char *buf_comm;
2447 char *file_buf;
2448 char *buf;
2449 int len = 0;
2450 int pid;
2451 int i;
2452
2453 file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
2454 if (!file_buf)
2455 return -ENOMEM;
2456
2457 buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
2458 if (!buf_comm) {
2459 kfree(file_buf);
2460 return -ENOMEM;
2461 }
2462
2463 buf = file_buf;
2464
2465 for (i = 0; i < SAVED_CMDLINES; i++) {
2466 int r;
2467
2468 pid = map_cmdline_to_pid[i];
2469 if (pid == -1 || pid == NO_CMDLINE_MAP)
2470 continue;
2471
2472 trace_find_cmdline(pid, buf_comm);
2473 r = sprintf(buf, "%d %s\n", pid, buf_comm);
2474 buf += r;
2475 len += r;
2476 }
2477
2478 len = simple_read_from_buffer(ubuf, cnt, ppos,
2479 file_buf, len);
2480
2481 kfree(file_buf);
2482 kfree(buf_comm);
2483
2484 return len;
2485}
2486
2487static const struct file_operations tracing_saved_cmdlines_fops = {
2488 .open = tracing_open_generic,
2489 .read = tracing_saved_cmdlines_read,
2490};
2491
2492static ssize_t
2400tracing_ctrl_read(struct file *filp, char __user *ubuf, 2493tracing_ctrl_read(struct file *filp, char __user *ubuf,
2401 size_t cnt, loff_t *ppos) 2494 size_t cnt, loff_t *ppos)
2402{ 2495{
@@ -3425,7 +3518,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3425 .spd_release = buffer_spd_release, 3518 .spd_release = buffer_spd_release,
3426 }; 3519 };
3427 struct buffer_ref *ref; 3520 struct buffer_ref *ref;
3428 int size, i; 3521 int entries, size, i;
3429 size_t ret; 3522 size_t ret;
3430 3523
3431 if (*ppos & (PAGE_SIZE - 1)) { 3524 if (*ppos & (PAGE_SIZE - 1)) {
@@ -3440,7 +3533,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3440 len &= PAGE_MASK; 3533 len &= PAGE_MASK;
3441 } 3534 }
3442 3535
3443 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { 3536 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3537
3538 for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
3444 struct page *page; 3539 struct page *page;
3445 int r; 3540 int r;
3446 3541
@@ -3457,7 +3552,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3457 } 3552 }
3458 3553
3459 r = ring_buffer_read_page(ref->buffer, &ref->page, 3554 r = ring_buffer_read_page(ref->buffer, &ref->page,
3460 len, info->cpu, 0); 3555 len, info->cpu, 1);
3461 if (r < 0) { 3556 if (r < 0) {
3462 ring_buffer_free_read_page(ref->buffer, 3557 ring_buffer_free_read_page(ref->buffer,
3463 ref->page); 3558 ref->page);
@@ -3481,6 +3576,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3481 spd.partial[i].private = (unsigned long)ref; 3576 spd.partial[i].private = (unsigned long)ref;
3482 spd.nr_pages++; 3577 spd.nr_pages++;
3483 *ppos += PAGE_SIZE; 3578 *ppos += PAGE_SIZE;
3579
3580 entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
3484 } 3581 }
3485 3582
3486 spd.nr_pages = i; 3583 spd.nr_pages = i;
@@ -3508,6 +3605,45 @@ static const struct file_operations tracing_buffers_fops = {
3508 .llseek = no_llseek, 3605 .llseek = no_llseek,
3509}; 3606};
3510 3607
3608static ssize_t
3609tracing_stats_read(struct file *filp, char __user *ubuf,
3610 size_t count, loff_t *ppos)
3611{
3612 unsigned long cpu = (unsigned long)filp->private_data;
3613 struct trace_array *tr = &global_trace;
3614 struct trace_seq *s;
3615 unsigned long cnt;
3616
3617 s = kmalloc(sizeof(*s), GFP_ATOMIC);
3618 if (!s)
3619 return -ENOMEM;
3620
3621 trace_seq_init(s);
3622
3623 cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
3624 trace_seq_printf(s, "entries: %ld\n", cnt);
3625
3626 cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
3627 trace_seq_printf(s, "overrun: %ld\n", cnt);
3628
3629 cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
3630 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
3631
3632 cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
3633 trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
3634
3635 count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
3636
3637 kfree(s);
3638
3639 return count;
3640}
3641
3642static const struct file_operations tracing_stats_fops = {
3643 .open = tracing_open_generic,
3644 .read = tracing_stats_read,
3645};
3646
3511#ifdef CONFIG_DYNAMIC_FTRACE 3647#ifdef CONFIG_DYNAMIC_FTRACE
3512 3648
3513int __weak ftrace_arch_read_dyn_info(char *buf, int size) 3649int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3597,7 +3733,7 @@ struct dentry *tracing_dentry_percpu(void)
3597static void tracing_init_debugfs_percpu(long cpu) 3733static void tracing_init_debugfs_percpu(long cpu)
3598{ 3734{
3599 struct dentry *d_percpu = tracing_dentry_percpu(); 3735 struct dentry *d_percpu = tracing_dentry_percpu();
3600 struct dentry *entry, *d_cpu; 3736 struct dentry *d_cpu;
3601 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 3737 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
3602 char cpu_dir[7]; 3738 char cpu_dir[7];
3603 3739
@@ -3612,21 +3748,18 @@ static void tracing_init_debugfs_percpu(long cpu)
3612 } 3748 }
3613 3749
3614 /* per cpu trace_pipe */ 3750 /* per cpu trace_pipe */
3615 entry = debugfs_create_file("trace_pipe", 0444, d_cpu, 3751 trace_create_file("trace_pipe", 0444, d_cpu,
3616 (void *) cpu, &tracing_pipe_fops); 3752 (void *) cpu, &tracing_pipe_fops);
3617 if (!entry)
3618 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3619 3753
3620 /* per cpu trace */ 3754 /* per cpu trace */
3621 entry = debugfs_create_file("trace", 0644, d_cpu, 3755 trace_create_file("trace", 0644, d_cpu,
3622 (void *) cpu, &tracing_fops); 3756 (void *) cpu, &tracing_fops);
3623 if (!entry)
3624 pr_warning("Could not create debugfs 'trace' entry\n");
3625 3757
3626 entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, 3758 trace_create_file("trace_pipe_raw", 0444, d_cpu,
3627 (void *) cpu, &tracing_buffers_fops); 3759 (void *) cpu, &tracing_buffers_fops);
3628 if (!entry) 3760
3629 pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); 3761 trace_create_file("stats", 0444, d_cpu,
3762 (void *) cpu, &tracing_stats_fops);
3630} 3763}
3631 3764
3632#ifdef CONFIG_FTRACE_SELFTEST 3765#ifdef CONFIG_FTRACE_SELFTEST
@@ -3782,6 +3915,22 @@ static const struct file_operations trace_options_core_fops = {
3782 .write = trace_options_core_write, 3915 .write = trace_options_core_write,
3783}; 3916};
3784 3917
3918struct dentry *trace_create_file(const char *name,
3919 mode_t mode,
3920 struct dentry *parent,
3921 void *data,
3922 const struct file_operations *fops)
3923{
3924 struct dentry *ret;
3925
3926 ret = debugfs_create_file(name, mode, parent, data, fops);
3927 if (!ret)
3928 pr_warning("Could not create debugfs '%s' entry\n", name);
3929
3930 return ret;
3931}
3932
3933
3785static struct dentry *trace_options_init_dentry(void) 3934static struct dentry *trace_options_init_dentry(void)
3786{ 3935{
3787 struct dentry *d_tracer; 3936 struct dentry *d_tracer;
@@ -3809,7 +3958,6 @@ create_trace_option_file(struct trace_option_dentry *topt,
3809 struct tracer_opt *opt) 3958 struct tracer_opt *opt)
3810{ 3959{
3811 struct dentry *t_options; 3960 struct dentry *t_options;
3812 struct dentry *entry;
3813 3961
3814 t_options = trace_options_init_dentry(); 3962 t_options = trace_options_init_dentry();
3815 if (!t_options) 3963 if (!t_options)
@@ -3818,11 +3966,9 @@ create_trace_option_file(struct trace_option_dentry *topt,
3818 topt->flags = flags; 3966 topt->flags = flags;
3819 topt->opt = opt; 3967 topt->opt = opt;
3820 3968
3821 entry = debugfs_create_file(opt->name, 0644, t_options, topt, 3969 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
3822 &trace_options_fops); 3970 &trace_options_fops);
3823 3971
3824 topt->entry = entry;
3825
3826} 3972}
3827 3973
3828static struct trace_option_dentry * 3974static struct trace_option_dentry *
@@ -3877,123 +4023,84 @@ static struct dentry *
3877create_trace_option_core_file(const char *option, long index) 4023create_trace_option_core_file(const char *option, long index)
3878{ 4024{
3879 struct dentry *t_options; 4025 struct dentry *t_options;
3880 struct dentry *entry;
3881 4026
3882 t_options = trace_options_init_dentry(); 4027 t_options = trace_options_init_dentry();
3883 if (!t_options) 4028 if (!t_options)
3884 return NULL; 4029 return NULL;
3885 4030
3886 entry = debugfs_create_file(option, 0644, t_options, (void *)index, 4031 return trace_create_file(option, 0644, t_options, (void *)index,
3887 &trace_options_core_fops); 4032 &trace_options_core_fops);
3888
3889 return entry;
3890} 4033}
3891 4034
3892static __init void create_trace_options_dir(void) 4035static __init void create_trace_options_dir(void)
3893{ 4036{
3894 struct dentry *t_options; 4037 struct dentry *t_options;
3895 struct dentry *entry;
3896 int i; 4038 int i;
3897 4039
3898 t_options = trace_options_init_dentry(); 4040 t_options = trace_options_init_dentry();
3899 if (!t_options) 4041 if (!t_options)
3900 return; 4042 return;
3901 4043
3902 for (i = 0; trace_options[i]; i++) { 4044 for (i = 0; trace_options[i]; i++)
3903 entry = create_trace_option_core_file(trace_options[i], i); 4045 create_trace_option_core_file(trace_options[i], i);
3904 if (!entry)
3905 pr_warning("Could not create debugfs %s entry\n",
3906 trace_options[i]);
3907 }
3908} 4046}
3909 4047
3910static __init int tracer_init_debugfs(void) 4048static __init int tracer_init_debugfs(void)
3911{ 4049{
3912 struct dentry *d_tracer; 4050 struct dentry *d_tracer;
3913 struct dentry *entry;
3914 int cpu; 4051 int cpu;
3915 4052
3916 d_tracer = tracing_init_dentry(); 4053 d_tracer = tracing_init_dentry();
3917 4054
3918 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, 4055 trace_create_file("tracing_enabled", 0644, d_tracer,
3919 &global_trace, &tracing_ctrl_fops); 4056 &global_trace, &tracing_ctrl_fops);
3920 if (!entry)
3921 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
3922 4057
3923 entry = debugfs_create_file("trace_options", 0644, d_tracer, 4058 trace_create_file("trace_options", 0644, d_tracer,
3924 NULL, &tracing_iter_fops); 4059 NULL, &tracing_iter_fops);
3925 if (!entry)
3926 pr_warning("Could not create debugfs 'trace_options' entry\n");
3927 4060
3928 create_trace_options_dir(); 4061 trace_create_file("tracing_cpumask", 0644, d_tracer,
4062 NULL, &tracing_cpumask_fops);
4063
4064 trace_create_file("trace", 0644, d_tracer,
4065 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
4066
4067 trace_create_file("available_tracers", 0444, d_tracer,
4068 &global_trace, &show_traces_fops);
3929 4069
3930 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 4070 trace_create_file("current_tracer", 0644, d_tracer,
3931 NULL, &tracing_cpumask_fops); 4071 &global_trace, &set_tracer_fops);
3932 if (!entry) 4072
3933 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); 4073 trace_create_file("tracing_max_latency", 0644, d_tracer,
3934 4074 &tracing_max_latency, &tracing_max_lat_fops);
3935 entry = debugfs_create_file("trace", 0644, d_tracer, 4075
3936 (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); 4076 trace_create_file("tracing_thresh", 0644, d_tracer,
3937 if (!entry) 4077 &tracing_thresh, &tracing_max_lat_fops);
3938 pr_warning("Could not create debugfs 'trace' entry\n"); 4078
3939 4079 trace_create_file("README", 0444, d_tracer,
3940 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 4080 NULL, &tracing_readme_fops);
3941 &global_trace, &show_traces_fops); 4081
3942 if (!entry) 4082 trace_create_file("trace_pipe", 0444, d_tracer,
3943 pr_warning("Could not create debugfs 'available_tracers' entry\n");
3944
3945 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
3946 &global_trace, &set_tracer_fops);
3947 if (!entry)
3948 pr_warning("Could not create debugfs 'current_tracer' entry\n");
3949
3950 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
3951 &tracing_max_latency,
3952 &tracing_max_lat_fops);
3953 if (!entry)
3954 pr_warning("Could not create debugfs "
3955 "'tracing_max_latency' entry\n");
3956
3957 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
3958 &tracing_thresh, &tracing_max_lat_fops);
3959 if (!entry)
3960 pr_warning("Could not create debugfs "
3961 "'tracing_thresh' entry\n");
3962 entry = debugfs_create_file("README", 0644, d_tracer,
3963 NULL, &tracing_readme_fops);
3964 if (!entry)
3965 pr_warning("Could not create debugfs 'README' entry\n");
3966
3967 entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
3968 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); 4083 (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
3969 if (!entry) 4084
3970 pr_warning("Could not create debugfs " 4085 trace_create_file("buffer_size_kb", 0644, d_tracer,
3971 "'trace_pipe' entry\n"); 4086 &global_trace, &tracing_entries_fops);
3972 4087
3973 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, 4088 trace_create_file("trace_marker", 0220, d_tracer,
3974 &global_trace, &tracing_entries_fops); 4089 NULL, &tracing_mark_fops);
3975 if (!entry) 4090
3976 pr_warning("Could not create debugfs " 4091 trace_create_file("saved_cmdlines", 0444, d_tracer,
3977 "'buffer_size_kb' entry\n"); 4092 NULL, &tracing_saved_cmdlines_fops);
3978
3979 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
3980 NULL, &tracing_mark_fops);
3981 if (!entry)
3982 pr_warning("Could not create debugfs "
3983 "'trace_marker' entry\n");
3984 4093
3985#ifdef CONFIG_DYNAMIC_FTRACE 4094#ifdef CONFIG_DYNAMIC_FTRACE
3986 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 4095 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3987 &ftrace_update_tot_cnt, 4096 &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
3988 &tracing_dyn_info_fops);
3989 if (!entry)
3990 pr_warning("Could not create debugfs "
3991 "'dyn_ftrace_total_info' entry\n");
3992#endif 4097#endif
3993#ifdef CONFIG_SYSPROF_TRACER 4098#ifdef CONFIG_SYSPROF_TRACER
3994 init_tracer_sysprof_debugfs(d_tracer); 4099 init_tracer_sysprof_debugfs(d_tracer);
3995#endif 4100#endif
3996 4101
4102 create_trace_options_dir();
4103
3997 for_each_tracing_cpu(cpu) 4104 for_each_tracing_cpu(cpu)
3998 tracing_init_debugfs_percpu(cpu); 4105 tracing_init_debugfs_percpu(cpu);
3999 4106
@@ -4064,7 +4171,8 @@ trace_printk_seq(struct trace_seq *s)
4064 4171
4065static void __ftrace_dump(bool disable_tracing) 4172static void __ftrace_dump(bool disable_tracing)
4066{ 4173{
4067 static DEFINE_SPINLOCK(ftrace_dump_lock); 4174 static raw_spinlock_t ftrace_dump_lock =
4175 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
4068 /* use static because iter can be a bit big for the stack */ 4176 /* use static because iter can be a bit big for the stack */
4069 static struct trace_iterator iter; 4177 static struct trace_iterator iter;
4070 unsigned int old_userobj; 4178 unsigned int old_userobj;
@@ -4073,7 +4181,8 @@ static void __ftrace_dump(bool disable_tracing)
4073 int cnt = 0, cpu; 4181 int cnt = 0, cpu;
4074 4182
4075 /* only one dump */ 4183 /* only one dump */
4076 spin_lock_irqsave(&ftrace_dump_lock, flags); 4184 local_irq_save(flags);
4185 __raw_spin_lock(&ftrace_dump_lock);
4077 if (dump_ran) 4186 if (dump_ran)
4078 goto out; 4187 goto out;
4079 4188
@@ -4145,7 +4254,8 @@ static void __ftrace_dump(bool disable_tracing)
4145 } 4254 }
4146 4255
4147 out: 4256 out:
4148 spin_unlock_irqrestore(&ftrace_dump_lock, flags); 4257 __raw_spin_unlock(&ftrace_dump_lock);
4258 local_irq_restore(flags);
4149} 4259}
4150 4260
4151/* By default: disable tracing after the dump */ 4261/* By default: disable tracing after the dump */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e685ac2b2ba1..ba25793ffe67 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,9 +9,12 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h> 11#include <trace/boot.h>
12#include <trace/kmemtrace.h> 12#include <linux/kmemtrace.h>
13#include <trace/power.h> 13#include <trace/power.h>
14 14
15#include <linux/trace_seq.h>
16#include <linux/ftrace_event.h>
17
15enum trace_type { 18enum trace_type {
16 __TRACE_FIRST_TYPE = 0, 19 __TRACE_FIRST_TYPE = 0,
17 20
@@ -42,20 +45,6 @@ enum trace_type {
42}; 45};
43 46
44/* 47/*
45 * The trace entry - the most basic unit of tracing. This is what
46 * is printed in the end as a single line in the trace output, such as:
47 *
48 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
49 */
50struct trace_entry {
51 unsigned char type;
52 unsigned char flags;
53 unsigned char preempt_count;
54 int pid;
55 int tgid;
56};
57
58/*
59 * Function trace entry - function address and parent function address: 48 * Function trace entry - function address and parent function address:
60 */ 49 */
61struct ftrace_entry { 50struct ftrace_entry {
@@ -263,8 +252,6 @@ struct trace_array_cpu {
263 char comm[TASK_COMM_LEN]; 252 char comm[TASK_COMM_LEN];
264}; 253};
265 254
266struct trace_iterator;
267
268/* 255/*
269 * The trace array - an array of per-CPU trace arrays. This is the 256 * The trace array - an array of per-CPU trace arrays. This is the
270 * highest level data structure that individual tracers deal with. 257 * highest level data structure that individual tracers deal with.
@@ -339,15 +326,6 @@ extern void __ftrace_bad_type(void);
339 __ftrace_bad_type(); \ 326 __ftrace_bad_type(); \
340 } while (0) 327 } while (0)
341 328
342/* Return values for print_line callback */
343enum print_line_t {
344 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
345 TRACE_TYPE_HANDLED = 1,
346 TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */
347 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
348};
349
350
351/* 329/*
352 * An option specific to a tracer. This is a boolean value. 330 * An option specific to a tracer. This is a boolean value.
353 * The bit is the bit index that sets its value on the 331 * The bit is the bit index that sets its value on the
@@ -423,60 +401,30 @@ struct tracer {
423 struct tracer_stat *stats; 401 struct tracer_stat *stats;
424}; 402};
425 403
426struct trace_seq {
427 unsigned char buffer[PAGE_SIZE];
428 unsigned int len;
429 unsigned int readpos;
430};
431
432static inline void
433trace_seq_init(struct trace_seq *s)
434{
435 s->len = 0;
436 s->readpos = 0;
437}
438
439 404
440#define TRACE_PIPE_ALL_CPU -1 405#define TRACE_PIPE_ALL_CPU -1
441 406
442/*
443 * Trace iterator - used by printout routines who present trace
444 * results to users and which routines might sleep, etc:
445 */
446struct trace_iterator {
447 struct trace_array *tr;
448 struct tracer *trace;
449 void *private;
450 int cpu_file;
451 struct mutex mutex;
452 struct ring_buffer_iter *buffer_iter[NR_CPUS];
453
454 /* The below is zeroed out in pipe_read */
455 struct trace_seq seq;
456 struct trace_entry *ent;
457 int cpu;
458 u64 ts;
459
460 unsigned long iter_flags;
461 loff_t pos;
462 long idx;
463
464 cpumask_var_t started;
465};
466
467int tracer_init(struct tracer *t, struct trace_array *tr); 407int tracer_init(struct tracer *t, struct trace_array *tr);
468int tracing_is_enabled(void); 408int tracing_is_enabled(void);
469void trace_wake_up(void); 409void trace_wake_up(void);
470void tracing_reset(struct trace_array *tr, int cpu); 410void tracing_reset(struct trace_array *tr, int cpu);
471void tracing_reset_online_cpus(struct trace_array *tr); 411void tracing_reset_online_cpus(struct trace_array *tr);
412void tracing_reset_current(int cpu);
413void tracing_reset_current_online_cpus(void);
472int tracing_open_generic(struct inode *inode, struct file *filp); 414int tracing_open_generic(struct inode *inode, struct file *filp);
415struct dentry *trace_create_file(const char *name,
416 mode_t mode,
417 struct dentry *parent,
418 void *data,
419 const struct file_operations *fops);
420
473struct dentry *tracing_init_dentry(void); 421struct dentry *tracing_init_dentry(void);
474void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 422void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
475 423
476struct ring_buffer_event; 424struct ring_buffer_event;
477 425
478struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 426struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
479 unsigned char type, 427 int type,
480 unsigned long len, 428 unsigned long len,
481 unsigned long flags, 429 unsigned long flags,
482 int pc); 430 int pc);
@@ -484,14 +432,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
484 struct ring_buffer_event *event, 432 struct ring_buffer_event *event,
485 unsigned long flags, int pc); 433 unsigned long flags, int pc);
486 434
487struct ring_buffer_event *
488trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
489 unsigned long flags, int pc);
490void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
491 unsigned long flags, int pc);
492void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
493 unsigned long flags, int pc);
494
495struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, 435struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
496 struct trace_array_cpu *data); 436 struct trace_array_cpu *data);
497 437
@@ -514,7 +454,6 @@ void tracing_sched_switch_trace(struct trace_array *tr,
514 struct task_struct *prev, 454 struct task_struct *prev,
515 struct task_struct *next, 455 struct task_struct *next,
516 unsigned long flags, int pc); 456 unsigned long flags, int pc);
517void tracing_record_cmdline(struct task_struct *tsk);
518 457
519void tracing_sched_wakeup_trace(struct trace_array *tr, 458void tracing_sched_wakeup_trace(struct trace_array *tr,
520 struct task_struct *wakee, 459 struct task_struct *wakee,
@@ -613,6 +552,8 @@ extern unsigned long trace_flags;
613/* Standard output formatting function used for function return traces */ 552/* Standard output formatting function used for function return traces */
614#ifdef CONFIG_FUNCTION_GRAPH_TRACER 553#ifdef CONFIG_FUNCTION_GRAPH_TRACER
615extern enum print_line_t print_graph_function(struct trace_iterator *iter); 554extern enum print_line_t print_graph_function(struct trace_iterator *iter);
555extern enum print_line_t
556trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
616 557
617#ifdef CONFIG_DYNAMIC_FTRACE 558#ifdef CONFIG_DYNAMIC_FTRACE
618/* TODO: make this variable */ 559/* TODO: make this variable */
@@ -644,7 +585,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
644 return 1; 585 return 1;
645} 586}
646#endif /* CONFIG_DYNAMIC_FTRACE */ 587#endif /* CONFIG_DYNAMIC_FTRACE */
647
648#else /* CONFIG_FUNCTION_GRAPH_TRACER */ 588#else /* CONFIG_FUNCTION_GRAPH_TRACER */
649static inline enum print_line_t 589static inline enum print_line_t
650print_graph_function(struct trace_iterator *iter) 590print_graph_function(struct trace_iterator *iter)
@@ -692,6 +632,7 @@ enum trace_iterator_flags {
692 TRACE_ITER_LATENCY_FMT = 0x40000, 632 TRACE_ITER_LATENCY_FMT = 0x40000,
693 TRACE_ITER_GLOBAL_CLK = 0x80000, 633 TRACE_ITER_GLOBAL_CLK = 0x80000,
694 TRACE_ITER_SLEEP_TIME = 0x100000, 634 TRACE_ITER_SLEEP_TIME = 0x100000,
635 TRACE_ITER_GRAPH_TIME = 0x200000,
695}; 636};
696 637
697/* 638/*
@@ -790,103 +731,113 @@ struct ftrace_event_field {
790 char *type; 731 char *type;
791 int offset; 732 int offset;
792 int size; 733 int size;
734 int is_signed;
793}; 735};
794 736
795struct ftrace_event_call { 737struct event_filter {
796 char *name; 738 int n_preds;
797 char *system;
798 struct dentry *dir;
799 int enabled;
800 int (*regfunc)(void);
801 void (*unregfunc)(void);
802 int id;
803 int (*raw_init)(void);
804 int (*show_format)(struct trace_seq *s);
805 int (*define_fields)(void);
806 struct list_head fields;
807 struct filter_pred **preds; 739 struct filter_pred **preds;
808 740 char *filter_string;
809#ifdef CONFIG_EVENT_PROFILE
810 atomic_t profile_count;
811 int (*profile_enable)(struct ftrace_event_call *);
812 void (*profile_disable)(struct ftrace_event_call *);
813#endif
814}; 741};
815 742
816struct event_subsystem { 743struct event_subsystem {
817 struct list_head list; 744 struct list_head list;
818 const char *name; 745 const char *name;
819 struct dentry *entry; 746 struct dentry *entry;
820 struct filter_pred **preds; 747 void *filter;
821}; 748};
822 749
823#define events_for_each(event) \
824 for (event = __start_ftrace_events; \
825 (unsigned long)event < (unsigned long)__stop_ftrace_events; \
826 event++)
827
828#define MAX_FILTER_PRED 8
829
830struct filter_pred; 750struct filter_pred;
831 751
832typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); 752typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
753 int val1, int val2);
833 754
834struct filter_pred { 755struct filter_pred {
835 filter_pred_fn_t fn; 756 filter_pred_fn_t fn;
836 u64 val; 757 u64 val;
837 char *str_val; 758 char str_val[MAX_FILTER_STR_VAL];
838 int str_len; 759 int str_len;
839 char *field_name; 760 char *field_name;
840 int offset; 761 int offset;
841 int not; 762 int not;
842 int or; 763 int op;
843 int compound; 764 int pop_n;
844 int clear;
845}; 765};
846 766
847int trace_define_field(struct ftrace_event_call *call, char *type, 767extern void print_event_filter(struct ftrace_event_call *call,
848 char *name, int offset, int size);
849extern void filter_free_pred(struct filter_pred *pred);
850extern void filter_print_preds(struct filter_pred **preds,
851 struct trace_seq *s); 768 struct trace_seq *s);
852extern int filter_parse(char **pbuf, struct filter_pred *pred); 769extern int apply_event_filter(struct ftrace_event_call *call,
853extern int filter_add_pred(struct ftrace_event_call *call, 770 char *filter_string);
854 struct filter_pred *pred); 771extern int apply_subsystem_event_filter(struct event_subsystem *system,
855extern void filter_free_preds(struct ftrace_event_call *call); 772 char *filter_string);
856extern int filter_match_preds(struct ftrace_event_call *call, void *rec); 773extern void print_subsystem_event_filter(struct event_subsystem *system,
857extern void filter_free_subsystem_preds(struct event_subsystem *system); 774 struct trace_seq *s);
858extern int filter_add_subsystem_pred(struct event_subsystem *system, 775
859 struct filter_pred *pred); 776static inline int
860 777filter_check_discard(struct ftrace_event_call *call, void *rec,
861void event_trace_printk(unsigned long ip, const char *fmt, ...); 778 struct ring_buffer *buffer,
862extern struct ftrace_event_call __start_ftrace_events[]; 779 struct ring_buffer_event *event)
863extern struct ftrace_event_call __stop_ftrace_events[]; 780{
864 781 if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
865#define for_each_event(event) \ 782 ring_buffer_discard_commit(buffer, event);
866 for (event = __start_ftrace_events; \ 783 return 1;
867 (unsigned long)event < (unsigned long)__stop_ftrace_events; \ 784 }
868 event++) 785
786 return 0;
787}
788
789#define DEFINE_COMPARISON_PRED(type) \
790static int filter_pred_##type(struct filter_pred *pred, void *event, \
791 int val1, int val2) \
792{ \
793 type *addr = (type *)(event + pred->offset); \
794 type val = (type)pred->val; \
795 int match = 0; \
796 \
797 switch (pred->op) { \
798 case OP_LT: \
799 match = (*addr < val); \
800 break; \
801 case OP_LE: \
802 match = (*addr <= val); \
803 break; \
804 case OP_GT: \
805 match = (*addr > val); \
806 break; \
807 case OP_GE: \
808 match = (*addr >= val); \
809 break; \
810 default: \
811 break; \
812 } \
813 \
814 return match; \
815}
816
817#define DEFINE_EQUALITY_PRED(size) \
818static int filter_pred_##size(struct filter_pred *pred, void *event, \
819 int val1, int val2) \
820{ \
821 u##size *addr = (u##size *)(event + pred->offset); \
822 u##size val = (u##size)pred->val; \
823 int match; \
824 \
825 match = (val == *addr) ^ pred->not; \
826 \
827 return match; \
828}
829
830extern struct mutex event_mutex;
831extern struct list_head ftrace_events;
869 832
870extern const char *__start___trace_bprintk_fmt[]; 833extern const char *__start___trace_bprintk_fmt[];
871extern const char *__stop___trace_bprintk_fmt[]; 834extern const char *__stop___trace_bprintk_fmt[];
872 835
873/* 836#undef TRACE_EVENT_FORMAT
874 * The double __builtin_constant_p is because gcc will give us an error 837#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
875 * if we try to allocate the static variable to fmt if it is not a 838 extern struct ftrace_event_call event_##call;
876 * constant. Even with the outer if statement optimizing out. 839#undef TRACE_EVENT_FORMAT_NOFILTER
877 */ 840#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
878#define event_trace_printk(ip, fmt, args...) \ 841#include "trace_event_types.h"
879do { \
880 __trace_printk_check_format(fmt, ##args); \
881 tracing_record_cmdline(current); \
882 if (__builtin_constant_p(fmt)) { \
883 static const char *trace_printk_fmt \
884 __attribute__((section("__trace_printk_fmt"))) = \
885 __builtin_constant_p(fmt) ? fmt : NULL; \
886 \
887 __trace_bprintk(ip, trace_printk_fmt, ##args); \
888 } else \
889 __trace_printk(ip, fmt, ##args); \
890} while (0)
891 842
892#endif /* _LINUX_KERNEL_TRACE_H */ 843#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 7a30fc4c3642..a29ef23ffb47 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <linux/kallsyms.h> 11#include <linux/kallsyms.h>
12#include <linux/time.h>
12 13
13#include "trace.h" 14#include "trace.h"
14#include "trace_output.h" 15#include "trace_output.h"
@@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter)
67 trace_assign_type(field, entry); 68 trace_assign_type(field, entry);
68 call = &field->boot_call; 69 call = &field->boot_call;
69 ts = iter->ts; 70 ts = iter->ts;
70 nsec_rem = do_div(ts, 1000000000); 71 nsec_rem = do_div(ts, NSEC_PER_SEC);
71 72
72 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 73 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
73 (unsigned long)ts, nsec_rem, call->func, call->caller); 74 (unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter)
92 trace_assign_type(field, entry); 93 trace_assign_type(field, entry);
93 init_ret = &field->boot_ret; 94 init_ret = &field->boot_ret;
94 ts = iter->ts; 95 ts = iter->ts;
95 nsec_rem = do_div(ts, 1000000000); 96 nsec_rem = do_div(ts, NSEC_PER_SEC);
96 97
97 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 98 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
98 "returned %d after %llu msecs\n", 99 "returned %d after %llu msecs\n",
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 8333715e4066..7a7a9fd249a9 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -30,6 +30,7 @@ static struct trace_array *branch_tracer;
30static void 30static void
31probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) 31probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
32{ 32{
33 struct ftrace_event_call *call = &event_branch;
33 struct trace_array *tr = branch_tracer; 34 struct trace_array *tr = branch_tracer;
34 struct ring_buffer_event *event; 35 struct ring_buffer_event *event;
35 struct trace_branch *entry; 36 struct trace_branch *entry;
@@ -73,7 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
73 entry->line = f->line; 74 entry->line = f->line;
74 entry->correct = val == expect; 75 entry->correct = val == expect;
75 76
76 ring_buffer_unlock_commit(tr->buffer, event); 77 if (!filter_check_discard(call, entry, tr->buffer, event))
78 ring_buffer_unlock_commit(tr->buffer, event);
77 79
78 out: 80 out:
79 atomic_dec(&tr->data[cpu]->disabled); 81 atomic_dec(&tr->data[cpu]->disabled);
@@ -271,7 +273,7 @@ static int branch_stat_show(struct seq_file *m, void *v)
271 return 0; 273 return 0;
272} 274}
273 275
274static void *annotated_branch_stat_start(void) 276static void *annotated_branch_stat_start(struct tracer_stat *trace)
275{ 277{
276 return __start_annotated_branch_profile; 278 return __start_annotated_branch_profile;
277} 279}
@@ -346,7 +348,7 @@ static int all_branch_stat_headers(struct seq_file *m)
346 return 0; 348 return 0;
347} 349}
348 350
349static void *all_branch_stat_start(void) 351static void *all_branch_stat_start(struct tracer_stat *trace)
350{ 352{
351 return __start_branch_profile; 353 return __start_branch_profile;
352} 354}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 22cba9970776..5b5895afecfe 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -10,22 +10,30 @@
10int ftrace_profile_enable(int event_id) 10int ftrace_profile_enable(int event_id)
11{ 11{
12 struct ftrace_event_call *event; 12 struct ftrace_event_call *event;
13 int ret = -EINVAL;
13 14
14 for_each_event(event) { 15 mutex_lock(&event_mutex);
15 if (event->id == event_id) 16 list_for_each_entry(event, &ftrace_events, list) {
16 return event->profile_enable(event); 17 if (event->id == event_id) {
18 ret = event->profile_enable(event);
19 break;
20 }
17 } 21 }
22 mutex_unlock(&event_mutex);
18 23
19 return -EINVAL; 24 return ret;
20} 25}
21 26
22void ftrace_profile_disable(int event_id) 27void ftrace_profile_disable(int event_id)
23{ 28{
24 struct ftrace_event_call *event; 29 struct ftrace_event_call *event;
25 30
26 for_each_event(event) { 31 mutex_lock(&event_mutex);
27 if (event->id == event_id) 32 list_for_each_entry(event, &ftrace_events, list) {
28 return event->profile_disable(event); 33 if (event->id == event_id) {
34 event->profile_disable(event);
35 break;
36 }
29 } 37 }
38 mutex_unlock(&event_mutex);
30} 39}
31
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index fd78bee71dd7..5e32e375134d 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") 57 TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]")
58); 58);
59 59
60TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, 60TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
61 TRACE_STRUCT( 61 TRACE_STRUCT(
62 TRACE_FIELD(unsigned long, arg1, arg1) 62 TRACE_FIELD(unsigned long, arg1, arg1)
63 TRACE_FIELD(unsigned long, arg2, arg2) 63 TRACE_FIELD(unsigned long, arg2, arg2)
@@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
122TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, 122TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
123 TRACE_STRUCT( 123 TRACE_STRUCT(
124 TRACE_FIELD(unsigned int, line, line) 124 TRACE_FIELD(unsigned int, line, line)
125 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) 125 TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
126 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) 126 TRACE_FUNC_SIZE+1, func)
127 TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
128 TRACE_FUNC_SIZE+1, file)
127 TRACE_FIELD(char, correct, correct) 129 TRACE_FIELD(char, correct, correct)
128 ), 130 ),
129 TP_RAW_FMT("%u:%s:%s (%u)") 131 TP_RAW_FMT("%u:%s:%s (%u)")
@@ -139,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
139 141
140TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, 142TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
141 TRACE_STRUCT( 143 TRACE_STRUCT(
142 TRACE_FIELD(ktime_t, state_data.stamp, stamp) 144 TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
143 TRACE_FIELD(ktime_t, state_data.end, end) 145 TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
144 TRACE_FIELD(int, state_data.type, type) 146 TRACE_FIELD(int, state_data.type, type)
145 TRACE_FIELD(int, state_data.state, state) 147 TRACE_FIELD(int, state_data.state, state)
146 ), 148 ),
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 576f4fa2af0d..87feb0117ce2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -8,19 +8,25 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/workqueue.h>
12#include <linux/spinlock.h>
13#include <linux/kthread.h>
11#include <linux/debugfs.h> 14#include <linux/debugfs.h>
12#include <linux/uaccess.h> 15#include <linux/uaccess.h>
13#include <linux/module.h> 16#include <linux/module.h>
14#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/delay.h>
15 19
16#include "trace_output.h" 20#include "trace_output.h"
17 21
18#define TRACE_SYSTEM "TRACE_SYSTEM" 22#define TRACE_SYSTEM "TRACE_SYSTEM"
19 23
20static DEFINE_MUTEX(event_mutex); 24DEFINE_MUTEX(event_mutex);
25
26LIST_HEAD(ftrace_events);
21 27
22int trace_define_field(struct ftrace_event_call *call, char *type, 28int trace_define_field(struct ftrace_event_call *call, char *type,
23 char *name, int offset, int size) 29 char *name, int offset, int size, int is_signed)
24{ 30{
25 struct ftrace_event_field *field; 31 struct ftrace_event_field *field;
26 32
@@ -38,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
38 44
39 field->offset = offset; 45 field->offset = offset;
40 field->size = size; 46 field->size = size;
47 field->is_signed = is_signed;
41 list_add(&field->link, &call->fields); 48 list_add(&field->link, &call->fields);
42 49
43 return 0; 50 return 0;
@@ -51,20 +58,37 @@ err:
51 58
52 return -ENOMEM; 59 return -ENOMEM;
53} 60}
61EXPORT_SYMBOL_GPL(trace_define_field);
54 62
55static void ftrace_clear_events(void) 63#ifdef CONFIG_MODULES
64
65static void trace_destroy_fields(struct ftrace_event_call *call)
56{ 66{
57 struct ftrace_event_call *call = (void *)__start_ftrace_events; 67 struct ftrace_event_field *field, *next;
58 68
69 list_for_each_entry_safe(field, next, &call->fields, link) {
70 list_del(&field->link);
71 kfree(field->type);
72 kfree(field->name);
73 kfree(field);
74 }
75}
76
77#endif /* CONFIG_MODULES */
78
79static void ftrace_clear_events(void)
80{
81 struct ftrace_event_call *call;
59 82
60 while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { 83 mutex_lock(&event_mutex);
84 list_for_each_entry(call, &ftrace_events, list) {
61 85
62 if (call->enabled) { 86 if (call->enabled) {
63 call->enabled = 0; 87 call->enabled = 0;
64 call->unregfunc(); 88 call->unregfunc();
65 } 89 }
66 call++;
67 } 90 }
91 mutex_unlock(&event_mutex);
68} 92}
69 93
70static void ftrace_event_enable_disable(struct ftrace_event_call *call, 94static void ftrace_event_enable_disable(struct ftrace_event_call *call,
@@ -89,7 +113,7 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call,
89 113
90static int ftrace_set_clr_event(char *buf, int set) 114static int ftrace_set_clr_event(char *buf, int set)
91{ 115{
92 struct ftrace_event_call *call = __start_ftrace_events; 116 struct ftrace_event_call *call;
93 char *event = NULL, *sub = NULL, *match; 117 char *event = NULL, *sub = NULL, *match;
94 int ret = -EINVAL; 118 int ret = -EINVAL;
95 119
@@ -118,7 +142,7 @@ static int ftrace_set_clr_event(char *buf, int set)
118 } 142 }
119 143
120 mutex_lock(&event_mutex); 144 mutex_lock(&event_mutex);
121 for_each_event(call) { 145 list_for_each_entry(call, &ftrace_events, list) {
122 146
123 if (!call->name || !call->regfunc) 147 if (!call->name || !call->regfunc)
124 continue; 148 continue;
@@ -224,15 +248,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
224static void * 248static void *
225t_next(struct seq_file *m, void *v, loff_t *pos) 249t_next(struct seq_file *m, void *v, loff_t *pos)
226{ 250{
227 struct ftrace_event_call *call = m->private; 251 struct list_head *list = m->private;
228 struct ftrace_event_call *next = call; 252 struct ftrace_event_call *call;
229 253
230 (*pos)++; 254 (*pos)++;
231 255
232 for (;;) { 256 for (;;) {
233 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) 257 if (list == &ftrace_events)
234 return NULL; 258 return NULL;
235 259
260 call = list_entry(list, struct ftrace_event_call, list);
261
236 /* 262 /*
237 * The ftrace subsystem is for showing formats only. 263 * The ftrace subsystem is for showing formats only.
238 * They can not be enabled or disabled via the event files. 264 * They can not be enabled or disabled via the event files.
@@ -240,45 +266,51 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
240 if (call->regfunc) 266 if (call->regfunc)
241 break; 267 break;
242 268
243 call++; 269 list = list->next;
244 next = call;
245 } 270 }
246 271
247 m->private = ++next; 272 m->private = list->next;
248 273
249 return call; 274 return call;
250} 275}
251 276
252static void *t_start(struct seq_file *m, loff_t *pos) 277static void *t_start(struct seq_file *m, loff_t *pos)
253{ 278{
279 mutex_lock(&event_mutex);
280 if (*pos == 0)
281 m->private = ftrace_events.next;
254 return t_next(m, NULL, pos); 282 return t_next(m, NULL, pos);
255} 283}
256 284
257static void * 285static void *
258s_next(struct seq_file *m, void *v, loff_t *pos) 286s_next(struct seq_file *m, void *v, loff_t *pos)
259{ 287{
260 struct ftrace_event_call *call = m->private; 288 struct list_head *list = m->private;
261 struct ftrace_event_call *next; 289 struct ftrace_event_call *call;
262 290
263 (*pos)++; 291 (*pos)++;
264 292
265 retry: 293 retry:
266 if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) 294 if (list == &ftrace_events)
267 return NULL; 295 return NULL;
268 296
297 call = list_entry(list, struct ftrace_event_call, list);
298
269 if (!call->enabled) { 299 if (!call->enabled) {
270 call++; 300 list = list->next;
271 goto retry; 301 goto retry;
272 } 302 }
273 303
274 next = call; 304 m->private = list->next;
275 m->private = ++next;
276 305
277 return call; 306 return call;
278} 307}
279 308
280static void *s_start(struct seq_file *m, loff_t *pos) 309static void *s_start(struct seq_file *m, loff_t *pos)
281{ 310{
311 mutex_lock(&event_mutex);
312 if (*pos == 0)
313 m->private = ftrace_events.next;
282 return s_next(m, NULL, pos); 314 return s_next(m, NULL, pos);
283} 315}
284 316
@@ -295,12 +327,12 @@ static int t_show(struct seq_file *m, void *v)
295 327
296static void t_stop(struct seq_file *m, void *p) 328static void t_stop(struct seq_file *m, void *p)
297{ 329{
330 mutex_unlock(&event_mutex);
298} 331}
299 332
300static int 333static int
301ftrace_event_seq_open(struct inode *inode, struct file *file) 334ftrace_event_seq_open(struct inode *inode, struct file *file)
302{ 335{
303 int ret;
304 const struct seq_operations *seq_ops; 336 const struct seq_operations *seq_ops;
305 337
306 if ((file->f_mode & FMODE_WRITE) && 338 if ((file->f_mode & FMODE_WRITE) &&
@@ -308,13 +340,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
308 ftrace_clear_events(); 340 ftrace_clear_events();
309 341
310 seq_ops = inode->i_private; 342 seq_ops = inode->i_private;
311 ret = seq_open(file, seq_ops); 343 return seq_open(file, seq_ops);
312 if (!ret) {
313 struct seq_file *m = file->private_data;
314
315 m->private = __start_ftrace_events;
316 }
317 return ret;
318} 344}
319 345
320static ssize_t 346static ssize_t
@@ -374,8 +400,138 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
374 return cnt; 400 return cnt;
375} 401}
376 402
403static ssize_t
404system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
405 loff_t *ppos)
406{
407 const char *system = filp->private_data;
408 struct ftrace_event_call *call;
409 char buf[2];
410 int set = -1;
411 int all = 0;
412 int ret;
413
414 if (system[0] == '*')
415 all = 1;
416
417 mutex_lock(&event_mutex);
418 list_for_each_entry(call, &ftrace_events, list) {
419 if (!call->name || !call->regfunc)
420 continue;
421
422 if (!all && strcmp(call->system, system) != 0)
423 continue;
424
425 /*
426 * We need to find out if all the events are set
427 * or if all events or cleared, or if we have
428 * a mixture.
429 */
430 if (call->enabled) {
431 switch (set) {
432 case -1:
433 set = 1;
434 break;
435 case 0:
436 set = 2;
437 break;
438 }
439 } else {
440 switch (set) {
441 case -1:
442 set = 0;
443 break;
444 case 1:
445 set = 2;
446 break;
447 }
448 }
449 /*
450 * If we have a mixture, no need to look further.
451 */
452 if (set == 2)
453 break;
454 }
455 mutex_unlock(&event_mutex);
456
457 buf[1] = '\n';
458 switch (set) {
459 case 0:
460 buf[0] = '0';
461 break;
462 case 1:
463 buf[0] = '1';
464 break;
465 case 2:
466 buf[0] = 'X';
467 break;
468 default:
469 buf[0] = '?';
470 }
471
472 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
473
474 return ret;
475}
476
477static ssize_t
478system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
479 loff_t *ppos)
480{
481 const char *system = filp->private_data;
482 unsigned long val;
483 char *command;
484 char buf[64];
485 ssize_t ret;
486
487 if (cnt >= sizeof(buf))
488 return -EINVAL;
489
490 if (copy_from_user(&buf, ubuf, cnt))
491 return -EFAULT;
492
493 buf[cnt] = 0;
494
495 ret = strict_strtoul(buf, 10, &val);
496 if (ret < 0)
497 return ret;
498
499 ret = tracing_update_buffers();
500 if (ret < 0)
501 return ret;
502
503 switch (val) {
504 case 0:
505 case 1:
506 break;
507
508 default:
509 return -EINVAL;
510 }
511
512 command = kstrdup(system, GFP_KERNEL);
513 if (!command)
514 return -ENOMEM;
515
516 ret = ftrace_set_clr_event(command, val);
517 if (ret)
518 goto out_free;
519
520 ret = cnt;
521
522 out_free:
523 kfree(command);
524
525 *ppos += cnt;
526
527 return ret;
528}
529
530extern char *__bad_type_size(void);
531
377#undef FIELD 532#undef FIELD
378#define FIELD(type, name) \ 533#define FIELD(type, name) \
534 sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
379 #type, "common_" #name, offsetof(typeof(field), name), \ 535 #type, "common_" #name, offsetof(typeof(field), name), \
380 sizeof(field.name) 536 sizeof(field.name)
381 537
@@ -391,7 +547,7 @@ static int trace_write_header(struct trace_seq *s)
391 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 547 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
392 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" 548 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
393 "\n", 549 "\n",
394 FIELD(unsigned char, type), 550 FIELD(unsigned short, type),
395 FIELD(unsigned char, flags), 551 FIELD(unsigned char, flags),
396 FIELD(unsigned char, preempt_count), 552 FIELD(unsigned char, preempt_count),
397 FIELD(int, pid), 553 FIELD(int, pid),
@@ -481,7 +637,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
481 637
482 trace_seq_init(s); 638 trace_seq_init(s);
483 639
484 filter_print_preds(call->preds, s); 640 print_event_filter(call, s);
485 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); 641 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
486 642
487 kfree(s); 643 kfree(s);
@@ -494,38 +650,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
494 loff_t *ppos) 650 loff_t *ppos)
495{ 651{
496 struct ftrace_event_call *call = filp->private_data; 652 struct ftrace_event_call *call = filp->private_data;
497 char buf[64], *pbuf = buf; 653 char *buf;
498 struct filter_pred *pred;
499 int err; 654 int err;
500 655
501 if (cnt >= sizeof(buf)) 656 if (cnt >= PAGE_SIZE)
502 return -EINVAL; 657 return -EINVAL;
503 658
504 if (copy_from_user(&buf, ubuf, cnt)) 659 buf = (char *)__get_free_page(GFP_TEMPORARY);
505 return -EFAULT; 660 if (!buf)
506 buf[cnt] = '\0';
507
508 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
509 if (!pred)
510 return -ENOMEM; 661 return -ENOMEM;
511 662
512 err = filter_parse(&pbuf, pred); 663 if (copy_from_user(buf, ubuf, cnt)) {
513 if (err < 0) { 664 free_page((unsigned long) buf);
514 filter_free_pred(pred); 665 return -EFAULT;
515 return err;
516 }
517
518 if (pred->clear) {
519 filter_free_preds(call);
520 filter_free_pred(pred);
521 return cnt;
522 } 666 }
667 buf[cnt] = '\0';
523 668
524 err = filter_add_pred(call, pred); 669 err = apply_event_filter(call, buf);
525 if (err < 0) { 670 free_page((unsigned long) buf);
526 filter_free_pred(pred); 671 if (err < 0)
527 return err; 672 return err;
528 }
529 673
530 *ppos += cnt; 674 *ppos += cnt;
531 675
@@ -549,7 +693,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
549 693
550 trace_seq_init(s); 694 trace_seq_init(s);
551 695
552 filter_print_preds(system->preds, s); 696 print_subsystem_event_filter(system, s);
553 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); 697 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
554 698
555 kfree(s); 699 kfree(s);
@@ -562,45 +706,56 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
562 loff_t *ppos) 706 loff_t *ppos)
563{ 707{
564 struct event_subsystem *system = filp->private_data; 708 struct event_subsystem *system = filp->private_data;
565 char buf[64], *pbuf = buf; 709 char *buf;
566 struct filter_pred *pred;
567 int err; 710 int err;
568 711
569 if (cnt >= sizeof(buf)) 712 if (cnt >= PAGE_SIZE)
570 return -EINVAL; 713 return -EINVAL;
571 714
572 if (copy_from_user(&buf, ubuf, cnt)) 715 buf = (char *)__get_free_page(GFP_TEMPORARY);
573 return -EFAULT; 716 if (!buf)
574 buf[cnt] = '\0';
575
576 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
577 if (!pred)
578 return -ENOMEM; 717 return -ENOMEM;
579 718
580 err = filter_parse(&pbuf, pred); 719 if (copy_from_user(buf, ubuf, cnt)) {
581 if (err < 0) { 720 free_page((unsigned long) buf);
582 filter_free_pred(pred); 721 return -EFAULT;
583 return err;
584 }
585
586 if (pred->clear) {
587 filter_free_subsystem_preds(system);
588 filter_free_pred(pred);
589 return cnt;
590 } 722 }
723 buf[cnt] = '\0';
591 724
592 err = filter_add_subsystem_pred(system, pred); 725 err = apply_subsystem_event_filter(system, buf);
593 if (err < 0) { 726 free_page((unsigned long) buf);
594 filter_free_subsystem_preds(system); 727 if (err < 0)
595 filter_free_pred(pred);
596 return err; 728 return err;
597 }
598 729
599 *ppos += cnt; 730 *ppos += cnt;
600 731
601 return cnt; 732 return cnt;
602} 733}
603 734
735static ssize_t
736show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
737{
738 int (*func)(struct trace_seq *s) = filp->private_data;
739 struct trace_seq *s;
740 int r;
741
742 if (*ppos)
743 return 0;
744
745 s = kmalloc(sizeof(*s), GFP_KERNEL);
746 if (!s)
747 return -ENOMEM;
748
749 trace_seq_init(s);
750
751 func(s);
752 r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
753
754 kfree(s);
755
756 return r;
757}
758
604static const struct seq_operations show_event_seq_ops = { 759static const struct seq_operations show_event_seq_ops = {
605 .start = t_start, 760 .start = t_start,
606 .next = t_next, 761 .next = t_next,
@@ -658,6 +813,17 @@ static const struct file_operations ftrace_subsystem_filter_fops = {
658 .write = subsystem_filter_write, 813 .write = subsystem_filter_write,
659}; 814};
660 815
816static const struct file_operations ftrace_system_enable_fops = {
817 .open = tracing_open_generic,
818 .read = system_enable_read,
819 .write = system_enable_write,
820};
821
822static const struct file_operations ftrace_show_header_fops = {
823 .open = tracing_open_generic,
824 .read = show_header,
825};
826
661static struct dentry *event_trace_events_dir(void) 827static struct dentry *event_trace_events_dir(void)
662{ 828{
663 static struct dentry *d_tracer; 829 static struct dentry *d_tracer;
@@ -684,6 +850,7 @@ static struct dentry *
684event_subsystem_dir(const char *name, struct dentry *d_events) 850event_subsystem_dir(const char *name, struct dentry *d_events)
685{ 851{
686 struct event_subsystem *system; 852 struct event_subsystem *system;
853 struct dentry *entry;
687 854
688 /* First see if we did not already create this dir */ 855 /* First see if we did not already create this dir */
689 list_for_each_entry(system, &event_subsystems, list) { 856 list_for_each_entry(system, &event_subsystems, list) {
@@ -707,16 +874,46 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
707 return d_events; 874 return d_events;
708 } 875 }
709 876
710 system->name = name; 877 system->name = kstrdup(name, GFP_KERNEL);
878 if (!system->name) {
879 debugfs_remove(system->entry);
880 kfree(system);
881 return d_events;
882 }
883
711 list_add(&system->list, &event_subsystems); 884 list_add(&system->list, &event_subsystems);
712 885
713 system->preds = NULL; 886 system->filter = NULL;
887
888 system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
889 if (!system->filter) {
890 pr_warning("Could not allocate filter for subsystem "
891 "'%s'\n", name);
892 return system->entry;
893 }
894
895 entry = debugfs_create_file("filter", 0644, system->entry, system,
896 &ftrace_subsystem_filter_fops);
897 if (!entry) {
898 kfree(system->filter);
899 system->filter = NULL;
900 pr_warning("Could not create debugfs "
901 "'%s/filter' entry\n", name);
902 }
903
904 entry = trace_create_file("enable", 0644, system->entry,
905 (void *)system->name,
906 &ftrace_system_enable_fops);
714 907
715 return system->entry; 908 return system->entry;
716} 909}
717 910
718static int 911static int
719event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) 912event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
913 const struct file_operations *id,
914 const struct file_operations *enable,
915 const struct file_operations *filter,
916 const struct file_operations *format)
720{ 917{
721 struct dentry *entry; 918 struct dentry *entry;
722 int ret; 919 int ret;
@@ -725,7 +922,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
725 * If the trace point header did not define TRACE_SYSTEM 922 * If the trace point header did not define TRACE_SYSTEM
726 * then the system would be called "TRACE_SYSTEM". 923 * then the system would be called "TRACE_SYSTEM".
727 */ 924 */
728 if (strcmp(call->system, "TRACE_SYSTEM") != 0) 925 if (strcmp(call->system, TRACE_SYSTEM) != 0)
729 d_events = event_subsystem_dir(call->system, d_events); 926 d_events = event_subsystem_dir(call->system, d_events);
730 927
731 if (call->raw_init) { 928 if (call->raw_init) {
@@ -744,21 +941,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
744 return -1; 941 return -1;
745 } 942 }
746 943
747 if (call->regfunc) { 944 if (call->regfunc)
748 entry = debugfs_create_file("enable", 0644, call->dir, call, 945 entry = trace_create_file("enable", 0644, call->dir, call,
749 &ftrace_enable_fops); 946 enable);
750 if (!entry)
751 pr_warning("Could not create debugfs "
752 "'%s/enable' entry\n", call->name);
753 }
754 947
755 if (call->id) { 948 if (call->id)
756 entry = debugfs_create_file("id", 0444, call->dir, call, 949 entry = trace_create_file("id", 0444, call->dir, call,
757 &ftrace_event_id_fops); 950 id);
758 if (!entry)
759 pr_warning("Could not create debugfs '%s/id' entry\n",
760 call->name);
761 }
762 951
763 if (call->define_fields) { 952 if (call->define_fields) {
764 ret = call->define_fields(); 953 ret = call->define_fields();
@@ -767,32 +956,196 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
767 " events/%s\n", call->name); 956 " events/%s\n", call->name);
768 return ret; 957 return ret;
769 } 958 }
770 entry = debugfs_create_file("filter", 0644, call->dir, call, 959 entry = trace_create_file("filter", 0644, call->dir, call,
771 &ftrace_event_filter_fops); 960 filter);
772 if (!entry)
773 pr_warning("Could not create debugfs "
774 "'%s/filter' entry\n", call->name);
775 } 961 }
776 962
777 /* A trace may not want to export its format */ 963 /* A trace may not want to export its format */
778 if (!call->show_format) 964 if (!call->show_format)
779 return 0; 965 return 0;
780 966
781 entry = debugfs_create_file("format", 0444, call->dir, call, 967 entry = trace_create_file("format", 0444, call->dir, call,
782 &ftrace_event_format_fops); 968 format);
783 if (!entry) 969
784 pr_warning("Could not create debugfs " 970 return 0;
785 "'%s/format' entry\n", call->name); 971}
972
/*
 * for_each_event - walk a contiguous array of event calls.
 * @event: pointer used as the loop cursor
 * @start: first element of the array
 * @end:   one past the last element
 *
 * Every argument is parenthesized so that expression arguments (for
 * example "base + count" as @end) are cast and compared as a whole.
 */
#define for_each_event(event, start, end)			\
	for ((event) = (start);					\
	     (unsigned long)(event) < (unsigned long)(end);	\
	     (event)++)
977
978#ifdef CONFIG_MODULES
979
980static LIST_HEAD(ftrace_module_file_list);
981
982/*
983 * Modules must own their file_operations to keep up with
984 * reference counting.
985 */
986struct ftrace_module_file_ops {
987 struct list_head list;
988 struct module *mod;
989 struct file_operations id;
990 struct file_operations enable;
991 struct file_operations format;
992 struct file_operations filter;
993};
994
995static struct ftrace_module_file_ops *
996trace_create_file_ops(struct module *mod)
997{
998 struct ftrace_module_file_ops *file_ops;
999
1000 /*
1001 * This is a bit of a PITA. To allow for correct reference
1002 * counting, modules must "own" their file_operations.
1003 * To do this, we allocate the file operations that will be
1004 * used in the event directory.
1005 */
1006
1007 file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1008 if (!file_ops)
1009 return NULL;
1010
1011 file_ops->mod = mod;
1012
1013 file_ops->id = ftrace_event_id_fops;
1014 file_ops->id.owner = mod;
1015
1016 file_ops->enable = ftrace_enable_fops;
1017 file_ops->enable.owner = mod;
1018
1019 file_ops->filter = ftrace_event_filter_fops;
1020 file_ops->filter.owner = mod;
1021
1022 file_ops->format = ftrace_event_format_fops;
1023 file_ops->format.owner = mod;
1024
1025 list_add(&file_ops->list, &ftrace_module_file_list);
1026
1027 return file_ops;
1028}
1029
1030static void trace_module_add_events(struct module *mod)
1031{
1032 struct ftrace_module_file_ops *file_ops = NULL;
1033 struct ftrace_event_call *call, *start, *end;
1034 struct dentry *d_events;
1035
1036 start = mod->trace_events;
1037 end = mod->trace_events + mod->num_trace_events;
1038
1039 if (start == end)
1040 return;
1041
1042 d_events = event_trace_events_dir();
1043 if (!d_events)
1044 return;
1045
1046 for_each_event(call, start, end) {
1047 /* The linker may leave blanks */
1048 if (!call->name)
1049 continue;
1050
1051 /*
1052 * This module has events, create file ops for this module
1053 * if not already done.
1054 */
1055 if (!file_ops) {
1056 file_ops = trace_create_file_ops(mod);
1057 if (!file_ops)
1058 return;
1059 }
1060 call->mod = mod;
1061 list_add(&call->list, &ftrace_events);
1062 event_create_dir(call, d_events,
1063 &file_ops->id, &file_ops->enable,
1064 &file_ops->filter, &file_ops->format);
1065 }
1066}
1067
1068static void trace_module_remove_events(struct module *mod)
1069{
1070 struct ftrace_module_file_ops *file_ops;
1071 struct ftrace_event_call *call, *p;
1072 bool found = false;
1073
1074 list_for_each_entry_safe(call, p, &ftrace_events, list) {
1075 if (call->mod == mod) {
1076 found = true;
1077 if (call->enabled) {
1078 call->enabled = 0;
1079 call->unregfunc();
1080 }
1081 if (call->event)
1082 unregister_ftrace_event(call->event);
1083 debugfs_remove_recursive(call->dir);
1084 list_del(&call->list);
1085 trace_destroy_fields(call);
1086 destroy_preds(call);
1087 }
1088 }
1089
1090 /* Now free the file_operations */
1091 list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1092 if (file_ops->mod == mod)
1093 break;
1094 }
1095 if (&file_ops->list != &ftrace_module_file_list) {
1096 list_del(&file_ops->list);
1097 kfree(file_ops);
1098 }
1099
1100 /*
1101 * It is safest to reset the ring buffer if the module being unloaded
1102 * registered any events.
1103 */
1104 if (found)
1105 tracing_reset_current_online_cpus();
1106}
1107
1108static int trace_module_notify(struct notifier_block *self,
1109 unsigned long val, void *data)
1110{
1111 struct module *mod = data;
786 1112
1113 mutex_lock(&event_mutex);
1114 switch (val) {
1115 case MODULE_STATE_COMING:
1116 trace_module_add_events(mod);
1117 break;
1118 case MODULE_STATE_GOING:
1119 trace_module_remove_events(mod);
1120 break;
1121 }
1122 mutex_unlock(&event_mutex);
1123
1124 return 0;
1125}
1126#else
1127static int trace_module_notify(struct notifier_block *self,
1128 unsigned long val, void *data)
1129{
787 return 0; 1130 return 0;
788} 1131}
1132#endif /* CONFIG_MODULES */
1133
1134struct notifier_block trace_module_nb = {
1135 .notifier_call = trace_module_notify,
1136 .priority = 0,
1137};
1138
1139extern struct ftrace_event_call __start_ftrace_events[];
1140extern struct ftrace_event_call __stop_ftrace_events[];
789 1141
790static __init int event_trace_init(void) 1142static __init int event_trace_init(void)
791{ 1143{
792 struct ftrace_event_call *call = __start_ftrace_events; 1144 struct ftrace_event_call *call;
793 struct dentry *d_tracer; 1145 struct dentry *d_tracer;
794 struct dentry *entry; 1146 struct dentry *entry;
795 struct dentry *d_events; 1147 struct dentry *d_events;
1148 int ret;
796 1149
797 d_tracer = tracing_init_dentry(); 1150 d_tracer = tracing_init_dentry();
798 if (!d_tracer) 1151 if (!d_tracer)
@@ -816,13 +1169,272 @@ static __init int event_trace_init(void)
816 if (!d_events) 1169 if (!d_events)
817 return 0; 1170 return 0;
818 1171
819 for_each_event(call) { 1172 /* ring buffer internal formats */
1173 trace_create_file("header_page", 0444, d_events,
1174 ring_buffer_print_page_header,
1175 &ftrace_show_header_fops);
1176
1177 trace_create_file("header_event", 0444, d_events,
1178 ring_buffer_print_entry_header,
1179 &ftrace_show_header_fops);
1180
1181 trace_create_file("enable", 0644, d_events,
1182 "*:*", &ftrace_system_enable_fops);
1183
1184 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
820 /* The linker may leave blanks */ 1185 /* The linker may leave blanks */
821 if (!call->name) 1186 if (!call->name)
822 continue; 1187 continue;
823 event_create_dir(call, d_events); 1188 list_add(&call->list, &ftrace_events);
1189 event_create_dir(call, d_events, &ftrace_event_id_fops,
1190 &ftrace_enable_fops, &ftrace_event_filter_fops,
1191 &ftrace_event_format_fops);
824 } 1192 }
825 1193
1194 ret = register_module_notifier(&trace_module_nb);
1195 if (!ret)
1196 pr_warning("Failed to register trace events module notifier\n");
1197
826 return 0; 1198 return 0;
827} 1199}
828fs_initcall(event_trace_init); 1200fs_initcall(event_trace_init);
1201
1202#ifdef CONFIG_FTRACE_STARTUP_TEST
1203
1204static DEFINE_SPINLOCK(test_spinlock);
1205static DEFINE_SPINLOCK(test_spinlock_irq);
1206static DEFINE_MUTEX(test_mutex);
1207
/*
 * Work item used by the event self test; event_test_thread() queues it on
 * every CPU with schedule_on_each_cpu().
 *
 * It takes test_spinlock, then test_spinlock_irq with interrupts disabled,
 * delays briefly and drops both in reverse order, then holds test_mutex
 * across a short sleep.
 * NOTE(review): presumably this exists to generate lock/irq/sched trace
 * events while the self test has events enabled - confirm.
 *
 * @dummy: unused work_struct argument.
 */
1208static __init void test_work(struct work_struct *dummy)
1209{
1210 spin_lock(&test_spinlock);
1211 spin_lock_irq(&test_spinlock_irq);
1212 udelay(1);
1213 spin_unlock_irq(&test_spinlock_irq);
1214 spin_unlock(&test_spinlock);
1215
1216 mutex_lock(&test_mutex);
1217 msleep(1);
1218 mutex_unlock(&test_mutex);
1219}
1220
1221static __init int event_test_thread(void *unused)
1222{
1223 void *test_malloc;
1224
1225 test_malloc = kmalloc(1234, GFP_KERNEL);
1226 if (!test_malloc)
1227 pr_info("failed to kmalloc\n");
1228
1229 schedule_on_each_cpu(test_work);
1230
1231 kfree(test_malloc);
1232
1233 set_current_state(TASK_INTERRUPTIBLE);
1234 while (!kthread_should_stop())
1235 schedule();
1236
1237 return 0;
1238}
1239
1240/*
1241 * Do various things that may trigger events.
1242 */
1243static __init void event_test_stuff(void)
1244{
1245 struct task_struct *test_thread;
1246
1247 test_thread = kthread_run(event_test_thread, NULL, "test-events");
1248 msleep(1);
1249 kthread_stop(test_thread);
1250}
1251
1252/*
1253 * For every trace event defined, we will test each trace point separately,
1254 * and then by groups, and finally all trace points.
1255 */
1256static __init void event_trace_self_tests(void)
1257{
1258 struct ftrace_event_call *call;
1259 struct event_subsystem *system;
1260 char *sysname;
1261 int ret;
1262
1263 pr_info("Running tests on trace events:\n");
1264
1265 list_for_each_entry(call, &ftrace_events, list) {
1266
1267 /* Only test those that have a regfunc */
1268 if (!call->regfunc)
1269 continue;
1270
1271 pr_info("Testing event %s: ", call->name);
1272
1273 /*
1274 * If an event is already enabled, someone is using
1275 * it and the self test should not be on.
1276 */
1277 if (call->enabled) {
1278 pr_warning("Enabled event during self test!\n");
1279 WARN_ON_ONCE(1);
1280 continue;
1281 }
1282
1283 call->enabled = 1;
1284 call->regfunc();
1285
1286 event_test_stuff();
1287
1288 call->unregfunc();
1289 call->enabled = 0;
1290
1291 pr_cont("OK\n");
1292 }
1293
1294 /* Now test at the sub system level */
1295
1296 pr_info("Running tests on trace event systems:\n");
1297
1298 list_for_each_entry(system, &event_subsystems, list) {
1299
1300 /* the ftrace system is special, skip it */
1301 if (strcmp(system->name, "ftrace") == 0)
1302 continue;
1303
1304 pr_info("Testing event system %s: ", system->name);
1305
1306 /* ftrace_set_clr_event can modify the name passed in. */
1307 sysname = kstrdup(system->name, GFP_KERNEL);
1308 if (WARN_ON(!sysname)) {
1309 pr_warning("Can't allocate memory, giving up!\n");
1310 return;
1311 }
1312 ret = ftrace_set_clr_event(sysname, 1);
1313 kfree(sysname);
1314 if (WARN_ON_ONCE(ret)) {
1315 pr_warning("error enabling system %s\n",
1316 system->name);
1317 continue;
1318 }
1319
1320 event_test_stuff();
1321
1322 sysname = kstrdup(system->name, GFP_KERNEL);
1323 if (WARN_ON(!sysname)) {
1324 pr_warning("Can't allocate memory, giving up!\n");
1325 return;
1326 }
1327 ret = ftrace_set_clr_event(sysname, 0);
1328 kfree(sysname);
1329
1330 if (WARN_ON_ONCE(ret))
1331 pr_warning("error disabling system %s\n",
1332 system->name);
1333
1334 pr_cont("OK\n");
1335 }
1336
1337 /* Test with all events enabled */
1338
1339 pr_info("Running tests on all trace events:\n");
1340 pr_info("Testing all events: ");
1341
1342 sysname = kmalloc(4, GFP_KERNEL);
1343 if (WARN_ON(!sysname)) {
1344 pr_warning("Can't allocate memory, giving up!\n");
1345 return;
1346 }
1347 memcpy(sysname, "*:*", 4);
1348 ret = ftrace_set_clr_event(sysname, 1);
1349 if (WARN_ON_ONCE(ret)) {
1350 kfree(sysname);
1351 pr_warning("error enabling all events\n");
1352 return;
1353 }
1354
1355 event_test_stuff();
1356
1357 /* reset sysname */
1358 memcpy(sysname, "*:*", 4);
1359 ret = ftrace_set_clr_event(sysname, 0);
1360 kfree(sysname);
1361
1362 if (WARN_ON_ONCE(ret)) {
1363 pr_warning("error disabling all events\n");
1364 return;
1365 }
1366
1367 pr_cont("OK\n");
1368}
1369
1370#ifdef CONFIG_FUNCTION_TRACER
1371
1372static DEFINE_PER_CPU(atomic_t, test_event_disable);
1373
1374static void
1375function_test_events_call(unsigned long ip, unsigned long parent_ip)
1376{
1377 struct ring_buffer_event *event;
1378 struct ftrace_entry *entry;
1379 unsigned long flags;
1380 long disabled;
1381 int resched;
1382 int cpu;
1383 int pc;
1384
1385 pc = preempt_count();
1386 resched = ftrace_preempt_disable();
1387 cpu = raw_smp_processor_id();
1388 disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
1389
1390 if (disabled != 1)
1391 goto out;
1392
1393 local_save_flags(flags);
1394
1395 event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
1396 flags, pc);
1397 if (!event)
1398 goto out;
1399 entry = ring_buffer_event_data(event);
1400 entry->ip = ip;
1401 entry->parent_ip = parent_ip;
1402
1403 trace_nowake_buffer_unlock_commit(event, flags, pc);
1404
1405 out:
1406 atomic_dec(&per_cpu(test_event_disable, cpu));
1407 ftrace_preempt_enable(resched);
1408}
1409
1410static struct ftrace_ops trace_ops __initdata =
1411{
1412 .func = function_test_events_call,
1413};
1414
1415static __init void event_trace_self_test_with_function(void)
1416{
1417 register_ftrace_function(&trace_ops);
1418 pr_info("Running tests again, along with the function tracer\n");
1419 event_trace_self_tests();
1420 unregister_ftrace_function(&trace_ops);
1421}
1422#else
1423static __init void event_trace_self_test_with_function(void)
1424{
1425}
1426#endif
1427
/*
 * Entry point of the trace event self tests; registered with
 * late_initcall().
 *
 * Runs the self tests once on their own and then once more through
 * event_trace_self_test_with_function().  Always returns 0.
 */
1428static __init int event_trace_self_tests_init(void)
1429{
1430
1431 event_trace_self_tests();
1432
1433 event_trace_self_test_with_function();
1434
1435 return 0;
1436}
1437
1438late_initcall(event_trace_self_tests_init);
1439
1440#endif
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e03cbf1e38f3..85ad6a8939ad 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,119 +22,297 @@
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/ctype.h> 24#include <linux/ctype.h>
25#include <linux/mutex.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
28 29
29static int filter_pred_64(struct filter_pred *pred, void *event) 30static DEFINE_MUTEX(filter_mutex);
31
32enum filter_op_ids
33{
34 OP_OR,
35 OP_AND,
36 OP_NE,
37 OP_EQ,
38 OP_LT,
39 OP_LE,
40 OP_GT,
41 OP_GE,
42 OP_NONE,
43 OP_OPEN_PAREN,
44};
45
46struct filter_op {
47 int id;
48 char *string;
49 int precedence;
50};
51
52static struct filter_op filter_ops[] = {
53 { OP_OR, "||", 1 },
54 { OP_AND, "&&", 2 },
55 { OP_NE, "!=", 4 },
56 { OP_EQ, "==", 4 },
57 { OP_LT, "<", 5 },
58 { OP_LE, "<=", 5 },
59 { OP_GT, ">", 5 },
60 { OP_GE, ">=", 5 },
61 { OP_NONE, "OP_NONE", 0 },
62 { OP_OPEN_PAREN, "(", 0 },
63};
64
65enum {
66 FILT_ERR_NONE,
67 FILT_ERR_INVALID_OP,
68 FILT_ERR_UNBALANCED_PAREN,
69 FILT_ERR_TOO_MANY_OPERANDS,
70 FILT_ERR_OPERAND_TOO_LONG,
71 FILT_ERR_FIELD_NOT_FOUND,
72 FILT_ERR_ILLEGAL_FIELD_OP,
73 FILT_ERR_ILLEGAL_INTVAL,
74 FILT_ERR_BAD_SUBSYS_FILTER,
75 FILT_ERR_TOO_MANY_PREDS,
76 FILT_ERR_MISSING_FIELD,
77 FILT_ERR_INVALID_FILTER,
78};
79
80static char *err_text[] = {
81 "No error",
82 "Invalid operator",
83 "Unbalanced parens",
84 "Too many operands",
85 "Operand too long",
86 "Field not found",
87 "Illegal operation for field type",
88 "Illegal integer value",
89 "Couldn't find or set field in one of a subsystem's events",
90 "Too many terms in predicate expression",
91 "Missing field name and/or value",
92 "Meaningless filter expression",
93};
94
95struct opstack_op {
96 int op;
97 struct list_head list;
98};
99
100struct postfix_elt {
101 int op;
102 char *operand;
103 struct list_head list;
104};
105
/*
 * Working state of one filter-string parse; zeroed and set up by
 * parse_init().
 */
106struct filter_parse_state {
107 struct filter_op *ops; /* operator table, indexed by operator id */
108 struct list_head opstack; /* operator stack; pushed and popped at the list head */
109 struct list_head postfix; /* output list of postfix_elt, appended at the tail */
110 int lasterr; /* last error code recorded by parse_error() */
111 int lasterr_pos; /* position recorded together with lasterr */
112
/* The infix input being scanned. */
113 struct {
114 char *string; /* the filter string handed to parse_init() */
115 unsigned int cnt; /* starts at strlen(string); decremented per consumed character */
116 unsigned int tail; /* index of the next character to read */
117 } infix;
118
/* The operand currently being collected. */
119 struct {
120 char string[MAX_FILTER_STR_VAL]; /* characters gathered so far */
121 int pos; /* NOTE(review): no user visible in this part of the file - confirm */
122 unsigned int tail; /* index where the next character is stored */
123 } operand;
124};
125
126DEFINE_COMPARISON_PRED(s64);
127DEFINE_COMPARISON_PRED(u64);
128DEFINE_COMPARISON_PRED(s32);
129DEFINE_COMPARISON_PRED(u32);
130DEFINE_COMPARISON_PRED(s16);
131DEFINE_COMPARISON_PRED(u16);
132DEFINE_COMPARISON_PRED(s8);
133DEFINE_COMPARISON_PRED(u8);
134
135DEFINE_EQUALITY_PRED(64);
136DEFINE_EQUALITY_PRED(32);
137DEFINE_EQUALITY_PRED(16);
138DEFINE_EQUALITY_PRED(8);
139
/*
 * Predicate combining two earlier results with logical AND.
 *
 * @pred and @event are unused.  Returns 1 when both @val1 and @val2 are
 * non-zero, 0 otherwise.
 */
static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
			   void *event __attribute((unused)),
			   int val1, int val2)
{
	if (!val1)
		return 0;

	return val2 != 0;
}
146
/*
 * Predicate combining two earlier results with logical OR.
 *
 * @pred and @event are unused.  Returns 1 when at least one of @val1 and
 * @val2 is non-zero, 0 otherwise.
 */
static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
			  void *event __attribute((unused)),
			  int val1, int val2)
{
	if (val1)
		return 1;

	return val2 != 0;
}
153
154/* Filter predicate for fixed sized arrays of characters */
155static int filter_pred_string(struct filter_pred *pred, void *event,
156 int val1, int val2)
30{ 157{
31 u64 *addr = (u64 *)(event + pred->offset); 158 char *addr = (char *)(event + pred->offset);
32 u64 val = (u64)pred->val; 159 int cmp, match;
33 int match; 160
161 cmp = strncmp(addr, pred->str_val, pred->str_len);
34 162
35 match = (val == *addr) ^ pred->not; 163 match = (!cmp) ^ pred->not;
36 164
37 return match; 165 return match;
38} 166}
39 167
40static int filter_pred_32(struct filter_pred *pred, void *event) 168/*
169 * Filter predicate for dynamic sized arrays of characters.
170 * These are implemented through a list of strings at the end
171 * of the entry.
172 * Also each of these strings have a field in the entry which
173 * contains its offset from the beginning of the entry.
174 * We have then first to get this field, dereference it
175 * and add it to the address of the entry, and at last we have
176 * the address of the string.
177 */
178static int filter_pred_strloc(struct filter_pred *pred, void *event,
179 int val1, int val2)
41{ 180{
42 u32 *addr = (u32 *)(event + pred->offset); 181 int str_loc = *(int *)(event + pred->offset);
43 u32 val = (u32)pred->val; 182 char *addr = (char *)(event + str_loc);
44 int match; 183 int cmp, match;
184
185 cmp = strncmp(addr, pred->str_val, pred->str_len);
45 186
46 match = (val == *addr) ^ pred->not; 187 match = (!cmp) ^ pred->not;
47 188
48 return match; 189 return match;
49} 190}
50 191
51static int filter_pred_16(struct filter_pred *pred, void *event) 192static int filter_pred_none(struct filter_pred *pred, void *event,
193 int val1, int val2)
194{
195 return 0;
196}
197
198/* return 1 if event matches, 0 otherwise (discard) */
199int filter_match_preds(struct ftrace_event_call *call, void *rec)
52{ 200{
53 u16 *addr = (u16 *)(event + pred->offset); 201 struct event_filter *filter = call->filter;
54 u16 val = (u16)pred->val; 202 int match, top = 0, val1 = 0, val2 = 0;
55 int match; 203 int stack[MAX_FILTER_PRED];
204 struct filter_pred *pred;
205 int i;
206
207 for (i = 0; i < filter->n_preds; i++) {
208 pred = filter->preds[i];
209 if (!pred->pop_n) {
210 match = pred->fn(pred, rec, val1, val2);
211 stack[top++] = match;
212 continue;
213 }
214 if (pred->pop_n > top) {
215 WARN_ON_ONCE(1);
216 return 0;
217 }
218 val1 = stack[--top];
219 val2 = stack[--top];
220 match = pred->fn(pred, rec, val1, val2);
221 stack[top++] = match;
222 }
56 223
57 match = (val == *addr) ^ pred->not; 224 return stack[--top];
225}
226EXPORT_SYMBOL_GPL(filter_match_preds);
58 227
59 return match; 228static void parse_error(struct filter_parse_state *ps, int err, int pos)
229{
230 ps->lasterr = err;
231 ps->lasterr_pos = pos;
60} 232}
61 233
62static int filter_pred_8(struct filter_pred *pred, void *event) 234static void remove_filter_string(struct event_filter *filter)
63{ 235{
64 u8 *addr = (u8 *)(event + pred->offset); 236 kfree(filter->filter_string);
65 u8 val = (u8)pred->val; 237 filter->filter_string = NULL;
66 int match; 238}
67 239
68 match = (val == *addr) ^ pred->not; 240static int replace_filter_string(struct event_filter *filter,
241 char *filter_string)
242{
243 kfree(filter->filter_string);
244 filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
245 if (!filter->filter_string)
246 return -ENOMEM;
69 247
70 return match; 248 return 0;
71} 249}
72 250
73static int filter_pred_string(struct filter_pred *pred, void *event) 251static int append_filter_string(struct event_filter *filter,
252 char *string)
74{ 253{
75 char *addr = (char *)(event + pred->offset); 254 int newlen;
76 int cmp, match; 255 char *new_filter_string;
77 256
78 cmp = strncmp(addr, pred->str_val, pred->str_len); 257 BUG_ON(!filter->filter_string);
258 newlen = strlen(filter->filter_string) + strlen(string) + 1;
259 new_filter_string = kmalloc(newlen, GFP_KERNEL);
260 if (!new_filter_string)
261 return -ENOMEM;
79 262
80 match = (!cmp) ^ pred->not; 263 strcpy(new_filter_string, filter->filter_string);
264 strcat(new_filter_string, string);
265 kfree(filter->filter_string);
266 filter->filter_string = new_filter_string;
81 267
82 return match; 268 return 0;
83} 269}
84 270
85/* return 1 if event matches, 0 otherwise (discard) */ 271static void append_filter_err(struct filter_parse_state *ps,
86int filter_match_preds(struct ftrace_event_call *call, void *rec) 272 struct event_filter *filter)
87{ 273{
88 int i, matched, and_failed = 0; 274 int pos = ps->lasterr_pos;
89 struct filter_pred *pred; 275 char *buf, *pbuf;
90 276
91 for (i = 0; i < MAX_FILTER_PRED; i++) { 277 buf = (char *)__get_free_page(GFP_TEMPORARY);
92 if (call->preds[i]) { 278 if (!buf)
93 pred = call->preds[i]; 279 return;
94 if (and_failed && !pred->or)
95 continue;
96 matched = pred->fn(pred, rec);
97 if (!matched && !pred->or) {
98 and_failed = 1;
99 continue;
100 } else if (matched && pred->or)
101 return 1;
102 } else
103 break;
104 }
105 280
106 if (and_failed) 281 append_filter_string(filter, "\n");
107 return 0; 282 memset(buf, ' ', PAGE_SIZE);
283 if (pos > PAGE_SIZE - 128)
284 pos = 0;
285 buf[pos] = '^';
286 pbuf = &buf[pos] + 1;
108 287
109 return 1; 288 sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
289 append_filter_string(filter, buf);
290 free_page((unsigned long) buf);
110} 291}
111 292
112void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) 293void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
113{ 294{
114 char *field_name; 295 struct event_filter *filter = call->filter;
115 struct filter_pred *pred;
116 int i;
117 296
118 if (!preds) { 297 mutex_lock(&filter_mutex);
298 if (filter->filter_string)
299 trace_seq_printf(s, "%s\n", filter->filter_string);
300 else
119 trace_seq_printf(s, "none\n"); 301 trace_seq_printf(s, "none\n");
120 return; 302 mutex_unlock(&filter_mutex);
121 } 303}
122 304
123 for (i = 0; i < MAX_FILTER_PRED; i++) { 305void print_subsystem_event_filter(struct event_subsystem *system,
124 if (preds[i]) { 306 struct trace_seq *s)
125 pred = preds[i]; 307{
126 field_name = pred->field_name; 308 struct event_filter *filter = system->filter;
127 if (i) 309
128 trace_seq_printf(s, pred->or ? "|| " : "&& "); 310 mutex_lock(&filter_mutex);
129 trace_seq_printf(s, "%s ", field_name); 311 if (filter->filter_string)
130 trace_seq_printf(s, pred->not ? "!= " : "== "); 312 trace_seq_printf(s, "%s\n", filter->filter_string);
131 if (pred->str_val) 313 else
132 trace_seq_printf(s, "%s\n", pred->str_val); 314 trace_seq_printf(s, "none\n");
133 else 315 mutex_unlock(&filter_mutex);
134 trace_seq_printf(s, "%llu\n", pred->val);
135 } else
136 break;
137 }
138} 316}
139 317
140static struct ftrace_event_field * 318static struct ftrace_event_field *
@@ -150,284 +328,826 @@ find_event_field(struct ftrace_event_call *call, char *name)
150 return NULL; 328 return NULL;
151} 329}
152 330
153void filter_free_pred(struct filter_pred *pred) 331static void filter_free_pred(struct filter_pred *pred)
154{ 332{
155 if (!pred) 333 if (!pred)
156 return; 334 return;
157 335
158 kfree(pred->field_name); 336 kfree(pred->field_name);
159 kfree(pred->str_val);
160 kfree(pred); 337 kfree(pred);
161} 338}
162 339
163void filter_free_preds(struct ftrace_event_call *call) 340static void filter_clear_pred(struct filter_pred *pred)
164{ 341{
165 int i; 342 kfree(pred->field_name);
343 pred->field_name = NULL;
344 pred->str_len = 0;
345}
166 346
167 if (call->preds) { 347static int filter_set_pred(struct filter_pred *dest,
168 for (i = 0; i < MAX_FILTER_PRED; i++) 348 struct filter_pred *src,
169 filter_free_pred(call->preds[i]); 349 filter_pred_fn_t fn)
170 kfree(call->preds); 350{
171 call->preds = NULL; 351 *dest = *src;
352 if (src->field_name) {
353 dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
354 if (!dest->field_name)
355 return -ENOMEM;
172 } 356 }
357 dest->fn = fn;
358
359 return 0;
173} 360}
174 361
175void filter_free_subsystem_preds(struct event_subsystem *system) 362static void filter_disable_preds(struct ftrace_event_call *call)
176{ 363{
177 struct ftrace_event_call *call = __start_ftrace_events; 364 struct event_filter *filter = call->filter;
178 int i; 365 int i;
179 366
180 if (system->preds) { 367 call->filter_active = 0;
181 for (i = 0; i < MAX_FILTER_PRED; i++) 368 filter->n_preds = 0;
182 filter_free_pred(system->preds[i]);
183 kfree(system->preds);
184 system->preds = NULL;
185 }
186 369
187 events_for_each(call) { 370 for (i = 0; i < MAX_FILTER_PRED; i++)
188 if (!call->name || !call->regfunc) 371 filter->preds[i]->fn = filter_pred_none;
189 continue; 372}
373
374void destroy_preds(struct ftrace_event_call *call)
375{
376 struct event_filter *filter = call->filter;
377 int i;
190 378
191 if (!strcmp(call->system, system->name)) 379 for (i = 0; i < MAX_FILTER_PRED; i++) {
192 filter_free_preds(call); 380 if (filter->preds[i])
381 filter_free_pred(filter->preds[i]);
193 } 382 }
383 kfree(filter->preds);
384 kfree(filter);
385 call->filter = NULL;
194} 386}
195 387
196static int __filter_add_pred(struct ftrace_event_call *call, 388int init_preds(struct ftrace_event_call *call)
197 struct filter_pred *pred)
198{ 389{
390 struct event_filter *filter;
391 struct filter_pred *pred;
199 int i; 392 int i;
200 393
201 if (call->preds && !pred->compound) 394 filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
202 filter_free_preds(call); 395 if (!call->filter)
396 return -ENOMEM;
203 397
204 if (!call->preds) { 398 call->filter_active = 0;
205 call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), 399 filter->n_preds = 0;
206 GFP_KERNEL); 400
207 if (!call->preds) 401 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
208 return -ENOMEM; 402 if (!filter->preds)
209 } 403 goto oom;
210 404
211 for (i = 0; i < MAX_FILTER_PRED; i++) { 405 for (i = 0; i < MAX_FILTER_PRED; i++) {
212 if (!call->preds[i]) { 406 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
213 call->preds[i] = pred; 407 if (!pred)
214 return 0; 408 goto oom;
409 pred->fn = filter_pred_none;
410 filter->preds[i] = pred;
411 }
412
413 return 0;
414
415oom:
416 destroy_preds(call);
417
418 return -ENOMEM;
419}
420EXPORT_SYMBOL_GPL(init_preds);
421
422static void filter_free_subsystem_preds(struct event_subsystem *system)
423{
424 struct event_filter *filter = system->filter;
425 struct ftrace_event_call *call;
426 int i;
427
428 if (filter->n_preds) {
429 for (i = 0; i < filter->n_preds; i++)
430 filter_free_pred(filter->preds[i]);
431 kfree(filter->preds);
432 filter->preds = NULL;
433 filter->n_preds = 0;
434 }
435
436 mutex_lock(&event_mutex);
437 list_for_each_entry(call, &ftrace_events, list) {
438 if (!call->define_fields)
439 continue;
440
441 if (!strcmp(call->system, system->name)) {
442 filter_disable_preds(call);
443 remove_filter_string(call->filter);
215 } 444 }
216 } 445 }
446 mutex_unlock(&event_mutex);
447}
448
449static int filter_add_pred_fn(struct filter_parse_state *ps,
450 struct ftrace_event_call *call,
451 struct filter_pred *pred,
452 filter_pred_fn_t fn)
453{
454 struct event_filter *filter = call->filter;
455 int idx, err;
456
457 if (filter->n_preds == MAX_FILTER_PRED) {
458 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
459 return -ENOSPC;
460 }
461
462 idx = filter->n_preds;
463 filter_clear_pred(filter->preds[idx]);
464 err = filter_set_pred(filter->preds[idx], pred, fn);
465 if (err)
466 return err;
217 467
218 return -ENOSPC; 468 filter->n_preds++;
469 call->filter_active = 1;
470
471 return 0;
219} 472}
220 473
474enum {
475 FILTER_STATIC_STRING = 1,
476 FILTER_DYN_STRING
477};
478
221static int is_string_field(const char *type) 479static int is_string_field(const char *type)
222{ 480{
223 if (strchr(type, '[') && strstr(type, "char")) 481 if (strchr(type, '[') && strstr(type, "char"))
224 return 1; 482 return FILTER_STATIC_STRING;
483
484 if (!strcmp(type, "__str_loc"))
485 return FILTER_DYN_STRING;
225 486
226 return 0; 487 return 0;
227} 488}
228 489
229int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) 490static int is_legal_op(struct ftrace_event_field *field, int op)
491{
492 if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
493 return 0;
494
495 return 1;
496}
497
498static filter_pred_fn_t select_comparison_fn(int op, int field_size,
499 int field_is_signed)
500{
501 filter_pred_fn_t fn = NULL;
502
503 switch (field_size) {
504 case 8:
505 if (op == OP_EQ || op == OP_NE)
506 fn = filter_pred_64;
507 else if (field_is_signed)
508 fn = filter_pred_s64;
509 else
510 fn = filter_pred_u64;
511 break;
512 case 4:
513 if (op == OP_EQ || op == OP_NE)
514 fn = filter_pred_32;
515 else if (field_is_signed)
516 fn = filter_pred_s32;
517 else
518 fn = filter_pred_u32;
519 break;
520 case 2:
521 if (op == OP_EQ || op == OP_NE)
522 fn = filter_pred_16;
523 else if (field_is_signed)
524 fn = filter_pred_s16;
525 else
526 fn = filter_pred_u16;
527 break;
528 case 1:
529 if (op == OP_EQ || op == OP_NE)
530 fn = filter_pred_8;
531 else if (field_is_signed)
532 fn = filter_pred_s8;
533 else
534 fn = filter_pred_u8;
535 break;
536 }
537
538 return fn;
539}
540
541static int filter_add_pred(struct filter_parse_state *ps,
542 struct ftrace_event_call *call,
543 struct filter_pred *pred)
230{ 544{
231 struct ftrace_event_field *field; 545 struct ftrace_event_field *field;
546 filter_pred_fn_t fn;
547 unsigned long long val;
548 int string_type;
549
550 pred->fn = filter_pred_none;
551
552 if (pred->op == OP_AND) {
553 pred->pop_n = 2;
554 return filter_add_pred_fn(ps, call, pred, filter_pred_and);
555 } else if (pred->op == OP_OR) {
556 pred->pop_n = 2;
557 return filter_add_pred_fn(ps, call, pred, filter_pred_or);
558 }
232 559
233 field = find_event_field(call, pred->field_name); 560 field = find_event_field(call, pred->field_name);
234 if (!field) 561 if (!field) {
562 parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
235 return -EINVAL; 563 return -EINVAL;
564 }
236 565
237 pred->offset = field->offset; 566 pred->offset = field->offset;
238 567
239 if (is_string_field(field->type)) { 568 if (!is_legal_op(field, pred->op)) {
240 if (!pred->str_val) 569 parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
241 return -EINVAL; 570 return -EINVAL;
242 pred->fn = filter_pred_string; 571 }
572
573 string_type = is_string_field(field->type);
574 if (string_type) {
575 if (string_type == FILTER_STATIC_STRING)
576 fn = filter_pred_string;
577 else
578 fn = filter_pred_strloc;
243 pred->str_len = field->size; 579 pred->str_len = field->size;
244 return __filter_add_pred(call, pred); 580 if (pred->op == OP_NE)
581 pred->not = 1;
582 return filter_add_pred_fn(ps, call, pred, fn);
245 } else { 583 } else {
246 if (pred->str_val) 584 if (strict_strtoull(pred->str_val, 0, &val)) {
585 parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
247 return -EINVAL; 586 return -EINVAL;
587 }
588 pred->val = val;
248 } 589 }
249 590
250 switch (field->size) { 591 fn = select_comparison_fn(pred->op, field->size, field->is_signed);
251 case 8: 592 if (!fn) {
252 pred->fn = filter_pred_64; 593 parse_error(ps, FILT_ERR_INVALID_OP, 0);
253 break;
254 case 4:
255 pred->fn = filter_pred_32;
256 break;
257 case 2:
258 pred->fn = filter_pred_16;
259 break;
260 case 1:
261 pred->fn = filter_pred_8;
262 break;
263 default:
264 return -EINVAL; 594 return -EINVAL;
265 } 595 }
266 596
267 return __filter_add_pred(call, pred); 597 if (pred->op == OP_NE)
598 pred->not = 1;
599
600 return filter_add_pred_fn(ps, call, pred, fn);
268} 601}
269 602
270static struct filter_pred *copy_pred(struct filter_pred *pred) 603static int filter_add_subsystem_pred(struct filter_parse_state *ps,
604 struct event_subsystem *system,
605 struct filter_pred *pred,
606 char *filter_string)
271{ 607{
272 struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); 608 struct event_filter *filter = system->filter;
273 if (!new_pred) 609 struct ftrace_event_call *call;
274 return NULL; 610 int err = 0;
611
612 if (!filter->preds) {
613 filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
614 GFP_KERNEL);
275 615
276 memcpy(new_pred, pred, sizeof(*pred)); 616 if (!filter->preds)
617 return -ENOMEM;
618 }
277 619
278 if (pred->field_name) { 620 if (filter->n_preds == MAX_FILTER_PRED) {
279 new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); 621 parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
280 if (!new_pred->field_name) { 622 return -ENOSPC;
281 kfree(new_pred);
282 return NULL;
283 }
284 } 623 }
285 624
286 if (pred->str_val) { 625 filter->preds[filter->n_preds] = pred;
287 new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); 626 filter->n_preds++;
288 if (!new_pred->str_val) { 627
289 filter_free_pred(new_pred); 628 mutex_lock(&event_mutex);
290 return NULL; 629 list_for_each_entry(call, &ftrace_events, list) {
630
631 if (!call->define_fields)
632 continue;
633
634 if (strcmp(call->system, system->name))
635 continue;
636
637 err = filter_add_pred(ps, call, pred);
638 if (err) {
639 mutex_unlock(&event_mutex);
640 filter_free_subsystem_preds(system);
641 parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
642 goto out;
291 } 643 }
644 replace_filter_string(call->filter, filter_string);
292 } 645 }
646 mutex_unlock(&event_mutex);
647out:
648 return err;
649}
650
651static void parse_init(struct filter_parse_state *ps,
652 struct filter_op *ops,
653 char *infix_string)
654{
655 memset(ps, '\0', sizeof(*ps));
293 656
294 return new_pred; 657 ps->infix.string = infix_string;
658 ps->infix.cnt = strlen(infix_string);
659 ps->ops = ops;
660
661 INIT_LIST_HEAD(&ps->opstack);
662 INIT_LIST_HEAD(&ps->postfix);
295} 663}
296 664
297int filter_add_subsystem_pred(struct event_subsystem *system, 665static char infix_next(struct filter_parse_state *ps)
298 struct filter_pred *pred)
299{ 666{
300 struct ftrace_event_call *call = __start_ftrace_events; 667 ps->infix.cnt--;
301 struct filter_pred *event_pred;
302 int i;
303 668
304 if (system->preds && !pred->compound) 669 return ps->infix.string[ps->infix.tail++];
305 filter_free_subsystem_preds(system); 670}
306 671
307 if (!system->preds) { 672static char infix_peek(struct filter_parse_state *ps)
308 system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), 673{
309 GFP_KERNEL); 674 if (ps->infix.tail == strlen(ps->infix.string))
310 if (!system->preds) 675 return 0;
311 return -ENOMEM; 676
677 return ps->infix.string[ps->infix.tail];
678}
679
680static void infix_advance(struct filter_parse_state *ps)
681{
682 ps->infix.cnt--;
683 ps->infix.tail++;
684}
685
686static inline int is_precedence_lower(struct filter_parse_state *ps,
687 int a, int b)
688{
689 return ps->ops[a].precedence < ps->ops[b].precedence;
690}
691
692static inline int is_op_char(struct filter_parse_state *ps, char c)
693{
694 int i;
695
696 for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
697 if (ps->ops[i].string[0] == c)
698 return 1;
312 } 699 }
313 700
314 for (i = 0; i < MAX_FILTER_PRED; i++) { 701 return 0;
315 if (!system->preds[i]) { 702}
316 system->preds[i] = pred; 703
317 break; 704static int infix_get_op(struct filter_parse_state *ps, char firstc)
705{
706 char nextc = infix_peek(ps);
707 char opstr[3];
708 int i;
709
710 opstr[0] = firstc;
711 opstr[1] = nextc;
712 opstr[2] = '\0';
713
714 for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
715 if (!strcmp(opstr, ps->ops[i].string)) {
716 infix_advance(ps);
717 return ps->ops[i].id;
318 } 718 }
319 } 719 }
320 720
321 if (i == MAX_FILTER_PRED) 721 opstr[1] = '\0';
322 return -ENOSPC; 722
723 for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
724 if (!strcmp(opstr, ps->ops[i].string))
725 return ps->ops[i].id;
726 }
727
728 return OP_NONE;
729}
730
731static inline void clear_operand_string(struct filter_parse_state *ps)
732{
733 memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
734 ps->operand.tail = 0;
735}
736
737static inline int append_operand_char(struct filter_parse_state *ps, char c)
738{
739 if (ps->operand.tail == MAX_FILTER_STR_VAL)
740 return -EINVAL;
741
742 ps->operand.string[ps->operand.tail++] = c;
743
744 return 0;
745}
746
747static int filter_opstack_push(struct filter_parse_state *ps, int op)
748{
749 struct opstack_op *opstack_op;
750
751 opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
752 if (!opstack_op)
753 return -ENOMEM;
754
755 opstack_op->op = op;
756 list_add(&opstack_op->list, &ps->opstack);
757
758 return 0;
759}
760
761static int filter_opstack_empty(struct filter_parse_state *ps)
762{
763 return list_empty(&ps->opstack);
764}
765
766static int filter_opstack_top(struct filter_parse_state *ps)
767{
768 struct opstack_op *opstack_op;
769
770 if (filter_opstack_empty(ps))
771 return OP_NONE;
772
773 opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
774
775 return opstack_op->op;
776}
777
778static int filter_opstack_pop(struct filter_parse_state *ps)
779{
780 struct opstack_op *opstack_op;
781 int op;
782
783 if (filter_opstack_empty(ps))
784 return OP_NONE;
785
786 opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
787 op = opstack_op->op;
788 list_del(&opstack_op->list);
789
790 kfree(opstack_op);
791
792 return op;
793}
794
/* Pop (and thereby free) every entry left on the operator stack. */
static void filter_opstack_clear(struct filter_parse_state *ps)
{
	for (;;) {
		if (filter_opstack_empty(ps))
			break;
		filter_opstack_pop(ps);
	}
}
800
801static char *curr_operand(struct filter_parse_state *ps)
802{
803 return ps->operand.string;
804}
805
806static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
807{
808 struct postfix_elt *elt;
809
810 elt = kmalloc(sizeof(*elt), GFP_KERNEL);
811 if (!elt)
812 return -ENOMEM;
813
814 elt->op = OP_NONE;
815 elt->operand = kstrdup(operand, GFP_KERNEL);
816 if (!elt->operand) {
817 kfree(elt);
818 return -ENOMEM;
819 }
820
821 list_add_tail(&elt->list, &ps->postfix);
822
823 return 0;
824}
825
826static int postfix_append_op(struct filter_parse_state *ps, int op)
827{
828 struct postfix_elt *elt;
829
830 elt = kmalloc(sizeof(*elt), GFP_KERNEL);
831 if (!elt)
832 return -ENOMEM;
833
834 elt->op = op;
835 elt->operand = NULL;
836
837 list_add_tail(&elt->list, &ps->postfix);
838
839 return 0;
840}
841
842static void postfix_clear(struct filter_parse_state *ps)
843{
844 struct postfix_elt *elt;
845
846 while (!list_empty(&ps->postfix)) {
847 elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
848 kfree(elt->operand);
849 list_del(&elt->list);
850 }
851}
323 852
324 events_for_each(call) { 853static int filter_parse(struct filter_parse_state *ps)
325 int err; 854{
855 int in_string = 0;
856 int op, top_op;
857 char ch;
326 858
327 if (!call->name || !call->regfunc) 859 while ((ch = infix_next(ps))) {
860 if (ch == '"') {
861 in_string ^= 1;
328 continue; 862 continue;
863 }
329 864
330 if (strcmp(call->system, system->name)) 865 if (in_string)
866 goto parse_operand;
867
868 if (isspace(ch))
331 continue; 869 continue;
332 870
333 if (!find_event_field(call, pred->field_name)) 871 if (is_op_char(ps, ch)) {
872 op = infix_get_op(ps, ch);
873 if (op == OP_NONE) {
874 parse_error(ps, FILT_ERR_INVALID_OP, 0);
875 return -EINVAL;
876 }
877
878 if (strlen(curr_operand(ps))) {
879 postfix_append_operand(ps, curr_operand(ps));
880 clear_operand_string(ps);
881 }
882
883 while (!filter_opstack_empty(ps)) {
884 top_op = filter_opstack_top(ps);
885 if (!is_precedence_lower(ps, top_op, op)) {
886 top_op = filter_opstack_pop(ps);
887 postfix_append_op(ps, top_op);
888 continue;
889 }
890 break;
891 }
892
893 filter_opstack_push(ps, op);
334 continue; 894 continue;
895 }
335 896
336 event_pred = copy_pred(pred); 897 if (ch == '(') {
337 if (!event_pred) 898 filter_opstack_push(ps, OP_OPEN_PAREN);
338 goto oom; 899 continue;
900 }
339 901
340 err = filter_add_pred(call, event_pred); 902 if (ch == ')') {
341 if (err) 903 if (strlen(curr_operand(ps))) {
342 filter_free_pred(event_pred); 904 postfix_append_operand(ps, curr_operand(ps));
343 if (err == -ENOMEM) 905 clear_operand_string(ps);
344 goto oom; 906 }
907
908 top_op = filter_opstack_pop(ps);
909 while (top_op != OP_NONE) {
910 if (top_op == OP_OPEN_PAREN)
911 break;
912 postfix_append_op(ps, top_op);
913 top_op = filter_opstack_pop(ps);
914 }
915 if (top_op == OP_NONE) {
916 parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
917 return -EINVAL;
918 }
919 continue;
920 }
921parse_operand:
922 if (append_operand_char(ps, ch)) {
923 parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
924 return -EINVAL;
925 }
926 }
927
928 if (strlen(curr_operand(ps)))
929 postfix_append_operand(ps, curr_operand(ps));
930
931 while (!filter_opstack_empty(ps)) {
932 top_op = filter_opstack_pop(ps);
933 if (top_op == OP_NONE)
934 break;
935 if (top_op == OP_OPEN_PAREN) {
936 parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
937 return -EINVAL;
938 }
939 postfix_append_op(ps, top_op);
345 } 940 }
346 941
347 return 0; 942 return 0;
943}
348 944
349oom: 945static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
350 system->preds[i] = NULL; 946{
351 return -ENOMEM; 947 struct filter_pred *pred;
948
949 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
950 if (!pred)
951 return NULL;
952
953 pred->field_name = kstrdup(operand1, GFP_KERNEL);
954 if (!pred->field_name) {
955 kfree(pred);
956 return NULL;
957 }
958
959 strcpy(pred->str_val, operand2);
960 pred->str_len = strlen(operand2);
961
962 pred->op = op;
963
964 return pred;
352} 965}
353 966
354int filter_parse(char **pbuf, struct filter_pred *pred) 967static struct filter_pred *create_logical_pred(int op)
355{ 968{
356 char *tmp, *tok, *val_str = NULL; 969 struct filter_pred *pred;
357 int tok_n = 0;
358 970
359 /* field ==/!= number, or/and field ==/!= number, number */ 971 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
360 while ((tok = strsep(pbuf, " \n"))) { 972 if (!pred)
361 if (tok_n == 0) { 973 return NULL;
362 if (!strcmp(tok, "0")) { 974
363 pred->clear = 1; 975 pred->op = op;
364 return 0; 976
365 } else if (!strcmp(tok, "&&")) { 977 return pred;
366 pred->or = 0; 978}
367 pred->compound = 1; 979
368 } else if (!strcmp(tok, "||")) { 980static int check_preds(struct filter_parse_state *ps)
369 pred->or = 1; 981{
370 pred->compound = 1; 982 int n_normal_preds = 0, n_logical_preds = 0;
371 } else 983 struct postfix_elt *elt;
372 pred->field_name = tok; 984
373 tok_n = 1; 985 list_for_each_entry(elt, &ps->postfix, list) {
986 if (elt->op == OP_NONE)
987 continue;
988
989 if (elt->op == OP_AND || elt->op == OP_OR) {
990 n_logical_preds++;
374 continue; 991 continue;
375 } 992 }
376 if (tok_n == 1) { 993 n_normal_preds++;
377 if (!pred->field_name) 994 }
378 pred->field_name = tok; 995
379 else if (!strcmp(tok, "!=")) 996 if (!n_normal_preds || n_logical_preds >= n_normal_preds) {
380 pred->not = 1; 997 parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
381 else if (!strcmp(tok, "==")) 998 return -EINVAL;
382 pred->not = 0; 999 }
1000
1001 return 0;
1002}
1003
1004static int replace_preds(struct event_subsystem *system,
1005 struct ftrace_event_call *call,
1006 struct filter_parse_state *ps,
1007 char *filter_string)
1008{
1009 char *operand1 = NULL, *operand2 = NULL;
1010 struct filter_pred *pred;
1011 struct postfix_elt *elt;
1012 int err;
1013
1014 err = check_preds(ps);
1015 if (err)
1016 return err;
1017
1018 list_for_each_entry(elt, &ps->postfix, list) {
1019 if (elt->op == OP_NONE) {
1020 if (!operand1)
1021 operand1 = elt->operand;
1022 else if (!operand2)
1023 operand2 = elt->operand;
383 else { 1024 else {
384 pred->field_name = NULL; 1025 parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
385 return -EINVAL; 1026 return -EINVAL;
386 } 1027 }
387 tok_n = 2;
388 continue; 1028 continue;
389 } 1029 }
390 if (tok_n == 2) { 1030
391 if (pred->compound) { 1031 if (elt->op == OP_AND || elt->op == OP_OR) {
392 if (!strcmp(tok, "!=")) 1032 pred = create_logical_pred(elt->op);
393 pred->not = 1; 1033 if (call) {
394 else if (!strcmp(tok, "==")) 1034 err = filter_add_pred(ps, call, pred);
395 pred->not = 0; 1035 filter_free_pred(pred);
396 else { 1036 } else
397 pred->field_name = NULL; 1037 err = filter_add_subsystem_pred(ps, system,
398 return -EINVAL; 1038 pred, filter_string);
399 } 1039 if (err)
400 } else { 1040 return err;
401 val_str = tok; 1041
402 break; /* done */ 1042 operand1 = operand2 = NULL;
403 }
404 tok_n = 3;
405 continue; 1043 continue;
406 } 1044 }
407 if (tok_n == 3) { 1045
408 val_str = tok; 1046 if (!operand1 || !operand2) {
409 break; /* done */ 1047 parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
1048 return -EINVAL;
410 } 1049 }
1050
1051 pred = create_pred(elt->op, operand1, operand2);
1052 if (call) {
1053 err = filter_add_pred(ps, call, pred);
1054 filter_free_pred(pred);
1055 } else
1056 err = filter_add_subsystem_pred(ps, system, pred,
1057 filter_string);
1058 if (err)
1059 return err;
1060
1061 operand1 = operand2 = NULL;
411 } 1062 }
412 1063
413 if (!val_str) { 1064 return 0;
414 pred->field_name = NULL; 1065}
415 return -EINVAL; 1066
1067int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
1068{
1069 int err;
1070
1071 struct filter_parse_state *ps;
1072
1073 mutex_lock(&filter_mutex);
1074
1075 if (!strcmp(strstrip(filter_string), "0")) {
1076 filter_disable_preds(call);
1077 remove_filter_string(call->filter);
1078 mutex_unlock(&filter_mutex);
1079 return 0;
416 } 1080 }
417 1081
418 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); 1082 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
419 if (!pred->field_name) 1083 if (!ps)
420 return -ENOMEM; 1084 return -ENOMEM;
421 1085
422 pred->val = simple_strtoull(val_str, &tmp, 0); 1086 filter_disable_preds(call);
423 if (tmp == val_str) { 1087 replace_filter_string(call->filter, filter_string);
424 pred->str_val = kstrdup(val_str, GFP_KERNEL);
425 if (!pred->str_val)
426 return -ENOMEM;
427 } else if (*tmp != '\0')
428 return -EINVAL;
429 1088
430 return 0; 1089 parse_init(ps, filter_ops, filter_string);
1090 err = filter_parse(ps);
1091 if (err) {
1092 append_filter_err(ps, call->filter);
1093 goto out;
1094 }
1095
1096 err = replace_preds(NULL, call, ps, filter_string);
1097 if (err)
1098 append_filter_err(ps, call->filter);
1099
1100out:
1101 filter_opstack_clear(ps);
1102 postfix_clear(ps);
1103 kfree(ps);
1104
1105 mutex_unlock(&filter_mutex);
1106
1107 return err;
431} 1108}
432 1109
1110int apply_subsystem_event_filter(struct event_subsystem *system,
1111 char *filter_string)
1112{
1113 int err;
1114
1115 struct filter_parse_state *ps;
1116
1117 mutex_lock(&filter_mutex);
1118
1119 if (!strcmp(strstrip(filter_string), "0")) {
1120 filter_free_subsystem_preds(system);
1121 remove_filter_string(system->filter);
1122 mutex_unlock(&filter_mutex);
1123 return 0;
1124 }
1125
1126 ps = kzalloc(sizeof(*ps), GFP_KERNEL);
1127 if (!ps)
1128 return -ENOMEM;
1129
1130 filter_free_subsystem_preds(system);
1131 replace_filter_string(system->filter, filter_string);
1132
1133 parse_init(ps, filter_ops, filter_string);
1134 err = filter_parse(ps);
1135 if (err) {
1136 append_filter_err(ps, system->filter);
1137 goto out;
1138 }
1139
1140 err = replace_preds(system, NULL, ps, filter_string);
1141 if (err)
1142 append_filter_err(ps, system->filter);
1143
1144out:
1145 filter_opstack_clear(ps);
1146 postfix_clear(ps);
1147 kfree(ps);
1148
1149 mutex_unlock(&filter_mutex);
1150
1151 return err;
1152}
433 1153
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644
index 38985f9b379c..000000000000
--- a/kernel/trace/trace_events_stage_1.h
+++ /dev/null
@@ -1,39 +0,0 @@
1/*
2 * Stage 1 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * struct ftrace_raw_<call> {
7 * struct trace_entry ent;
8 * <type> <item>;
9 * <type2> <item2>[<len>];
10 * [...]
11 * };
12 *
13 * The <type> <item> is created by the __field(type, item) macro or
14 * the __array(type2, item2, len) macro.
15 * We simply do "type item;", and that will create the fields
16 * in the structure.
17 */
18
19#undef TRACE_FORMAT
20#define TRACE_FORMAT(call, proto, args, fmt)
21
22#undef __array
23#define __array(type, item, len) type item[len];
24
25#undef __field
26#define __field(type, item) type item;
27
28#undef TP_STRUCT__entry
29#define TP_STRUCT__entry(args...) args
30
31#undef TRACE_EVENT
32#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
33 struct ftrace_raw_##name { \
34 struct trace_entry ent; \
35 tstruct \
36 }; \
37 static struct ftrace_event_call event_##name
38
39#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644
index d363c6672c6c..000000000000
--- a/kernel/trace/trace_events_stage_2.h
+++ /dev/null
@@ -1,176 +0,0 @@
1/*
2 * Stage 2 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * enum print_line_t
7 * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
8 * {
9 * struct trace_seq *s = &iter->seq;
10 * struct ftrace_raw_<call> *field; <-- defined in stage 1
11 * struct trace_entry *entry;
12 * int ret;
13 *
14 * entry = iter->ent;
15 *
16 * if (entry->type != event_<call>.id) {
17 * WARN_ON_ONCE(1);
18 * return TRACE_TYPE_UNHANDLED;
19 * }
20 *
21 * field = (typeof(field))entry;
22 *
23 * ret = trace_seq_printf(s, <TP_printk> "\n");
24 * if (!ret)
25 * return TRACE_TYPE_PARTIAL_LINE;
26 *
27 * return TRACE_TYPE_HANDLED;
28 * }
29 *
30 * This is the method used to print the raw event to the trace
31 * output format. Note, this is not needed if the data is read
32 * in binary.
33 */
34
35#undef __entry
36#define __entry field
37
38#undef TP_printk
39#define TP_printk(fmt, args...) fmt "\n", args
40
41#undef TRACE_EVENT
42#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
43enum print_line_t \
44ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
45{ \
46 struct trace_seq *s = &iter->seq; \
47 struct ftrace_raw_##call *field; \
48 struct trace_entry *entry; \
49 int ret; \
50 \
51 entry = iter->ent; \
52 \
53 if (entry->type != event_##call.id) { \
54 WARN_ON_ONCE(1); \
55 return TRACE_TYPE_UNHANDLED; \
56 } \
57 \
58 field = (typeof(field))entry; \
59 \
60 ret = trace_seq_printf(s, #call ": " print); \
61 if (!ret) \
62 return TRACE_TYPE_PARTIAL_LINE; \
63 \
64 return TRACE_TYPE_HANDLED; \
65}
66
67#include <trace/trace_event_types.h>
68
69/*
70 * Setup the showing format of trace point.
71 *
72 * int
73 * ftrace_format_##call(struct trace_seq *s)
74 * {
75 * struct ftrace_raw_##call field;
76 * int ret;
77 *
78 * ret = trace_seq_printf(s, #type " " #item ";"
79 * " offset:%u; size:%u;\n",
80 * offsetof(struct ftrace_raw_##call, item),
81 * sizeof(field.item));
82 *
83 * }
84 */
85
86#undef TP_STRUCT__entry
87#define TP_STRUCT__entry(args...) args
88
89#undef __field
90#define __field(type, item) \
91 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
92 "offset:%u;\tsize:%u;\n", \
93 (unsigned int)offsetof(typeof(field), item), \
94 (unsigned int)sizeof(field.item)); \
95 if (!ret) \
96 return 0;
97
98#undef __array
99#define __array(type, item, len) \
100 ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
101 "offset:%u;\tsize:%u;\n", \
102 (unsigned int)offsetof(typeof(field), item), \
103 (unsigned int)sizeof(field.item)); \
104 if (!ret) \
105 return 0;
106
107#undef __entry
108#define __entry REC
109
110#undef TP_printk
111#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
112
113#undef TP_fast_assign
114#define TP_fast_assign(args...) args
115
116#undef TRACE_EVENT
117#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
118static int \
119ftrace_format_##call(struct trace_seq *s) \
120{ \
121 struct ftrace_raw_##call field; \
122 int ret; \
123 \
124 tstruct; \
125 \
126 trace_seq_printf(s, "\nprint fmt: " print); \
127 \
128 return ret; \
129}
130
131#include <trace/trace_event_types.h>
132
133#undef __field
134#define __field(type, item) \
135 ret = trace_define_field(event_call, #type, #item, \
136 offsetof(typeof(field), item), \
137 sizeof(field.item)); \
138 if (ret) \
139 return ret;
140
141#undef __array
142#define __array(type, item, len) \
143 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
144 offsetof(typeof(field), item), \
145 sizeof(field.item)); \
146 if (ret) \
147 return ret;
148
149#define __common_field(type, item) \
150 ret = trace_define_field(event_call, #type, "common_" #item, \
151 offsetof(typeof(field.ent), item), \
152 sizeof(field.ent.item)); \
153 if (ret) \
154 return ret;
155
156#undef TRACE_EVENT
157#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
158int \
159ftrace_define_fields_##call(void) \
160{ \
161 struct ftrace_raw_##call field; \
162 struct ftrace_event_call *event_call = &event_##call; \
163 int ret; \
164 \
165 __common_field(unsigned char, type); \
166 __common_field(unsigned char, flags); \
167 __common_field(unsigned char, preempt_count); \
168 __common_field(int, pid); \
169 __common_field(int, tgid); \
170 \
171 tstruct; \
172 \
173 return ret; \
174}
175
176#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644
index 9d2fa78cecca..000000000000
--- a/kernel/trace/trace_events_stage_3.h
+++ /dev/null
@@ -1,281 +0,0 @@
1/*
2 * Stage 3 of the trace events.
3 *
4 * Override the macros in <trace/trace_event_types.h> to include the following:
5 *
6 * static void ftrace_event_<call>(proto)
7 * {
8 * event_trace_printk(_RET_IP_, "<call>: " <fmt>);
9 * }
10 *
11 * static int ftrace_reg_event_<call>(void)
12 * {
13 * int ret;
14 *
15 * ret = register_trace_<call>(ftrace_event_<call>);
16 * if (ret)
17 * pr_info("event trace: Could not activate trace point "
18 * "probe to <call>");
19 * return ret;
20 * }
21 *
22 * static void ftrace_unreg_event_<call>(void)
23 * {
24 * unregister_trace_<call>(ftrace_event_<call>);
25 * }
26 *
27 * For those macros defined with TRACE_FORMAT:
28 *
29 * static struct ftrace_event_call __used
30 * __attribute__((__aligned__(4)))
31 * __attribute__((section("_ftrace_events"))) event_<call> = {
32 * .name = "<call>",
33 * .regfunc = ftrace_reg_event_<call>,
34 * .unregfunc = ftrace_unreg_event_<call>,
35 * }
36 *
37 *
38 * For those macros defined with TRACE_EVENT:
39 *
40 * static struct ftrace_event_call event_<call>;
41 *
42 * static void ftrace_raw_event_<call>(proto)
43 * {
44 * struct ring_buffer_event *event;
45 * struct ftrace_raw_<call> *entry; <-- defined in stage 1
46 * unsigned long irq_flags;
47 * int pc;
48 *
49 * local_save_flags(irq_flags);
50 * pc = preempt_count();
51 *
52 * event = trace_current_buffer_lock_reserve(event_<call>.id,
53 * sizeof(struct ftrace_raw_<call>),
54 * irq_flags, pc);
55 * if (!event)
56 * return;
57 * entry = ring_buffer_event_data(event);
58 *
59 * <assign>; <-- Here we assign the entries by the __field and
60 * __array macros.
61 *
62 * trace_current_buffer_unlock_commit(event, irq_flags, pc);
63 * }
64 *
65 * static int ftrace_raw_reg_event_<call>(void)
66 * {
67 * int ret;
68 *
69 * ret = register_trace_<call>(ftrace_raw_event_<call>);
70 * if (ret)
71 * pr_info("event trace: Could not activate trace point "
72 * "probe to <call>");
73 * return ret;
74 * }
75 *
76 * static void ftrace_raw_unreg_event_<call>(void)
77 * {
78 * unregister_trace_<call>(ftrace_raw_event_<call>);
79 * }
80 *
81 * static struct trace_event ftrace_event_type_<call> = {
82 * .trace = ftrace_raw_output_<call>, <-- stage 2
83 * };
84 *
85 * static int ftrace_raw_init_event_<call>(void)
86 * {
87 * int id;
88 *
89 * id = register_ftrace_event(&ftrace_event_type_<call>);
90 * if (!id)
91 * return -ENODEV;
92 * event_<call>.id = id;
93 * return 0;
94 * }
95 *
96 * static struct ftrace_event_call __used
97 * __attribute__((__aligned__(4)))
98 * __attribute__((section("_ftrace_events"))) event_<call> = {
99 * .name = "<call>",
100 * .system = "<system>",
101 * .raw_init = ftrace_raw_init_event_<call>,
102 * .regfunc = ftrace_raw_reg_event_<call>,
103 * .unregfunc = ftrace_raw_unreg_event_<call>,
104 * .show_format = ftrace_format_<call>,
105 * }
106 *
107 */
108
109#undef TP_FMT
110#define TP_FMT(fmt, args...) fmt "\n", ##args
111
112#ifdef CONFIG_EVENT_PROFILE
113#define _TRACE_PROFILE(call, proto, args) \
114static void ftrace_profile_##call(proto) \
115{ \
116 extern void perf_tpcounter_event(int); \
117 perf_tpcounter_event(event_##call.id); \
118} \
119 \
120static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
121{ \
122 int ret = 0; \
123 \
124 if (!atomic_inc_return(&call->profile_count)) \
125 ret = register_trace_##call(ftrace_profile_##call); \
126 \
127 return ret; \
128} \
129 \
130static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
131{ \
132 if (atomic_add_negative(-1, &call->profile_count)) \
133 unregister_trace_##call(ftrace_profile_##call); \
134}
135
136#define _TRACE_PROFILE_INIT(call) \
137 .profile_count = ATOMIC_INIT(-1), \
138 .profile_enable = ftrace_profile_enable_##call, \
139 .profile_disable = ftrace_profile_disable_##call,
140
141#else
142#define _TRACE_PROFILE(call, proto, args)
143#define _TRACE_PROFILE_INIT(call)
144#endif
145
146#define _TRACE_FORMAT(call, proto, args, fmt) \
147static void ftrace_event_##call(proto) \
148{ \
149 event_trace_printk(_RET_IP_, #call ": " fmt); \
150} \
151 \
152static int ftrace_reg_event_##call(void) \
153{ \
154 int ret; \
155 \
156 ret = register_trace_##call(ftrace_event_##call); \
157 if (ret) \
158 pr_info("event trace: Could not activate trace point " \
159 "probe to " #call "\n"); \
160 return ret; \
161} \
162 \
163static void ftrace_unreg_event_##call(void) \
164{ \
165 unregister_trace_##call(ftrace_event_##call); \
166} \
167 \
168static struct ftrace_event_call event_##call; \
169 \
170static int ftrace_init_event_##call(void) \
171{ \
172 int id; \
173 \
174 id = register_ftrace_event(NULL); \
175 if (!id) \
176 return -ENODEV; \
177 event_##call.id = id; \
178 return 0; \
179}
180
181#undef TRACE_FORMAT
182#define TRACE_FORMAT(call, proto, args, fmt) \
183_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \
184_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
185static struct ftrace_event_call __used \
186__attribute__((__aligned__(4))) \
187__attribute__((section("_ftrace_events"))) event_##call = { \
188 .name = #call, \
189 .system = __stringify(TRACE_SYSTEM), \
190 .raw_init = ftrace_init_event_##call, \
191 .regfunc = ftrace_reg_event_##call, \
192 .unregfunc = ftrace_unreg_event_##call, \
193 _TRACE_PROFILE_INIT(call) \
194}
195
196#undef __entry
197#define __entry entry
198
199#undef TRACE_EVENT
200#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
201_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
202 \
203static struct ftrace_event_call event_##call; \
204 \
205static void ftrace_raw_event_##call(proto) \
206{ \
207 struct ftrace_event_call *call = &event_##call; \
208 struct ring_buffer_event *event; \
209 struct ftrace_raw_##call *entry; \
210 unsigned long irq_flags; \
211 int pc; \
212 \
213 local_save_flags(irq_flags); \
214 pc = preempt_count(); \
215 \
216 event = trace_current_buffer_lock_reserve(event_##call.id, \
217 sizeof(struct ftrace_raw_##call), \
218 irq_flags, pc); \
219 if (!event) \
220 return; \
221 entry = ring_buffer_event_data(event); \
222 \
223 assign; \
224 \
225 if (call->preds && !filter_match_preds(call, entry)) \
226 ring_buffer_event_discard(event); \
227 \
228 trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
229 \
230} \
231 \
232static int ftrace_raw_reg_event_##call(void) \
233{ \
234 int ret; \
235 \
236 ret = register_trace_##call(ftrace_raw_event_##call); \
237 if (ret) \
238 pr_info("event trace: Could not activate trace point " \
239 "probe to " #call "\n"); \
240 return ret; \
241} \
242 \
243static void ftrace_raw_unreg_event_##call(void) \
244{ \
245 unregister_trace_##call(ftrace_raw_event_##call); \
246} \
247 \
248static struct trace_event ftrace_event_type_##call = { \
249 .trace = ftrace_raw_output_##call, \
250}; \
251 \
252static int ftrace_raw_init_event_##call(void) \
253{ \
254 int id; \
255 \
256 id = register_ftrace_event(&ftrace_event_type_##call); \
257 if (!id) \
258 return -ENODEV; \
259 event_##call.id = id; \
260 INIT_LIST_HEAD(&event_##call.fields); \
261 return 0; \
262} \
263 \
264static struct ftrace_event_call __used \
265__attribute__((__aligned__(4))) \
266__attribute__((section("_ftrace_events"))) event_##call = { \
267 .name = #call, \
268 .system = __stringify(TRACE_SYSTEM), \
269 .raw_init = ftrace_raw_init_event_##call, \
270 .regfunc = ftrace_raw_reg_event_##call, \
271 .unregfunc = ftrace_raw_unreg_event_##call, \
272 .show_format = ftrace_format_##call, \
273 .define_fields = ftrace_define_fields_##call, \
274 _TRACE_PROFILE_INIT(call) \
275}
276
277#include <trace/trace_event_types.h>
278
279#undef _TRACE_PROFILE
280#undef _TRACE_PROFILE_INIT
281
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 07a22c33ebf3..d06cf898dc86 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -19,8 +19,12 @@
19#undef TRACE_STRUCT 19#undef TRACE_STRUCT
20#define TRACE_STRUCT(args...) args 20#define TRACE_STRUCT(args...) args
21 21
22extern void __bad_type_size(void);
23
22#undef TRACE_FIELD 24#undef TRACE_FIELD
23#define TRACE_FIELD(type, item, assign) \ 25#define TRACE_FIELD(type, item, assign) \
26 if (sizeof(type) != sizeof(field.item)) \
27 __bad_type_size(); \
24 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ 28 ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
25 "offset:%u;\tsize:%u;\n", \ 29 "offset:%u;\tsize:%u;\n", \
26 (unsigned int)offsetof(typeof(field), item), \ 30 (unsigned int)offsetof(typeof(field), item), \
@@ -30,7 +34,7 @@
30 34
31 35
32#undef TRACE_FIELD_SPECIAL 36#undef TRACE_FIELD_SPECIAL
33#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ 37#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
34 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ 38 ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \
35 "offset:%u;\tsize:%u;\n", \ 39 "offset:%u;\tsize:%u;\n", \
36 (unsigned int)offsetof(typeof(field), item), \ 40 (unsigned int)offsetof(typeof(field), item), \
@@ -46,6 +50,9 @@
46 if (!ret) \ 50 if (!ret) \
47 return 0; 51 return 0;
48 52
53#undef TRACE_FIELD_SIGN
54#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
55 TRACE_FIELD(type, item, assign)
49 56
50#undef TP_RAW_FMT 57#undef TP_RAW_FMT
51#define TP_RAW_FMT(args...) args 58#define TP_RAW_FMT(args...) args
@@ -65,6 +72,22 @@ ftrace_format_##call(struct trace_seq *s) \
65 return ret; \ 72 return ret; \
66} 73}
67 74
75#undef TRACE_EVENT_FORMAT_NOFILTER
76#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
77 tpfmt) \
78static int \
79ftrace_format_##call(struct trace_seq *s) \
80{ \
81 struct args field; \
82 int ret; \
83 \
84 tstruct; \
85 \
86 trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \
87 \
88 return ret; \
89}
90
68#include "trace_event_types.h" 91#include "trace_event_types.h"
69 92
70#undef TRACE_ZERO_CHAR 93#undef TRACE_ZERO_CHAR
@@ -78,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s) \
78#define TRACE_FIELD(type, item, assign)\ 101#define TRACE_FIELD(type, item, assign)\
79 entry->item = assign; 102 entry->item = assign;
80 103
104#undef TRACE_FIELD_SIGN
105#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
106 TRACE_FIELD(type, item, assign)
107
81#undef TP_CMD 108#undef TP_CMD
82#define TP_CMD(cmd...) cmd 109#define TP_CMD(cmd...) cmd
83 110
@@ -85,18 +112,95 @@ ftrace_format_##call(struct trace_seq *s) \
85#define TRACE_ENTRY entry 112#define TRACE_ENTRY entry
86 113
87#undef TRACE_FIELD_SPECIAL 114#undef TRACE_FIELD_SPECIAL
88#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ 115#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
89 cmd; 116 cmd;
90 117
91#undef TRACE_EVENT_FORMAT 118#undef TRACE_EVENT_FORMAT
92#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ 119#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
120int ftrace_define_fields_##call(void); \
121static int ftrace_raw_init_event_##call(void); \
122 \
123struct ftrace_event_call __used \
124__attribute__((__aligned__(4))) \
125__attribute__((section("_ftrace_events"))) event_##call = { \
126 .name = #call, \
127 .id = proto, \
128 .system = __stringify(TRACE_SYSTEM), \
129 .raw_init = ftrace_raw_init_event_##call, \
130 .show_format = ftrace_format_##call, \
131 .define_fields = ftrace_define_fields_##call, \
132}; \
133static int ftrace_raw_init_event_##call(void) \
134{ \
135 INIT_LIST_HEAD(&event_##call.fields); \
136 init_preds(&event_##call); \
137 return 0; \
138} \
139
140#undef TRACE_EVENT_FORMAT_NOFILTER
141#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
142 tpfmt) \
93 \ 143 \
94static struct ftrace_event_call __used \ 144struct ftrace_event_call __used \
95__attribute__((__aligned__(4))) \ 145__attribute__((__aligned__(4))) \
96__attribute__((section("_ftrace_events"))) event_##call = { \ 146__attribute__((section("_ftrace_events"))) event_##call = { \
97 .name = #call, \ 147 .name = #call, \
98 .id = proto, \ 148 .id = proto, \
99 .system = __stringify(TRACE_SYSTEM), \ 149 .system = __stringify(TRACE_SYSTEM), \
100 .show_format = ftrace_format_##call, \ 150 .show_format = ftrace_format_##call, \
151};
152
153#include "trace_event_types.h"
154
155#undef TRACE_FIELD
156#define TRACE_FIELD(type, item, assign) \
157 ret = trace_define_field(event_call, #type, #item, \
158 offsetof(typeof(field), item), \
159 sizeof(field.item), is_signed_type(type)); \
160 if (ret) \
161 return ret;
162
163#undef TRACE_FIELD_SPECIAL
164#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \
165 ret = trace_define_field(event_call, #type "[" #len "]", #item, \
166 offsetof(typeof(field), item), \
167 sizeof(field.item), 0); \
168 if (ret) \
169 return ret;
170
171#undef TRACE_FIELD_SIGN
172#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \
173 ret = trace_define_field(event_call, #type, #item, \
174 offsetof(typeof(field), item), \
175 sizeof(field.item), is_signed); \
176 if (ret) \
177 return ret;
178
179#undef TRACE_FIELD_ZERO_CHAR
180#define TRACE_FIELD_ZERO_CHAR(item)
181
182#undef TRACE_EVENT_FORMAT
183#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
184int \
185ftrace_define_fields_##call(void) \
186{ \
187 struct ftrace_event_call *event_call = &event_##call; \
188 struct args field; \
189 int ret; \
190 \
191 __common_field(unsigned char, type, 0); \
192 __common_field(unsigned char, flags, 0); \
193 __common_field(unsigned char, preempt_count, 0); \
194 __common_field(int, pid, 1); \
195 __common_field(int, tgid, 1); \
196 \
197 tstruct; \
198 \
199 return ret; \
101} 200}
201
202#undef TRACE_EVENT_FORMAT_NOFILTER
203#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \
204 tpfmt)
205
102#include "trace_event_types.h" 206#include "trace_event_types.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d28687e7b3a7..10f6ad7d85f6 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
78 current->ret_stack[index].ret = ret; 78 current->ret_stack[index].ret = ret;
79 current->ret_stack[index].func = func; 79 current->ret_stack[index].func = func;
80 current->ret_stack[index].calltime = calltime; 80 current->ret_stack[index].calltime = calltime;
81 current->ret_stack[index].subtime = 0;
81 *depth = index; 82 *depth = index;
82 83
83 return 0; 84 return 0;
84} 85}
85 86
86/* Retrieve a function return address to the trace stack on thread info.*/ 87/* Retrieve a function return address to the trace stack on thread info.*/
87void 88static void
88ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) 89ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
89{ 90{
90 int index; 91 int index;
@@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
104 trace->calltime = current->ret_stack[index].calltime; 105 trace->calltime = current->ret_stack[index].calltime;
105 trace->overrun = atomic_read(&current->trace_overrun); 106 trace->overrun = atomic_read(&current->trace_overrun);
106 trace->depth = index; 107 trace->depth = index;
107 barrier();
108 current->curr_ret_stack--;
109
110} 108}
111 109
112/* 110/*
@@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void)
121 ftrace_pop_return_trace(&trace, &ret); 119 ftrace_pop_return_trace(&trace, &ret);
122 trace.rettime = trace_clock_local(); 120 trace.rettime = trace_clock_local();
123 ftrace_graph_return(&trace); 121 ftrace_graph_return(&trace);
122 barrier();
123 current->curr_ret_stack--;
124 124
125 if (unlikely(!ret)) { 125 if (unlikely(!ret)) {
126 ftrace_graph_stop(); 126 ftrace_graph_stop();
@@ -426,8 +426,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
426 return TRACE_TYPE_HANDLED; 426 return TRACE_TYPE_HANDLED;
427} 427}
428 428
429static enum print_line_t 429enum print_line_t
430print_graph_duration(unsigned long long duration, struct trace_seq *s) 430trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
431{ 431{
432 unsigned long nsecs_rem = do_div(duration, 1000); 432 unsigned long nsecs_rem = do_div(duration, 1000);
433 /* log10(ULONG_MAX) + '\0' */ 433 /* log10(ULONG_MAX) + '\0' */
@@ -464,12 +464,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
464 if (!ret) 464 if (!ret)
465 return TRACE_TYPE_PARTIAL_LINE; 465 return TRACE_TYPE_PARTIAL_LINE;
466 } 466 }
467 return TRACE_TYPE_HANDLED;
468}
469
470static enum print_line_t
471print_graph_duration(unsigned long long duration, struct trace_seq *s)
472{
473 int ret;
474
475 ret = trace_print_graph_duration(duration, s);
476 if (ret != TRACE_TYPE_HANDLED)
477 return ret;
467 478
468 ret = trace_seq_printf(s, "| "); 479 ret = trace_seq_printf(s, "| ");
469 if (!ret) 480 if (!ret)
470 return TRACE_TYPE_PARTIAL_LINE; 481 return TRACE_TYPE_PARTIAL_LINE;
471 return TRACE_TYPE_HANDLED;
472 482
483 return TRACE_TYPE_HANDLED;
473} 484}
474 485
475/* Case of a leaf function on its call entry */ 486/* Case of a leaf function on its call entry */
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index 7bfdf4c2347f..8683d50a753a 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -168,6 +168,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
168 168
169void trace_hw_branch(u64 from, u64 to) 169void trace_hw_branch(u64 from, u64 to)
170{ 170{
171 struct ftrace_event_call *call = &event_hw_branch;
171 struct trace_array *tr = hw_branch_trace; 172 struct trace_array *tr = hw_branch_trace;
172 struct ring_buffer_event *event; 173 struct ring_buffer_event *event;
173 struct hw_branch_entry *entry; 174 struct hw_branch_entry *entry;
@@ -194,7 +195,8 @@ void trace_hw_branch(u64 from, u64 to)
194 entry->ent.type = TRACE_HW_BRANCHES; 195 entry->ent.type = TRACE_HW_BRANCHES;
195 entry->from = from; 196 entry->from = from;
196 entry->to = to; 197 entry->to = to;
197 trace_buffer_unlock_commit(tr, event, 0, 0); 198 if (!filter_check_discard(call, entry, tr->buffer, event))
199 trace_buffer_unlock_commit(tr, event, 0, 0);
198 200
199 out: 201 out:
200 atomic_dec(&tr->data[cpu]->disabled); 202 atomic_dec(&tr->data[cpu]->disabled);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 8e37fcddd8b4..d53b45ed0806 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,8 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/time.h>
13
12#include <asm/atomic.h> 14#include <asm/atomic.h>
13 15
14#include "trace.h" 16#include "trace.h"
@@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
174 struct mmiotrace_rw *rw; 176 struct mmiotrace_rw *rw;
175 struct trace_seq *s = &iter->seq; 177 struct trace_seq *s = &iter->seq;
176 unsigned long long t = ns2usecs(iter->ts); 178 unsigned long long t = ns2usecs(iter->ts);
177 unsigned long usec_rem = do_div(t, 1000000ULL); 179 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
178 unsigned secs = (unsigned long)t; 180 unsigned secs = (unsigned long)t;
179 int ret = 1; 181 int ret = 1;
180 182
@@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
221 struct mmiotrace_map *m; 223 struct mmiotrace_map *m;
222 struct trace_seq *s = &iter->seq; 224 struct trace_seq *s = &iter->seq;
223 unsigned long long t = ns2usecs(iter->ts); 225 unsigned long long t = ns2usecs(iter->ts);
224 unsigned long usec_rem = do_div(t, 1000000ULL); 226 unsigned long usec_rem = do_div(t, USEC_PER_SEC);
225 unsigned secs = (unsigned long)t; 227 unsigned secs = (unsigned long)t;
226 int ret; 228 int ret;
227 229
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 64b54a59c55b..8bd9a2c1a46a 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -19,6 +19,16 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
19 19
20static int next_event_type = __TRACE_LAST_TYPE + 1; 20static int next_event_type = __TRACE_LAST_TYPE + 1;
21 21
22void trace_print_seq(struct seq_file *m, struct trace_seq *s)
23{
24 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
25
26 s->buffer[len] = 0;
27 seq_puts(m, s->buffer);
28
29 trace_seq_init(s);
30}
31
22enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) 32enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
23{ 33{
24 struct trace_seq *s = &iter->seq; 34 struct trace_seq *s = &iter->seq;
@@ -84,6 +94,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
84 94
85 return len; 95 return len;
86} 96}
97EXPORT_SYMBOL_GPL(trace_seq_printf);
87 98
88int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) 99int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
89{ 100{
@@ -472,6 +483,36 @@ struct trace_event *ftrace_find_event(int type)
472 return NULL; 483 return NULL;
473} 484}
474 485
486static LIST_HEAD(ftrace_event_list);
487
488static int trace_search_list(struct list_head **list)
489{
490 struct trace_event *e;
491 int last = __TRACE_LAST_TYPE;
492
493 if (list_empty(&ftrace_event_list)) {
494 *list = &ftrace_event_list;
495 return last + 1;
496 }
497
498 /*
499 * We used up all possible max events,
500 * let's see if somebody freed one.
501 */
502 list_for_each_entry(e, &ftrace_event_list, list) {
503 if (e->type != last + 1)
504 break;
505 last++;
506 }
507
508 /* Did we use up all 65 thousand events??? */
509 if ((last + 1) > FTRACE_MAX_EVENT)
510 return 0;
511
512 *list = &e->list;
513 return last + 1;
514}
515
475/** 516/**
476 * register_ftrace_event - register output for an event type 517 * register_ftrace_event - register output for an event type
477 * @event: the event type to register 518 * @event: the event type to register
@@ -494,20 +535,40 @@ int register_ftrace_event(struct trace_event *event)
494 535
495 mutex_lock(&trace_event_mutex); 536 mutex_lock(&trace_event_mutex);
496 537
497 if (!event) { 538 if (WARN_ON(!event))
498 ret = next_event_type++;
499 goto out; 539 goto out;
500 }
501 540
502 if (!event->type) 541 INIT_LIST_HEAD(&event->list);
503 event->type = next_event_type++; 542
504 else if (event->type > __TRACE_LAST_TYPE) { 543 if (!event->type) {
544 struct list_head *list = NULL;
545
546 if (next_event_type > FTRACE_MAX_EVENT) {
547
548 event->type = trace_search_list(&list);
549 if (!event->type)
550 goto out;
551
552 } else {
553
554 event->type = next_event_type++;
555 list = &ftrace_event_list;
556 }
557
558 if (WARN_ON(ftrace_find_event(event->type)))
559 goto out;
560
561 list_add_tail(&event->list, list);
562
563 } else if (event->type > __TRACE_LAST_TYPE) {
505 printk(KERN_WARNING "Need to add type to trace.h\n"); 564 printk(KERN_WARNING "Need to add type to trace.h\n");
506 WARN_ON(1); 565 WARN_ON(1);
507 }
508
509 if (ftrace_find_event(event->type))
510 goto out; 566 goto out;
567 } else {
568 /* Is this event already used */
569 if (ftrace_find_event(event->type))
570 goto out;
571 }
511 572
512 if (event->trace == NULL) 573 if (event->trace == NULL)
513 event->trace = trace_nop_print; 574 event->trace = trace_nop_print;
@@ -528,6 +589,7 @@ int register_ftrace_event(struct trace_event *event)
528 589
529 return ret; 590 return ret;
530} 591}
592EXPORT_SYMBOL_GPL(register_ftrace_event);
531 593
532/** 594/**
533 * unregister_ftrace_event - remove a no longer used event 595 * unregister_ftrace_event - remove a no longer used event
@@ -537,10 +599,12 @@ int unregister_ftrace_event(struct trace_event *event)
537{ 599{
538 mutex_lock(&trace_event_mutex); 600 mutex_lock(&trace_event_mutex);
539 hlist_del(&event->node); 601 hlist_del(&event->node);
602 list_del(&event->list);
540 mutex_unlock(&trace_event_mutex); 603 mutex_unlock(&trace_event_mutex);
541 604
542 return 0; 605 return 0;
543} 606}
607EXPORT_SYMBOL_GPL(unregister_ftrace_event);
544 608
545/* 609/*
546 * Standard events 610 * Standard events
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index e0bde39c2dd9..6e220a8e5706 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -1,41 +1,17 @@
1#ifndef __TRACE_EVENTS_H 1#ifndef __TRACE_EVENTS_H
2#define __TRACE_EVENTS_H 2#define __TRACE_EVENTS_H
3 3
4#include <linux/trace_seq.h>
4#include "trace.h" 5#include "trace.h"
5 6
6typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
7 int flags);
8
9struct trace_event {
10 struct hlist_node node;
11 int type;
12 trace_print_func trace;
13 trace_print_func raw;
14 trace_print_func hex;
15 trace_print_func binary;
16};
17
18extern enum print_line_t 7extern enum print_line_t
19trace_print_bprintk_msg_only(struct trace_iterator *iter); 8trace_print_bprintk_msg_only(struct trace_iterator *iter);
20extern enum print_line_t 9extern enum print_line_t
21trace_print_printk_msg_only(struct trace_iterator *iter); 10trace_print_printk_msg_only(struct trace_iterator *iter);
22 11
23extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
24 __attribute__ ((format (printf, 2, 3)));
25extern int
26trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
27extern int 12extern int
28seq_print_ip_sym(struct trace_seq *s, unsigned long ip, 13seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
29 unsigned long sym_flags); 14 unsigned long sym_flags);
30extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
31 size_t cnt);
32extern int trace_seq_puts(struct trace_seq *s, const char *str);
33extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
34extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
35extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
36 size_t len);
37extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
38extern int trace_seq_path(struct trace_seq *s, struct path *path);
39extern int seq_print_userip_objs(const struct userstack_entry *entry, 15extern int seq_print_userip_objs(const struct userstack_entry *entry,
40 struct trace_seq *s, unsigned long sym_flags); 16 struct trace_seq *s, unsigned long sym_flags);
41extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, 17extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
@@ -45,8 +21,6 @@ extern int trace_print_context(struct trace_iterator *iter);
45extern int trace_print_lat_context(struct trace_iterator *iter); 21extern int trace_print_lat_context(struct trace_iterator *iter);
46 22
47extern struct trace_event *ftrace_find_event(int type); 23extern struct trace_event *ftrace_find_event(int type);
48extern int register_ftrace_event(struct trace_event *event);
49extern int unregister_ftrace_event(struct trace_event *event);
50 24
51extern enum print_line_t trace_nop_print(struct trace_iterator *iter, 25extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
52 int flags); 26 int flags);
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index 118439709fb7..8a30d9874cd4 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type,
36 36
37static void probe_power_end(struct power_trace *it) 37static void probe_power_end(struct power_trace *it)
38{ 38{
39 struct ftrace_event_call *call = &event_power;
39 struct ring_buffer_event *event; 40 struct ring_buffer_event *event;
40 struct trace_power *entry; 41 struct trace_power *entry;
41 struct trace_array_cpu *data; 42 struct trace_array_cpu *data;
@@ -54,7 +55,8 @@ static void probe_power_end(struct power_trace *it)
54 goto out; 55 goto out;
55 entry = ring_buffer_event_data(event); 56 entry = ring_buffer_event_data(event);
56 entry->state_data = *it; 57 entry->state_data = *it;
57 trace_buffer_unlock_commit(tr, event, 0, 0); 58 if (!filter_check_discard(call, entry, tr->buffer, event))
59 trace_buffer_unlock_commit(tr, event, 0, 0);
58 out: 60 out:
59 preempt_enable(); 61 preempt_enable();
60} 62}
@@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it)
62static void probe_power_mark(struct power_trace *it, unsigned int type, 64static void probe_power_mark(struct power_trace *it, unsigned int type,
63 unsigned int level) 65 unsigned int level)
64{ 66{
67 struct ftrace_event_call *call = &event_power;
65 struct ring_buffer_event *event; 68 struct ring_buffer_event *event;
66 struct trace_power *entry; 69 struct trace_power *entry;
67 struct trace_array_cpu *data; 70 struct trace_array_cpu *data;
@@ -84,7 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
84 goto out; 87 goto out;
85 entry = ring_buffer_event_data(event); 88 entry = ring_buffer_event_data(event);
86 entry->state_data = *it; 89 entry->state_data = *it;
87 trace_buffer_unlock_commit(tr, event, 0, 0); 90 if (!filter_check_discard(call, entry, tr->buffer, event))
91 trace_buffer_unlock_commit(tr, event, 0, 0);
88 out: 92 out:
89 preempt_enable(); 93 preempt_enable();
90} 94}
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index eb81556107fe..9bece9687b62 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = {
245static __init int init_trace_printk_function_export(void) 245static __init int init_trace_printk_function_export(void)
246{ 246{
247 struct dentry *d_tracer; 247 struct dentry *d_tracer;
248 struct dentry *entry;
249 248
250 d_tracer = tracing_init_dentry(); 249 d_tracer = tracing_init_dentry();
251 if (!d_tracer) 250 if (!d_tracer)
252 return 0; 251 return 0;
253 252
254 entry = debugfs_create_file("printk_formats", 0444, d_tracer, 253 trace_create_file("printk_formats", 0444, d_tracer,
255 NULL, &ftrace_formats_fops); 254 NULL, &ftrace_formats_fops);
256 if (!entry)
257 pr_warning("Could not create debugfs "
258 "'printk_formats' entry\n");
259 255
260 return 0; 256 return 0;
261} 257}
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 9117cea6f1ae..a98106dd979c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -10,7 +10,7 @@
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h> 13#include <trace/events/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
@@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
29 int cpu; 29 int cpu;
30 int pc; 30 int pc;
31 31
32 if (!sched_ref || sched_stopped) 32 if (unlikely(!sched_ref))
33 return; 33 return;
34 34
35 tracing_record_cmdline(prev); 35 tracing_record_cmdline(prev);
36 tracing_record_cmdline(next); 36 tracing_record_cmdline(next);
37 37
38 if (!tracer_enabled) 38 if (!tracer_enabled || sched_stopped)
39 return; 39 return;
40 40
41 pc = preempt_count(); 41 pc = preempt_count();
@@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
56 unsigned long flags; 56 unsigned long flags;
57 int cpu, pc; 57 int cpu, pc;
58 58
59 if (!likely(tracer_enabled)) 59 if (unlikely(!sched_ref))
60 return; 60 return;
61 61
62 pc = preempt_count();
63 tracing_record_cmdline(current); 62 tracing_record_cmdline(current);
64 63
65 if (sched_stopped) 64 if (!tracer_enabled || sched_stopped)
66 return; 65 return;
67 66
67 pc = preempt_count();
68 local_irq_save(flags); 68 local_irq_save(flags);
69 cpu = raw_smp_processor_id(); 69 cpu = raw_smp_processor_id();
70 data = ctx_trace->data[cpu]; 70 data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5bc00e8f153e..eacb27225173 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <trace/sched.h> 18#include <trace/events/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
138 138
139 pc = preempt_count(); 139 pc = preempt_count();
140 140
141 /* The task we are waiting for is waking up */
142 data = wakeup_trace->data[wakeup_cpu];
143
144 /* disable local data, not wakeup_cpu data */ 141 /* disable local data, not wakeup_cpu data */
145 cpu = raw_smp_processor_id(); 142 cpu = raw_smp_processor_id();
146 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); 143 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
154 if (unlikely(!tracer_enabled || next != wakeup_task)) 151 if (unlikely(!tracer_enabled || next != wakeup_task))
155 goto out_unlock; 152 goto out_unlock;
156 153
154 /* The task we are waiting for is waking up */
155 data = wakeup_trace->data[wakeup_cpu];
156
157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); 157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 159
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c750f65f9661..1796f00524e1 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace);
352static __init int stack_trace_init(void) 352static __init int stack_trace_init(void)
353{ 353{
354 struct dentry *d_tracer; 354 struct dentry *d_tracer;
355 struct dentry *entry;
356 355
357 d_tracer = tracing_init_dentry(); 356 d_tracer = tracing_init_dentry();
358 357
359 entry = debugfs_create_file("stack_max_size", 0644, d_tracer, 358 trace_create_file("stack_max_size", 0644, d_tracer,
360 &max_stack_size, &stack_max_size_fops); 359 &max_stack_size, &stack_max_size_fops);
361 if (!entry)
362 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
363 360
364 entry = debugfs_create_file("stack_trace", 0444, d_tracer, 361 trace_create_file("stack_trace", 0444, d_tracer,
365 NULL, &stack_trace_fops); 362 NULL, &stack_trace_fops);
366 if (!entry)
367 pr_warning("Could not create debugfs 'stack_trace' entry\n");
368 363
369 if (stack_tracer_enabled) 364 if (stack_tracer_enabled)
370 register_ftrace_function(&trace_ops); 365 register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index acdebd771a93..fdde3a4a94cd 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -85,7 +85,7 @@ static int stat_seq_init(struct tracer_stat_session *session)
85 if (!ts->stat_cmp) 85 if (!ts->stat_cmp)
86 ts->stat_cmp = dummy_cmp; 86 ts->stat_cmp = dummy_cmp;
87 87
88 stat = ts->stat_start(); 88 stat = ts->stat_start(ts);
89 if (!stat) 89 if (!stat)
90 goto exit; 90 goto exit;
91 91
diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h
index 202274cf7f3d..f3546a2cd826 100644
--- a/kernel/trace/trace_stat.h
+++ b/kernel/trace/trace_stat.h
@@ -12,7 +12,7 @@ struct tracer_stat {
12 /* The name of your stat file */ 12 /* The name of your stat file */
13 const char *name; 13 const char *name;
14 /* Iteration over statistic entries */ 14 /* Iteration over statistic entries */
15 void *(*stat_start)(void); 15 void *(*stat_start)(struct tracer_stat *trace);
16 void *(*stat_next)(void *prev, int idx); 16 void *(*stat_next)(void *prev, int idx);
17 /* Compare two entries for stats sorting */ 17 /* Compare two entries for stats sorting */
18 int (*stat_cmp)(void *p1, void *p2); 18 int (*stat_cmp)(void *p1, void *p2);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 91fd19c2149f..e04b76cc238a 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = {
321 321
322void init_tracer_sysprof_debugfs(struct dentry *d_tracer) 322void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
323{ 323{
324 struct dentry *entry;
325 324
326 entry = debugfs_create_file("sysprof_sample_period", 0644, 325 trace_create_file("sysprof_sample_period", 0644,
327 d_tracer, NULL, &sysprof_sample_fops); 326 d_tracer, NULL, &sysprof_sample_fops);
328 if (entry)
329 return;
330 pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
331} 327}
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 797201e4a137..984b9175c13d 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -152,7 +152,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
152 return ret; 152 return ret;
153} 153}
154 154
155static void *workqueue_stat_start(void) 155static void *workqueue_stat_start(struct tracer_stat *trace)
156{ 156{
157 int cpu; 157 int cpu;
158 void *ret = NULL; 158 void *ret = NULL;
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00d2f91..f85831da9080 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,7 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <trace/kmemtrace.h> 105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h> 106#include <linux/rcupdate.h>
107#include <linux/string.h> 107#include <linux/string.h>
108#include <linux/uaccess.h> 108#include <linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index a2d4ab32198d..494f05f19417 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -65,7 +65,7 @@
65#include <linux/module.h> 65#include <linux/module.h>
66#include <linux/rcupdate.h> 66#include <linux/rcupdate.h>
67#include <linux/list.h> 67#include <linux/list.h>
68#include <trace/kmemtrace.h> 68#include <linux/kmemtrace.h>
69#include <asm/atomic.h> 69#include <asm/atomic.h>
70 70
71/* 71/*
diff --git a/mm/slub.c b/mm/slub.c
index 7ab54ecbd3f3..ea9e7160e2e7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -16,7 +16,7 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <trace/kmemtrace.h> 19#include <linux/kmemtrace.h>
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/cpuset.h> 21#include <linux/cpuset.h>
22#include <linux/mempolicy.h> 22#include <linux/mempolicy.h>
diff --git a/mm/util.c b/mm/util.c
index 55bef160b9f1..abc65aa7cdfc 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/tracepoint.h>
8#include <asm/uaccess.h> 7#include <asm/uaccess.h>
9 8
9#define CREATE_TRACE_POINTS
10#include <trace/events/kmem.h>
11
10/** 12/**
11 * kstrdup - allocate space for and copy an existing string 13 * kstrdup - allocate space for and copy an existing string
12 * @s: the string to duplicate 14 * @s: the string to duplicate
@@ -255,13 +257,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
255EXPORT_SYMBOL_GPL(get_user_pages_fast); 257EXPORT_SYMBOL_GPL(get_user_pages_fast);
256 258
257/* Tracepoints definitions. */ 259/* Tracepoints definitions. */
258DEFINE_TRACE(kmalloc);
259DEFINE_TRACE(kmem_cache_alloc);
260DEFINE_TRACE(kmalloc_node);
261DEFINE_TRACE(kmem_cache_alloc_node);
262DEFINE_TRACE(kfree);
263DEFINE_TRACE(kmem_cache_free);
264
265EXPORT_TRACEPOINT_SYMBOL(kmalloc); 260EXPORT_TRACEPOINT_SYMBOL(kmalloc);
266EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); 261EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
267EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); 262EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3cca99..b75b6cea49da 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -23,7 +23,7 @@
23#include <linux/bitops.h> 23#include <linux/bitops.h>
24#include <net/genetlink.h> 24#include <net/genetlink.h>
25 25
26#include <trace/skb.h> 26#include <trace/events/skb.h>
27 27
28#include <asm/unaligned.h> 28#include <asm/unaligned.h>
29 29
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb45665e4f..499a67eaf3ae 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,11 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/netlink.h> 20#include <linux/netlink.h>
21#include <linux/net_dropmon.h> 21#include <linux/net_dropmon.h>
22#include <trace/skb.h>
23 22
24#include <asm/unaligned.h> 23#include <asm/unaligned.h>
25#include <asm/bitops.h> 24#include <asm/bitops.h>
26 25
26#define CREATE_TRACE_POINTS
27#include <trace/events/skb.h>
27 28
28DEFINE_TRACE(kfree_skb);
29EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 29EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f091a5a845c1..f8bcc06ae8a0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -65,7 +65,7 @@
65 65
66#include <asm/uaccess.h> 66#include <asm/uaccess.h>
67#include <asm/system.h> 67#include <asm/system.h>
68#include <trace/skb.h> 68#include <trace/events/skb.h>
69 69
70#include "kmap_skb.h" 70#include "kmap_skb.h"
71 71
diff --git a/samples/Kconfig b/samples/Kconfig
index 4b02f5a0e656..b75d28cba3f7 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -19,6 +19,12 @@ config SAMPLE_TRACEPOINTS
19 help 19 help
20 This build tracepoints example modules. 20 This build tracepoints example modules.
21 21
22config SAMPLE_TRACE_EVENTS
23 tristate "Build trace_events examples -- loadable modules only"
24 depends on EVENT_TRACING && m
25 help
26 This builds the trace event example modules.
27
22config SAMPLE_KOBJECT 28config SAMPLE_KOBJECT
23 tristate "Build kobject examples" 29 tristate "Build kobject examples"
24 help 30 help
diff --git a/samples/Makefile b/samples/Makefile
index 10eaca89fe17..13e4b470b539 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,3 @@
1# Makefile for Linux samples code 1# Makefile for Linux samples code
2 2
3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ 3obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644
index 000000000000..0d428dc67283
--- /dev/null
+++ b/samples/trace_events/Makefile
@@ -0,0 +1,6 @@
1# builds the trace events example kernel modules;
2# then to use one (as root): insmod <module_name.ko>
3
4CFLAGS_trace-events-sample.o := -I$(src)
5
6obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644
index 000000000000..aabc4e970911
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.c
@@ -0,0 +1,52 @@
1#include <linux/module.h>
2#include <linux/kthread.h>
3
4/*
5 * Any file that uses trace points must include the header.
6 * But exactly one file must include the header with
7 * CREATE_TRACE_POINTS defined first. This will generate the C code that
8 * creates the handles for the trace points.
9 */
10#define CREATE_TRACE_POINTS
11#include "trace-events-sample.h"
12
13
14static void simple_thread_func(int cnt)
15{
16 set_current_state(TASK_INTERRUPTIBLE);
17 schedule_timeout(HZ);
18 trace_foo_bar("hello", cnt);
19}
20
21static int simple_thread(void *arg)
22{
23 int cnt = 0;
24
25 while (!kthread_should_stop())
26 simple_thread_func(cnt++);
27
28 return 0;
29}
30
31static struct task_struct *simple_tsk;
32
33static int __init trace_event_init(void)
34{
35 simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
36 if (IS_ERR(simple_tsk))
37 return -1;
38
39 return 0;
40}
41
42static void __exit trace_event_exit(void)
43{
44 kthread_stop(simple_tsk);
45}
46
47module_init(trace_event_init);
48module_exit(trace_event_exit);
49
50MODULE_AUTHOR("Steven Rostedt");
51MODULE_DESCRIPTION("trace-events-sample");
52MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644
index 000000000000..128a897687c5
--- /dev/null
+++ b/samples/trace_events/trace-events-sample.h
@@ -0,0 +1,129 @@
1/*
2 * Notice that this file is not protected like a normal header.
3 * We also must allow for rereading of this file. The
4 *
5 * || defined(TRACE_HEADER_MULTI_READ)
6 *
7 * serves this purpose.
8 */
9#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
10#define _TRACE_EVENT_SAMPLE_H
11
12/*
13 * All trace headers should include tracepoint.h, until we finally
14 * make it into a standard header.
15 */
16#include <linux/tracepoint.h>
17
18/*
19 * If TRACE_SYSTEM is defined, that will be the directory created
20 * in the ftrace directory under /debugfs/tracing/events/<system>
21 *
22 * The define_trace.h below will also look for a file name of
23 * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
24 *
25 * If you want a different system than file name, you can override
26 * the header name by defining TRACE_INCLUDE_FILE
27 *
28 * If this file was called, goofy.h, then we would define:
29 *
30 * #define TRACE_INCLUDE_FILE goofy
31 *
32 */
33#undef TRACE_SYSTEM
34#define TRACE_SYSTEM sample
35
36/*
37 * The TRACE_EVENT macro is broken up into 6 parts.
38 *
39 * name: name of the trace point. This is also how to enable the tracepoint.
40 * A function called trace_foo_bar() will be created.
41 *
42 * proto: the prototype of the function trace_foo_bar()
43 * Here it is trace_foo_bar(char *foo, int bar).
44 *
45 * args: must match the arguments in the prototype.
46 * Here it is simply "foo, bar".
47 *
48 * struct: This defines the way the data will be stored in the ring buffer.
49 * There are currently two types of elements: __field and __array.
50 * A __field is broken up into (type, name), where type can be any
51 * type but an array.
52 * For an array, there are three fields: (type, name, size). The
53 * type of elements in the array, the name of the field and the size
54 * of the array.
55 *
56 * __array( char, foo, 10) is the same as saying char foo[10].
57 *
58 * fast_assign: This is a C like function that is used to store the items
59 * into the ring buffer.
60 *
61 * printk: This is a way to print out the data in pretty print. This is
62 * useful if the system crashes and you are logging via a serial line,
63 * the data can be printed to the console using this "printk" method.
64 *
65 * Note, that for both the assign and the printk, __entry is the handler
66 * to the data structure in the ring buffer, and is defined by the
67 * TP_STRUCT__entry.
68 */
69TRACE_EVENT(foo_bar,
70
71 TP_PROTO(char *foo, int bar),
72
73 TP_ARGS(foo, bar),
74
75 TP_STRUCT__entry(
76 __array( char, foo, 10 )
77 __field( int, bar )
78 ),
79
80 TP_fast_assign(
81 strncpy(__entry->foo, foo, 10);
82 __entry->bar = bar;
83 ),
84
85 TP_printk("foo %s %d", __entry->foo, __entry->bar)
86);
87#endif
88
89/***** NOTICE! The #if protection ends here. *****/
90
91
92/*
93 * There are several ways I could have done this. If I left out the
94 * TRACE_INCLUDE_PATH, then it would default to the kernel source
95 * include/trace/events directory.
96 *
97 * I could specify a path from the define_trace.h file back to this
98 * file.
99 *
100 * #define TRACE_INCLUDE_PATH ../../samples/trace_events
101 *
102 * But I chose to simply make it use the current directory and then in
103 * the Makefile I added:
104 *
105 * CFLAGS_trace-events-sample.o := -I$(src)
106 *
107 * This will make sure the current path is part of the include
108 * structure for our file so that we can find it.
109 *
110 * I could have made only the top level directory the include:
111 *
112 * CFLAGS_trace-events-sample.o := -I$(PWD)
113 *
114 * And then let the path to this directory be the TRACE_INCLUDE_PATH:
115 *
116 * #define TRACE_INCLUDE_PATH samples/trace_events
117 *
118 * But then if something defines "samples" or "trace_events" then we
119 * could risk that being converted too, and give us an unexpected
120 * result.
121 */
122#undef TRACE_INCLUDE_PATH
123#undef TRACE_INCLUDE_FILE
124#define TRACE_INCLUDE_PATH .
125/*
126 * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
127 */
128#define TRACE_INCLUDE_FILE trace-events-sample
129#include <trace/define_trace.h>
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 3208a3a7e7fe..acd8c4a8e3e0 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1828,6 +1828,25 @@ sub reset_state {
1828 $state = 0; 1828 $state = 0;
1829} 1829}
1830 1830
# tracepoint_munge($file)
#
# Rewrites the global $prototype, when it holds a TRACE_EVENT() declaration,
# into the prototype of the corresponding trace_<name>() function so that it
# can be handed to dump_function() like any other function prototype.
#
# $file is only used to report the location in the warning that is printed
# to STDERR when the event name or the TP_PROTO() argument list cannot be
# found; in that case $prototype is left untouched.
sub tracepoint_munge($) {
    my $file = shift;
    my $tracepointname;
    my $tracepointargs;

    # The event name is the first macro argument.  Blanks around it are
    # dropped so that "TRACE_EVENT( foo," does not yield "trace_ foo".
    if ($prototype =~ m/TRACE_EVENT\(\s*(.*?)\s*,/) {
        $tracepointname = $1;
    }
    # Accept one level of nested parentheses inside TP_PROTO() so that a
    # function-pointer parameter such as "void (*fn)(int)" is not cut off
    # at its first ')'.
    if ($prototype =~ m/TP_PROTO\(((?:[^()]|\([^()]*\))*)\)/) {
        $tracepointargs = $1;
    }
    # Test for "no match" with defined() rather than comparing against a 0
    # sentinel, which would also reject a capture that is literally "0".
    if (!defined($tracepointname) || !defined($tracepointargs)) {
        print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n".
                     "$prototype\n";
    } else {
        $prototype = "static inline void trace_$tracepointname($tracepointargs)";
    }
}
1849
1831sub syscall_munge() { 1850sub syscall_munge() {
1832 my $void = 0; 1851 my $void = 0;
1833 1852
@@ -1882,6 +1901,9 @@ sub process_state3_function($$) {
1882 if ($prototype =~ /SYSCALL_DEFINE/) { 1901 if ($prototype =~ /SYSCALL_DEFINE/) {
1883 syscall_munge(); 1902 syscall_munge();
1884 } 1903 }
1904 if ($prototype =~ /TRACE_EVENT/) {
1905 tracepoint_munge($file);
1906 }
1885 dump_function($prototype, $file); 1907 dump_function($prototype, $file);
1886 reset_state(); 1908 reset_state();
1887 } 1909 }
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 409596eca124..0fae7da0529c 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -26,7 +26,7 @@
26# which will also be the location of that section after final link. 26# which will also be the location of that section after final link.
27# e.g. 27# e.g.
28# 28#
29# .section ".text.sched" 29# .section ".sched.text", "ax"
30# .globl my_func 30# .globl my_func
31# my_func: 31# my_func:
32# [...] 32# [...]
@@ -39,7 +39,7 @@
39# [...] 39# [...]
40# 40#
41# Both relocation offsets for the mcounts in the above example will be 41# Both relocation offsets for the mcounts in the above example will be
42# offset from .text.sched. If we make another file called tmp.s with: 42# offset from .sched.text. If we make another file called tmp.s with:
43# 43#
44# .section __mcount_loc 44# .section __mcount_loc
45# .quad my_func + 0x5 45# .quad my_func + 0x5
@@ -51,7 +51,7 @@
51# But this gets hard if my_func is not globl (a static function). 51# But this gets hard if my_func is not globl (a static function).
52# In such a case we have: 52# In such a case we have:
53# 53#
54# .section ".text.sched" 54# .section ".sched.text", "ax"
55# my_func: 55# my_func:
56# [...] 56# [...]
57# call mcount (offset: 0x5) 57# call mcount (offset: 0x5)