Diffstat:
-rw-r--r--  Documentation/kernel-parameters.txt | 8
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/Kconfig.debug | 4
-rw-r--r--  arch/x86/include/asm/ftrace.h | 3
-rw-r--r--  arch/x86/kernel/entry_32.S | 6
-rw-r--r--  arch/x86/kernel/entry_64.S | 5
-rw-r--r--  arch/x86/kernel/ftrace.c | 129
-rw-r--r--  arch/x86/mm/Makefile | 3
-rw-r--r--  arch/x86/mm/fault.c | 2
-rw-r--r--  drivers/char/sysrq.c | 18
-rw-r--r--  include/linux/ftrace.h | 66
-rw-r--r--  include/linux/ftrace_irq.h | 13
-rw-r--r--  include/linux/hardirq.h | 15
-rw-r--r--  include/linux/marker.h | 2
-rw-r--r--  include/linux/tracepoint.h | 4
-rw-r--r--  init/main.c | 4
-rw-r--r--  kernel/marker.c | 80
-rw-r--r--  kernel/sysctl.c | 10
-rw-r--r--  kernel/trace/Kconfig | 7
-rw-r--r--  kernel/trace/ftrace.c | 50
-rw-r--r--  kernel/trace/ring_buffer.c | 275
-rw-r--r--  kernel/trace/trace.c | 355
-rw-r--r--  kernel/trace/trace.h | 61
-rw-r--r--  kernel/trace/trace_boot.c | 44
-rw-r--r--  kernel/trace/trace_functions.c | 15
-rw-r--r--  kernel/trace/trace_irqsoff.c | 52
-rw-r--r--  kernel/trace/trace_mmiotrace.c | 22
-rw-r--r--  kernel/trace/trace_nop.c | 16
-rw-r--r--  kernel/trace/trace_sched_switch.c | 103
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 67
-rw-r--r--  kernel/trace/trace_selftest.c | 82
-rw-r--r--  kernel/trace/trace_stack.c | 8
-rw-r--r--  kernel/trace/trace_sysprof.c | 16
-rw-r--r--  kernel/tracepoint.c | 261
-rw-r--r--  scripts/Makefile.build | 12
-rwxr-xr-x  scripts/recordmcount.pl | 4
-rw-r--r--  scripts/tracing/draw_functrace.py | 130
37 files changed, 1364 insertions, 589 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c86c07459712..196fd1c62a2f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -765,6 +765,14 @@ and is between 256 and 4096 characters. It is defined in the file
765 parameter will force ia64_sal_cache_flush to call 765 parameter will force ia64_sal_cache_flush to call
766 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. 766 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
767 767
768 ftrace=[tracer]
769 [ftrace] will set and start the specified tracer
770 as early as possible in order to facilitate early
771 boot debugging.
772
773 ftrace_dump_on_oops
774 [ftrace] will dump the trace buffers on oops.
775
768 gamecon.map[2|3]= 776 gamecon.map[2|3]=
769 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 777 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
770 support via parallel port (up to 5 devices per port) 778 support via parallel port (up to 5 devices per port)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..6ab097fd5241 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86
29 select HAVE_FTRACE_MCOUNT_RECORD 29 select HAVE_FTRACE_MCOUNT_RECORD
30 select HAVE_DYNAMIC_FTRACE 30 select HAVE_DYNAMIC_FTRACE
31 select HAVE_FUNCTION_TRACER 31 select HAVE_FUNCTION_TRACER
32 select HAVE_FUNCTION_TRACE_MCOUNT_TEST
32 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) 33 select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
33 select HAVE_ARCH_KGDB if !X86_VOYAGER 34 select HAVE_ARCH_KGDB if !X86_VOYAGER
34 select HAVE_ARCH_TRACEHOOK 35 select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
186 Add a simple leak tracer to the IOMMU code. This is useful when you 186 Add a simple leak tracer to the IOMMU code. This is useful when you
187 are debugging a buggy device driver that leaks IOMMU mappings. 187 are debugging a buggy device driver that leaks IOMMU mappings.
188 188
189config MMIOTRACE_HOOKS
190 bool
191
192config MMIOTRACE 189config MMIOTRACE
193 bool "Memory mapped IO tracing" 190 bool "Memory mapped IO tracing"
194 depends on DEBUG_KERNEL && PCI 191 depends on DEBUG_KERNEL && PCI
195 select TRACING 192 select TRACING
196 select MMIOTRACE_HOOKS
197 help 193 help
198 Mmiotrace traces Memory Mapped I/O access and is meant for 194 Mmiotrace traces Memory Mapped I/O access and is meant for
199 debugging and reverse engineering. It is called from the ioremap 195 debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..f8173ed1c970 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
17 */ 17 */
18 return addr - 1; 18 return addr - 1;
19} 19}
20#endif 20#endif /* __ASSEMBLY__ */
21
22#endif /* CONFIG_FUNCTION_TRACER */ 21#endif /* CONFIG_FUNCTION_TRACER */
23 22
24#endif /* _ASM_X86_FTRACE_H */ 23#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..9134de814c97 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
1157END(mcount) 1157END(mcount)
1158 1158
1159ENTRY(ftrace_caller) 1159ENTRY(ftrace_caller)
1160 cmpl $0, function_trace_stop
1161 jne ftrace_stub
1162
1160 pushl %eax 1163 pushl %eax
1161 pushl %ecx 1164 pushl %ecx
1162 pushl %edx 1165 pushl %edx
@@ -1180,6 +1183,9 @@ END(ftrace_caller)
1180#else /* ! CONFIG_DYNAMIC_FTRACE */ 1183#else /* ! CONFIG_DYNAMIC_FTRACE */
1181 1184
1182ENTRY(mcount) 1185ENTRY(mcount)
1186 cmpl $0, function_trace_stop
1187 jne ftrace_stub
1188
1183 cmpl $ftrace_stub, ftrace_trace_function 1189 cmpl $ftrace_stub, ftrace_trace_function
1184 jnz trace 1190 jnz trace
1185.globl ftrace_stub 1191.globl ftrace_stub
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..08aa6b10933c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
68END(mcount) 68END(mcount)
69 69
70ENTRY(ftrace_caller) 70ENTRY(ftrace_caller)
71 cmpl $0, function_trace_stop
72 jne ftrace_stub
71 73
72 /* taken from glibc */ 74 /* taken from glibc */
73 subq $0x38, %rsp 75 subq $0x38, %rsp
@@ -103,6 +105,9 @@ END(ftrace_caller)
103 105
104#else /* ! CONFIG_DYNAMIC_FTRACE */ 106#else /* ! CONFIG_DYNAMIC_FTRACE */
105ENTRY(mcount) 107ENTRY(mcount)
108 cmpl $0, function_trace_stop
109 jne ftrace_stub
110
106 cmpq $ftrace_stub, ftrace_trace_function 111 cmpq $ftrace_stub, ftrace_trace_function
107 jnz trace 112 jnz trace
108.globl ftrace_stub 113.globl ftrace_stub
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..69149337f2fe 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -56,6 +56,133 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
56 return calc.code; 56 return calc.code;
57} 57}
58 58
59/*
 60 * Modifying code must take extra care. On an SMP machine, if
 61 * the code being modified is also being executed on another CPU,
 62 * that CPU will have undefined results and possibly take a GPF.
 63 * We use kstop_machine to stop other CPUs from executing code.
64 * But this does not stop NMIs from happening. We still need
65 * to protect against that. We separate out the modification of
66 * the code to take care of this.
67 *
68 * Two buffers are added: An IP buffer and a "code" buffer.
69 *
70 * 1) Put the instruction pointer into the IP buffer
71 * and the new code into the "code" buffer.
72 * 2) Set a flag that says we are modifying code
73 * 3) Wait for any running NMIs to finish.
74 * 4) Write the code
 75 * 5) Clear the flag.
76 * 6) Wait for any running NMIs to finish.
77 *
78 * If an NMI is executed, the first thing it does is to call
79 * "ftrace_nmi_enter". This will check if the flag is set to write
80 * and if it is, it will write what is in the IP and "code" buffers.
81 *
82 * The trick is, it does not matter if everyone is writing the same
83 * content to the code location. Also, if a CPU is executing code
84 * it is OK to write to that code location if the contents being written
85 * are the same as what exists.
86 */
87
88static atomic_t in_nmi = ATOMIC_INIT(0);
89static int mod_code_status; /* holds return value of text write */
90static int mod_code_write; /* set when NMI should do the write */
91static void *mod_code_ip; /* holds the IP to write to */
92static void *mod_code_newcode; /* holds the text to write to the IP */
93
94static unsigned nmi_wait_count;
95static atomic_t nmi_update_count = ATOMIC_INIT(0);
96
97int ftrace_arch_read_dyn_info(char *buf, int size)
98{
99 int r;
100
101 r = snprintf(buf, size, "%u %u",
102 nmi_wait_count,
103 atomic_read(&nmi_update_count));
104 return r;
105}
106
107static void ftrace_mod_code(void)
108{
109 /*
 110 * Yes, more than one CPU can be writing to mod_code_status
 111 * (and to the code itself).
 112 * But if one write were to fail, then they all should fail, and if
 113 * one were to succeed, then they all should succeed.
114 */
115 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
116 MCOUNT_INSN_SIZE);
117
118}
119
120void ftrace_nmi_enter(void)
121{
122 atomic_inc(&in_nmi);
123 /* Must have in_nmi seen before reading write flag */
124 smp_mb();
125 if (mod_code_write) {
126 ftrace_mod_code();
127 atomic_inc(&nmi_update_count);
128 }
129}
130
131void ftrace_nmi_exit(void)
132{
133 /* Finish all executions before clearing in_nmi */
134 smp_wmb();
135 atomic_dec(&in_nmi);
136}
137
138static void wait_for_nmi(void)
139{
140 int waited = 0;
141
142 while (atomic_read(&in_nmi)) {
143 waited = 1;
144 cpu_relax();
145 }
146
147 if (waited)
148 nmi_wait_count++;
149}
150
151static int
152do_ftrace_mod_code(unsigned long ip, void *new_code)
153{
154 mod_code_ip = (void *)ip;
155 mod_code_newcode = new_code;
156
157 /* The buffers need to be visible before we let NMIs write them */
158 smp_wmb();
159
160 mod_code_write = 1;
161
162 /* Make sure write bit is visible before we wait on NMIs */
163 smp_mb();
164
165 wait_for_nmi();
166
167 /* Make sure all running NMIs have finished before we write the code */
168 smp_mb();
169
170 ftrace_mod_code();
171
172 /* Make sure the write happens before clearing the bit */
173 smp_wmb();
174
175 mod_code_write = 0;
176
177 /* make sure NMIs see the cleared bit */
178 smp_mb();
179
180 wait_for_nmi();
181
182 return mod_code_status;
183}
184
185
59int 186int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 187ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 188 unsigned char *new_code)
@@ -81,7 +208,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
81 return -EINVAL; 208 return -EINVAL;
82 209
83 /* replace the text with the new text */ 210 /* replace the text with the new text */
84 if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) 211 if (do_ftrace_mod_code(ip, new_code))
85 return -EPERM; 212 return -EPERM;
86 213
87 sync_core(); 214 sync_core();
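
The comment block above describes the writer/NMI handshake implemented by do_ftrace_mod_code() and ftrace_nmi_enter(). As a reading aid, here is a minimal user-space sketch of that protocol. It is not the kernel code: C11 sequentially consistent atomics stand in for atomic_t and the smp_mb()/smp_wmb() barriers, and all names are illustrative.

#include <stdatomic.h>
#include <string.h>

#define INSN_SIZE 5

static atomic_int  in_nmi;
static atomic_bool mod_code_write;              /* set while an update is pending */
static void *mod_code_ip;                       /* destination of the patch */
static unsigned char mod_code_newcode[INSN_SIZE];

static void mod_code(void)
{
        /* Every writer stores the same bytes, so duplicate writes are harmless. */
        memcpy(mod_code_ip, mod_code_newcode, INSN_SIZE);
}

void model_nmi_enter(void)                      /* models ftrace_nmi_enter() */
{
        atomic_fetch_add(&in_nmi, 1);           /* in_nmi visible before reading the flag */
        if (atomic_load(&mod_code_write))
                mod_code();
}

void model_nmi_exit(void)                       /* models ftrace_nmi_exit() */
{
        atomic_fetch_sub(&in_nmi, 1);
}

static void wait_for_nmi(void)
{
        while (atomic_load(&in_nmi))
                ;                               /* cpu_relax() in the kernel */
}

void model_modify_code(void *ip, const unsigned char *new_code)
{
        mod_code_ip = ip;                       /* step 1: fill the buffers */
        memcpy(mod_code_newcode, new_code, INSN_SIZE);

        atomic_store(&mod_code_write, 1);       /* step 2: announce the update */
        wait_for_nmi();                         /* step 3: let in-flight NMIs drain */
        mod_code();                             /* step 4: do the write ourselves */
        atomic_store(&mod_code_write, 0);       /* step 5: clear the flag */
        wait_for_nmi();                         /* step 6: wait once more */
}

static unsigned char text[INSN_SIZE];

int main(void)                                  /* single-threaded demo run */
{
        unsigned char nop5[INSN_SIZE] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

        model_modify_code(text, nop5);
        return memcmp(text, nop5, INSN_SIZE) != 0;
}
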
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
8 8
9obj-$(CONFIG_HIGHMEM) += highmem_32.o 9obj-$(CONFIG_HIGHMEM) += highmem_32.o
10 10
11obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
12obj-$(CONFIG_MMIOTRACE) += mmiotrace.o 11obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
13mmiotrace-y := pf_in.o mmio-mod.o 12mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
14obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 13obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
15 14
16obj-$(CONFIG_NUMA) += numa_$(BITS).o 15obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..4152d3c3b138 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
53 53
54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) 54static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
55{ 55{
56#ifdef CONFIG_MMIOTRACE_HOOKS 56#ifdef CONFIG_MMIOTRACE
57 if (unlikely(is_kmmio_active())) 57 if (unlikely(is_kmmio_active()))
58 if (kmmio_handler(regs, addr) == 1) 58 if (kmmio_handler(regs, addr) == 1)
59 return -1; 59 return -1;
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index ce0d9da52a8a..94966edfb44d 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -274,6 +274,22 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
274 .enable_mask = SYSRQ_ENABLE_DUMP, 274 .enable_mask = SYSRQ_ENABLE_DUMP,
275}; 275};
276 276
277#ifdef CONFIG_TRACING
278#include <linux/ftrace.h>
279
280static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
281{
282 ftrace_dump();
283}
284static struct sysrq_key_op sysrq_ftrace_dump_op = {
285 .handler = sysrq_ftrace_dump,
286 .help_msg = "dumpZ-ftrace-buffer",
287 .action_msg = "Dump ftrace buffer",
288 .enable_mask = SYSRQ_ENABLE_DUMP,
289};
290#else
291#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0)
292#endif
277 293
278static void sysrq_handle_showmem(int key, struct tty_struct *tty) 294static void sysrq_handle_showmem(int key, struct tty_struct *tty)
279{ 295{
@@ -406,7 +422,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
406 NULL, /* x */ 422 NULL, /* x */
407 /* y: May be registered on sparc64 for global register dump */ 423 /* y: May be registered on sparc64 for global register dump */
408 NULL, /* y */ 424 NULL, /* y */
409 NULL /* z */ 425 &sysrq_ftrace_dump_op, /* z */
410}; 426};
411 427
412/* key2index calculation, -1 on invalid index */ 428/* key2index calculation, -1 on invalid index */
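
With this hunk the ftrace buffers can be dumped from the keyboard (Alt+SysRq+Z) or by writing 'z' to /proc/sysrq-trigger. The same ftrace_dump() call is also usable directly from kernel debug code; the helper below is a hypothetical sketch, not part of this patch:

#ifdef CONFIG_TRACING
#include <linux/ftrace.h>

static void my_debug_checkpoint(int bad_state)  /* hypothetical helper */
{
        if (bad_state)
                ftrace_dump();  /* flush the trace buffers to the console */
}
#endif
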
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 703eb53cfa2b..1f5608c11023 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -23,6 +23,34 @@ struct ftrace_ops {
23 struct ftrace_ops *next; 23 struct ftrace_ops *next;
24}; 24};
25 25
26extern int function_trace_stop;
27
28/**
29 * ftrace_stop - stop function tracer.
30 *
 31 * A quick way to stop the function tracer. Note this is an on/off switch;
 32 * it is not recursive like preempt_disable.
 33 * This does not disable the calling of mcount; it only stops the tracer
 34 * callbacks from being called by mcount.
35 */
36static inline void ftrace_stop(void)
37{
38 function_trace_stop = 1;
39}
40
41/**
42 * ftrace_start - start the function tracer.
43 *
44 * This function is the inverse of ftrace_stop. This does not enable
45 * the function tracing if the function tracer is disabled. This only
46 * sets the function tracer flag to continue calling the functions
47 * from mcount.
48 */
49static inline void ftrace_start(void)
50{
51 function_trace_stop = 0;
52}
53
26/* 54/*
27 * The ftrace_ops must be a static and should also 55 * The ftrace_ops must be a static and should also
28 * be read_mostly. These functions do modify read_mostly variables 56 * be read_mostly. These functions do modify read_mostly variables
@@ -41,10 +69,11 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1);
41# define unregister_ftrace_function(ops) do { } while (0) 69# define unregister_ftrace_function(ops) do { } while (0)
42# define clear_ftrace_function(ops) do { } while (0) 70# define clear_ftrace_function(ops) do { } while (0)
43static inline void ftrace_kill(void) { } 71static inline void ftrace_kill(void) { }
72static inline void ftrace_stop(void) { }
73static inline void ftrace_start(void) { }
44#endif /* CONFIG_FUNCTION_TRACER */ 74#endif /* CONFIG_FUNCTION_TRACER */
45 75
46#ifdef CONFIG_DYNAMIC_FTRACE 76#ifdef CONFIG_DYNAMIC_FTRACE
47
48enum { 77enum {
49 FTRACE_FL_FREE = (1 << 0), 78 FTRACE_FL_FREE = (1 << 0),
50 FTRACE_FL_FAILED = (1 << 1), 79 FTRACE_FL_FAILED = (1 << 1),
@@ -74,6 +103,9 @@ extern void ftrace_caller(void);
74extern void ftrace_call(void); 103extern void ftrace_call(void);
75extern void mcount_call(void); 104extern void mcount_call(void);
76 105
106/* May be defined in arch */
107extern int ftrace_arch_read_dyn_info(char *buf, int size);
108
77/** 109/**
78 * ftrace_modify_code - modify code segment 110 * ftrace_modify_code - modify code segment
79 * @ip: the address of the code segment 111 * @ip: the address of the code segment
@@ -102,7 +134,6 @@ extern void ftrace_release(void *start, unsigned long size);
102 134
103extern void ftrace_disable_daemon(void); 135extern void ftrace_disable_daemon(void);
104extern void ftrace_enable_daemon(void); 136extern void ftrace_enable_daemon(void);
105
106#else 137#else
107# define skip_trace(ip) ({ 0; }) 138# define skip_trace(ip) ({ 0; })
108# define ftrace_force_update() ({ 0; }) 139# define ftrace_force_update() ({ 0; })
@@ -181,6 +212,11 @@ static inline void __ftrace_enabled_restore(int enabled)
181#endif 212#endif
182 213
183#ifdef CONFIG_TRACING 214#ifdef CONFIG_TRACING
215extern int ftrace_dump_on_oops;
216
217extern void tracing_start(void);
218extern void tracing_stop(void);
219
184extern void 220extern void
185ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); 221ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
186 222
@@ -211,6 +247,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
211static inline int 247static inline int
212ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); 248ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
213 249
250static inline void tracing_start(void) { }
251static inline void tracing_stop(void) { }
214static inline int 252static inline int
215ftrace_printk(const char *fmt, ...) 253ftrace_printk(const char *fmt, ...)
216{ 254{
@@ -229,6 +267,11 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { }
229#endif 267#endif
230 268
231 269
270/*
271 * Structure which defines the trace of an initcall.
272 * You don't have to fill the func field since it is
273 * only used internally by the tracer.
274 */
232struct boot_trace { 275struct boot_trace {
233 pid_t caller; 276 pid_t caller;
234 char func[KSYM_NAME_LEN]; 277 char func[KSYM_NAME_LEN];
@@ -239,13 +282,28 @@ struct boot_trace {
239}; 282};
240 283
241#ifdef CONFIG_BOOT_TRACER 284#ifdef CONFIG_BOOT_TRACER
 285/* Append the trace to the ring-buffer */
242extern void trace_boot(struct boot_trace *it, initcall_t fn); 286extern void trace_boot(struct boot_trace *it, initcall_t fn);
287
 288/* Tells the tracer that the pre-SMP initcalls are finished
 289 * so that tracing can start.
290 */
243extern void start_boot_trace(void); 291extern void start_boot_trace(void);
244extern void stop_boot_trace(void); 292
293/* Resume the tracing of other necessary events
294 * such as sched switches
295 */
296extern void enable_boot_trace(void);
297
 298/* Suspend this tracing. Actually, only the sched-switch tracing has
 299 * to be suspended; initcalls don't need it.
300 */
301extern void disable_boot_trace(void);
245#else 302#else
246static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } 303static inline void trace_boot(struct boot_trace *it, initcall_t fn) { }
247static inline void start_boot_trace(void) { } 304static inline void start_boot_trace(void) { }
248static inline void stop_boot_trace(void) { } 305static inline void enable_boot_trace(void) { }
306static inline void disable_boot_trace(void) { }
249#endif 307#endif
250 308
251 309
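
The new ftrace_stop()/ftrace_start() pair is a lightweight, non-nesting kill switch for the tracer callbacks. A hypothetical caller, assuming CONFIG_FUNCTION_TRACER, might bracket a fragile code path like this (the function shown is illustrative, not part of this patch):

#include <linux/ftrace.h>

static void enter_fragile_path(void)
{
        ftrace_stop();          /* mcount still runs, but no tracer callbacks */
        /* ... work that must not recurse into the tracer ... */
        ftrace_start();         /* clear the stop flag again */
}
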
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
new file mode 100644
index 000000000000..b1299d6729f2
--- /dev/null
+++ b/include/linux/ftrace_irq.h
@@ -0,0 +1,13 @@
1#ifndef _LINUX_FTRACE_IRQ_H
2#define _LINUX_FTRACE_IRQ_H
3
4
5#ifdef CONFIG_DYNAMIC_FTRACE
6extern void ftrace_nmi_enter(void);
7extern void ftrace_nmi_exit(void);
8#else
9static inline void ftrace_nmi_enter(void) { }
10static inline void ftrace_nmi_exit(void) { }
11#endif
12
13#endif /* _LINUX_FTRACE_IRQ_H */
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006cc94a0..89a56d79e4c6 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
4#include <linux/preempt.h> 4#include <linux/preempt.h>
5#include <linux/smp_lock.h> 5#include <linux/smp_lock.h>
6#include <linux/lockdep.h> 6#include <linux/lockdep.h>
7#include <linux/ftrace_irq.h>
7#include <asm/hardirq.h> 8#include <asm/hardirq.h>
8#include <asm/system.h> 9#include <asm/system.h>
9 10
@@ -161,7 +162,17 @@ extern void irq_enter(void);
161 */ 162 */
162extern void irq_exit(void); 163extern void irq_exit(void);
163 164
164#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) 165#define nmi_enter() \
165#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) 166 do { \
167 ftrace_nmi_enter(); \
168 lockdep_off(); \
169 __irq_enter(); \
170 } while (0)
171#define nmi_exit() \
172 do { \
173 __irq_exit(); \
174 lockdep_on(); \
175 ftrace_nmi_exit(); \
176 } while (0)
166 177
167#endif /* LINUX_HARDIRQ_H */ 178#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 889196c7fbb1..4cf45472d9f5 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function;
136 136
137extern void marker_probe_cb(const struct marker *mdata, 137extern void marker_probe_cb(const struct marker *mdata,
138 void *call_private, ...); 138 void *call_private, ...);
139extern void marker_probe_cb_noarg(const struct marker *mdata,
140 void *call_private, ...);
141 139
142/* 140/*
143 * Connect a probe to a marker. 141 * Connect a probe to a marker.
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c5bb39c7a770..63064e9403f2 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe);
112 */ 112 */
113extern int tracepoint_probe_unregister(const char *name, void *probe); 113extern int tracepoint_probe_unregister(const char *name, void *probe);
114 114
115extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
116extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
117extern void tracepoint_probe_update_all(void);
118
115struct tracepoint_iter { 119struct tracepoint_iter {
116 struct module *module; 120 struct module *module;
117 struct tracepoint *tracepoint; 121 struct tracepoint *tracepoint;
diff --git a/init/main.c b/init/main.c
index 7e117a231af1..4b03cd5656ca 100644
--- a/init/main.c
+++ b/init/main.c
@@ -711,6 +711,7 @@ int do_one_initcall(initcall_t fn)
711 it.caller = task_pid_nr(current); 711 it.caller = task_pid_nr(current);
712 printk("calling %pF @ %i\n", fn, it.caller); 712 printk("calling %pF @ %i\n", fn, it.caller);
713 it.calltime = ktime_get(); 713 it.calltime = ktime_get();
714 enable_boot_trace();
714 } 715 }
715 716
716 it.result = fn(); 717 it.result = fn();
@@ -722,6 +723,7 @@ int do_one_initcall(initcall_t fn)
722 printk("initcall %pF returned %d after %Ld usecs\n", fn, 723 printk("initcall %pF returned %d after %Ld usecs\n", fn,
723 it.result, it.duration); 724 it.result, it.duration);
724 trace_boot(&it, fn); 725 trace_boot(&it, fn);
726 disable_boot_trace();
725 } 727 }
726 728
727 msgbuf[0] = 0; 729 msgbuf[0] = 0;
@@ -882,7 +884,7 @@ static int __init kernel_init(void * unused)
882 * we're essentially up and running. Get rid of the 884 * we're essentially up and running. Get rid of the
883 * initmem segments and start the user-mode stuff.. 885 * initmem segments and start the user-mode stuff..
884 */ 886 */
885 stop_boot_trace(); 887
886 init_post(); 888 init_post();
887 return 0; 889 return 0;
888} 890}
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..2898b647d415 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
43 */ 43 */
44#define MARKER_HASH_BITS 6 44#define MARKER_HASH_BITS 6
45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) 45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
46static struct hlist_head marker_table[MARKER_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
64 void *oldptr; 65 void *oldptr;
65 int rcu_pending; 66 int rcu_pending;
66 unsigned char ptype:1; 67 unsigned char ptype:1;
68 unsigned char format_allocated:1;
67 char name[0]; /* Contains name'\0'format'\0' */ 69 char name[0]; /* Contains name'\0'format'\0' */
68}; 70};
69 71
70static struct hlist_head marker_table[MARKER_TABLE_SIZE];
71
72/** 72/**
73 * __mark_empty_function - Empty probe callback 73 * __mark_empty_function - Empty probe callback
74 * @probe_private: probe private data 74 * @probe_private: probe private data
@@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
157 * 157 *
158 * Should be connected to markers "MARK_NOARGS". 158 * Should be connected to markers "MARK_NOARGS".
159 */ 159 */
160void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) 160static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
161{ 161{
162 va_list args; /* not initialized */ 162 va_list args; /* not initialized */
163 char ptype; 163 char ptype;
@@ -197,7 +197,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
197 } 197 }
198 rcu_read_unlock_sched(); 198 rcu_read_unlock_sched();
199} 199}
200EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
201 200
202static void free_old_closure(struct rcu_head *head) 201static void free_old_closure(struct rcu_head *head)
203{ 202{
@@ -416,6 +415,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
416 e->single.probe_private = NULL; 415 e->single.probe_private = NULL;
417 e->multi = NULL; 416 e->multi = NULL;
418 e->ptype = 0; 417 e->ptype = 0;
418 e->format_allocated = 0;
419 e->refcount = 0; 419 e->refcount = 0;
420 e->rcu_pending = 0; 420 e->rcu_pending = 0;
421 hlist_add_head(&e->hlist, head); 421 hlist_add_head(&e->hlist, head);
@@ -447,6 +447,8 @@ static int remove_marker(const char *name)
447 if (e->single.func != __mark_empty_function) 447 if (e->single.func != __mark_empty_function)
448 return -EBUSY; 448 return -EBUSY;
449 hlist_del(&e->hlist); 449 hlist_del(&e->hlist);
450 if (e->format_allocated)
451 kfree(e->format);
450 /* Make sure the call_rcu has been executed */ 452 /* Make sure the call_rcu has been executed */
451 if (e->rcu_pending) 453 if (e->rcu_pending)
452 rcu_barrier_sched(); 454 rcu_barrier_sched();
@@ -457,57 +459,34 @@ static int remove_marker(const char *name)
457/* 459/*
458 * Set the mark_entry format to the format found in the element. 460 * Set the mark_entry format to the format found in the element.
459 */ 461 */
460static int marker_set_format(struct marker_entry **entry, const char *format) 462static int marker_set_format(struct marker_entry *entry, const char *format)
461{ 463{
462 struct marker_entry *e; 464 entry->format = kstrdup(format, GFP_KERNEL);
463 size_t name_len = strlen((*entry)->name) + 1; 465 if (!entry->format)
464 size_t format_len = strlen(format) + 1;
465
466
467 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
468 GFP_KERNEL);
469 if (!e)
470 return -ENOMEM; 466 return -ENOMEM;
471 memcpy(&e->name[0], (*entry)->name, name_len); 467 entry->format_allocated = 1;
472 e->format = &e->name[name_len]; 468
473 memcpy(e->format, format, format_len);
474 if (strcmp(e->format, MARK_NOARGS) == 0)
475 e->call = marker_probe_cb_noarg;
476 else
477 e->call = marker_probe_cb;
478 e->single = (*entry)->single;
479 e->multi = (*entry)->multi;
480 e->ptype = (*entry)->ptype;
481 e->refcount = (*entry)->refcount;
482 e->rcu_pending = 0;
483 hlist_add_before(&e->hlist, &(*entry)->hlist);
484 hlist_del(&(*entry)->hlist);
485 /* Make sure the call_rcu has been executed */
486 if ((*entry)->rcu_pending)
487 rcu_barrier_sched();
488 kfree(*entry);
489 *entry = e;
490 trace_mark(core_marker_format, "name %s format %s", 469 trace_mark(core_marker_format, "name %s format %s",
491 e->name, e->format); 470 entry->name, entry->format);
492 return 0; 471 return 0;
493} 472}
494 473
495/* 474/*
496 * Sets the probe callback corresponding to one marker. 475 * Sets the probe callback corresponding to one marker.
497 */ 476 */
498static int set_marker(struct marker_entry **entry, struct marker *elem, 477static int set_marker(struct marker_entry *entry, struct marker *elem,
499 int active) 478 int active)
500{ 479{
501 int ret; 480 int ret;
502 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 481 WARN_ON(strcmp(entry->name, elem->name) != 0);
503 482
504 if ((*entry)->format) { 483 if (entry->format) {
505 if (strcmp((*entry)->format, elem->format) != 0) { 484 if (strcmp(entry->format, elem->format) != 0) {
506 printk(KERN_NOTICE 485 printk(KERN_NOTICE
507 "Format mismatch for probe %s " 486 "Format mismatch for probe %s "
508 "(%s), marker (%s)\n", 487 "(%s), marker (%s)\n",
509 (*entry)->name, 488 entry->name,
510 (*entry)->format, 489 entry->format,
511 elem->format); 490 elem->format);
512 return -EPERM; 491 return -EPERM;
513 } 492 }
@@ -523,34 +502,33 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
523 * pass from a "safe" callback (with argument) to an "unsafe" 502 * pass from a "safe" callback (with argument) to an "unsafe"
524 * callback (does not set arguments). 503 * callback (does not set arguments).
525 */ 504 */
526 elem->call = (*entry)->call; 505 elem->call = entry->call;
527 /* 506 /*
528 * Sanity check : 507 * Sanity check :
529 * We only update the single probe private data when the ptr is 508 * We only update the single probe private data when the ptr is
530 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) 509 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
531 */ 510 */
532 WARN_ON(elem->single.func != __mark_empty_function 511 WARN_ON(elem->single.func != __mark_empty_function
533 && elem->single.probe_private 512 && elem->single.probe_private != entry->single.probe_private
534 != (*entry)->single.probe_private && 513 && !elem->ptype);
535 !elem->ptype); 514 elem->single.probe_private = entry->single.probe_private;
536 elem->single.probe_private = (*entry)->single.probe_private;
537 /* 515 /*
538 * Make sure the private data is valid when we update the 516 * Make sure the private data is valid when we update the
539 * single probe ptr. 517 * single probe ptr.
540 */ 518 */
541 smp_wmb(); 519 smp_wmb();
542 elem->single.func = (*entry)->single.func; 520 elem->single.func = entry->single.func;
543 /* 521 /*
544 * We also make sure that the new probe callbacks array is consistent 522 * We also make sure that the new probe callbacks array is consistent
545 * before setting a pointer to it. 523 * before setting a pointer to it.
546 */ 524 */
547 rcu_assign_pointer(elem->multi, (*entry)->multi); 525 rcu_assign_pointer(elem->multi, entry->multi);
548 /* 526 /*
549 * Update the function or multi probe array pointer before setting the 527 * Update the function or multi probe array pointer before setting the
550 * ptype. 528 * ptype.
551 */ 529 */
552 smp_wmb(); 530 smp_wmb();
553 elem->ptype = (*entry)->ptype; 531 elem->ptype = entry->ptype;
554 elem->state = active; 532 elem->state = active;
555 533
556 return 0; 534 return 0;
@@ -594,8 +572,7 @@ void marker_update_probe_range(struct marker *begin,
594 for (iter = begin; iter < end; iter++) { 572 for (iter = begin; iter < end; iter++) {
595 mark_entry = get_marker(iter->name); 573 mark_entry = get_marker(iter->name);
596 if (mark_entry) { 574 if (mark_entry) {
597 set_marker(&mark_entry, iter, 575 set_marker(mark_entry, iter, !!mark_entry->refcount);
598 !!mark_entry->refcount);
599 /* 576 /*
600 * ignore error, continue 577 * ignore error, continue
601 */ 578 */
@@ -657,7 +634,7 @@ int marker_probe_register(const char *name, const char *format,
657 ret = PTR_ERR(entry); 634 ret = PTR_ERR(entry);
658 } else if (format) { 635 } else if (format) {
659 if (!entry->format) 636 if (!entry->format)
660 ret = marker_set_format(&entry, format); 637 ret = marker_set_format(entry, format);
661 else if (strcmp(entry->format, format)) 638 else if (strcmp(entry->format, format))
662 ret = -EPERM; 639 ret = -EPERM;
663 } 640 }
@@ -848,8 +825,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
848 if (!e->ptype) { 825 if (!e->ptype) {
849 if (num == 0 && e->single.func == probe) 826 if (num == 0 && e->single.func == probe)
850 return e->single.probe_private; 827 return e->single.probe_private;
851 else
852 break;
853 } else { 828 } else {
854 struct marker_probe_closure *closure; 829 struct marker_probe_closure *closure;
855 int match = 0; 830 int match = 0;
@@ -861,6 +836,7 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
861 return closure[i].probe_private; 836 return closure[i].probe_private;
862 } 837 }
863 } 838 }
839 break;
864 } 840 }
865 } 841 }
866 return ERR_PTR(-ENOENT); 842 return ERR_PTR(-ENOENT);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d048fa2d902..65d4a9ba79e4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
484 .proc_handler = &ftrace_enable_sysctl, 484 .proc_handler = &ftrace_enable_sysctl,
485 }, 485 },
486#endif 486#endif
487#ifdef CONFIG_TRACING
488 {
489 .ctl_name = CTL_UNNUMBERED,
490 .procname = "ftrace_dump_on_oops",
491 .data = &ftrace_dump_on_oops,
492 .maxlen = sizeof(int),
493 .mode = 0644,
494 .proc_handler = &proc_dointvec,
495 },
496#endif
487#ifdef CONFIG_MODULES 497#ifdef CONFIG_MODULES
488 { 498 {
489 .ctl_name = KERN_MODPROBE, 499 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..fc4febc3334a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,13 @@ config NOP_TRACER
9config HAVE_FUNCTION_TRACER 9config HAVE_FUNCTION_TRACER
10 bool 10 bool
11 11
12config HAVE_FUNCTION_TRACE_MCOUNT_TEST
13 bool
14 help
15 This gets selected when the arch tests the function_trace_stop
16 variable at the mcount call site. Otherwise, this variable
17 is tested by the called function.
18
12config HAVE_DYNAMIC_FTRACE 19config HAVE_DYNAMIC_FTRACE
13 bool 20 bool
14 21
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 14fa52297b28..25b803559f17 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,9 @@
47int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
48static int last_ftrace_enabled; 48static int last_ftrace_enabled;
49 49
50/* Quick disabling of function tracer. */
51int function_trace_stop;
52
50/* 53/*
51 * ftrace_disabled is set when an anomaly is discovered. 54 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 55 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -63,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
63 66
64static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 67static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 68ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
69ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
66 70
67static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 71static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68{ 72{
@@ -88,7 +92,22 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
88void clear_ftrace_function(void) 92void clear_ftrace_function(void)
89{ 93{
90 ftrace_trace_function = ftrace_stub; 94 ftrace_trace_function = ftrace_stub;
95 __ftrace_trace_function = ftrace_stub;
96}
97
98#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
99/*
100 * For those archs that do not test ftrace_trace_stop in their
101 * mcount call site, we need to do it from C.
102 */
103static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
104{
105 if (function_trace_stop)
106 return;
107
108 __ftrace_trace_function(ip, parent_ip);
91} 109}
110#endif
92 111
93static int __register_ftrace_function(struct ftrace_ops *ops) 112static int __register_ftrace_function(struct ftrace_ops *ops)
94{ 113{
@@ -110,10 +129,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
110 * For one func, simply call it directly. 129 * For one func, simply call it directly.
111 * For more than one func, call the chain. 130 * For more than one func, call the chain.
112 */ 131 */
132#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
113 if (ops->next == &ftrace_list_end) 133 if (ops->next == &ftrace_list_end)
114 ftrace_trace_function = ops->func; 134 ftrace_trace_function = ops->func;
115 else 135 else
116 ftrace_trace_function = ftrace_list_func; 136 ftrace_trace_function = ftrace_list_func;
137#else
138 if (ops->next == &ftrace_list_end)
139 __ftrace_trace_function = ops->func;
140 else
141 __ftrace_trace_function = ftrace_list_func;
142 ftrace_trace_function = ftrace_test_stop_func;
143#endif
117 } 144 }
118 145
119 spin_unlock(&ftrace_lock); 146 spin_unlock(&ftrace_lock);
@@ -522,7 +549,7 @@ static void ftrace_run_update_code(int command)
522} 549}
523 550
524static ftrace_func_t saved_ftrace_func; 551static ftrace_func_t saved_ftrace_func;
525static int ftrace_start; 552static int ftrace_start_up;
526static DEFINE_MUTEX(ftrace_start_lock); 553static DEFINE_MUTEX(ftrace_start_lock);
527 554
528static void ftrace_startup(void) 555static void ftrace_startup(void)
@@ -533,8 +560,8 @@ static void ftrace_startup(void)
533 return; 560 return;
534 561
535 mutex_lock(&ftrace_start_lock); 562 mutex_lock(&ftrace_start_lock);
536 ftrace_start++; 563 ftrace_start_up++;
537 if (ftrace_start == 1) 564 if (ftrace_start_up == 1)
538 command |= FTRACE_ENABLE_CALLS; 565 command |= FTRACE_ENABLE_CALLS;
539 566
540 if (saved_ftrace_func != ftrace_trace_function) { 567 if (saved_ftrace_func != ftrace_trace_function) {
@@ -558,8 +585,8 @@ static void ftrace_shutdown(void)
558 return; 585 return;
559 586
560 mutex_lock(&ftrace_start_lock); 587 mutex_lock(&ftrace_start_lock);
561 ftrace_start--; 588 ftrace_start_up--;
562 if (!ftrace_start) 589 if (!ftrace_start_up)
563 command |= FTRACE_DISABLE_CALLS; 590 command |= FTRACE_DISABLE_CALLS;
564 591
565 if (saved_ftrace_func != ftrace_trace_function) { 592 if (saved_ftrace_func != ftrace_trace_function) {
@@ -585,8 +612,8 @@ static void ftrace_startup_sysctl(void)
585 mutex_lock(&ftrace_start_lock); 612 mutex_lock(&ftrace_start_lock);
586 /* Force update next time */ 613 /* Force update next time */
587 saved_ftrace_func = NULL; 614 saved_ftrace_func = NULL;
588 /* ftrace_start is true if we want ftrace running */ 615 /* ftrace_start_up is true if we want ftrace running */
589 if (ftrace_start) 616 if (ftrace_start_up)
590 command |= FTRACE_ENABLE_CALLS; 617 command |= FTRACE_ENABLE_CALLS;
591 618
592 ftrace_run_update_code(command); 619 ftrace_run_update_code(command);
@@ -601,8 +628,8 @@ static void ftrace_shutdown_sysctl(void)
601 return; 628 return;
602 629
603 mutex_lock(&ftrace_start_lock); 630 mutex_lock(&ftrace_start_lock);
604 /* ftrace_start is true if ftrace is running */ 631 /* ftrace_start_up is true if ftrace is running */
605 if (ftrace_start) 632 if (ftrace_start_up)
606 command |= FTRACE_DISABLE_CALLS; 633 command |= FTRACE_DISABLE_CALLS;
607 634
608 ftrace_run_update_code(command); 635 ftrace_run_update_code(command);
@@ -734,6 +761,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
734 ((iter->flags & FTRACE_ITER_FAILURES) && 761 ((iter->flags & FTRACE_ITER_FAILURES) &&
735 !(rec->flags & FTRACE_FL_FAILED)) || 762 !(rec->flags & FTRACE_FL_FAILED)) ||
736 763
764 ((iter->flags & FTRACE_ITER_FILTER) &&
765 !(rec->flags & FTRACE_FL_FILTER)) ||
766
737 ((iter->flags & FTRACE_ITER_NOTRACE) && 767 ((iter->flags & FTRACE_ITER_NOTRACE) &&
738 !(rec->flags & FTRACE_FL_NOTRACE))) { 768 !(rec->flags & FTRACE_FL_NOTRACE))) {
739 rec = NULL; 769 rec = NULL;
@@ -1182,7 +1212,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1182 1212
1183 mutex_lock(&ftrace_sysctl_lock); 1213 mutex_lock(&ftrace_sysctl_lock);
1184 mutex_lock(&ftrace_start_lock); 1214 mutex_lock(&ftrace_start_lock);
1185 if (iter->filtered && ftrace_start && ftrace_enabled) 1215 if (iter->filtered && ftrace_start_up && ftrace_enabled)
1186 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1216 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1187 mutex_unlock(&ftrace_start_lock); 1217 mutex_unlock(&ftrace_start_lock);
1188 mutex_unlock(&ftrace_sysctl_lock); 1218 mutex_unlock(&ftrace_sysctl_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b08ee9f00c8d..c04c433fbc59 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -45,6 +45,8 @@ void tracing_off(void)
45 ring_buffers_off = 1; 45 ring_buffers_off = 1;
46} 46}
47 47
48#include "trace.h"
49
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 50/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 51#define DEBUG_SHIFT 0
50 52
@@ -181,7 +183,8 @@ static inline int test_time_stamp(u64 delta)
181struct ring_buffer_per_cpu { 183struct ring_buffer_per_cpu {
182 int cpu; 184 int cpu;
183 struct ring_buffer *buffer; 185 struct ring_buffer *buffer;
184 spinlock_t lock; 186 spinlock_t reader_lock; /* serialize readers */
187 raw_spinlock_t lock;
185 struct lock_class_key lock_key; 188 struct lock_class_key lock_key;
186 struct list_head pages; 189 struct list_head pages;
187 struct buffer_page *head_page; /* read from head */ 190 struct buffer_page *head_page; /* read from head */
@@ -215,32 +218,16 @@ struct ring_buffer_iter {
215 u64 read_stamp; 218 u64 read_stamp;
216}; 219};
217 220
221/* buffer may be either ring_buffer or ring_buffer_per_cpu */
218#define RB_WARN_ON(buffer, cond) \ 222#define RB_WARN_ON(buffer, cond) \
219 do { \ 223 ({ \
220 if (unlikely(cond)) { \ 224 int _____ret = unlikely(cond); \
221 atomic_inc(&buffer->record_disabled); \ 225 if (_____ret) { \
222 WARN_ON(1); \
223 } \
224 } while (0)
225
226#define RB_WARN_ON_RET(buffer, cond) \
227 do { \
228 if (unlikely(cond)) { \
229 atomic_inc(&buffer->record_disabled); \ 226 atomic_inc(&buffer->record_disabled); \
230 WARN_ON(1); \ 227 WARN_ON(1); \
231 return -1; \
232 } \ 228 } \
233 } while (0) 229 _____ret; \
234 230 })
235#define RB_WARN_ON_ONCE(buffer, cond) \
236 do { \
237 static int once; \
238 if (unlikely(cond) && !once) { \
239 once++; \
240 atomic_inc(&buffer->record_disabled); \
241 WARN_ON(1); \
242 } \
243 } while (0)
244 231
245/** 232/**
246 * check_pages - integrity check of buffer pages 233 * check_pages - integrity check of buffer pages
@@ -254,14 +241,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
254 struct list_head *head = &cpu_buffer->pages; 241 struct list_head *head = &cpu_buffer->pages;
255 struct buffer_page *page, *tmp; 242 struct buffer_page *page, *tmp;
256 243
257 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 244 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
258 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 245 return -1;
246 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
247 return -1;
259 248
260 list_for_each_entry_safe(page, tmp, head, list) { 249 list_for_each_entry_safe(page, tmp, head, list) {
261 RB_WARN_ON_RET(cpu_buffer, 250 if (RB_WARN_ON(cpu_buffer,
262 page->list.next->prev != &page->list); 251 page->list.next->prev != &page->list))
263 RB_WARN_ON_RET(cpu_buffer, 252 return -1;
264 page->list.prev->next != &page->list); 253 if (RB_WARN_ON(cpu_buffer,
254 page->list.prev->next != &page->list))
255 return -1;
265 } 256 }
266 257
267 return 0; 258 return 0;
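
RB_WARN_ON() is now a GNU C statement expression (a GCC/Clang extension the kernel already relies on), so it evaluates to the unlikely condition and callers such as rb_check_pages() above can bail out with an ordinary if instead of crashing through BUG_ON(). A standalone user-space sketch of the idiom, with illustrative names:

#include <stdio.h>

/* Evaluates to non-zero when the condition fires, after emitting a warning. */
#define WARN_ON_RET(cond)                                          \
        ({                                                         \
                int _ret = !!(cond);                               \
                if (_ret)                                          \
                        fprintf(stderr, "warning: %s\n", #cond);   \
                _ret;                                              \
        })

static int check(int x)
{
        if (WARN_ON_RET(x < 0))
                return -1;              /* caller recovers instead of oopsing */
        return x;
}

int main(void)
{
        return check(-5) == -1 ? 0 : 1; /* exits 0: the warning path was taken */
}
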
@@ -318,7 +309,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
318 309
319 cpu_buffer->cpu = cpu; 310 cpu_buffer->cpu = cpu;
320 cpu_buffer->buffer = buffer; 311 cpu_buffer->buffer = buffer;
321 spin_lock_init(&cpu_buffer->lock); 312 spin_lock_init(&cpu_buffer->reader_lock);
313 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
322 INIT_LIST_HEAD(&cpu_buffer->pages); 314 INIT_LIST_HEAD(&cpu_buffer->pages);
323 315
324 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 316 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
@@ -467,13 +459,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
467 synchronize_sched(); 459 synchronize_sched();
468 460
469 for (i = 0; i < nr_pages; i++) { 461 for (i = 0; i < nr_pages; i++) {
470 BUG_ON(list_empty(&cpu_buffer->pages)); 462 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
463 return;
471 p = cpu_buffer->pages.next; 464 p = cpu_buffer->pages.next;
472 page = list_entry(p, struct buffer_page, list); 465 page = list_entry(p, struct buffer_page, list);
473 list_del_init(&page->list); 466 list_del_init(&page->list);
474 free_buffer_page(page); 467 free_buffer_page(page);
475 } 468 }
476 BUG_ON(list_empty(&cpu_buffer->pages)); 469 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
470 return;
477 471
478 rb_reset_cpu(cpu_buffer); 472 rb_reset_cpu(cpu_buffer);
479 473
@@ -495,7 +489,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
495 synchronize_sched(); 489 synchronize_sched();
496 490
497 for (i = 0; i < nr_pages; i++) { 491 for (i = 0; i < nr_pages; i++) {
498 BUG_ON(list_empty(pages)); 492 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
493 return;
499 p = pages->next; 494 p = pages->next;
500 page = list_entry(p, struct buffer_page, list); 495 page = list_entry(p, struct buffer_page, list);
501 list_del_init(&page->list); 496 list_del_init(&page->list);
@@ -550,7 +545,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
550 if (size < buffer_size) { 545 if (size < buffer_size) {
551 546
552 /* easy case, just free pages */ 547 /* easy case, just free pages */
553 BUG_ON(nr_pages >= buffer->pages); 548 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
549 mutex_unlock(&buffer->mutex);
550 return -1;
551 }
554 552
555 rm_pages = buffer->pages - nr_pages; 553 rm_pages = buffer->pages - nr_pages;
556 554
@@ -569,7 +567,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
569 * add these pages to the cpu_buffers. Otherwise we just free 567 * add these pages to the cpu_buffers. Otherwise we just free
570 * them all and return -ENOMEM; 568 * them all and return -ENOMEM;
571 */ 569 */
572 BUG_ON(nr_pages <= buffer->pages); 570 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
571 mutex_unlock(&buffer->mutex);
572 return -1;
573 }
574
573 new_pages = nr_pages - buffer->pages; 575 new_pages = nr_pages - buffer->pages;
574 576
575 for_each_buffer_cpu(buffer, cpu) { 577 for_each_buffer_cpu(buffer, cpu) {
@@ -592,7 +594,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
592 rb_insert_pages(cpu_buffer, &pages, new_pages); 594 rb_insert_pages(cpu_buffer, &pages, new_pages);
593 } 595 }
594 596
595 BUG_ON(!list_empty(&pages)); 597 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
598 mutex_unlock(&buffer->mutex);
599 return -1;
600 }
596 601
597 out: 602 out:
598 buffer->pages = nr_pages; 603 buffer->pages = nr_pages;
@@ -680,7 +685,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
680 head += rb_event_length(event)) { 685 head += rb_event_length(event)) {
681 686
682 event = __rb_page_index(cpu_buffer->head_page, head); 687 event = __rb_page_index(cpu_buffer->head_page, head);
683 BUG_ON(rb_null_event(event)); 688 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
689 return;
684 /* Only count data entries */ 690 /* Only count data entries */
685 if (event->type != RINGBUF_TYPE_DATA) 691 if (event->type != RINGBUF_TYPE_DATA)
686 continue; 692 continue;
@@ -733,8 +739,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
733 addr &= PAGE_MASK; 739 addr &= PAGE_MASK;
734 740
735 while (cpu_buffer->commit_page->page != (void *)addr) { 741 while (cpu_buffer->commit_page->page != (void *)addr) {
736 RB_WARN_ON(cpu_buffer, 742 if (RB_WARN_ON(cpu_buffer,
737 cpu_buffer->commit_page == cpu_buffer->tail_page); 743 cpu_buffer->commit_page == cpu_buffer->tail_page))
744 return;
738 cpu_buffer->commit_page->commit = 745 cpu_buffer->commit_page->commit =
739 cpu_buffer->commit_page->write; 746 cpu_buffer->commit_page->write;
740 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 747 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
@@ -881,7 +888,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
881 if (write > BUF_PAGE_SIZE) { 888 if (write > BUF_PAGE_SIZE) {
882 struct buffer_page *next_page = tail_page; 889 struct buffer_page *next_page = tail_page;
883 890
884 spin_lock_irqsave(&cpu_buffer->lock, flags); 891 local_irq_save(flags);
892 __raw_spin_lock(&cpu_buffer->lock);
885 893
886 rb_inc_page(cpu_buffer, &next_page); 894 rb_inc_page(cpu_buffer, &next_page);
887 895
@@ -889,7 +897,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
889 reader_page = cpu_buffer->reader_page; 897 reader_page = cpu_buffer->reader_page;
890 898
891 /* we grabbed the lock before incrementing */ 899 /* we grabbed the lock before incrementing */
892 RB_WARN_ON(cpu_buffer, next_page == reader_page); 900 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
901 goto out_unlock;
893 902
894 /* 903 /*
895 * If for some reason, we had an interrupt storm that made 904 * If for some reason, we had an interrupt storm that made
@@ -957,7 +966,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
957 rb_set_commit_to_write(cpu_buffer); 966 rb_set_commit_to_write(cpu_buffer);
958 } 967 }
959 968
960 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 969 __raw_spin_unlock(&cpu_buffer->lock);
970 local_irq_restore(flags);
961 971
962 /* fail and let the caller try again */ 972 /* fail and let the caller try again */
963 return ERR_PTR(-EAGAIN); 973 return ERR_PTR(-EAGAIN);
@@ -965,7 +975,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
965 975
966 /* We reserved something on the buffer */ 976 /* We reserved something on the buffer */
967 977
968 BUG_ON(write > BUF_PAGE_SIZE); 978 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
979 return NULL;
969 980
970 event = __rb_page_index(tail_page, tail); 981 event = __rb_page_index(tail_page, tail);
971 rb_update_event(event, type, length); 982 rb_update_event(event, type, length);
@@ -980,7 +991,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
980 return event; 991 return event;
981 992
982 out_unlock: 993 out_unlock:
983 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 994 __raw_spin_unlock(&cpu_buffer->lock);
995 local_irq_restore(flags);
984 return NULL; 996 return NULL;
985} 997}
986 998
@@ -1063,10 +1075,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1063 * storm or we have something buggy. 1075 * storm or we have something buggy.
1064 * Bail! 1076 * Bail!
1065 */ 1077 */
1066 if (unlikely(++nr_loops > 1000)) { 1078 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1067 RB_WARN_ON(cpu_buffer, 1);
1068 return NULL; 1079 return NULL;
1069 }
1070 1080
1071 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1081 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1072 1082
@@ -1169,8 +1179,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1169 return NULL; 1179 return NULL;
1170 1180
1171 /* If we are tracing schedule, we don't want to recurse */ 1181 /* If we are tracing schedule, we don't want to recurse */
1172 resched = need_resched(); 1182 resched = ftrace_preempt_disable();
1173 preempt_disable_notrace();
1174 1183
1175 cpu = raw_smp_processor_id(); 1184 cpu = raw_smp_processor_id();
1176 1185
@@ -1201,10 +1210,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1201 return event; 1210 return event;
1202 1211
1203 out: 1212 out:
1204 if (resched) 1213 ftrace_preempt_enable(resched);
1205 preempt_enable_notrace();
1206 else
1207 preempt_enable_notrace();
1208 return NULL; 1214 return NULL;
1209} 1215}
1210 1216
@@ -1246,12 +1252,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1246 /* 1252 /*
1247 * Only the last preempt count needs to restore preemption. 1253 * Only the last preempt count needs to restore preemption.
1248 */ 1254 */
1249 if (preempt_count() == 1) { 1255 if (preempt_count() == 1)
1250 if (per_cpu(rb_need_resched, cpu)) 1256 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1251 preempt_enable_no_resched_notrace(); 1257 else
1252 else
1253 preempt_enable_notrace();
1254 } else
1255 preempt_enable_no_resched_notrace(); 1258 preempt_enable_no_resched_notrace();
1256 1259
1257 return 0; 1260 return 0;
@@ -1287,8 +1290,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1287 if (atomic_read(&buffer->record_disabled)) 1290 if (atomic_read(&buffer->record_disabled))
1288 return -EBUSY; 1291 return -EBUSY;
1289 1292
1290 resched = need_resched(); 1293 resched = ftrace_preempt_disable();
1291 preempt_disable_notrace();
1292 1294
1293 cpu = raw_smp_processor_id(); 1295 cpu = raw_smp_processor_id();
1294 1296
@@ -1314,10 +1316,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1314 1316
1315 ret = 0; 1317 ret = 0;
1316 out: 1318 out:
1317 if (resched) 1319 ftrace_preempt_enable(resched);
1318 preempt_enable_no_resched_notrace();
1319 else
1320 preempt_enable_notrace();
1321 1320
1322 return ret; 1321 return ret;
1323} 1322}
@@ -1486,6 +1485,9 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1486void ring_buffer_iter_reset(struct ring_buffer_iter *iter) 1485void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1487{ 1486{
1488 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1487 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1488 unsigned long flags;
1489
1490 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1489 1491
1490 /* Iterator usage is expected to have record disabled */ 1492 /* Iterator usage is expected to have record disabled */
1491 if (list_empty(&cpu_buffer->reader_page->list)) { 1493 if (list_empty(&cpu_buffer->reader_page->list)) {
@@ -1499,6 +1501,8 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1499 iter->read_stamp = cpu_buffer->read_stamp; 1501 iter->read_stamp = cpu_buffer->read_stamp;
1500 else 1502 else
1501 iter->read_stamp = iter->head_page->time_stamp; 1503 iter->read_stamp = iter->head_page->time_stamp;
1504
1505 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1502} 1506}
1503 1507
1504/** 1508/**
@@ -1584,7 +1588,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1584 unsigned long flags; 1588 unsigned long flags;
1585 int nr_loops = 0; 1589 int nr_loops = 0;
1586 1590
1587 spin_lock_irqsave(&cpu_buffer->lock, flags); 1591 local_irq_save(flags);
1592 __raw_spin_lock(&cpu_buffer->lock);
1588 1593
1589 again: 1594 again:
1590 /* 1595 /*
@@ -1593,8 +1598,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1593 * a case where we will loop three times. There should be no 1598 * a case where we will loop three times. There should be no
1594 * reason to loop four times (that I know of). 1599 * reason to loop four times (that I know of).
1595 */ 1600 */
1596 if (unlikely(++nr_loops > 3)) { 1601 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1597 RB_WARN_ON(cpu_buffer, 1);
1598 reader = NULL; 1602 reader = NULL;
1599 goto out; 1603 goto out;
1600 } 1604 }
@@ -1606,8 +1610,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 goto out; 1610 goto out;
1607 1611
1608 /* Never should we have an index greater than the size */ 1612 /* Never should we have an index greater than the size */
1609 RB_WARN_ON(cpu_buffer, 1613 if (RB_WARN_ON(cpu_buffer,
1610 cpu_buffer->reader_page->read > rb_page_size(reader)); 1614 cpu_buffer->reader_page->read > rb_page_size(reader)))
1615 goto out;
1611 1616
1612 /* check if we caught up to the tail */ 1617 /* check if we caught up to the tail */
1613 reader = NULL; 1618 reader = NULL;
@@ -1646,7 +1651,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1646 goto again; 1651 goto again;
1647 1652
1648 out: 1653 out:
1649 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1654 __raw_spin_unlock(&cpu_buffer->lock);
1655 local_irq_restore(flags);
1650 1656
1651 return reader; 1657 return reader;
1652} 1658}
@@ -1660,7 +1666,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1660 reader = rb_get_reader_page(cpu_buffer); 1666 reader = rb_get_reader_page(cpu_buffer);
1661 1667
1662 /* This function should not be called when buffer is empty */ 1668 /* This function should not be called when buffer is empty */
1663 BUG_ON(!reader); 1669 if (RB_WARN_ON(cpu_buffer, !reader))
1670 return;
1664 1671
1665 event = rb_reader_event(cpu_buffer); 1672 event = rb_reader_event(cpu_buffer);
1666 1673
@@ -1687,7 +1694,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1687 * Check if we are at the end of the buffer. 1694 * Check if we are at the end of the buffer.
1688 */ 1695 */
1689 if (iter->head >= rb_page_size(iter->head_page)) { 1696 if (iter->head >= rb_page_size(iter->head_page)) {
1690 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1697 if (RB_WARN_ON(buffer,
1698 iter->head_page == cpu_buffer->commit_page))
1699 return;
1691 rb_inc_iter(iter); 1700 rb_inc_iter(iter);
1692 return; 1701 return;
1693 } 1702 }
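The hunks above convert hard BUG_ON() checks into RB_WARN_ON() used as a condition, so a corrupted ring buffer now produces a warning and an early return instead of taking the machine down. A minimal user-space sketch of that warn-and-bail idiom, built on a GCC statement expression (the macro and message are illustrative, not the kernel's RB_WARN_ON):

    #include <stdio.h>

    /* Evaluates to the condition, so callers can write: if (WARN_ON_RET(cond)) return; */
    #define WARN_ON_RET(cond) ({ \
            int __hit = !!(cond); \
            if (__hit) \
                    fprintf(stderr, "warning: '%s' hit at %s:%d\n", #cond, __FILE__, __LINE__); \
            __hit; })

    static int advance_head(int head, int commit)
    {
            /* Warn and bail out instead of aborting the whole program. */
            if (WARN_ON_RET(head > commit))
                    return commit;
            return head + 1;
    }

    int main(void)
    {
            printf("%d\n", advance_head(1, 4));   /* normal path */
            printf("%d\n", advance_head(9, 4));   /* warns, then clamps */
            return 0;
    }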
@@ -1700,8 +1709,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * This should not be called to advance the header if we are 1709 * This should not be called to advance the header if we are
1701 * at the tail of the buffer. 1710 * at the tail of the buffer.
1702 */ 1711 */
1703 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1712 if (RB_WARN_ON(cpu_buffer,
1704 (iter->head + length > rb_commit_index(cpu_buffer))); 1713 (iter->head_page == cpu_buffer->commit_page) &&
1714 (iter->head + length > rb_commit_index(cpu_buffer))))
1715 return;
1705 1716
1706 rb_update_iter_read_stamp(iter, event); 1717 rb_update_iter_read_stamp(iter, event);
1707 1718
@@ -1713,17 +1724,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 rb_advance_iter(iter); 1724 rb_advance_iter(iter);
1714} 1725}
1715 1726
1716/** 1727static struct ring_buffer_event *
1717 * ring_buffer_peek - peek at the next event to be read 1728rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1718 * @buffer: The ring buffer to read
1719 * @cpu: The cpu to peak at
1720 * @ts: The timestamp counter of this event.
1721 *
1722 * This will return the event that will be read next, but does
1723 * not consume the data.
1724 */
1725struct ring_buffer_event *
1726ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1727{ 1729{
1728 struct ring_buffer_per_cpu *cpu_buffer; 1730 struct ring_buffer_per_cpu *cpu_buffer;
1729 struct ring_buffer_event *event; 1731 struct ring_buffer_event *event;
@@ -1744,10 +1746,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1744 * can have. Nesting 10 deep of interrupts is clearly 1746 * can have. Nesting 10 deep of interrupts is clearly
1745 * an anomaly. 1747 * an anomaly.
1746 */ 1748 */
1747 if (unlikely(++nr_loops > 10)) { 1749 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1748 RB_WARN_ON(cpu_buffer, 1);
1749 return NULL; 1750 return NULL;
1750 }
1751 1751
1752 reader = rb_get_reader_page(cpu_buffer); 1752 reader = rb_get_reader_page(cpu_buffer);
1753 if (!reader) 1753 if (!reader)
@@ -1785,16 +1785,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1785 return NULL; 1785 return NULL;
1786} 1786}
1787 1787
1788/** 1788static struct ring_buffer_event *
1789 * ring_buffer_iter_peek - peek at the next event to be read 1789rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1790 * @iter: The ring buffer iterator
1791 * @ts: The timestamp counter of this event.
1792 *
1793 * This will return the event that will be read next, but does
1794 * not increment the iterator.
1795 */
1796struct ring_buffer_event *
1797ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1798{ 1790{
1799 struct ring_buffer *buffer; 1791 struct ring_buffer *buffer;
1800 struct ring_buffer_per_cpu *cpu_buffer; 1792 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1816,10 +1808,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1816 * can have. Nesting 10 deep of interrupts is clearly 1808 * can have. Nesting 10 deep of interrupts is clearly
1817 * an anomaly. 1809 * an anomaly.
1818 */ 1810 */
1819 if (unlikely(++nr_loops > 10)) { 1811 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1820 RB_WARN_ON(cpu_buffer, 1);
1821 return NULL; 1812 return NULL;
1822 }
1823 1813
1824 if (rb_per_cpu_empty(cpu_buffer)) 1814 if (rb_per_cpu_empty(cpu_buffer))
1825 return NULL; 1815 return NULL;
@@ -1856,6 +1846,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1856} 1846}
1857 1847
1858/** 1848/**
1849 * ring_buffer_peek - peek at the next event to be read
1850 * @buffer: The ring buffer to read
 1851 * @cpu: The cpu to peek at
1852 * @ts: The timestamp counter of this event.
1853 *
1854 * This will return the event that will be read next, but does
1855 * not consume the data.
1856 */
1857struct ring_buffer_event *
1858ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1859{
1860 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1861 struct ring_buffer_event *event;
1862 unsigned long flags;
1863
1864 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1865 event = rb_buffer_peek(buffer, cpu, ts);
1866 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1867
1868 return event;
1869}
1870
1871/**
1872 * ring_buffer_iter_peek - peek at the next event to be read
1873 * @iter: The ring buffer iterator
1874 * @ts: The timestamp counter of this event.
1875 *
1876 * This will return the event that will be read next, but does
1877 * not increment the iterator.
1878 */
1879struct ring_buffer_event *
1880ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1881{
1882 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1883 struct ring_buffer_event *event;
1884 unsigned long flags;
1885
1886 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1887 event = rb_iter_peek(iter, ts);
1888 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1889
1890 return event;
1891}
1892
1893/**
1859 * ring_buffer_consume - return an event and consume it 1894 * ring_buffer_consume - return an event and consume it
1860 * @buffer: The ring buffer to get the next event from 1895 * @buffer: The ring buffer to get the next event from
1861 * 1896 *
@@ -1866,19 +1901,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1866struct ring_buffer_event * 1901struct ring_buffer_event *
1867ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1902ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1868{ 1903{
1869 struct ring_buffer_per_cpu *cpu_buffer; 1904 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1870 struct ring_buffer_event *event; 1905 struct ring_buffer_event *event;
1906 unsigned long flags;
1871 1907
1872 if (!cpu_isset(cpu, buffer->cpumask)) 1908 if (!cpu_isset(cpu, buffer->cpumask))
1873 return NULL; 1909 return NULL;
1874 1910
1875 event = ring_buffer_peek(buffer, cpu, ts); 1911 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1912
1913 event = rb_buffer_peek(buffer, cpu, ts);
1876 if (!event) 1914 if (!event)
1877 return NULL; 1915 goto out;
1878 1916
1879 cpu_buffer = buffer->buffers[cpu];
1880 rb_advance_reader(cpu_buffer); 1917 rb_advance_reader(cpu_buffer);
1881 1918
1919 out:
1920 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1921
1882 return event; 1922 return event;
1883} 1923}
1884 1924
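ring_buffer_consume() now takes the per-cpu reader_lock once and calls the unlocked rb_buffer_peek()/rb_advance_reader() helpers inside it, so the peek and the advance can no longer race with another reader of the same cpu buffer. A rough user-space analogue of that shape, with a pthread mutex standing in for the reader spinlock (all names here are invented for the sketch):

    #include <pthread.h>
    #include <stdio.h>

    struct reader {
            pthread_mutex_t lock;       /* stands in for cpu_buffer->reader_lock */
            int events[4];
            int head, tail;
    };

    /* Unlocked helpers: callers must already hold r->lock, like rb_buffer_peek(). */
    static int *peek_locked(struct reader *r)
    {
            return r->head == r->tail ? NULL : &r->events[r->head];
    }

    static void advance_locked(struct reader *r)
    {
            r->head++;
    }

    /* Public consume: one critical section covers both the peek and the advance. */
    static int consume(struct reader *r, int *out)
    {
            int ret = 0;

            pthread_mutex_lock(&r->lock);
            int *ev = peek_locked(r);
            if (ev) {
                    *out = *ev;
                    advance_locked(r);
                    ret = 1;
            }
            pthread_mutex_unlock(&r->lock);
            return ret;
    }

    int main(void)
    {
            struct reader r = { PTHREAD_MUTEX_INITIALIZER, { 10, 20, 30, 40 }, 0, 4 };
            int v;

            while (consume(&r, &v))
                    printf("consumed %d\n", v);
            return 0;
    }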
@@ -1915,9 +1955,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1915 atomic_inc(&cpu_buffer->record_disabled); 1955 atomic_inc(&cpu_buffer->record_disabled);
1916 synchronize_sched(); 1956 synchronize_sched();
1917 1957
1918 spin_lock_irqsave(&cpu_buffer->lock, flags); 1958 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1959 __raw_spin_lock(&cpu_buffer->lock);
1919 ring_buffer_iter_reset(iter); 1960 ring_buffer_iter_reset(iter);
1920 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1961 __raw_spin_unlock(&cpu_buffer->lock);
1962 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1921 1963
1922 return iter; 1964 return iter;
1923} 1965}
@@ -1949,12 +1991,17 @@ struct ring_buffer_event *
1949ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 1991ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1950{ 1992{
1951 struct ring_buffer_event *event; 1993 struct ring_buffer_event *event;
1994 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1995 unsigned long flags;
1952 1996
1953 event = ring_buffer_iter_peek(iter, ts); 1997 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1998 event = rb_iter_peek(iter, ts);
1954 if (!event) 1999 if (!event)
1955 return NULL; 2000 goto out;
1956 2001
1957 rb_advance_iter(iter); 2002 rb_advance_iter(iter);
2003 out:
2004 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1958 2005
1959 return event; 2006 return event;
1960} 2007}
@@ -2003,11 +2050,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2003 if (!cpu_isset(cpu, buffer->cpumask)) 2050 if (!cpu_isset(cpu, buffer->cpumask))
2004 return; 2051 return;
2005 2052
2006 spin_lock_irqsave(&cpu_buffer->lock, flags); 2053 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2054
2055 __raw_spin_lock(&cpu_buffer->lock);
2007 2056
2008 rb_reset_cpu(cpu_buffer); 2057 rb_reset_cpu(cpu_buffer);
2009 2058
2010 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2059 __raw_spin_unlock(&cpu_buffer->lock);
2060
2061 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2011} 2062}
2012 2063
2013/** 2064/**
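With the new reader_lock, every reader-side path that also needs the raw cpu_buffer->lock (reset, iterator setup, rb_get_reader_page) takes reader_lock first and the raw lock second, so the two locks always nest in the same order and cannot deadlock against each other. A tiny user-space illustration of that fixed-ordering rule, with two pthread mutexes standing in for the kernel locks:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t reader_lock = PTHREAD_MUTEX_INITIALIZER;  /* always taken first */
    static pthread_mutex_t cpu_lock    = PTHREAD_MUTEX_INITIALIZER;  /* always taken second */
    static int resets, iter_starts;

    static void reset_cpu_buffer(void)
    {
            pthread_mutex_lock(&reader_lock);
            pthread_mutex_lock(&cpu_lock);
            resets++;
            pthread_mutex_unlock(&cpu_lock);
            pthread_mutex_unlock(&reader_lock);
    }

    static void read_start(void)
    {
            /* Same order as reset_cpu_buffer(): outer reader_lock, then inner cpu_lock. */
            pthread_mutex_lock(&reader_lock);
            pthread_mutex_lock(&cpu_lock);
            iter_starts++;
            pthread_mutex_unlock(&cpu_lock);
            pthread_mutex_unlock(&reader_lock);
    }

    int main(void)
    {
            reset_cpu_buffer();
            read_start();
            printf("resets=%d iterator starts=%d\n", resets, iter_starts);
            return 0;
    }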
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 697eda36b86a..216bbe7547a4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,6 +43,15 @@
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
44unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
45 45
46
47/*
48 * Kill all tracing for good (never come back).
49 * It is initialized to 1 but will turn to zero if the initialization
50 * of the tracer is successful. But that is the only place that sets
51 * this back to zero.
52 */
53int tracing_disabled = 1;
54
46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 55static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47 56
48static inline void ftrace_disable_cpu(void) 57static inline void ftrace_disable_cpu(void)
@@ -62,7 +71,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
62#define for_each_tracing_cpu(cpu) \ 71#define for_each_tracing_cpu(cpu) \
63 for_each_cpu_mask(cpu, tracing_buffer_mask) 72 for_each_cpu_mask(cpu, tracing_buffer_mask)
64 73
65static int tracing_disabled = 1; 74/*
75 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
76 *
77 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
78 * is set, then ftrace_dump is called. This will output the contents
79 * of the ftrace buffers to the console. This is very useful for
 80 * capturing traces that lead to crashes and outputting them to a
 81 * serial console.
 82 *
 83 * It is off by default, but you can enable it either by specifying
 84 * "ftrace_dump_on_oops" on the kernel command line, or by setting
85 * /proc/sys/kernel/ftrace_dump_on_oops to true.
86 */
87int ftrace_dump_on_oops;
88
89static int tracing_set_tracer(char *buf);
90
91static int __init set_ftrace(char *str)
92{
93 tracing_set_tracer(str);
94 return 1;
95}
96__setup("ftrace", set_ftrace);
97
98static int __init set_ftrace_dump_on_oops(char *str)
99{
100 ftrace_dump_on_oops = 1;
101 return 1;
102}
103__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
66 104
67long 105long
68ns2usecs(cycle_t nsec) 106ns2usecs(cycle_t nsec)
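The two __setup() handlers register early boot parameters: "ftrace=<tracer>" hands whatever follows the '=' to tracing_set_tracer(), while the bare "ftrace_dump_on_oops" token just flips a global flag. A rough user-space model of that kind of command-line dispatch (the parsing walk and handler names are made up for the sketch; the kernel does this through its early-param machinery):

    #include <stdio.h>
    #include <string.h>

    static char chosen_tracer[32];
    static int dump_on_oops;

    static void handle_ftrace(const char *val)
    {
            snprintf(chosen_tracer, sizeof(chosen_tracer), "%s", val);
    }

    static void handle_dump_on_oops(void)
    {
            dump_on_oops = 1;
    }

    /* Walk a flat command line and dispatch the two options above. */
    static void parse_cmdline(char *cmdline)
    {
            for (char *tok = strtok(cmdline, " "); tok; tok = strtok(NULL, " ")) {
                    if (strncmp(tok, "ftrace=", 7) == 0)
                            handle_ftrace(tok + 7);
                    else if (strcmp(tok, "ftrace_dump_on_oops") == 0)
                            handle_dump_on_oops();
            }
    }

    int main(void)
    {
            char cmdline[] = "root=/dev/sda1 ftrace=boot ftrace_dump_on_oops quiet";

            parse_cmdline(cmdline);
            printf("tracer=%s dump_on_oops=%d\n", chosen_tracer, dump_on_oops);
            return 0;
    }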
@@ -112,6 +150,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
112/* tracer_enabled is used to toggle activation of a tracer */ 150/* tracer_enabled is used to toggle activation of a tracer */
113static int tracer_enabled = 1; 151static int tracer_enabled = 1;
114 152
153/**
154 * tracing_is_enabled - return tracer_enabled status
155 *
156 * This function is used by other tracers to know the status
157 * of the tracer_enabled flag. Tracers may use this function
158 * to know whether they should enable their features when starting
159 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
160 */
161int tracing_is_enabled(void)
162{
163 return tracer_enabled;
164}
165
115/* function tracing enabled */ 166/* function tracing enabled */
116int ftrace_function_enabled; 167int ftrace_function_enabled;
117 168
@@ -154,7 +205,7 @@ static DEFINE_MUTEX(trace_types_lock);
154static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 205static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
155 206
156/* trace_flags holds iter_ctrl options */ 207/* trace_flags holds iter_ctrl options */
157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 208unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK;
158 209
159/** 210/**
160 * trace_wake_up - wake up tasks waiting for trace input 211 * trace_wake_up - wake up tasks waiting for trace input
@@ -213,6 +264,7 @@ static const char *trace_options[] = {
213 "stacktrace", 264 "stacktrace",
214 "sched-tree", 265 "sched-tree",
215 "ftrace_printk", 266 "ftrace_printk",
267 "ftrace_preempt",
216 NULL 268 NULL
217}; 269};
218 270
@@ -485,7 +537,6 @@ int register_tracer(struct tracer *type)
485 if (type->selftest) { 537 if (type->selftest) {
486 struct tracer *saved_tracer = current_trace; 538 struct tracer *saved_tracer = current_trace;
487 struct trace_array *tr = &global_trace; 539 struct trace_array *tr = &global_trace;
488 int saved_ctrl = tr->ctrl;
489 int i; 540 int i;
490 /* 541 /*
491 * Run a selftest on this tracer. 542 * Run a selftest on this tracer.
@@ -498,13 +549,11 @@ int register_tracer(struct tracer *type)
498 tracing_reset(tr, i); 549 tracing_reset(tr, i);
499 } 550 }
500 current_trace = type; 551 current_trace = type;
501 tr->ctrl = 0;
502 /* the test is responsible for initializing and enabling */ 552 /* the test is responsible for initializing and enabling */
503 pr_info("Testing tracer %s: ", type->name); 553 pr_info("Testing tracer %s: ", type->name);
504 ret = type->selftest(type, tr); 554 ret = type->selftest(type, tr);
505 /* the test is responsible for resetting too */ 555 /* the test is responsible for resetting too */
506 current_trace = saved_tracer; 556 current_trace = saved_tracer;
507 tr->ctrl = saved_ctrl;
508 if (ret) { 557 if (ret) {
509 printk(KERN_CONT "FAILED!\n"); 558 printk(KERN_CONT "FAILED!\n");
510 goto out; 559 goto out;
@@ -581,6 +630,76 @@ static void trace_init_cmdlines(void)
581 cmdline_idx = 0; 630 cmdline_idx = 0;
582} 631}
583 632
633static int trace_stop_count;
634static DEFINE_SPINLOCK(tracing_start_lock);
635
636/**
637 * tracing_start - quick start of the tracer
638 *
639 * If tracing is enabled but was stopped by tracing_stop,
640 * this will start the tracer back up.
641 */
642void tracing_start(void)
643{
644 struct ring_buffer *buffer;
645 unsigned long flags;
646
647 if (tracing_disabled)
648 return;
649
650 spin_lock_irqsave(&tracing_start_lock, flags);
651 if (--trace_stop_count)
652 goto out;
653
654 if (trace_stop_count < 0) {
655 /* Someone screwed up their debugging */
656 WARN_ON_ONCE(1);
657 trace_stop_count = 0;
658 goto out;
659 }
660
661
662 buffer = global_trace.buffer;
663 if (buffer)
664 ring_buffer_record_enable(buffer);
665
666 buffer = max_tr.buffer;
667 if (buffer)
668 ring_buffer_record_enable(buffer);
669
670 ftrace_start();
671 out:
672 spin_unlock_irqrestore(&tracing_start_lock, flags);
673}
674
675/**
676 * tracing_stop - quick stop of the tracer
677 *
678 * Lightweight way to stop tracing. Use in conjunction with
679 * tracing_start.
680 */
681void tracing_stop(void)
682{
683 struct ring_buffer *buffer;
684 unsigned long flags;
685
686 ftrace_stop();
687 spin_lock_irqsave(&tracing_start_lock, flags);
688 if (trace_stop_count++)
689 goto out;
690
691 buffer = global_trace.buffer;
692 if (buffer)
693 ring_buffer_record_disable(buffer);
694
695 buffer = max_tr.buffer;
696 if (buffer)
697 ring_buffer_record_disable(buffer);
698
699 out:
700 spin_unlock_irqrestore(&tracing_start_lock, flags);
701}
702
584void trace_stop_cmdline_recording(void); 703void trace_stop_cmdline_recording(void);
585 704
586static void trace_save_cmdline(struct task_struct *tsk) 705static void trace_save_cmdline(struct task_struct *tsk)
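tracing_stop() and tracing_start() nest: every stop bumps trace_stop_count under the spinlock, and only the start that brings the count back to zero re-enables the ring buffers, so overlapping users cannot switch tracing back on underneath each other. A user-space sketch of that counting discipline, with a mutex standing in for tracing_start_lock and prints for the record enable/disable hooks:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t start_lock = PTHREAD_MUTEX_INITIALIZER;
    static int stop_count;

    static void record_disable(void) { puts("recording disabled"); }
    static void record_enable(void)  { puts("recording enabled"); }

    static void stop_sketch(void)
    {
            pthread_mutex_lock(&start_lock);
            if (stop_count++ == 0)          /* only the first stop really disables */
                    record_disable();
            pthread_mutex_unlock(&start_lock);
    }

    static void start_sketch(void)
    {
            pthread_mutex_lock(&start_lock);
            if (--stop_count == 0)          /* only the matching final start re-enables */
                    record_enable();
            else if (stop_count < 0)        /* unbalanced start: clamp defensively */
                    stop_count = 0;
            pthread_mutex_unlock(&start_lock);
    }

    int main(void)
    {
            stop_sketch();          /* disables */
            stop_sketch();          /* nested stop: no-op */
            start_sketch();         /* still stopped */
            start_sketch();         /* count back to zero: re-enabled */
            return 0;
    }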
@@ -841,26 +960,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
841{ 960{
842 struct trace_array *tr = &global_trace; 961 struct trace_array *tr = &global_trace;
843 struct trace_array_cpu *data; 962 struct trace_array_cpu *data;
963 unsigned long flags;
844 int cpu; 964 int cpu;
845 int pc; 965 int pc;
846 966
847 if (tracing_disabled || !tr->ctrl) 967 if (tracing_disabled)
848 return; 968 return;
849 969
850 pc = preempt_count(); 970 pc = preempt_count();
851 preempt_disable_notrace(); 971 local_irq_save(flags);
852 cpu = raw_smp_processor_id(); 972 cpu = raw_smp_processor_id();
853 data = tr->data[cpu]; 973 data = tr->data[cpu];
854 974
855 if (likely(!atomic_read(&data->disabled))) 975 if (likely(atomic_inc_return(&data->disabled) == 1))
856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 976 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
857 977
858 preempt_enable_notrace(); 978 atomic_dec(&data->disabled);
979 local_irq_restore(flags);
859} 980}
860 981
861#ifdef CONFIG_FUNCTION_TRACER 982#ifdef CONFIG_FUNCTION_TRACER
862static void 983static void
863function_trace_call(unsigned long ip, unsigned long parent_ip) 984function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
864{ 985{
865 struct trace_array *tr = &global_trace; 986 struct trace_array *tr = &global_trace;
866 struct trace_array_cpu *data; 987 struct trace_array_cpu *data;
@@ -873,8 +994,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
873 return; 994 return;
874 995
875 pc = preempt_count(); 996 pc = preempt_count();
876 resched = need_resched(); 997 resched = ftrace_preempt_disable();
877 preempt_disable_notrace();
878 local_save_flags(flags); 998 local_save_flags(flags);
879 cpu = raw_smp_processor_id(); 999 cpu = raw_smp_processor_id();
880 data = tr->data[cpu]; 1000 data = tr->data[cpu];
@@ -884,10 +1004,38 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
884 trace_function(tr, data, ip, parent_ip, flags, pc); 1004 trace_function(tr, data, ip, parent_ip, flags, pc);
885 1005
886 atomic_dec(&data->disabled); 1006 atomic_dec(&data->disabled);
887 if (resched) 1007 ftrace_preempt_enable(resched);
888 preempt_enable_no_resched_notrace(); 1008}
889 else 1009
890 preempt_enable_notrace(); 1010static void
1011function_trace_call(unsigned long ip, unsigned long parent_ip)
1012{
1013 struct trace_array *tr = &global_trace;
1014 struct trace_array_cpu *data;
1015 unsigned long flags;
1016 long disabled;
1017 int cpu;
1018 int pc;
1019
1020 if (unlikely(!ftrace_function_enabled))
1021 return;
1022
1023 /*
1024 * Need to use raw, since this must be called before the
1025 * recursive protection is performed.
1026 */
1027 raw_local_irq_save(flags);
1028 cpu = raw_smp_processor_id();
1029 data = tr->data[cpu];
1030 disabled = atomic_inc_return(&data->disabled);
1031
1032 if (likely(disabled == 1)) {
1033 pc = preempt_count();
1034 trace_function(tr, data, ip, parent_ip, flags, pc);
1035 }
1036
1037 atomic_dec(&data->disabled);
1038 raw_local_irq_restore(flags);
891} 1039}
892 1040
893static struct ftrace_ops trace_ops __read_mostly = 1041static struct ftrace_ops trace_ops __read_mostly =
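function_trace_call() guards each per-cpu trace_array_cpu with an atomic 'disabled' counter: an event is recorded only when the caller's own increment took the counter from 0 to 1, so a nested or re-entrant hit on the same CPU is dropped instead of recursing. A user-space sketch of that first-owner-wins guard using C11 atomics (names invented for the example; the kernel counter is per cpu, here a single global keeps the sketch short):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int disabled;
    static int events;

    static void trace_event(const char *what)
    {
            /* Record only if this increment was the 0 -> 1 transition. */
            if (atomic_fetch_add(&disabled, 1) + 1 == 1)
                    printf("recorded: %s (total=%d)\n", what, ++events);
            atomic_fetch_sub(&disabled, 1);
    }

    static void reentrant_hit(void)
    {
            /* Pretend the act of tracing itself triggered another trace hook. */
            atomic_fetch_add(&disabled, 1);
            trace_event("nested call");        /* dropped: counter already non-zero */
            atomic_fetch_sub(&disabled, 1);
    }

    int main(void)
    {
            trace_event("top-level call");     /* recorded */
            reentrant_hit();
            return 0;
    }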
@@ -898,9 +1046,14 @@ static struct ftrace_ops trace_ops __read_mostly =
898void tracing_start_function_trace(void) 1046void tracing_start_function_trace(void)
899{ 1047{
900 ftrace_function_enabled = 0; 1048 ftrace_function_enabled = 0;
1049
1050 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1051 trace_ops.func = function_trace_call_preempt_only;
1052 else
1053 trace_ops.func = function_trace_call;
1054
901 register_ftrace_function(&trace_ops); 1055 register_ftrace_function(&trace_ops);
902 if (tracer_enabled) 1056 ftrace_function_enabled = 1;
903 ftrace_function_enabled = 1;
904} 1057}
905 1058
906void tracing_stop_function_trace(void) 1059void tracing_stop_function_trace(void)
@@ -1047,10 +1200,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1047 1200
1048 atomic_inc(&trace_record_cmdline_disabled); 1201 atomic_inc(&trace_record_cmdline_disabled);
1049 1202
1050 /* let the tracer grab locks here if needed */
1051 if (current_trace->start)
1052 current_trace->start(iter);
1053
1054 if (*pos != iter->pos) { 1203 if (*pos != iter->pos) {
1055 iter->ent = NULL; 1204 iter->ent = NULL;
1056 iter->cpu = 0; 1205 iter->cpu = 0;
@@ -1077,14 +1226,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1077 1226
1078static void s_stop(struct seq_file *m, void *p) 1227static void s_stop(struct seq_file *m, void *p)
1079{ 1228{
1080 struct trace_iterator *iter = m->private;
1081
1082 atomic_dec(&trace_record_cmdline_disabled); 1229 atomic_dec(&trace_record_cmdline_disabled);
1083
1084 /* let the tracer release locks here if needed */
1085 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1086 iter->trace->stop(iter);
1087
1088 mutex_unlock(&trace_types_lock); 1230 mutex_unlock(&trace_types_lock);
1089} 1231}
1090 1232
@@ -1338,6 +1480,17 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1338 trace_seq_putc(s, '\n'); 1480 trace_seq_putc(s, '\n');
1339} 1481}
1340 1482
1483static void test_cpu_buff_start(struct trace_iterator *iter)
1484{
1485 struct trace_seq *s = &iter->seq;
1486
1487 if (cpu_isset(iter->cpu, iter->started))
1488 return;
1489
1490 cpu_set(iter->cpu, iter->started);
1491 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1492}
1493
1341static enum print_line_t 1494static enum print_line_t
1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1495print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1343{ 1496{
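test_cpu_buff_start() prints the "CPU buffer started" banner only the first time output is produced for a given CPU, by testing and then setting that CPU's bit in iter->started (the trace_pipe path further down pre-sets every bit so pipes never show the banner). A user-space sketch of the same once-per-CPU marker, with a plain bitmask in place of the cpumask:

    #include <stdio.h>

    static unsigned long started_mask;     /* one bit per CPU, like iter->started */

    static void test_cpu_buff_start(unsigned int cpu)
    {
            if (started_mask & (1UL << cpu))
                    return;                         /* banner already printed for this CPU */
            started_mask |= 1UL << cpu;
            printf("##### CPU %u buffer started ####\n", cpu);
    }

    int main(void)
    {
            unsigned int sample[] = { 0, 0, 2, 0, 2, 1 };

            for (unsigned int i = 0; i < sizeof(sample) / sizeof(sample[0]); i++) {
                    test_cpu_buff_start(sample[i]);
                    printf("event on cpu %u\n", sample[i]);
            }
            return 0;
    }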
@@ -1357,6 +1510,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1357 if (entry->type == TRACE_CONT) 1510 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED; 1511 return TRACE_TYPE_HANDLED;
1359 1512
1513 test_cpu_buff_start(iter);
1514
1360 next_entry = find_next_entry(iter, NULL, &next_ts); 1515 next_entry = find_next_entry(iter, NULL, &next_ts);
1361 if (!next_entry) 1516 if (!next_entry)
1362 next_ts = iter->ts; 1517 next_ts = iter->ts;
@@ -1472,6 +1627,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1472 if (entry->type == TRACE_CONT) 1627 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED; 1628 return TRACE_TYPE_HANDLED;
1474 1629
1630 test_cpu_buff_start(iter);
1631
1475 comm = trace_find_cmdline(iter->ent->pid); 1632 comm = trace_find_cmdline(iter->ent->pid);
1476 1633
1477 t = ns2usecs(iter->ts); 1634 t = ns2usecs(iter->ts);
@@ -1917,10 +2074,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1917 m->private = iter; 2074 m->private = iter;
1918 2075
1919 /* stop the trace while dumping */ 2076 /* stop the trace while dumping */
1920 if (iter->tr->ctrl) { 2077 tracing_stop();
1921 tracer_enabled = 0;
1922 ftrace_function_enabled = 0;
1923 }
1924 2078
1925 if (iter->trace && iter->trace->open) 2079 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter); 2080 iter->trace->open(iter);
@@ -1965,14 +2119,7 @@ int tracing_release(struct inode *inode, struct file *file)
1965 iter->trace->close(iter); 2119 iter->trace->close(iter);
1966 2120
1967 /* reenable tracing if it was previously enabled */ 2121 /* reenable tracing if it was previously enabled */
1968 if (iter->tr->ctrl) { 2122 tracing_start();
1969 tracer_enabled = 1;
1970 /*
1971 * It is safe to enable function tracing even if it
1972 * isn't used
1973 */
1974 ftrace_function_enabled = 1;
1975 }
1976 mutex_unlock(&trace_types_lock); 2123 mutex_unlock(&trace_types_lock);
1977 2124
1978 seq_release(inode, file); 2125 seq_release(inode, file);
@@ -2310,11 +2457,10 @@ static ssize_t
2310tracing_ctrl_read(struct file *filp, char __user *ubuf, 2457tracing_ctrl_read(struct file *filp, char __user *ubuf,
2311 size_t cnt, loff_t *ppos) 2458 size_t cnt, loff_t *ppos)
2312{ 2459{
2313 struct trace_array *tr = filp->private_data;
2314 char buf[64]; 2460 char buf[64];
2315 int r; 2461 int r;
2316 2462
2317 r = sprintf(buf, "%ld\n", tr->ctrl); 2463 r = sprintf(buf, "%u\n", tracer_enabled);
2318 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2464 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2319} 2465}
2320 2466
@@ -2342,16 +2488,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2342 val = !!val; 2488 val = !!val;
2343 2489
2344 mutex_lock(&trace_types_lock); 2490 mutex_lock(&trace_types_lock);
2345 if (tr->ctrl ^ val) { 2491 if (tracer_enabled ^ val) {
2346 if (val) 2492 if (val) {
2347 tracer_enabled = 1; 2493 tracer_enabled = 1;
2348 else 2494 if (current_trace->start)
2495 current_trace->start(tr);
2496 tracing_start();
2497 } else {
2349 tracer_enabled = 0; 2498 tracer_enabled = 0;
2350 2499 tracing_stop();
2351 tr->ctrl = val; 2500 if (current_trace->stop)
2352 2501 current_trace->stop(tr);
2353 if (current_trace && current_trace->ctrl_update) 2502 }
2354 current_trace->ctrl_update(tr);
2355 } 2503 }
2356 mutex_unlock(&trace_types_lock); 2504 mutex_unlock(&trace_types_lock);
2357 2505
@@ -2377,29 +2525,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2377 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2525 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2378} 2526}
2379 2527
2380static ssize_t 2528static int tracing_set_tracer(char *buf)
2381tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2382 size_t cnt, loff_t *ppos)
2383{ 2529{
2384 struct trace_array *tr = &global_trace; 2530 struct trace_array *tr = &global_trace;
2385 struct tracer *t; 2531 struct tracer *t;
2386 char buf[max_tracer_type_len+1]; 2532 int ret = 0;
2387 int i;
2388 size_t ret;
2389
2390 ret = cnt;
2391
2392 if (cnt > max_tracer_type_len)
2393 cnt = max_tracer_type_len;
2394
2395 if (copy_from_user(&buf, ubuf, cnt))
2396 return -EFAULT;
2397
2398 buf[cnt] = 0;
2399
2400 /* strip ending whitespace. */
2401 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2402 buf[i] = 0;
2403 2533
2404 mutex_lock(&trace_types_lock); 2534 mutex_lock(&trace_types_lock);
2405 for (t = trace_types; t; t = t->next) { 2535 for (t = trace_types; t; t = t->next) {
@@ -2423,6 +2553,33 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2423 out: 2553 out:
2424 mutex_unlock(&trace_types_lock); 2554 mutex_unlock(&trace_types_lock);
2425 2555
2556 return ret;
2557}
2558
2559static ssize_t
2560tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2561 size_t cnt, loff_t *ppos)
2562{
2563 char buf[max_tracer_type_len+1];
2564 int i;
2565 size_t ret;
2566
2567 if (cnt > max_tracer_type_len)
2568 cnt = max_tracer_type_len;
2569
2570 if (copy_from_user(&buf, ubuf, cnt))
2571 return -EFAULT;
2572
2573 buf[cnt] = 0;
2574
2575 /* strip ending whitespace. */
2576 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2577 buf[i] = 0;
2578
2579 ret = tracing_set_tracer(buf);
2580 if (!ret)
2581 ret = cnt;
2582
2426 if (ret > 0) 2583 if (ret > 0)
2427 filp->f_pos += ret; 2584 filp->f_pos += ret;
2428 2585
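The write handler above now only copies and cleans up the user buffer, bounding the copy to max_tracer_type_len, NUL-terminating it and stripping trailing whitespace, before handing the bare name to tracing_set_tracer(), which the "ftrace=" boot parameter reuses directly. A user-space sketch of that clean-up step (buffer size and names are arbitrary):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_NAME_LEN 64

    /* Bound the copy, terminate it, and strip trailing whitespace. */
    static void clean_tracer_name(const char *user_buf, size_t cnt, char *out)
    {
            if (cnt > MAX_NAME_LEN)
                    cnt = MAX_NAME_LEN;
            memcpy(out, user_buf, cnt);             /* copy_from_user() in the kernel */
            out[cnt] = '\0';

            for (long i = (long)cnt - 1; i > 0 && isspace((unsigned char)out[i]); i--)
                    out[i] = '\0';
    }

    int main(void)
    {
            char name[MAX_NAME_LEN + 1];
            const char *input = "sched_switch\n";

            clean_tracer_name(input, strlen(input), name);
            printf("set tracer to '%s'\n", name);   /* would then call tracing_set_tracer(name) */
            return 0;
    }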
@@ -2491,6 +2648,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2491 return -ENOMEM; 2648 return -ENOMEM;
2492 2649
2493 mutex_lock(&trace_types_lock); 2650 mutex_lock(&trace_types_lock);
2651
2652 /* trace pipe does not show start of buffer */
2653 cpus_setall(iter->started);
2654
2494 iter->tr = &global_trace; 2655 iter->tr = &global_trace;
2495 iter->trace = current_trace; 2656 iter->trace = current_trace;
2496 filp->private_data = iter; 2657 filp->private_data = iter;
@@ -2677,7 +2838,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2677 unsigned long val; 2838 unsigned long val;
2678 char buf[64]; 2839 char buf[64];
2679 int ret, cpu; 2840 int ret, cpu;
2680 struct trace_array *tr = filp->private_data;
2681 2841
2682 if (cnt >= sizeof(buf)) 2842 if (cnt >= sizeof(buf))
2683 return -EINVAL; 2843 return -EINVAL;
@@ -2697,12 +2857,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2697 2857
2698 mutex_lock(&trace_types_lock); 2858 mutex_lock(&trace_types_lock);
2699 2859
2700 if (tr->ctrl) { 2860 tracing_stop();
2701 cnt = -EBUSY;
2702 pr_info("ftrace: please disable tracing"
2703 " before modifying buffer size\n");
2704 goto out;
2705 }
2706 2861
2707 /* disable all cpu buffers */ 2862 /* disable all cpu buffers */
2708 for_each_tracing_cpu(cpu) { 2863 for_each_tracing_cpu(cpu) {
@@ -2750,6 +2905,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2750 atomic_dec(&max_tr.data[cpu]->disabled); 2905 atomic_dec(&max_tr.data[cpu]->disabled);
2751 } 2906 }
2752 2907
2908 tracing_start();
2753 max_tr.entries = global_trace.entries; 2909 max_tr.entries = global_trace.entries;
2754 mutex_unlock(&trace_types_lock); 2910 mutex_unlock(&trace_types_lock);
2755 2911
@@ -2772,9 +2928,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
2772{ 2928{
2773 char *buf; 2929 char *buf;
2774 char *end; 2930 char *end;
2775 struct trace_array *tr = &global_trace;
2776 2931
2777 if (!tr->ctrl || tracing_disabled) 2932 if (tracing_disabled)
2778 return -EINVAL; 2933 return -EINVAL;
2779 2934
2780 if (cnt > TRACE_BUF_SIZE) 2935 if (cnt > TRACE_BUF_SIZE)
@@ -2840,22 +2995,38 @@ static struct file_operations tracing_mark_fops = {
2840 2995
2841#ifdef CONFIG_DYNAMIC_FTRACE 2996#ifdef CONFIG_DYNAMIC_FTRACE
2842 2997
2998int __weak ftrace_arch_read_dyn_info(char *buf, int size)
2999{
3000 return 0;
3001}
3002
2843static ssize_t 3003static ssize_t
2844tracing_read_long(struct file *filp, char __user *ubuf, 3004tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2845 size_t cnt, loff_t *ppos) 3005 size_t cnt, loff_t *ppos)
2846{ 3006{
3007 static char ftrace_dyn_info_buffer[1024];
3008 static DEFINE_MUTEX(dyn_info_mutex);
2847 unsigned long *p = filp->private_data; 3009 unsigned long *p = filp->private_data;
2848 char buf[64]; 3010 char *buf = ftrace_dyn_info_buffer;
3011 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2849 int r; 3012 int r;
2850 3013
2851 r = sprintf(buf, "%ld\n", *p); 3014 mutex_lock(&dyn_info_mutex);
3015 r = sprintf(buf, "%ld ", *p);
2852 3016
2853 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3017 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3018 buf[r++] = '\n';
3019
3020 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3021
3022 mutex_unlock(&dyn_info_mutex);
3023
3024 return r;
2854} 3025}
2855 3026
2856static struct file_operations tracing_read_long_fops = { 3027static struct file_operations tracing_dyn_info_fops = {
2857 .open = tracing_open_generic, 3028 .open = tracing_open_generic,
2858 .read = tracing_read_long, 3029 .read = tracing_read_dyn_info,
2859}; 3030};
2860#endif 3031#endif
2861 3032
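ftrace_arch_read_dyn_info() is defined __weak, so this generic file provides a do-nothing default and an architecture can supply its own strong definition that the linker picks instead. A minimal sketch of that weak-default pattern using the GCC/Clang attribute (the function name here is made up):

    #include <stdio.h>

    /*
     * Weak default: if another object file defines a non-weak
     * arch_read_dyn_info(), the linker uses that one instead.
     */
    __attribute__((weak)) int arch_read_dyn_info(char *buf, int size)
    {
            (void)buf;
            (void)size;
            return 0;       /* nothing arch-specific to report */
    }

    int main(void)
    {
            char buf[64];
            int n = arch_read_dyn_info(buf, sizeof(buf));

            printf("arch reported %d bytes\n", n);
            return 0;
    }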
@@ -2964,7 +3135,7 @@ static __init int tracer_init_debugfs(void)
2964#ifdef CONFIG_DYNAMIC_FTRACE 3135#ifdef CONFIG_DYNAMIC_FTRACE
2965 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3136 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2966 &ftrace_update_tot_cnt, 3137 &ftrace_update_tot_cnt,
2967 &tracing_read_long_fops); 3138 &tracing_dyn_info_fops);
2968 if (!entry) 3139 if (!entry)
2969 pr_warning("Could not create debugfs " 3140 pr_warning("Could not create debugfs "
2970 "'dyn_ftrace_total_info' entry\n"); 3141 "'dyn_ftrace_total_info' entry\n");
@@ -2987,7 +3158,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2987 unsigned long flags, irq_flags; 3158 unsigned long flags, irq_flags;
2988 int cpu, len = 0, size, pc; 3159 int cpu, len = 0, size, pc;
2989 3160
2990 if (!tr->ctrl || tracing_disabled) 3161 if (tracing_disabled)
2991 return 0; 3162 return 0;
2992 3163
2993 pc = preempt_count(); 3164 pc = preempt_count();
@@ -3045,7 +3216,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
3045static int trace_panic_handler(struct notifier_block *this, 3216static int trace_panic_handler(struct notifier_block *this,
3046 unsigned long event, void *unused) 3217 unsigned long event, void *unused)
3047{ 3218{
3048 ftrace_dump(); 3219 if (ftrace_dump_on_oops)
3220 ftrace_dump();
3049 return NOTIFY_OK; 3221 return NOTIFY_OK;
3050} 3222}
3051 3223
@@ -3061,7 +3233,8 @@ static int trace_die_handler(struct notifier_block *self,
3061{ 3233{
3062 switch (val) { 3234 switch (val) {
3063 case DIE_OOPS: 3235 case DIE_OOPS:
3064 ftrace_dump(); 3236 if (ftrace_dump_on_oops)
3237 ftrace_dump();
3065 break; 3238 break;
3066 default: 3239 default:
3067 break; 3240 break;
@@ -3102,7 +3275,6 @@ trace_printk_seq(struct trace_seq *s)
3102 trace_seq_reset(s); 3275 trace_seq_reset(s);
3103} 3276}
3104 3277
3105
3106void ftrace_dump(void) 3278void ftrace_dump(void)
3107{ 3279{
3108 static DEFINE_SPINLOCK(ftrace_dump_lock); 3280 static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3220,7 +3392,6 @@ __init static int tracer_alloc_buffers(void)
3220#endif 3392#endif
3221 3393
3222 /* All seems OK, enable tracing */ 3394 /* All seems OK, enable tracing */
3223 global_trace.ctrl = tracer_enabled;
3224 tracing_disabled = 0; 3395 tracing_disabled = 0;
3225 3396
3226 atomic_notifier_chain_register(&panic_notifier_list, 3397 atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..978145088fb8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -172,7 +172,6 @@ struct trace_iterator;
172struct trace_array { 172struct trace_array {
173 struct ring_buffer *buffer; 173 struct ring_buffer *buffer;
174 unsigned long entries; 174 unsigned long entries;
175 long ctrl;
176 int cpu; 175 int cpu;
177 cycle_t time_start; 176 cycle_t time_start;
178 struct task_struct *waiter; 177 struct task_struct *waiter;
@@ -236,15 +235,14 @@ struct tracer {
236 const char *name; 235 const char *name;
237 void (*init)(struct trace_array *tr); 236 void (*init)(struct trace_array *tr);
238 void (*reset)(struct trace_array *tr); 237 void (*reset)(struct trace_array *tr);
238 void (*start)(struct trace_array *tr);
239 void (*stop)(struct trace_array *tr);
239 void (*open)(struct trace_iterator *iter); 240 void (*open)(struct trace_iterator *iter);
240 void (*pipe_open)(struct trace_iterator *iter); 241 void (*pipe_open)(struct trace_iterator *iter);
241 void (*close)(struct trace_iterator *iter); 242 void (*close)(struct trace_iterator *iter);
242 void (*start)(struct trace_iterator *iter);
243 void (*stop)(struct trace_iterator *iter);
244 ssize_t (*read)(struct trace_iterator *iter, 243 ssize_t (*read)(struct trace_iterator *iter,
245 struct file *filp, char __user *ubuf, 244 struct file *filp, char __user *ubuf,
246 size_t cnt, loff_t *ppos); 245 size_t cnt, loff_t *ppos);
247 void (*ctrl_update)(struct trace_array *tr);
248#ifdef CONFIG_FTRACE_STARTUP_TEST 246#ifdef CONFIG_FTRACE_STARTUP_TEST
249 int (*selftest)(struct tracer *trace, 247 int (*selftest)(struct tracer *trace,
250 struct trace_array *tr); 248 struct trace_array *tr);
@@ -279,8 +277,11 @@ struct trace_iterator {
279 unsigned long iter_flags; 277 unsigned long iter_flags;
280 loff_t pos; 278 loff_t pos;
281 long idx; 279 long idx;
280
281 cpumask_t started;
282}; 282};
283 283
284int tracing_is_enabled(void);
284void trace_wake_up(void); 285void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu); 286void tracing_reset(struct trace_array *tr, int cpu);
286int tracing_open_generic(struct inode *inode, struct file *filp); 287int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -323,6 +324,9 @@ void trace_function(struct trace_array *tr,
323 324
324void tracing_start_cmdline_record(void); 325void tracing_start_cmdline_record(void);
325void tracing_stop_cmdline_record(void); 326void tracing_stop_cmdline_record(void);
327void tracing_sched_switch_assign_trace(struct trace_array *tr);
328void tracing_stop_sched_switch_record(void);
329void tracing_start_sched_switch_record(void);
326int register_tracer(struct tracer *type); 330int register_tracer(struct tracer *type);
327void unregister_tracer(struct tracer *type); 331void unregister_tracer(struct tracer *type);
328 332
@@ -415,8 +419,57 @@ enum trace_iterator_flags {
415 TRACE_ITER_STACKTRACE = 0x100, 419 TRACE_ITER_STACKTRACE = 0x100,
416 TRACE_ITER_SCHED_TREE = 0x200, 420 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400, 421 TRACE_ITER_PRINTK = 0x400,
422 TRACE_ITER_PREEMPTONLY = 0x800,
418}; 423};
419 424
420extern struct tracer nop_trace; 425extern struct tracer nop_trace;
421 426
427/**
428 * ftrace_preempt_disable - disable preemption scheduler safe
429 *
430 * When tracing can happen inside the scheduler, there exists
431 * cases that the tracing might happen before the need_resched
432 * flag is checked. If this happens and the tracer calls
433 * preempt_enable (after a disable), a schedule might take place
434 * causing an infinite recursion.
435 *
436 * To prevent this, we read the need_recshed flag before
437 * disabling preemption. When we want to enable preemption we
438 * check the flag, if it is set, then we call preempt_enable_no_resched.
439 * Otherwise, we call preempt_enable.
440 *
441 * The rational for doing the above is that if need resched is set
442 * and we have yet to reschedule, we are either in an atomic location
443 * (where we do not need to check for scheduling) or we are inside
444 * the scheduler and do not want to resched.
445 */
446static inline int ftrace_preempt_disable(void)
447{
448 int resched;
449
450 resched = need_resched();
451 preempt_disable_notrace();
452
453 return resched;
454}
455
456/**
457 * ftrace_preempt_enable - enable preemption scheduler safe
458 * @resched: the return value from ftrace_preempt_disable
459 *
460 * This is a scheduler safe way to enable preemption and not miss
461 * any preemption checks. The disable call saved the state of preemption.
462 * If resched is set, then we were either inside an atomic or
463 * are inside the scheduler (we would have already scheduled
464 * otherwise). In this case, we do not want to call normal
465 * preempt_enable, but preempt_enable_no_resched instead.
466 */
467static inline void ftrace_preempt_enable(int resched)
468{
469 if (resched)
470 preempt_enable_no_resched_notrace();
471 else
472 preempt_enable_notrace();
473}
474
422#endif /* _LINUX_KERNEL_TRACE_H */ 475#endif /* _LINUX_KERNEL_TRACE_H */
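The helper pair samples need_resched() once, before preemption is disabled, and the saved value tells ftrace_preempt_enable() whether to take the no-resched path on the way out; callers such as wakeup_tracer_call() and function_trace_call_preempt_only() above simply bracket their work with the pair. A user-space caricature of that save-then-choose-release shape (the flag and the two release paths are stand-ins, not the kernel primitives):

    #include <stdbool.h>
    #include <stdio.h>

    static bool resched_pending;    /* stands in for need_resched() */
    static int preempt_depth;

    static int my_preempt_disable(void)
    {
            int resched = resched_pending;  /* sample the flag *before* disabling */

            preempt_depth++;
            return resched;
    }

    static void my_preempt_enable(int resched)
    {
            preempt_depth--;
            if (resched)
                    puts("enable without resched check (a reschedule is already pending)");
            else
                    puts("normal enable (may reschedule here)");
    }

    static void tracer_callback(void)
    {
            int resched = my_preempt_disable();

            puts("  ... record trace event ...");
            my_preempt_enable(resched);
    }

    int main(void)
    {
            tracer_callback();              /* normal context */
            resched_pending = true;
            tracer_callback();              /* called with a resched already pending */
            return 0;
    }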
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..8f71915e8bb4 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,42 +13,49 @@
13#include "trace.h" 13#include "trace.h"
14 14
15static struct trace_array *boot_trace; 15static struct trace_array *boot_trace;
16static int trace_boot_enabled; 16static bool pre_initcalls_finished;
17 17
18 18/* Tells the boot tracer that the pre_smp_initcalls are finished.
19/* Should be started after do_pre_smp_initcalls() in init/main.c */ 19 * So we are ready.
 20 * It doesn't enable sched events tracing, however.
21 * You have to call enable_boot_trace to do so.
22 */
20void start_boot_trace(void) 23void start_boot_trace(void)
21{ 24{
22 trace_boot_enabled = 1; 25 pre_initcalls_finished = true;
23} 26}
24 27
25void stop_boot_trace(void) 28void enable_boot_trace(void)
26{ 29{
27 trace_boot_enabled = 0; 30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
28} 32}
29 33
30void reset_boot_trace(struct trace_array *tr) 34void disable_boot_trace(void)
31{ 35{
32 stop_boot_trace(); 36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
33} 38}
34 39
35static void boot_trace_init(struct trace_array *tr) 40static void reset_boot_trace(struct trace_array *tr)
36{ 41{
37 int cpu; 42 int cpu;
38 boot_trace = tr;
39 43
40 trace_boot_enabled = 0; 44 tr->time_start = ftrace_now(tr->cpu);
41 45
42 for_each_cpu_mask(cpu, cpu_possible_map) 46 for_each_online_cpu(cpu)
43 tracing_reset(tr, cpu); 47 tracing_reset(tr, cpu);
44} 48}
45 49
46static void boot_trace_ctrl_update(struct trace_array *tr) 50static void boot_trace_init(struct trace_array *tr)
47{ 51{
48 if (tr->ctrl) 52 int cpu;
49 start_boot_trace(); 53 boot_trace = tr;
50 else 54
51 stop_boot_trace(); 55 for_each_cpu_mask(cpu, cpu_possible_map)
56 tracing_reset(tr, cpu);
57
58 tracing_sched_switch_assign_trace(tr);
52} 59}
53 60
54static enum print_line_t initcall_print_line(struct trace_iterator *iter) 61static enum print_line_t initcall_print_line(struct trace_iterator *iter)
@@ -87,7 +94,6 @@ struct tracer boot_tracer __read_mostly =
87 .name = "initcall", 94 .name = "initcall",
88 .init = boot_trace_init, 95 .init = boot_trace_init,
89 .reset = reset_boot_trace, 96 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line, 97 .print_line = initcall_print_line,
92}; 98};
93 99
@@ -99,7 +105,7 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
99 unsigned long irq_flags; 105 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace; 106 struct trace_array *tr = boot_trace;
101 107
102 if (!trace_boot_enabled) 108 if (!pre_initcalls_finished)
103 return; 109 return;
104 110
105 /* Get its name now since this function could 111 /* Get its name now since this function could
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..8693b7a0a5b2 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -44,22 +44,17 @@ static void stop_function_trace(struct trace_array *tr)
44 44
45static void function_trace_init(struct trace_array *tr) 45static void function_trace_init(struct trace_array *tr)
46{ 46{
47 if (tr->ctrl) 47 start_function_trace(tr);
48 start_function_trace(tr);
49} 48}
50 49
51static void function_trace_reset(struct trace_array *tr) 50static void function_trace_reset(struct trace_array *tr)
52{ 51{
53 if (tr->ctrl) 52 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 53}
56 54
57static void function_trace_ctrl_update(struct trace_array *tr) 55static void function_trace_start(struct trace_array *tr)
58{ 56{
59 if (tr->ctrl) 57 function_reset(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 58}
64 59
65static struct tracer function_trace __read_mostly = 60static struct tracer function_trace __read_mostly =
@@ -67,7 +62,7 @@ static struct tracer function_trace __read_mostly =
67 .name = "function", 62 .name = "function",
68 .init = function_trace_init, 63 .init = function_trace_init,
69 .reset = function_trace_reset, 64 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 65 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 66#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 67 .selftest = trace_selftest_startup_function,
73#endif 68#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..d919d4eaa7cc 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
356static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
357{ 363{
358 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
359 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
360} 372}
361 373
362static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
363{ 375{
364 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
365 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
366} 379}
367 380
@@ -370,36 +383,36 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
370 irqsoff_trace = tr; 383 irqsoff_trace = tr;
371 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
372 smp_wmb(); 385 smp_wmb();
373 386 start_irqsoff_tracer(tr);
374 if (tr->ctrl)
375 start_irqsoff_tracer(tr);
376} 387}
377 388
378static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 390{
380 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
381 stop_irqsoff_tracer(tr);
382} 392}
383 393
384static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
385{ 395{
386 if (tr->ctrl) 396 tracer_enabled = 1;
387 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
388 else 398}
389 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
390} 404}
391 405
392static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
393{ 407{
394 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
395 if (iter->tr->ctrl) 409 tracer_enabled = 0;
396 stop_irqsoff_tracer(iter->tr);
397} 410}
398 411
399static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
400{ 413{
401 if (iter->tr->ctrl) 414 /* restart tracing */
402 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
403} 416}
404 417
405#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
@@ -414,9 +427,10 @@ static struct tracer irqsoff_tracer __read_mostly =
414 .name = "irqsoff", 427 .name = "irqsoff",
415 .init = irqsoff_tracer_init, 428 .init = irqsoff_tracer_init,
416 .reset = irqsoff_tracer_reset, 429 .reset = irqsoff_tracer_reset,
430 .start = irqsoff_tracer_start,
431 .stop = irqsoff_tracer_stop,
417 .open = irqsoff_tracer_open, 432 .open = irqsoff_tracer_open,
418 .close = irqsoff_tracer_close, 433 .close = irqsoff_tracer_close,
419 .ctrl_update = irqsoff_tracer_ctrl_update,
420 .print_max = 1, 434 .print_max = 1,
421#ifdef CONFIG_FTRACE_SELFTEST 435#ifdef CONFIG_FTRACE_SELFTEST
422 .selftest = trace_selftest_startup_irqsoff, 436 .selftest = trace_selftest_startup_irqsoff,
@@ -440,9 +454,10 @@ static struct tracer preemptoff_tracer __read_mostly =
440 .name = "preemptoff", 454 .name = "preemptoff",
441 .init = preemptoff_tracer_init, 455 .init = preemptoff_tracer_init,
442 .reset = irqsoff_tracer_reset, 456 .reset = irqsoff_tracer_reset,
457 .start = irqsoff_tracer_start,
458 .stop = irqsoff_tracer_stop,
443 .open = irqsoff_tracer_open, 459 .open = irqsoff_tracer_open,
444 .close = irqsoff_tracer_close, 460 .close = irqsoff_tracer_close,
445 .ctrl_update = irqsoff_tracer_ctrl_update,
446 .print_max = 1, 461 .print_max = 1,
447#ifdef CONFIG_FTRACE_SELFTEST 462#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_preemptoff, 463 .selftest = trace_selftest_startup_preemptoff,
@@ -468,9 +483,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
468 .name = "preemptirqsoff", 483 .name = "preemptirqsoff",
469 .init = preemptirqsoff_tracer_init, 484 .init = preemptirqsoff_tracer_init,
470 .reset = irqsoff_tracer_reset, 485 .reset = irqsoff_tracer_reset,
486 .start = irqsoff_tracer_start,
487 .stop = irqsoff_tracer_stop,
471 .open = irqsoff_tracer_open, 488 .open = irqsoff_tracer_open,
472 .close = irqsoff_tracer_close, 489 .close = irqsoff_tracer_close,
473 .ctrl_update = irqsoff_tracer_ctrl_update,
474 .print_max = 1, 490 .print_max = 1,
475#ifdef CONFIG_FTRACE_SELFTEST 491#ifdef CONFIG_FTRACE_SELFTEST
476 .selftest = trace_selftest_startup_preemptirqsoff, 492 .selftest = trace_selftest_startup_preemptirqsoff,
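save_tracer_enabled remembers whether the irqsoff tracers were live before a trace output file was opened: open() forces tracer_enabled off while the snapshot is read, and close() restores the saved value instead of unconditionally switching tracing back on. A user-space sketch of that save/restore-around-dump shape (names and the prints are illustrative):

    #include <stdio.h>

    static int tracer_enabled;
    static int save_tracer_enabled;

    static void tracer_start(void) { tracer_enabled = 1; save_tracer_enabled = 1; }
    static void tracer_stop(void)  { tracer_enabled = 0; save_tracer_enabled = 0; }

    static void trace_file_open(void)
    {
            tracer_enabled = 0;                     /* pause while the file is read */
            puts("dumping trace ...");
    }

    static void trace_file_close(void)
    {
            tracer_enabled = save_tracer_enabled;   /* restore whatever was set before */
    }

    int main(void)
    {
            tracer_start();
            trace_file_open();
            trace_file_close();
            printf("after close while started: tracer_enabled=%d\n", tracer_enabled);

            tracer_stop();
            trace_file_open();
            trace_file_close();
            printf("after close while stopped: tracer_enabled=%d\n", tracer_enabled);
            return 0;
    }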
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f28484618ff0..51bcf370215e 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -34,30 +34,24 @@ static void mmio_trace_init(struct trace_array *tr)
34{ 34{
35 pr_debug("in %s\n", __func__); 35 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr; 36 mmio_trace_array = tr;
37 if (tr->ctrl) { 37
38 mmio_reset_data(tr); 38 mmio_reset_data(tr);
39 enable_mmiotrace(); 39 enable_mmiotrace();
40 }
41} 40}
42 41
43static void mmio_trace_reset(struct trace_array *tr) 42static void mmio_trace_reset(struct trace_array *tr)
44{ 43{
45 pr_debug("in %s\n", __func__); 44 pr_debug("in %s\n", __func__);
46 if (tr->ctrl) 45
47 disable_mmiotrace(); 46 disable_mmiotrace();
48 mmio_reset_data(tr); 47 mmio_reset_data(tr);
49 mmio_trace_array = NULL; 48 mmio_trace_array = NULL;
50} 49}
51 50
52static void mmio_trace_ctrl_update(struct trace_array *tr) 51static void mmio_trace_start(struct trace_array *tr)
53{ 52{
54 pr_debug("in %s\n", __func__); 53 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) { 54 mmio_reset_data(tr);
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61} 55}
62 56
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 57static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -298,10 +292,10 @@ static struct tracer mmio_tracer __read_mostly =
298 .name = "mmiotrace", 292 .name = "mmiotrace",
299 .init = mmio_trace_init, 293 .init = mmio_trace_init,
300 .reset = mmio_trace_reset, 294 .reset = mmio_trace_reset,
295 .start = mmio_trace_start,
301 .pipe_open = mmio_pipe_open, 296 .pipe_open = mmio_pipe_open,
302 .close = mmio_close, 297 .close = mmio_close,
303 .read = mmio_read, 298 .read = mmio_read,
304 .ctrl_update = mmio_trace_ctrl_update,
305 .print_line = mmio_print_line, 299 .print_line = mmio_print_line,
306}; 300};
307 301
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..2ef1d227e7d8 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -32,23 +32,12 @@ static void nop_trace_init(struct trace_array *tr)
32 for_each_online_cpu(cpu) 32 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu); 33 tracing_reset(tr, cpu);
34 34
35 if (tr->ctrl) 35 start_nop_trace(tr);
36 start_nop_trace(tr);
37} 36}
38 37
39static void nop_trace_reset(struct trace_array *tr) 38static void nop_trace_reset(struct trace_array *tr)
40{ 39{
41 if (tr->ctrl) 40 stop_nop_trace(tr);
42 stop_nop_trace(tr);
43}
44
45static void nop_trace_ctrl_update(struct trace_array *tr)
46{
47 /* When starting a new trace, reset the buffers */
48 if (tr->ctrl)
49 start_nop_trace(tr);
50 else
51 stop_nop_trace(tr);
52} 41}
53 42
54struct tracer nop_trace __read_mostly = 43struct tracer nop_trace __read_mostly =
@@ -56,7 +45,6 @@ struct tracer nop_trace __read_mostly =
56 .name = "nop", 45 .name = "nop",
57 .init = nop_trace_init, 46 .init = nop_trace_init,
58 .reset = nop_trace_reset, 47 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST 48#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop, 49 .selftest = trace_selftest_startup_nop,
62#endif 50#endif
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..be35bdfe2e38 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22probe_sched_switch(struct rq *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
27 int cpu; 28 int cpu;
28 int pc; 29 int pc;
29 30
30 if (!atomic_read(&sched_ref)) 31 if (!sched_ref)
31 return; 32 return;
32 33
33 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
@@ -123,20 +124,18 @@ static void tracing_sched_unregister(void)
123 124
124static void tracing_start_sched_switch(void) 125static void tracing_start_sched_switch(void)
125{ 126{
126 long ref; 127 mutex_lock(&sched_register_mutex);
127 128 if (!(sched_ref++))
128 ref = atomic_inc_return(&sched_ref);
129 if (ref == 1)
130 tracing_sched_register(); 129 tracing_sched_register();
130 mutex_unlock(&sched_register_mutex);
131} 131}
132 132
133static void tracing_stop_sched_switch(void) 133static void tracing_stop_sched_switch(void)
134{ 134{
135 long ref; 135 mutex_lock(&sched_register_mutex);
136 136 if (!(--sched_ref))
137 ref = atomic_dec_and_test(&sched_ref);
138 if (ref)
139 tracing_sched_unregister(); 137 tracing_sched_unregister();
138 mutex_unlock(&sched_register_mutex);
140} 139}
141 140
142void tracing_start_cmdline_record(void) 141void tracing_start_cmdline_record(void)
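The sched_switch hooks are now reference counted under sched_register_mutex: the first tracing_start_sched_switch() registers the tracepoint probes and the last tracing_stop_sched_switch() unregisters them, so the cmdline recorder and the sched tracers can share the hooks safely. A user-space sketch of that refcounted register/unregister (the register bodies are just prints here):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t register_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int sched_ref;

    static void do_register(void)   { puts("probes registered"); }
    static void do_unregister(void) { puts("probes unregistered"); }

    static void start_sched_switch(void)
    {
            pthread_mutex_lock(&register_mutex);
            if (!(sched_ref++))             /* 0 -> 1: first user registers the probes */
                    do_register();
            pthread_mutex_unlock(&register_mutex);
    }

    static void stop_sched_switch(void)
    {
            pthread_mutex_lock(&register_mutex);
            if (!(--sched_ref))             /* 1 -> 0: last user unregisters them */
                    do_unregister();
            pthread_mutex_unlock(&register_mutex);
    }

    int main(void)
    {
            start_sched_switch();   /* registers */
            start_sched_switch();   /* second user: probes stay registered */
            stop_sched_switch();    /* one user left */
            stop_sched_switch();    /* unregisters */
            return 0;
    }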
@@ -149,40 +148,85 @@ void tracing_stop_cmdline_record(void)
149 tracing_stop_sched_switch(); 148 tracing_stop_sched_switch();
150} 149}
151 150
151/**
152 * tracing_start_sched_switch_record - start tracing context switches
153 *
154 * Turns on context switch tracing for a tracer.
155 */
156void tracing_start_sched_switch_record(void)
157{
158 if (unlikely(!ctx_trace)) {
159 WARN_ON(1);
160 return;
161 }
162
163 tracing_start_sched_switch();
164
165 mutex_lock(&sched_register_mutex);
166 tracer_enabled++;
167 mutex_unlock(&sched_register_mutex);
168}
169
170/**
171 * tracing_stop_sched_switch_record - stop tracing context switches
172 *
173 * Turns off context switch tracing for a tracer.
174 */
175void tracing_stop_sched_switch_record(void)
176{
177 mutex_lock(&sched_register_mutex);
178 tracer_enabled--;
179 WARN_ON(tracer_enabled < 0);
180 mutex_unlock(&sched_register_mutex);
181
182 tracing_stop_sched_switch();
183}
184
185/**
186 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
187 * @tr: trace array pointer to assign
188 *
189 * Some tracers might want to record the context switches in their
190 * trace. This function lets those tracers assign the trace array
191 * to use.
192 */
193void tracing_sched_switch_assign_trace(struct trace_array *tr)
194{
195 ctx_trace = tr;
196}
197
152static void start_sched_trace(struct trace_array *tr) 198static void start_sched_trace(struct trace_array *tr)
153{ 199{
154 sched_switch_reset(tr); 200 sched_switch_reset(tr);
155 tracing_start_cmdline_record(); 201 tracing_start_sched_switch_record();
156 tracer_enabled = 1;
157} 202}
158 203
159static void stop_sched_trace(struct trace_array *tr) 204static void stop_sched_trace(struct trace_array *tr)
160{ 205{
161 tracer_enabled = 0; 206 tracing_stop_sched_switch_record();
162 tracing_stop_cmdline_record();
163} 207}
164 208
165static void sched_switch_trace_init(struct trace_array *tr) 209static void sched_switch_trace_init(struct trace_array *tr)
166{ 210{
167 ctx_trace = tr; 211 ctx_trace = tr;
168 212 start_sched_trace(tr);
169 if (tr->ctrl)
170 start_sched_trace(tr);
171} 213}
172 214
173static void sched_switch_trace_reset(struct trace_array *tr) 215static void sched_switch_trace_reset(struct trace_array *tr)
174{ 216{
175 if (tr->ctrl) 217 if (sched_ref)
176 stop_sched_trace(tr); 218 stop_sched_trace(tr);
177} 219}
178 220
179static void sched_switch_trace_ctrl_update(struct trace_array *tr) 221static void sched_switch_trace_start(struct trace_array *tr)
180{ 222{
181 /* When starting a new trace, reset the buffers */ 223 sched_switch_reset(tr);
182 if (tr->ctrl) 224 tracing_start_sched_switch();
183 start_sched_trace(tr); 225}
184 else 226
185 stop_sched_trace(tr); 227static void sched_switch_trace_stop(struct trace_array *tr)
228{
229 tracing_stop_sched_switch();
186} 230}
187 231
188static struct tracer sched_switch_trace __read_mostly = 232static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +234,8 @@ static struct tracer sched_switch_trace __read_mostly =
190 .name = "sched_switch", 234 .name = "sched_switch",
191 .init = sched_switch_trace_init, 235 .init = sched_switch_trace_init,
192 .reset = sched_switch_trace_reset, 236 .reset = sched_switch_trace_reset,
193 .ctrl_update = sched_switch_trace_ctrl_update, 237 .start = sched_switch_trace_start,
238 .stop = sched_switch_trace_stop,
194#ifdef CONFIG_FTRACE_SELFTEST 239#ifdef CONFIG_FTRACE_SELFTEST
195 .selftest = trace_selftest_startup_sched_switch, 240 .selftest = trace_selftest_startup_sched_switch,
196#endif 241#endif
@@ -198,14 +243,6 @@ static struct tracer sched_switch_trace __read_mostly =
198 243
199__init static int init_sched_switch_trace(void) 244__init static int init_sched_switch_trace(void)
200{ 245{
201 int ret = 0;
202
203 if (atomic_read(&sched_ref))
204 ret = tracing_sched_register();
205 if (ret) {
206 pr_info("error registering scheduler trace\n");
207 return ret;
208 }
209 return register_tracer(&sched_switch_trace); 246 return register_tracer(&sched_switch_trace);
210} 247}
211device_initcall(init_sched_switch_trace); 248device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..983f2b1478c9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
50 return; 50 return;
51 51
52 pc = preempt_count(); 52 pc = preempt_count();
53 resched = need_resched(); 53 resched = ftrace_preempt_disable();
54 preempt_disable_notrace();
55 54
56 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
57 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
81 out: 80 out:
82 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
83 82
84 /* 83 ftrace_preempt_enable(resched);
85 * To prevent recursion from the scheduler, if the
86 * resched flag was set before we entered, then
87 * don't reschedule.
88 */
89 if (resched)
90 preempt_enable_no_resched_notrace();
91 else
92 preempt_enable_notrace();
93} 84}
94 85
95static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
@@ -271,6 +262,12 @@ out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 263}
273 264
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when it is temporarily disabled while a trace output file is open.
268 */
269static int save_tracer_enabled;
270
274static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
275{ 272{
276 int ret; 273 int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
309 306
310 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
311 308
312 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
313 316
314 return; 317 return;
315fail_deprobe_wake_new: 318fail_deprobe_wake_new:
@@ -321,6 +324,7 @@ fail_deprobe:
321static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
322{ 325{
323 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
324 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 330 unregister_trace_sched_wakeup_new(probe_wakeup);
@@ -330,40 +334,42 @@ static void stop_wakeup_tracer(struct trace_array *tr)
330static void wakeup_tracer_init(struct trace_array *tr) 334static void wakeup_tracer_init(struct trace_array *tr)
331{ 335{
332 wakeup_trace = tr; 336 wakeup_trace = tr;
333 337 start_wakeup_tracer(tr);
334 if (tr->ctrl)
335 start_wakeup_tracer(tr);
336} 338}
337 339
338static void wakeup_tracer_reset(struct trace_array *tr) 340static void wakeup_tracer_reset(struct trace_array *tr)
339{ 341{
340 if (tr->ctrl) { 342 stop_wakeup_tracer(tr);
341 stop_wakeup_tracer(tr); 343 /* make sure we put back any tasks we are tracing */
342 /* make sure we put back any tasks we are tracing */ 344 wakeup_reset(tr);
343 wakeup_reset(tr);
344 }
345} 345}
346 346
347static void wakeup_tracer_ctrl_update(struct trace_array *tr) 347static void wakeup_tracer_start(struct trace_array *tr)
348{ 348{
349 if (tr->ctrl) 349 wakeup_reset(tr);
350 start_wakeup_tracer(tr); 350 tracer_enabled = 1;
351 else 351 save_tracer_enabled = 1;
352 stop_wakeup_tracer(tr); 352}
353
354static void wakeup_tracer_stop(struct trace_array *tr)
355{
356 tracer_enabled = 0;
357 save_tracer_enabled = 0;
353} 358}
354 359
355static void wakeup_tracer_open(struct trace_iterator *iter) 360static void wakeup_tracer_open(struct trace_iterator *iter)
356{ 361{
357 /* stop the trace while dumping */ 362 /* stop the trace while dumping */
358 if (iter->tr->ctrl) 363 tracer_enabled = 0;
359 stop_wakeup_tracer(iter->tr);
360} 364}
361 365
362static void wakeup_tracer_close(struct trace_iterator *iter) 366static void wakeup_tracer_close(struct trace_iterator *iter)
363{ 367{
364 /* forget about any processes we were recording */ 368 /* forget about any processes we were recording */
365 if (iter->tr->ctrl) 369 if (save_tracer_enabled) {
366 start_wakeup_tracer(iter->tr); 370 wakeup_reset(iter->tr);
371 tracer_enabled = 1;
372 }
367} 373}
368 374
369static struct tracer wakeup_tracer __read_mostly = 375static struct tracer wakeup_tracer __read_mostly =
@@ -371,9 +377,10 @@ static struct tracer wakeup_tracer __read_mostly =
371 .name = "wakeup", 377 .name = "wakeup",
372 .init = wakeup_tracer_init, 378 .init = wakeup_tracer_init,
373 .reset = wakeup_tracer_reset, 379 .reset = wakeup_tracer_reset,
380 .start = wakeup_tracer_start,
381 .stop = wakeup_tracer_stop,
374 .open = wakeup_tracer_open, 382 .open = wakeup_tracer_open,
375 .close = wakeup_tracer_close, 383 .close = wakeup_tracer_close,
376 .ctrl_update = wakeup_tracer_ctrl_update,
377 .print_max = 1, 384 .print_max = 1,
378#ifdef CONFIG_FTRACE_SELFTEST 385#ifdef CONFIG_FTRACE_SELFTEST
379 .selftest = trace_selftest_startup_wakeup, 386 .selftest = trace_selftest_startup_wakeup,
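Both the wakeup tracer above and the stack tracer further down now call ftrace_preempt_disable()/ftrace_preempt_enable() instead of open-coding the need_resched()/preempt_disable_notrace() sequence. The helpers themselves live in kernel/trace/trace.h, which is not shown in these hunks; a sketch consistent with the code they replace (assumed, not quoted from the patch) looks like this:

static inline int ftrace_preempt_disable(void)
{
	int resched;

	/* remember whether a reschedule was already pending */
	resched = need_resched();
	preempt_disable_notrace();
	return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}

The intent is what the removed comment spelled out: if NEED_RESCHED was already set when the tracer callback was entered, re-enabling preemption must not schedule from inside the tracer, or it would recurse into the scheduler it is tracing.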
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..0728a105dcc1 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -110,7 +110,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
110 ftrace_set_filter(func_name, strlen(func_name), 1); 110 ftrace_set_filter(func_name, strlen(func_name), 1);
111 111
112 /* enable tracing */ 112 /* enable tracing */
113 tr->ctrl = 1;
114 trace->init(tr); 113 trace->init(tr);
115 114
116 /* Sleep for a 1/10 of a second */ 115 /* Sleep for a 1/10 of a second */
@@ -134,13 +133,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
134 msleep(100); 133 msleep(100);
135 134
136 /* stop the tracing. */ 135 /* stop the tracing. */
137 tr->ctrl = 0; 136 tracing_stop();
138 trace->ctrl_update(tr);
139 ftrace_enabled = 0; 137 ftrace_enabled = 0;
140 138
141 /* check the trace buffer */ 139 /* check the trace buffer */
142 ret = trace_test_buffer(tr, &count); 140 ret = trace_test_buffer(tr, &count);
143 trace->reset(tr); 141 trace->reset(tr);
142 tracing_start();
144 143
145 /* we should only have one item */ 144 /* we should only have one item */
146 if (!ret && count != 1) { 145 if (!ret && count != 1) {
@@ -148,6 +147,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
148 ret = -1; 147 ret = -1;
149 goto out; 148 goto out;
150 } 149 }
150
151 out: 151 out:
152 ftrace_enabled = save_ftrace_enabled; 152 ftrace_enabled = save_ftrace_enabled;
153 tracer_enabled = save_tracer_enabled; 153 tracer_enabled = save_tracer_enabled;
@@ -180,18 +180,17 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
180 ftrace_enabled = 1; 180 ftrace_enabled = 1;
181 tracer_enabled = 1; 181 tracer_enabled = 1;
182 182
183 tr->ctrl = 1;
184 trace->init(tr); 183 trace->init(tr);
185 /* Sleep for a 1/10 of a second */ 184 /* Sleep for a 1/10 of a second */
186 msleep(100); 185 msleep(100);
187 /* stop the tracing. */ 186 /* stop the tracing. */
188 tr->ctrl = 0; 187 tracing_stop();
189 trace->ctrl_update(tr);
190 ftrace_enabled = 0; 188 ftrace_enabled = 0;
191 189
192 /* check the trace buffer */ 190 /* check the trace buffer */
193 ret = trace_test_buffer(tr, &count); 191 ret = trace_test_buffer(tr, &count);
194 trace->reset(tr); 192 trace->reset(tr);
193 tracing_start();
195 194
196 if (!ret && !count) { 195 if (!ret && !count) {
197 printk(KERN_CONT ".. no entries found .."); 196 printk(KERN_CONT ".. no entries found ..");
@@ -223,7 +222,6 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
223 int ret; 222 int ret;
224 223
225 /* start the tracing */ 224 /* start the tracing */
226 tr->ctrl = 1;
227 trace->init(tr); 225 trace->init(tr);
228 /* reset the max latency */ 226 /* reset the max latency */
229 tracing_max_latency = 0; 227 tracing_max_latency = 0;
@@ -232,13 +230,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
232 udelay(100); 230 udelay(100);
233 local_irq_enable(); 231 local_irq_enable();
234 /* stop the tracing. */ 232 /* stop the tracing. */
235 tr->ctrl = 0; 233 tracing_stop();
236 trace->ctrl_update(tr);
237 /* check both trace buffers */ 234 /* check both trace buffers */
238 ret = trace_test_buffer(tr, NULL); 235 ret = trace_test_buffer(tr, NULL);
239 if (!ret) 236 if (!ret)
240 ret = trace_test_buffer(&max_tr, &count); 237 ret = trace_test_buffer(&max_tr, &count);
241 trace->reset(tr); 238 trace->reset(tr);
239 tracing_start();
242 240
243 if (!ret && !count) { 241 if (!ret && !count) {
244 printk(KERN_CONT ".. no entries found .."); 242 printk(KERN_CONT ".. no entries found ..");
@@ -259,8 +257,20 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
259 unsigned long count; 257 unsigned long count;
260 int ret; 258 int ret;
261 259
260 /*
261 * Now that the big kernel lock is no longer preemptible,
262 * and this is called with the BKL held, it will always
263 * fail. If preemption is already disabled, simply
264 * pass the test. When the BKL is removed, or becomes
265 * preemptible again, we will once again test this,
266 * so keep it in.
267 */
268 if (preempt_count()) {
269 printk(KERN_CONT "can not test ... force ");
270 return 0;
271 }
272
262 /* start the tracing */ 273 /* start the tracing */
263 tr->ctrl = 1;
264 trace->init(tr); 274 trace->init(tr);
265 /* reset the max latency */ 275 /* reset the max latency */
266 tracing_max_latency = 0; 276 tracing_max_latency = 0;
@@ -269,13 +279,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
269 udelay(100); 279 udelay(100);
270 preempt_enable(); 280 preempt_enable();
271 /* stop the tracing. */ 281 /* stop the tracing. */
272 tr->ctrl = 0; 282 tracing_stop();
273 trace->ctrl_update(tr);
274 /* check both trace buffers */ 283 /* check both trace buffers */
275 ret = trace_test_buffer(tr, NULL); 284 ret = trace_test_buffer(tr, NULL);
276 if (!ret) 285 if (!ret)
277 ret = trace_test_buffer(&max_tr, &count); 286 ret = trace_test_buffer(&max_tr, &count);
278 trace->reset(tr); 287 trace->reset(tr);
288 tracing_start();
279 289
280 if (!ret && !count) { 290 if (!ret && !count) {
281 printk(KERN_CONT ".. no entries found .."); 291 printk(KERN_CONT ".. no entries found ..");
@@ -296,8 +306,20 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
296 unsigned long count; 306 unsigned long count;
297 int ret; 307 int ret;
298 308
309 /*
310 * Now that the big kernel lock is no longer preemptible,
311 * and this is called with the BKL held, it will always
312 * fail. If preemption is already disabled, simply
313 * pass the test. When the BKL is removed, or becomes
314 * preemptible again, we will once again test this,
315 * so keep it in.
316 */
317 if (preempt_count()) {
318 printk(KERN_CONT "can not test ... force ");
319 return 0;
320 }
321
299 /* start the tracing */ 322 /* start the tracing */
300 tr->ctrl = 1;
301 trace->init(tr); 323 trace->init(tr);
302 324
303 /* reset the max latency */ 325 /* reset the max latency */
@@ -312,27 +334,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
312 local_irq_enable(); 334 local_irq_enable();
313 335
314 /* stop the tracing. */ 336 /* stop the tracing. */
315 tr->ctrl = 0; 337 tracing_stop();
316 trace->ctrl_update(tr);
317 /* check both trace buffers */ 338 /* check both trace buffers */
318 ret = trace_test_buffer(tr, NULL); 339 ret = trace_test_buffer(tr, NULL);
319 if (ret) 340 if (ret) {
341 tracing_start();
320 goto out; 342 goto out;
343 }
321 344
322 ret = trace_test_buffer(&max_tr, &count); 345 ret = trace_test_buffer(&max_tr, &count);
323 if (ret) 346 if (ret) {
347 tracing_start();
324 goto out; 348 goto out;
349 }
325 350
326 if (!ret && !count) { 351 if (!ret && !count) {
327 printk(KERN_CONT ".. no entries found .."); 352 printk(KERN_CONT ".. no entries found ..");
328 ret = -1; 353 ret = -1;
354 tracing_start();
329 goto out; 355 goto out;
330 } 356 }
331 357
332 /* do the test by disabling interrupts first this time */ 358 /* do the test by disabling interrupts first this time */
333 tracing_max_latency = 0; 359 tracing_max_latency = 0;
334 tr->ctrl = 1; 360 tracing_start();
335 trace->ctrl_update(tr);
336 preempt_disable(); 361 preempt_disable();
337 local_irq_disable(); 362 local_irq_disable();
338 udelay(100); 363 udelay(100);
@@ -341,8 +366,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
341 local_irq_enable(); 366 local_irq_enable();
342 367
343 /* stop the tracing. */ 368 /* stop the tracing. */
344 tr->ctrl = 0; 369 tracing_stop();
345 trace->ctrl_update(tr);
346 /* check both trace buffers */ 370 /* check both trace buffers */
347 ret = trace_test_buffer(tr, NULL); 371 ret = trace_test_buffer(tr, NULL);
348 if (ret) 372 if (ret)
@@ -358,6 +382,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
358 382
359 out: 383 out:
360 trace->reset(tr); 384 trace->reset(tr);
385 tracing_start();
361 tracing_max_latency = save_max; 386 tracing_max_latency = save_max;
362 387
363 return ret; 388 return ret;
@@ -423,7 +448,6 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
423 wait_for_completion(&isrt); 448 wait_for_completion(&isrt);
424 449
425 /* start the tracing */ 450 /* start the tracing */
426 tr->ctrl = 1;
427 trace->init(tr); 451 trace->init(tr);
428 /* reset the max latency */ 452 /* reset the max latency */
429 tracing_max_latency = 0; 453 tracing_max_latency = 0;
@@ -448,8 +472,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448 msleep(100); 472 msleep(100);
449 473
450 /* stop the tracing. */ 474 /* stop the tracing. */
451 tr->ctrl = 0; 475 tracing_stop();
452 trace->ctrl_update(tr);
453 /* check both trace buffers */ 476 /* check both trace buffers */
454 ret = trace_test_buffer(tr, NULL); 477 ret = trace_test_buffer(tr, NULL);
455 if (!ret) 478 if (!ret)
@@ -457,6 +480,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
457 480
458 481
459 trace->reset(tr); 482 trace->reset(tr);
483 tracing_start();
460 484
461 tracing_max_latency = save_max; 485 tracing_max_latency = save_max;
462 486
@@ -480,16 +504,15 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
480 int ret; 504 int ret;
481 505
482 /* start the tracing */ 506 /* start the tracing */
483 tr->ctrl = 1;
484 trace->init(tr); 507 trace->init(tr);
485 /* Sleep for a 1/10 of a second */ 508 /* Sleep for a 1/10 of a second */
486 msleep(100); 509 msleep(100);
487 /* stop the tracing. */ 510 /* stop the tracing. */
488 tr->ctrl = 0; 511 tracing_stop();
489 trace->ctrl_update(tr);
490 /* check the trace buffer */ 512 /* check the trace buffer */
491 ret = trace_test_buffer(tr, &count); 513 ret = trace_test_buffer(tr, &count);
492 trace->reset(tr); 514 trace->reset(tr);
515 tracing_start();
493 516
494 if (!ret && !count) { 517 if (!ret && !count) {
495 printk(KERN_CONT ".. no entries found .."); 518 printk(KERN_CONT ".. no entries found ..");
@@ -508,16 +531,15 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
508 int ret; 531 int ret;
509 532
510 /* start the tracing */ 533 /* start the tracing */
511 tr->ctrl = 1;
512 trace->init(tr); 534 trace->init(tr);
513 /* Sleep for a 1/10 of a second */ 535 /* Sleep for a 1/10 of a second */
514 msleep(100); 536 msleep(100);
515 /* stop the tracing. */ 537 /* stop the tracing. */
516 tr->ctrl = 0; 538 tracing_stop();
517 trace->ctrl_update(tr);
518 /* check the trace buffer */ 539 /* check the trace buffer */
519 ret = trace_test_buffer(tr, &count); 540 ret = trace_test_buffer(tr, &count);
520 trace->reset(tr); 541 trace->reset(tr);
542 tracing_start();
521 543
522 return ret; 544 return ret;
523} 545}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index be682b62fe58..d39e8b7de6a2 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
107 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 107 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return; 108 return;
109 109
110 resched = need_resched(); 110 resched = ftrace_preempt_disable();
111 preempt_disable_notrace();
112 111
113 cpu = raw_smp_processor_id(); 112 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */ 113 /* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 out: 119 out:
121 per_cpu(trace_active, cpu)--; 120 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */ 121 /* prevent recursion in schedule */
123 if (resched) 122 ftrace_preempt_enable(resched);
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127} 123}
128 124
129static struct ftrace_ops trace_ops __read_mostly = 125static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..05f753422aea 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -265,23 +265,12 @@ static void stack_trace_init(struct trace_array *tr)
265{ 265{
266 sysprof_trace = tr; 266 sysprof_trace = tr;
267 267
268 if (tr->ctrl) 268 start_stack_trace(tr);
269 start_stack_trace(tr);
270} 269}
271 270
272static void stack_trace_reset(struct trace_array *tr) 271static void stack_trace_reset(struct trace_array *tr)
273{ 272{
274 if (tr->ctrl) 273 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 274}
286 275
287static struct tracer stack_trace __read_mostly = 276static struct tracer stack_trace __read_mostly =
@@ -289,7 +278,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 278 .name = "sysprof",
290 .init = stack_trace_init, 279 .init = stack_trace_init,
291 .reset = stack_trace_reset, 280 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 281#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 282 .selftest = trace_selftest_startup_sysprof,
295#endif 283#endif
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..e96590f17de1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
43 */ 43 */
44#define TRACEPOINT_HASH_BITS 6 44#define TRACEPOINT_HASH_BITS 6
45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
54 struct hlist_node hlist; 55 struct hlist_node hlist;
55 void **funcs; 56 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */ 57 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0]; 58 char name[0];
61}; 59};
62 60
63static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 61struct tp_probes {
62 union {
63 struct rcu_head rcu;
64 struct list_head list;
65 } u;
66 void *probes[0];
67};
64 68
65static void free_old_closure(struct rcu_head *head) 69static inline void *allocate_probes(int count)
66{ 70{
67 struct tracepoint_entry *entry = container_of(head, 71 struct tp_probes *p = kmalloc(count * sizeof(void *)
68 struct tracepoint_entry, rcu); 72 + sizeof(struct tp_probes), GFP_KERNEL);
69 kfree(entry->oldptr); 73 return p == NULL ? NULL : p->probes;
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73} 74}
74 75
75static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) 76static void rcu_free_old_probes(struct rcu_head *head)
76{ 77{
77 if (!old) 78 kfree(container_of(head, struct tp_probes, u.rcu));
78 return; 79}
79 entry->oldptr = old; 80
80 entry->rcu_pending = 1; 81static inline void release_probes(void *old)
81 /* write rcu_pending before calling the RCU callback */ 82{
82 smp_wmb(); 83 if (old) {
83 call_rcu_sched(&entry->rcu, free_old_closure); 84 struct tp_probes *tp_probes = container_of(old,
85 struct tp_probes, probes[0]);
86 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
87 }
84} 88}
85 89
86static void debug_print_probes(struct tracepoint_entry *entry) 90static void debug_print_probes(struct tracepoint_entry *entry)
87{ 91{
88 int i; 92 int i;
89 93
90 if (!tracepoint_debug) 94 if (!tracepoint_debug || !entry->funcs)
91 return; 95 return;
92 96
93 for (i = 0; entry->funcs[i]; i++) 97 for (i = 0; entry->funcs[i]; i++)
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
111 return ERR_PTR(-EEXIST); 115 return ERR_PTR(-EEXIST);
112 } 116 }
113 /* + 2 : one for new probe, one for NULL func */ 117 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); 118 new = allocate_probes(nr_probes + 2);
115 if (new == NULL) 119 if (new == NULL)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
117 if (old) 121 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *)); 122 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe; 123 new[nr_probes] = probe;
124 new[nr_probes + 1] = NULL;
120 entry->refcount = nr_probes + 1; 125 entry->refcount = nr_probes + 1;
121 entry->funcs = new; 126 entry->funcs = new;
122 debug_print_probes(entry); 127 debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
132 old = entry->funcs; 137 old = entry->funcs;
133 138
134 if (!old) 139 if (!old)
135 return NULL; 140 return ERR_PTR(-ENOENT);
136 141
137 debug_print_probes(entry); 142 debug_print_probes(entry);
138 /* (N -> M), (N > 1, M >= 0) probes */ 143 /* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
151 int j = 0; 156 int j = 0;
152 /* N -> M, (N > 1, M > 0) */ 157 /* N -> M, (N > 1, M > 0) */
153 /* + 1 for NULL */ 158 /* + 1 for NULL */
154 new = kzalloc((nr_probes - nr_del + 1) 159 new = allocate_probes(nr_probes - nr_del + 1);
155 * sizeof(void *), GFP_KERNEL);
156 if (new == NULL) 160 if (new == NULL)
157 return ERR_PTR(-ENOMEM); 161 return ERR_PTR(-ENOMEM);
158 for (i = 0; old[i]; i++) 162 for (i = 0; old[i]; i++)
159 if ((probe && old[i] != probe)) 163 if ((probe && old[i] != probe))
160 new[j++] = old[i]; 164 new[j++] = old[i];
165 new[nr_probes - nr_del] = NULL;
161 entry->refcount = nr_probes - nr_del; 166 entry->refcount = nr_probes - nr_del;
162 entry->funcs = new; 167 entry->funcs = new;
163 } 168 }
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
215 memcpy(&e->name[0], name, name_len); 220 memcpy(&e->name[0], name, name_len);
216 e->funcs = NULL; 221 e->funcs = NULL;
217 e->refcount = 0; 222 e->refcount = 0;
218 e->rcu_pending = 0;
219 hlist_add_head(&e->hlist, head); 223 hlist_add_head(&e->hlist, head);
220 return e; 224 return e;
221} 225}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
224 * Remove the tracepoint from the tracepoint hash table. Must be called with 228 * Remove the tracepoint from the tracepoint hash table. Must be called with
225 * mutex_lock held. 229 * mutex_lock held.
226 */ 230 */
227static int remove_tracepoint(const char *name) 231static inline void remove_tracepoint(struct tracepoint_entry *e)
228{ 232{
229 struct hlist_head *head;
230 struct hlist_node *node;
231 struct tracepoint_entry *e;
232 int found = 0;
233 size_t len = strlen(name) + 1;
234 u32 hash = jhash(name, len-1, 0);
235
236 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
237 hlist_for_each_entry(e, node, head, hlist) {
238 if (!strcmp(name, e->name)) {
239 found = 1;
240 break;
241 }
242 }
243 if (!found)
244 return -ENOENT;
245 if (e->refcount)
246 return -EBUSY;
247 hlist_del(&e->hlist); 233 hlist_del(&e->hlist);
248 /* Make sure the call_rcu_sched has been executed */
249 if (e->rcu_pending)
250 rcu_barrier_sched();
251 kfree(e); 234 kfree(e);
252 return 0;
253} 235}
254 236
255/* 237/*
@@ -320,6 +302,23 @@ static void tracepoint_update_probes(void)
320 module_update_tracepoints(); 302 module_update_tracepoints();
321} 303}
322 304
305static void *tracepoint_add_probe(const char *name, void *probe)
306{
307 struct tracepoint_entry *entry;
308 void *old;
309
310 entry = get_tracepoint(name);
311 if (!entry) {
312 entry = add_tracepoint(name);
313 if (IS_ERR(entry))
314 return entry;
315 }
316 old = tracepoint_entry_add_probe(entry, probe);
317 if (IS_ERR(old) && !entry->refcount)
318 remove_tracepoint(entry);
319 return old;
320}
321
323/** 322/**
324 * tracepoint_probe_register - Connect a probe to a tracepoint 323 * tracepoint_probe_register - Connect a probe to a tracepoint
325 * @name: tracepoint name 324 * @name: tracepoint name
@@ -330,44 +329,36 @@ static void tracepoint_update_probes(void)
330 */ 329 */
331int tracepoint_probe_register(const char *name, void *probe) 330int tracepoint_probe_register(const char *name, void *probe)
332{ 331{
333 struct tracepoint_entry *entry;
334 int ret = 0;
335 void *old; 332 void *old;
336 333
337 mutex_lock(&tracepoints_mutex); 334 mutex_lock(&tracepoints_mutex);
338 entry = get_tracepoint(name); 335 old = tracepoint_add_probe(name, probe);
339 if (!entry) {
340 entry = add_tracepoint(name);
341 if (IS_ERR(entry)) {
342 ret = PTR_ERR(entry);
343 goto end;
344 }
345 }
346 /*
347 * If we detect that a call_rcu_sched is pending for this tracepoint,
348 * make sure it's executed now.
349 */
350 if (entry->rcu_pending)
351 rcu_barrier_sched();
352 old = tracepoint_entry_add_probe(entry, probe);
353 if (IS_ERR(old)) {
354 ret = PTR_ERR(old);
355 goto end;
356 }
357 mutex_unlock(&tracepoints_mutex); 336 mutex_unlock(&tracepoints_mutex);
337 if (IS_ERR(old))
338 return PTR_ERR(old);
339
358 tracepoint_update_probes(); /* may update entry */ 340 tracepoint_update_probes(); /* may update entry */
359 mutex_lock(&tracepoints_mutex); 341 release_probes(old);
360 entry = get_tracepoint(name); 342 return 0;
361 WARN_ON(!entry);
362 if (entry->rcu_pending)
363 rcu_barrier_sched();
364 tracepoint_entry_free_old(entry, old);
365end:
366 mutex_unlock(&tracepoints_mutex);
367 return ret;
368} 343}
369EXPORT_SYMBOL_GPL(tracepoint_probe_register); 344EXPORT_SYMBOL_GPL(tracepoint_probe_register);
370 345
346static void *tracepoint_remove_probe(const char *name, void *probe)
347{
348 struct tracepoint_entry *entry;
349 void *old;
350
351 entry = get_tracepoint(name);
352 if (!entry)
353 return ERR_PTR(-ENOENT);
354 old = tracepoint_entry_remove_probe(entry, probe);
355 if (IS_ERR(old))
356 return old;
357 if (!entry->refcount)
358 remove_tracepoint(entry);
359 return old;
360}
361
371/** 362/**
372 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 363 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
373 * @name: tracepoint name 364 * @name: tracepoint name
@@ -380,38 +371,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
380 */ 371 */
381int tracepoint_probe_unregister(const char *name, void *probe) 372int tracepoint_probe_unregister(const char *name, void *probe)
382{ 373{
383 struct tracepoint_entry *entry;
384 void *old; 374 void *old;
385 int ret = -ENOENT;
386 375
387 mutex_lock(&tracepoints_mutex); 376 mutex_lock(&tracepoints_mutex);
388 entry = get_tracepoint(name); 377 old = tracepoint_remove_probe(name, probe);
389 if (!entry)
390 goto end;
391 if (entry->rcu_pending)
392 rcu_barrier_sched();
393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 "that doesn't exist\n");
397 goto end;
398 }
399 mutex_unlock(&tracepoints_mutex); 378 mutex_unlock(&tracepoints_mutex);
379 if (IS_ERR(old))
380 return PTR_ERR(old);
381
400 tracepoint_update_probes(); /* may update entry */ 382 tracepoint_update_probes(); /* may update entry */
383 release_probes(old);
384 return 0;
385}
386EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
387
388static LIST_HEAD(old_probes);
389static int need_update;
390
391static void tracepoint_add_old_probes(void *old)
392{
393 need_update = 1;
394 if (old) {
395 struct tp_probes *tp_probes = container_of(old,
396 struct tp_probes, probes[0]);
397 list_add(&tp_probes->u.list, &old_probes);
398 }
399}
400
401/**
402 * tracepoint_probe_register_noupdate - register a probe but not connect
403 * @name: tracepoint name
404 * @probe: probe handler
405 *
406 * caller must call tracepoint_probe_update_all()
407 */
408int tracepoint_probe_register_noupdate(const char *name, void *probe)
409{
410 void *old;
411
401 mutex_lock(&tracepoints_mutex); 412 mutex_lock(&tracepoints_mutex);
402 entry = get_tracepoint(name); 413 old = tracepoint_add_probe(name, probe);
403 if (!entry) 414 if (IS_ERR(old)) {
404 goto end; 415 mutex_unlock(&tracepoints_mutex);
405 if (entry->rcu_pending) 416 return PTR_ERR(old);
406 rcu_barrier_sched(); 417 }
407 tracepoint_entry_free_old(entry, old); 418 tracepoint_add_old_probes(old);
408 remove_tracepoint(name); /* Ignore busy error message */
409 ret = 0;
410end:
411 mutex_unlock(&tracepoints_mutex); 419 mutex_unlock(&tracepoints_mutex);
412 return ret; 420 return 0;
413} 421}
414EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 422EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
423
424/**
425 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
426 * @name: tracepoint name
427 * @probe: probe function pointer
428 *
429 * caller must call tracepoint_probe_update_all()
430 */
431int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
432{
433 void *old;
434
435 mutex_lock(&tracepoints_mutex);
436 old = tracepoint_remove_probe(name, probe);
437 if (IS_ERR(old)) {
438 mutex_unlock(&tracepoints_mutex);
439 return PTR_ERR(old);
440 }
441 tracepoint_add_old_probes(old);
442 mutex_unlock(&tracepoints_mutex);
443 return 0;
444}
445EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
446
447/**
448 * tracepoint_probe_update_all - update tracepoints
449 */
450void tracepoint_probe_update_all(void)
451{
452 LIST_HEAD(release_probes);
453 struct tp_probes *pos, *next;
454
455 mutex_lock(&tracepoints_mutex);
456 if (!need_update) {
457 mutex_unlock(&tracepoints_mutex);
458 return;
459 }
460 if (!list_empty(&old_probes))
461 list_replace_init(&old_probes, &release_probes);
462 need_update = 0;
463 mutex_unlock(&tracepoints_mutex);
464
465 tracepoint_update_probes();
466 list_for_each_entry_safe(pos, next, &release_probes, u.list) {
467 list_del(&pos->u.list);
468 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
469 }
470}
471EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
415 472
416/** 473/**
417 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 474 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
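The new *_noupdate variants let a caller queue several probe additions or removals and pay for the tracepoint update (and the RCU-deferred release of the old probe arrays) only once, by calling tracepoint_probe_update_all() at the end. A hedged usage sketch follows, using tracepoint names that appear elsewhere in this patch; my_probe and my_register_probes are illustrative names, not from the patch.

/* Illustrative only: batch-register probes, then connect them in one go. */
static const char *my_events[] = { "sched_switch", "sched_wakeup" };

static int my_register_probes(void *my_probe)
{
	int i, ret;

	for (i = 0; i < ARRAY_SIZE(my_events); i++) {
		ret = tracepoint_probe_register_noupdate(my_events[i],
							 my_probe);
		if (ret)
			return ret;
	}
	/* one update connects every queued probe and frees the old arrays */
	tracepoint_probe_update_all();
	return 0;
}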
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 468fbc9016c7..7a176773af85 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -198,16 +198,10 @@ cmd_modversions = \
198 fi; 198 fi;
199endif 199endif
200 200
201ifdef CONFIG_64BIT
202arch_bits = 64
203else
204arch_bits = 32
205endif
206
207ifdef CONFIG_FTRACE_MCOUNT_RECORD 201ifdef CONFIG_FTRACE_MCOUNT_RECORD
208cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl \ 202cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \
209 "$(ARCH)" "$(arch_bits)" "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" \ 203 "$(if $(CONFIG_64BIT),64,32)" \
210 "$(NM)" "$(RM)" "$(MV)" "$(@)"; 204 "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)";
211endif 205endif
212 206
213define rule_cc_o_c 207define rule_cc_o_c
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 6b9fe3eb8360..eeac71c87c66 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -134,6 +134,7 @@ my $section_regex; # Find the start of a section
134my $function_regex; # Find the name of a function 134my $function_regex; # Find the name of a function
135 # (return offset and func name) 135 # (return offset and func name)
136my $mcount_regex; # Find the call site to mcount (return offset) 136my $mcount_regex; # Find the call site to mcount (return offset)
137my $alignment; # The .align value to use for $mcount_section
137 138
138if ($arch eq "x86") { 139if ($arch eq "x86") {
139 if ($bits == 64) { 140 if ($bits == 64) {
@@ -148,6 +149,7 @@ if ($arch eq "x86_64") {
148 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; 149 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
149 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; 150 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$";
150 $type = ".quad"; 151 $type = ".quad";
152 $alignment = 8;
151 153
152 # force flags for this arch 154 # force flags for this arch
153 $ld .= " -m elf_x86_64"; 155 $ld .= " -m elf_x86_64";
@@ -160,6 +162,7 @@ if ($arch eq "x86_64") {
160 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; 162 $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
161 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; 163 $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
162 $type = ".long"; 164 $type = ".long";
165 $alignment = 4;
163 166
164 # force flags for this arch 167 # force flags for this arch
165 $ld .= " -m elf_i386"; 168 $ld .= " -m elf_i386";
@@ -288,6 +291,7 @@ sub update_funcs
288 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n"; 291 open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
289 $opened = 1; 292 $opened = 1;
290 print FILE "\t.section $mcount_section,\"a\",\@progbits\n"; 293 print FILE "\t.section $mcount_section,\"a\",\@progbits\n";
294 print FILE "\t.align $alignment\n";
291 } 295 }
292 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset; 296 printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
293 } 297 }
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
new file mode 100644
index 000000000000..902f9a992620
--- /dev/null
+++ b/scripts/tracing/draw_functrace.py
@@ -0,0 +1,130 @@
1#!/usr/bin/python
2
3"""
4Copyright 2008 (c) Frederic Weisbecker <fweisbec@gmail.com>
5Licensed under the terms of the GNU GPL License version 2
6
7This script parses a trace provided by the function tracer in
8kernel/trace/trace_functions.c
9The resulting trace is processed into a tree to produce a more
10human-readable view of the call stack, drawn as a textual, hierarchical
11tree of calls. Only the function names and call times are provided.
12
13Usage:
14 Be sure that you have CONFIG_FUNCTION_TRACER
15 # mkdir /debug
16 # mount -t debugfs nodev /debug
17 # echo function > /debug/tracing/current_tracer
18 $ cat /debug/tracing/trace_pipe > ~/raw_trace_func
19 Wait for a while, but not too long; the script is a bit slow.
20 Break the pipe (Ctrl + Z)
21 $ scripts/draw_functrace.py < raw_trace_func > draw_functrace
22 Then you have your drawn trace in draw_functrace
23"""
24
25
26import sys, re
27
28class CallTree:
29 """ This class provides a tree representation of the function
30 call stack. If a function has no parent in the kernel (interrupt,
31 syscall, kernel thread...) then it is attached to a virtual parent
32 called ROOT.
33 """
34 ROOT = None
35
36 def __init__(self, func, time = None, parent = None):
37 self._func = func
38 self._time = time
39 if parent is None:
40 self._parent = CallTree.ROOT
41 else:
42 self._parent = parent
43 self._children = []
44
45 def calls(self, func, calltime):
46 """ If a function calls another one, call this method to insert it
47 into the tree at the appropriate place.
48 @return: A reference to the newly created child node.
49 """
50 child = CallTree(func, calltime, self)
51 self._children.append(child)
52 return child
53
54 def getParent(self, func):
55 """ Retrieve the last parent of the current node that
56 has the name given by func. If this function is not
57 on a parent, then create it as new child of root
58 @return: A reference to the parent.
59 """
60 tree = self
61 while tree != CallTree.ROOT and tree._func != func:
62 tree = tree._parent
63 if tree == CallTree.ROOT:
64 child = CallTree.ROOT.calls(func, None)
65 return child
66 return tree
67
68 def __repr__(self):
69 return self.__toString("", True)
70
71 def __toString(self, branch, lastChild):
72 if self._time is not None:
73 s = "%s----%s (%s)\n" % (branch, self._func, self._time)
74 else:
75 s = "%s----%s\n" % (branch, self._func)
76
77 i = 0
78 if lastChild:
79 branch = branch[:-1] + " "
80 while i < len(self._children):
81 if i != len(self._children) - 1:
82 s += "%s" % self._children[i].__toString(branch +\
83 " |", False)
84 else:
85 s += "%s" % self._children[i].__toString(branch +\
86 " |", True)
87 i += 1
88 return s
89
90class BrokenLineException(Exception):
91 """If the last line is not complete because of the pipe breakage,
92 we want to stop the processing and ignore this line.
93 """
94 pass
95
96class CommentLineException(Exception):
97 """ If the line is a comment (as in the beginning of the trace file),
98 just ignore it.
99 """
100 pass
101
102
103def parseLine(line):
104 line = line.strip()
105 if line.startswith("#"):
106 raise CommentLineException
107 m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
108 if m is None:
109 raise BrokenLineException
110 return (m.group(1), m.group(2), m.group(3))
111
112
113def main():
114 CallTree.ROOT = CallTree("Root (Nowhere)", None, None)
115 tree = CallTree.ROOT
116
117 for line in sys.stdin:
118 try:
119 calltime, callee, caller = parseLine(line)
120 except BrokenLineException:
121 break
122 except CommentLineException:
123 continue
124 tree = tree.getParent(caller)
125 tree = tree.calls(callee, calltime)
126
127 print CallTree.ROOT
128
129if __name__ == "__main__":
130 main()