Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                   194
-rw-r--r--  kernel/trace/Makefile                   19
-rw-r--r--  kernel/trace/ftrace.c                 1541
-rw-r--r--  kernel/trace/ring_buffer.c            2517
-rw-r--r--  kernel/trace/trace.c                  2721
-rw-r--r--  kernel/trace/trace.h                   470
-rw-r--r--  kernel/trace/trace_boot.c              186
-rw-r--r--  kernel/trace/trace_branch.c            342
-rw-r--r--  kernel/trace/trace_functions.c          32
-rw-r--r--  kernel/trace/trace_functions_graph.c   669
-rw-r--r--  kernel/trace/trace_hw_branches.c       195
-rw-r--r--  kernel/trace/trace_irqsoff.c            84
-rw-r--r--  kernel/trace/trace_mmiotrace.c         153
-rw-r--r--  kernel/trace/trace_nop.c               105
-rw-r--r--  kernel/trace/trace_power.c             179
-rw-r--r--  kernel/trace/trace_sched_switch.c      252
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      220
-rw-r--r--  kernel/trace/trace_selftest.c          274
-rw-r--r--  kernel/trace/trace_stack.c             360
-rw-r--r--  kernel/trace/trace_sysprof.c            32
20 files changed, 8366 insertions, 2179 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..e2a4ff6fc3a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,56 @@
 #
-# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
+# Architectures that offer an FUNCTION_TRACER implementation should
+# select HAVE_FUNCTION_TRACER:
 #
-config HAVE_FTRACE
+
+config USER_STACKTRACE_SUPPORT
+	bool
+
+config NOP_TRACER
+	bool
+
+config HAVE_FUNCTION_TRACER
+	bool
+
+config HAVE_FUNCTION_GRAPH_TRACER
+	bool
+
+config HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	bool
+	help
+	  This gets selected when the arch tests the function_trace_stop
+	  variable at the mcount call site. Otherwise, this variable
+	  is tested by the called function.
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+	bool
+
+config HAVE_HW_BRANCH_TRACER
+	bool
+
 config TRACER_MAX_TRACE
 	bool
 
+config RING_BUFFER
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
-	select STACKTRACE
+	select RING_BUFFER
+	select STACKTRACE if STACKTRACE_SUPPORT
+	select TRACEPOINTS
+	select NOP_TRACER
+
+menu "Tracers"
 
-config FTRACE
+config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
-	depends on HAVE_FTRACE
+	depends on HAVE_FUNCTION_TRACER
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
@@ -30,12 +63,26 @@ config FTRACE
 	  (the bootup default), then the overhead of the instructions is very
 	  small and not measurable even in micro-benchmarks.
 
+config FUNCTION_GRAPH_TRACER
+	bool "Kernel Function Graph Tracer"
+	depends on HAVE_FUNCTION_GRAPH_TRACER
+	depends on FUNCTION_TRACER
+	default y
+	help
+	  Enable the kernel to trace a function at both its return
+	  and its entry.
+	  It's first purpose is to trace the duration of functions and
+	  draw a call graph for each thread with some informations like
+	  the return value.
+	  This is done by setting the current return address on the current
+	  task structure into a stack of calls.
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACE_IRQFLAGS
 	select TRACING
 	select TRACER_MAX_TRACE
@@ -58,7 +105,7 @@ config PREEMPT_TRACER
 	default n
 	depends on GENERIC_TIME
 	depends on PREEMPT
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select TRACER_MAX_TRACE
 	help
@@ -85,7 +132,7 @@ config SYSPROF_TRACER
 
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
@@ -95,17 +142,133 @@ config SCHED_TRACER
 
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select MARKERS
 	help
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config BOOT_TRACER
+	bool "Trace boot initcalls"
+	depends on DEBUG_KERNEL
+	select TRACING
+	select CONTEXT_SWITCH_TRACER
+	help
+	  This tracer helps developers to optimize boot times: it records
+	  the timings of the initcalls and traces key events and the identity
+	  of tasks that can cause boot delays, such as context-switches.
+
+	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  produce pretty graphics about boot inefficiencies, giving a visual
+	  representation of the delays during initcalls - but the raw
+	  /debug/tracing/trace text output is readable too.
+
+	  ( Note that tracing self tests can't be enabled if this tracer is
+	    selected, because the self-tests are an initcall as well and that
+	    would invalidate the boot trace. )
+
+config TRACE_BRANCH_PROFILING
+	bool "Trace likely/unlikely profiler"
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer profiles all the the likely and unlikely macros
+	  in the kernel. It will display the results in:
+
+	  /debugfs/tracing/profile_annotated_branch
+
+	  Note: this will add a significant overhead, only turn this
+	  on if you need to profile the system's use of these macros.
+
+	  Say N if unsure.
+
+config PROFILE_ALL_BRANCHES
+	bool "Profile all if conditionals"
+	depends on TRACE_BRANCH_PROFILING
+	help
+	  This tracer profiles all branch conditions. Every if ()
+	  taken in the kernel is recorded whether it hit or miss.
+	  The results will be displayed in:
+
+	  /debugfs/tracing/profile_branch
+
+	  This configuration, when enabled, will impose a great overhead
+	  on the system. This should only be enabled when the system
+	  is to be analyzed
+
+	  Say N if unsure.
+
+config TRACING_BRANCHES
+	bool
+	help
+	  Selected by tracers that will trace the likely and unlikely
+	  conditions. This prevents the tracers themselves from being
+	  profiled. Profiling the tracing infrastructure can only happen
+	  when the likelys and unlikelys are not being traced.
+
+config BRANCH_TRACER
+	bool "Trace likely/unlikely instances"
+	depends on TRACE_BRANCH_PROFILING
+	select TRACING_BRANCHES
+	help
+	  This traces the events of likely and unlikely condition
+	  calls in the kernel. The difference between this and the
+	  "Trace likely/unlikely profiler" is that this is not a
+	  histogram of the callers, but actually places the calling
+	  events into a running trace buffer to see when and where the
+	  events happened, as well as their results.
+
+	  Say N if unsure.
+
+config POWER_TRACER
+	bool "Trace power consumption behavior"
+	depends on DEBUG_KERNEL
+	depends on X86
+	select TRACING
+	help
+	  This tracer helps developers to analyze and optimize the kernels
+	  power management decisions, specifically the C-state and P-state
+	  behavior.
+
+
+config STACK_TRACER
+	bool "Trace max stack"
+	depends on HAVE_FUNCTION_TRACER
+	depends on DEBUG_KERNEL
+	select FUNCTION_TRACER
+	select STACKTRACE
+	help
+	  This special tracer records the maximum stack footprint of the
+	  kernel and displays it in debugfs/tracing/stack_trace.
+
+	  This tracer works by hooking into every function call that the
+	  kernel executes, and keeping a maximum stack depth value and
+	  stack-trace saved. If this is configured with DYNAMIC_FTRACE
+	  then it will not have any overhead while the stack tracer
+	  is disabled.
+
+	  To enable the stack tracer on bootup, pass in 'stacktrace'
+	  on the kernel command line.
+
+	  The stack tracer can also be enabled or disabled via the
+	  sysctl kernel.stack_tracer_enabled
+
+	  Say N if unsure.
+
+config HW_BRANCH_TRACER
+	depends on HAVE_HW_BRANCH_TRACER
+	bool "Trace hw branches"
+	select TRACING
+	help
+	  This tracer records all branches on the system in a circular
+	  buffer giving access to the last N branches for each cpu.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
-	depends on FTRACE
+	depends on FUNCTION_TRACER
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on DEBUG_KERNEL
 	default y
 	help
 	  This option will modify all the calls to ftrace dynamically
@@ -113,7 +276,7 @@ config DYNAMIC_FTRACE
 	  with a No-Op instruction) as they are called. A table is
 	  created to dynamically enable them again.
 
-	  This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
 	  has native performance as long as no tracing is active.
 
 	  The changes to the code are done by a kernel thread that
@@ -121,15 +284,22 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
 	bool
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
 	  a series of tests are made to verify that the tracer is
 	  functioning properly. It will do tests on all the configured
 	  tracers of ftrace.
+
+endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..349d5a93653f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
 
 # Do not instrument the tracer itself:
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 
@@ -10,15 +10,28 @@ CFLAGS_trace_selftest_dynamic.o = -pg
 obj-y += trace_selftest_dynamic.o
 endif
 
-obj-$(CONFIG_FTRACE) += libftrace.o
+# If unlikely tracing is enabled, do not trace these files
+ifdef CONFIG_TRACING_BRANCHES
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
-obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
+obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
+obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
+obj-$(CONFIG_POWER_TRACER) += trace_power.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..2f32969c09df 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,17 +25,35 @@
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h> 28#include <linux/list.h>
30 29
31#include <asm/ftrace.h> 30#include <asm/ftrace.h>
32 31
33#include "trace.h" 32#include "trace.h"
34 33
34#define FTRACE_WARN_ON(cond) \
35 do { \
36 if (WARN_ON(cond)) \
37 ftrace_kill(); \
38 } while (0)
39
40#define FTRACE_WARN_ON_ONCE(cond) \
41 do { \
42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \
44 } while (0)
45
35/* ftrace_enabled is a method to turn ftrace on or off */ 46/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled; 48static int last_ftrace_enabled;
38 49
50/* set when tracing only a pid */
51struct pid *ftrace_pid_trace;
52static struct pid * const ftrace_swapper_pid = &init_struct_pid;
53
54/* Quick disabling of function tracer. */
55int function_trace_stop;
56
39/* 57/*
40 * ftrace_disabled is set when an anomaly is discovered. 58 * ftrace_disabled is set when an anomaly is discovered.
41 * ftrace_disabled is much stronger than ftrace_enabled. 59 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -44,6 +62,7 @@ static int ftrace_disabled __read_mostly;
44 62
45static DEFINE_SPINLOCK(ftrace_lock); 63static DEFINE_SPINLOCK(ftrace_lock);
46static DEFINE_MUTEX(ftrace_sysctl_lock); 64static DEFINE_MUTEX(ftrace_sysctl_lock);
65static DEFINE_MUTEX(ftrace_start_lock);
47 66
48static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
49{ 68{
@@ -52,6 +71,8 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
52 71
53static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 72static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
54ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 73ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
74ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
75ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
55 76
56static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 77static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
57{ 78{
@@ -68,6 +89,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68 }; 89 };
69} 90}
70 91
92static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
93{
94 if (!test_tsk_trace_trace(current))
95 return;
96
97 ftrace_pid_function(ip, parent_ip);
98}
99
100static void set_ftrace_pid_function(ftrace_func_t func)
101{
102 /* do not set ftrace_pid_function to itself! */
103 if (func != ftrace_pid_func)
104 ftrace_pid_function = func;
105}
106
71/** 107/**
72 * clear_ftrace_function - reset the ftrace function 108 * clear_ftrace_function - reset the ftrace function
73 * 109 *
@@ -77,11 +113,27 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
77void clear_ftrace_function(void) 113void clear_ftrace_function(void)
78{ 114{
79 ftrace_trace_function = ftrace_stub; 115 ftrace_trace_function = ftrace_stub;
116 __ftrace_trace_function = ftrace_stub;
117 ftrace_pid_function = ftrace_stub;
118}
119
120#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
121/*
122 * For those archs that do not test ftrace_trace_stop in their
123 * mcount call site, we need to do it from C.
124 */
125static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
126{
127 if (function_trace_stop)
128 return;
129
130 __ftrace_trace_function(ip, parent_ip);
80} 131}
132#endif
81 133
82static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
83{ 135{
84 /* Should never be called by interrupts */ 136 /* should not be called from interrupt context */
85 spin_lock(&ftrace_lock); 137 spin_lock(&ftrace_lock);
86 138
87 ops->next = ftrace_list; 139 ops->next = ftrace_list;
@@ -95,14 +147,28 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
95 ftrace_list = ops; 147 ftrace_list = ops;
96 148
97 if (ftrace_enabled) { 149 if (ftrace_enabled) {
150 ftrace_func_t func;
151
152 if (ops->next == &ftrace_list_end)
153 func = ops->func;
154 else
155 func = ftrace_list_func;
156
157 if (ftrace_pid_trace) {
158 set_ftrace_pid_function(func);
159 func = ftrace_pid_func;
160 }
161
98 /* 162 /*
99 * For one func, simply call it directly. 163 * For one func, simply call it directly.
100 * For more than one func, call the chain. 164 * For more than one func, call the chain.
101 */ 165 */
102 if (ops->next == &ftrace_list_end) 166#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
103 ftrace_trace_function = ops->func; 167 ftrace_trace_function = func;
104 else 168#else
105 ftrace_trace_function = ftrace_list_func; 169 __ftrace_trace_function = func;
170 ftrace_trace_function = ftrace_test_stop_func;
171#endif
106 } 172 }
107 173
108 spin_unlock(&ftrace_lock); 174 spin_unlock(&ftrace_lock);
@@ -115,6 +181,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
115 struct ftrace_ops **p; 181 struct ftrace_ops **p;
116 int ret = 0; 182 int ret = 0;
117 183
184 /* should not be called from interrupt context */
118 spin_lock(&ftrace_lock); 185 spin_lock(&ftrace_lock);
119 186
120 /* 187 /*
@@ -140,9 +207,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
140 207
141 if (ftrace_enabled) { 208 if (ftrace_enabled) {
142 /* If we only have one func left, then call that directly */ 209 /* If we only have one func left, then call that directly */
143 if (ftrace_list == &ftrace_list_end || 210 if (ftrace_list->next == &ftrace_list_end) {
144 ftrace_list->next == &ftrace_list_end) 211 ftrace_func_t func = ftrace_list->func;
145 ftrace_trace_function = ftrace_list->func; 212
213 if (ftrace_pid_trace) {
214 set_ftrace_pid_function(func);
215 func = ftrace_pid_func;
216 }
217#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
218 ftrace_trace_function = func;
219#else
220 __ftrace_trace_function = func;
221#endif
222 }
146 } 223 }
147 224
148 out: 225 out:
@@ -151,9 +228,48 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
151 return ret; 228 return ret;
152} 229}
153 230
231static void ftrace_update_pid_func(void)
232{
233 ftrace_func_t func;
234
235 /* should not be called from interrupt context */
236 spin_lock(&ftrace_lock);
237
238 if (ftrace_trace_function == ftrace_stub)
239 goto out;
240
241 func = ftrace_trace_function;
242
243 if (ftrace_pid_trace) {
244 set_ftrace_pid_function(func);
245 func = ftrace_pid_func;
246 } else {
247 if (func == ftrace_pid_func)
248 func = ftrace_pid_function;
249 }
250
251#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
252 ftrace_trace_function = func;
253#else
254 __ftrace_trace_function = func;
255#endif
256
257 out:
258 spin_unlock(&ftrace_lock);
259}
260
154#ifdef CONFIG_DYNAMIC_FTRACE 261#ifdef CONFIG_DYNAMIC_FTRACE
262#ifndef CONFIG_FTRACE_MCOUNT_RECORD
263# error Dynamic ftrace depends on MCOUNT_RECORD
264#endif
155 265
156static struct task_struct *ftraced_task; 266/*
267 * Since MCOUNT_ADDR may point to mcount itself, we do not want
268 * to get it confused by reading a reference in the code as we
269 * are parsing on objcopy output of text. Use a variable for
270 * it instead.
271 */
272static unsigned long mcount_addr = MCOUNT_ADDR;
157 273
158enum { 274enum {
159 FTRACE_ENABLE_CALLS = (1 << 0), 275 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -161,18 +277,14 @@ enum {
161 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 277 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
162 FTRACE_ENABLE_MCOUNT = (1 << 3), 278 FTRACE_ENABLE_MCOUNT = (1 << 3),
163 FTRACE_DISABLE_MCOUNT = (1 << 4), 279 FTRACE_DISABLE_MCOUNT = (1 << 4),
280 FTRACE_START_FUNC_RET = (1 << 5),
281 FTRACE_STOP_FUNC_RET = (1 << 6),
164}; 282};
165 283
166static int ftrace_filtered; 284static int ftrace_filtered;
167static int tracing_on;
168static int frozen_record_count;
169
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171 285
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); 286static LIST_HEAD(ftrace_new_addrs);
173 287
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock); 288static DEFINE_MUTEX(ftrace_regex_lock);
177 289
178struct ftrace_page { 290struct ftrace_page {
@@ -190,16 +302,13 @@ struct ftrace_page {
190static struct ftrace_page *ftrace_pages_start; 302static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages; 303static struct ftrace_page *ftrace_pages;
192 304
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records; 305static struct dyn_ftrace *ftrace_free_records;
200 306
201 307
202#ifdef CONFIG_KPROBES 308#ifdef CONFIG_KPROBES
309
310static int frozen_record_count;
311
203static inline void freeze_record(struct dyn_ftrace *rec) 312static inline void freeze_record(struct dyn_ftrace *rec)
204{ 313{
205 if (!(rec->flags & FTRACE_FL_FROZEN)) { 314 if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -226,79 +335,36 @@ static inline int record_frozen(struct dyn_ftrace *rec)
226# define record_frozen(rec) ({ 0; }) 335# define record_frozen(rec) ({ 0; })
227#endif /* CONFIG_KPROBES */ 336#endif /* CONFIG_KPROBES */
228 337
229int skip_trace(unsigned long ip) 338static void ftrace_free_rec(struct dyn_ftrace *rec)
230{ 339{
231 unsigned long fl; 340 rec->ip = (unsigned long)ftrace_free_records;
232 struct dyn_ftrace *rec; 341 ftrace_free_records = rec;
233 struct hlist_node *t; 342 rec->flags |= FTRACE_FL_FREE;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264} 343}
265 344
266static inline int 345void ftrace_release(void *start, unsigned long size)
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{ 346{
269 struct dyn_ftrace *p; 347 struct dyn_ftrace *rec;
270 struct hlist_node *t; 348 struct ftrace_page *pg;
271 int found = 0; 349 unsigned long s = (unsigned long)start;
272 350 unsigned long e = s + size;
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { 351 int i;
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282 352
283static inline void 353 if (ftrace_disabled || !start)
284ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) 354 return;
285{
286 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
287}
288 355
289/* called from kstop_machine */ 356 /* should not be called from interrupt context */
290static inline void ftrace_del_hash(struct dyn_ftrace *node) 357 spin_lock(&ftrace_lock);
291{
292 hlist_del(&node->node);
293}
294 358
295static void ftrace_free_rec(struct dyn_ftrace *rec) 359 for (pg = ftrace_pages_start; pg; pg = pg->next) {
296{ 360 for (i = 0; i < pg->index; i++) {
297 /* no locking, only called from kstop_machine */ 361 rec = &pg->records[i];
298 362
299 rec->ip = (unsigned long)ftrace_free_records; 363 if ((rec->ip >= s) && (rec->ip < e))
300 ftrace_free_records = rec; 364 ftrace_free_rec(rec);
301 rec->flags |= FTRACE_FL_FREE; 365 }
366 }
367 spin_unlock(&ftrace_lock);
302} 368}
303 369
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 370static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -310,10 +376,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
310 rec = ftrace_free_records; 376 rec = ftrace_free_records;
311 377
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 378 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1); 379 FTRACE_WARN_ON_ONCE(1);
314 ftrace_free_records = NULL; 380 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL; 381 return NULL;
318 } 382 }
319 383
@@ -323,182 +387,163 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
323 } 387 }
324 388
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 389 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next) 390 if (!ftrace_pages->next) {
327 return NULL; 391 /* allocate another page */
392 ftrace_pages->next =
393 (void *)get_zeroed_page(GFP_KERNEL);
394 if (!ftrace_pages->next)
395 return NULL;
396 }
328 ftrace_pages = ftrace_pages->next; 397 ftrace_pages = ftrace_pages->next;
329 } 398 }
330 399
331 return &ftrace_pages->records[ftrace_pages->index++]; 400 return &ftrace_pages->records[ftrace_pages->index++];
332} 401}
333 402
334static void 403static struct dyn_ftrace *
335ftrace_record_ip(unsigned long ip) 404ftrace_record_ip(unsigned long ip)
336{ 405{
337 struct dyn_ftrace *node; 406 struct dyn_ftrace *rec;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343
344 if (!ftrace_enabled || ftrace_disabled)
345 return;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367
368 if (ftrace_ip_in_hash(ip, key))
369 goto out;
370
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374 407
375 /* This ip may have hit the hash before the lock */ 408 if (ftrace_disabled)
376 if (ftrace_ip_in_hash(ip, key)) 409 return NULL;
377 goto out_unlock;
378 410
379 node = ftrace_alloc_dyn_node(ip); 411 rec = ftrace_alloc_dyn_node(ip);
380 if (!node) 412 if (!rec)
381 goto out_unlock; 413 return NULL;
382 414
383 node->ip = ip; 415 rec->ip = ip;
384 416
385 ftrace_add_hash(node, key); 417 list_add(&rec->list, &ftrace_new_addrs);
386 418
387 ftraced_trigger = 1; 419 return rec;
420}
388 421
389 out_unlock: 422static void print_ip_ins(const char *fmt, unsigned char *p)
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags); 423{
391 out: 424 int i;
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393 425
394 /* prevent recursion with scheduler */ 426 printk(KERN_CONT "%s", fmt);
395 if (resched) 427
396 preempt_enable_no_resched_notrace(); 428 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
397 else 429 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
398 preempt_enable_notrace(); 430}
431
432static void ftrace_bug(int failed, unsigned long ip)
433{
434 switch (failed) {
435 case -EFAULT:
436 FTRACE_WARN_ON_ONCE(1);
437 pr_info("ftrace faulted on modifying ");
438 print_ip_sym(ip);
439 break;
440 case -EINVAL:
441 FTRACE_WARN_ON_ONCE(1);
442 pr_info("ftrace failed to modify ");
443 print_ip_sym(ip);
444 print_ip_ins(" actual: ", (unsigned char *)ip);
445 printk(KERN_CONT "\n");
446 break;
447 case -EPERM:
448 FTRACE_WARN_ON_ONCE(1);
449 pr_info("ftrace faulted on writing ");
450 print_ip_sym(ip);
451 break;
452 default:
453 FTRACE_WARN_ON_ONCE(1);
454 pr_info("ftrace faulted on unknown error ");
455 print_ip_sym(ip);
456 }
399} 457}
400 458
401#define FTRACE_ADDR ((long)(ftrace_caller))
402 459
403static int 460static int
404__ftrace_replace_code(struct dyn_ftrace *rec, 461__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
405 unsigned char *old, unsigned char *new, int enable)
406{ 462{
407 unsigned long ip, fl; 463 unsigned long ip, fl;
464 unsigned long ftrace_addr;
465
466 ftrace_addr = (unsigned long)ftrace_caller;
408 467
409 ip = rec->ip; 468 ip = rec->ip;
410 469
411 if (ftrace_filtered && enable) { 470 /*
471 * If this record is not to be traced and
472 * it is not enabled then do nothing.
473 *
474 * If this record is not to be traced and
475 * it is enabled then disabled it.
476 *
477 */
478 if (rec->flags & FTRACE_FL_NOTRACE) {
479 if (rec->flags & FTRACE_FL_ENABLED)
480 rec->flags &= ~FTRACE_FL_ENABLED;
481 else
482 return 0;
483
484 } else if (ftrace_filtered && enable) {
412 /* 485 /*
413 * If filtering is on: 486 * Filtering is on:
414 *
415 * If this record is set to be filtered and
416 * is enabled then do nothing.
417 *
418 * If this record is set to be filtered and
419 * it is not enabled, enable it.
420 *
421 * If this record is not set to be filtered
422 * and it is not enabled do nothing.
423 *
424 * If this record is set not to trace then
425 * do nothing.
426 *
427 * If this record is set not to trace and
428 * it is enabled then disable it.
429 *
430 * If this record is not set to be filtered and
431 * it is enabled, disable it.
432 */ 487 */
433 488
434 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | 489 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
435 FTRACE_FL_ENABLED);
436 490
437 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || 491 /* Record is filtered and enabled, do nothing */
438 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || 492 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
439 !fl || (fl == FTRACE_FL_NOTRACE))
440 return 0; 493 return 0;
441 494
442 /* 495 /* Record is not filtered and is not enabled do nothing */
443 * If it is enabled disable it, 496 if (!fl)
444 * otherwise enable it! 497 return 0;
445 */ 498
446 if (fl & FTRACE_FL_ENABLED) { 499 /* Record is not filtered but enabled, disable it */
447 /* swap new and old */ 500 if (fl == FTRACE_FL_ENABLED)
448 new = old;
449 old = ftrace_call_replace(ip, FTRACE_ADDR);
450 rec->flags &= ~FTRACE_FL_ENABLED; 501 rec->flags &= ~FTRACE_FL_ENABLED;
451 } else { 502 else
452 new = ftrace_call_replace(ip, FTRACE_ADDR); 503 /* Otherwise record is filtered but not enabled, enable it */
453 rec->flags |= FTRACE_FL_ENABLED; 504 rec->flags |= FTRACE_FL_ENABLED;
454 }
455 } else { 505 } else {
506 /* Disable or not filtered */
456 507
457 if (enable) { 508 if (enable) {
458 /* 509 /* if record is enabled, do nothing */
459 * If this record is set not to trace and is
460 * not enabled, do nothing.
461 */
462 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
463 if (fl == FTRACE_FL_NOTRACE)
464 return 0;
465
466 new = ftrace_call_replace(ip, FTRACE_ADDR);
467 } else
468 old = ftrace_call_replace(ip, FTRACE_ADDR);
469
470 if (enable) {
471 if (rec->flags & FTRACE_FL_ENABLED) 510 if (rec->flags & FTRACE_FL_ENABLED)
472 return 0; 511 return 0;
512
473 rec->flags |= FTRACE_FL_ENABLED; 513 rec->flags |= FTRACE_FL_ENABLED;
514
474 } else { 515 } else {
516
517 /* if record is not enabled do nothing */
475 if (!(rec->flags & FTRACE_FL_ENABLED)) 518 if (!(rec->flags & FTRACE_FL_ENABLED))
476 return 0; 519 return 0;
520
477 rec->flags &= ~FTRACE_FL_ENABLED; 521 rec->flags &= ~FTRACE_FL_ENABLED;
478 } 522 }
479 } 523 }
480 524
481 return ftrace_modify_code(ip, old, new); 525 if (rec->flags & FTRACE_FL_ENABLED)
526 return ftrace_make_call(rec, ftrace_addr);
527 else
528 return ftrace_make_nop(NULL, rec, ftrace_addr);
482} 529}
483 530
484static void ftrace_replace_code(int enable) 531static void ftrace_replace_code(int enable)
485{ 532{
486 int i, failed; 533 int i, failed;
487 unsigned char *new = NULL, *old = NULL;
488 struct dyn_ftrace *rec; 534 struct dyn_ftrace *rec;
489 struct ftrace_page *pg; 535 struct ftrace_page *pg;
490 536
491 if (enable)
492 old = ftrace_nop_replace();
493 else
494 new = ftrace_nop_replace();
495
496 for (pg = ftrace_pages_start; pg; pg = pg->next) { 537 for (pg = ftrace_pages_start; pg; pg = pg->next) {
497 for (i = 0; i < pg->index; i++) { 538 for (i = 0; i < pg->index; i++) {
498 rec = &pg->records[i]; 539 rec = &pg->records[i];
499 540
500 /* don't modify code that has already faulted */ 541 /*
501 if (rec->flags & FTRACE_FL_FAILED) 542 * Skip over free records and records that have
543 * failed.
544 */
545 if (rec->flags & FTRACE_FL_FREE ||
546 rec->flags & FTRACE_FL_FAILED)
502 continue; 547 continue;
503 548
504 /* ignore updates to this record's mcount site */ 549 /* ignore updates to this record's mcount site */
@@ -509,78 +554,52 @@ static void ftrace_replace_code(int enable)
509 unfreeze_record(rec); 554 unfreeze_record(rec);
510 } 555 }
511 556
512 failed = __ftrace_replace_code(rec, old, new, enable); 557 failed = __ftrace_replace_code(rec, enable);
513 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 558 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
514 rec->flags |= FTRACE_FL_FAILED; 559 rec->flags |= FTRACE_FL_FAILED;
515 if ((system_state == SYSTEM_BOOTING) || 560 if ((system_state == SYSTEM_BOOTING) ||
516 !core_kernel_text(rec->ip)) { 561 !core_kernel_text(rec->ip)) {
517 ftrace_del_hash(rec);
518 ftrace_free_rec(rec); 562 ftrace_free_rec(rec);
519 } 563 } else
564 ftrace_bug(failed, rec->ip);
520 } 565 }
521 } 566 }
522 } 567 }
523} 568}
524 569
525static void ftrace_shutdown_replenish(void)
526{
527 if (ftrace_pages->next)
528 return;
529
530 /* allocate another page */
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
532}
533
534static int 570static int
535ftrace_code_disable(struct dyn_ftrace *rec) 571ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
536{ 572{
537 unsigned long ip; 573 unsigned long ip;
538 unsigned char *nop, *call; 574 int ret;
539 int failed;
540 575
541 ip = rec->ip; 576 ip = rec->ip;
542 577
543 nop = ftrace_nop_replace(); 578 ret = ftrace_make_nop(mod, rec, mcount_addr);
544 call = ftrace_call_replace(ip, MCOUNT_ADDR); 579 if (ret) {
545 580 ftrace_bug(ret, ip);
546 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) {
548 rec->flags |= FTRACE_FL_FAILED; 581 rec->flags |= FTRACE_FL_FAILED;
549 return 0; 582 return 0;
550 } 583 }
551 return 1; 584 return 1;
552} 585}
553 586
554static int __ftrace_update_code(void *ignore);
555
556static int __ftrace_modify_code(void *data) 587static int __ftrace_modify_code(void *data)
557{ 588{
558 unsigned long addr;
559 int *command = data; 589 int *command = data;
560 590
561 if (*command & FTRACE_ENABLE_CALLS) { 591 if (*command & FTRACE_ENABLE_CALLS)
562 /*
563 * Update any recorded ips now that we have the
564 * machine stopped
565 */
566 __ftrace_update_code(NULL);
567 ftrace_replace_code(1); 592 ftrace_replace_code(1);
568 tracing_on = 1; 593 else if (*command & FTRACE_DISABLE_CALLS)
569 } else if (*command & FTRACE_DISABLE_CALLS) {
570 ftrace_replace_code(0); 594 ftrace_replace_code(0);
571 tracing_on = 0;
572 }
573 595
574 if (*command & FTRACE_UPDATE_TRACE_FUNC) 596 if (*command & FTRACE_UPDATE_TRACE_FUNC)
575 ftrace_update_ftrace_func(ftrace_trace_function); 597 ftrace_update_ftrace_func(ftrace_trace_function);
576 598
577 if (*command & FTRACE_ENABLE_MCOUNT) { 599 if (*command & FTRACE_START_FUNC_RET)
578 addr = (unsigned long)ftrace_record_ip; 600 ftrace_enable_ftrace_graph_caller();
579 ftrace_mcount_set(&addr); 601 else if (*command & FTRACE_STOP_FUNC_RET)
580 } else if (*command & FTRACE_DISABLE_MCOUNT) { 602 ftrace_disable_ftrace_graph_caller();
581 addr = (unsigned long)ftrace_stub;
582 ftrace_mcount_set(&addr);
583 }
584 603
585 return 0; 604 return 0;
586} 605}
@@ -590,62 +609,44 @@ static void ftrace_run_update_code(int command)
590 stop_machine(__ftrace_modify_code, &command, NULL); 609 stop_machine(__ftrace_modify_code, &command, NULL);
591} 610}
592 611
593void ftrace_disable_daemon(void)
594{
595 /* Stop the daemon from calling kstop_machine */
596 mutex_lock(&ftraced_lock);
597 ftraced_stop = 1;
598 mutex_unlock(&ftraced_lock);
599
600 ftrace_force_update();
601}
602
603void ftrace_enable_daemon(void)
604{
605 mutex_lock(&ftraced_lock);
606 ftraced_stop = 0;
607 mutex_unlock(&ftraced_lock);
608
609 ftrace_force_update();
610}
611
612static ftrace_func_t saved_ftrace_func; 612static ftrace_func_t saved_ftrace_func;
613static int ftrace_start_up;
613 614
614static void ftrace_startup(void) 615static void ftrace_startup_enable(int command)
615{ 616{
616 int command = 0;
617
618 if (unlikely(ftrace_disabled))
619 return;
620
621 mutex_lock(&ftraced_lock);
622 ftraced_suspend++;
623 if (ftraced_suspend == 1)
624 command |= FTRACE_ENABLE_CALLS;
625
626 if (saved_ftrace_func != ftrace_trace_function) { 617 if (saved_ftrace_func != ftrace_trace_function) {
627 saved_ftrace_func = ftrace_trace_function; 618 saved_ftrace_func = ftrace_trace_function;
628 command |= FTRACE_UPDATE_TRACE_FUNC; 619 command |= FTRACE_UPDATE_TRACE_FUNC;
629 } 620 }
630 621
631 if (!command || !ftrace_enabled) 622 if (!command || !ftrace_enabled)
632 goto out; 623 return;
633 624
634 ftrace_run_update_code(command); 625 ftrace_run_update_code(command);
635 out:
636 mutex_unlock(&ftraced_lock);
637} 626}
638 627
639static void ftrace_shutdown(void) 628static void ftrace_startup(int command)
640{ 629{
641 int command = 0; 630 if (unlikely(ftrace_disabled))
631 return;
632
633 mutex_lock(&ftrace_start_lock);
634 ftrace_start_up++;
635 command |= FTRACE_ENABLE_CALLS;
642 636
637 ftrace_startup_enable(command);
638
639 mutex_unlock(&ftrace_start_lock);
640}
641
642static void ftrace_shutdown(int command)
643{
643 if (unlikely(ftrace_disabled)) 644 if (unlikely(ftrace_disabled))
644 return; 645 return;
645 646
646 mutex_lock(&ftraced_lock); 647 mutex_lock(&ftrace_start_lock);
647 ftraced_suspend--; 648 ftrace_start_up--;
648 if (!ftraced_suspend) 649 if (!ftrace_start_up)
649 command |= FTRACE_DISABLE_CALLS; 650 command |= FTRACE_DISABLE_CALLS;
650 651
651 if (saved_ftrace_func != ftrace_trace_function) { 652 if (saved_ftrace_func != ftrace_trace_function) {
@@ -658,7 +659,7 @@ static void ftrace_shutdown(void)
658 659
659 ftrace_run_update_code(command); 660 ftrace_run_update_code(command);
660 out: 661 out:
661 mutex_unlock(&ftraced_lock); 662 mutex_unlock(&ftrace_start_lock);
662} 663}
663 664
664static void ftrace_startup_sysctl(void) 665static void ftrace_startup_sysctl(void)
@@ -668,15 +669,15 @@ static void ftrace_startup_sysctl(void)
668 if (unlikely(ftrace_disabled)) 669 if (unlikely(ftrace_disabled))
669 return; 670 return;
670 671
671 mutex_lock(&ftraced_lock); 672 mutex_lock(&ftrace_start_lock);
672 /* Force update next time */ 673 /* Force update next time */
673 saved_ftrace_func = NULL; 674 saved_ftrace_func = NULL;
674 /* ftraced_suspend is true if we want ftrace running */ 675 /* ftrace_start_up is true if we want ftrace running */
675 if (ftraced_suspend) 676 if (ftrace_start_up)
676 command |= FTRACE_ENABLE_CALLS; 677 command |= FTRACE_ENABLE_CALLS;
677 678
678 ftrace_run_update_code(command); 679 ftrace_run_update_code(command);
679 mutex_unlock(&ftraced_lock); 680 mutex_unlock(&ftrace_start_lock);
680} 681}
681 682
682static void ftrace_shutdown_sysctl(void) 683static void ftrace_shutdown_sysctl(void)
@@ -686,153 +687,51 @@ static void ftrace_shutdown_sysctl(void)
686 if (unlikely(ftrace_disabled)) 687 if (unlikely(ftrace_disabled))
687 return; 688 return;
688 689
689 mutex_lock(&ftraced_lock); 690 mutex_lock(&ftrace_start_lock);
690 /* ftraced_suspend is true if ftrace is running */ 691 /* ftrace_start_up is true if ftrace is running */
691 if (ftraced_suspend) 692 if (ftrace_start_up)
692 command |= FTRACE_DISABLE_CALLS; 693 command |= FTRACE_DISABLE_CALLS;
693 694
694 ftrace_run_update_code(command); 695 ftrace_run_update_code(command);
695 mutex_unlock(&ftraced_lock); 696 mutex_unlock(&ftrace_start_lock);
696} 697}
697 698
698static cycle_t ftrace_update_time; 699static cycle_t ftrace_update_time;
699static unsigned long ftrace_update_cnt; 700static unsigned long ftrace_update_cnt;
700unsigned long ftrace_update_tot_cnt; 701unsigned long ftrace_update_tot_cnt;
701 702
702static int __ftrace_update_code(void *ignore) 703static int ftrace_update_code(struct module *mod)
703{ 704{
704 int i, save_ftrace_enabled; 705 struct dyn_ftrace *p, *t;
705 cycle_t start, stop; 706 cycle_t start, stop;
706 struct dyn_ftrace *p;
707 struct hlist_node *t, *n;
708 struct hlist_head *head, temp_list;
709
710 /* Don't be recording funcs now */
711 ftrace_record_suspend++;
712 save_ftrace_enabled = ftrace_enabled;
713 ftrace_enabled = 0;
714 707
715 start = ftrace_now(raw_smp_processor_id()); 708 start = ftrace_now(raw_smp_processor_id());
716 ftrace_update_cnt = 0; 709 ftrace_update_cnt = 0;
717 710
718 /* No locks needed, the machine is stopped! */ 711 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
719 for (i = 0; i < FTRACE_HASHSIZE; i++) {
720 INIT_HLIST_HEAD(&temp_list);
721 head = &ftrace_hash[i];
722 712
723 /* all CPUS are stopped, we are safe to modify code */ 713 /* If something went wrong, bail without enabling anything */
724 hlist_for_each_entry_safe(p, t, n, head, node) { 714 if (unlikely(ftrace_disabled))
725 /* Skip over failed records which have not been 715 return -1;
726 * freed. */
727 if (p->flags & FTRACE_FL_FAILED)
728 continue;
729
730 /* Unconverted records are always at the head of the
731 * hash bucket. Once we encounter a converted record,
732 * simply skip over to the next bucket. Saves ftraced
733 * some processor cycles (ftrace does its bid for
734 * global warming :-p ). */
735 if (p->flags & (FTRACE_FL_CONVERTED))
736 break;
737
738 /* Ignore updates to this record's mcount site.
739 * Reintroduce this record at the head of this
740 * bucket to attempt to "convert" it again if
741 * the kprobe on it is unregistered before the
742 * next run. */
743 if (get_kprobe((void *)p->ip)) {
744 ftrace_del_hash(p);
745 INIT_HLIST_NODE(&p->node);
746 hlist_add_head(&p->node, &temp_list);
747 freeze_record(p);
748 continue;
749 } else {
750 unfreeze_record(p);
751 }
752 716
753 /* convert record (i.e, patch mcount-call with NOP) */ 717 list_del_init(&p->list);
754 if (ftrace_code_disable(p)) {
755 p->flags |= FTRACE_FL_CONVERTED;
756 ftrace_update_cnt++;
757 } else {
758 if ((system_state == SYSTEM_BOOTING) ||
759 !core_kernel_text(p->ip)) {
760 ftrace_del_hash(p);
761 ftrace_free_rec(p);
762 }
763 }
764 }
765 718
766 hlist_for_each_entry_safe(p, t, n, &temp_list, node) { 719 /* convert record (i.e, patch mcount-call with NOP) */
767 hlist_del(&p->node); 720 if (ftrace_code_disable(mod, p)) {
768 INIT_HLIST_NODE(&p->node); 721 p->flags |= FTRACE_FL_CONVERTED;
769 hlist_add_head(&p->node, head); 722 ftrace_update_cnt++;
770 } 723 } else
724 ftrace_free_rec(p);
771 } 725 }
772 726
773 stop = ftrace_now(raw_smp_processor_id()); 727 stop = ftrace_now(raw_smp_processor_id());
774 ftrace_update_time = stop - start; 728 ftrace_update_time = stop - start;
775 ftrace_update_tot_cnt += ftrace_update_cnt; 729 ftrace_update_tot_cnt += ftrace_update_cnt;
776 ftraced_trigger = 0;
777
778 ftrace_enabled = save_ftrace_enabled;
779 ftrace_record_suspend--;
780 730
781 return 0; 731 return 0;
782} 732}
783 733
784static int ftrace_update_code(void) 734static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
785{
786 if (unlikely(ftrace_disabled) ||
787 !ftrace_enabled || !ftraced_trigger)
788 return 0;
789
790 stop_machine(__ftrace_update_code, NULL, NULL);
791
792 return 1;
793}
794
795static int ftraced(void *ignore)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0;
833}
834
835static int __init ftrace_dyn_table_alloc(void)
836{ 735{
837 struct ftrace_page *pg; 736 struct ftrace_page *pg;
838 int cnt; 737 int cnt;
@@ -859,7 +758,9 @@ static int __init ftrace_dyn_table_alloc(void)
859 758
860 pg = ftrace_pages = ftrace_pages_start; 759 pg = ftrace_pages = ftrace_pages_start;
861 760
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE; 761 cnt = num_to_init / ENTRIES_PER_PAGE;
762 pr_info("ftrace: allocating %ld entries in %d pages\n",
763 num_to_init, cnt + 1);
863 764
864 for (i = 0; i < cnt; i++) { 765 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 766 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -884,7 +785,6 @@ enum {
884#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 785#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
885 786
886struct ftrace_iterator { 787struct ftrace_iterator {
887 loff_t pos;
888 struct ftrace_page *pg; 788 struct ftrace_page *pg;
889 unsigned idx; 789 unsigned idx;
890 unsigned flags; 790 unsigned flags;
@@ -901,21 +801,26 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
901 801
902 (*pos)++; 802 (*pos)++;
903 803
804 /* should not be called from interrupt context */
805 spin_lock(&ftrace_lock);
904 retry: 806 retry:
905 if (iter->idx >= iter->pg->index) { 807 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) { 808 if (iter->pg->next) {
907 iter->pg = iter->pg->next; 809 iter->pg = iter->pg->next;
908 iter->idx = 0; 810 iter->idx = 0;
909 goto retry; 811 goto retry;
812 } else {
813 iter->idx = -1;
910 } 814 }
911 } else { 815 } else {
912 rec = &iter->pg->records[iter->idx++]; 816 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) && 817 if ((rec->flags & FTRACE_FL_FREE) ||
818
819 (!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) || 820 (rec->flags & FTRACE_FL_FAILED)) ||
915 821
916 ((iter->flags & FTRACE_ITER_FAILURES) && 822 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) || 823 !(rec->flags & FTRACE_FL_FAILED)) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919 824
920 ((iter->flags & FTRACE_ITER_FILTER) && 825 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) || 826 !(rec->flags & FTRACE_FL_FILTER)) ||
@@ -926,8 +831,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
926 goto retry; 831 goto retry;
927 } 832 }
928 } 833 }
929 834 spin_unlock(&ftrace_lock);
930 iter->pos = *pos;
931 835
932 return rec; 836 return rec;
933} 837}
@@ -936,16 +840,16 @@ static void *t_start(struct seq_file *m, loff_t *pos)
936{ 840{
937 struct ftrace_iterator *iter = m->private; 841 struct ftrace_iterator *iter = m->private;
938 void *p = NULL; 842 void *p = NULL;
939 loff_t l = -1;
940 843
941 if (*pos != iter->pos) { 844 if (*pos > 0) {
942 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) 845 if (iter->idx < 0)
943 ; 846 return p;
944 } else { 847 (*pos)--;
945 l = *pos; 848 iter->idx--;
946 p = t_next(m, p, &l);
947 } 849 }
948 850
851 p = t_next(m, p, pos);
852
949 return p; 853 return p;
950} 854}
951 855
@@ -989,7 +893,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
989 return -ENOMEM; 893 return -ENOMEM;
990 894
991 iter->pg = ftrace_pages_start; 895 iter->pg = ftrace_pages_start;
992 iter->pos = -1;
993 896
994 ret = seq_open(file, &show_ftrace_seq_ops); 897 ret = seq_open(file, &show_ftrace_seq_ops);
995 if (!ret) { 898 if (!ret) {
@@ -1039,8 +942,8 @@ static void ftrace_filter_reset(int enable)
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 942 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i; 943 unsigned i;
1041 944
1042 /* keep kstop machine from running */ 945 /* should not be called from interrupt context */
1043 preempt_disable(); 946 spin_lock(&ftrace_lock);
1044 if (enable) 947 if (enable)
1045 ftrace_filtered = 0; 948 ftrace_filtered = 0;
1046 pg = ftrace_pages_start; 949 pg = ftrace_pages_start;
@@ -1053,7 +956,7 @@ static void ftrace_filter_reset(int enable)
1053 } 956 }
1054 pg = pg->next; 957 pg = pg->next;
1055 } 958 }
1056 preempt_enable(); 959 spin_unlock(&ftrace_lock);
1057} 960}
1058 961
1059static int 962static int
@@ -1076,7 +979,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1076 979
1077 if (file->f_mode & FMODE_READ) { 980 if (file->f_mode & FMODE_READ) {
1078 iter->pg = ftrace_pages_start; 981 iter->pg = ftrace_pages_start;
1079 iter->pos = -1;
1080 iter->flags = enable ? FTRACE_ITER_FILTER : 982 iter->flags = enable ? FTRACE_ITER_FILTER :
1081 FTRACE_ITER_NOTRACE; 983 FTRACE_ITER_NOTRACE;
1082 984
@@ -1145,6 +1047,13 @@ ftrace_match(unsigned char *buff, int len, int enable)
1145 int type = MATCH_FULL; 1047 int type = MATCH_FULL;
1146 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1048 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1147 unsigned i, match = 0, search_len = 0; 1049 unsigned i, match = 0, search_len = 0;
1050 int not = 0;
1051
1052 if (buff[0] == '!') {
1053 not = 1;
1054 buff++;
1055 len--;
1056 }
1148 1057
1149 for (i = 0; i < len; i++) { 1058 for (i = 0; i < len; i++) {
1150 if (buff[i] == '*') { 1059 if (buff[i] == '*') {
@@ -1165,8 +1074,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
1165 } 1074 }
1166 } 1075 }
1167 1076
1168 /* keep kstop machine from running */ 1077 /* should not be called from interrupt context */
1169 preempt_disable(); 1078 spin_lock(&ftrace_lock);
1170 if (enable) 1079 if (enable)
1171 ftrace_filtered = 1; 1080 ftrace_filtered = 1;
1172 pg = ftrace_pages_start; 1081 pg = ftrace_pages_start;
@@ -1198,12 +1107,16 @@ ftrace_match(unsigned char *buff, int len, int enable)
1198 matched = 1; 1107 matched = 1;
1199 break; 1108 break;
1200 } 1109 }
1201 if (matched) 1110 if (matched) {
1202 rec->flags |= flag; 1111 if (not)
1112 rec->flags &= ~flag;
1113 else
1114 rec->flags |= flag;
1115 }
1203 } 1116 }
1204 pg = pg->next; 1117 pg = pg->next;
1205 } 1118 }
1206 preempt_enable(); 1119 spin_unlock(&ftrace_lock);
1207} 1120}
1208 1121
1209static ssize_t 1122static ssize_t
@@ -1366,10 +1279,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1366 } 1279 }
1367 1280
1368 mutex_lock(&ftrace_sysctl_lock); 1281 mutex_lock(&ftrace_sysctl_lock);
1369 mutex_lock(&ftraced_lock); 1282 mutex_lock(&ftrace_start_lock);
1370 if (iter->filtered && ftraced_suspend && ftrace_enabled) 1283 if (ftrace_start_up && ftrace_enabled)
1371 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1284 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1372 mutex_unlock(&ftraced_lock); 1285 mutex_unlock(&ftrace_start_lock);
1373 mutex_unlock(&ftrace_sysctl_lock); 1286 mutex_unlock(&ftrace_sysctl_lock);
1374 1287
1375 kfree(iter); 1288 kfree(iter);
@@ -1389,55 +1302,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1389 return ftrace_regex_release(inode, file, 0); 1302 return ftrace_regex_release(inode, file, 0);
1390} 1303}
1391 1304
1392static ssize_t
1393ftraced_read(struct file *filp, char __user *ubuf,
1394 size_t cnt, loff_t *ppos)
1395{
1396 /* don't worry about races */
1397 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1398 int r = strlen(buf);
1399
1400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1401}
1402
1403static ssize_t
1404ftraced_write(struct file *filp, const char __user *ubuf,
1405 size_t cnt, loff_t *ppos)
1406{
1407 char buf[64];
1408 long val;
1409 int ret;
1410
1411 if (cnt >= sizeof(buf))
1412 return -EINVAL;
1413
1414 if (copy_from_user(&buf, ubuf, cnt))
1415 return -EFAULT;
1416
1417 if (strncmp(buf, "enable", 6) == 0)
1418 val = 1;
1419 else if (strncmp(buf, "disable", 7) == 0)
1420 val = 0;
1421 else {
1422 buf[cnt] = 0;
1423
1424 ret = strict_strtoul(buf, 10, &val);
1425 if (ret < 0)
1426 return ret;
1427
1428 val = !!val;
1429 }
1430
1431 if (val)
1432 ftrace_enable_daemon();
1433 else
1434 ftrace_disable_daemon();
1435
1436 filp->f_pos += cnt;
1437
1438 return cnt;
1439}
1440
1441static struct file_operations ftrace_avail_fops = { 1305static struct file_operations ftrace_avail_fops = {
1442 .open = ftrace_avail_open, 1306 .open = ftrace_avail_open,
1443 .read = seq_read, 1307 .read = seq_read,
@@ -1468,60 +1332,233 @@ static struct file_operations ftrace_notrace_fops = {
1468 .release = ftrace_notrace_release, 1332 .release = ftrace_notrace_release,
1469}; 1333};
1470 1334
1471static struct file_operations ftraced_fops = { 1335#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1472 .open = tracing_open_generic, 1336
1473 .read = ftraced_read, 1337static DEFINE_MUTEX(graph_lock);
1474 .write = ftraced_write, 1338
1339int ftrace_graph_count;
1340unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
1341
1342static void *
1343g_next(struct seq_file *m, void *v, loff_t *pos)
1344{
1345 unsigned long *array = m->private;
1346 int index = *pos;
1347
1348 (*pos)++;
1349
1350 if (index >= ftrace_graph_count)
1351 return NULL;
1352
1353 return &array[index];
1354}
1355
1356static void *g_start(struct seq_file *m, loff_t *pos)
1357{
1358 void *p = NULL;
1359
1360 mutex_lock(&graph_lock);
1361
1362 p = g_next(m, p, pos);
1363
1364 return p;
1365}
1366
1367static void g_stop(struct seq_file *m, void *p)
1368{
1369 mutex_unlock(&graph_lock);
1370}
1371
1372static int g_show(struct seq_file *m, void *v)
1373{
1374 unsigned long *ptr = v;
1375 char str[KSYM_SYMBOL_LEN];
1376
1377 if (!ptr)
1378 return 0;
1379
1380 kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
1381
1382 seq_printf(m, "%s\n", str);
1383
1384 return 0;
1385}
1386
1387static struct seq_operations ftrace_graph_seq_ops = {
1388 .start = g_start,
1389 .next = g_next,
1390 .stop = g_stop,
1391 .show = g_show,
1475}; 1392};
1476 1393
1477/** 1394static int
1478 * ftrace_force_update - force an update to all recording ftrace functions 1395ftrace_graph_open(struct inode *inode, struct file *file)
1479 */
1480int ftrace_force_update(void)
1481{ 1396{
1482 int ret = 0; 1397 int ret = 0;
1483 1398
1484 if (unlikely(ftrace_disabled)) 1399 if (unlikely(ftrace_disabled))
1485 return -ENODEV; 1400 return -ENODEV;
1486 1401
1487 mutex_lock(&ftrace_sysctl_lock); 1402 mutex_lock(&graph_lock);
1488 mutex_lock(&ftraced_lock); 1403 if ((file->f_mode & FMODE_WRITE) &&
1489 1404 !(file->f_flags & O_APPEND)) {
1490 /* 1405 ftrace_graph_count = 0;
1491 * If ftraced_trigger is not set, then there is nothing 1406 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
1492 * to update. 1407 }
1493 */
1494 if (ftraced_trigger && !ftrace_update_code())
1495 ret = -EBUSY;
1496 1408
1497 mutex_unlock(&ftraced_lock); 1409 if (file->f_mode & FMODE_READ) {
1498 mutex_unlock(&ftrace_sysctl_lock); 1410 ret = seq_open(file, &ftrace_graph_seq_ops);
1411 if (!ret) {
1412 struct seq_file *m = file->private_data;
1413 m->private = ftrace_graph_funcs;
1414 }
1415 } else
1416 file->private_data = ftrace_graph_funcs;
1417 mutex_unlock(&graph_lock);
1499 1418
1500 return ret; 1419 return ret;
1501} 1420}
1502 1421
1503static void ftrace_force_shutdown(void) 1422static ssize_t
1423ftrace_graph_read(struct file *file, char __user *ubuf,
1424 size_t cnt, loff_t *ppos)
1504{ 1425{
1505 struct task_struct *task; 1426 if (file->f_mode & FMODE_READ)
1506 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC; 1427 return seq_read(file, ubuf, cnt, ppos);
1428 else
1429 return -EPERM;
1430}
1507 1431
1508 mutex_lock(&ftraced_lock); 1432static int
1509 task = ftraced_task; 1433ftrace_set_func(unsigned long *array, int idx, char *buffer)
1510 ftraced_task = NULL; 1434{
1511 ftraced_suspend = -1; 1435 char str[KSYM_SYMBOL_LEN];
1512 ftrace_run_update_code(command); 1436 struct dyn_ftrace *rec;
1513 mutex_unlock(&ftraced_lock); 1437 struct ftrace_page *pg;
1438 int found = 0;
1439 int i, j;
1440
1441 if (ftrace_disabled)
1442 return -ENODEV;
1443
1444 /* should not be called from interrupt context */
1445 spin_lock(&ftrace_lock);
1446
1447 for (pg = ftrace_pages_start; pg; pg = pg->next) {
1448 for (i = 0; i < pg->index; i++) {
1449 rec = &pg->records[i];
1450
1451 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
1452 continue;
1453
1454 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1455 if (strcmp(str, buffer) == 0) {
1456 found = 1;
1457 for (j = 0; j < idx; j++)
1458 if (array[j] == rec->ip) {
1459 found = 0;
1460 break;
1461 }
1462 if (found)
1463 array[idx] = rec->ip;
1464 break;
1465 }
1466 }
1467 }
1468 spin_unlock(&ftrace_lock);
1514 1469
1515 if (task) 1470 return found ? 0 : -EINVAL;
1516 kthread_stop(task);
1517} 1471}
1518 1472
1519static __init int ftrace_init_debugfs(void) 1473static ssize_t
1474ftrace_graph_write(struct file *file, const char __user *ubuf,
1475 size_t cnt, loff_t *ppos)
1520{ 1476{
1521 struct dentry *d_tracer; 1477 unsigned char buffer[FTRACE_BUFF_MAX+1];
1522 struct dentry *entry; 1478 unsigned long *array;
1479 size_t read = 0;
1480 ssize_t ret;
1481 int index = 0;
1482 char ch;
1523 1483
1524 d_tracer = tracing_init_dentry(); 1484 if (!cnt || cnt < 0)
1485 return 0;
1486
1487 mutex_lock(&graph_lock);
1488
1489 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
1490 ret = -EBUSY;
1491 goto out;
1492 }
1493
1494 if (file->f_mode & FMODE_READ) {
1495 struct seq_file *m = file->private_data;
1496 array = m->private;
1497 } else
1498 array = file->private_data;
1499
1500 ret = get_user(ch, ubuf++);
1501 if (ret)
1502 goto out;
1503 read++;
1504 cnt--;
1505
1506 /* skip white space */
1507 while (cnt && isspace(ch)) {
1508 ret = get_user(ch, ubuf++);
1509 if (ret)
1510 goto out;
1511 read++;
1512 cnt--;
1513 }
1514
1515 if (isspace(ch)) {
1516 *ppos += read;
1517 ret = read;
1518 goto out;
1519 }
1520
1521 while (cnt && !isspace(ch)) {
1522 if (index < FTRACE_BUFF_MAX)
1523 buffer[index++] = ch;
1524 else {
1525 ret = -EINVAL;
1526 goto out;
1527 }
1528 ret = get_user(ch, ubuf++);
1529 if (ret)
1530 goto out;
1531 read++;
1532 cnt--;
1533 }
1534 buffer[index] = 0;
1535
1536 /* we allow only one at a time */
1537 ret = ftrace_set_func(array, ftrace_graph_count, buffer);
1538 if (ret)
1539 goto out;
1540
1541 ftrace_graph_count++;
1542
1543 file->f_pos += read;
1544
1545 ret = read;
1546 out:
1547 mutex_unlock(&graph_lock);
1548
1549 return ret;
1550}
1551
1552static const struct file_operations ftrace_graph_fops = {
1553 .open = ftrace_graph_open,
1554 .read = ftrace_graph_read,
1555 .write = ftrace_graph_write,
1556};
1557#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
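The set_graph_function file wired up below accepts one symbol per write() (per the "we allow only one at a time" comment) and caps the list at FTRACE_GRAPH_MAX_FUNCS; opening it for write without O_APPEND clears the current list (see ftrace_graph_open). A minimal user-space sketch, assuming the usual debugfs mount point (the path and helper name are illustrative, not part of this patch):

/* Hypothetical user-space helper; the debugfs path is an assumption. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int add_graph_function(const char *sym)
{
	/* O_WRONLY without O_APPEND resets any previously set functions. */
	int fd = open("/sys/kernel/debug/tracing/set_graph_function", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, sym, strlen(sym)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}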
1558
1559static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
1560{
1561 struct dentry *entry;
1525 1562
1526 entry = debugfs_create_file("available_filter_functions", 0444, 1563 entry = debugfs_create_file("available_filter_functions", 0444,
1527 d_tracer, NULL, &ftrace_avail_fops); 1564 d_tracer, NULL, &ftrace_avail_fops);
@@ -1546,97 +1583,295 @@ static __init int ftrace_init_debugfs(void)
1546 pr_warning("Could not create debugfs " 1583 pr_warning("Could not create debugfs "
1547 "'set_ftrace_notrace' entry\n"); 1584 "'set_ftrace_notrace' entry\n");
1548 1585
1549 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer, 1586#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1550 NULL, &ftraced_fops); 1587 entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
1588 NULL,
1589 &ftrace_graph_fops);
1551 if (!entry) 1590 if (!entry)
1552 pr_warning("Could not create debugfs " 1591 pr_warning("Could not create debugfs "
1553 "'ftraced_enabled' entry\n"); 1592 "'set_graph_function' entry\n");
1593#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1594
1554 return 0; 1595 return 0;
1555} 1596}
1556 1597
1557fs_initcall(ftrace_init_debugfs); 1598static int ftrace_convert_nops(struct module *mod,
1558 1599 unsigned long *start,
1559static int __init ftrace_dynamic_init(void) 1600 unsigned long *end)
1560{ 1601{
1561 struct task_struct *p; 1602 unsigned long *p;
1562 unsigned long addr; 1603 unsigned long addr;
1604 unsigned long flags;
1605
1606 mutex_lock(&ftrace_start_lock);
1607 p = start;
1608 while (p < end) {
1609 addr = ftrace_call_adjust(*p++);
1610 /*
1611 * Some architecture linkers will pad between
1612 * the different mcount_loc sections of different
1613 * object files to satisfy alignments.
1614 * Skip any NULL pointers.
1615 */
1616 if (!addr)
1617 continue;
1618 ftrace_record_ip(addr);
1619 }
1620
1621 /* disable interrupts to prevent kstop machine */
1622 local_irq_save(flags);
1623 ftrace_update_code(mod);
1624 local_irq_restore(flags);
1625 mutex_unlock(&ftrace_start_lock);
1626
1627 return 0;
1628}
1629
1630void ftrace_init_module(struct module *mod,
1631 unsigned long *start, unsigned long *end)
1632{
1633 if (ftrace_disabled || start == end)
1634 return;
1635 ftrace_convert_nops(mod, start, end);
1636}
1637
1638extern unsigned long __start_mcount_loc[];
1639extern unsigned long __stop_mcount_loc[];
1640
1641void __init ftrace_init(void)
1642{
1643 unsigned long count, addr, flags;
1563 int ret; 1644 int ret;
1564 1645
1565 addr = (unsigned long)ftrace_record_ip; 1646 /* Keep the ftrace pointer to the stub */
1647 addr = (unsigned long)ftrace_stub;
1566 1648
1567 stop_machine(ftrace_dyn_arch_init, &addr, NULL); 1649 local_irq_save(flags);
1650 ftrace_dyn_arch_init(&addr);
1651 local_irq_restore(flags);
1568 1652
1569 /* ftrace_dyn_arch_init places the return code in addr */ 1653 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) { 1654 if (addr)
1571 ret = (int)addr;
1572 goto failed; 1655 goto failed;
1573 }
1574 1656
1575 ret = ftrace_dyn_table_alloc(); 1657 count = __stop_mcount_loc - __start_mcount_loc;
1576 if (ret)
1577 goto failed;
1578 1658
1579 p = kthread_run(ftraced, NULL, "ftraced"); 1659 ret = ftrace_dyn_table_alloc(count);
1580 if (IS_ERR(p)) { 1660 if (ret)
1581 ret = -1;
1582 goto failed; 1661 goto failed;
1583 }
1584 1662
1585 last_ftrace_enabled = ftrace_enabled = 1; 1663 last_ftrace_enabled = ftrace_enabled = 1;
1586 ftraced_task = p;
1587 1664
1588 return 0; 1665 ret = ftrace_convert_nops(NULL,
1666 __start_mcount_loc,
1667 __stop_mcount_loc);
1589 1668
1669 return;
1590 failed: 1670 failed:
1591 ftrace_disabled = 1; 1671 ftrace_disabled = 1;
1592 return ret;
1593} 1672}
1594 1673
1595core_initcall(ftrace_dynamic_init);
1596#else 1674#else
1597# define ftrace_startup() do { } while (0) 1675
1598# define ftrace_shutdown() do { } while (0) 1676static int __init ftrace_nodyn_init(void)
1677{
1678 ftrace_enabled = 1;
1679 return 0;
1680}
1681device_initcall(ftrace_nodyn_init);
1682
1683static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
1684static inline void ftrace_startup_enable(int command) { }
1685/* Keep as macros so we do not need to define the commands */
1686# define ftrace_startup(command) do { } while (0)
1687# define ftrace_shutdown(command) do { } while (0)
1599# define ftrace_startup_sysctl() do { } while (0) 1688# define ftrace_startup_sysctl() do { } while (0)
1600# define ftrace_shutdown_sysctl() do { } while (0) 1689# define ftrace_shutdown_sysctl() do { } while (0)
1601# define ftrace_force_shutdown() do { } while (0)
1602#endif /* CONFIG_DYNAMIC_FTRACE */ 1690#endif /* CONFIG_DYNAMIC_FTRACE */
1603 1691
1604/** 1692static ssize_t
1605 * ftrace_kill_atomic - kill ftrace from critical sections 1693ftrace_pid_read(struct file *file, char __user *ubuf,
1606 * 1694 size_t cnt, loff_t *ppos)
1607 * This function should be used by panic code. It stops ftrace
1608 * but in a not so nice way. If you need to simply kill ftrace
1609 * from a non-atomic section, use ftrace_kill.
1610 */
1611void ftrace_kill_atomic(void)
1612{ 1695{
1613 ftrace_disabled = 1; 1696 char buf[64];
1614 ftrace_enabled = 0; 1697 int r;
1615#ifdef CONFIG_DYNAMIC_FTRACE 1698
1616 ftraced_suspend = -1; 1699 if (ftrace_pid_trace == ftrace_swapper_pid)
1617#endif 1700 r = sprintf(buf, "swapper tasks\n");
1618 clear_ftrace_function(); 1701 else if (ftrace_pid_trace)
1702 r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
1703 else
1704 r = sprintf(buf, "no pid\n");
1705
1706 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1707}
1708
1709static void clear_ftrace_swapper(void)
1710{
1711 struct task_struct *p;
1712 int cpu;
1713
1714 get_online_cpus();
1715 for_each_online_cpu(cpu) {
1716 p = idle_task(cpu);
1717 clear_tsk_trace_trace(p);
1718 }
1719 put_online_cpus();
1720}
1721
1722static void set_ftrace_swapper(void)
1723{
1724 struct task_struct *p;
1725 int cpu;
1726
1727 get_online_cpus();
1728 for_each_online_cpu(cpu) {
1729 p = idle_task(cpu);
1730 set_tsk_trace_trace(p);
1731 }
1732 put_online_cpus();
1733}
1734
1735static void clear_ftrace_pid(struct pid *pid)
1736{
1737 struct task_struct *p;
1738
1739 do_each_pid_task(pid, PIDTYPE_PID, p) {
1740 clear_tsk_trace_trace(p);
1741 } while_each_pid_task(pid, PIDTYPE_PID, p);
1742 put_pid(pid);
1743}
1744
1745static void set_ftrace_pid(struct pid *pid)
1746{
1747 struct task_struct *p;
1748
1749 do_each_pid_task(pid, PIDTYPE_PID, p) {
1750 set_tsk_trace_trace(p);
1751 } while_each_pid_task(pid, PIDTYPE_PID, p);
1752}
1753
1754static void clear_ftrace_pid_task(struct pid **pid)
1755{
1756 if (*pid == ftrace_swapper_pid)
1757 clear_ftrace_swapper();
1758 else
1759 clear_ftrace_pid(*pid);
1760
1761 *pid = NULL;
1762}
1763
1764static void set_ftrace_pid_task(struct pid *pid)
1765{
1766 if (pid == ftrace_swapper_pid)
1767 set_ftrace_swapper();
1768 else
1769 set_ftrace_pid(pid);
1770}
1771
1772static ssize_t
1773ftrace_pid_write(struct file *filp, const char __user *ubuf,
1774 size_t cnt, loff_t *ppos)
1775{
1776 struct pid *pid;
1777 char buf[64];
1778 long val;
1779 int ret;
1780
1781 if (cnt >= sizeof(buf))
1782 return -EINVAL;
1783
1784 if (copy_from_user(&buf, ubuf, cnt))
1785 return -EFAULT;
1786
1787 buf[cnt] = 0;
1788
1789 ret = strict_strtol(buf, 10, &val);
1790 if (ret < 0)
1791 return ret;
1792
1793 mutex_lock(&ftrace_start_lock);
1794 if (val < 0) {
1795 /* disable pid tracing */
1796 if (!ftrace_pid_trace)
1797 goto out;
1798
1799 clear_ftrace_pid_task(&ftrace_pid_trace);
1800
1801 } else {
1802 /* swapper task is special */
1803 if (!val) {
1804 pid = ftrace_swapper_pid;
1805 if (pid == ftrace_pid_trace)
1806 goto out;
1807 } else {
1808 pid = find_get_pid(val);
1809
1810 if (pid == ftrace_pid_trace) {
1811 put_pid(pid);
1812 goto out;
1813 }
1814 }
1815
1816 if (ftrace_pid_trace)
1817 clear_ftrace_pid_task(&ftrace_pid_trace);
1818
1819 if (!pid)
1820 goto out;
1821
1822 ftrace_pid_trace = pid;
1823
1824 set_ftrace_pid_task(ftrace_pid_trace);
1825 }
1826
1827 /* update the function call */
1828 ftrace_update_pid_func();
1829 ftrace_startup_enable(0);
1830
1831 out:
1832 mutex_unlock(&ftrace_start_lock);
1833
1834 return cnt;
1619} 1835}
1620 1836
1837static struct file_operations ftrace_pid_fops = {
1838 .read = ftrace_pid_read,
1839 .write = ftrace_pid_write,
1840};
1841
1842static __init int ftrace_init_debugfs(void)
1843{
1844 struct dentry *d_tracer;
1845 struct dentry *entry;
1846
1847 d_tracer = tracing_init_dentry();
1848 if (!d_tracer)
1849 return 0;
1850
1851 ftrace_init_dyn_debugfs(d_tracer);
1852
1853 entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
1854 NULL, &ftrace_pid_fops);
1855 if (!entry)
1856 pr_warning("Could not create debugfs "
1857 "'set_ftrace_pid' entry\n");
1858 return 0;
1859}
1860
1861fs_initcall(ftrace_init_debugfs);
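Per ftrace_pid_write() above, a positive value selects that PID, 0 selects the idle (swapper) tasks, and a negative value disables PID filtering. A hedged user-space sketch; the debugfs mount point and helper name are assumptions:

/* Hypothetical user-space helper; the path is an assumption. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_trace_pid(long pid)
{
	char buf[32];
	int fd, len, ret;

	fd = open("/sys/kernel/debug/tracing/set_ftrace_pid", O_WRONLY);
	if (fd < 0)
		return -1;
	/* pid > 0: trace that task, 0: trace the idle tasks, < 0: disable */
	len = snprintf(buf, sizeof(buf), "%ld\n", pid);
	ret = (write(fd, buf, len) == len) ? 0 : -1;
	close(fd);
	return ret;
}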
1862
1621/** 1863/**
1622 * ftrace_kill - totally shutdown ftrace 1864 * ftrace_kill - kill ftrace
1623 * 1865 *
1624 * This is a safety measure. If something was detected that seems 1866 * This function should be used by panic code. It stops ftrace
1625 * wrong, calling this function will keep ftrace from doing 1867 * but in a not so nice way. If you need to simply kill ftrace
1626 * any more modifications, and updates. 1868 * from a non-atomic section, use ftrace_kill.
1627 * used when something went wrong.
1628 */ 1869 */
1629void ftrace_kill(void) 1870void ftrace_kill(void)
1630{ 1871{
1631 mutex_lock(&ftrace_sysctl_lock);
1632 ftrace_disabled = 1; 1872 ftrace_disabled = 1;
1633 ftrace_enabled = 0; 1873 ftrace_enabled = 0;
1634
1635 clear_ftrace_function(); 1874 clear_ftrace_function();
1636 mutex_unlock(&ftrace_sysctl_lock);
1637
1638 /* Try to totally disable ftrace */
1639 ftrace_force_shutdown();
1640} 1875}
1641 1876
1642/** 1877/**
@@ -1658,10 +1893,11 @@ int register_ftrace_function(struct ftrace_ops *ops)
1658 return -1; 1893 return -1;
1659 1894
1660 mutex_lock(&ftrace_sysctl_lock); 1895 mutex_lock(&ftrace_sysctl_lock);
1896
1661 ret = __register_ftrace_function(ops); 1897 ret = __register_ftrace_function(ops);
1662 ftrace_startup(); 1898 ftrace_startup(0);
1663 mutex_unlock(&ftrace_sysctl_lock);
1664 1899
1900 mutex_unlock(&ftrace_sysctl_lock);
1665 return ret; 1901 return ret;
1666} 1902}
1667 1903
@@ -1677,7 +1913,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1677 1913
1678 mutex_lock(&ftrace_sysctl_lock); 1914 mutex_lock(&ftrace_sysctl_lock);
1679 ret = __unregister_ftrace_function(ops); 1915 ret = __unregister_ftrace_function(ops);
1680 ftrace_shutdown(); 1916 ftrace_shutdown(0);
1681 mutex_unlock(&ftrace_sysctl_lock); 1917 mutex_unlock(&ftrace_sysctl_lock);
1682 1918
1683 return ret; 1919 return ret;
@@ -1725,3 +1961,154 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1725 mutex_unlock(&ftrace_sysctl_lock); 1961 mutex_unlock(&ftrace_sysctl_lock);
1726 return ret; 1962 return ret;
1727} 1963}
1964
1965#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1966
1967static atomic_t ftrace_graph_active;
1968
1969int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
1970{
1971 return 0;
1972}
1973
1974/* The callbacks that hook a function */
1975trace_func_graph_ret_t ftrace_graph_return =
1976 (trace_func_graph_ret_t)ftrace_stub;
1977trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
1978
 1979/* Try to assign a return stack array to FTRACE_RETSTACK_ALLOC_SIZE tasks at a time. */

1980static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
1981{
1982 int i;
1983 int ret = 0;
1984 unsigned long flags;
1985 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
1986 struct task_struct *g, *t;
1987
1988 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
1989 ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
1990 * sizeof(struct ftrace_ret_stack),
1991 GFP_KERNEL);
1992 if (!ret_stack_list[i]) {
1993 start = 0;
1994 end = i;
1995 ret = -ENOMEM;
1996 goto free;
1997 }
1998 }
1999
2000 read_lock_irqsave(&tasklist_lock, flags);
2001 do_each_thread(g, t) {
2002 if (start == end) {
2003 ret = -EAGAIN;
2004 goto unlock;
2005 }
2006
2007 if (t->ret_stack == NULL) {
2008 t->curr_ret_stack = -1;
2009 /* Make sure IRQs see the -1 first: */
2010 barrier();
2011 t->ret_stack = ret_stack_list[start++];
2012 atomic_set(&t->tracing_graph_pause, 0);
2013 atomic_set(&t->trace_overrun, 0);
2014 }
2015 } while_each_thread(g, t);
2016
2017unlock:
2018 read_unlock_irqrestore(&tasklist_lock, flags);
2019free:
2020 for (i = start; i < end; i++)
2021 kfree(ret_stack_list[i]);
2022 return ret;
2023}
2024
2025/* Allocate a return stack for each task */
2026static int start_graph_tracing(void)
2027{
2028 struct ftrace_ret_stack **ret_stack_list;
2029 int ret;
2030
2031 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
2032 sizeof(struct ftrace_ret_stack *),
2033 GFP_KERNEL);
2034
2035 if (!ret_stack_list)
2036 return -ENOMEM;
2037
2038 do {
2039 ret = alloc_retstack_tasklist(ret_stack_list);
2040 } while (ret == -EAGAIN);
2041
2042 kfree(ret_stack_list);
2043 return ret;
2044}
2045
2046int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2047 trace_func_graph_ent_t entryfunc)
2048{
2049 int ret = 0;
2050
2051 mutex_lock(&ftrace_sysctl_lock);
2052
2053 atomic_inc(&ftrace_graph_active);
2054 ret = start_graph_tracing();
2055 if (ret) {
2056 atomic_dec(&ftrace_graph_active);
2057 goto out;
2058 }
2059
2060 ftrace_graph_return = retfunc;
2061 ftrace_graph_entry = entryfunc;
2062
2063 ftrace_startup(FTRACE_START_FUNC_RET);
2064
2065out:
2066 mutex_unlock(&ftrace_sysctl_lock);
2067 return ret;
2068}
2069
2070void unregister_ftrace_graph(void)
2071{
2072 mutex_lock(&ftrace_sysctl_lock);
2073
2074 atomic_dec(&ftrace_graph_active);
2075 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2076 ftrace_graph_entry = ftrace_graph_entry_stub;
2077 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2078
2079 mutex_unlock(&ftrace_sysctl_lock);
2080}
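A hedged sketch of a client of the two calls above. The entry callback signature matches ftrace_graph_entry_stub(); struct ftrace_graph_ret and the exact return-callback signature live in the tracing headers and are assumptions here, not shown in this patch:

/* Sketch only: the return-callback signature is an assumption. */
static int my_graph_entry(struct ftrace_graph_ent *trace)
{
	/* Return value semantics are defined by the graph tracer core. */
	return 1;
}

static void my_graph_return(struct ftrace_graph_ret *trace)
{
	/* Called when a traced function returns. */
}

static int my_graph_start(void)
{
	/* Argument order follows the prototype above: return, then entry. */
	return register_ftrace_graph(my_graph_return, my_graph_entry);
}

static void my_graph_stop(void)
{
	unregister_ftrace_graph();
}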
2081
2082/* Allocate a return stack for newly created task */
2083void ftrace_graph_init_task(struct task_struct *t)
2084{
2085 if (atomic_read(&ftrace_graph_active)) {
2086 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
2087 * sizeof(struct ftrace_ret_stack),
2088 GFP_KERNEL);
2089 if (!t->ret_stack)
2090 return;
2091 t->curr_ret_stack = -1;
2092 atomic_set(&t->tracing_graph_pause, 0);
2093 atomic_set(&t->trace_overrun, 0);
2094 } else
2095 t->ret_stack = NULL;
2096}
2097
2098void ftrace_graph_exit_task(struct task_struct *t)
2099{
2100 struct ftrace_ret_stack *ret_stack = t->ret_stack;
2101
2102 t->ret_stack = NULL;
2103 /* NULL must become visible to IRQs before we free it: */
2104 barrier();
2105
2106 kfree(ret_stack);
2107}
2108
2109void ftrace_graph_stop(void)
2110{
2111 ftrace_stop();
2112}
2113#endif
2114
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 000000000000..1d601a7c4587
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2517 @@
1/*
2 * Generic ring buffer
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/ring_buffer.h>
7#include <linux/spinlock.h>
8#include <linux/debugfs.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/percpu.h>
12#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h>
15#include <linux/hash.h>
16#include <linux/list.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21/*
22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
 25 * Turning this switch on makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
 28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
 36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
61
62/**
63 * tracing_on - enable all tracing buffers
64 *
65 * This function enables all tracing buffers that may have been
66 * disabled with tracing_off.
67 */
68void tracing_on(void)
69{
70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
71}
72EXPORT_SYMBOL_GPL(tracing_on);
73
74/**
75 * tracing_off - turn off all tracing buffers
76 *
77 * This function stops all tracing buffers from recording data.
78 * It does not disable any overhead the tracers themselves may
79 * be causing. This function simply causes all recording to
80 * the ring buffers to fail.
81 */
82void tracing_off(void)
83{
84 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
85}
86EXPORT_SYMBOL_GPL(tracing_off);
87
88/**
89 * tracing_off_permanent - permanently disable ring buffers
90 *
91 * This function, once called, will disable all ring buffers
 92 * permanently.
93 */
94void tracing_off_permanent(void)
95{
96 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
97}
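The three calls above are meant to be cheap enough to drop into suspect code paths; a minimal sketch of the intended use, freezing the buffers so the events leading up to a detected problem survive (the function and condition are hypothetical):

/* Sketch: stop all ring buffers the moment a problem is detected. */
static void my_check(int broken)
{
	if (broken) {
		tracing_off();	/* preserve the trace leading up to here */
		WARN_ON_ONCE(1);
	}
}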
98
99#include "trace.h"
100
101/* Up this if you want to test the TIME_EXTENTS and normalization */
102#define DEBUG_SHIFT 0
103
104/* FIXME!!! */
105u64 ring_buffer_time_stamp(int cpu)
106{
107 u64 time;
108
109 preempt_disable_notrace();
110 /* shift to debug/test normalization and TIME_EXTENTS */
111 time = sched_clock() << DEBUG_SHIFT;
112 preempt_enable_no_resched_notrace();
113
114 return time;
115}
116EXPORT_SYMBOL_GPL(ring_buffer_time_stamp);
117
118void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
119{
120 /* Just stupid testing the normalize function and deltas */
121 *ts >>= DEBUG_SHIFT;
122}
123EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
124
125#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
126#define RB_ALIGNMENT_SHIFT 2
127#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
128#define RB_MAX_SMALL_DATA 28
129
130enum {
131 RB_LEN_TIME_EXTEND = 8,
132 RB_LEN_TIME_STAMP = 16,
133};
134
135/* inline for ring buffer fast paths */
136static inline unsigned
137rb_event_length(struct ring_buffer_event *event)
138{
139 unsigned length;
140
141 switch (event->type) {
142 case RINGBUF_TYPE_PADDING:
143 /* undefined */
144 return -1;
145
146 case RINGBUF_TYPE_TIME_EXTEND:
147 return RB_LEN_TIME_EXTEND;
148
149 case RINGBUF_TYPE_TIME_STAMP:
150 return RB_LEN_TIME_STAMP;
151
152 case RINGBUF_TYPE_DATA:
153 if (event->len)
154 length = event->len << RB_ALIGNMENT_SHIFT;
155 else
156 length = event->array[0];
157 return length + RB_EVNT_HDR_SIZE;
158 default:
159 BUG();
160 }
161 /* not hit */
162 return 0;
163}
164
165/**
166 * ring_buffer_event_length - return the length of the event
167 * @event: the event to get the length of
168 */
169unsigned ring_buffer_event_length(struct ring_buffer_event *event)
170{
171 return rb_event_length(event);
172}
173EXPORT_SYMBOL_GPL(ring_buffer_event_length);
174
175/* inline for ring buffer fast paths */
176static inline void *
177rb_event_data(struct ring_buffer_event *event)
178{
179 BUG_ON(event->type != RINGBUF_TYPE_DATA);
180 /* If length is in len field, then array[0] has the data */
181 if (event->len)
182 return (void *)&event->array[0];
183 /* Otherwise length is in array[0] and array[1] has the data */
184 return (void *)&event->array[1];
185}
186
187/**
188 * ring_buffer_event_data - return the data of the event
189 * @event: the event to get the data from
190 */
191void *ring_buffer_event_data(struct ring_buffer_event *event)
192{
193 return rb_event_data(event);
194}
195EXPORT_SYMBOL_GPL(ring_buffer_event_data);
196
197#define for_each_buffer_cpu(buffer, cpu) \
198 for_each_cpu_mask(cpu, buffer->cpumask)
199
200#define TS_SHIFT 27
201#define TS_MASK ((1ULL << TS_SHIFT) - 1)
202#define TS_DELTA_TEST (~TS_MASK)
203
204struct buffer_data_page {
205 u64 time_stamp; /* page time stamp */
 206 local_t commit; /* write committed index */
207 unsigned char data[]; /* data of buffer page */
208};
209
210struct buffer_page {
211 local_t write; /* index for next write */
212 unsigned read; /* index for next read */
213 struct list_head list; /* list of free pages */
214 struct buffer_data_page *page; /* Actual data page */
215};
216
217static void rb_init_page(struct buffer_data_page *bpage)
218{
219 local_set(&bpage->commit, 0);
220}
221
222/*
223 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
224 * this issue out.
225 */
226static inline void free_buffer_page(struct buffer_page *bpage)
227{
228 if (bpage->page)
229 free_page((unsigned long)bpage->page);
230 kfree(bpage);
231}
232
233/*
234 * We need to fit the time_stamp delta into 27 bits.
235 */
236static inline int test_time_stamp(u64 delta)
237{
238 if (delta & TS_DELTA_TEST)
239 return 1;
240 return 0;
241}
242
243#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
244
245/*
246 * head_page == tail_page && head == tail then buffer is empty.
247 */
248struct ring_buffer_per_cpu {
249 int cpu;
250 struct ring_buffer *buffer;
251 spinlock_t reader_lock; /* serialize readers */
252 raw_spinlock_t lock;
253 struct lock_class_key lock_key;
254 struct list_head pages;
255 struct buffer_page *head_page; /* read from head */
256 struct buffer_page *tail_page; /* write to tail */
 257 struct buffer_page *commit_page; /* committed pages */
258 struct buffer_page *reader_page;
259 unsigned long overrun;
260 unsigned long entries;
261 u64 write_stamp;
262 u64 read_stamp;
263 atomic_t record_disabled;
264};
265
266struct ring_buffer {
267 unsigned pages;
268 unsigned flags;
269 int cpus;
270 cpumask_t cpumask;
271 atomic_t record_disabled;
272
273 struct mutex mutex;
274
275 struct ring_buffer_per_cpu **buffers;
276};
277
278struct ring_buffer_iter {
279 struct ring_buffer_per_cpu *cpu_buffer;
280 unsigned long head;
281 struct buffer_page *head_page;
282 u64 read_stamp;
283};
284
285/* buffer may be either ring_buffer or ring_buffer_per_cpu */
286#define RB_WARN_ON(buffer, cond) \
287 ({ \
288 int _____ret = unlikely(cond); \
289 if (_____ret) { \
290 atomic_inc(&buffer->record_disabled); \
291 WARN_ON(1); \
292 } \
293 _____ret; \
294 })
295
296/**
297 * check_pages - integrity check of buffer pages
298 * @cpu_buffer: CPU buffer with pages to test
299 *
 300 * As a safety measure we check to make sure the data pages have not
301 * been corrupted.
302 */
303static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
304{
305 struct list_head *head = &cpu_buffer->pages;
306 struct buffer_page *bpage, *tmp;
307
308 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
309 return -1;
310 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
311 return -1;
312
313 list_for_each_entry_safe(bpage, tmp, head, list) {
314 if (RB_WARN_ON(cpu_buffer,
315 bpage->list.next->prev != &bpage->list))
316 return -1;
317 if (RB_WARN_ON(cpu_buffer,
318 bpage->list.prev->next != &bpage->list))
319 return -1;
320 }
321
322 return 0;
323}
324
325static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
326 unsigned nr_pages)
327{
328 struct list_head *head = &cpu_buffer->pages;
329 struct buffer_page *bpage, *tmp;
330 unsigned long addr;
331 LIST_HEAD(pages);
332 unsigned i;
333
334 for (i = 0; i < nr_pages; i++) {
335 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
336 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
337 if (!bpage)
338 goto free_pages;
339 list_add(&bpage->list, &pages);
340
341 addr = __get_free_page(GFP_KERNEL);
342 if (!addr)
343 goto free_pages;
344 bpage->page = (void *)addr;
345 rb_init_page(bpage->page);
346 }
347
348 list_splice(&pages, head);
349
350 rb_check_pages(cpu_buffer);
351
352 return 0;
353
354 free_pages:
355 list_for_each_entry_safe(bpage, tmp, &pages, list) {
356 list_del_init(&bpage->list);
357 free_buffer_page(bpage);
358 }
359 return -ENOMEM;
360}
361
362static struct ring_buffer_per_cpu *
363rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
364{
365 struct ring_buffer_per_cpu *cpu_buffer;
366 struct buffer_page *bpage;
367 unsigned long addr;
368 int ret;
369
370 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
371 GFP_KERNEL, cpu_to_node(cpu));
372 if (!cpu_buffer)
373 return NULL;
374
375 cpu_buffer->cpu = cpu;
376 cpu_buffer->buffer = buffer;
377 spin_lock_init(&cpu_buffer->reader_lock);
378 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
379 INIT_LIST_HEAD(&cpu_buffer->pages);
380
381 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
382 GFP_KERNEL, cpu_to_node(cpu));
383 if (!bpage)
384 goto fail_free_buffer;
385
386 cpu_buffer->reader_page = bpage;
387 addr = __get_free_page(GFP_KERNEL);
388 if (!addr)
389 goto fail_free_reader;
390 bpage->page = (void *)addr;
391 rb_init_page(bpage->page);
392
393 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
394
395 ret = rb_allocate_pages(cpu_buffer, buffer->pages);
396 if (ret < 0)
397 goto fail_free_reader;
398
399 cpu_buffer->head_page
400 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
401 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
402
403 return cpu_buffer;
404
405 fail_free_reader:
406 free_buffer_page(cpu_buffer->reader_page);
407
408 fail_free_buffer:
409 kfree(cpu_buffer);
410 return NULL;
411}
412
413static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
414{
415 struct list_head *head = &cpu_buffer->pages;
416 struct buffer_page *bpage, *tmp;
417
418 list_del_init(&cpu_buffer->reader_page->list);
419 free_buffer_page(cpu_buffer->reader_page);
420
421 list_for_each_entry_safe(bpage, tmp, head, list) {
422 list_del_init(&bpage->list);
423 free_buffer_page(bpage);
424 }
425 kfree(cpu_buffer);
426}
427
428/*
429 * Causes compile errors if the struct buffer_page gets bigger
430 * than the struct page.
431 */
432extern int ring_buffer_page_too_big(void);
433
434/**
435 * ring_buffer_alloc - allocate a new ring_buffer
436 * @size: the size in bytes per cpu that is needed.
437 * @flags: attributes to set for the ring buffer.
438 *
439 * Currently the only flag that is available is the RB_FL_OVERWRITE
440 * flag. This flag means that the buffer will overwrite old data
441 * when the buffer wraps. If this flag is not set, the buffer will
442 * drop data when the tail hits the head.
443 */
444struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
445{
446 struct ring_buffer *buffer;
447 int bsize;
448 int cpu;
449
450 /* Paranoid! Optimizes out when all is well */
451 if (sizeof(struct buffer_page) > sizeof(struct page))
452 ring_buffer_page_too_big();
453
454
455 /* keep it in its own cache line */
456 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
457 GFP_KERNEL);
458 if (!buffer)
459 return NULL;
460
461 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
462 buffer->flags = flags;
463
464 /* need at least two pages */
465 if (buffer->pages == 1)
466 buffer->pages++;
467
468 buffer->cpumask = cpu_possible_map;
469 buffer->cpus = nr_cpu_ids;
470
471 bsize = sizeof(void *) * nr_cpu_ids;
472 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
473 GFP_KERNEL);
474 if (!buffer->buffers)
475 goto fail_free_buffer;
476
477 for_each_buffer_cpu(buffer, cpu) {
478 buffer->buffers[cpu] =
479 rb_allocate_cpu_buffer(buffer, cpu);
480 if (!buffer->buffers[cpu])
481 goto fail_free_buffers;
482 }
483
484 mutex_init(&buffer->mutex);
485
486 return buffer;
487
488 fail_free_buffers:
489 for_each_buffer_cpu(buffer, cpu) {
490 if (buffer->buffers[cpu])
491 rb_free_cpu_buffer(buffer->buffers[cpu]);
492 }
493 kfree(buffer->buffers);
494
495 fail_free_buffer:
496 kfree(buffer);
497 return NULL;
498}
499EXPORT_SYMBOL_GPL(ring_buffer_alloc);
500
501/**
502 * ring_buffer_free - free a ring buffer.
503 * @buffer: the buffer to free.
504 */
505void
506ring_buffer_free(struct ring_buffer *buffer)
507{
508 int cpu;
509
510 for_each_buffer_cpu(buffer, cpu)
511 rb_free_cpu_buffer(buffer->buffers[cpu]);
512
513 kfree(buffer);
514}
515EXPORT_SYMBOL_GPL(ring_buffer_free);
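A minimal sketch of setting up and tearing down a buffer with the two calls above; the 64 KB size is arbitrary and RB_FL_OVERWRITE is the flag documented at ring_buffer_alloc():

/* Sketch: ~64KB per cpu, overwriting the oldest data when full. */
static struct ring_buffer *my_buffer;

static int my_buffer_init(void)
{
	my_buffer = ring_buffer_alloc(65536, RB_FL_OVERWRITE);
	return my_buffer ? 0 : -ENOMEM;
}

static void my_buffer_exit(void)
{
	ring_buffer_free(my_buffer);
}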
516
517static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
518
519static void
520rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
521{
522 struct buffer_page *bpage;
523 struct list_head *p;
524 unsigned i;
525
526 atomic_inc(&cpu_buffer->record_disabled);
527 synchronize_sched();
528
529 for (i = 0; i < nr_pages; i++) {
530 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
531 return;
532 p = cpu_buffer->pages.next;
533 bpage = list_entry(p, struct buffer_page, list);
534 list_del_init(&bpage->list);
535 free_buffer_page(bpage);
536 }
537 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
538 return;
539
540 rb_reset_cpu(cpu_buffer);
541
542 rb_check_pages(cpu_buffer);
543
544 atomic_dec(&cpu_buffer->record_disabled);
545
546}
547
548static void
549rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
550 struct list_head *pages, unsigned nr_pages)
551{
552 struct buffer_page *bpage;
553 struct list_head *p;
554 unsigned i;
555
556 atomic_inc(&cpu_buffer->record_disabled);
557 synchronize_sched();
558
559 for (i = 0; i < nr_pages; i++) {
560 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
561 return;
562 p = pages->next;
563 bpage = list_entry(p, struct buffer_page, list);
564 list_del_init(&bpage->list);
565 list_add_tail(&bpage->list, &cpu_buffer->pages);
566 }
567 rb_reset_cpu(cpu_buffer);
568
569 rb_check_pages(cpu_buffer);
570
571 atomic_dec(&cpu_buffer->record_disabled);
572}
573
574/**
575 * ring_buffer_resize - resize the ring buffer
576 * @buffer: the buffer to resize.
577 * @size: the new size.
578 *
579 * The tracer is responsible for making sure that the buffer is
580 * not being used while changing the size.
581 * Note: We may be able to change the above requirement by using
582 * RCU synchronizations.
583 *
584 * Minimum size is 2 * BUF_PAGE_SIZE.
585 *
586 * Returns -1 on failure.
587 */
588int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
589{
590 struct ring_buffer_per_cpu *cpu_buffer;
591 unsigned nr_pages, rm_pages, new_pages;
592 struct buffer_page *bpage, *tmp;
593 unsigned long buffer_size;
594 unsigned long addr;
595 LIST_HEAD(pages);
596 int i, cpu;
597
598 /*
599 * Always succeed at resizing a non-existent buffer:
600 */
601 if (!buffer)
602 return size;
603
604 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
605 size *= BUF_PAGE_SIZE;
606 buffer_size = buffer->pages * BUF_PAGE_SIZE;
607
608 /* we need a minimum of two pages */
609 if (size < BUF_PAGE_SIZE * 2)
610 size = BUF_PAGE_SIZE * 2;
611
612 if (size == buffer_size)
613 return size;
614
615 mutex_lock(&buffer->mutex);
616
617 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
618
619 if (size < buffer_size) {
620
621 /* easy case, just free pages */
622 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
623 mutex_unlock(&buffer->mutex);
624 return -1;
625 }
626
627 rm_pages = buffer->pages - nr_pages;
628
629 for_each_buffer_cpu(buffer, cpu) {
630 cpu_buffer = buffer->buffers[cpu];
631 rb_remove_pages(cpu_buffer, rm_pages);
632 }
633 goto out;
634 }
635
636 /*
637 * This is a bit more difficult. We only want to add pages
638 * when we can allocate enough for all CPUs. We do this
639 * by allocating all the pages and storing them on a local
640 * link list. If we succeed in our allocation, then we
641 * add these pages to the cpu_buffers. Otherwise we just free
642 * them all and return -ENOMEM;
643 */
644 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
645 mutex_unlock(&buffer->mutex);
646 return -1;
647 }
648
649 new_pages = nr_pages - buffer->pages;
650
651 for_each_buffer_cpu(buffer, cpu) {
652 for (i = 0; i < new_pages; i++) {
653 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
654 cache_line_size()),
655 GFP_KERNEL, cpu_to_node(cpu));
656 if (!bpage)
657 goto free_pages;
658 list_add(&bpage->list, &pages);
659 addr = __get_free_page(GFP_KERNEL);
660 if (!addr)
661 goto free_pages;
662 bpage->page = (void *)addr;
663 rb_init_page(bpage->page);
664 }
665 }
666
667 for_each_buffer_cpu(buffer, cpu) {
668 cpu_buffer = buffer->buffers[cpu];
669 rb_insert_pages(cpu_buffer, &pages, new_pages);
670 }
671
672 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
673 mutex_unlock(&buffer->mutex);
674 return -1;
675 }
676
677 out:
678 buffer->pages = nr_pages;
679 mutex_unlock(&buffer->mutex);
680
681 return size;
682
683 free_pages:
684 list_for_each_entry_safe(bpage, tmp, &pages, list) {
685 list_del_init(&bpage->list);
686 free_buffer_page(bpage);
687 }
688 mutex_unlock(&buffer->mutex);
689 return -ENOMEM;
690}
691EXPORT_SYMBOL_GPL(ring_buffer_resize);
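Resizing rounds the request up to whole buffer pages and returns the size actually set, or a negative value on failure; the caller must keep the buffer idle while resizing, as the comment above requires. A hedged sketch:

/* Sketch: grow a quiesced buffer to at least 1MB per cpu. */
static int my_buffer_grow(struct ring_buffer *buffer)
{
	int ret = ring_buffer_resize(buffer, 1024 * 1024);

	return ret < 0 ? ret : 0;
}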
692
693static inline int rb_null_event(struct ring_buffer_event *event)
694{
695 return event->type == RINGBUF_TYPE_PADDING;
696}
697
698static inline void *
699__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
700{
701 return bpage->data + index;
702}
703
704static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
705{
706 return bpage->page->data + index;
707}
708
709static inline struct ring_buffer_event *
710rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
711{
712 return __rb_page_index(cpu_buffer->reader_page,
713 cpu_buffer->reader_page->read);
714}
715
716static inline struct ring_buffer_event *
717rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
718{
719 return __rb_page_index(cpu_buffer->head_page,
720 cpu_buffer->head_page->read);
721}
722
723static inline struct ring_buffer_event *
724rb_iter_head_event(struct ring_buffer_iter *iter)
725{
726 return __rb_page_index(iter->head_page, iter->head);
727}
728
729static inline unsigned rb_page_write(struct buffer_page *bpage)
730{
731 return local_read(&bpage->write);
732}
733
734static inline unsigned rb_page_commit(struct buffer_page *bpage)
735{
736 return local_read(&bpage->page->commit);
737}
738
 739/* Size is determined by what has been committed */
740static inline unsigned rb_page_size(struct buffer_page *bpage)
741{
742 return rb_page_commit(bpage);
743}
744
745static inline unsigned
746rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
747{
748 return rb_page_commit(cpu_buffer->commit_page);
749}
750
751static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
752{
753 return rb_page_commit(cpu_buffer->head_page);
754}
755
756/*
757 * When the tail hits the head and the buffer is in overwrite mode,
758 * the head jumps to the next page and all content on the previous
759 * page is discarded. But before doing so, we update the overrun
760 * variable of the buffer.
761 */
762static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
763{
764 struct ring_buffer_event *event;
765 unsigned long head;
766
767 for (head = 0; head < rb_head_size(cpu_buffer);
768 head += rb_event_length(event)) {
769
770 event = __rb_page_index(cpu_buffer->head_page, head);
771 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
772 return;
773 /* Only count data entries */
774 if (event->type != RINGBUF_TYPE_DATA)
775 continue;
776 cpu_buffer->overrun++;
777 cpu_buffer->entries--;
778 }
779}
780
781static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
782 struct buffer_page **bpage)
783{
784 struct list_head *p = (*bpage)->list.next;
785
786 if (p == &cpu_buffer->pages)
787 p = p->next;
788
789 *bpage = list_entry(p, struct buffer_page, list);
790}
791
792static inline unsigned
793rb_event_index(struct ring_buffer_event *event)
794{
795 unsigned long addr = (unsigned long)event;
796
797 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
798}
799
800static inline int
801rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
802 struct ring_buffer_event *event)
803{
804 unsigned long addr = (unsigned long)event;
805 unsigned long index;
806
807 index = rb_event_index(event);
808 addr &= PAGE_MASK;
809
810 return cpu_buffer->commit_page->page == (void *)addr &&
811 rb_commit_index(cpu_buffer) == index;
812}
813
814static inline void
815rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
816 struct ring_buffer_event *event)
817{
818 unsigned long addr = (unsigned long)event;
819 unsigned long index;
820
821 index = rb_event_index(event);
822 addr &= PAGE_MASK;
823
824 while (cpu_buffer->commit_page->page != (void *)addr) {
825 if (RB_WARN_ON(cpu_buffer,
826 cpu_buffer->commit_page == cpu_buffer->tail_page))
827 return;
828 cpu_buffer->commit_page->page->commit =
829 cpu_buffer->commit_page->write;
830 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
831 cpu_buffer->write_stamp =
832 cpu_buffer->commit_page->page->time_stamp;
833 }
834
835 /* Now set the commit to the event's index */
836 local_set(&cpu_buffer->commit_page->page->commit, index);
837}
838
839static inline void
840rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
841{
842 /*
843 * We only race with interrupts and NMIs on this CPU.
844 * If we own the commit event, then we can commit
845 * all others that interrupted us, since the interruptions
846 * are in stack format (they finish before they come
847 * back to us). This allows us to do a simple loop to
848 * assign the commit to the tail.
849 */
850 again:
851 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
852 cpu_buffer->commit_page->page->commit =
853 cpu_buffer->commit_page->write;
854 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
855 cpu_buffer->write_stamp =
856 cpu_buffer->commit_page->page->time_stamp;
857 /* add barrier to keep gcc from optimizing too much */
858 barrier();
859 }
860 while (rb_commit_index(cpu_buffer) !=
861 rb_page_write(cpu_buffer->commit_page)) {
862 cpu_buffer->commit_page->page->commit =
863 cpu_buffer->commit_page->write;
864 barrier();
865 }
866
867 /* again, keep gcc from optimizing */
868 barrier();
869
870 /*
871 * If an interrupt came in just after the first while loop
872 * and pushed the tail page forward, we will be left with
873 * a dangling commit that will never go forward.
874 */
875 if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
876 goto again;
877}
878
879static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
880{
881 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
882 cpu_buffer->reader_page->read = 0;
883}
884
885static inline void rb_inc_iter(struct ring_buffer_iter *iter)
886{
887 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
888
889 /*
890 * The iterator could be on the reader page (it starts there).
891 * But the head could have moved, since the reader was
892 * found. Check for this case and assign the iterator
893 * to the head page instead of next.
894 */
895 if (iter->head_page == cpu_buffer->reader_page)
896 iter->head_page = cpu_buffer->head_page;
897 else
898 rb_inc_page(cpu_buffer, &iter->head_page);
899
900 iter->read_stamp = iter->head_page->page->time_stamp;
901 iter->head = 0;
902}
903
904/**
905 * ring_buffer_update_event - update event type and data
 906 * @event: the event to update
907 * @type: the type of event
908 * @length: the size of the event field in the ring buffer
909 *
910 * Update the type and data fields of the event. The length
911 * is the actual size that is written to the ring buffer,
912 * and with this, we can determine what to place into the
913 * data field.
914 */
915static inline void
916rb_update_event(struct ring_buffer_event *event,
917 unsigned type, unsigned length)
918{
919 event->type = type;
920
921 switch (type) {
922
923 case RINGBUF_TYPE_PADDING:
924 break;
925
926 case RINGBUF_TYPE_TIME_EXTEND:
927 event->len =
928 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
929 >> RB_ALIGNMENT_SHIFT;
930 break;
931
932 case RINGBUF_TYPE_TIME_STAMP:
933 event->len =
934 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
935 >> RB_ALIGNMENT_SHIFT;
936 break;
937
938 case RINGBUF_TYPE_DATA:
939 length -= RB_EVNT_HDR_SIZE;
940 if (length > RB_MAX_SMALL_DATA) {
941 event->len = 0;
942 event->array[0] = length;
943 } else
944 event->len =
945 (length + (RB_ALIGNMENT-1))
946 >> RB_ALIGNMENT_SHIFT;
947 break;
948 default:
949 BUG();
950 }
951}
952
953static inline unsigned rb_calculate_event_length(unsigned length)
954{
955 struct ring_buffer_event event; /* Used only for sizeof array */
956
957 /* zero length can cause confusions */
958 if (!length)
959 length = 1;
960
961 if (length > RB_MAX_SMALL_DATA)
962 length += sizeof(event.array[0]);
963
964 length += RB_EVNT_HDR_SIZE;
965 length = ALIGN(length, RB_ALIGNMENT);
966
967 return length;
968}
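To make the size calculation above concrete: assuming the event header (the type/len/time_delta bitfields) packs into 4 bytes, a request to reserve 10 bytes of data becomes 10 + 4 = 14, aligned up to 16 bytes on the ring; rb_update_event() then stores (16 - 4 + 3) >> 2 = 3 in event->len, i.e. three 4-byte words of payload, while payloads larger than RB_MAX_SMALL_DATA (28 bytes) set len to 0 and keep the byte count in array[0]. The 4-byte header size is an assumption about sizeof(struct ring_buffer_event), which is defined outside this patch.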
969
970static struct ring_buffer_event *
971__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
972 unsigned type, unsigned long length, u64 *ts)
973{
974 struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
975 unsigned long tail, write;
976 struct ring_buffer *buffer = cpu_buffer->buffer;
977 struct ring_buffer_event *event;
978 unsigned long flags;
979
980 commit_page = cpu_buffer->commit_page;
981 /* we just need to protect against interrupts */
982 barrier();
983 tail_page = cpu_buffer->tail_page;
984 write = local_add_return(length, &tail_page->write);
985 tail = write - length;
986
 987 /* See if we shot past the end of this buffer page */
988 if (write > BUF_PAGE_SIZE) {
989 struct buffer_page *next_page = tail_page;
990
991 local_irq_save(flags);
992 __raw_spin_lock(&cpu_buffer->lock);
993
994 rb_inc_page(cpu_buffer, &next_page);
995
996 head_page = cpu_buffer->head_page;
997 reader_page = cpu_buffer->reader_page;
998
999 /* we grabbed the lock before incrementing */
1000 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
1001 goto out_unlock;
1002
1003 /*
1004 * If for some reason, we had an interrupt storm that made
1005 * it all the way around the buffer, bail, and warn
1006 * about it.
1007 */
1008 if (unlikely(next_page == commit_page)) {
1009 WARN_ON_ONCE(1);
1010 goto out_unlock;
1011 }
1012
1013 if (next_page == head_page) {
1014 if (!(buffer->flags & RB_FL_OVERWRITE)) {
1015 /* reset write */
1016 if (tail <= BUF_PAGE_SIZE)
1017 local_set(&tail_page->write, tail);
1018 goto out_unlock;
1019 }
1020
1021 /* tail_page has not moved yet? */
1022 if (tail_page == cpu_buffer->tail_page) {
1023 /* count overflows */
1024 rb_update_overflow(cpu_buffer);
1025
1026 rb_inc_page(cpu_buffer, &head_page);
1027 cpu_buffer->head_page = head_page;
1028 cpu_buffer->head_page->read = 0;
1029 }
1030 }
1031
1032 /*
1033 * If the tail page is still the same as what we think
1034 * it is, then it is up to us to update the tail
1035 * pointer.
1036 */
1037 if (tail_page == cpu_buffer->tail_page) {
1038 local_set(&next_page->write, 0);
1039 local_set(&next_page->page->commit, 0);
1040 cpu_buffer->tail_page = next_page;
1041
1042 /* reread the time stamp */
1043 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1044 cpu_buffer->tail_page->page->time_stamp = *ts;
1045 }
1046
1047 /*
1048 * The actual tail page has moved forward.
1049 */
1050 if (tail < BUF_PAGE_SIZE) {
1051 /* Mark the rest of the page with padding */
1052 event = __rb_page_index(tail_page, tail);
1053 event->type = RINGBUF_TYPE_PADDING;
1054 }
1055
1056 if (tail <= BUF_PAGE_SIZE)
1057 /* Set the write back to the previous setting */
1058 local_set(&tail_page->write, tail);
1059
1060 /*
1061 * If this was a commit entry that failed,
1062 * increment that too
1063 */
1064 if (tail_page == cpu_buffer->commit_page &&
1065 tail == rb_commit_index(cpu_buffer)) {
1066 rb_set_commit_to_write(cpu_buffer);
1067 }
1068
1069 __raw_spin_unlock(&cpu_buffer->lock);
1070 local_irq_restore(flags);
1071
1072 /* fail and let the caller try again */
1073 return ERR_PTR(-EAGAIN);
1074 }
1075
1076 /* We reserved something on the buffer */
1077
1078 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1079 return NULL;
1080
1081 event = __rb_page_index(tail_page, tail);
1082 rb_update_event(event, type, length);
1083
1084 /*
1085 * If this is a commit and the tail is zero, then update
1086 * this page's time stamp.
1087 */
1088 if (!tail && rb_is_commit(cpu_buffer, event))
1089 cpu_buffer->commit_page->page->time_stamp = *ts;
1090
1091 return event;
1092
1093 out_unlock:
1094 __raw_spin_unlock(&cpu_buffer->lock);
1095 local_irq_restore(flags);
1096 return NULL;
1097}
1098
1099static int
1100rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1101 u64 *ts, u64 *delta)
1102{
1103 struct ring_buffer_event *event;
1104 static int once;
1105 int ret;
1106
1107 if (unlikely(*delta > (1ULL << 59) && !once++)) {
1108 printk(KERN_WARNING "Delta way too big! %llu"
1109 " ts=%llu write stamp = %llu\n",
1110 (unsigned long long)*delta,
1111 (unsigned long long)*ts,
1112 (unsigned long long)cpu_buffer->write_stamp);
1113 WARN_ON(1);
1114 }
1115
1116 /*
1117 * The delta is too big; we need to add a
1118 * new timestamp.
1119 */
1120 event = __rb_reserve_next(cpu_buffer,
1121 RINGBUF_TYPE_TIME_EXTEND,
1122 RB_LEN_TIME_EXTEND,
1123 ts);
1124 if (!event)
1125 return -EBUSY;
1126
1127 if (PTR_ERR(event) == -EAGAIN)
1128 return -EAGAIN;
1129
1130 /* Only a committed time event can update the write stamp */
1131 if (rb_is_commit(cpu_buffer, event)) {
1132 /*
1133 * If this is the first on the page, then we need to
1134 * update the page itself, and just put in a zero.
1135 */
1136 if (rb_event_index(event)) {
1137 event->time_delta = *delta & TS_MASK;
1138 event->array[0] = *delta >> TS_SHIFT;
1139 } else {
1140 cpu_buffer->commit_page->page->time_stamp = *ts;
1141 event->time_delta = 0;
1142 event->array[0] = 0;
1143 }
1144 cpu_buffer->write_stamp = *ts;
1145 /* let the caller know this was the commit */
1146 ret = 1;
1147 } else {
1148 /* Darn, this is just wasted space */
1149 event->time_delta = 0;
1150 event->array[0] = 0;
1151 ret = 0;
1152 }
1153
1154 *delta = 0;
1155
1156 return ret;
1157}
1158
1159static struct ring_buffer_event *
1160rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1161 unsigned type, unsigned long length)
1162{
1163 struct ring_buffer_event *event;
1164 u64 ts, delta;
1165 int commit = 0;
1166 int nr_loops = 0;
1167
1168 again:
1169 /*
1170 * We allow for interrupts to reenter here and do a trace.
1171 * If one does, it will cause this original code to loop
1172 * back here. Even with heavy interrupts happening, this
1173 * should only happen a few times in a row. If this happens
1174 * 1000 times in a row, there must be either an interrupt
1175 * storm or we have something buggy.
1176 * Bail!
1177 */
1178 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1179 return NULL;
1180
1181 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1182
1183 /*
1184 * Only the first commit can update the timestamp.
1185 * Yes there is a race here. If an interrupt comes in
1186 * just after the conditional and it traces too, then it
1187 * will also check the deltas. More than one timestamp may
1188 * also be made. But only the entry that did the actual
1189 * commit will be something other than zero.
1190 */
1191 if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1192 rb_page_write(cpu_buffer->tail_page) ==
1193 rb_commit_index(cpu_buffer)) {
1194
1195 delta = ts - cpu_buffer->write_stamp;
1196
1197 /* make sure this delta is calculated here */
1198 barrier();
1199
1200 /* Did the write stamp get updated already? */
1201 if (unlikely(ts < cpu_buffer->write_stamp))
1202 delta = 0;
1203
1204 if (test_time_stamp(delta)) {
1205
1206 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1207
1208 if (commit == -EBUSY)
1209 return NULL;
1210
1211 if (commit == -EAGAIN)
1212 goto again;
1213
1214 RB_WARN_ON(cpu_buffer, commit < 0);
1215 }
1216 } else
1217 /* Non commits have zero deltas */
1218 delta = 0;
1219
1220 event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1221 if (PTR_ERR(event) == -EAGAIN)
1222 goto again;
1223
1224 if (!event) {
1225 if (unlikely(commit))
1226 /*
1227 * Ouch! We needed a timestamp and it was committed. But
1228 * we didn't get our event reserved.
1229 */
1230 rb_set_commit_to_write(cpu_buffer);
1231 return NULL;
1232 }
1233
1234 /*
1235 * If the timestamp was committed, make the commit our entry
1236 * now so that we will update it when needed.
1237 */
1238 if (commit)
1239 rb_set_commit_event(cpu_buffer, event);
1240 else if (!rb_is_commit(cpu_buffer, event))
1241 delta = 0;
1242
1243 event->time_delta = delta;
1244
1245 return event;
1246}
1247
1248static DEFINE_PER_CPU(int, rb_need_resched);
1249
1250/**
1251 * ring_buffer_lock_reserve - reserve a part of the buffer
1252 * @buffer: the ring buffer to reserve from
1253 * @length: the length of the data to reserve (excluding event header)
1254 * @flags: a pointer to save the interrupt flags
1255 *
1256 * Returns a reserved event on the ring buffer to copy directly to.
1257 * The user of this interface will need to get the body to write into
1258 * and can use the ring_buffer_event_data() interface.
1259 *
1260 * The length is the length of the data needed, not the event length
1261 * which also includes the event header.
1262 *
1263 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1264 * If NULL is returned, then nothing has been allocated or locked.
1265 */
1266struct ring_buffer_event *
1267ring_buffer_lock_reserve(struct ring_buffer *buffer,
1268 unsigned long length,
1269 unsigned long *flags)
1270{
1271 struct ring_buffer_per_cpu *cpu_buffer;
1272 struct ring_buffer_event *event;
1273 int cpu, resched;
1274
1275 if (ring_buffer_flags != RB_BUFFERS_ON)
1276 return NULL;
1277
1278 if (atomic_read(&buffer->record_disabled))
1279 return NULL;
1280
1281 /* If we are tracing schedule, we don't want to recurse */
1282 resched = ftrace_preempt_disable();
1283
1284 cpu = raw_smp_processor_id();
1285
1286 if (!cpu_isset(cpu, buffer->cpumask))
1287 goto out;
1288
1289 cpu_buffer = buffer->buffers[cpu];
1290
1291 if (atomic_read(&cpu_buffer->record_disabled))
1292 goto out;
1293
1294 length = rb_calculate_event_length(length);
1295 if (length > BUF_PAGE_SIZE)
1296 goto out;
1297
1298 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1299 if (!event)
1300 goto out;
1301
1302 /*
1303 * Need to store resched state on this cpu.
1304 * Only the first needs to.
1305 */
1306
1307 if (preempt_count() == 1)
1308 per_cpu(rb_need_resched, cpu) = resched;
1309
1310 return event;
1311
1312 out:
1313 ftrace_preempt_enable(resched);
1314 return NULL;
1315}
1316EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
1317
1318static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1319 struct ring_buffer_event *event)
1320{
1321 cpu_buffer->entries++;
1322
1323 /* Only process further if we own the commit */
1324 if (!rb_is_commit(cpu_buffer, event))
1325 return;
1326
1327 cpu_buffer->write_stamp += event->time_delta;
1328
1329 rb_set_commit_to_write(cpu_buffer);
1330}
1331
1332/**
1333 * ring_buffer_unlock_commit - commit a reserved event
1334 * @buffer: The buffer to commit to
1335 * @event: The event pointer to commit.
1336 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1337 *
1338 * This commits the data to the ring buffer, and releases any locks held.
1339 *
1340 * Must be paired with ring_buffer_lock_reserve.
1341 */
1342int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1343 struct ring_buffer_event *event,
1344 unsigned long flags)
1345{
1346 struct ring_buffer_per_cpu *cpu_buffer;
1347 int cpu = raw_smp_processor_id();
1348
1349 cpu_buffer = buffer->buffers[cpu];
1350
1351 rb_commit(cpu_buffer, event);
1352
1353 /*
1354 * Only the last preempt count needs to restore preemption.
1355 */
1356 if (preempt_count() == 1)
1357 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1358 else
1359 preempt_enable_no_resched_notrace();
1360
1361 return 0;
1362}
1363EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
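Putting the two halves together, a minimal sketch of the reserve/fill/commit pattern described in the comments above; the payload struct and helper name are illustrative, not part of this patch:

/* Sketch: reserve space, fill it in place, then commit. */
struct my_entry {
	u64 value;
};

static int my_write(struct ring_buffer *buffer, u64 value)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;
	unsigned long flags;

	event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &flags);
	if (!event)
		return -EBUSY;	/* recording disabled or no space */

	entry = ring_buffer_event_data(event);
	entry->value = value;

	return ring_buffer_unlock_commit(buffer, event, flags);
}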
1364
1365/**
1366 * ring_buffer_write - write data to the buffer without reserving
1367 * @buffer: The ring buffer to write to.
1368 * @length: The length of the data being written (excluding the event header)
1369 * @data: The data to write to the buffer.
1370 *
1371 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1372 * one function. If you already have the data to write to the buffer, it
1373 * may be easier to simply call this function.
1374 *
1375 * Note, like ring_buffer_lock_reserve, the length is the length of the data
1376 * and not the length of the event which would hold the header.
1377 */
1378int ring_buffer_write(struct ring_buffer *buffer,
1379 unsigned long length,
1380 void *data)
1381{
1382 struct ring_buffer_per_cpu *cpu_buffer;
1383 struct ring_buffer_event *event;
1384 unsigned long event_length;
1385 void *body;
1386 int ret = -EBUSY;
1387 int cpu, resched;
1388
1389 if (ring_buffer_flags != RB_BUFFERS_ON)
1390 return -EBUSY;
1391
1392 if (atomic_read(&buffer->record_disabled))
1393 return -EBUSY;
1394
1395 resched = ftrace_preempt_disable();
1396
1397 cpu = raw_smp_processor_id();
1398
1399 if (!cpu_isset(cpu, buffer->cpumask))
1400 goto out;
1401
1402 cpu_buffer = buffer->buffers[cpu];
1403
1404 if (atomic_read(&cpu_buffer->record_disabled))
1405 goto out;
1406
1407 event_length = rb_calculate_event_length(length);
1408 event = rb_reserve_next_event(cpu_buffer,
1409 RINGBUF_TYPE_DATA, event_length);
1410 if (!event)
1411 goto out;
1412
1413 body = rb_event_data(event);
1414
1415 memcpy(body, data, length);
1416
1417 rb_commit(cpu_buffer, event);
1418
1419 ret = 0;
1420 out:
1421 ftrace_preempt_enable(resched);
1422
1423 return ret;
1424}
1425EXPORT_SYMBOL_GPL(ring_buffer_write);
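For comparison, a hedged one-shot write through ring_buffer_write(), which reserves, copies and commits internally; the payload value here is arbitrary example data:

	static void example_write_oneshot(struct ring_buffer *buffer)
	{
		u32 payload = 0xdeadbeef;	/* arbitrary example data */
		int ret;

		ret = ring_buffer_write(buffer, sizeof(payload), &payload);
		if (ret)			/* -EBUSY: recording disabled or unavailable */
			pr_debug("ring_buffer_write failed: %d\n", ret);
	}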
1426
1427static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1428{
1429 struct buffer_page *reader = cpu_buffer->reader_page;
1430 struct buffer_page *head = cpu_buffer->head_page;
1431 struct buffer_page *commit = cpu_buffer->commit_page;
1432
1433 return reader->read == rb_page_commit(reader) &&
1434 (commit == reader ||
1435 (commit == head &&
1436 head->read == rb_page_commit(commit)));
1437}
1438
1439/**
1440 * ring_buffer_record_disable - stop all writes into the buffer
1441 * @buffer: The ring buffer to stop writes to.
1442 *
1443 * This prevents all writes to the buffer. Any attempt to write
1444 * to the buffer after this will fail and return NULL.
1445 *
1446 * The caller should call synchronize_sched() after this.
1447 */
1448void ring_buffer_record_disable(struct ring_buffer *buffer)
1449{
1450 atomic_inc(&buffer->record_disabled);
1451}
1452EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
1453
1454/**
1455 * ring_buffer_record_enable - enable writes to the buffer
1456 * @buffer: The ring buffer to enable writes
1457 *
1458 * Note, multiple disables will need the same number of enables
1459 * to truly enable the writing (much like preempt_disable).
1460 */
1461void ring_buffer_record_enable(struct ring_buffer *buffer)
1462{
1463 atomic_dec(&buffer->record_disabled);
1464}
1465EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
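A sketch of the disable/enable pattern the kerneldoc above suggests, including the synchronize_sched() step; what the caller does while writes are quiesced is up to it:

	static void example_quiesce(struct ring_buffer *buffer)
	{
		ring_buffer_record_disable(buffer);
		synchronize_sched();	/* wait out writers already past the check */

		/* ... read, reset or otherwise touch the buffer safely here ... */

		ring_buffer_record_enable(buffer);
	}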
1466
1467/**
1468 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1469 * @buffer: The ring buffer to stop writes to.
1470 * @cpu: The CPU buffer to stop
1471 *
1472 * This prevents all writes to the buffer. Any attempt to write
1473 * to the buffer after this will fail and return NULL.
1474 *
1475 * The caller should call synchronize_sched() after this.
1476 */
1477void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1478{
1479 struct ring_buffer_per_cpu *cpu_buffer;
1480
1481 if (!cpu_isset(cpu, buffer->cpumask))
1482 return;
1483
1484 cpu_buffer = buffer->buffers[cpu];
1485 atomic_inc(&cpu_buffer->record_disabled);
1486}
1487EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
1488
1489/**
1490 * ring_buffer_record_enable_cpu - enable writes to the buffer
1491 * @buffer: The ring buffer to enable writes
1492 * @cpu: The CPU to enable.
1493 *
1494 * Note, multiple disables will need the same number of enables
1495 * to truly enable the writing (much like preempt_disable).
1496 */
1497void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1498{
1499 struct ring_buffer_per_cpu *cpu_buffer;
1500
1501 if (!cpu_isset(cpu, buffer->cpumask))
1502 return;
1503
1504 cpu_buffer = buffer->buffers[cpu];
1505 atomic_dec(&cpu_buffer->record_disabled);
1506}
1507EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
1508
1509/**
1510 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1511 * @buffer: The ring buffer
1512 * @cpu: The per CPU buffer to get the entries from.
1513 */
1514unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1515{
1516 struct ring_buffer_per_cpu *cpu_buffer;
1517
1518 if (!cpu_isset(cpu, buffer->cpumask))
1519 return 0;
1520
1521 cpu_buffer = buffer->buffers[cpu];
1522 return cpu_buffer->entries;
1523}
1524EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu);
1525
1526/**
1527 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1528 * @buffer: The ring buffer
1529 * @cpu: The per CPU buffer to get the number of overruns from
1530 */
1531unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1532{
1533 struct ring_buffer_per_cpu *cpu_buffer;
1534
1535 if (!cpu_isset(cpu, buffer->cpumask))
1536 return 0;
1537
1538 cpu_buffer = buffer->buffers[cpu];
1539 return cpu_buffer->overrun;
1540}
1541EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
1542
1543/**
1544 * ring_buffer_entries - get the number of entries in a buffer
1545 * @buffer: The ring buffer
1546 *
1547 * Returns the total number of entries in the ring buffer
1548 * (all CPU entries)
1549 */
1550unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1551{
1552 struct ring_buffer_per_cpu *cpu_buffer;
1553 unsigned long entries = 0;
1554 int cpu;
1555
1556 /* if you care about this being correct, lock the buffer */
1557 for_each_buffer_cpu(buffer, cpu) {
1558 cpu_buffer = buffer->buffers[cpu];
1559 entries += cpu_buffer->entries;
1560 }
1561
1562 return entries;
1563}
1564EXPORT_SYMBOL_GPL(ring_buffer_entries);
1565
1566/**
1567 * ring_buffer_overruns - get the number of overruns in the ring buffer
1568 * @buffer: The ring buffer
1569 *
1570 * Returns the total number of overruns in the ring buffer
1571 * (all CPU entries)
1572 */
1573unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1574{
1575 struct ring_buffer_per_cpu *cpu_buffer;
1576 unsigned long overruns = 0;
1577 int cpu;
1578
1579 /* if you care about this being correct, lock the buffer */
1580 for_each_buffer_cpu(buffer, cpu) {
1581 cpu_buffer = buffer->buffers[cpu];
1582 overruns += cpu_buffer->overrun;
1583 }
1584
1585 return overruns;
1586}
1587EXPORT_SYMBOL_GPL(ring_buffer_overruns);
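A small usage sketch for the statistics helpers above; as their comments note, the totals are only approximate unless the caller serializes against writers:

	static void example_stats(struct ring_buffer *buffer)
	{
		unsigned long entries = ring_buffer_entries(buffer);
		unsigned long overruns = ring_buffer_overruns(buffer);

		pr_info("ring buffer: %lu entries, %lu overruns\n",
			entries, overruns);
	}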
1588
1589static void rb_iter_reset(struct ring_buffer_iter *iter)
1590{
1591 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1592
1593 /* Iterator usage is expected to have record disabled */
1594 if (list_empty(&cpu_buffer->reader_page->list)) {
1595 iter->head_page = cpu_buffer->head_page;
1596 iter->head = cpu_buffer->head_page->read;
1597 } else {
1598 iter->head_page = cpu_buffer->reader_page;
1599 iter->head = cpu_buffer->reader_page->read;
1600 }
1601 if (iter->head)
1602 iter->read_stamp = cpu_buffer->read_stamp;
1603 else
1604 iter->read_stamp = iter->head_page->page->time_stamp;
1605}
1606
1607/**
1608 * ring_buffer_iter_reset - reset an iterator
1609 * @iter: The iterator to reset
1610 *
1611 * Resets the iterator, so that it will start from the beginning
1612 * again.
1613 */
1614void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1615{
1616 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1617 unsigned long flags;
1618
1619 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1620 rb_iter_reset(iter);
1621 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1622}
1623EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
1624
1625/**
1626 * ring_buffer_iter_empty - check if an iterator has no more to read
1627 * @iter: The iterator to check
1628 */
1629int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1630{
1631 struct ring_buffer_per_cpu *cpu_buffer;
1632
1633 cpu_buffer = iter->cpu_buffer;
1634
1635 return iter->head_page == cpu_buffer->commit_page &&
1636 iter->head == rb_commit_index(cpu_buffer);
1637}
1638EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
1639
1640static void
1641rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1642 struct ring_buffer_event *event)
1643{
1644 u64 delta;
1645
1646 switch (event->type) {
1647 case RINGBUF_TYPE_PADDING:
1648 return;
1649
1650 case RINGBUF_TYPE_TIME_EXTEND:
1651 delta = event->array[0];
1652 delta <<= TS_SHIFT;
1653 delta += event->time_delta;
1654 cpu_buffer->read_stamp += delta;
1655 return;
1656
1657 case RINGBUF_TYPE_TIME_STAMP:
1658 /* FIXME: not implemented */
1659 return;
1660
1661 case RINGBUF_TYPE_DATA:
1662 cpu_buffer->read_stamp += event->time_delta;
1663 return;
1664
1665 default:
1666 BUG();
1667 }
1668 return;
1669}
1670
1671static void
1672rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1673 struct ring_buffer_event *event)
1674{
1675 u64 delta;
1676
1677 switch (event->type) {
1678 case RINGBUF_TYPE_PADDING:
1679 return;
1680
1681 case RINGBUF_TYPE_TIME_EXTEND:
1682 delta = event->array[0];
1683 delta <<= TS_SHIFT;
1684 delta += event->time_delta;
1685 iter->read_stamp += delta;
1686 return;
1687
1688 case RINGBUF_TYPE_TIME_STAMP:
1689 /* FIXME: not implemented */
1690 return;
1691
1692 case RINGBUF_TYPE_DATA:
1693 iter->read_stamp += event->time_delta;
1694 return;
1695
1696 default:
1697 BUG();
1698 }
1699 return;
1700}
1701
1702static struct buffer_page *
1703rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1704{
1705 struct buffer_page *reader = NULL;
1706 unsigned long flags;
1707 int nr_loops = 0;
1708
1709 local_irq_save(flags);
1710 __raw_spin_lock(&cpu_buffer->lock);
1711
1712 again:
1713 /*
1714 * This should normally only loop twice. But because the
1715 * start of the reader inserts an empty page, it causes
1716 * a case where we will loop three times. There should be no
1717 * reason to loop four times (that I know of).
1718 */
1719 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1720 reader = NULL;
1721 goto out;
1722 }
1723
1724 reader = cpu_buffer->reader_page;
1725
1726 /* If there's more to read, return this page */
1727 if (cpu_buffer->reader_page->read < rb_page_size(reader))
1728 goto out;
1729
1730 /* Never should we have an index greater than the size */
1731 if (RB_WARN_ON(cpu_buffer,
1732 cpu_buffer->reader_page->read > rb_page_size(reader)))
1733 goto out;
1734
1735 /* check if we caught up to the tail */
1736 reader = NULL;
1737 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1738 goto out;
1739
1740 /*
1741 * Splice the empty reader page into the list around the head.
1742 * Reset the reader page to size zero.
1743 */
1744
1745 reader = cpu_buffer->head_page;
1746 cpu_buffer->reader_page->list.next = reader->list.next;
1747 cpu_buffer->reader_page->list.prev = reader->list.prev;
1748
1749 local_set(&cpu_buffer->reader_page->write, 0);
1750 local_set(&cpu_buffer->reader_page->page->commit, 0);
1751
1752 /* Make the reader page now replace the head */
1753 reader->list.prev->next = &cpu_buffer->reader_page->list;
1754 reader->list.next->prev = &cpu_buffer->reader_page->list;
1755
1756 /*
1757 * If the tail is on the reader, then we must set the head
1758 * to the inserted page, otherwise we set it one before.
1759 */
1760 cpu_buffer->head_page = cpu_buffer->reader_page;
1761
1762 if (cpu_buffer->commit_page != reader)
1763 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1764
1765 /* Finally update the reader page to the new head */
1766 cpu_buffer->reader_page = reader;
1767 rb_reset_reader_page(cpu_buffer);
1768
1769 goto again;
1770
1771 out:
1772 __raw_spin_unlock(&cpu_buffer->lock);
1773 local_irq_restore(flags);
1774
1775 return reader;
1776}
1777
1778static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1779{
1780 struct ring_buffer_event *event;
1781 struct buffer_page *reader;
1782 unsigned length;
1783
1784 reader = rb_get_reader_page(cpu_buffer);
1785
1786 /* This function should not be called when buffer is empty */
1787 if (RB_WARN_ON(cpu_buffer, !reader))
1788 return;
1789
1790 event = rb_reader_event(cpu_buffer);
1791
1792 if (event->type == RINGBUF_TYPE_DATA)
1793 cpu_buffer->entries--;
1794
1795 rb_update_read_stamp(cpu_buffer, event);
1796
1797 length = rb_event_length(event);
1798 cpu_buffer->reader_page->read += length;
1799}
1800
1801static void rb_advance_iter(struct ring_buffer_iter *iter)
1802{
1803 struct ring_buffer *buffer;
1804 struct ring_buffer_per_cpu *cpu_buffer;
1805 struct ring_buffer_event *event;
1806 unsigned length;
1807
1808 cpu_buffer = iter->cpu_buffer;
1809 buffer = cpu_buffer->buffer;
1810
1811 /*
1812 * Check if we are at the end of the buffer.
1813 */
1814 if (iter->head >= rb_page_size(iter->head_page)) {
1815 if (RB_WARN_ON(buffer,
1816 iter->head_page == cpu_buffer->commit_page))
1817 return;
1818 rb_inc_iter(iter);
1819 return;
1820 }
1821
1822 event = rb_iter_head_event(iter);
1823
1824 length = rb_event_length(event);
1825
1826 /*
1827 * This should not be called to advance the header if we are
1828 * at the tail of the buffer.
1829 */
1830 if (RB_WARN_ON(cpu_buffer,
1831 (iter->head_page == cpu_buffer->commit_page) &&
1832 (iter->head + length > rb_commit_index(cpu_buffer))))
1833 return;
1834
1835 rb_update_iter_read_stamp(iter, event);
1836
1837 iter->head += length;
1838
1839 /* check for end of page padding */
1840 if ((iter->head >= rb_page_size(iter->head_page)) &&
1841 (iter->head_page != cpu_buffer->commit_page))
1842 rb_advance_iter(iter);
1843}
1844
1845static struct ring_buffer_event *
1846rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1847{
1848 struct ring_buffer_per_cpu *cpu_buffer;
1849 struct ring_buffer_event *event;
1850 struct buffer_page *reader;
1851 int nr_loops = 0;
1852
1853 if (!cpu_isset(cpu, buffer->cpumask))
1854 return NULL;
1855
1856 cpu_buffer = buffer->buffers[cpu];
1857
1858 again:
1859 /*
1860 * We repeat when a timestamp is encountered. It is possible
1861 * to get multiple timestamps from an interrupt entering just
1862 * as one timestamp is about to be written. The max times
1863 * that this can happen is the number of nested interrupts we
1864 * can have. Nesting 10 deep of interrupts is clearly
1865 * an anomaly.
1866 */
1867 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1868 return NULL;
1869
1870 reader = rb_get_reader_page(cpu_buffer);
1871 if (!reader)
1872 return NULL;
1873
1874 event = rb_reader_event(cpu_buffer);
1875
1876 switch (event->type) {
1877 case RINGBUF_TYPE_PADDING:
1878 RB_WARN_ON(cpu_buffer, 1);
1879 rb_advance_reader(cpu_buffer);
1880 return NULL;
1881
1882 case RINGBUF_TYPE_TIME_EXTEND:
1883 /* Internal data, OK to advance */
1884 rb_advance_reader(cpu_buffer);
1885 goto again;
1886
1887 case RINGBUF_TYPE_TIME_STAMP:
1888 /* FIXME: not implemented */
1889 rb_advance_reader(cpu_buffer);
1890 goto again;
1891
1892 case RINGBUF_TYPE_DATA:
1893 if (ts) {
1894 *ts = cpu_buffer->read_stamp + event->time_delta;
1895 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1896 }
1897 return event;
1898
1899 default:
1900 BUG();
1901 }
1902
1903 return NULL;
1904}
1905EXPORT_SYMBOL_GPL(ring_buffer_peek);
1906
1907static struct ring_buffer_event *
1908rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1909{
1910 struct ring_buffer *buffer;
1911 struct ring_buffer_per_cpu *cpu_buffer;
1912 struct ring_buffer_event *event;
1913 int nr_loops = 0;
1914
1915 if (ring_buffer_iter_empty(iter))
1916 return NULL;
1917
1918 cpu_buffer = iter->cpu_buffer;
1919 buffer = cpu_buffer->buffer;
1920
1921 again:
1922 /*
1923 * We repeat when a timestamp is encountered. It is possible
1924 * to get multiple timestamps from an interrupt entering just
1925 * as one timestamp is about to be written. The max times
1926 * that this can happen is the number of nested interrupts we
1927 * can have. Nesting 10 deep of interrupts is clearly
1928 * an anomaly.
1929 */
1930 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1931 return NULL;
1932
1933 if (rb_per_cpu_empty(cpu_buffer))
1934 return NULL;
1935
1936 event = rb_iter_head_event(iter);
1937
1938 switch (event->type) {
1939 case RINGBUF_TYPE_PADDING:
1940 rb_inc_iter(iter);
1941 goto again;
1942
1943 case RINGBUF_TYPE_TIME_EXTEND:
1944 /* Internal data, OK to advance */
1945 rb_advance_iter(iter);
1946 goto again;
1947
1948 case RINGBUF_TYPE_TIME_STAMP:
1949 /* FIXME: not implemented */
1950 rb_advance_iter(iter);
1951 goto again;
1952
1953 case RINGBUF_TYPE_DATA:
1954 if (ts) {
1955 *ts = iter->read_stamp + event->time_delta;
1956 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1957 }
1958 return event;
1959
1960 default:
1961 BUG();
1962 }
1963
1964 return NULL;
1965}
1966EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
1967
1968/**
1969 * ring_buffer_peek - peek at the next event to be read
1970 * @buffer: The ring buffer to read
1971 * @cpu: The cpu to peek at
1972 * @ts: The timestamp counter of this event.
1973 *
1974 * This will return the event that will be read next, but does
1975 * not consume the data.
1976 */
1977struct ring_buffer_event *
1978ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1979{
1980 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1981 struct ring_buffer_event *event;
1982 unsigned long flags;
1983
1984 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1985 event = rb_buffer_peek(buffer, cpu, ts);
1986 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1987
1988 return event;
1989}
1990
1991/**
1992 * ring_buffer_iter_peek - peek at the next event to be read
1993 * @iter: The ring buffer iterator
1994 * @ts: The timestamp counter of this event.
1995 *
1996 * This will return the event that will be read next, but does
1997 * not increment the iterator.
1998 */
1999struct ring_buffer_event *
2000ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
2001{
2002 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2003 struct ring_buffer_event *event;
2004 unsigned long flags;
2005
2006 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2007 event = rb_iter_peek(iter, ts);
2008 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2009
2010 return event;
2011}
2012
2013/**
2014 * ring_buffer_consume - return an event and consume it
2015 * @buffer: The ring buffer to get the next event from
2016 *
2017 * Returns the next event in the ring buffer, and that event is consumed.
2018 * Meaning that sequential reads will keep returning a different event,
2019 * and eventually empty the ring buffer if the producer is slower.
2020 */
2021struct ring_buffer_event *
2022ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2023{
2024 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2025 struct ring_buffer_event *event;
2026 unsigned long flags;
2027
2028 if (!cpu_isset(cpu, buffer->cpumask))
2029 return NULL;
2030
2031 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2032
2033 event = rb_buffer_peek(buffer, cpu, ts);
2034 if (!event)
2035 goto out;
2036
2037 rb_advance_reader(cpu_buffer);
2038
2039 out:
2040 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2041
2042 return event;
2043}
2044EXPORT_SYMBOL_GPL(ring_buffer_consume);
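A hedged consuming-read loop built on ring_buffer_consume(); process_entry() is a hypothetical consumer, and the returned timestamp has already been normalized by rb_buffer_peek():

	static void example_consume(struct ring_buffer *buffer, int cpu)
	{
		struct ring_buffer_event *event;
		u64 ts;

		while ((event = ring_buffer_consume(buffer, cpu, &ts)) != NULL)
			/* process_entry() is hypothetical */
			process_entry(ring_buffer_event_data(event), ts);
	}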
2045
2046/**
2047 * ring_buffer_read_start - start a non consuming read of the buffer
2048 * @buffer: The ring buffer to read from
2049 * @cpu: The cpu buffer to iterate over
2050 *
2051 * This starts up an iteration through the buffer. It also disables
2052 * the recording to the buffer until the reading is finished.
2053 * This prevents the reading from being corrupted. This is not
2054 * a consuming read, so a producer is not expected.
2055 *
2056 * Must be paired with ring_buffer_read_finish.
2057 */
2058struct ring_buffer_iter *
2059ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
2060{
2061 struct ring_buffer_per_cpu *cpu_buffer;
2062 struct ring_buffer_iter *iter;
2063 unsigned long flags;
2064
2065 if (!cpu_isset(cpu, buffer->cpumask))
2066 return NULL;
2067
2068 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
2069 if (!iter)
2070 return NULL;
2071
2072 cpu_buffer = buffer->buffers[cpu];
2073
2074 iter->cpu_buffer = cpu_buffer;
2075
2076 atomic_inc(&cpu_buffer->record_disabled);
2077 synchronize_sched();
2078
2079 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2080 __raw_spin_lock(&cpu_buffer->lock);
2081 rb_iter_reset(iter);
2082 __raw_spin_unlock(&cpu_buffer->lock);
2083 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2084
2085 return iter;
2086}
2087EXPORT_SYMBOL_GPL(ring_buffer_read_start);
2088
2089/**
2090 * ring_buffer_read_finish - finish reading the iterator of the buffer
2091 * @iter: The iterator retrieved by ring_buffer_read_start
2092 *
2093 * This re-enables the recording to the buffer, and frees the
2094 * iterator.
2095 */
2096void
2097ring_buffer_read_finish(struct ring_buffer_iter *iter)
2098{
2099 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2100
2101 atomic_dec(&cpu_buffer->record_disabled);
2102 kfree(iter);
2103}
2104EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
2105
2106/**
2107 * ring_buffer_read - read the next item in the ring buffer by the iterator
2108 * @iter: The ring buffer iterator
2109 * @ts: The time stamp of the event read.
2110 *
2111 * This reads the next event in the ring buffer and increments the iterator.
2112 */
2113struct ring_buffer_event *
2114ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2115{
2116 struct ring_buffer_event *event;
2117 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2118 unsigned long flags;
2119
2120 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2121 event = rb_iter_peek(iter, ts);
2122 if (!event)
2123 goto out;
2124
2125 rb_advance_iter(iter);
2126 out:
2127 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2128
2129 return event;
2130}
2131EXPORT_SYMBOL_GPL(ring_buffer_read);
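The non-consuming path pairs ring_buffer_read_start(), ring_buffer_read() and ring_buffer_read_finish(); a minimal sketch, with dump_entry() as a hypothetical callback:

	static void example_iterate(struct ring_buffer *buffer, int cpu)
	{
		struct ring_buffer_iter *iter;
		struct ring_buffer_event *event;
		u64 ts;

		iter = ring_buffer_read_start(buffer, cpu);	/* disables recording */
		if (!iter)
			return;

		while ((event = ring_buffer_read(iter, &ts)) != NULL)
			/* dump_entry() is hypothetical */
			dump_entry(ring_buffer_event_data(event), ts);

		ring_buffer_read_finish(iter);			/* re-enables recording */
	}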
2132
2133/**
2134 * ring_buffer_size - return the size of the ring buffer (in bytes)
2135 * @buffer: The ring buffer.
2136 */
2137unsigned long ring_buffer_size(struct ring_buffer *buffer)
2138{
2139 return BUF_PAGE_SIZE * buffer->pages;
2140}
2141EXPORT_SYMBOL_GPL(ring_buffer_size);
2142
2143static void
2144rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2145{
2146 cpu_buffer->head_page
2147 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
2148 local_set(&cpu_buffer->head_page->write, 0);
2149 local_set(&cpu_buffer->head_page->page->commit, 0);
2150
2151 cpu_buffer->head_page->read = 0;
2152
2153 cpu_buffer->tail_page = cpu_buffer->head_page;
2154 cpu_buffer->commit_page = cpu_buffer->head_page;
2155
2156 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
2157 local_set(&cpu_buffer->reader_page->write, 0);
2158 local_set(&cpu_buffer->reader_page->page->commit, 0);
2159 cpu_buffer->reader_page->read = 0;
2160
2161 cpu_buffer->overrun = 0;
2162 cpu_buffer->entries = 0;
2163}
2164
2165/**
2166 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
2167 * @buffer: The ring buffer to reset a per cpu buffer of
2168 * @cpu: The CPU buffer to be reset
2169 */
2170void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2171{
2172 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2173 unsigned long flags;
2174
2175 if (!cpu_isset(cpu, buffer->cpumask))
2176 return;
2177
2178 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2179
2180 __raw_spin_lock(&cpu_buffer->lock);
2181
2182 rb_reset_cpu(cpu_buffer);
2183
2184 __raw_spin_unlock(&cpu_buffer->lock);
2185
2186 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2187}
2188EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
2189
2190/**
2191 * ring_buffer_reset - reset a ring buffer
2192 * @buffer: The ring buffer to reset all cpu buffers
2193 */
2194void ring_buffer_reset(struct ring_buffer *buffer)
2195{
2196 int cpu;
2197
2198 for_each_buffer_cpu(buffer, cpu)
2199 ring_buffer_reset_cpu(buffer, cpu);
2200}
2201EXPORT_SYMBOL_GPL(ring_buffer_reset);
2202
2203/**
2204 * ring_buffer_empty - is the ring buffer empty?
2205 * @buffer: The ring buffer to test
2206 */
2207int ring_buffer_empty(struct ring_buffer *buffer)
2208{
2209 struct ring_buffer_per_cpu *cpu_buffer;
2210 int cpu;
2211
2212 /* yes this is racy, but if you don't like the race, lock the buffer */
2213 for_each_buffer_cpu(buffer, cpu) {
2214 cpu_buffer = buffer->buffers[cpu];
2215 if (!rb_per_cpu_empty(cpu_buffer))
2216 return 0;
2217 }
2218 return 1;
2219}
2220EXPORT_SYMBOL_GPL(ring_buffer_empty);
2221
2222/**
2223 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
2224 * @buffer: The ring buffer
2225 * @cpu: The CPU buffer to test
2226 */
2227int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2228{
2229 struct ring_buffer_per_cpu *cpu_buffer;
2230
2231 if (!cpu_isset(cpu, buffer->cpumask))
2232 return 1;
2233
2234 cpu_buffer = buffer->buffers[cpu];
2235 return rb_per_cpu_empty(cpu_buffer);
2236}
2237EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu);
2238
2239/**
2240 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2241 * @buffer_a: One buffer to swap with
2242 * @buffer_b: The other buffer to swap with
2243 *
2244 * This function is useful for tracers that want to take a "snapshot"
2245 * of a CPU buffer and have another backup buffer lying around.
2246 * It is expected that the tracer handles the cpu buffer not being
2247 * used at the moment.
2248 */
2249int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2250 struct ring_buffer *buffer_b, int cpu)
2251{
2252 struct ring_buffer_per_cpu *cpu_buffer_a;
2253 struct ring_buffer_per_cpu *cpu_buffer_b;
2254
2255 if (!cpu_isset(cpu, buffer_a->cpumask) ||
2256 !cpu_isset(cpu, buffer_b->cpumask))
2257 return -EINVAL;
2258
2259 /* At least make sure the two buffers are somewhat the same */
2260 if (buffer_a->pages != buffer_b->pages)
2261 return -EINVAL;
2262
2263 cpu_buffer_a = buffer_a->buffers[cpu];
2264 cpu_buffer_b = buffer_b->buffers[cpu];
2265
2266 /*
2267 * We can't do a synchronize_sched here because this
2268 * function can be called in atomic context.
2269 * Normally this will be called from the same CPU as cpu.
2270 * If not it's up to the caller to protect this.
2271 */
2272 atomic_inc(&cpu_buffer_a->record_disabled);
2273 atomic_inc(&cpu_buffer_b->record_disabled);
2274
2275 buffer_a->buffers[cpu] = cpu_buffer_b;
2276 buffer_b->buffers[cpu] = cpu_buffer_a;
2277
2278 cpu_buffer_b->buffer = buffer_a;
2279 cpu_buffer_a->buffer = buffer_b;
2280
2281 atomic_dec(&cpu_buffer_a->record_disabled);
2282 atomic_dec(&cpu_buffer_b->record_disabled);
2283
2284 return 0;
2285}
2286EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
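A sketch of the snapshot use case the comment describes, assuming a spare buffer allocated with the same number of pages as the live one; read_snapshot() is a hypothetical reader:

	static void example_snapshot(struct ring_buffer *live_buf,
				     struct ring_buffer *snapshot_buf, int cpu)
	{
		/* both buffers must have the same number of pages */
		if (ring_buffer_swap_cpu(snapshot_buf, live_buf, cpu) == 0)
			read_snapshot(snapshot_buf, cpu);	/* hypothetical */
	}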
2287
2288static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2289 struct buffer_data_page *bpage)
2290{
2291 struct ring_buffer_event *event;
2292 unsigned long head;
2293
2294 __raw_spin_lock(&cpu_buffer->lock);
2295 for (head = 0; head < local_read(&bpage->commit);
2296 head += rb_event_length(event)) {
2297
2298 event = __rb_data_page_index(bpage, head);
2299 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2300 return;
2301 /* Only count data entries */
2302 if (event->type != RINGBUF_TYPE_DATA)
2303 continue;
2304 cpu_buffer->entries--;
2305 }
2306 __raw_spin_unlock(&cpu_buffer->lock);
2307}
2308
2309/**
2310 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2311 * @buffer: the buffer to allocate for.
2312 *
2313 * This function is used in conjunction with ring_buffer_read_page.
2314 * When reading a full page from the ring buffer, these functions
2315 * can be used to speed up the process. The calling function should
2316 * allocate a few pages first with this function. Then when it
2317 * needs to get pages from the ring buffer, it passes the result
2318 * of this function into ring_buffer_read_page, which will swap
2319 * the page that was allocated, with the read page of the buffer.
2320 *
2321 * Returns:
2322 * The page allocated, or NULL on error.
2323 */
2324void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2325{
2326 unsigned long addr;
2327 struct buffer_data_page *bpage;
2328
2329 addr = __get_free_page(GFP_KERNEL);
2330 if (!addr)
2331 return NULL;
2332
2333 bpage = (void *)addr;
2334
2335 return bpage;
2336}
2337
2338/**
2339 * ring_buffer_free_read_page - free an allocated read page
2340 * @buffer: the buffer the page was allocated for
2341 * @data: the page to free
2342 *
2343 * Free a page allocated from ring_buffer_alloc_read_page.
2344 */
2345void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2346{
2347 free_page((unsigned long)data);
2348}
2349
2350/**
2351 * ring_buffer_read_page - extract a page from the ring buffer
2352 * @buffer: buffer to extract from
2353 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2354 * @cpu: the cpu of the buffer to extract
2355 * @full: should the extraction only happen when the page is full.
2356 *
2357 * This function will pull out a page from the ring buffer and consume it.
2358 * @data_page must be the address of the variable that was returned
2359 * from ring_buffer_alloc_read_page. This is because the page might be used
2360 * to swap with a page in the ring buffer.
2361 *
2362 * for example:
2363 * rpage = ring_buffer_alloc_read_page(buffer);
2364 * if (!rpage)
2365 * return error;
2366 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2367 * if (ret)
2368 * process_page(rpage);
2369 *
2370 * When @full is set, the function will not return true unless
2371 * the writer is off the reader page.
2372 *
2373 * Note: it is up to the calling functions to handle sleeps and wakeups.
2374 * The ring buffer can be used anywhere in the kernel and can not
2375 * blindly call wake_up. The layer that uses the ring buffer must be
2376 * responsible for that.
2377 *
2378 * Returns:
2379 * 1 if data has been transferred
2380 * 0 if no data has been transferred.
2381 */
2382int ring_buffer_read_page(struct ring_buffer *buffer,
2383 void **data_page, int cpu, int full)
2384{
2385 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2386 struct ring_buffer_event *event;
2387 struct buffer_data_page *bpage;
2388 unsigned long flags;
2389 int ret = 0;
2390
2391 if (!data_page)
2392 return 0;
2393
2394 bpage = *data_page;
2395 if (!bpage)
2396 return 0;
2397
2398 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2399
2400 /*
2401 * rb_buffer_peek will get the next ring buffer if
2402 * the current reader page is empty.
2403 */
2404 event = rb_buffer_peek(buffer, cpu, NULL);
2405 if (!event)
2406 goto out;
2407
2408 /* check for data */
2409 if (!local_read(&cpu_buffer->reader_page->page->commit))
2410 goto out;
2411 /*
2412 * If the writer is already off of the read page, then simply
2413 * switch the read page with the given page. Otherwise
2414 * we need to copy the data from the reader to the writer.
2415 */
2416 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2417 unsigned int read = cpu_buffer->reader_page->read;
2418
2419 if (full)
2420 goto out;
2421 /* The writer is still on the reader page, we must copy */
2422 bpage = cpu_buffer->reader_page->page;
2423 memcpy(bpage->data,
2424 cpu_buffer->reader_page->page->data + read,
2425 local_read(&bpage->commit) - read);
2426
2427 /* consume what was read */
2428 cpu_buffer->reader_page += read;
2429
2430 } else {
2431 /* swap the pages */
2432 rb_init_page(bpage);
2433 bpage = cpu_buffer->reader_page->page;
2434 cpu_buffer->reader_page->page = *data_page;
2435 cpu_buffer->reader_page->read = 0;
2436 *data_page = bpage;
2437 }
2438 ret = 1;
2439
2440 /* update the entry counter */
2441 rb_remove_entries(cpu_buffer, bpage);
2442 out:
2443 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2444
2445 return ret;
2446}
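Putting the page-based helpers together, a hedged sketch that mirrors the kerneldoc example above and adds the matching free; process_page() is hypothetical:

	static int example_read_page(struct ring_buffer *buffer, int cpu)
	{
		void *rpage;
		int ret;

		rpage = ring_buffer_alloc_read_page(buffer);
		if (!rpage)
			return -ENOMEM;

		ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
		if (ret)
			process_page(rpage);	/* hypothetical consumer of the page */

		ring_buffer_free_read_page(buffer, rpage);
		return 0;
	}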
2447
2448static ssize_t
2449rb_simple_read(struct file *filp, char __user *ubuf,
2450 size_t cnt, loff_t *ppos)
2451{
2452 long *p = filp->private_data;
2453 char buf[64];
2454 int r;
2455
2456 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2457 r = sprintf(buf, "permanently disabled\n");
2458 else
2459 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2460
2461 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2462}
2463
2464static ssize_t
2465rb_simple_write(struct file *filp, const char __user *ubuf,
2466 size_t cnt, loff_t *ppos)
2467{
2468 long *p = filp->private_data;
2469 char buf[64];
2470 long val;
2471 int ret;
2472
2473 if (cnt >= sizeof(buf))
2474 return -EINVAL;
2475
2476 if (copy_from_user(&buf, ubuf, cnt))
2477 return -EFAULT;
2478
2479 buf[cnt] = 0;
2480
2481 ret = strict_strtoul(buf, 10, &val);
2482 if (ret < 0)
2483 return ret;
2484
2485 if (val)
2486 set_bit(RB_BUFFERS_ON_BIT, p);
2487 else
2488 clear_bit(RB_BUFFERS_ON_BIT, p);
2489
2490 (*ppos)++;
2491
2492 return cnt;
2493}
2494
2495static struct file_operations rb_simple_fops = {
2496 .open = tracing_open_generic,
2497 .read = rb_simple_read,
2498 .write = rb_simple_write,
2499};
2500
2501
2502static __init int rb_init_debugfs(void)
2503{
2504 struct dentry *d_tracer;
2505 struct dentry *entry;
2506
2507 d_tracer = tracing_init_dentry();
2508
2509 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2510 &ring_buffer_flags, &rb_simple_fops);
2511 if (!entry)
2512 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2513
2514 return 0;
2515}
2516
2517fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c3..4185d5221633 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
14#include <linux/utsrelease.h> 14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 16#include <linux/seq_file.h>
17#include <linux/notifier.h>
17#include <linux/debugfs.h> 18#include <linux/debugfs.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
@@ -22,6 +23,7 @@
22#include <linux/ftrace.h> 23#include <linux/ftrace.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/kdebug.h>
25#include <linux/ctype.h> 27#include <linux/ctype.h>
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/poll.h> 29#include <linux/poll.h>
@@ -31,24 +33,97 @@
31#include <linux/writeback.h> 33#include <linux/writeback.h>
32 34
33#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
34 38
35#include "trace.h" 39#include "trace.h"
36 40
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
39 45
40static unsigned long __read_mostly tracing_nr_buffers; 46/*
47 * We need to change this state when a selftest is running.
 48 * A selftest will look into the ring-buffer to count the
 49 * entries inserted during the selftest, although concurrent
 50 * insertions into the ring-buffer, such as ftrace_printk, could occur
51 * at the same time, giving false positive or negative results.
52 */
53static bool __read_mostly tracing_selftest_running;
54
55/* For tracers that don't implement custom flags */
56static struct tracer_opt dummy_tracer_opt[] = {
57 { }
58};
59
60static struct tracer_flags dummy_tracer_flags = {
61 .val = 0,
62 .opts = dummy_tracer_opt
63};
64
65static int dummy_set_flag(u32 old_flags, u32 bit, int set)
66{
67 return 0;
68}
69
70/*
71 * Kill all tracing for good (never come back).
72 * It is initialized to 1 but will turn to zero if the initialization
73 * of the tracer is successful. But that is the only place that sets
74 * this back to zero.
75 */
76int tracing_disabled = 1;
77
78static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
79
80static inline void ftrace_disable_cpu(void)
81{
82 preempt_disable();
83 local_inc(&__get_cpu_var(ftrace_cpu_disabled));
84}
85
86static inline void ftrace_enable_cpu(void)
87{
88 local_dec(&__get_cpu_var(ftrace_cpu_disabled));
89 preempt_enable();
90}
91
41static cpumask_t __read_mostly tracing_buffer_mask; 92static cpumask_t __read_mostly tracing_buffer_mask;
42 93
43#define for_each_tracing_cpu(cpu) \ 94#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask) 95 for_each_cpu_mask(cpu, tracing_buffer_mask)
45 96
46static int trace_alloc_page(void); 97/*
47static int trace_free_page(void); 98 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
99 *
100 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
101 * is set, then ftrace_dump is called. This will output the contents
102 * of the ftrace buffers to the console. This is very useful for
 103 * capturing traces that lead to crashes and outputting them to a
104 * serial console.
105 *
 106 * It is off by default, but you can enable it either by specifying
 107 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 108 * /proc/sys/kernel/ftrace_dump_on_oops to true.
109 */
110int ftrace_dump_on_oops;
48 111
49static int tracing_disabled = 1; 112static int tracing_set_tracer(char *buf);
50 113
51static unsigned long tracing_pages_allocated; 114static int __init set_ftrace(char *str)
115{
116 tracing_set_tracer(str);
117 return 1;
118}
119__setup("ftrace", set_ftrace);
120
121static int __init set_ftrace_dump_on_oops(char *str)
122{
123 ftrace_dump_on_oops = 1;
124 return 1;
125}
126__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
52 127
53long 128long
54ns2usecs(cycle_t nsec) 129ns2usecs(cycle_t nsec)
@@ -60,7 +135,9 @@ ns2usecs(cycle_t nsec)
60 135
61cycle_t ftrace_now(int cpu) 136cycle_t ftrace_now(int cpu)
62{ 137{
63 return cpu_clock(cpu); 138 u64 ts = ring_buffer_time_stamp(cpu);
139 ring_buffer_normalize_time_stamp(cpu, &ts);
140 return ts;
64} 141}
65 142
66/* 143/*
@@ -96,15 +173,35 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
96/* tracer_enabled is used to toggle activation of a tracer */ 173/* tracer_enabled is used to toggle activation of a tracer */
97static int tracer_enabled = 1; 174static int tracer_enabled = 1;
98 175
176/**
177 * tracing_is_enabled - return tracer_enabled status
178 *
179 * This function is used by other tracers to know the status
180 * of the tracer_enabled flag. Tracers may use this function
181 * to know if it should enable their features when starting
182 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
183 */
184int tracing_is_enabled(void)
185{
186 return tracer_enabled;
187}
188
99/* function tracing enabled */ 189/* function tracing enabled */
100int ftrace_function_enabled; 190int ftrace_function_enabled;
101 191
102/* 192/*
103 * trace_nr_entries is the number of entries that is allocated 193 * trace_buf_size is the size in bytes that is allocated
104 * for a buffer. Note, the number of entries is always rounded 194 * for a buffer. Note, the number of bytes is always rounded
105 * to ENTRIES_PER_PAGE. 195 * to page size.
196 *
197 * This number is purposely set to a low number of 16384.
198 * If the dump on oops happens, it will be much appreciated
199 * to not have to wait for all that output. Anyway this can be
200 * boot time and run time configurable.
106 */ 201 */
107static unsigned long trace_nr_entries = 65536UL; 202#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
203
204static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
108 205
109/* trace_types holds a link list of available tracers. */ 206/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly; 207static struct tracer *trace_types __read_mostly;
@@ -130,26 +227,9 @@ static DEFINE_MUTEX(trace_types_lock);
130/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 227/* trace_wait is a waitqueue for tasks blocked on trace_poll */
131static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 228static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
132 229
133/* trace_flags holds iter_ctrl options */ 230/* trace_flags holds trace_options default values */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 231unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
135 232 TRACE_ITER_ANNOTATE;
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153 233
154/** 234/**
155 * trace_wake_up - wake up tasks waiting for trace input 235 * trace_wake_up - wake up tasks waiting for trace input
@@ -167,51 +247,27 @@ void trace_wake_up(void)
167 wake_up(&trace_wait); 247 wake_up(&trace_wait);
168} 248}
169 249
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) 250static int __init set_buf_size(char *str)
171
172static int __init set_nr_entries(char *str)
173{ 251{
174 unsigned long nr_entries; 252 unsigned long buf_size;
175 int ret; 253 int ret;
176 254
177 if (!str) 255 if (!str)
178 return 0; 256 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries); 257 ret = strict_strtoul(str, 0, &buf_size);
180 /* nr_entries can not be zero */ 258 /* nr_entries can not be zero */
181 if (ret < 0 || nr_entries == 0) 259 if (ret < 0 || buf_size == 0)
182 return 0; 260 return 0;
183 trace_nr_entries = nr_entries; 261 trace_buf_size = buf_size;
184 return 1; 262 return 1;
185} 263}
186__setup("trace_entries=", set_nr_entries); 264__setup("trace_buf_size=", set_buf_size);
187 265
188unsigned long nsecs_to_usecs(unsigned long nsecs) 266unsigned long nsecs_to_usecs(unsigned long nsecs)
189{ 267{
190 return nsecs / 1000; 268 return nsecs / 1000;
191} 269}
192 270
193/*
194 * trace_flag_type is an enumeration that holds different
195 * states when a trace occurs. These are:
196 * IRQS_OFF - interrupts were disabled
197 * NEED_RESCED - reschedule is requested
198 * HARDIRQ - inside an interrupt handler
199 * SOFTIRQ - inside a softirq handler
200 */
201enum trace_flag_type {
202 TRACE_FLAG_IRQS_OFF = 0x01,
203 TRACE_FLAG_NEED_RESCHED = 0x02,
204 TRACE_FLAG_HARDIRQ = 0x04,
205 TRACE_FLAG_SOFTIRQ = 0x08,
206};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols.
211 */
212#define TRACE_ITER_SYM_MASK \
213 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
214
215/* These must match the bit positions in trace_iterator_flags */ 271/* These must match the bit positions in trace_iterator_flags */
216static const char *trace_options[] = { 272static const char *trace_options[] = {
217 "print-parent", 273 "print-parent",
@@ -224,6 +280,13 @@ static const char *trace_options[] = {
224 "block", 280 "block",
225 "stacktrace", 281 "stacktrace",
226 "sched-tree", 282 "sched-tree",
283 "ftrace_printk",
284 "ftrace_preempt",
285 "branch",
286 "annotate",
287 "userstacktrace",
288 "sym-userobj",
289 "printk-msg-only",
227 NULL 290 NULL
228}; 291};
229 292
@@ -257,7 +320,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
257 320
258 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 321 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
259 data->pid = tsk->pid; 322 data->pid = tsk->pid;
260 data->uid = tsk->uid; 323 data->uid = task_uid(tsk);
261 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 324 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
262 data->policy = tsk->policy; 325 data->policy = tsk->policy;
263 data->rt_priority = tsk->rt_priority; 326 data->rt_priority = tsk->rt_priority;
@@ -266,54 +329,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
266 tracing_record_cmdline(current); 329 tracing_record_cmdline(current);
267} 330}
268 331
269#define CHECK_COND(cond) \
270 if (unlikely(cond)) { \
271 tracing_disabled = 1; \
272 WARN_ON(1); \
273 return -1; \
274 }
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safty measure we check to make sure the data pages have not
280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also preforms various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/** 332/**
318 * trace_seq_printf - sequence printing of trace information 333 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor 334 * @s: trace sequence descriptor
@@ -395,34 +410,51 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
395 return len; 410 return len;
396} 411}
397 412
398#define HEX_CHARS 17 413#define MAX_MEMHEX_BYTES 8
399static const char hex2asc[] = "0123456789abcdef"; 414#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
400 415
401static int 416static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 417trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{ 418{
404 unsigned char hex[HEX_CHARS]; 419 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem; 420 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j; 421 int i, j;
408 422
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN 423#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) { 424 for (i = 0, j = 0; i < len; i++) {
413#else 425#else
414 for (i = len-1, j = 0; i >= 0; i--) { 426 for (i = len-1, j = 0; i >= 0; i--) {
415#endif 427#endif
416 byte = data[i]; 428 hex[j++] = hex_asc_hi(data[i]);
417 429 hex[j++] = hex_asc_lo(data[i]);
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 } 430 }
421 hex[j++] = ' '; 431 hex[j++] = ' ';
422 432
423 return trace_seq_putmem(s, hex, j); 433 return trace_seq_putmem(s, hex, j);
424} 434}
425 435
436static int
437trace_seq_path(struct trace_seq *s, struct path *path)
438{
439 unsigned char *p;
440
441 if (s->len >= (PAGE_SIZE - 1))
442 return 0;
443 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
444 if (!IS_ERR(p)) {
445 p = mangle_path(s->buffer + s->len, p, "\n");
446 if (p) {
447 s->len = p - s->buffer;
448 return 1;
449 }
450 } else {
451 s->buffer[s->len++] = '?';
452 return 1;
453 }
454
455 return 0;
456}
457
426static void 458static void
427trace_seq_reset(struct trace_seq *s) 459trace_seq_reset(struct trace_seq *s)
428{ 460{
@@ -460,34 +492,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
460 trace_seq_reset(s); 492 trace_seq_reset(s);
461} 493}
462 494
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/** 495/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 496 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer 497 * @tr: tracer
@@ -500,17 +504,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
500void 504void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 505update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{ 506{
503 struct trace_array_cpu *data; 507 struct ring_buffer *buf = tr->buffer;
504 int i;
505 508
506 WARN_ON_ONCE(!irqs_disabled()); 509 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock); 510 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */ 511
509 for_each_tracing_cpu(i) { 512 tr->buffer = max_tr.buffer;
510 data = tr->data[i]; 513 max_tr.buffer = buf;
511 flip_trace(max_tr.data[i], data); 514
512 tracing_reset(data); 515 ftrace_disable_cpu();
513 } 516 ring_buffer_reset(tr->buffer);
517 ftrace_enable_cpu();
514 518
515 __update_max_tr(tr, tsk, cpu); 519 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock); 520 __raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +531,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
527void 531void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 532update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{ 533{
530 struct trace_array_cpu *data = tr->data[cpu]; 534 int ret;
531 int i;
532 535
533 WARN_ON_ONCE(!irqs_disabled()); 536 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock); 537 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537 538
538 flip_trace(max_tr.data[cpu], data); 539 ftrace_disable_cpu();
539 tracing_reset(data); 540
541 ring_buffer_reset(max_tr.buffer);
542 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
543
544 ftrace_enable_cpu();
545
546 WARN_ON_ONCE(ret);
540 547
541 __update_max_tr(tr, tsk, cpu); 548 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock); 549 __raw_spin_unlock(&ftrace_max_lock);
@@ -559,7 +566,17 @@ int register_tracer(struct tracer *type)
559 return -1; 566 return -1;
560 } 567 }
561 568
569 /*
570 * When this gets called we hold the BKL which means that
571 * preemption is disabled. Various trace selftests however
572 * need to disable and enable preemption for successful tests.
573 * So we drop the BKL here and grab it after the tests again.
574 */
575 unlock_kernel();
562 mutex_lock(&trace_types_lock); 576 mutex_lock(&trace_types_lock);
577
578 tracing_selftest_running = true;
579
563 for (t = trace_types; t; t = t->next) { 580 for (t = trace_types; t; t = t->next) {
564 if (strcmp(type->name, t->name) == 0) { 581 if (strcmp(type->name, t->name) == 0) {
565 /* already found */ 582 /* already found */
@@ -570,13 +587,20 @@ int register_tracer(struct tracer *type)
570 } 587 }
571 } 588 }
572 589
590 if (!type->set_flag)
591 type->set_flag = &dummy_set_flag;
592 if (!type->flags)
593 type->flags = &dummy_tracer_flags;
594 else
595 if (!type->flags->opts)
596 type->flags->opts = dummy_tracer_opt;
597
573#ifdef CONFIG_FTRACE_STARTUP_TEST 598#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) { 599 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace; 600 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace; 601 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl;
579 int i; 602 int i;
603
580 /* 604 /*
581 * Run a selftest on this tracer. 605 * Run a selftest on this tracer.
582 * Here we reset the trace buffer, and set the current 606 * Here we reset the trace buffer, and set the current
@@ -584,31 +608,23 @@ int register_tracer(struct tracer *type)
584 * internal tracing to verify that everything is in order. 608 * internal tracing to verify that everything is in order.
585 * If we fail, we do not register this tracer. 609 * If we fail, we do not register this tracer.
586 */ 610 */
587 for_each_tracing_cpu(i) { 611 for_each_tracing_cpu(i)
588 data = tr->data[i]; 612 tracing_reset(tr, i);
589 if (!head_page(data)) 613
590 continue;
591 tracing_reset(data);
592 }
593 current_trace = type; 614 current_trace = type;
594 tr->ctrl = 0;
595 /* the test is responsible for initializing and enabling */ 615 /* the test is responsible for initializing and enabling */
596 pr_info("Testing tracer %s: ", type->name); 616 pr_info("Testing tracer %s: ", type->name);
597 ret = type->selftest(type, tr); 617 ret = type->selftest(type, tr);
598 /* the test is responsible for resetting too */ 618 /* the test is responsible for resetting too */
599 current_trace = saved_tracer; 619 current_trace = saved_tracer;
600 tr->ctrl = saved_ctrl;
601 if (ret) { 620 if (ret) {
602 printk(KERN_CONT "FAILED!\n"); 621 printk(KERN_CONT "FAILED!\n");
603 goto out; 622 goto out;
604 } 623 }
605 /* Only reset on passing, to avoid touching corrupted buffers */ 624 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) { 625 for_each_tracing_cpu(i)
607 data = tr->data[i]; 626 tracing_reset(tr, i);
608 if (!head_page(data)) 627
609 continue;
610 tracing_reset(data);
611 }
612 printk(KERN_CONT "PASSED\n"); 628 printk(KERN_CONT "PASSED\n");
613 } 629 }
614#endif 630#endif
@@ -620,7 +636,9 @@ int register_tracer(struct tracer *type)
620 max_tracer_type_len = len; 636 max_tracer_type_len = len;
621 637
622 out: 638 out:
639 tracing_selftest_running = false;
623 mutex_unlock(&trace_types_lock); 640 mutex_unlock(&trace_types_lock);
641 lock_kernel();
624 642
625 return ret; 643 return ret;
626} 644}
@@ -653,13 +671,21 @@ void unregister_tracer(struct tracer *type)
653 mutex_unlock(&trace_types_lock); 671 mutex_unlock(&trace_types_lock);
654} 672}
655 673
656void tracing_reset(struct trace_array_cpu *data) 674void tracing_reset(struct trace_array *tr, int cpu)
657{ 675{
658 data->trace_idx = 0; 676 ftrace_disable_cpu();
659 data->overrun = 0; 677 ring_buffer_reset_cpu(tr->buffer, cpu);
660 data->trace_head = data->trace_tail = head_page(data); 678 ftrace_enable_cpu();
661 data->trace_head_idx = 0; 679}
662 data->trace_tail_idx = 0; 680
681void tracing_reset_online_cpus(struct trace_array *tr)
682{
683 int cpu;
684
685 tr->time_start = ftrace_now(tr->cpu);
686
687 for_each_online_cpu(cpu)
688 tracing_reset(tr, cpu);
663} 689}
664 690
665#define SAVED_CMDLINES 128 691#define SAVED_CMDLINES 128
@@ -679,6 +705,91 @@ static void trace_init_cmdlines(void)
679 cmdline_idx = 0; 705 cmdline_idx = 0;
680} 706}
681 707
708static int trace_stop_count;
709static DEFINE_SPINLOCK(tracing_start_lock);
710
711/**
712 * ftrace_off_permanent - disable all ftrace code permanently
713 *
 714 * This should only be called when a serious anomaly has
715 * been detected. This will turn off the function tracing,
 716 * ring buffers, and other tracing utilities. It takes no
717 * locks and can be called from any context.
718 */
719void ftrace_off_permanent(void)
720{
721 tracing_disabled = 1;
722 ftrace_stop();
723 tracing_off_permanent();
724}
725
726/**
727 * tracing_start - quick start of the tracer
728 *
729 * If tracing is enabled but was stopped by tracing_stop,
730 * this will start the tracer back up.
731 */
732void tracing_start(void)
733{
734 struct ring_buffer *buffer;
735 unsigned long flags;
736
737 if (tracing_disabled)
738 return;
739
740 spin_lock_irqsave(&tracing_start_lock, flags);
741 if (--trace_stop_count)
742 goto out;
743
744 if (trace_stop_count < 0) {
745 /* Someone screwed up their debugging */
746 WARN_ON_ONCE(1);
747 trace_stop_count = 0;
748 goto out;
749 }
750
751
752 buffer = global_trace.buffer;
753 if (buffer)
754 ring_buffer_record_enable(buffer);
755
756 buffer = max_tr.buffer;
757 if (buffer)
758 ring_buffer_record_enable(buffer);
759
760 ftrace_start();
761 out:
762 spin_unlock_irqrestore(&tracing_start_lock, flags);
763}
764
765/**
766 * tracing_stop - quick stop of the tracer
767 *
 768 * Lightweight way to stop tracing. Use in conjunction with
769 * tracing_start.
770 */
771void tracing_stop(void)
772{
773 struct ring_buffer *buffer;
774 unsigned long flags;
775
776 ftrace_stop();
777 spin_lock_irqsave(&tracing_start_lock, flags);
778 if (trace_stop_count++)
779 goto out;
780
781 buffer = global_trace.buffer;
782 if (buffer)
783 ring_buffer_record_disable(buffer);
784
785 buffer = max_tr.buffer;
786 if (buffer)
787 ring_buffer_record_disable(buffer);
788
789 out:
790 spin_unlock_irqrestore(&tracing_start_lock, flags);
791}
792
682void trace_stop_cmdline_recording(void); 793void trace_stop_cmdline_recording(void);
683 794
684static void trace_save_cmdline(struct task_struct *tsk) 795static void trace_save_cmdline(struct task_struct *tsk)
@@ -716,7 +827,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
716 spin_unlock(&trace_cmdline_lock); 827 spin_unlock(&trace_cmdline_lock);
717} 828}
718 829
719static char *trace_find_cmdline(int pid) 830char *trace_find_cmdline(int pid)
720{ 831{
721 char *cmdline = "<...>"; 832 char *cmdline = "<...>";
722 unsigned map; 833 unsigned map;
@@ -745,82 +856,21 @@ void tracing_record_cmdline(struct task_struct *tsk)
745 trace_save_cmdline(tsk); 856 trace_save_cmdline(tsk);
746} 857}
747 858
748static inline struct list_head * 859void
749trace_next_list(struct trace_array_cpu *data, struct list_head *next) 860tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
750{ 861 int pc)
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{ 862{
815 struct task_struct *tsk = current; 863 struct task_struct *tsk = current;
816 unsigned long pc;
817 864
818 pc = preempt_count(); 865 entry->preempt_count = pc & 0xff;
819 866 entry->pid = (tsk) ? tsk->pid : 0;
820 entry->preempt_count = pc & 0xff; 867 entry->tgid = (tsk) ? tsk->tgid : 0;
821 entry->pid = (tsk) ? tsk->pid : 0; 868 entry->flags =
822 entry->t = ftrace_now(raw_smp_processor_id()); 869#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 870 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
871#else
872 TRACE_FLAG_IRQS_NOSUPPORT |
873#endif
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 874 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 875 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 876 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +878,233 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
828 878
829void 879void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data, 880trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags) 881 unsigned long ip, unsigned long parent_ip, unsigned long flags,
882 int pc)
832{ 883{
833 struct trace_entry *entry; 884 struct ring_buffer_event *event;
885 struct ftrace_entry *entry;
834 unsigned long irq_flags; 886 unsigned long irq_flags;
835 887
836 raw_local_irq_save(irq_flags); 888 /* If we are reading the ring buffer, don't trace */
837 __raw_spin_lock(&data->lock); 889 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
838 entry = tracing_get_trace_entry(tr, data); 890 return;
839 tracing_generic_entry_update(entry, flags);
840 entry->type = TRACE_FN;
841 entry->fn.ip = ip;
842 entry->fn.parent_ip = parent_ip;
843 __raw_spin_unlock(&data->lock);
844 raw_local_irq_restore(irq_flags);
845}
846 891
847void 892 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
848ftrace(struct trace_array *tr, struct trace_array_cpu *data, 893 &irq_flags);
849 unsigned long ip, unsigned long parent_ip, unsigned long flags) 894 if (!event)
850{ 895 return;
851 if (likely(!atomic_read(&data->disabled))) 896 entry = ring_buffer_event_data(event);
852 trace_function(tr, data, ip, parent_ip, flags); 897 tracing_generic_entry_update(&entry->ent, flags, pc);
898 entry->ent.type = TRACE_FN;
899 entry->ip = ip;
900 entry->parent_ip = parent_ip;
901 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
902}
903
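
trace_function() above shows the reserve/fill/commit pattern this patch uses for every event type; a sketch of adding a hypothetical event with the same calls (struct foo_entry and TRACE_FOO do not exist in this patch):

static void trace_foo(struct trace_array *tr, unsigned long val,
		      unsigned long flags, int pc)
{
	struct ring_buffer_event *event;
	struct foo_entry *entry;	/* hypothetical payload type */
	unsigned long irq_flags;

	/* Reserve room for one event; silently drop it if the buffer is off. */
	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
					 &irq_flags);
	if (!event)
		return;

	/* Fill the payload in place, then publish it. */
	entry = ring_buffer_event_data(event);
	tracing_generic_entry_update(&entry->ent, flags, pc);
	entry->ent.type = TRACE_FOO;	/* hypothetical type id */
	entry->val = val;
	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
}
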
904#ifdef CONFIG_FUNCTION_GRAPH_TRACER
905static void __trace_graph_entry(struct trace_array *tr,
906 struct trace_array_cpu *data,
907 struct ftrace_graph_ent *trace,
908 unsigned long flags,
909 int pc)
910{
911 struct ring_buffer_event *event;
912 struct ftrace_graph_ent_entry *entry;
913 unsigned long irq_flags;
914
915 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
916 return;
917
918 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
919 &irq_flags);
920 if (!event)
921 return;
922 entry = ring_buffer_event_data(event);
923 tracing_generic_entry_update(&entry->ent, flags, pc);
924 entry->ent.type = TRACE_GRAPH_ENT;
925 entry->graph_ent = *trace;
926 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
853} 927}
854 928
855#ifdef CONFIG_MMIOTRACE 929static void __trace_graph_return(struct trace_array *tr,
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, 930 struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw) 931 struct ftrace_graph_ret *trace,
932 unsigned long flags,
933 int pc)
858{ 934{
859 struct trace_entry *entry; 935 struct ring_buffer_event *event;
936 struct ftrace_graph_ret_entry *entry;
860 unsigned long irq_flags; 937 unsigned long irq_flags;
861 938
862 raw_local_irq_save(irq_flags); 939 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
863 __raw_spin_lock(&data->lock); 940 return;
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869 941
870 __raw_spin_unlock(&data->lock); 942 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
871 raw_local_irq_restore(irq_flags); 943 &irq_flags);
944 if (!event)
945 return;
946 entry = ring_buffer_event_data(event);
947 tracing_generic_entry_update(&entry->ent, flags, pc);
948 entry->ent.type = TRACE_GRAPH_RET;
949 entry->ret = *trace;
950 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
951}
952#endif
872 953
873 trace_wake_up(); 954void
955ftrace(struct trace_array *tr, struct trace_array_cpu *data,
956 unsigned long ip, unsigned long parent_ip, unsigned long flags,
957 int pc)
958{
959 if (likely(!atomic_read(&data->disabled)))
960 trace_function(tr, data, ip, parent_ip, flags, pc);
874} 961}
875 962
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, 963static void ftrace_trace_stack(struct trace_array *tr,
877 struct mmiotrace_map *map) 964 struct trace_array_cpu *data,
965 unsigned long flags,
966 int skip, int pc)
878{ 967{
879 struct trace_entry *entry; 968#ifdef CONFIG_STACKTRACE
969 struct ring_buffer_event *event;
970 struct stack_entry *entry;
971 struct stack_trace trace;
880 unsigned long irq_flags; 972 unsigned long irq_flags;
881 973
882 raw_local_irq_save(irq_flags); 974 if (!(trace_flags & TRACE_ITER_STACKTRACE))
883 __raw_spin_lock(&data->lock); 975 return;
884 976
885 entry = tracing_get_trace_entry(tr, data); 977 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
886 tracing_generic_entry_update(entry, 0); 978 &irq_flags);
887 entry->type = TRACE_MMIO_MAP; 979 if (!event)
888 entry->mmiomap = *map; 980 return;
981 entry = ring_buffer_event_data(event);
982 tracing_generic_entry_update(&entry->ent, flags, pc);
983 entry->ent.type = TRACE_STACK;
889 984
890 __raw_spin_unlock(&data->lock); 985 memset(&entry->caller, 0, sizeof(entry->caller));
891 raw_local_irq_restore(irq_flags);
892 986
893 trace_wake_up(); 987 trace.nr_entries = 0;
894} 988 trace.max_entries = FTRACE_STACK_ENTRIES;
989 trace.skip = skip;
990 trace.entries = entry->caller;
991
992 save_stack_trace(&trace);
993 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
895#endif 994#endif
995}
896 996
897void __trace_stack(struct trace_array *tr, 997void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data, 998 struct trace_array_cpu *data,
899 unsigned long flags, 999 unsigned long flags,
900 int skip) 1000 int skip)
901{ 1001{
902 struct trace_entry *entry; 1002 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
1003}
1004
1005static void ftrace_trace_userstack(struct trace_array *tr,
1006 struct trace_array_cpu *data,
1007 unsigned long flags, int pc)
1008{
1009#ifdef CONFIG_STACKTRACE
1010 struct ring_buffer_event *event;
1011 struct userstack_entry *entry;
903 struct stack_trace trace; 1012 struct stack_trace trace;
1013 unsigned long irq_flags;
904 1014
905 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1015 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
906 return; 1016 return;
907 1017
908 entry = tracing_get_trace_entry(tr, data); 1018 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
909 tracing_generic_entry_update(entry, flags); 1019 &irq_flags);
910 entry->type = TRACE_STACK; 1020 if (!event)
1021 return;
1022 entry = ring_buffer_event_data(event);
1023 tracing_generic_entry_update(&entry->ent, flags, pc);
1024 entry->ent.type = TRACE_USER_STACK;
911 1025
912 memset(&entry->stack, 0, sizeof(entry->stack)); 1026 memset(&entry->caller, 0, sizeof(entry->caller));
913 1027
914 trace.nr_entries = 0; 1028 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES; 1029 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip; 1030 trace.skip = 0;
917 trace.entries = entry->stack.caller; 1031 trace.entries = entry->caller;
918 1032
919 save_stack_trace(&trace); 1033 save_stack_trace_user(&trace);
1034 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1035#endif
920} 1036}
921 1037
922void 1038void __trace_userstack(struct trace_array *tr,
923__trace_special(void *__tr, void *__data, 1039 struct trace_array_cpu *data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3) 1040 unsigned long flags)
925{ 1041{
1042 ftrace_trace_userstack(tr, data, flags, preempt_count());
1043}
1044
1045static void
1046ftrace_trace_special(void *__tr, void *__data,
1047 unsigned long arg1, unsigned long arg2, unsigned long arg3,
1048 int pc)
1049{
1050 struct ring_buffer_event *event;
926 struct trace_array_cpu *data = __data; 1051 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr; 1052 struct trace_array *tr = __tr;
928 struct trace_entry *entry; 1053 struct special_entry *entry;
929 unsigned long irq_flags; 1054 unsigned long irq_flags;
930 1055
931 raw_local_irq_save(irq_flags); 1056 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
932 __raw_spin_lock(&data->lock); 1057 &irq_flags);
933 entry = tracing_get_trace_entry(tr, data); 1058 if (!event)
934 tracing_generic_entry_update(entry, 0); 1059 return;
935 entry->type = TRACE_SPECIAL; 1060 entry = ring_buffer_event_data(event);
936 entry->special.arg1 = arg1; 1061 tracing_generic_entry_update(&entry->ent, 0, pc);
937 entry->special.arg2 = arg2; 1062 entry->ent.type = TRACE_SPECIAL;
938 entry->special.arg3 = arg3; 1063 entry->arg1 = arg1;
939 __trace_stack(tr, data, irq_flags, 4); 1064 entry->arg2 = arg2;
940 __raw_spin_unlock(&data->lock); 1065 entry->arg3 = arg3;
941 raw_local_irq_restore(irq_flags); 1066 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1067 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1068 ftrace_trace_userstack(tr, data, irq_flags, pc);
942 1069
943 trace_wake_up(); 1070 trace_wake_up();
944} 1071}
945 1072
946void 1073void
1074__trace_special(void *__tr, void *__data,
1075 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1076{
1077 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
1078}
1079
1080void
947tracing_sched_switch_trace(struct trace_array *tr, 1081tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data, 1082 struct trace_array_cpu *data,
949 struct task_struct *prev, 1083 struct task_struct *prev,
950 struct task_struct *next, 1084 struct task_struct *next,
951 unsigned long flags) 1085 unsigned long flags, int pc)
952{ 1086{
953 struct trace_entry *entry; 1087 struct ring_buffer_event *event;
1088 struct ctx_switch_entry *entry;
954 unsigned long irq_flags; 1089 unsigned long irq_flags;
955 1090
956 raw_local_irq_save(irq_flags); 1091 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
957 __raw_spin_lock(&data->lock); 1092 &irq_flags);
958 entry = tracing_get_trace_entry(tr, data); 1093 if (!event)
959 tracing_generic_entry_update(entry, flags); 1094 return;
960 entry->type = TRACE_CTX; 1095 entry = ring_buffer_event_data(event);
961 entry->ctx.prev_pid = prev->pid; 1096 tracing_generic_entry_update(&entry->ent, flags, pc);
962 entry->ctx.prev_prio = prev->prio; 1097 entry->ent.type = TRACE_CTX;
963 entry->ctx.prev_state = prev->state; 1098 entry->prev_pid = prev->pid;
964 entry->ctx.next_pid = next->pid; 1099 entry->prev_prio = prev->prio;
965 entry->ctx.next_prio = next->prio; 1100 entry->prev_state = prev->state;
966 entry->ctx.next_state = next->state; 1101 entry->next_pid = next->pid;
967 __trace_stack(tr, data, flags, 5); 1102 entry->next_prio = next->prio;
968 __raw_spin_unlock(&data->lock); 1103 entry->next_state = next->state;
969 raw_local_irq_restore(irq_flags); 1104 entry->next_cpu = task_cpu(next);
1105 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1106 ftrace_trace_stack(tr, data, flags, 5, pc);
1107 ftrace_trace_userstack(tr, data, flags, pc);
970} 1108}
971 1109
972void 1110void
@@ -974,25 +1112,29 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data, 1112 struct trace_array_cpu *data,
975 struct task_struct *wakee, 1113 struct task_struct *wakee,
976 struct task_struct *curr, 1114 struct task_struct *curr,
977 unsigned long flags) 1115 unsigned long flags, int pc)
978{ 1116{
979 struct trace_entry *entry; 1117 struct ring_buffer_event *event;
1118 struct ctx_switch_entry *entry;
980 unsigned long irq_flags; 1119 unsigned long irq_flags;
981 1120
982 raw_local_irq_save(irq_flags); 1121 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
983 __raw_spin_lock(&data->lock); 1122 &irq_flags);
984 entry = tracing_get_trace_entry(tr, data); 1123 if (!event)
985 tracing_generic_entry_update(entry, flags); 1124 return;
986 entry->type = TRACE_WAKE; 1125 entry = ring_buffer_event_data(event);
987 entry->ctx.prev_pid = curr->pid; 1126 tracing_generic_entry_update(&entry->ent, flags, pc);
988 entry->ctx.prev_prio = curr->prio; 1127 entry->ent.type = TRACE_WAKE;
989 entry->ctx.prev_state = curr->state; 1128 entry->prev_pid = curr->pid;
990 entry->ctx.next_pid = wakee->pid; 1129 entry->prev_prio = curr->prio;
991 entry->ctx.next_prio = wakee->prio; 1130 entry->prev_state = curr->state;
992 entry->ctx.next_state = wakee->state; 1131 entry->next_pid = wakee->pid;
993 __trace_stack(tr, data, flags, 6); 1132 entry->next_prio = wakee->prio;
994 __raw_spin_unlock(&data->lock); 1133 entry->next_state = wakee->state;
995 raw_local_irq_restore(irq_flags); 1134 entry->next_cpu = task_cpu(wakee);
1135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1136 ftrace_trace_stack(tr, data, flags, 6, pc);
1137 ftrace_trace_userstack(tr, data, flags, pc);
996 1138
997 trace_wake_up(); 1139 trace_wake_up();
998} 1140}
@@ -1003,25 +1145,52 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1003 struct trace_array *tr = &global_trace; 1145 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data; 1146 struct trace_array_cpu *data;
1005 unsigned long flags; 1147 unsigned long flags;
1006 long disabled;
1007 int cpu; 1148 int cpu;
1149 int pc;
1008 1150
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) 1151 if (tracing_disabled)
1010 return; 1152 return;
1011 1153
1154 pc = preempt_count();
1012 local_irq_save(flags); 1155 local_irq_save(flags);
1013 cpu = raw_smp_processor_id(); 1156 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu]; 1157 data = tr->data[cpu];
1158
1159 if (likely(atomic_inc_return(&data->disabled) == 1))
1160 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1161
1162 atomic_dec(&data->disabled);
1163 local_irq_restore(flags);
1164}
1165
1166#ifdef CONFIG_FUNCTION_TRACER
1167static void
1168function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
1169{
1170 struct trace_array *tr = &global_trace;
1171 struct trace_array_cpu *data;
1172 unsigned long flags;
1173 long disabled;
1174 int cpu, resched;
1175 int pc;
1176
1177 if (unlikely(!ftrace_function_enabled))
1178 return;
1179
1180 pc = preempt_count();
1181 resched = ftrace_preempt_disable();
1182 local_save_flags(flags);
1183 cpu = raw_smp_processor_id();
1184 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled); 1185 disabled = atomic_inc_return(&data->disabled);
1016 1186
1017 if (likely(disabled == 1)) 1187 if (likely(disabled == 1))
1018 __trace_special(tr, data, arg1, arg2, arg3); 1188 trace_function(tr, data, ip, parent_ip, flags, pc);
1019 1189
1020 atomic_dec(&data->disabled); 1190 atomic_dec(&data->disabled);
1021 local_irq_restore(flags); 1191 ftrace_preempt_enable(resched);
1022} 1192}
1023 1193
1024#ifdef CONFIG_FTRACE
1025static void 1194static void
1026function_trace_call(unsigned long ip, unsigned long parent_ip) 1195function_trace_call(unsigned long ip, unsigned long parent_ip)
1027{ 1196{
@@ -1030,24 +1199,85 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1030 unsigned long flags; 1199 unsigned long flags;
1031 long disabled; 1200 long disabled;
1032 int cpu; 1201 int cpu;
1202 int pc;
1033 1203
1034 if (unlikely(!ftrace_function_enabled)) 1204 if (unlikely(!ftrace_function_enabled))
1035 return; 1205 return;
1036 1206
1037 if (skip_trace(ip)) 1207 /*
1038 return; 1208 * Need to use raw, since this must be called before the
1209 * recursive protection is performed.
1210 */
1211 local_irq_save(flags);
1212 cpu = raw_smp_processor_id();
1213 data = tr->data[cpu];
1214 disabled = atomic_inc_return(&data->disabled);
1215
1216 if (likely(disabled == 1)) {
1217 pc = preempt_count();
1218 trace_function(tr, data, ip, parent_ip, flags, pc);
1219 }
1220
1221 atomic_dec(&data->disabled);
1222 local_irq_restore(flags);
1223}
1224
1225#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1226int trace_graph_entry(struct ftrace_graph_ent *trace)
1227{
1228 struct trace_array *tr = &global_trace;
1229 struct trace_array_cpu *data;
1230 unsigned long flags;
1231 long disabled;
1232 int cpu;
1233 int pc;
1234
1235 if (!ftrace_trace_task(current))
1236 return 0;
1237
1238 if (!ftrace_graph_addr(trace->func))
1239 return 0;
1039 1240
1040 local_irq_save(flags); 1241 local_irq_save(flags);
1041 cpu = raw_smp_processor_id(); 1242 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu]; 1243 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled); 1244 disabled = atomic_inc_return(&data->disabled);
1245 if (likely(disabled == 1)) {
1246 pc = preempt_count();
1247 __trace_graph_entry(tr, data, trace, flags, pc);
1248 }
1249 /* Only do the atomic if it is not already set */
1250 if (!test_tsk_trace_graph(current))
1251 set_tsk_trace_graph(current);
1252 atomic_dec(&data->disabled);
1253 local_irq_restore(flags);
1044 1254
1045 if (likely(disabled == 1)) 1255 return 1;
1046 trace_function(tr, data, ip, parent_ip, flags); 1256}
1047 1257
1258void trace_graph_return(struct ftrace_graph_ret *trace)
1259{
1260 struct trace_array *tr = &global_trace;
1261 struct trace_array_cpu *data;
1262 unsigned long flags;
1263 long disabled;
1264 int cpu;
1265 int pc;
1266
1267 local_irq_save(flags);
1268 cpu = raw_smp_processor_id();
1269 data = tr->data[cpu];
1270 disabled = atomic_inc_return(&data->disabled);
1271 if (likely(disabled == 1)) {
1272 pc = preempt_count();
1273 __trace_graph_return(tr, data, trace, flags, pc);
1274 }
1275 if (!trace->depth)
1276 clear_tsk_trace_graph(current);
1048 atomic_dec(&data->disabled); 1277 atomic_dec(&data->disabled);
1049 local_irq_restore(flags); 1278 local_irq_restore(flags);
1050} 1279}
1280#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1051 1281
1052static struct ftrace_ops trace_ops __read_mostly = 1282static struct ftrace_ops trace_ops __read_mostly =
1053{ 1283{
@@ -1057,9 +1287,14 @@ static struct ftrace_ops trace_ops __read_mostly =
1057void tracing_start_function_trace(void) 1287void tracing_start_function_trace(void)
1058{ 1288{
1059 ftrace_function_enabled = 0; 1289 ftrace_function_enabled = 0;
1290
1291 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1292 trace_ops.func = function_trace_call_preempt_only;
1293 else
1294 trace_ops.func = function_trace_call;
1295
1060 register_ftrace_function(&trace_ops); 1296 register_ftrace_function(&trace_ops);
1061 if (tracer_enabled) 1297 ftrace_function_enabled = 1;
1062 ftrace_function_enabled = 1;
1063} 1298}
1064 1299
1065void tracing_stop_function_trace(void) 1300void tracing_stop_function_trace(void)
@@ -1071,113 +1306,99 @@ void tracing_stop_function_trace(void)
1071 1306
1072enum trace_file_type { 1307enum trace_file_type {
1073 TRACE_FILE_LAT_FMT = 1, 1308 TRACE_FILE_LAT_FMT = 1,
1309 TRACE_FILE_ANNOTATE = 2,
1074}; 1310};
1075 1311
1076static struct trace_entry * 1312static void trace_iterator_increment(struct trace_iterator *iter)
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{ 1313{
1080 struct page *page; 1314 /* Don't allow ftrace to trace into the ring buffers */
1081 struct trace_entry *array; 1315 ftrace_disable_cpu();
1082 1316
1083 if (iter->next_idx[cpu] >= tr->entries || 1317 iter->idx++;
1084 iter->next_idx[cpu] >= data->trace_idx || 1318 if (iter->buffer_iter[iter->cpu])
1085 (data->trace_head == data->trace_tail && 1319 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088 1320
1089 if (!iter->next_page[cpu]) { 1321 ftrace_enable_cpu();
1090 /* Initialize the iterator for this cpu trace buffer */ 1322}
1091 WARN_ON(!data->trace_tail);
1092 page = virt_to_page(data->trace_tail);
1093 iter->next_page[cpu] = &page->lru;
1094 iter->next_page_idx[cpu] = data->trace_tail_idx;
1095 }
1096 1323
1097 page = list_entry(iter->next_page[cpu], struct page, lru); 1324static struct trace_entry *
1098 BUG_ON(&data->trace_pages == &page->lru); 1325peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1326{
1327 struct ring_buffer_event *event;
1328 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1099 1329
1100 array = page_address(page); 1330 /* Don't allow ftrace to trace into the ring buffers */
1331 ftrace_disable_cpu();
1101 1332
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); 1333 if (buf_iter)
1103 return &array[iter->next_page_idx[cpu]]; 1334 event = ring_buffer_iter_peek(buf_iter, ts);
1335 else
1336 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1337
1338 ftrace_enable_cpu();
1339
1340 return event ? ring_buffer_event_data(event) : NULL;
1104} 1341}
1105 1342
1106static struct trace_entry * 1343static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu) 1344__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1108{ 1345{
1109 struct trace_array *tr = iter->tr; 1346 struct ring_buffer *buffer = iter->tr->buffer;
1110 struct trace_entry *ent, *next = NULL; 1347 struct trace_entry *ent, *next = NULL;
1348 u64 next_ts = 0, ts;
1111 int next_cpu = -1; 1349 int next_cpu = -1;
1112 int cpu; 1350 int cpu;
1113 1351
1114 for_each_tracing_cpu(cpu) { 1352 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu])) 1353
1354 if (ring_buffer_empty_cpu(buffer, cpu))
1116 continue; 1355 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); 1356
1357 ent = peek_next_entry(iter, cpu, &ts);
1358
1118 /* 1359 /*
1119 * Pick the entry with the smallest timestamp: 1360 * Pick the entry with the smallest timestamp:
1120 */ 1361 */
1121 if (ent && (!next || ent->t < next->t)) { 1362 if (ent && (!next || ts < next_ts)) {
1122 next = ent; 1363 next = ent;
1123 next_cpu = cpu; 1364 next_cpu = cpu;
1365 next_ts = ts;
1124 } 1366 }
1125 } 1367 }
1126 1368
1127 if (ent_cpu) 1369 if (ent_cpu)
1128 *ent_cpu = next_cpu; 1370 *ent_cpu = next_cpu;
1129 1371
1372 if (ent_ts)
1373 *ent_ts = next_ts;
1374
1130 return next; 1375 return next;
1131} 1376}
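
__find_next_entry() above is effectively a k-way merge keyed on the event timestamp; a simplified model of the selection step (types and names here are illustrative, not kernel code):

/* Pick the CPU whose oldest pending event has the smallest timestamp. */
struct pending_event {
	int			present;	/* 0 if this CPU's buffer is empty */
	unsigned long long	ts;		/* timestamp of its oldest event */
};

static int pick_next_cpu(const struct pending_event *ev, int ncpus)
{
	unsigned long long next_ts = 0;
	int next_cpu = -1;
	int cpu;

	for (cpu = 0; cpu < ncpus; cpu++) {
		if (!ev[cpu].present)
			continue;
		if (next_cpu < 0 || ev[cpu].ts < next_ts) {
			next_cpu = cpu;
			next_ts = ev[cpu].ts;
		}
	}
	return next_cpu;	/* -1 when every per-cpu buffer is empty */
}
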
1132 1377
1133static void trace_iterator_increment(struct trace_iterator *iter) 1378/* Find the next real entry, without updating the iterator itself */
1379static struct trace_entry *
1380find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1134{ 1381{
1135 iter->idx++; 1382 return __find_next_entry(iter, ent_cpu, ent_ts);
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146} 1383}
1147 1384
1148static void trace_consume(struct trace_iterator *iter) 1385/* Find the next real entry, and increment the iterator to the next entry */
1386static void *find_next_entry_inc(struct trace_iterator *iter)
1149{ 1387{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu]; 1388 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1151 1389
1152 data->trace_tail_idx++; 1390 if (iter->ent)
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { 1391 trace_iterator_increment(iter);
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157 1392
1158 /* Check if we empty it, then reset the index */ 1393 return iter->ent ? iter : NULL;
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162} 1394}
1163 1395
1164static void *find_next_entry_inc(struct trace_iterator *iter) 1396static void trace_consume(struct trace_iterator *iter)
1165{ 1397{
1166 struct trace_entry *next; 1398 /* Don't allow ftrace to trace into the ring buffers */
1167 int next_cpu = -1; 1399 ftrace_disable_cpu();
1168 1400 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1169 next = find_next_entry(iter, &next_cpu); 1401 ftrace_enable_cpu();
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181} 1402}
1182 1403
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1404static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1210,7 +1431,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1210 struct trace_iterator *iter = m->private; 1431 struct trace_iterator *iter = m->private;
1211 void *p = NULL; 1432 void *p = NULL;
1212 loff_t l = 0; 1433 loff_t l = 0;
1213 int i; 1434 int cpu;
1214 1435
1215 mutex_lock(&trace_types_lock); 1436 mutex_lock(&trace_types_lock);
1216 1437
@@ -1221,22 +1442,19 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1221 1442
1222 atomic_inc(&trace_record_cmdline_disabled); 1443 atomic_inc(&trace_record_cmdline_disabled);
1223 1444
1224 /* let the tracer grab locks here if needed */
1225 if (current_trace->start)
1226 current_trace->start(iter);
1227
1228 if (*pos != iter->pos) { 1445 if (*pos != iter->pos) {
1229 iter->ent = NULL; 1446 iter->ent = NULL;
1230 iter->cpu = 0; 1447 iter->cpu = 0;
1231 iter->idx = -1; 1448 iter->idx = -1;
1232 iter->prev_ent = NULL;
1233 iter->prev_cpu = -1;
1234 1449
1235 for_each_tracing_cpu(i) { 1450 ftrace_disable_cpu();
1236 iter->next_idx[i] = 0; 1451
1237 iter->next_page[i] = NULL; 1452 for_each_tracing_cpu(cpu) {
1453 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1238 } 1454 }
1239 1455
1456 ftrace_enable_cpu();
1457
1240 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1458 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1241 ; 1459 ;
1242 1460
@@ -1250,28 +1468,24 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1250 1468
1251static void s_stop(struct seq_file *m, void *p) 1469static void s_stop(struct seq_file *m, void *p)
1252{ 1470{
1253 struct trace_iterator *iter = m->private;
1254
1255 atomic_dec(&trace_record_cmdline_disabled); 1471 atomic_dec(&trace_record_cmdline_disabled);
1256
1257 /* let the tracer release locks here if needed */
1258 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1259 iter->trace->stop(iter);
1260
1261 mutex_unlock(&trace_types_lock); 1472 mutex_unlock(&trace_types_lock);
1262} 1473}
1263 1474
1264#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1265
1266#ifdef CONFIG_KRETPROBES 1475#ifdef CONFIG_KRETPROBES
1267static inline int kretprobed(unsigned long addr) 1476static inline const char *kretprobed(const char *name)
1268{ 1477{
1269 return addr == (unsigned long)kretprobe_trampoline; 1478 static const char tramp_name[] = "kretprobe_trampoline";
1479 int size = sizeof(tramp_name);
1480
1481 if (strncmp(tramp_name, name, size) == 0)
1482 return "[unknown/kretprobe'd]";
1483 return name;
1270} 1484}
1271#else 1485#else
1272static inline int kretprobed(unsigned long addr) 1486static inline const char *kretprobed(const char *name)
1273{ 1487{
1274 return 0; 1488 return name;
1275} 1489}
1276#endif /* CONFIG_KRETPROBES */ 1490#endif /* CONFIG_KRETPROBES */
1277 1491
@@ -1280,10 +1494,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1280{ 1494{
1281#ifdef CONFIG_KALLSYMS 1495#ifdef CONFIG_KALLSYMS
1282 char str[KSYM_SYMBOL_LEN]; 1496 char str[KSYM_SYMBOL_LEN];
1497 const char *name;
1283 1498
1284 kallsyms_lookup(address, NULL, NULL, NULL, str); 1499 kallsyms_lookup(address, NULL, NULL, NULL, str);
1285 1500
1286 return trace_seq_printf(s, fmt, str); 1501 name = kretprobed(str);
1502
1503 return trace_seq_printf(s, fmt, name);
1287#endif 1504#endif
1288 return 1; 1505 return 1;
1289} 1506}
@@ -1294,9 +1511,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1294{ 1511{
1295#ifdef CONFIG_KALLSYMS 1512#ifdef CONFIG_KALLSYMS
1296 char str[KSYM_SYMBOL_LEN]; 1513 char str[KSYM_SYMBOL_LEN];
1514 const char *name;
1297 1515
1298 sprint_symbol(str, address); 1516 sprint_symbol(str, address);
1299 return trace_seq_printf(s, fmt, str); 1517 name = kretprobed(str);
1518
1519 return trace_seq_printf(s, fmt, name);
1300#endif 1520#endif
1301 return 1; 1521 return 1;
1302} 1522}
@@ -1307,7 +1527,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1307# define IP_FMT "%016lx" 1527# define IP_FMT "%016lx"
1308#endif 1528#endif
1309 1529
1310static int 1530int
1311seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) 1531seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1312{ 1532{
1313 int ret; 1533 int ret;
@@ -1328,23 +1548,95 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1328 return ret; 1548 return ret;
1329} 1549}
1330 1550
1551static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1552 unsigned long ip, unsigned long sym_flags)
1553{
1554 struct file *file = NULL;
1555 unsigned long vmstart = 0;
1556 int ret = 1;
1557
1558 if (mm) {
1559 const struct vm_area_struct *vma;
1560
1561 down_read(&mm->mmap_sem);
1562 vma = find_vma(mm, ip);
1563 if (vma) {
1564 file = vma->vm_file;
1565 vmstart = vma->vm_start;
1566 }
1567 if (file) {
1568 ret = trace_seq_path(s, &file->f_path);
1569 if (ret)
1570 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1571 }
1572 up_read(&mm->mmap_sem);
1573 }
1574 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1575 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1576 return ret;
1577}
1578
1579static int
1580seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1581 unsigned long sym_flags)
1582{
1583 struct mm_struct *mm = NULL;
1584 int ret = 1;
1585 unsigned int i;
1586
1587 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1588 struct task_struct *task;
1589 /*
1590 * we do the lookup on the thread group leader,
1591 * since individual threads might have already quit!
1592 */
1593 rcu_read_lock();
1594 task = find_task_by_vpid(entry->ent.tgid);
1595 if (task)
1596 mm = get_task_mm(task);
1597 rcu_read_unlock();
1598 }
1599
1600 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1601 unsigned long ip = entry->caller[i];
1602
1603 if (ip == ULONG_MAX || !ret)
1604 break;
1605 if (i && ret)
1606 ret = trace_seq_puts(s, " <- ");
1607 if (!ip) {
1608 if (ret)
1609 ret = trace_seq_puts(s, "??");
1610 continue;
1611 }
1612 if (!ret)
1613 break;
1614 if (ret)
1615 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1616 }
1617
1618 if (mm)
1619 mmput(mm);
1620 return ret;
1621}
1622
1331static void print_lat_help_header(struct seq_file *m) 1623static void print_lat_help_header(struct seq_file *m)
1332{ 1624{
1333 seq_puts(m, "# _------=> CPU# \n"); 1625 seq_puts(m, "# _------=> CPU# \n");
1334 seq_puts(m, "# / _-----=> irqs-off \n"); 1626 seq_puts(m, "# / _-----=> irqs-off \n");
1335 seq_puts(m, "# | / _----=> need-resched \n"); 1627 seq_puts(m, "# | / _----=> need-resched \n");
1336 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1628 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1337 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1629 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1338 seq_puts(m, "# |||| / \n"); 1630 seq_puts(m, "# |||| / \n");
1339 seq_puts(m, "# ||||| delay \n"); 1631 seq_puts(m, "# ||||| delay \n");
1340 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1632 seq_puts(m, "# cmd pid ||||| time | caller \n");
1341 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1633 seq_puts(m, "# \\ / ||||| \\ | / \n");
1342} 1634}
1343 1635
1344static void print_func_help_header(struct seq_file *m) 1636static void print_func_help_header(struct seq_file *m)
1345{ 1637{
1346 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1638 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1347 seq_puts(m, "# | | | | |\n"); 1639 seq_puts(m, "# | | | | |\n");
1348} 1640}
1349 1641
1350 1642
@@ -1355,23 +1647,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1355 struct trace_array *tr = iter->tr; 1647 struct trace_array *tr = iter->tr;
1356 struct trace_array_cpu *data = tr->data[tr->cpu]; 1648 struct trace_array_cpu *data = tr->data[tr->cpu];
1357 struct tracer *type = current_trace; 1649 struct tracer *type = current_trace;
1358 unsigned long total = 0; 1650 unsigned long total;
1359 unsigned long entries = 0; 1651 unsigned long entries;
1360 int cpu;
1361 const char *name = "preemption"; 1652 const char *name = "preemption";
1362 1653
1363 if (type) 1654 if (type)
1364 name = type->name; 1655 name = type->name;
1365 1656
1366 for_each_tracing_cpu(cpu) { 1657 entries = ring_buffer_entries(iter->tr->buffer);
1367 if (head_page(tr->data[cpu])) { 1658 total = entries +
1368 total += tr->data[cpu]->trace_idx; 1659 ring_buffer_overruns(iter->tr->buffer);
1369 if (tr->data[cpu]->trace_idx > tr->entries)
1370 entries += tr->entries;
1371 else
1372 entries += tr->data[cpu]->trace_idx;
1373 }
1374 }
1375 1660
1376 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1661 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1377 name, UTS_RELEASE); 1662 name, UTS_RELEASE);
@@ -1428,9 +1713,10 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1428 comm = trace_find_cmdline(entry->pid); 1713 comm = trace_find_cmdline(entry->pid);
1429 1714
1430 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1715 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1431 trace_seq_printf(s, "%d", cpu); 1716 trace_seq_printf(s, "%3d", cpu);
1432 trace_seq_printf(s, "%c%c", 1717 trace_seq_printf(s, "%c%c",
1433 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1718 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1719 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1434 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1720 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1435 1721
1436 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 1722 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1457,7 +1743,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1457unsigned long preempt_mark_thresh = 100; 1743unsigned long preempt_mark_thresh = 100;
1458 1744
1459static void 1745static void
1460lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, 1746lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1461 unsigned long rel_usecs) 1747 unsigned long rel_usecs)
1462{ 1748{
1463 trace_seq_printf(s, " %4lldus", abs_usecs); 1749 trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1757,101 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1471 1757
1472static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1758static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1473 1759
1474static int 1760static int task_state_char(unsigned long state)
1761{
1762 int bit = state ? __ffs(state) + 1 : 0;
1763
1764 return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
1765}
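
A short worked example of task_state_char() above, assuming TASK_STATE_TO_CHAR_STR begins "RSDTtZX":

/*
 * task_state_char(0) -> bit = 0                -> 'R' (TASK_RUNNING)
 * task_state_char(1) -> bit = __ffs(1) + 1 = 1 -> 'S' (TASK_INTERRUPTIBLE)
 * task_state_char(2) -> bit = __ffs(2) + 1 = 2 -> 'D' (TASK_UNINTERRUPTIBLE)
 * any state bit past the end of the string     -> '?'
 */
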
1766
1767/*
1768 * The message is supposed to contain an ending newline.
1769 * If the printing stops prematurely, try to add a newline of our own.
1770 */
1771void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1772{
1773 struct trace_entry *ent;
1774 struct trace_field_cont *cont;
1775 bool ok = true;
1776
1777 ent = peek_next_entry(iter, iter->cpu, NULL);
1778 if (!ent || ent->type != TRACE_CONT) {
1779 trace_seq_putc(s, '\n');
1780 return;
1781 }
1782
1783 do {
1784 cont = (struct trace_field_cont *)ent;
1785 if (ok)
1786 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1787
1788 ftrace_disable_cpu();
1789
1790 if (iter->buffer_iter[iter->cpu])
1791 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1792 else
1793 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1794
1795 ftrace_enable_cpu();
1796
1797 ent = peek_next_entry(iter, iter->cpu, NULL);
1798 } while (ent && ent->type == TRACE_CONT);
1799
1800 if (!ok)
1801 trace_seq_putc(s, '\n');
1802}
1803
1804static void test_cpu_buff_start(struct trace_iterator *iter)
1805{
1806 struct trace_seq *s = &iter->seq;
1807
1808 if (!(trace_flags & TRACE_ITER_ANNOTATE))
1809 return;
1810
1811 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1812 return;
1813
1814 if (cpu_isset(iter->cpu, iter->started))
1815 return;
1816
1817 cpu_set(iter->cpu, iter->started);
1818 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1819}
1820
1821static enum print_line_t
1475print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1822print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1476{ 1823{
1477 struct trace_seq *s = &iter->seq; 1824 struct trace_seq *s = &iter->seq;
1478 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1825 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1479 struct trace_entry *next_entry = find_next_entry(iter, NULL); 1826 struct trace_entry *next_entry;
1480 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 1827 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1481 struct trace_entry *entry = iter->ent; 1828 struct trace_entry *entry = iter->ent;
1482 unsigned long abs_usecs; 1829 unsigned long abs_usecs;
1483 unsigned long rel_usecs; 1830 unsigned long rel_usecs;
1831 u64 next_ts;
1484 char *comm; 1832 char *comm;
1485 int S, T; 1833 int S, T;
1486 int i; 1834 int i;
1487 unsigned state;
1488 1835
1836 if (entry->type == TRACE_CONT)
1837 return TRACE_TYPE_HANDLED;
1838
1839 test_cpu_buff_start(iter);
1840
1841 next_entry = find_next_entry(iter, NULL, &next_ts);
1489 if (!next_entry) 1842 if (!next_entry)
1490 next_entry = entry; 1843 next_ts = iter->ts;
1491 rel_usecs = ns2usecs(next_entry->t - entry->t); 1844 rel_usecs = ns2usecs(next_ts - iter->ts);
1492 abs_usecs = ns2usecs(entry->t - iter->tr->time_start); 1845 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1493 1846
1494 if (verbose) { 1847 if (verbose) {
1495 comm = trace_find_cmdline(entry->pid); 1848 comm = trace_find_cmdline(entry->pid);
1496 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" 1849 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1497 " %ld.%03ldms (+%ld.%03ldms): ", 1850 " %ld.%03ldms (+%ld.%03ldms): ",
1498 comm, 1851 comm,
1499 entry->pid, cpu, entry->flags, 1852 entry->pid, cpu, entry->flags,
1500 entry->preempt_count, trace_idx, 1853 entry->preempt_count, trace_idx,
1501 ns2usecs(entry->t), 1854 ns2usecs(iter->ts),
1502 abs_usecs/1000, 1855 abs_usecs/1000,
1503 abs_usecs % 1000, rel_usecs/1000, 1856 abs_usecs % 1000, rel_usecs/1000,
1504 rel_usecs % 1000); 1857 rel_usecs % 1000);
@@ -1507,52 +1860,99 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1507 lat_print_timestamp(s, abs_usecs, rel_usecs); 1860 lat_print_timestamp(s, abs_usecs, rel_usecs);
1508 } 1861 }
1509 switch (entry->type) { 1862 switch (entry->type) {
1510 case TRACE_FN: 1863 case TRACE_FN: {
1511 seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1864 struct ftrace_entry *field;
1865
1866 trace_assign_type(field, entry);
1867
1868 seq_print_ip_sym(s, field->ip, sym_flags);
1512 trace_seq_puts(s, " ("); 1869 trace_seq_puts(s, " (");
1513 if (kretprobed(entry->fn.parent_ip)) 1870 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1514 trace_seq_puts(s, KRETPROBE_MSG);
1515 else
1516 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1517 trace_seq_puts(s, ")\n"); 1871 trace_seq_puts(s, ")\n");
1518 break; 1872 break;
1873 }
1519 case TRACE_CTX: 1874 case TRACE_CTX:
1520 case TRACE_WAKE: 1875 case TRACE_WAKE: {
1521 T = entry->ctx.next_state < sizeof(state_to_char) ? 1876 struct ctx_switch_entry *field;
1522 state_to_char[entry->ctx.next_state] : 'X'; 1877
1523 1878 trace_assign_type(field, entry);
1524 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; 1879
1525 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; 1880 T = task_state_char(field->next_state);
1526 comm = trace_find_cmdline(entry->ctx.next_pid); 1881 S = task_state_char(field->prev_state);
1527 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", 1882 comm = trace_find_cmdline(field->next_pid);
1528 entry->ctx.prev_pid, 1883 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1529 entry->ctx.prev_prio, 1884 field->prev_pid,
1885 field->prev_prio,
1530 S, entry->type == TRACE_CTX ? "==>" : " +", 1886 S, entry->type == TRACE_CTX ? "==>" : " +",
1531 entry->ctx.next_pid, 1887 field->next_cpu,
1532 entry->ctx.next_prio, 1888 field->next_pid,
1889 field->next_prio,
1533 T, comm); 1890 T, comm);
1534 break; 1891 break;
1535 case TRACE_SPECIAL: 1892 }
1893 case TRACE_SPECIAL: {
1894 struct special_entry *field;
1895
1896 trace_assign_type(field, entry);
1897
1536 trace_seq_printf(s, "# %ld %ld %ld\n", 1898 trace_seq_printf(s, "# %ld %ld %ld\n",
1537 entry->special.arg1, 1899 field->arg1,
1538 entry->special.arg2, 1900 field->arg2,
1539 entry->special.arg3); 1901 field->arg3);
1540 break; 1902 break;
1541 case TRACE_STACK: 1903 }
1904 case TRACE_STACK: {
1905 struct stack_entry *field;
1906
1907 trace_assign_type(field, entry);
1908
1542 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1909 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1543 if (i) 1910 if (i)
1544 trace_seq_puts(s, " <= "); 1911 trace_seq_puts(s, " <= ");
1545 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); 1912 seq_print_ip_sym(s, field->caller[i], sym_flags);
1546 } 1913 }
1547 trace_seq_puts(s, "\n"); 1914 trace_seq_puts(s, "\n");
1548 break; 1915 break;
1916 }
1917 case TRACE_PRINT: {
1918 struct print_entry *field;
1919
1920 trace_assign_type(field, entry);
1921
1922 seq_print_ip_sym(s, field->ip, sym_flags);
1923 trace_seq_printf(s, ": %s", field->buf);
1924 if (entry->flags & TRACE_FLAG_CONT)
1925 trace_seq_print_cont(s, iter);
1926 break;
1927 }
1928 case TRACE_BRANCH: {
1929 struct trace_branch *field;
1930
1931 trace_assign_type(field, entry);
1932
1933 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1934 field->correct ? " ok " : " MISS ",
1935 field->func,
1936 field->file,
1937 field->line);
1938 break;
1939 }
1940 case TRACE_USER_STACK: {
1941 struct userstack_entry *field;
1942
1943 trace_assign_type(field, entry);
1944
1945 seq_print_userip_objs(field, s, sym_flags);
1946 trace_seq_putc(s, '\n');
1947 break;
1948 }
1549 default: 1949 default:
1550 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1950 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1551 } 1951 }
1552 return 1; 1952 return TRACE_TYPE_HANDLED;
1553} 1953}
1554 1954
1555static int print_trace_fmt(struct trace_iterator *iter) 1955static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1556{ 1956{
1557 struct trace_seq *s = &iter->seq; 1957 struct trace_seq *s = &iter->seq;
1558 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1958 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1967,154 @@ static int print_trace_fmt(struct trace_iterator *iter)
1567 1967
1568 entry = iter->ent; 1968 entry = iter->ent;
1569 1969
1970 if (entry->type == TRACE_CONT)
1971 return TRACE_TYPE_HANDLED;
1972
1973 test_cpu_buff_start(iter);
1974
1570 comm = trace_find_cmdline(iter->ent->pid); 1975 comm = trace_find_cmdline(iter->ent->pid);
1571 1976
1572 t = ns2usecs(entry->t); 1977 t = ns2usecs(iter->ts);
1573 usec_rem = do_div(t, 1000000ULL); 1978 usec_rem = do_div(t, 1000000ULL);
1574 secs = (unsigned long)t; 1979 secs = (unsigned long)t;
1575 1980
1576 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1981 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1577 if (!ret) 1982 if (!ret)
1578 return 0; 1983 return TRACE_TYPE_PARTIAL_LINE;
1579 ret = trace_seq_printf(s, "[%02d] ", iter->cpu); 1984 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1580 if (!ret) 1985 if (!ret)
1581 return 0; 1986 return TRACE_TYPE_PARTIAL_LINE;
1582 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); 1987 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1583 if (!ret) 1988 if (!ret)
1584 return 0; 1989 return TRACE_TYPE_PARTIAL_LINE;
1585 1990
1586 switch (entry->type) { 1991 switch (entry->type) {
1587 case TRACE_FN: 1992 case TRACE_FN: {
1588 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1993 struct ftrace_entry *field;
1994
1995 trace_assign_type(field, entry);
1996
1997 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1589 if (!ret) 1998 if (!ret)
1590 return 0; 1999 return TRACE_TYPE_PARTIAL_LINE;
1591 if ((sym_flags & TRACE_ITER_PRINT_PARENT) && 2000 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1592 entry->fn.parent_ip) { 2001 field->parent_ip) {
1593 ret = trace_seq_printf(s, " <-"); 2002 ret = trace_seq_printf(s, " <-");
1594 if (!ret) 2003 if (!ret)
1595 return 0; 2004 return TRACE_TYPE_PARTIAL_LINE;
1596 if (kretprobed(entry->fn.parent_ip)) 2005 ret = seq_print_ip_sym(s,
1597 ret = trace_seq_puts(s, KRETPROBE_MSG); 2006 field->parent_ip,
1598 else 2007 sym_flags);
1599 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1600 sym_flags);
1601 if (!ret) 2008 if (!ret)
1602 return 0; 2009 return TRACE_TYPE_PARTIAL_LINE;
1603 } 2010 }
1604 ret = trace_seq_printf(s, "\n"); 2011 ret = trace_seq_printf(s, "\n");
1605 if (!ret) 2012 if (!ret)
1606 return 0; 2013 return TRACE_TYPE_PARTIAL_LINE;
1607 break; 2014 break;
2015 }
1608 case TRACE_CTX: 2016 case TRACE_CTX:
1609 case TRACE_WAKE: 2017 case TRACE_WAKE: {
1610 S = entry->ctx.prev_state < sizeof(state_to_char) ? 2018 struct ctx_switch_entry *field;
1611 state_to_char[entry->ctx.prev_state] : 'X'; 2019
1612 T = entry->ctx.next_state < sizeof(state_to_char) ? 2020 trace_assign_type(field, entry);
1613 state_to_char[entry->ctx.next_state] : 'X'; 2021
1614 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", 2022 T = task_state_char(field->next_state);
1615 entry->ctx.prev_pid, 2023 S = task_state_char(field->prev_state);
1616 entry->ctx.prev_prio, 2024 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
2025 field->prev_pid,
2026 field->prev_prio,
1617 S, 2027 S,
1618 entry->type == TRACE_CTX ? "==>" : " +", 2028 entry->type == TRACE_CTX ? "==>" : " +",
1619 entry->ctx.next_pid, 2029 field->next_cpu,
1620 entry->ctx.next_prio, 2030 field->next_pid,
2031 field->next_prio,
1621 T); 2032 T);
1622 if (!ret) 2033 if (!ret)
1623 return 0; 2034 return TRACE_TYPE_PARTIAL_LINE;
1624 break; 2035 break;
1625 case TRACE_SPECIAL: 2036 }
2037 case TRACE_SPECIAL: {
2038 struct special_entry *field;
2039
2040 trace_assign_type(field, entry);
2041
1626 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 2042 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1627 entry->special.arg1, 2043 field->arg1,
1628 entry->special.arg2, 2044 field->arg2,
1629 entry->special.arg3); 2045 field->arg3);
1630 if (!ret) 2046 if (!ret)
1631 return 0; 2047 return TRACE_TYPE_PARTIAL_LINE;
1632 break; 2048 break;
1633 case TRACE_STACK: 2049 }
2050 case TRACE_STACK: {
2051 struct stack_entry *field;
2052
2053 trace_assign_type(field, entry);
2054
1634 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 2055 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1635 if (i) { 2056 if (i) {
1636 ret = trace_seq_puts(s, " <= "); 2057 ret = trace_seq_puts(s, " <= ");
1637 if (!ret) 2058 if (!ret)
1638 return 0; 2059 return TRACE_TYPE_PARTIAL_LINE;
1639 } 2060 }
1640 ret = seq_print_ip_sym(s, entry->stack.caller[i], 2061 ret = seq_print_ip_sym(s, field->caller[i],
1641 sym_flags); 2062 sym_flags);
1642 if (!ret) 2063 if (!ret)
1643 return 0; 2064 return TRACE_TYPE_PARTIAL_LINE;
1644 } 2065 }
1645 ret = trace_seq_puts(s, "\n"); 2066 ret = trace_seq_puts(s, "\n");
1646 if (!ret) 2067 if (!ret)
1647 return 0; 2068 return TRACE_TYPE_PARTIAL_LINE;
1648 break; 2069 break;
1649 } 2070 }
1650 return 1; 2071 case TRACE_PRINT: {
2072 struct print_entry *field;
2073
2074 trace_assign_type(field, entry);
2075
2076 seq_print_ip_sym(s, field->ip, sym_flags);
2077 trace_seq_printf(s, ": %s", field->buf);
2078 if (entry->flags & TRACE_FLAG_CONT)
2079 trace_seq_print_cont(s, iter);
2080 break;
2081 }
2082 case TRACE_GRAPH_RET: {
2083 return print_graph_function(iter);
2084 }
2085 case TRACE_GRAPH_ENT: {
2086 return print_graph_function(iter);
2087 }
2088 case TRACE_BRANCH: {
2089 struct trace_branch *field;
2090
2091 trace_assign_type(field, entry);
2092
2093 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2094 field->correct ? " ok " : " MISS ",
2095 field->func,
2096 field->file,
2097 field->line);
2098 break;
2099 }
2100 case TRACE_USER_STACK: {
2101 struct userstack_entry *field;
2102
2103 trace_assign_type(field, entry);
2104
2105 ret = seq_print_userip_objs(field, s, sym_flags);
2106 if (!ret)
2107 return TRACE_TYPE_PARTIAL_LINE;
2108 ret = trace_seq_putc(s, '\n');
2109 if (!ret)
2110 return TRACE_TYPE_PARTIAL_LINE;
2111 break;
2112 }
2113 }
2114 return TRACE_TYPE_HANDLED;
1651} 2115}
1652 2116
1653static int print_raw_fmt(struct trace_iterator *iter) 2117static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1654{ 2118{
1655 struct trace_seq *s = &iter->seq; 2119 struct trace_seq *s = &iter->seq;
1656 struct trace_entry *entry; 2120 struct trace_entry *entry;
@@ -1659,47 +2123,75 @@ static int print_raw_fmt(struct trace_iterator *iter)
1659 2123
1660 entry = iter->ent; 2124 entry = iter->ent;
1661 2125
2126 if (entry->type == TRACE_CONT)
2127 return TRACE_TYPE_HANDLED;
2128
1662 ret = trace_seq_printf(s, "%d %d %llu ", 2129 ret = trace_seq_printf(s, "%d %d %llu ",
1663 entry->pid, iter->cpu, entry->t); 2130 entry->pid, iter->cpu, iter->ts);
1664 if (!ret) 2131 if (!ret)
1665 return 0; 2132 return TRACE_TYPE_PARTIAL_LINE;
1666 2133
1667 switch (entry->type) { 2134 switch (entry->type) {
1668 case TRACE_FN: 2135 case TRACE_FN: {
2136 struct ftrace_entry *field;
2137
2138 trace_assign_type(field, entry);
2139
1669 ret = trace_seq_printf(s, "%x %x\n", 2140 ret = trace_seq_printf(s, "%x %x\n",
1670 entry->fn.ip, entry->fn.parent_ip); 2141 field->ip,
2142 field->parent_ip);
1671 if (!ret) 2143 if (!ret)
1672 return 0; 2144 return TRACE_TYPE_PARTIAL_LINE;
1673 break; 2145 break;
2146 }
1674 case TRACE_CTX: 2147 case TRACE_CTX:
1675 case TRACE_WAKE: 2148 case TRACE_WAKE: {
1676 S = entry->ctx.prev_state < sizeof(state_to_char) ? 2149 struct ctx_switch_entry *field;
1677 state_to_char[entry->ctx.prev_state] : 'X'; 2150
1678 T = entry->ctx.next_state < sizeof(state_to_char) ? 2151 trace_assign_type(field, entry);
1679 state_to_char[entry->ctx.next_state] : 'X'; 2152
1680 if (entry->type == TRACE_WAKE) 2153 T = task_state_char(field->next_state);
1681 S = '+'; 2154 S = entry->type == TRACE_WAKE ? '+' :
1682 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", 2155 task_state_char(field->prev_state);
1683 entry->ctx.prev_pid, 2156 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1684 entry->ctx.prev_prio, 2157 field->prev_pid,
2158 field->prev_prio,
1685 S, 2159 S,
1686 entry->ctx.next_pid, 2160 field->next_cpu,
1687 entry->ctx.next_prio, 2161 field->next_pid,
2162 field->next_prio,
1688 T); 2163 T);
1689 if (!ret) 2164 if (!ret)
1690 return 0; 2165 return TRACE_TYPE_PARTIAL_LINE;
1691 break; 2166 break;
2167 }
1692 case TRACE_SPECIAL: 2168 case TRACE_SPECIAL:
1693 case TRACE_STACK: 2169 case TRACE_USER_STACK:
2170 case TRACE_STACK: {
2171 struct special_entry *field;
2172
2173 trace_assign_type(field, entry);
2174
1694 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 2175 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1695 entry->special.arg1, 2176 field->arg1,
1696 entry->special.arg2, 2177 field->arg2,
1697 entry->special.arg3); 2178 field->arg3);
1698 if (!ret) 2179 if (!ret)
1699 return 0; 2180 return TRACE_TYPE_PARTIAL_LINE;
1700 break; 2181 break;
1701 } 2182 }
1702 return 1; 2183 case TRACE_PRINT: {
2184 struct print_entry *field;
2185
2186 trace_assign_type(field, entry);
2187
2188 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
2189 if (entry->flags & TRACE_FLAG_CONT)
2190 trace_seq_print_cont(s, iter);
2191 break;
2192 }
2193 }
2194 return TRACE_TYPE_HANDLED;
1703} 2195}
1704 2196
1705#define SEQ_PUT_FIELD_RET(s, x) \ 2197#define SEQ_PUT_FIELD_RET(s, x) \
@@ -1710,11 +2202,12 @@ do { \
1710 2202
1711#define SEQ_PUT_HEX_FIELD_RET(s, x) \ 2203#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1712do { \ 2204do { \
2205 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
1713 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ 2206 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1714 return 0; \ 2207 return 0; \
1715} while (0) 2208} while (0)
1716 2209
1717static int print_hex_fmt(struct trace_iterator *iter) 2210static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1718{ 2211{
1719 struct trace_seq *s = &iter->seq; 2212 struct trace_seq *s = &iter->seq;
1720 unsigned char newline = '\n'; 2213 unsigned char newline = '\n';
@@ -1723,97 +2216,162 @@ static int print_hex_fmt(struct trace_iterator *iter)
1723 2216
1724 entry = iter->ent; 2217 entry = iter->ent;
1725 2218
2219 if (entry->type == TRACE_CONT)
2220 return TRACE_TYPE_HANDLED;
2221
1726 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 2222 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1727 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 2223 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1728 SEQ_PUT_HEX_FIELD_RET(s, entry->t); 2224 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1729 2225
1730 switch (entry->type) { 2226 switch (entry->type) {
1731 case TRACE_FN: 2227 case TRACE_FN: {
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); 2228 struct ftrace_entry *field;
1733 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 2229
2230 trace_assign_type(field, entry);
2231
2232 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
2233 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1734 break; 2234 break;
2235 }
1735 case TRACE_CTX: 2236 case TRACE_CTX:
1736 case TRACE_WAKE: 2237 case TRACE_WAKE: {
1737 S = entry->ctx.prev_state < sizeof(state_to_char) ? 2238 struct ctx_switch_entry *field;
1738 state_to_char[entry->ctx.prev_state] : 'X'; 2239
1739 T = entry->ctx.next_state < sizeof(state_to_char) ? 2240 trace_assign_type(field, entry);
1740 state_to_char[entry->ctx.next_state] : 'X'; 2241
1741 if (entry->type == TRACE_WAKE) 2242 T = task_state_char(field->next_state);
1742 S = '+'; 2243 S = entry->type == TRACE_WAKE ? '+' :
1743 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); 2244 task_state_char(field->prev_state);
1744 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); 2245 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
2246 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1745 SEQ_PUT_HEX_FIELD_RET(s, S); 2247 SEQ_PUT_HEX_FIELD_RET(s, S);
1746 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); 2248 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); 2249 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 2250 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1749 SEQ_PUT_HEX_FIELD_RET(s, T); 2251 SEQ_PUT_HEX_FIELD_RET(s, T);
1750 break; 2252 break;
2253 }
1751 case TRACE_SPECIAL: 2254 case TRACE_SPECIAL:
1752 case TRACE_STACK: 2255 case TRACE_USER_STACK:
1753 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); 2256 case TRACE_STACK: {
1754 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); 2257 struct special_entry *field;
1755 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); 2258
2259 trace_assign_type(field, entry);
2260
2261 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
2262 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
2263 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1756 break; 2264 break;
1757 } 2265 }
2266 }
1758 SEQ_PUT_FIELD_RET(s, newline); 2267 SEQ_PUT_FIELD_RET(s, newline);
1759 2268
1760 return 1; 2269 return TRACE_TYPE_HANDLED;
2270}
2271
2272static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2273{
2274 struct trace_seq *s = &iter->seq;
2275 struct trace_entry *entry = iter->ent;
2276 struct print_entry *field;
2277 int ret;
2278
2279 trace_assign_type(field, entry);
2280
2281 ret = trace_seq_printf(s, field->buf);
2282 if (!ret)
2283 return TRACE_TYPE_PARTIAL_LINE;
2284
2285 if (entry->flags & TRACE_FLAG_CONT)
2286 trace_seq_print_cont(s, iter);
2287
2288 return TRACE_TYPE_HANDLED;
1761} 2289}
1762 2290
1763static int print_bin_fmt(struct trace_iterator *iter) 2291static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1764{ 2292{
1765 struct trace_seq *s = &iter->seq; 2293 struct trace_seq *s = &iter->seq;
1766 struct trace_entry *entry; 2294 struct trace_entry *entry;
1767 2295
1768 entry = iter->ent; 2296 entry = iter->ent;
1769 2297
2298 if (entry->type == TRACE_CONT)
2299 return TRACE_TYPE_HANDLED;
2300
1770 SEQ_PUT_FIELD_RET(s, entry->pid); 2301 SEQ_PUT_FIELD_RET(s, entry->pid);
1771 SEQ_PUT_FIELD_RET(s, entry->cpu); 2302 SEQ_PUT_FIELD_RET(s, entry->cpu);
1772 SEQ_PUT_FIELD_RET(s, entry->t); 2303 SEQ_PUT_FIELD_RET(s, iter->ts);
1773 2304
1774 switch (entry->type) { 2305 switch (entry->type) {
1775 case TRACE_FN: 2306 case TRACE_FN: {
1776 SEQ_PUT_FIELD_RET(s, entry->fn.ip); 2307 struct ftrace_entry *field;
1777 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); 2308
2309 trace_assign_type(field, entry);
2310
2311 SEQ_PUT_FIELD_RET(s, field->ip);
2312 SEQ_PUT_FIELD_RET(s, field->parent_ip);
1778 break; 2313 break;
1779 case TRACE_CTX: 2314 }
1780 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); 2315 case TRACE_CTX: {
1781 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); 2316 struct ctx_switch_entry *field;
1782 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); 2317
1783 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); 2318 trace_assign_type(field, entry);
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); 2319
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); 2320 SEQ_PUT_FIELD_RET(s, field->prev_pid);
2321 SEQ_PUT_FIELD_RET(s, field->prev_prio);
2322 SEQ_PUT_FIELD_RET(s, field->prev_state);
2323 SEQ_PUT_FIELD_RET(s, field->next_pid);
2324 SEQ_PUT_FIELD_RET(s, field->next_prio);
2325 SEQ_PUT_FIELD_RET(s, field->next_state);
1786 break; 2326 break;
2327 }
1787 case TRACE_SPECIAL: 2328 case TRACE_SPECIAL:
1788 case TRACE_STACK: 2329 case TRACE_USER_STACK:
1789 SEQ_PUT_FIELD_RET(s, entry->special.arg1); 2330 case TRACE_STACK: {
1790 SEQ_PUT_FIELD_RET(s, entry->special.arg2); 2331 struct special_entry *field;
1791 SEQ_PUT_FIELD_RET(s, entry->special.arg3); 2332
2333 trace_assign_type(field, entry);
2334
2335 SEQ_PUT_FIELD_RET(s, field->arg1);
2336 SEQ_PUT_FIELD_RET(s, field->arg2);
2337 SEQ_PUT_FIELD_RET(s, field->arg3);
1792 break; 2338 break;
1793 } 2339 }
2340 }
1794 return 1; 2341 return 1;
1795} 2342}
1796 2343
1797static int trace_empty(struct trace_iterator *iter) 2344static int trace_empty(struct trace_iterator *iter)
1798{ 2345{
1799 struct trace_array_cpu *data;
1800 int cpu; 2346 int cpu;
1801 2347
1802 for_each_tracing_cpu(cpu) { 2348 for_each_tracing_cpu(cpu) {
1803 data = iter->tr->data[cpu]; 2349 if (iter->buffer_iter[cpu]) {
1804 2350 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1805 if (head_page(data) && data->trace_idx && 2351 return 0;
1806 (data->trace_tail != data->trace_head || 2352 } else {
1807 data->trace_tail_idx != data->trace_head_idx)) 2353 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1808 return 0; 2354 return 0;
2355 }
1809 } 2356 }
2357
1810 return 1; 2358 return 1;
1811} 2359}
1812 2360
1813static int print_trace_line(struct trace_iterator *iter) 2361static enum print_line_t print_trace_line(struct trace_iterator *iter)
1814{ 2362{
1815 if (iter->trace && iter->trace->print_line) 2363 enum print_line_t ret;
1816 return iter->trace->print_line(iter); 2364
2365 if (iter->trace && iter->trace->print_line) {
2366 ret = iter->trace->print_line(iter);
2367 if (ret != TRACE_TYPE_UNHANDLED)
2368 return ret;
2369 }
2370
2371 if (iter->ent->type == TRACE_PRINT &&
2372 trace_flags & TRACE_ITER_PRINTK &&
2373 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2374 return print_printk_msg_only(iter);
1817 2375
1818 if (trace_flags & TRACE_ITER_BIN) 2376 if (trace_flags & TRACE_ITER_BIN)
1819 return print_bin_fmt(iter); 2377 return print_bin_fmt(iter);
@@ -1839,7 +2397,9 @@ static int s_show(struct seq_file *m, void *v)
1839 seq_printf(m, "# tracer: %s\n", iter->trace->name); 2397 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1840 seq_puts(m, "#\n"); 2398 seq_puts(m, "#\n");
1841 } 2399 }
1842 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2400 if (iter->trace && iter->trace->print_header)
2401 iter->trace->print_header(m);
2402 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1843 /* print nothing if the buffers are empty */ 2403 /* print nothing if the buffers are empty */
1844 if (trace_empty(iter)) 2404 if (trace_empty(iter))
1845 return 0; 2405 return 0;
@@ -1869,6 +2429,8 @@ static struct trace_iterator *
1869__tracing_open(struct inode *inode, struct file *file, int *ret) 2429__tracing_open(struct inode *inode, struct file *file, int *ret)
1870{ 2430{
1871 struct trace_iterator *iter; 2431 struct trace_iterator *iter;
2432 struct seq_file *m;
2433 int cpu;
1872 2434
1873 if (tracing_disabled) { 2435 if (tracing_disabled) {
1874 *ret = -ENODEV; 2436 *ret = -ENODEV;
@@ -1889,28 +2451,49 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1889 iter->trace = current_trace; 2451 iter->trace = current_trace;
1890 iter->pos = -1; 2452 iter->pos = -1;
1891 2453
2454 /* Notify the tracer early; before we stop tracing. */
2455 if (iter->trace && iter->trace->open)
2456 iter->trace->open(iter);
2457
2458 /* Annotate start of buffers if we had overruns */
2459 if (ring_buffer_overruns(iter->tr->buffer))
2460 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2461
2462
2463 for_each_tracing_cpu(cpu) {
2464
2465 iter->buffer_iter[cpu] =
2466 ring_buffer_read_start(iter->tr->buffer, cpu);
2467
2468 if (!iter->buffer_iter[cpu])
2469 goto fail_buffer;
2470 }
2471
1892 /* TODO stop tracer */ 2472 /* TODO stop tracer */
1893 *ret = seq_open(file, &tracer_seq_ops); 2473 *ret = seq_open(file, &tracer_seq_ops);
1894 if (!*ret) { 2474 if (*ret)
1895 struct seq_file *m = file->private_data; 2475 goto fail_buffer;
1896 m->private = iter;
1897 2476
1898 /* stop the trace while dumping */ 2477 m = file->private_data;
1899 if (iter->tr->ctrl) { 2478 m->private = iter;
1900 tracer_enabled = 0; 2479
1901 ftrace_function_enabled = 0; 2480 /* stop the trace while dumping */
1902 } 2481 tracing_stop();
1903 2482
1904 if (iter->trace && iter->trace->open)
1905 iter->trace->open(iter);
1906 } else {
1907 kfree(iter);
1908 iter = NULL;
1909 }
1910 mutex_unlock(&trace_types_lock); 2483 mutex_unlock(&trace_types_lock);
1911 2484
1912 out: 2485 out:
1913 return iter; 2486 return iter;
2487
2488 fail_buffer:
2489 for_each_tracing_cpu(cpu) {
2490 if (iter->buffer_iter[cpu])
2491 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2492 }
2493 mutex_unlock(&trace_types_lock);
2494 kfree(iter);
2495
2496 return ERR_PTR(-ENOMEM);
1914} 2497}
1915 2498
1916int tracing_open_generic(struct inode *inode, struct file *filp) 2499int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1926,20 +2509,19 @@ int tracing_release(struct inode *inode, struct file *file)
1926{ 2509{
1927 struct seq_file *m = (struct seq_file *)file->private_data; 2510 struct seq_file *m = (struct seq_file *)file->private_data;
1928 struct trace_iterator *iter = m->private; 2511 struct trace_iterator *iter = m->private;
2512 int cpu;
1929 2513
1930 mutex_lock(&trace_types_lock); 2514 mutex_lock(&trace_types_lock);
2515 for_each_tracing_cpu(cpu) {
2516 if (iter->buffer_iter[cpu])
2517 ring_buffer_read_finish(iter->buffer_iter[cpu]);
2518 }
2519
1931 if (iter->trace && iter->trace->close) 2520 if (iter->trace && iter->trace->close)
1932 iter->trace->close(iter); 2521 iter->trace->close(iter);
1933 2522
1934 /* reenable tracing if it was previously enabled */ 2523 /* reenable tracing if it was previously enabled */
1935 if (iter->tr->ctrl) { 2524 tracing_start();
1936 tracer_enabled = 1;
1937 /*
1938 * It is safe to enable function tracing even if it
1939 * isn't used
1940 */
1941 ftrace_function_enabled = 1;
1942 }
1943 mutex_unlock(&trace_types_lock); 2525 mutex_unlock(&trace_types_lock);
1944 2526
1945 seq_release(inode, file); 2527 seq_release(inode, file);
@@ -2117,7 +2699,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2117 if (err) 2699 if (err)
2118 goto err_unlock; 2700 goto err_unlock;
2119 2701
2120 raw_local_irq_disable(); 2702 local_irq_disable();
2121 __raw_spin_lock(&ftrace_max_lock); 2703 __raw_spin_lock(&ftrace_max_lock);
2122 for_each_tracing_cpu(cpu) { 2704 for_each_tracing_cpu(cpu) {
2123 /* 2705 /*
@@ -2134,7 +2716,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2134 } 2716 }
2135 } 2717 }
2136 __raw_spin_unlock(&ftrace_max_lock); 2718 __raw_spin_unlock(&ftrace_max_lock);
2137 raw_local_irq_enable(); 2719 local_irq_enable();
2138 2720
2139 tracing_cpumask = tracing_cpumask_new; 2721 tracing_cpumask = tracing_cpumask_new;
2140 2722
@@ -2155,13 +2737,16 @@ static struct file_operations tracing_cpumask_fops = {
2155}; 2737};
2156 2738
2157static ssize_t 2739static ssize_t
2158tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, 2740tracing_trace_options_read(struct file *filp, char __user *ubuf,
2159 size_t cnt, loff_t *ppos) 2741 size_t cnt, loff_t *ppos)
2160{ 2742{
2743 int i;
2161 char *buf; 2744 char *buf;
2162 int r = 0; 2745 int r = 0;
2163 int len = 0; 2746 int len = 0;
2164 int i; 2747 u32 tracer_flags = current_trace->flags->val;
2748 struct tracer_opt *trace_opts = current_trace->flags->opts;
2749
2165 2750
2166 /* calculate max size */ 2751 /* calculate max size */
2167 for (i = 0; trace_options[i]; i++) { 2752 for (i = 0; trace_options[i]; i++) {
@@ -2169,6 +2754,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2169 len += 3; /* "no" and space */ 2754 len += 3; /* "no" and space */
2170 } 2755 }
2171 2756
2757 /*
2758 * Increase the size with the names of the options specific
2759 * to the current tracer.
2760 */
2761 for (i = 0; trace_opts[i].name; i++) {
2762 len += strlen(trace_opts[i].name);
2763 len += 3; /* "no" and space */
2764 }
2765
2172 /* +2 for \n and \0 */ 2766 /* +2 for \n and \0 */
2173 buf = kmalloc(len + 2, GFP_KERNEL); 2767 buf = kmalloc(len + 2, GFP_KERNEL);
2174 if (!buf) 2768 if (!buf)
@@ -2181,6 +2775,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2181 r += sprintf(buf + r, "no%s ", trace_options[i]); 2775 r += sprintf(buf + r, "no%s ", trace_options[i]);
2182 } 2776 }
2183 2777
2778 for (i = 0; trace_opts[i].name; i++) {
2779 if (tracer_flags & trace_opts[i].bit)
2780 r += sprintf(buf + r, "%s ",
2781 trace_opts[i].name);
2782 else
2783 r += sprintf(buf + r, "no%s ",
2784 trace_opts[i].name);
2785 }
2786
2184 r += sprintf(buf + r, "\n"); 2787 r += sprintf(buf + r, "\n");
2185 WARN_ON(r >= len + 2); 2788 WARN_ON(r >= len + 2);
2186 2789
@@ -2191,13 +2794,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2191 return r; 2794 return r;
2192} 2795}
2193 2796
2797/* Try to assign a tracer specific option */
2798static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2799{
2800 struct tracer_flags *trace_flags = trace->flags;
2801 struct tracer_opt *opts = NULL;
2802 int ret = 0, i = 0;
2803 int len;
2804
2805 for (i = 0; trace_flags->opts[i].name; i++) {
2806 opts = &trace_flags->opts[i];
2807 len = strlen(opts->name);
2808
2809 if (strncmp(cmp, opts->name, len) == 0) {
2810 ret = trace->set_flag(trace_flags->val,
2811 opts->bit, !neg);
2812 break;
2813 }
2814 }
2815 /* Not found */
2816 if (!trace_flags->opts[i].name)
2817 return -EINVAL;
2818
2819 /* Refused to handle */
2820 if (ret)
2821 return ret;
2822
2823 if (neg)
2824 trace_flags->val &= ~opts->bit;
2825 else
2826 trace_flags->val |= opts->bit;
2827
2828 return 0;
2829}
2830
2194static ssize_t 2831static ssize_t
2195tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, 2832tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2196 size_t cnt, loff_t *ppos) 2833 size_t cnt, loff_t *ppos)
2197{ 2834{
2198 char buf[64]; 2835 char buf[64];
2199 char *cmp = buf; 2836 char *cmp = buf;
2200 int neg = 0; 2837 int neg = 0;
2838 int ret;
2201 int i; 2839 int i;
2202 2840
2203 if (cnt >= sizeof(buf)) 2841 if (cnt >= sizeof(buf))
@@ -2224,11 +2862,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2224 break; 2862 break;
2225 } 2863 }
2226 } 2864 }
2227 /* 2865
2228 * If no option could be set, return an error: 2866 /* If no option could be set, test the specific tracer options */
2229 */ 2867 if (!trace_options[i]) {
2230 if (!trace_options[i]) 2868 ret = set_tracer_option(current_trace, cmp, neg);
2231 return -EINVAL; 2869 if (ret)
2870 return ret;
2871 }
2232 2872
2233 filp->f_pos += cnt; 2873 filp->f_pos += cnt;
2234 2874
@@ -2237,8 +2877,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2237 2877
2238static struct file_operations tracing_iter_fops = { 2878static struct file_operations tracing_iter_fops = {
2239 .open = tracing_open_generic, 2879 .open = tracing_open_generic,
2240 .read = tracing_iter_ctrl_read, 2880 .read = tracing_trace_options_read,
2241 .write = tracing_iter_ctrl_write, 2881 .write = tracing_trace_options_write,
2242}; 2882};
2243 2883
2244static const char readme_msg[] = 2884static const char readme_msg[] =
@@ -2252,9 +2892,9 @@ static const char readme_msg[] =
2252 "# echo sched_switch > /debug/tracing/current_tracer\n" 2892 "# echo sched_switch > /debug/tracing/current_tracer\n"
2253 "# cat /debug/tracing/current_tracer\n" 2893 "# cat /debug/tracing/current_tracer\n"
2254 "sched_switch\n" 2894 "sched_switch\n"
2255 "# cat /debug/tracing/iter_ctrl\n" 2895 "# cat /debug/tracing/trace_options\n"
2256 "noprint-parent nosym-offset nosym-addr noverbose\n" 2896 "noprint-parent nosym-offset nosym-addr noverbose\n"
2257 "# echo print-parent > /debug/tracing/iter_ctrl\n" 2897 "# echo print-parent > /debug/tracing/trace_options\n"
2258 "# echo 1 > /debug/tracing/tracing_enabled\n" 2898 "# echo 1 > /debug/tracing/tracing_enabled\n"
2259 "# cat /debug/tracing/trace > /tmp/trace.txt\n" 2899 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2260 "echo 0 > /debug/tracing/tracing_enabled\n" 2900 "echo 0 > /debug/tracing/tracing_enabled\n"
@@ -2277,11 +2917,10 @@ static ssize_t
2277tracing_ctrl_read(struct file *filp, char __user *ubuf, 2917tracing_ctrl_read(struct file *filp, char __user *ubuf,
2278 size_t cnt, loff_t *ppos) 2918 size_t cnt, loff_t *ppos)
2279{ 2919{
2280 struct trace_array *tr = filp->private_data;
2281 char buf[64]; 2920 char buf[64];
2282 int r; 2921 int r;
2283 2922
2284 r = sprintf(buf, "%ld\n", tr->ctrl); 2923 r = sprintf(buf, "%u\n", tracer_enabled);
2285 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2924 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2286} 2925}
2287 2926
@@ -2309,16 +2948,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2309 val = !!val; 2948 val = !!val;
2310 2949
2311 mutex_lock(&trace_types_lock); 2950 mutex_lock(&trace_types_lock);
2312 if (tr->ctrl ^ val) { 2951 if (tracer_enabled ^ val) {
2313 if (val) 2952 if (val) {
2314 tracer_enabled = 1; 2953 tracer_enabled = 1;
2315 else 2954 if (current_trace->start)
2955 current_trace->start(tr);
2956 tracing_start();
2957 } else {
2316 tracer_enabled = 0; 2958 tracer_enabled = 0;
2317 2959 tracing_stop();
2318 tr->ctrl = val; 2960 if (current_trace->stop)
2319 2961 current_trace->stop(tr);
2320 if (current_trace && current_trace->ctrl_update) 2962 }
2321 current_trace->ctrl_update(tr);
2322 } 2963 }
2323 mutex_unlock(&trace_types_lock); 2964 mutex_unlock(&trace_types_lock);
2324 2965
@@ -2344,14 +2985,52 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2344 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2985 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2345} 2986}
2346 2987
2988static int tracing_set_tracer(char *buf)
2989{
2990 struct trace_array *tr = &global_trace;
2991 struct tracer *t;
2992 int ret = 0;
2993
2994 mutex_lock(&trace_types_lock);
2995 for (t = trace_types; t; t = t->next) {
2996 if (strcmp(t->name, buf) == 0)
2997 break;
2998 }
2999 if (!t) {
3000 ret = -EINVAL;
3001 goto out;
3002 }
3003 if (t == current_trace)
3004 goto out;
3005
3006 trace_branch_disable();
3007 if (current_trace && current_trace->reset)
3008 current_trace->reset(tr);
3009
3010 current_trace = t;
3011 if (t->init) {
3012 ret = t->init(tr);
3013 if (ret)
3014 goto out;
3015 }
3016
3017 trace_branch_enable(tr);
3018 out:
3019 mutex_unlock(&trace_types_lock);
3020
3021 return ret;
3022}
3023
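The new tracing_set_tracer() above expects a tracer's init() callback to return an error code rather than void. The following is a minimal, hedged sketch of what a tracer would now provide; "mytrace", my_tracer_init() and my_tracer_reset() are invented names, while register_tracer() and struct trace_array come from this patch series.

#include <linux/module.h>
#include "trace.h"

/* hypothetical tracer following the int-returning init() convention */
static int my_tracer_init(struct trace_array *tr)
{
	if (!tr->buffer)
		return -ENOMEM;	/* tracing_set_tracer() propagates this */
	return 0;		/* success: this tracer becomes current_trace */
}

static void my_tracer_reset(struct trace_array *tr)
{
	/* nothing allocated in init(), nothing to tear down */
}

static struct tracer my_tracer __read_mostly = {
	.name	= "mytrace",
	.init	= my_tracer_init,
	.reset	= my_tracer_reset,
};

static __init int init_my_tracer(void)
{
	return register_tracer(&my_tracer);
}
device_initcall(init_my_tracer);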
2347static ssize_t 3024static ssize_t
2348tracing_set_trace_write(struct file *filp, const char __user *ubuf, 3025tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2349 size_t cnt, loff_t *ppos) 3026 size_t cnt, loff_t *ppos)
2350{ 3027{
2351 struct trace_array *tr = &global_trace;
2352 struct tracer *t;
2353 char buf[max_tracer_type_len+1]; 3028 char buf[max_tracer_type_len+1];
2354 int i; 3029 int i;
3030 size_t ret;
3031 int err;
3032
3033 ret = cnt;
2355 3034
2356 if (cnt > max_tracer_type_len) 3035 if (cnt > max_tracer_type_len)
2357 cnt = max_tracer_type_len; 3036 cnt = max_tracer_type_len;
@@ -2365,27 +3044,13 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2365 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) 3044 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2366 buf[i] = 0; 3045 buf[i] = 0;
2367 3046
2368 mutex_lock(&trace_types_lock); 3047 err = tracing_set_tracer(buf);
2369 for (t = trace_types; t; t = t->next) { 3048 if (err)
2370 if (strcmp(t->name, buf) == 0) 3049 return err;
2371 break;
2372 }
2373 if (!t || t == current_trace)
2374 goto out;
2375
2376 if (current_trace && current_trace->reset)
2377 current_trace->reset(tr);
2378
2379 current_trace = t;
2380 if (t->init)
2381 t->init(tr);
2382 3050
2383 out: 3051 filp->f_pos += ret;
2384 mutex_unlock(&trace_types_lock);
2385 3052
2386 filp->f_pos += cnt; 3053 return ret;
2387
2388 return cnt;
2389} 3054}
2390 3055
2391static ssize_t 3056static ssize_t
@@ -2450,6 +3115,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2450 return -ENOMEM; 3115 return -ENOMEM;
2451 3116
2452 mutex_lock(&trace_types_lock); 3117 mutex_lock(&trace_types_lock);
3118
3119 /* trace pipe does not show start of buffer */
3120 cpus_setall(iter->started);
3121
2453 iter->tr = &global_trace; 3122 iter->tr = &global_trace;
2454 iter->trace = current_trace; 3123 iter->trace = current_trace;
2455 filp->private_data = iter; 3124 filp->private_data = iter;
@@ -2500,20 +3169,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2500 size_t cnt, loff_t *ppos) 3169 size_t cnt, loff_t *ppos)
2501{ 3170{
2502 struct trace_iterator *iter = filp->private_data; 3171 struct trace_iterator *iter = filp->private_data;
2503 struct trace_array_cpu *data;
2504 static cpumask_t mask;
2505 unsigned long flags;
2506#ifdef CONFIG_FTRACE
2507 int ftrace_save;
2508#endif
2509 int cpu;
2510 ssize_t sret; 3172 ssize_t sret;
2511 3173
2512 /* return any leftover data */ 3174 /* return any leftover data */
2513 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 3175 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2514 if (sret != -EBUSY) 3176 if (sret != -EBUSY)
2515 return sret; 3177 return sret;
2516 sret = 0;
2517 3178
2518 trace_seq_reset(&iter->seq); 3179 trace_seq_reset(&iter->seq);
2519 3180
@@ -2524,6 +3185,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2524 goto out; 3185 goto out;
2525 } 3186 }
2526 3187
3188waitagain:
3189 sret = 0;
2527 while (trace_empty(iter)) { 3190 while (trace_empty(iter)) {
2528 3191
2529 if ((filp->f_flags & O_NONBLOCK)) { 3192 if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +3251,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2588 offsetof(struct trace_iterator, seq)); 3251 offsetof(struct trace_iterator, seq));
2589 iter->pos = -1; 3252 iter->pos = -1;
2590 3253
2591 /*
2592 * We need to stop all tracing on all CPUS to read the
2593 * the next buffer. This is a bit expensive, but is
2594 * not done often. We fill all what we can read,
2595 * and then release the locks again.
2596 */
2597
2598 cpus_clear(mask);
2599 local_irq_save(flags);
2600#ifdef CONFIG_FTRACE
2601 ftrace_save = ftrace_enabled;
2602 ftrace_enabled = 0;
2603#endif
2604 smp_wmb();
2605 for_each_tracing_cpu(cpu) {
2606 data = iter->tr->data[cpu];
2607
2608 if (!head_page(data) || !data->trace_idx)
2609 continue;
2610
2611 atomic_inc(&data->disabled);
2612 cpu_set(cpu, mask);
2613 }
2614
2615 for_each_cpu_mask(cpu, mask) {
2616 data = iter->tr->data[cpu];
2617 __raw_spin_lock(&data->lock);
2618
2619 if (data->overrun > iter->last_overrun[cpu])
2620 iter->overrun[cpu] +=
2621 data->overrun - iter->last_overrun[cpu];
2622 iter->last_overrun[cpu] = data->overrun;
2623 }
2624
2625 while (find_next_entry_inc(iter) != NULL) { 3254 while (find_next_entry_inc(iter) != NULL) {
2626 int ret; 3255 enum print_line_t ret;
2627 int len = iter->seq.len; 3256 int len = iter->seq.len;
2628 3257
2629 ret = print_trace_line(iter); 3258 ret = print_trace_line(iter);
2630 if (!ret) { 3259 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2631 /* don't print partial lines */ 3260 /* don't print partial lines */
2632 iter->seq.len = len; 3261 iter->seq.len = len;
2633 break; 3262 break;
@@ -2639,26 +3268,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2639 break; 3268 break;
2640 } 3269 }
2641 3270
2642 for_each_cpu_mask(cpu, mask) {
2643 data = iter->tr->data[cpu];
2644 __raw_spin_unlock(&data->lock);
2645 }
2646
2647 for_each_cpu_mask(cpu, mask) {
2648 data = iter->tr->data[cpu];
2649 atomic_dec(&data->disabled);
2650 }
2651#ifdef CONFIG_FTRACE
2652 ftrace_enabled = ftrace_save;
2653#endif
2654 local_irq_restore(flags);
2655
2656 /* Now copy what we have to the user */ 3271 /* Now copy what we have to the user */
2657 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 3272 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2658 if (iter->seq.readpos >= iter->seq.len) 3273 if (iter->seq.readpos >= iter->seq.len)
2659 trace_seq_reset(&iter->seq); 3274 trace_seq_reset(&iter->seq);
3275
3276 /*
3277 * If there was nothing to send to the user, in spite of consuming trace
3278 * entries, go back to wait for more entries.
3279 */
2660 if (sret == -EBUSY) 3280 if (sret == -EBUSY)
2661 sret = 0; 3281 goto waitagain;
2662 3282
2663out: 3283out:
2664 mutex_unlock(&trace_types_lock); 3284 mutex_unlock(&trace_types_lock);
@@ -2674,7 +3294,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
2674 char buf[64]; 3294 char buf[64];
2675 int r; 3295 int r;
2676 3296
2677 r = sprintf(buf, "%lu\n", tr->entries); 3297 r = sprintf(buf, "%lu\n", tr->entries >> 10);
2678 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3298 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2679} 3299}
2680 3300
@@ -2684,7 +3304,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2684{ 3304{
2685 unsigned long val; 3305 unsigned long val;
2686 char buf[64]; 3306 char buf[64];
2687 int i, ret; 3307 int ret, cpu;
2688 3308
2689 if (cnt >= sizeof(buf)) 3309 if (cnt >= sizeof(buf))
2690 return -EINVAL; 3310 return -EINVAL;
@@ -2704,71 +3324,109 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2704 3324
2705 mutex_lock(&trace_types_lock); 3325 mutex_lock(&trace_types_lock);
2706 3326
2707 if (current_trace != &no_tracer) { 3327 tracing_stop();
2708 cnt = -EBUSY;
2709 pr_info("ftrace: set current_tracer to none"
2710 " before modifying buffer size\n");
2711 goto out;
2712 }
2713
2714 if (val > global_trace.entries) {
2715 long pages_requested;
2716 unsigned long freeable_pages;
2717
2718 /* make sure we have enough memory before mapping */
2719 pages_requested =
2720 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2721
2722 /* account for each buffer (and max_tr) */
2723 pages_requested *= tracing_nr_buffers * 2;
2724 3328
2725 /* Check for overflow */ 3329 /* disable all cpu buffers */
2726 if (pages_requested < 0) { 3330 for_each_tracing_cpu(cpu) {
2727 cnt = -ENOMEM; 3331 if (global_trace.data[cpu])
2728 goto out; 3332 atomic_inc(&global_trace.data[cpu]->disabled);
2729 } 3333 if (max_tr.data[cpu])
3334 atomic_inc(&max_tr.data[cpu]->disabled);
3335 }
2730 3336
2731 freeable_pages = determine_dirtyable_memory(); 3337 /* value is in KB */
3338 val <<= 10;
2732 3339
2733 /* we only allow to request 1/4 of useable memory */ 3340 if (val != global_trace.entries) {
2734 if (pages_requested > 3341 ret = ring_buffer_resize(global_trace.buffer, val);
2735 ((freeable_pages + tracing_pages_allocated) / 4)) { 3342 if (ret < 0) {
2736 cnt = -ENOMEM; 3343 cnt = ret;
2737 goto out; 3344 goto out;
2738 } 3345 }
2739 3346
2740 while (global_trace.entries < val) { 3347 ret = ring_buffer_resize(max_tr.buffer, val);
2741 if (trace_alloc_page()) { 3348 if (ret < 0) {
2742 cnt = -ENOMEM; 3349 int r;
2743 goto out; 3350 cnt = ret;
3351 r = ring_buffer_resize(global_trace.buffer,
3352 global_trace.entries);
3353 if (r < 0) {
3354 /* AARGH! We are left with different
3355 * size max buffer!!!! */
3356 WARN_ON(1);
3357 tracing_disabled = 1;
2744 } 3358 }
2745 /* double check that we don't go over the known pages */ 3359 goto out;
2746 if (tracing_pages_allocated > pages_requested)
2747 break;
2748 } 3360 }
2749 3361
2750 } else { 3362 global_trace.entries = val;
2751 /* include the number of entries in val (inc of page entries) */
2752 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2753 trace_free_page();
2754 } 3363 }
2755 3364
2756 /* check integrity */
2757 for_each_tracing_cpu(i)
2758 check_pages(global_trace.data[i]);
2759
2760 filp->f_pos += cnt; 3365 filp->f_pos += cnt;
2761 3366
2762 /* If check pages failed, return ENOMEM */ 3367 /* If check pages failed, return ENOMEM */
2763 if (tracing_disabled) 3368 if (tracing_disabled)
2764 cnt = -ENOMEM; 3369 cnt = -ENOMEM;
2765 out: 3370 out:
3371 for_each_tracing_cpu(cpu) {
3372 if (global_trace.data[cpu])
3373 atomic_dec(&global_trace.data[cpu]->disabled);
3374 if (max_tr.data[cpu])
3375 atomic_dec(&max_tr.data[cpu]->disabled);
3376 }
3377
3378 tracing_start();
2766 max_tr.entries = global_trace.entries; 3379 max_tr.entries = global_trace.entries;
2767 mutex_unlock(&trace_types_lock); 3380 mutex_unlock(&trace_types_lock);
2768 3381
2769 return cnt; 3382 return cnt;
2770} 3383}
2771 3384
3385static int mark_printk(const char *fmt, ...)
3386{
3387 int ret;
3388 va_list args;
3389 va_start(args, fmt);
3390 ret = trace_vprintk(0, -1, fmt, args);
3391 va_end(args);
3392 return ret;
3393}
3394
3395static ssize_t
3396tracing_mark_write(struct file *filp, const char __user *ubuf,
3397 size_t cnt, loff_t *fpos)
3398{
3399 char *buf;
3400 char *end;
3401
3402 if (tracing_disabled)
3403 return -EINVAL;
3404
3405 if (cnt > TRACE_BUF_SIZE)
3406 cnt = TRACE_BUF_SIZE;
3407
3408 buf = kmalloc(cnt + 1, GFP_KERNEL);
3409 if (buf == NULL)
3410 return -ENOMEM;
3411
3412 if (copy_from_user(buf, ubuf, cnt)) {
3413 kfree(buf);
3414 return -EFAULT;
3415 }
3416
3417 /* Cut from the first NUL or newline. */
3418 buf[cnt] = '\0';
3419 end = strchr(buf, '\n');
3420 if (end)
3421 *end = '\0';
3422
3423 cnt = mark_printk("%s\n", buf);
3424 kfree(buf);
3425 *fpos += cnt;
3426
3427 return cnt;
3428}
3429
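For context, this is roughly how the new trace_marker file (created in tracer_init_debugfs() below) would be used from userspace. A hedged sketch: the /debug mount point follows the README text earlier in this file and is an assumption about where debugfs is mounted.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *msg = "hello from userspace\n";
	int fd = open("/debug/tracing/trace_marker", O_WRONLY);

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	/* each write becomes one TRACE_PRINT entry in the ring buffer */
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write trace_marker");
	close(fd);
	return 0;
}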
2772static struct file_operations tracing_max_lat_fops = { 3430static struct file_operations tracing_max_lat_fops = {
2773 .open = tracing_open_generic, 3431 .open = tracing_open_generic,
2774 .read = tracing_max_lat_read, 3432 .read = tracing_max_lat_read,
@@ -2800,24 +3458,45 @@ static struct file_operations tracing_entries_fops = {
2800 .write = tracing_entries_write, 3458 .write = tracing_entries_write,
2801}; 3459};
2802 3460
3461static struct file_operations tracing_mark_fops = {
3462 .open = tracing_open_generic,
3463 .write = tracing_mark_write,
3464};
3465
2803#ifdef CONFIG_DYNAMIC_FTRACE 3466#ifdef CONFIG_DYNAMIC_FTRACE
2804 3467
3468int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3469{
3470 return 0;
3471}
3472
2805static ssize_t 3473static ssize_t
2806tracing_read_long(struct file *filp, char __user *ubuf, 3474tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2807 size_t cnt, loff_t *ppos) 3475 size_t cnt, loff_t *ppos)
2808{ 3476{
3477 static char ftrace_dyn_info_buffer[1024];
3478 static DEFINE_MUTEX(dyn_info_mutex);
2809 unsigned long *p = filp->private_data; 3479 unsigned long *p = filp->private_data;
2810 char buf[64]; 3480 char *buf = ftrace_dyn_info_buffer;
3481 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2811 int r; 3482 int r;
2812 3483
2813 r = sprintf(buf, "%ld\n", *p); 3484 mutex_lock(&dyn_info_mutex);
3485 r = sprintf(buf, "%ld ", *p);
2814 3486
2815 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3487 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3488 buf[r++] = '\n';
3489
3490 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3491
3492 mutex_unlock(&dyn_info_mutex);
3493
3494 return r;
2816} 3495}
2817 3496
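The __weak ftrace_arch_read_dyn_info() above lets an architecture append its own text after the total update count. A hedged sketch of what such an override could look like; only the hook's signature comes from the code above, the counters and the wording are invented.

#include <linux/kernel.h>	/* snprintf() */

/* hypothetical architecture-side statistics */
static int patched_sites;
static int total_sites;

int ftrace_arch_read_dyn_info(char *buf, int size)
{
	int r;

	/* the caller prints "<count> " before this and appends '\n' after */
	r = snprintf(buf, size, "patched %d/%d call sites",
		     patched_sites, total_sites);
	return r < size ? r : size;
}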
2818static struct file_operations tracing_read_long_fops = { 3497static struct file_operations tracing_dyn_info_fops = {
2819 .open = tracing_open_generic, 3498 .open = tracing_open_generic,
2820 .read = tracing_read_long, 3499 .read = tracing_read_dyn_info,
2821}; 3500};
2822#endif 3501#endif
2823 3502
@@ -2846,7 +3525,7 @@ struct dentry *tracing_init_dentry(void)
2846#include "trace_selftest.c" 3525#include "trace_selftest.c"
2847#endif 3526#endif
2848 3527
2849static __init void tracer_init_debugfs(void) 3528static __init int tracer_init_debugfs(void)
2850{ 3529{
2851 struct dentry *d_tracer; 3530 struct dentry *d_tracer;
2852 struct dentry *entry; 3531 struct dentry *entry;
@@ -2858,10 +3537,10 @@ static __init void tracer_init_debugfs(void)
2858 if (!entry) 3537 if (!entry)
2859 pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); 3538 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2860 3539
2861 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, 3540 entry = debugfs_create_file("trace_options", 0644, d_tracer,
2862 NULL, &tracing_iter_fops); 3541 NULL, &tracing_iter_fops);
2863 if (!entry) 3542 if (!entry)
2864 pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); 3543 pr_warning("Could not create debugfs 'trace_options' entry\n");
2865 3544
2866 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3545 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2867 NULL, &tracing_cpumask_fops); 3546 NULL, &tracing_cpumask_fops);
@@ -2881,12 +3560,12 @@ static __init void tracer_init_debugfs(void)
2881 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 3560 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2882 &global_trace, &show_traces_fops); 3561 &global_trace, &show_traces_fops);
2883 if (!entry) 3562 if (!entry)
2884 pr_warning("Could not create debugfs 'trace' entry\n"); 3563 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2885 3564
2886 entry = debugfs_create_file("current_tracer", 0444, d_tracer, 3565 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2887 &global_trace, &set_tracer_fops); 3566 &global_trace, &set_tracer_fops);
2888 if (!entry) 3567 if (!entry)
2889 pr_warning("Could not create debugfs 'trace' entry\n"); 3568 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2890 3569
2891 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, 3570 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2892 &tracing_max_latency, 3571 &tracing_max_latency,
@@ -2899,7 +3578,7 @@ static __init void tracer_init_debugfs(void)
2899 &tracing_thresh, &tracing_max_lat_fops); 3578 &tracing_thresh, &tracing_max_lat_fops);
2900 if (!entry) 3579 if (!entry)
2901 pr_warning("Could not create debugfs " 3580 pr_warning("Could not create debugfs "
2902 "'tracing_threash' entry\n"); 3581 "'tracing_thresh' entry\n");
2903 entry = debugfs_create_file("README", 0644, d_tracer, 3582 entry = debugfs_create_file("README", 0644, d_tracer,
2904 NULL, &tracing_readme_fops); 3583 NULL, &tracing_readme_fops);
2905 if (!entry) 3584 if (!entry)
@@ -2909,18 +3588,24 @@ static __init void tracer_init_debugfs(void)
2909 NULL, &tracing_pipe_fops); 3588 NULL, &tracing_pipe_fops);
2910 if (!entry) 3589 if (!entry)
2911 pr_warning("Could not create debugfs " 3590 pr_warning("Could not create debugfs "
2912 "'tracing_threash' entry\n"); 3591 "'trace_pipe' entry\n");
2913 3592
2914 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 3593 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2915 &global_trace, &tracing_entries_fops); 3594 &global_trace, &tracing_entries_fops);
2916 if (!entry) 3595 if (!entry)
2917 pr_warning("Could not create debugfs " 3596 pr_warning("Could not create debugfs "
2918 "'tracing_threash' entry\n"); 3597 "'buffer_size_kb' entry\n");
3598
3599 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
3600 NULL, &tracing_mark_fops);
3601 if (!entry)
3602 pr_warning("Could not create debugfs "
3603 "'trace_marker' entry\n");
2919 3604
2920#ifdef CONFIG_DYNAMIC_FTRACE 3605#ifdef CONFIG_DYNAMIC_FTRACE
2921 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3606 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2922 &ftrace_update_tot_cnt, 3607 &ftrace_update_tot_cnt,
2923 &tracing_read_long_fops); 3608 &tracing_dyn_info_fops);
2924 if (!entry) 3609 if (!entry)
2925 pr_warning("Could not create debugfs " 3610 pr_warning("Could not create debugfs "
2926 "'dyn_ftrace_total_info' entry\n"); 3611 "'dyn_ftrace_total_info' entry\n");
@@ -2928,230 +3613,268 @@ static __init void tracer_init_debugfs(void)
2928#ifdef CONFIG_SYSPROF_TRACER 3613#ifdef CONFIG_SYSPROF_TRACER
2929 init_tracer_sysprof_debugfs(d_tracer); 3614 init_tracer_sysprof_debugfs(d_tracer);
2930#endif 3615#endif
3616 return 0;
2931} 3617}
2932 3618
2933static int trace_alloc_page(void) 3619int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
2934{ 3620{
3621 static DEFINE_SPINLOCK(trace_buf_lock);
3622 static char trace_buf[TRACE_BUF_SIZE];
3623
3624 struct ring_buffer_event *event;
3625 struct trace_array *tr = &global_trace;
2935 struct trace_array_cpu *data; 3626 struct trace_array_cpu *data;
2936 struct page *page, *tmp; 3627 int cpu, len = 0, size, pc;
2937 LIST_HEAD(pages); 3628 struct print_entry *entry;
2938 void *array; 3629 unsigned long irq_flags;
2939 unsigned pages_allocated = 0;
2940 int i;
2941 3630
2942 /* first allocate a page for each CPU */ 3631 if (tracing_disabled || tracing_selftest_running)
2943 for_each_tracing_cpu(i) { 3632 return 0;
2944 array = (void *)__get_free_page(GFP_KERNEL);
2945 if (array == NULL) {
2946 printk(KERN_ERR "tracer: failed to allocate page"
2947 "for trace buffer!\n");
2948 goto free_pages;
2949 }
2950 3633
2951 pages_allocated++; 3634 pc = preempt_count();
2952 page = virt_to_page(array); 3635 preempt_disable_notrace();
2953 list_add(&page->lru, &pages); 3636 cpu = raw_smp_processor_id();
3637 data = tr->data[cpu];
2954 3638
2955/* Only allocate if we are actually using the max trace */ 3639 if (unlikely(atomic_read(&data->disabled)))
2956#ifdef CONFIG_TRACER_MAX_TRACE 3640 goto out;
2957 array = (void *)__get_free_page(GFP_KERNEL);
2958 if (array == NULL) {
2959 printk(KERN_ERR "tracer: failed to allocate page"
2960 "for trace buffer!\n");
2961 goto free_pages;
2962 }
2963 pages_allocated++;
2964 page = virt_to_page(array);
2965 list_add(&page->lru, &pages);
2966#endif
2967 }
2968 3641
2969 /* Now that we successfully allocate a page per CPU, add them */ 3642 pause_graph_tracing();
2970 for_each_tracing_cpu(i) { 3643 spin_lock_irqsave(&trace_buf_lock, irq_flags);
2971 data = global_trace.data[i]; 3644 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2972 page = list_entry(pages.next, struct page, lru); 3645
2973 list_del_init(&page->lru); 3646 len = min(len, TRACE_BUF_SIZE-1);
2974 list_add_tail(&page->lru, &data->trace_pages); 3647 trace_buf[len] = 0;
2975 ClearPageLRU(page); 3648
3649 size = sizeof(*entry) + len + 1;
3650 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3651 if (!event)
3652 goto out_unlock;
3653 entry = ring_buffer_event_data(event);
3654 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3655 entry->ent.type = TRACE_PRINT;
3656 entry->ip = ip;
3657 entry->depth = depth;
3658
3659 memcpy(&entry->buf, trace_buf, len);
3660 entry->buf[len] = 0;
3661 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3662
3663 out_unlock:
3664 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3665 unpause_graph_tracing();
3666 out:
3667 preempt_enable_notrace();
2976 3668
2977#ifdef CONFIG_TRACER_MAX_TRACE 3669 return len;
2978 data = max_tr.data[i]; 3670}
2979 page = list_entry(pages.next, struct page, lru); 3671EXPORT_SYMBOL_GPL(trace_vprintk);
2980 list_del_init(&page->lru);
2981 list_add_tail(&page->lru, &data->trace_pages);
2982 SetPageLRU(page);
2983#endif
2984 }
2985 tracing_pages_allocated += pages_allocated;
2986 global_trace.entries += ENTRIES_PER_PAGE;
2987 3672
2988 return 0; 3673int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3674{
3675 int ret;
3676 va_list ap;
2989 3677
2990 free_pages: 3678 if (!(trace_flags & TRACE_ITER_PRINTK))
2991 list_for_each_entry_safe(page, tmp, &pages, lru) { 3679 return 0;
2992 list_del_init(&page->lru); 3680
2993 __free_page(page); 3681 va_start(ap, fmt);
3682 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3683 va_end(ap);
3684 return ret;
3685}
3686EXPORT_SYMBOL_GPL(__ftrace_printk);
3687
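__ftrace_printk() above is normally reached through the ftrace_printk() convenience macro in linux/ftrace.h, which passes the caller's instruction pointer as the first argument. A small usage sketch; my_poll_handler() and its arguments are made up.

#include <linux/ftrace.h>

static void my_poll_handler(int irq, unsigned long events)
{
	/* becomes a TRACE_PRINT entry in the ring buffer */
	ftrace_printk("irq %d raised events %#lx\n", irq, events);
}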
3688static int trace_panic_handler(struct notifier_block *this,
3689 unsigned long event, void *unused)
3690{
3691 if (ftrace_dump_on_oops)
3692 ftrace_dump();
3693 return NOTIFY_OK;
3694}
3695
3696static struct notifier_block trace_panic_notifier = {
3697 .notifier_call = trace_panic_handler,
3698 .next = NULL,
3699 .priority = 150 /* priority: INT_MAX >= x >= 0 */
3700};
3701
3702static int trace_die_handler(struct notifier_block *self,
3703 unsigned long val,
3704 void *data)
3705{
3706 switch (val) {
3707 case DIE_OOPS:
3708 if (ftrace_dump_on_oops)
3709 ftrace_dump();
3710 break;
3711 default:
3712 break;
2994 } 3713 }
2995 return -ENOMEM; 3714 return NOTIFY_OK;
3715}
3716
3717static struct notifier_block trace_die_notifier = {
3718 .notifier_call = trace_die_handler,
3719 .priority = 200
3720};
3721
3722/*
3723 * printk is set to max of 1024, we really don't need it that big.
3724 * Nothing should be printing 1000 characters anyway.
3725 */
3726#define TRACE_MAX_PRINT 1000
3727
3728/*
3729 * Define here KERN_TRACE so that we have one place to modify
3730 * it if we decide to change what log level the ftrace dump
3731 * should be at.
3732 */
3733#define KERN_TRACE KERN_INFO
3734
3735static void
3736trace_printk_seq(struct trace_seq *s)
3737{
3738 /* Probably should print a warning here. */
3739 if (s->len >= 1000)
3740 s->len = 1000;
3741
3742 /* should be zero terminated, but we are paranoid. */
3743 s->buffer[s->len] = 0;
3744
3745 printk(KERN_TRACE "%s", s->buffer);
3746
3747 trace_seq_reset(s);
2996} 3748}
2997 3749
2998static int trace_free_page(void) 3750void ftrace_dump(void)
2999{ 3751{
3000 struct trace_array_cpu *data; 3752 static DEFINE_SPINLOCK(ftrace_dump_lock);
3001 struct page *page; 3753 /* use static because iter can be a bit big for the stack */
3002 struct list_head *p; 3754 static struct trace_iterator iter;
3003 int i; 3755 static cpumask_t mask;
3004 int ret = 0; 3756 static int dump_ran;
3757 unsigned long flags;
3758 int cnt = 0, cpu;
3005 3759
3006 /* free one page from each buffer */ 3760 /* only one dump */
3007 for_each_tracing_cpu(i) { 3761 spin_lock_irqsave(&ftrace_dump_lock, flags);
3008 data = global_trace.data[i]; 3762 if (dump_ran)
3009 p = data->trace_pages.next; 3763 goto out;
3010 if (p == &data->trace_pages) {
3011 /* should never happen */
3012 WARN_ON(1);
3013 tracing_disabled = 1;
3014 ret = -1;
3015 break;
3016 }
3017 page = list_entry(p, struct page, lru);
3018 ClearPageLRU(page);
3019 list_del(&page->lru);
3020 tracing_pages_allocated--;
3021 tracing_pages_allocated--;
3022 __free_page(page);
3023 3764
3024 tracing_reset(data); 3765 dump_ran = 1;
3025 3766
3026#ifdef CONFIG_TRACER_MAX_TRACE 3767 /* No turning back! */
3027 data = max_tr.data[i]; 3768 ftrace_kill();
3028 p = data->trace_pages.next; 3769
3029 if (p == &data->trace_pages) { 3770 for_each_tracing_cpu(cpu) {
3030 /* should never happen */ 3771 atomic_inc(&global_trace.data[cpu]->disabled);
3031 WARN_ON(1); 3772 }
3032 tracing_disabled = 1; 3773
3033 ret = -1; 3774 /* don't look at user memory in panic mode */
3034 break; 3775 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3776
3777 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3778
3779 iter.tr = &global_trace;
3780 iter.trace = current_trace;
3781
3782 /*
3783 * We need to stop all tracing on all CPUs to read
3784 * the next buffer. This is a bit expensive, but is
3785 * not done often. We fill in all that we can read,
3786 * and then release the locks again.
3787 */
3788
3789 cpus_clear(mask);
3790
3791 while (!trace_empty(&iter)) {
3792
3793 if (!cnt)
3794 printk(KERN_TRACE "---------------------------------\n");
3795
3796 cnt++;
3797
3798 /* reset all but tr, trace, and overruns */
3799 memset(&iter.seq, 0,
3800 sizeof(struct trace_iterator) -
3801 offsetof(struct trace_iterator, seq));
3802 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3803 iter.pos = -1;
3804
3805 if (find_next_entry_inc(&iter) != NULL) {
3806 print_trace_line(&iter);
3807 trace_consume(&iter);
3035 } 3808 }
3036 page = list_entry(p, struct page, lru);
3037 ClearPageLRU(page);
3038 list_del(&page->lru);
3039 __free_page(page);
3040 3809
3041 tracing_reset(data); 3810 trace_printk_seq(&iter.seq);
3042#endif
3043 } 3811 }
3044 global_trace.entries -= ENTRIES_PER_PAGE;
3045 3812
3046 return ret; 3813 if (!cnt)
3814 printk(KERN_TRACE " (ftrace buffer empty)\n");
3815 else
3816 printk(KERN_TRACE "---------------------------------\n");
3817
3818 out:
3819 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3047} 3820}
3048 3821
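Besides the panic and die notifiers registered below, ftrace_dump() can also be called directly by code that is about to die. A hedged sketch of such a caller, assuming the declaration is visible via linux/ftrace.h; my_validate_state() is hypothetical.

#include <linux/ftrace.h>
#include <linux/bug.h>

static void my_validate_state(int broken)
{
	if (unlikely(broken)) {
		/* one-shot: dump_ran makes any later calls no-ops */
		ftrace_dump();
		BUG();
	}
}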
3049__init static int tracer_alloc_buffers(void) 3822__init static int tracer_alloc_buffers(void)
3050{ 3823{
3051 struct trace_array_cpu *data; 3824 struct trace_array_cpu *data;
3052 void *array;
3053 struct page *page;
3054 int pages = 0;
3055 int ret = -ENOMEM;
3056 int i; 3825 int i;
3057 3826
3058 /* TODO: make the number of buffers hot pluggable with CPUS */ 3827 /* TODO: make the number of buffers hot pluggable with CPUS */
3059 tracing_nr_buffers = num_possible_cpus();
3060 tracing_buffer_mask = cpu_possible_map; 3828 tracing_buffer_mask = cpu_possible_map;
3061 3829
3062 /* Allocate the first page for all buffers */ 3830 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3063 for_each_tracing_cpu(i) { 3831 TRACE_BUFFER_FLAGS);
3064 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 3832 if (!global_trace.buffer) {
3065 max_tr.data[i] = &per_cpu(max_data, i); 3833 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3066 3834 WARN_ON(1);
3067 array = (void *)__get_free_page(GFP_KERNEL); 3835 return 0;
3068 if (array == NULL) { 3836 }
3069 printk(KERN_ERR "tracer: failed to allocate page" 3837 global_trace.entries = ring_buffer_size(global_trace.buffer);
3070 "for trace buffer!\n");
3071 goto free_buffers;
3072 }
3073
3074 /* set the array to the list */
3075 INIT_LIST_HEAD(&data->trace_pages);
3076 page = virt_to_page(array);
3077 list_add(&page->lru, &data->trace_pages);
3078 /* use the LRU flag to differentiate the two buffers */
3079 ClearPageLRU(page);
3080
3081 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3082 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3083 3838
3084/* Only allocate if we are actually using the max trace */
3085#ifdef CONFIG_TRACER_MAX_TRACE 3839#ifdef CONFIG_TRACER_MAX_TRACE
3086 array = (void *)__get_free_page(GFP_KERNEL); 3840 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3087 if (array == NULL) { 3841 TRACE_BUFFER_FLAGS);
3088 printk(KERN_ERR "tracer: failed to allocate page" 3842 if (!max_tr.buffer) {
3089 "for trace buffer!\n"); 3843 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3090 goto free_buffers; 3844 WARN_ON(1);
3091 } 3845 ring_buffer_free(global_trace.buffer);
3092 3846 return 0;
3093 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3094 page = virt_to_page(array);
3095 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3096 SetPageLRU(page);
3097#endif
3098 } 3847 }
3848 max_tr.entries = ring_buffer_size(max_tr.buffer);
3849 WARN_ON(max_tr.entries != global_trace.entries);
3850#endif
3099 3851
3100 /* 3852 /* Allocate the first page for all buffers */
3101 * Since we allocate by orders of pages, we may be able to 3853 for_each_tracing_cpu(i) {
3102 * round up a bit. 3854 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3103 */ 3855 max_tr.data[i] = &per_cpu(max_data, i);
3104 global_trace.entries = ENTRIES_PER_PAGE;
3105 pages++;
3106
3107 while (global_trace.entries < trace_nr_entries) {
3108 if (trace_alloc_page())
3109 break;
3110 pages++;
3111 } 3856 }
3112 max_tr.entries = global_trace.entries;
3113
3114 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3115 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3116 pr_info(" actual entries %ld\n", global_trace.entries);
3117
3118 tracer_init_debugfs();
3119 3857
3120 trace_init_cmdlines(); 3858 trace_init_cmdlines();
3121 3859
3122 register_tracer(&no_tracer); 3860 register_tracer(&nop_trace);
3123 current_trace = &no_tracer; 3861#ifdef CONFIG_BOOT_TRACER
3862 register_tracer(&boot_tracer);
3863 current_trace = &boot_tracer;
3864 current_trace->init(&global_trace);
3865#else
3866 current_trace = &nop_trace;
3867#endif
3124 3868
3125 /* All seems OK, enable tracing */ 3869 /* All seems OK, enable tracing */
3126 global_trace.ctrl = tracer_enabled;
3127 tracing_disabled = 0; 3870 tracing_disabled = 0;
3128 3871
3129 return 0; 3872 atomic_notifier_chain_register(&panic_notifier_list,
3873 &trace_panic_notifier);
3130 3874
3131 free_buffers: 3875 register_die_notifier(&trace_die_notifier);
3132 for (i-- ; i >= 0; i--) {
3133 struct page *page, *tmp;
3134 struct trace_array_cpu *data = global_trace.data[i];
3135
3136 if (data) {
3137 list_for_each_entry_safe(page, tmp,
3138 &data->trace_pages, lru) {
3139 list_del_init(&page->lru);
3140 __free_page(page);
3141 }
3142 }
3143 3876
3144#ifdef CONFIG_TRACER_MAX_TRACE 3877 return 0;
3145 data = max_tr.data[i];
3146 if (data) {
3147 list_for_each_entry_safe(page, tmp,
3148 &data->trace_pages, lru) {
3149 list_del_init(&page->lru);
3150 __free_page(page);
3151 }
3152 }
3153#endif
3154 }
3155 return ret;
3156} 3878}
3157fs_initcall(tracer_alloc_buffers); 3879early_initcall(tracer_alloc_buffers);
3880fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2b..cc7a4f864036 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,10 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h>
8#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h>
11#include <trace/boot.h>
9 12
10enum trace_type { 13enum trace_type {
11 __TRACE_FIRST_TYPE = 0, 14 __TRACE_FIRST_TYPE = 0,
@@ -13,38 +16,80 @@ enum trace_type {
13 TRACE_FN, 16 TRACE_FN,
14 TRACE_CTX, 17 TRACE_CTX,
15 TRACE_WAKE, 18 TRACE_WAKE,
19 TRACE_CONT,
16 TRACE_STACK, 20 TRACE_STACK,
21 TRACE_PRINT,
17 TRACE_SPECIAL, 22 TRACE_SPECIAL,
18 TRACE_MMIO_RW, 23 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP, 24 TRACE_MMIO_MAP,
25 TRACE_BRANCH,
26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET,
28 TRACE_GRAPH_RET,
29 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES,
32 TRACE_POWER,
20 33
21 __TRACE_LAST_TYPE 34 __TRACE_LAST_TYPE
22}; 35};
23 36
24/* 37/*
38 * The trace entry - the most basic unit of tracing. This is what
39 * is printed in the end as a single line in the trace output, such as:
40 *
41 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
42 */
43struct trace_entry {
44 unsigned char type;
45 unsigned char cpu;
46 unsigned char flags;
47 unsigned char preempt_count;
48 int pid;
49 int tgid;
50};
51
52/*
25 * Function trace entry - function address and parent function addres: 53 * Function trace entry - function address and parent function addres:
26 */ 54 */
27struct ftrace_entry { 55struct ftrace_entry {
56 struct trace_entry ent;
28 unsigned long ip; 57 unsigned long ip;
29 unsigned long parent_ip; 58 unsigned long parent_ip;
30}; 59};
31 60
61/* Function call entry */
62struct ftrace_graph_ent_entry {
63 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent;
65};
66
67/* Function return entry */
68struct ftrace_graph_ret_entry {
69 struct trace_entry ent;
70 struct ftrace_graph_ret ret;
71};
72extern struct tracer boot_tracer;
73
32/* 74/*
33 * Context switch trace entry - which task (and prio) we switched from/to: 75 * Context switch trace entry - which task (and prio) we switched from/to:
34 */ 76 */
35struct ctx_switch_entry { 77struct ctx_switch_entry {
78 struct trace_entry ent;
36 unsigned int prev_pid; 79 unsigned int prev_pid;
37 unsigned char prev_prio; 80 unsigned char prev_prio;
38 unsigned char prev_state; 81 unsigned char prev_state;
39 unsigned int next_pid; 82 unsigned int next_pid;
40 unsigned char next_prio; 83 unsigned char next_prio;
41 unsigned char next_state; 84 unsigned char next_state;
85 unsigned int next_cpu;
42}; 86};
43 87
44/* 88/*
45 * Special (free-form) trace entry: 89 * Special (free-form) trace entry:
46 */ 90 */
47struct special_entry { 91struct special_entry {
92 struct trace_entry ent;
48 unsigned long arg1; 93 unsigned long arg1;
49 unsigned long arg2; 94 unsigned long arg2;
50 unsigned long arg3; 95 unsigned long arg3;
@@ -57,33 +102,94 @@ struct special_entry {
57#define FTRACE_STACK_ENTRIES 8 102#define FTRACE_STACK_ENTRIES 8
58 103
59struct stack_entry { 104struct stack_entry {
105 struct trace_entry ent;
106 unsigned long caller[FTRACE_STACK_ENTRIES];
107};
108
109struct userstack_entry {
110 struct trace_entry ent;
60 unsigned long caller[FTRACE_STACK_ENTRIES]; 111 unsigned long caller[FTRACE_STACK_ENTRIES];
61}; 112};
62 113
63/* 114/*
64 * The trace entry - the most basic unit of tracing. This is what 115 * ftrace_printk entry:
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */ 116 */
69struct trace_entry { 117struct print_entry {
70 char type; 118 struct trace_entry ent;
71 char cpu; 119 unsigned long ip;
72 char flags; 120 int depth;
73 char preempt_count; 121 char buf[];
74 int pid; 122};
75 cycle_t t; 123
76 union { 124#define TRACE_OLD_SIZE 88
77 struct ftrace_entry fn; 125
78 struct ctx_switch_entry ctx; 126struct trace_field_cont {
79 struct special_entry special; 127 unsigned char type;
80 struct stack_entry stack; 128 /* Temporary till we get rid of this completely */
81 struct mmiotrace_rw mmiorw; 129 char buf[TRACE_OLD_SIZE - 1];
82 struct mmiotrace_map mmiomap;
83 };
84}; 130};
85 131
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) 132struct trace_mmiotrace_rw {
133 struct trace_entry ent;
134 struct mmiotrace_rw rw;
135};
136
137struct trace_mmiotrace_map {
138 struct trace_entry ent;
139 struct mmiotrace_map map;
140};
141
142struct trace_boot_call {
143 struct trace_entry ent;
144 struct boot_trace_call boot_call;
145};
146
147struct trace_boot_ret {
148 struct trace_entry ent;
149 struct boot_trace_ret boot_ret;
150};
151
152#define TRACE_FUNC_SIZE 30
153#define TRACE_FILE_SIZE 20
154struct trace_branch {
155 struct trace_entry ent;
156 unsigned line;
157 char func[TRACE_FUNC_SIZE+1];
158 char file[TRACE_FILE_SIZE+1];
159 char correct;
160};
161
162struct hw_branch_entry {
163 struct trace_entry ent;
164 u64 from;
165 u64 to;
166};
167
168struct trace_power {
169 struct trace_entry ent;
170 struct power_trace state_data;
171};
172
173/*
174 * trace_flag_type is an enumeration that holds different
175 * states when a trace occurs. These are:
176 * IRQS_OFF - interrupts were disabled
177 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
178 * NEED_RESCED - reschedule is requested
179 * HARDIRQ - inside an interrupt handler
180 * SOFTIRQ - inside a softirq handler
181 * CONT - multiple entries hold the trace item
182 */
183enum trace_flag_type {
184 TRACE_FLAG_IRQS_OFF = 0x01,
185 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
186 TRACE_FLAG_NEED_RESCHED = 0x04,
187 TRACE_FLAG_HARDIRQ = 0x08,
188 TRACE_FLAG_SOFTIRQ = 0x10,
189 TRACE_FLAG_CONT = 0x20,
190};
191
192#define TRACE_BUF_SIZE 1024
87 193
88/* 194/*
89 * The CPU trace array - it consists of thousands of trace entries 195 * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +197,9 @@ struct trace_entry {
91 * the trace, etc.) 197 * the trace, etc.)
92 */ 198 */
93struct trace_array_cpu { 199struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled; 200 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98 201
99 /* these fields get copied into max-trace: */ 202 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx; 203 unsigned long trace_idx;
105 unsigned long overrun; 204 unsigned long overrun;
106 unsigned long saved_latency; 205 unsigned long saved_latency;
@@ -124,37 +223,123 @@ struct trace_iterator;
124 * They have on/off state as well: 223 * They have on/off state as well:
125 */ 224 */
126struct trace_array { 225struct trace_array {
226 struct ring_buffer *buffer;
127 unsigned long entries; 227 unsigned long entries;
128 long ctrl;
129 int cpu; 228 int cpu;
130 cycle_t time_start; 229 cycle_t time_start;
131 struct task_struct *waiter; 230 struct task_struct *waiter;
132 struct trace_array_cpu *data[NR_CPUS]; 231 struct trace_array_cpu *data[NR_CPUS];
133}; 232};
134 233
234#define FTRACE_CMP_TYPE(var, type) \
235 __builtin_types_compatible_p(typeof(var), type *)
236
237#undef IF_ASSIGN
238#define IF_ASSIGN(var, entry, etype, id) \
239 if (FTRACE_CMP_TYPE(var, etype)) { \
240 var = (typeof(var))(entry); \
241 WARN_ON(id && (entry)->type != id); \
242 break; \
243 }
244
245/* Will cause compile errors if type is not found. */
246extern void __ftrace_bad_type(void);
247
248/*
249 * The trace_assign_type is a verifier that the entry type is
250 * the same as the type being assigned. To add new types simply
251 * add a line with the following format:
252 *
253 * IF_ASSIGN(var, ent, type, id);
254 *
255 * Where "type" is the trace type that includes the trace_entry
256 * as the "ent" item. And "id" is the trace identifier that is
257 * used in the trace_type enum.
258 *
259 * If the type can have more than one id, then use zero.
260 */
261#define trace_assign_type(var, ent) \
262 do { \
263 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
269 IF_ASSIGN(var, ent, struct special_entry, 0); \
270 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
271 TRACE_MMIO_RW); \
272 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
273 TRACE_MMIO_MAP); \
274 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
275 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
276 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
277 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
278 TRACE_GRAPH_ENT); \
279 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
280 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
283 __ftrace_bad_type(); \
284 } while (0)
285
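For illustration only, this is roughly how a print_line callback consumes trace_assign_type(); the struct my_entry type, its value field and the TRACE_MY_TYPE id are hypothetical placeholders (not part of this patch), and such a type would also need its own IF_ASSIGN() line added to the macro above:

    static enum print_line_t my_print_line(struct trace_iterator *iter)
    {
    	struct trace_entry *entry = iter->ent;
    	struct my_entry *field;			/* hypothetical entry type */

    	if (entry->type != TRACE_MY_TYPE)	/* hypothetical id */
    		return TRACE_TYPE_UNHANDLED;

    	/* Fails to link if struct my_entry has no IF_ASSIGN() line, and
    	 * WARNs at run time if entry->type does not match the id
    	 * registered for that type. */
    	trace_assign_type(field, entry);

    	if (!trace_seq_printf(&iter->seq, "my event: %lu\n", field->value))
    		return TRACE_TYPE_PARTIAL_LINE;

    	return TRACE_TYPE_HANDLED;
    }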
286/* Return values for print_line callback */
287enum print_line_t {
288 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
289 TRACE_TYPE_HANDLED = 1,
290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
291};
292
293
294/*
295 * An option specific to a tracer. This is a boolean value.
296 * The bit is the bit mask that sets its value in the
297 * flags value of struct tracer_flags.
298 */
299struct tracer_opt {
300 const char *name; /* Will appear on the trace_options file */
301 u32 bit; /* Mask assigned in val field in tracer_flags */
302};
303
304/*
305 * The set of specific options for a tracer. Your tracer
306 * has to set the initial value of the flags val.
307 */
308struct tracer_flags {
309 u32 val;
310 struct tracer_opt *opts;
311};
312
313/* Makes it easier to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b
315
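The function graph tracer added below (trace_functions_graph.c) is the in-tree user of this pattern; as a condensed, hypothetical sketch (the option name and bit value are invented for illustration, and the code is assumed to live next to "trace.h"):

    #define MY_OPT_VERBOSE	0x1

    static struct tracer_opt my_tracer_opts[] = {
    	/* Appears as "verbose" in the trace_options file */
    	{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
    	{ }	/* the empty entry terminates the list */
    };

    static struct tracer_flags my_tracer_flags = {
    	.val	= 0,			/* all options off by default */
    	.opts	= my_tracer_opts,
    };

The tracer then points its flags member at my_tracer_flags, and its set_flag() callback is invoked when one of these options is toggled through trace_options.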
135/* 316/*
136 * A specific tracer, represented by methods that operate on a trace array: 317 * A specific tracer, represented by methods that operate on a trace array:
137 */ 318 */
138struct tracer { 319struct tracer {
139 const char *name; 320 const char *name;
140 void (*init)(struct trace_array *tr); 321 /* Your tracer should raise a warning if init fails */
322 int (*init)(struct trace_array *tr);
141 void (*reset)(struct trace_array *tr); 323 void (*reset)(struct trace_array *tr);
324 void (*start)(struct trace_array *tr);
325 void (*stop)(struct trace_array *tr);
142 void (*open)(struct trace_iterator *iter); 326 void (*open)(struct trace_iterator *iter);
143 void (*pipe_open)(struct trace_iterator *iter); 327 void (*pipe_open)(struct trace_iterator *iter);
144 void (*close)(struct trace_iterator *iter); 328 void (*close)(struct trace_iterator *iter);
145 void (*start)(struct trace_iterator *iter);
146 void (*stop)(struct trace_iterator *iter);
147 ssize_t (*read)(struct trace_iterator *iter, 329 ssize_t (*read)(struct trace_iterator *iter,
148 struct file *filp, char __user *ubuf, 330 struct file *filp, char __user *ubuf,
149 size_t cnt, loff_t *ppos); 331 size_t cnt, loff_t *ppos);
150 void (*ctrl_update)(struct trace_array *tr);
151#ifdef CONFIG_FTRACE_STARTUP_TEST 332#ifdef CONFIG_FTRACE_STARTUP_TEST
152 int (*selftest)(struct tracer *trace, 333 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr); 334 struct trace_array *tr);
154#endif 335#endif
155 int (*print_line)(struct trace_iterator *iter); 336 void (*print_header)(struct seq_file *m);
337 enum print_line_t (*print_line)(struct trace_iterator *iter);
338 /* If you handled the flag setting, return 0 */
339 int (*set_flag)(u32 old_flags, u32 bit, int set);
156 struct tracer *next; 340 struct tracer *next;
157 int print_max; 341 int print_max;
342 struct tracer_flags *flags;
158}; 343};
159 344
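As a rough sketch of the reworked callback layout (all names here are invented; the boot, branch and function-graph tracers added later in this patch follow the same shape):

    #include "trace.h"	/* the sketch assumes it lives in kernel/trace/ */

    static int my_trace_init(struct trace_array *tr)
    {
    	/* Clear any stale entries before recording starts */
    	tracing_reset_online_cpus(tr);
    	return 0;	/* a non-zero return signals a failed init */
    }

    static void my_trace_reset(struct trace_array *tr)
    {
    	/* Undo whatever init() set up */
    }

    static struct tracer my_tracer __read_mostly =
    {
    	.name	= "my-tracer",
    	.init	= my_trace_init,
    	.reset	= my_trace_reset,
    };

    static __init int my_tracer_register(void)
    {
    	return register_tracer(&my_tracer);
    }
    device_initcall(my_tracer_register);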
160struct trace_seq { 345struct trace_seq {
@@ -171,60 +356,72 @@ struct trace_iterator {
171 struct trace_array *tr; 356 struct trace_array *tr;
172 struct tracer *trace; 357 struct tracer *trace;
173 void *private; 358 void *private;
174 long last_overrun[NR_CPUS]; 359 struct ring_buffer_iter *buffer_iter[NR_CPUS];
175 long overrun[NR_CPUS];
176 360
177 /* The below is zeroed out in pipe_read */ 361 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq; 362 struct trace_seq seq;
179 struct trace_entry *ent; 363 struct trace_entry *ent;
180 int cpu; 364 int cpu;
181 365 u64 ts;
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184 366
185 unsigned long iter_flags; 367 unsigned long iter_flags;
186 loff_t pos; 368 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx; 369 long idx;
370
371 cpumask_t started;
191}; 372};
192 373
193void tracing_reset(struct trace_array_cpu *data); 374int tracing_is_enabled(void);
375void trace_wake_up(void);
376void tracing_reset(struct trace_array *tr, int cpu);
377void tracing_reset_online_cpus(struct trace_array *tr);
194int tracing_open_generic(struct inode *inode, struct file *filp); 378int tracing_open_generic(struct inode *inode, struct file *filp);
195struct dentry *tracing_init_dentry(void); 379struct dentry *tracing_init_dentry(void);
196void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 380void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197 381
382struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
383 struct trace_array_cpu *data);
384void tracing_generic_entry_update(struct trace_entry *entry,
385 unsigned long flags,
386 int pc);
387
198void ftrace(struct trace_array *tr, 388void ftrace(struct trace_array *tr,
199 struct trace_array_cpu *data, 389 struct trace_array_cpu *data,
200 unsigned long ip, 390 unsigned long ip,
201 unsigned long parent_ip, 391 unsigned long parent_ip,
202 unsigned long flags); 392 unsigned long flags, int pc);
203void tracing_sched_switch_trace(struct trace_array *tr, 393void tracing_sched_switch_trace(struct trace_array *tr,
204 struct trace_array_cpu *data, 394 struct trace_array_cpu *data,
205 struct task_struct *prev, 395 struct task_struct *prev,
206 struct task_struct *next, 396 struct task_struct *next,
207 unsigned long flags); 397 unsigned long flags, int pc);
208void tracing_record_cmdline(struct task_struct *tsk); 398void tracing_record_cmdline(struct task_struct *tsk);
209 399
210void tracing_sched_wakeup_trace(struct trace_array *tr, 400void tracing_sched_wakeup_trace(struct trace_array *tr,
211 struct trace_array_cpu *data, 401 struct trace_array_cpu *data,
212 struct task_struct *wakee, 402 struct task_struct *wakee,
213 struct task_struct *cur, 403 struct task_struct *cur,
214 unsigned long flags); 404 unsigned long flags, int pc);
215void trace_special(struct trace_array *tr, 405void trace_special(struct trace_array *tr,
216 struct trace_array_cpu *data, 406 struct trace_array_cpu *data,
217 unsigned long arg1, 407 unsigned long arg1,
218 unsigned long arg2, 408 unsigned long arg2,
219 unsigned long arg3); 409 unsigned long arg3, int pc);
220void trace_function(struct trace_array *tr, 410void trace_function(struct trace_array *tr,
221 struct trace_array_cpu *data, 411 struct trace_array_cpu *data,
222 unsigned long ip, 412 unsigned long ip,
223 unsigned long parent_ip, 413 unsigned long parent_ip,
224 unsigned long flags); 414 unsigned long flags, int pc);
415
416void trace_graph_return(struct ftrace_graph_ret *trace);
417int trace_graph_entry(struct ftrace_graph_ent *trace);
418void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
225 419
226void tracing_start_cmdline_record(void); 420void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void); 421void tracing_stop_cmdline_record(void);
422void tracing_sched_switch_assign_trace(struct trace_array *tr);
423void tracing_stop_sched_switch_record(void);
424void tracing_start_sched_switch_record(void);
228int register_tracer(struct tracer *type); 425int register_tracer(struct tracer *type);
229void unregister_tracer(struct tracer *type); 426void unregister_tracer(struct tracer *type);
230 427
@@ -239,7 +436,7 @@ void update_max_tr_single(struct trace_array *tr,
239 436
240extern cycle_t ftrace_now(int cpu); 437extern cycle_t ftrace_now(int cpu);
241 438
242#ifdef CONFIG_FTRACE 439#ifdef CONFIG_FUNCTION_TRACER
243void tracing_start_function_trace(void); 440void tracing_start_function_trace(void);
244void tracing_stop_function_trace(void); 441void tracing_stop_function_trace(void);
245#else 442#else
@@ -260,6 +457,7 @@ struct tracer_switch_ops {
260 struct tracer_switch_ops *next; 457 struct tracer_switch_ops *next;
261}; 458};
262 459
460char *trace_find_cmdline(int pid);
263#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 461#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
264 462
265#ifdef CONFIG_DYNAMIC_FTRACE 463#ifdef CONFIG_DYNAMIC_FTRACE
@@ -268,54 +466,96 @@ extern unsigned long ftrace_update_tot_cnt;
268extern int DYN_FTRACE_TEST_NAME(void); 466extern int DYN_FTRACE_TEST_NAME(void);
269#endif 467#endif
270 468
271#ifdef CONFIG_MMIOTRACE
272extern void __trace_mmiotrace_rw(struct trace_array *tr,
273 struct trace_array_cpu *data,
274 struct mmiotrace_rw *rw);
275extern void __trace_mmiotrace_map(struct trace_array *tr,
276 struct trace_array_cpu *data,
277 struct mmiotrace_map *map);
278#endif
279
280#ifdef CONFIG_FTRACE_STARTUP_TEST 469#ifdef CONFIG_FTRACE_STARTUP_TEST
281#ifdef CONFIG_FTRACE
282extern int trace_selftest_startup_function(struct tracer *trace, 470extern int trace_selftest_startup_function(struct tracer *trace,
283 struct trace_array *tr); 471 struct trace_array *tr);
284#endif
285#ifdef CONFIG_IRQSOFF_TRACER
286extern int trace_selftest_startup_irqsoff(struct tracer *trace, 472extern int trace_selftest_startup_irqsoff(struct tracer *trace,
287 struct trace_array *tr); 473 struct trace_array *tr);
288#endif
289#ifdef CONFIG_PREEMPT_TRACER
290extern int trace_selftest_startup_preemptoff(struct tracer *trace, 474extern int trace_selftest_startup_preemptoff(struct tracer *trace,
291 struct trace_array *tr); 475 struct trace_array *tr);
292#endif
293#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
294extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, 476extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
295 struct trace_array *tr); 477 struct trace_array *tr);
296#endif
297#ifdef CONFIG_SCHED_TRACER
298extern int trace_selftest_startup_wakeup(struct tracer *trace, 478extern int trace_selftest_startup_wakeup(struct tracer *trace,
299 struct trace_array *tr); 479 struct trace_array *tr);
300#endif 480extern int trace_selftest_startup_nop(struct tracer *trace,
301#ifdef CONFIG_CONTEXT_SWITCH_TRACER 481 struct trace_array *tr);
302extern int trace_selftest_startup_sched_switch(struct tracer *trace, 482extern int trace_selftest_startup_sched_switch(struct tracer *trace,
303 struct trace_array *tr); 483 struct trace_array *tr);
304#endif
305#ifdef CONFIG_SYSPROF_TRACER
306extern int trace_selftest_startup_sysprof(struct tracer *trace, 484extern int trace_selftest_startup_sysprof(struct tracer *trace,
307 struct trace_array *tr); 485 struct trace_array *tr);
308#endif 486extern int trace_selftest_startup_branch(struct tracer *trace,
487 struct trace_array *tr);
309#endif /* CONFIG_FTRACE_STARTUP_TEST */ 488#endif /* CONFIG_FTRACE_STARTUP_TEST */
310 489
311extern void *head_page(struct trace_array_cpu *data); 490extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 491extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
492extern void trace_seq_print_cont(struct trace_seq *s,
493 struct trace_iterator *iter);
494
495extern int
496seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
497 unsigned long sym_flags);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 498extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt); 499 size_t cnt);
315extern long ns2usecs(cycle_t nsec); 500extern long ns2usecs(cycle_t nsec);
501extern int
502trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
316 503
317extern unsigned long trace_flags; 504extern unsigned long trace_flags;
318 505
506/* Standard output formatting function used for function return traces */
507#ifdef CONFIG_FUNCTION_GRAPH_TRACER
508extern enum print_line_t print_graph_function(struct trace_iterator *iter);
509
510#ifdef CONFIG_DYNAMIC_FTRACE
511/* TODO: make this variable */
512#define FTRACE_GRAPH_MAX_FUNCS 32
513extern int ftrace_graph_count;
514extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
515
516static inline int ftrace_graph_addr(unsigned long addr)
517{
518 int i;
519
520 if (!ftrace_graph_count || test_tsk_trace_graph(current))
521 return 1;
522
523 for (i = 0; i < ftrace_graph_count; i++) {
524 if (addr == ftrace_graph_funcs[i])
525 return 1;
526 }
527
528 return 0;
529}
530#else
531static inline int ftrace_trace_addr(unsigned long addr)
532{
533 return 1;
534}
535static inline int ftrace_graph_addr(unsigned long addr)
536{
537 return 1;
538}
539#endif /* CONFIG_DYNAMIC_FTRACE */
540
541#else /* CONFIG_FUNCTION_GRAPH_TRACER */
542static inline enum print_line_t
543print_graph_function(struct trace_iterator *iter)
544{
545 return TRACE_TYPE_UNHANDLED;
546}
547#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
548
549extern struct pid *ftrace_pid_trace;
550
551static inline int ftrace_trace_task(struct task_struct *task)
552{
553 if (!ftrace_pid_trace)
554 return 1;
555
556 return test_tsk_trace_trace(task);
557}
558
319/* 559/*
320 * trace_iterator_flags is an enumeration that defines bit 560 * trace_iterator_flags is an enumeration that defines bit
321 * positions into trace_flags that controls the output. 561 * positions into trace_flags that controls the output.
@@ -334,6 +574,94 @@ enum trace_iterator_flags {
334 TRACE_ITER_BLOCK = 0x80, 574 TRACE_ITER_BLOCK = 0x80,
335 TRACE_ITER_STACKTRACE = 0x100, 575 TRACE_ITER_STACKTRACE = 0x100,
336 TRACE_ITER_SCHED_TREE = 0x200, 576 TRACE_ITER_SCHED_TREE = 0x200,
577 TRACE_ITER_PRINTK = 0x400,
578 TRACE_ITER_PREEMPTONLY = 0x800,
579 TRACE_ITER_BRANCH = 0x1000,
580 TRACE_ITER_ANNOTATE = 0x2000,
581 TRACE_ITER_USERSTACKTRACE = 0x4000,
582 TRACE_ITER_SYM_USEROBJ = 0x8000,
583 TRACE_ITER_PRINTK_MSGONLY = 0x10000
337}; 584};
338 585
586/*
587 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
588 * control the output of kernel symbols.
589 */
590#define TRACE_ITER_SYM_MASK \
591 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
592
593extern struct tracer nop_trace;
594
595/**
596 * ftrace_preempt_disable - disable preemption scheduler safe
597 *
598 * When tracing can happen inside the scheduler, there exist
599 * cases where the tracing might happen before the need_resched
600 * flag is checked. If this happens and the tracer calls
601 * preempt_enable (after a disable), a schedule might take place,
602 * causing an infinite recursion.
603 *
604 * To prevent this, we read the need_resched flag before
605 * disabling preemption. When we want to enable preemption we
606 * check the flag; if it is set, then we call preempt_enable_no_resched.
607 * Otherwise, we call preempt_enable.
608 *
609 * The rationale for doing the above is that if need_resched is set
610 * and we have yet to reschedule, we are either in an atomic location
611 * (where we do not need to check for scheduling) or we are inside
612 * the scheduler and do not want to resched.
613 */
614static inline int ftrace_preempt_disable(void)
615{
616 int resched;
617
618 resched = need_resched();
619 preempt_disable_notrace();
620
621 return resched;
622}
623
624/**
625 * ftrace_preempt_enable - enable preemption scheduler safe
626 * @resched: the return value from ftrace_preempt_disable
627 *
628 * This is a scheduler safe way to enable preemption and not miss
629 * any preemption checks. The disable call saved the state of preemption.
630 * If resched is set, then we were either inside an atomic or
631 * are inside the scheduler (we would have already scheduled
632 * otherwise). In this case, we do not want to call normal
633 * preempt_enable, but preempt_enable_no_resched instead.
634 */
635static inline void ftrace_preempt_enable(int resched)
636{
637 if (resched)
638 preempt_enable_no_resched_notrace();
639 else
640 preempt_enable_notrace();
641}
642
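A hedged sketch of the intended usage, with a hypothetical callback body (the same shape as the tracer hooks elsewhere in this patch):

    static void my_trace_call(unsigned long ip, unsigned long parent_ip)
    {
    	int resched;

    	/* Saves the need_resched state, then disables preemption */
    	resched = ftrace_preempt_disable();

    	/* ... record the event into the ring buffer here ... */

    	/* Re-enables preemption; if need_resched was already set when
    	 * we disabled, this skips the reschedule check so we do not
    	 * recurse into the scheduler. */
    	ftrace_preempt_enable(resched);
    }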
643#ifdef CONFIG_BRANCH_TRACER
644extern int enable_branch_tracing(struct trace_array *tr);
645extern void disable_branch_tracing(void);
646static inline int trace_branch_enable(struct trace_array *tr)
647{
648 if (trace_flags & TRACE_ITER_BRANCH)
649 return enable_branch_tracing(tr);
650 return 0;
651}
652static inline void trace_branch_disable(void)
653{
654 /* due to races, always disable */
655 disable_branch_tracing();
656}
657#else
658static inline int trace_branch_enable(struct trace_array *tr)
659{
660 return 0;
661}
662static inline void trace_branch_disable(void)
663{
664}
665#endif /* CONFIG_BRANCH_TRACER */
666
339#endif /* _LINUX_KERNEL_TRACE_H */ 667#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 000000000000..3ccebde28482
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,186 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12
13#include "trace.h"
14
15static struct trace_array *boot_trace;
16static bool pre_initcalls_finished;
17
18/* Tells the boot tracer that the pre_smp_initcalls have finished,
19 * so we are ready to record.
20 * It doesn't enable sched events tracing, however;
21 * you have to call enable_boot_trace to do so.
22 */
23void start_boot_trace(void)
24{
25 pre_initcalls_finished = true;
26}
27
28void enable_boot_trace(void)
29{
30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
32}
33
34void disable_boot_trace(void)
35{
36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
38}
39
40static int boot_trace_init(struct trace_array *tr)
41{
42 int cpu;
43 boot_trace = tr;
44
45 for_each_cpu_mask(cpu, cpu_possible_map)
46 tracing_reset(tr, cpu);
47
48 tracing_sched_switch_assign_trace(tr);
49 return 0;
50}
51
52static enum print_line_t
53initcall_call_print_line(struct trace_iterator *iter)
54{
55 struct trace_entry *entry = iter->ent;
56 struct trace_seq *s = &iter->seq;
57 struct trace_boot_call *field;
58 struct boot_trace_call *call;
59 u64 ts;
60 unsigned long nsec_rem;
61 int ret;
62
63 trace_assign_type(field, entry);
64 call = &field->boot_call;
65 ts = iter->ts;
66 nsec_rem = do_div(ts, 1000000000);
67
68 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
69 (unsigned long)ts, nsec_rem, call->func, call->caller);
70
71 if (!ret)
72 return TRACE_TYPE_PARTIAL_LINE;
73 else
74 return TRACE_TYPE_HANDLED;
75}
76
77static enum print_line_t
78initcall_ret_print_line(struct trace_iterator *iter)
79{
80 struct trace_entry *entry = iter->ent;
81 struct trace_seq *s = &iter->seq;
82 struct trace_boot_ret *field;
83 struct boot_trace_ret *init_ret;
84 u64 ts;
85 unsigned long nsec_rem;
86 int ret;
87
88 trace_assign_type(field, entry);
89 init_ret = &field->boot_ret;
90 ts = iter->ts;
91 nsec_rem = do_div(ts, 1000000000);
92
93 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
94 "returned %d after %llu msecs\n",
95 (unsigned long) ts,
96 nsec_rem,
97 init_ret->func, init_ret->result, init_ret->duration);
98
99 if (!ret)
100 return TRACE_TYPE_PARTIAL_LINE;
101 else
102 return TRACE_TYPE_HANDLED;
103}
104
105static enum print_line_t initcall_print_line(struct trace_iterator *iter)
106{
107 struct trace_entry *entry = iter->ent;
108
109 switch (entry->type) {
110 case TRACE_BOOT_CALL:
111 return initcall_call_print_line(iter);
112 case TRACE_BOOT_RET:
113 return initcall_ret_print_line(iter);
114 default:
115 return TRACE_TYPE_UNHANDLED;
116 }
117}
118
119struct tracer boot_tracer __read_mostly =
120{
121 .name = "initcall",
122 .init = boot_trace_init,
123 .reset = tracing_reset_online_cpus,
124 .print_line = initcall_print_line,
125};
126
127void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
128{
129 struct ring_buffer_event *event;
130 struct trace_boot_call *entry;
131 unsigned long irq_flags;
132 struct trace_array *tr = boot_trace;
133
134 if (!pre_initcalls_finished)
135 return;
136
137 /* Get its name now since this function could
138 * disappear because it is in the .init section.
139 */
140 sprint_symbol(bt->func, (unsigned long)fn);
141 preempt_disable();
142
143 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
144 &irq_flags);
145 if (!event)
146 goto out;
147 entry = ring_buffer_event_data(event);
148 tracing_generic_entry_update(&entry->ent, 0, 0);
149 entry->ent.type = TRACE_BOOT_CALL;
150 entry->boot_call = *bt;
151 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
152
153 trace_wake_up();
154
155 out:
156 preempt_enable();
157}
158
159void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
160{
161 struct ring_buffer_event *event;
162 struct trace_boot_ret *entry;
163 unsigned long irq_flags;
164 struct trace_array *tr = boot_trace;
165
166 if (!pre_initcalls_finished)
167 return;
168
169 sprint_symbol(bt->func, (unsigned long)fn);
170 preempt_disable();
171
172 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
173 &irq_flags);
174 if (!event)
175 goto out;
176 entry = ring_buffer_event_data(event);
177 tracing_generic_entry_update(&entry->ent, 0, 0);
178 entry->ent.type = TRACE_BOOT_RET;
179 entry->boot_ret = *bt;
180 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
181
182 trace_wake_up();
183
184 out:
185 preempt_enable();
186}
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..6c00feb3bac7
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,342 @@
1/*
2 * unlikely profiler
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/irqflags.h>
10#include <linux/debugfs.h>
11#include <linux/uaccess.h>
12#include <linux/module.h>
13#include <linux/ftrace.h>
14#include <linux/hash.h>
15#include <linux/fs.h>
16#include <asm/local.h>
17#include "trace.h"
18
19#ifdef CONFIG_BRANCH_TRACER
20
21static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex);
23static struct trace_array *branch_tracer;
24
25static void
26probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
27{
28 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event;
30 struct trace_branch *entry;
31 unsigned long flags, irq_flags;
32 int cpu, pc;
33 const char *p;
34
35 /*
36 * I would love to save just the ftrace_likely_data pointer, but
37 * this code can also be used by modules. Ugly things can happen
38 * if the module is unloaded, and then we go and read the
39 * pointer. This is slower, but much safer.
40 */
41
42 if (unlikely(!tr))
43 return;
44
45 local_irq_save(flags);
46 cpu = raw_smp_processor_id();
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
51 &irq_flags);
52 if (!event)
53 goto out;
54
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59
60 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file);
62 while (p >= f->file && *p != '/')
63 p--;
64 p++;
65
66 strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
67 strncpy(entry->file, p, TRACE_FILE_SIZE);
68 entry->func[TRACE_FUNC_SIZE] = 0;
69 entry->file[TRACE_FILE_SIZE] = 0;
70 entry->line = f->line;
71 entry->correct = val == expect;
72
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
74
75 out:
76 atomic_dec(&tr->data[cpu]->disabled);
77 local_irq_restore(flags);
78}
79
80static inline
81void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
82{
83 if (!branch_tracing_enabled)
84 return;
85
86 probe_likely_condition(f, val, expect);
87}
88
89int enable_branch_tracing(struct trace_array *tr)
90{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr;
95 /*
96 * Must be seen before enabling. The reader is a condition
97 * where we do not need a matching rmb()
98 */
99 smp_wmb();
100 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex);
102
103 return ret;
104}
105
106void disable_branch_tracing(void)
107{
108 mutex_lock(&branch_tracing_mutex);
109
110 if (!branch_tracing_enabled)
111 goto out_unlock;
112
113 branch_tracing_enabled--;
114
115 out_unlock:
116 mutex_unlock(&branch_tracing_mutex);
117}
118
119static void start_branch_trace(struct trace_array *tr)
120{
121 enable_branch_tracing(tr);
122}
123
124static void stop_branch_trace(struct trace_array *tr)
125{
126 disable_branch_tracing();
127}
128
129static int branch_trace_init(struct trace_array *tr)
130{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr);
137 return 0;
138}
139
140static void branch_trace_reset(struct trace_array *tr)
141{
142 stop_branch_trace(tr);
143}
144
145struct tracer branch_trace __read_mostly =
146{
147 .name = "branch",
148 .init = branch_trace_init,
149 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch,
152#endif
153};
154
155__init static int init_branch_trace(void)
156{
157 return register_tracer(&branch_trace);
158}
159
160device_initcall(init_branch_trace);
161#else
162static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
164{
165}
166#endif /* CONFIG_BRANCH_TRACER */
167
168void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
169{
170 /*
171 * I would love to have a trace point here instead, but the
172 * trace point code is so inundated with unlikely and likely
173 * conditions that the recursive nightmare that exists is too
174 * much to try to get working. At least for now.
175 */
176 trace_likely_condition(f, val, expect);
177
178 /* FIXME: Make this atomic! */
179 if (val == expect)
180 f->correct++;
181 else
182 f->incorrect++;
183}
184EXPORT_SYMBOL(ftrace_likely_update);
185
186struct ftrace_pointer {
187 void *start;
188 void *stop;
189 int hit;
190};
191
192static void *
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{
195 const struct ftrace_pointer *f = m->private;
196 struct ftrace_branch_data *p = v;
197
198 (*pos)++;
199
200 if (v == (void *)1)
201 return f->start;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209}
210
211static void *t_start(struct seq_file *m, loff_t *pos)
212{
213 void *t = (void *)1;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218
219 return t;
220}
221
222static void t_stop(struct seq_file *m, void *p)
223{
224}
225
226static int t_show(struct seq_file *m, void *v)
227{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v;
230 const char *f;
231 long percent;
232
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */
247 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/')
249 f--;
250 f++;
251
252 /*
253	 * The miss count is overlaid on correct, and the hit count on incorrect.
254 */
255 if (p->correct) {
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0)
263 seq_printf(m, " X ");
264 else
265 seq_printf(m, "%3ld ", percent);
266 seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
267 return 0;
268}
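As a worked example of the percentage above: with correct = 3 and incorrect = 1 the column shows 1 * 100 / 4 = 25; with correct = 0 it shows 100 if there were any misses, otherwise the placeholder X (percent = -1).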
269
270static struct seq_operations tracing_likely_seq_ops = {
271 .start = t_start,
272 .next = t_next,
273 .stop = t_stop,
274 .show = t_show,
275};
276
277static int tracing_branch_open(struct inode *inode, struct file *file)
278{
279 int ret;
280
281 ret = seq_open(file, &tracing_likely_seq_ops);
282 if (!ret) {
283 struct seq_file *m = file->private_data;
284 m->private = (void *)inode->i_private;
285 }
286
287 return ret;
288}
289
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295
296#ifdef CONFIG_PROFILE_ALL_BRANCHES
297extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[];
299
300static const struct ftrace_pointer ftrace_branch_pos = {
301 .start = __start_branch_profile,
302 .stop = __stop_branch_profile,
303 .hit = 1,
304};
305
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */
307
308extern unsigned long __start_annotated_branch_profile[];
309extern unsigned long __stop_annotated_branch_profile[];
310
311static const struct ftrace_pointer ftrace_annotated_branch_pos = {
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315
316static __init int ftrace_branch_init(void)
317{
318 struct dentry *d_tracer;
319 struct dentry *entry;
320
321 d_tracer = tracing_init_dentry();
322
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
324 (void *)&ftrace_annotated_branch_pos,
325 &tracing_branch_fops);
326 if (!entry)
327 pr_warning("Could not create debugfs "
328			   "'profile_annotated_branch' entry\n");
329
330#ifdef CONFIG_PROFILE_ALL_BRANCHES
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer,
332 (void *)&ftrace_branch_pos,
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338
339 return 0;
340}
341
342device_initcall(ftrace_branch_init);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 312144897970..9236d7e25a16 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -16,20 +16,10 @@
16 16
17#include "trace.h" 17#include "trace.h"
18 18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]);
27}
28
29static void start_function_trace(struct trace_array *tr) 19static void start_function_trace(struct trace_array *tr)
30{ 20{
31 tr->cpu = get_cpu(); 21 tr->cpu = get_cpu();
32 function_reset(tr); 22 tracing_reset_online_cpus(tr);
33 put_cpu(); 23 put_cpu();
34 24
35 tracing_start_cmdline_record(); 25 tracing_start_cmdline_record();
@@ -42,32 +32,28 @@ static void stop_function_trace(struct trace_array *tr)
42 tracing_stop_cmdline_record(); 32 tracing_stop_cmdline_record();
43} 33}
44 34
45static void function_trace_init(struct trace_array *tr) 35static int function_trace_init(struct trace_array *tr)
46{ 36{
47 if (tr->ctrl) 37 start_function_trace(tr);
48 start_function_trace(tr); 38 return 0;
49} 39}
50 40
51static void function_trace_reset(struct trace_array *tr) 41static void function_trace_reset(struct trace_array *tr)
52{ 42{
53 if (tr->ctrl) 43 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 44}
56 45
57static void function_trace_ctrl_update(struct trace_array *tr) 46static void function_trace_start(struct trace_array *tr)
58{ 47{
59 if (tr->ctrl) 48 tracing_reset_online_cpus(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 49}
64 50
65static struct tracer function_trace __read_mostly = 51static struct tracer function_trace __read_mostly =
66{ 52{
67 .name = "ftrace", 53 .name = "function",
68 .init = function_trace_init, 54 .init = function_trace_init,
69 .reset = function_trace_reset, 55 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 56 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 57#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 58 .selftest = trace_selftest_startup_function,
73#endif 59#endif
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
new file mode 100644
index 000000000000..4bf39fcae97a
--- /dev/null
+++ b/kernel/trace/trace_functions_graph.c
@@ -0,0 +1,669 @@
1/*
2 *
3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 */
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/fs.h>
13
14#include "trace.h"
15
16#define TRACE_GRAPH_INDENT 2
17
18/* Flag options */
19#define TRACE_GRAPH_PRINT_OVERRUN 0x1
20#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8
23
24static struct tracer_opt trace_opts[] = {
25	/* Display overruns? */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27	/* Display CPU? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
29	/* Display overhead? */
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
33 { } /* Empty entry */
34};
35
36static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
39 .opts = trace_opts
40};
41
42/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
44
45static int graph_trace_init(struct trace_array *tr)
46{
47 int cpu, ret;
48
49 for_each_online_cpu(cpu)
50 tracing_reset(tr, cpu);
51
52 ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry);
54 if (ret)
55 return ret;
56 tracing_start_cmdline_record();
57
58 return 0;
59}
60
61static void graph_trace_reset(struct trace_array *tr)
62{
63 tracing_stop_cmdline_record();
64 unregister_ftrace_graph();
65}
66
67static inline int log10_cpu(int nb)
68{
69 if (nb / 100)
70 return 3;
71 if (nb / 10)
72 return 2;
73 return 1;
74}
75
76static enum print_line_t
77print_graph_cpu(struct trace_seq *s, int cpu)
78{
79 int i;
80 int ret;
81 int log10_this = log10_cpu(cpu);
82 int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
83
84
85 /*
86 * Start with a space character - to make it stand out
87 * to the right a bit when trace output is pasted into
88 * email:
89 */
90 ret = trace_seq_printf(s, " ");
91
92 /*
93 * Tricky - we space the CPU field according to the max
94 * number of online CPUs. On a 2-cpu system it would take
95 * a maximum of 1 digit - on a 128 cpu system it would
96 * take up to 3 digits:
97 */
98 for (i = 0; i < log10_all - log10_this; i++) {
99 ret = trace_seq_printf(s, " ");
100 if (!ret)
101 return TRACE_TYPE_PARTIAL_LINE;
102 }
103 ret = trace_seq_printf(s, "%d) ", cpu);
104 if (!ret)
105 return TRACE_TYPE_PARTIAL_LINE;
106
107 return TRACE_TYPE_HANDLED;
108}
109
110#define TRACE_GRAPH_PROCINFO_LENGTH 14
111
112static enum print_line_t
113print_graph_proc(struct trace_seq *s, pid_t pid)
114{
115 int i;
116 int ret;
117 int len;
118 char comm[8];
119 int spaces = 0;
120 /* sign + log10(MAX_INT) + '\0' */
121 char pid_str[11];
122
123 strncpy(comm, trace_find_cmdline(pid), 7);
124 comm[7] = '\0';
125 sprintf(pid_str, "%d", pid);
126
127 /* 1 stands for the "-" character */
128 len = strlen(comm) + strlen(pid_str) + 1;
129
130 if (len < TRACE_GRAPH_PROCINFO_LENGTH)
131 spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;
132
133 /* First spaces to align center */
134 for (i = 0; i < spaces / 2; i++) {
135 ret = trace_seq_printf(s, " ");
136 if (!ret)
137 return TRACE_TYPE_PARTIAL_LINE;
138 }
139
140 ret = trace_seq_printf(s, "%s-%s", comm, pid_str);
141 if (!ret)
142 return TRACE_TYPE_PARTIAL_LINE;
143
144 /* Last spaces to align center */
145 for (i = 0; i < spaces - (spaces / 2); i++) {
146 ret = trace_seq_printf(s, " ");
147 if (!ret)
148 return TRACE_TYPE_PARTIAL_LINE;
149 }
150 return TRACE_TYPE_HANDLED;
151}
152
153
154/* If the pid changed since the last trace, output this event */
155static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu)
157{
158 pid_t prev_pid;
159 int ret;
160
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
162 return TRACE_TYPE_HANDLED;
163
164 prev_pid = last_pid[cpu];
165 last_pid[cpu] = pid;
166
167/*
168 * Context-switch trace line:
169
170 ------------------------------------------
171 | 1) migration/0--1 => sshd-1755
172 ------------------------------------------
173
174 */
175 ret = trace_seq_printf(s,
176 " ------------------------------------------\n");
177 if (!ret)
178		return TRACE_TYPE_PARTIAL_LINE;
179
180 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE)
182		return TRACE_TYPE_PARTIAL_LINE;
183
184 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE)
186		return TRACE_TYPE_PARTIAL_LINE;
187
188 ret = trace_seq_printf(s, " => ");
189 if (!ret)
190		return TRACE_TYPE_PARTIAL_LINE;
191
192 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE)
194		return TRACE_TYPE_PARTIAL_LINE;
195
196 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n");
198 if (!ret)
199		return TRACE_TYPE_PARTIAL_LINE;
200
201	return TRACE_TYPE_HANDLED;
202}
203
204static bool
205trace_branch_is_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr)
207{
208 struct ring_buffer_iter *ring_iter;
209 struct ring_buffer_event *event;
210 struct ftrace_graph_ret_entry *next;
211
212 ring_iter = iter->buffer_iter[iter->cpu];
213
214 if (!ring_iter)
215 return false;
216
217 event = ring_buffer_iter_peek(ring_iter, NULL);
218
219 if (!event)
220 return false;
221
222 next = ring_buffer_event_data(event);
223
224 if (next->ent.type != TRACE_GRAPH_RET)
225 return false;
226
227 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func)
229 return false;
230
231 return true;
232}
233
234static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid)
237{
238 int ret;
239
240 if (addr < (unsigned long)__irqentry_text_start ||
241 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED;
243
244 if (type == TRACE_GRAPH_ENT) {
245 ret = trace_seq_printf(s, "==========> | ");
246 } else {
247 /* Cpu */
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
249 ret = print_graph_cpu(s, cpu);
250 if (ret == TRACE_TYPE_PARTIAL_LINE)
251 return TRACE_TYPE_PARTIAL_LINE;
252 }
253 /* Proc */
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
255 ret = print_graph_proc(s, pid);
256 if (ret == TRACE_TYPE_PARTIAL_LINE)
257 return TRACE_TYPE_PARTIAL_LINE;
258
259 ret = trace_seq_printf(s, " | ");
260 if (!ret)
261 return TRACE_TYPE_PARTIAL_LINE;
262 }
263
264 /* No overhead */
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
266 ret = trace_seq_printf(s, " ");
267 if (!ret)
268 return TRACE_TYPE_PARTIAL_LINE;
269 }
270
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED;
276}
277
278static enum print_line_t
279print_graph_duration(unsigned long long duration, struct trace_seq *s)
280{
281 unsigned long nsecs_rem = do_div(duration, 1000);
282 /* log10(ULONG_MAX) + '\0' */
283 char msecs_str[21];
284 char nsecs_str[5];
285 int ret, len;
286 int i;
287
288 sprintf(msecs_str, "%lu", (unsigned long) duration);
289
290 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str);
292 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE;
294
295 len = strlen(msecs_str);
296
297 /* Print nsecs (we don't want to exceed 7 numbers) */
298 if (len < 7) {
299 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem);
300 ret = trace_seq_printf(s, ".%s", nsecs_str);
301 if (!ret)
302 return TRACE_TYPE_PARTIAL_LINE;
303 len += strlen(nsecs_str);
304 }
305
306 ret = trace_seq_printf(s, " us ");
307 if (!ret)
308 return TRACE_TYPE_PARTIAL_LINE;
309
310 /* Print remaining spaces to fit the row's width */
311 for (i = len; i < 7; i++) {
312 ret = trace_seq_printf(s, " ");
313 if (!ret)
314 return TRACE_TYPE_PARTIAL_LINE;
315 }
316
317 ret = trace_seq_printf(s, "| ");
318 if (!ret)
319 return TRACE_TYPE_PARTIAL_LINE;
320 return TRACE_TYPE_HANDLED;
321
322}
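For example, a 12345 ns duration is split by do_div() into 12 (held, despite the msecs_str name, as microseconds) with a 345 ns remainder, and comes out as "12.345 us " padded to the fixed column width.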
323
324/* Signal an execution time overhead to the output */
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{
328	/* Duration exceeded 100 usecs */
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331
332	/* Duration exceeded 10 usecs */
333 if (duration > 10000ULL)
334 return trace_seq_printf(s, "+ ");
335
336 return trace_seq_printf(s, " ");
337}
338
339/* Case of a leaf function on its call entry */
340static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
343{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call;
348 unsigned long long duration;
349 int ret;
350 int i;
351
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime;
357
358 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
360 ret = print_graph_overhead(duration, s);
361 if (!ret)
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364
365 /* Duration */
366 ret = print_graph_duration(duration, s);
367 if (ret == TRACE_TYPE_PARTIAL_LINE)
368 return TRACE_TYPE_PARTIAL_LINE;
369
370 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
372 ret = trace_seq_printf(s, " ");
373 if (!ret)
374 return TRACE_TYPE_PARTIAL_LINE;
375 }
376
377 ret = seq_print_ip_sym(s, call->func, 0);
378 if (!ret)
379 return TRACE_TYPE_PARTIAL_LINE;
380
381 ret = trace_seq_printf(s, "();\n");
382 if (!ret)
383 return TRACE_TYPE_PARTIAL_LINE;
384
385 return TRACE_TYPE_HANDLED;
386}
387
388static enum print_line_t
389print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
390 struct trace_seq *s, pid_t pid, int cpu)
391{
392 int i;
393 int ret;
394 struct ftrace_graph_ent *call = &entry->graph_ent;
395
396 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
398 ret = trace_seq_printf(s, " ");
399 if (!ret)
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402
403 /* Interrupt */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | ");
408 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 }
414
415
416 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " ");
419 if (!ret)
420 return TRACE_TYPE_PARTIAL_LINE;
421 }
422
423 ret = seq_print_ip_sym(s, call->func, 0);
424 if (!ret)
425 return TRACE_TYPE_PARTIAL_LINE;
426
427 ret = trace_seq_printf(s, "() {\n");
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 return TRACE_TYPE_HANDLED;
432}
433
434static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu)
437{
438 int ret;
439 struct trace_entry *ent = iter->ent;
440
441 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE;
444
445 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu);
448 if (ret == TRACE_TYPE_PARTIAL_LINE)
449 return TRACE_TYPE_PARTIAL_LINE;
450 }
451
452 /* Proc */
453 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
454 ret = print_graph_proc(s, ent->pid);
455 if (ret == TRACE_TYPE_PARTIAL_LINE)
456 return TRACE_TYPE_PARTIAL_LINE;
457
458 ret = trace_seq_printf(s, " | ");
459 if (!ret)
460 return TRACE_TYPE_PARTIAL_LINE;
461 }
462
463 if (trace_branch_is_leaf(iter, field))
464 return print_graph_entry_leaf(iter, field, s);
465 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467
468}
469
470static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu)
473{
474 int i;
475 int ret;
476 unsigned long long duration = trace->rettime - trace->calltime;
477
478 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE;
481
482 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu);
485 if (ret == TRACE_TYPE_PARTIAL_LINE)
486 return TRACE_TYPE_PARTIAL_LINE;
487 }
488
489 /* Proc */
490 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
491 ret = print_graph_proc(s, ent->pid);
492 if (ret == TRACE_TYPE_PARTIAL_LINE)
493 return TRACE_TYPE_PARTIAL_LINE;
494
495 ret = trace_seq_printf(s, " | ");
496 if (!ret)
497 return TRACE_TYPE_PARTIAL_LINE;
498 }
499
500 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
502 ret = print_graph_overhead(duration, s);
503 if (!ret)
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506
507 /* Duration */
508 ret = print_graph_duration(duration, s);
509 if (ret == TRACE_TYPE_PARTIAL_LINE)
510 return TRACE_TYPE_PARTIAL_LINE;
511
512 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
514 ret = trace_seq_printf(s, " ");
515 if (!ret)
516 return TRACE_TYPE_PARTIAL_LINE;
517 }
518
519 ret = trace_seq_printf(s, "}\n");
520 if (!ret)
521 return TRACE_TYPE_PARTIAL_LINE;
522
523 /* Overrun */
524 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
525 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
526 trace->overrun);
527 if (!ret)
528 return TRACE_TYPE_PARTIAL_LINE;
529 }
530
531 ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid);
532 if (ret == TRACE_TYPE_PARTIAL_LINE)
533 return TRACE_TYPE_PARTIAL_LINE;
534
535 return TRACE_TYPE_HANDLED;
536}
537
538static enum print_line_t
539print_graph_comment(struct print_entry *trace, struct trace_seq *s,
540 struct trace_entry *ent, struct trace_iterator *iter)
541{
542 int i;
543 int ret;
544
545 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE;
548
549 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE;
554 }
555
556 /* Proc */
557 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
558 ret = print_graph_proc(s, ent->pid);
559 if (ret == TRACE_TYPE_PARTIAL_LINE)
560 return TRACE_TYPE_PARTIAL_LINE;
561
562 ret = trace_seq_printf(s, " | ");
563 if (!ret)
564 return TRACE_TYPE_PARTIAL_LINE;
565 }
566
567 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
569 ret = trace_seq_printf(s, " ");
570 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE;
572 }
573
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */
580 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
582 ret = trace_seq_printf(s, " ");
583 if (!ret)
584 return TRACE_TYPE_PARTIAL_LINE;
585 }
586
587 /* The comment */
588 ret = trace_seq_printf(s, "/* %s", trace->buf);
589 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE;
591
592 if (ent->flags & TRACE_FLAG_CONT)
593 trace_seq_print_cont(s, iter);
594
595 ret = trace_seq_printf(s, " */\n");
596 if (!ret)
597 return TRACE_TYPE_PARTIAL_LINE;
598
599 return TRACE_TYPE_HANDLED;
600}
601
602
603enum print_line_t
604print_graph_function(struct trace_iterator *iter)
605{
606 struct trace_seq *s = &iter->seq;
607 struct trace_entry *entry = iter->ent;
608
609 switch (entry->type) {
610 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter,
614 iter->cpu);
615 }
616 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu);
620 }
621 case TRACE_PRINT: {
622 struct print_entry *field;
623 trace_assign_type(field, entry);
624 return print_graph_comment(field, s, entry, iter);
625 }
626 default:
627 return TRACE_TYPE_UNHANDLED;
628 }
629}
630
631static void print_graph_headers(struct seq_file *s)
632{
633 /* 1st line */
634 seq_printf(s, "# ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU ");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
640 seq_printf(s, "OVERHEAD/");
641 seq_printf(s, "DURATION FUNCTION CALLS\n");
642
643 /* 2nd line */
644 seq_printf(s, "# ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
650 seq_printf(s, "| ");
651 seq_printf(s, "| | | | |\n");
652 } else
653 seq_printf(s, " | | | | |\n");
654}
655static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph",
657 .init = graph_trace_init,
658 .reset = graph_trace_reset,
659 .print_line = print_graph_function,
660 .print_header = print_graph_headers,
661 .flags = &tracer_flags,
662};
663
664static __init int init_graph_trace(void)
665{
666 return register_tracer(&graph_trace);
667}
668
669device_initcall(init_graph_trace);
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
new file mode 100644
index 000000000000..b6a3e20a49a9
--- /dev/null
+++ b/kernel/trace/trace_hw_branches.c
@@ -0,0 +1,195 @@
1/*
2 * h/w branch tracer for x86 based on bts
3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/kallsyms.h>
13
14#include <asm/ds.h>
15
16#include "trace.h"
17
18
19#define SIZEOF_BTS (1 << 13)
20
21static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23
24#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id())
26
27
28static void bts_trace_start_cpu(void *arg)
29{
30 if (this_tracer)
31 ds_release_bts(this_tracer);
32
33 this_tracer =
34 ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
35 /* ovfl = */ NULL, /* th = */ (size_t)-1,
36 BTS_KERNEL);
37 if (IS_ERR(this_tracer)) {
38 this_tracer = NULL;
39 return;
40 }
41}
42
43static void bts_trace_start(struct trace_array *tr)
44{
45 int cpu;
46
47 tracing_reset_online_cpus(tr);
48
49 for_each_cpu_mask(cpu, cpu_possible_map)
50 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
51}
52
53static void bts_trace_stop_cpu(void *arg)
54{
55 if (this_tracer) {
56 ds_release_bts(this_tracer);
57 this_tracer = NULL;
58 }
59}
60
61static void bts_trace_stop(struct trace_array *tr)
62{
63 int cpu;
64
65 for_each_cpu_mask(cpu, cpu_possible_map)
66 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
67}
68
69static int bts_trace_init(struct trace_array *tr)
70{
71 tracing_reset_online_cpus(tr);
72 bts_trace_start(tr);
73
74 return 0;
75}
76
77static void bts_trace_print_header(struct seq_file *m)
78{
79 seq_puts(m,
80 "# CPU# FROM TO FUNCTION\n");
81 seq_puts(m,
82 "# | | | |\n");
83}
84
85static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
86{
87 struct trace_entry *entry = iter->ent;
88 struct trace_seq *seq = &iter->seq;
89 struct hw_branch_entry *it;
90
91 trace_assign_type(it, entry);
92
93 if (entry->type == TRACE_HW_BRANCHES) {
94 if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
95 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ",
96 it->from, it->to) &&
97 (!it->from ||
98 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
99 trace_seq_printf(seq, "\n"))
100 return TRACE_TYPE_HANDLED;
101		return TRACE_TYPE_PARTIAL_LINE;
102 }
103 return TRACE_TYPE_UNHANDLED;
104}
105
106void trace_hw_branch(struct trace_array *tr, u64 from, u64 to)
107{
108 struct ring_buffer_event *event;
109 struct hw_branch_entry *entry;
110 unsigned long irq;
111
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
113 if (!event)
114 return;
115 entry = ring_buffer_event_data(event);
116 tracing_generic_entry_update(&entry->ent, 0, from);
117 entry->ent.type = TRACE_HW_BRANCHES;
118 entry->ent.cpu = smp_processor_id();
119 entry->from = from;
120 entry->to = to;
121 ring_buffer_unlock_commit(tr->buffer, event, irq);
122}
123
124static void trace_bts_at(struct trace_array *tr,
125 const struct bts_trace *trace, void *at)
126{
127 struct bts_struct bts;
128 int err = 0;
129
130 WARN_ON_ONCE(!trace->read);
131 if (!trace->read)
132 return;
133
134 err = trace->read(this_tracer, at, &bts);
135 if (err < 0)
136 return;
137
138 switch (bts.qualifier) {
139 case BTS_BRANCH:
140 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to);
141 break;
142 }
143}
144
145static void trace_bts_cpu(void *arg)
146{
147 struct trace_array *tr = (struct trace_array *) arg;
148 const struct bts_trace *trace;
149 unsigned char *at;
150
151 if (!this_tracer)
152 return;
153
154 ds_suspend_bts(this_tracer);
155 trace = ds_read_bts(this_tracer);
156 if (!trace)
157 goto out;
158
159 for (at = trace->ds.top; (void *)at < trace->ds.end;
160 at += trace->ds.size)
161 trace_bts_at(tr, trace, at);
162
163 for (at = trace->ds.begin; (void *)at < trace->ds.top;
164 at += trace->ds.size)
165 trace_bts_at(tr, trace, at);
166
167out:
168 ds_resume_bts(this_tracer);
169}
170
171static void trace_bts_prepare(struct trace_iterator *iter)
172{
173 int cpu;
174
175 for_each_cpu_mask(cpu, cpu_possible_map)
176 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
177}
178
179struct tracer bts_tracer __read_mostly =
180{
181 .name = "hw-branch-tracer",
182 .init = bts_trace_init,
183 .reset = bts_trace_stop,
184 .print_header = bts_trace_print_header,
185 .print_line = bts_trace_print_line,
186 .start = bts_trace_start,
187 .stop = bts_trace_stop,
188 .open = trace_bts_prepare
189};
190
191__init static int init_bts_trace(void)
192{
193 return register_tracer(&bts_tracer);
194}
195device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649fa..7c2e326bbc8b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
63 */ 63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence; 64static __cacheline_aligned_in_smp unsigned long max_sequence;
65 65
66#ifdef CONFIG_FTRACE 66#ifdef CONFIG_FUNCTION_TRACER
67/* 67/*
68 * irqsoff uses its own tracer function to keep the overhead down: 68 * irqsoff uses its own tracer function to keep the overhead down:
69 */ 69 */
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags); 98 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
104{ 104{
105 .func = irqsoff_tracer_call, 105 .func = irqsoff_tracer_call,
106}; 106};
107#endif /* CONFIG_FTRACE */ 107#endif /* CONFIG_FUNCTION_TRACER */
108 108
109/* 109/*
110 * Should this new latency be reported/recorded? 110 * Should this new latency be reported/recorded?
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
130 unsigned long latency, t0, t1; 130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta; 131 cycle_t T0, T1, delta;
132 unsigned long flags; 132 unsigned long flags;
133 int pc;
133 134
134 /* 135 /*
135 * usecs conversion is slow so we try to delay the conversion 136 * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
141 142
142 local_save_flags(flags); 143 local_save_flags(flags);
143 144
145 pc = preempt_count();
146
144 if (!report_latency(delta)) 147 if (!report_latency(delta))
145 goto out; 148 goto out;
146 149
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 153 if (!report_latency(delta))
151 goto out_unlock; 154 goto out_unlock;
152 155
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
154 157
155 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
156 159
@@ -173,8 +176,8 @@ out_unlock:
173out: 176out:
174 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data); 179 tracing_reset(tr, cpu);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
178} 181}
179 182
180static inline void 183static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
203 data->critical_sequence = max_sequence; 206 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu); 207 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip; 208 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data); 209 tracing_reset(tr, cpu);
207 210
208 local_save_flags(flags); 211 local_save_flags(flags);
209 212
210 trace_function(tr, data, ip, parent_ip, flags); 213 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
211 214
212 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
213 216
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
234 237
235 data = tr->data[cpu]; 238 data = tr->data[cpu];
236 239
237 if (unlikely(!data) || unlikely(!head_page(data)) || 240 if (unlikely(!data) ||
238 !data->critical_start || atomic_read(&data->disabled)) 241 !data->critical_start || atomic_read(&data->disabled))
239 return; 242 return;
240 243
241 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
242 245
243 local_save_flags(flags); 246 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags); 247 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0; 249 data->critical_start = 0;
247 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
@@ -350,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
350} 353}
351#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
352 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it on opening a trace output file.
359 */
360static int save_tracer_enabled;
361
353static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
354{ 363{
355 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
356 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
357} 372}
358 373
359static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
360{ 375{
361 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
362 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
363} 379}
364 380
@@ -367,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
367 irqsoff_trace = tr; 383 irqsoff_trace = tr;
368 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
369 smp_wmb(); 385 smp_wmb();
370 386 start_irqsoff_tracer(tr);
371 if (tr->ctrl)
372 start_irqsoff_tracer(tr);
373} 387}
374 388
375static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
376{ 390{
377 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
378 stop_irqsoff_tracer(tr);
379} 392}
380 393
381static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
382{ 395{
383 if (tr->ctrl) 396 tracer_enabled = 1;
384 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
385 else 398}
386 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
387} 404}
388 405
389static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
390{ 407{
391 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
392 if (iter->tr->ctrl) 409 tracer_enabled = 0;
393 stop_irqsoff_tracer(iter->tr);
394} 410}
395 411
396static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
397{ 413{
398 if (iter->tr->ctrl) 414 /* restart tracing */
399 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
400} 416}
401 417
402#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
403static void irqsoff_tracer_init(struct trace_array *tr) 419static int irqsoff_tracer_init(struct trace_array *tr)
404{ 420{
405 trace_type = TRACER_IRQS_OFF; 421 trace_type = TRACER_IRQS_OFF;
406 422
407 __irqsoff_tracer_init(tr); 423 __irqsoff_tracer_init(tr);
424 return 0;
408} 425}
409static struct tracer irqsoff_tracer __read_mostly = 426static struct tracer irqsoff_tracer __read_mostly =
410{ 427{
411 .name = "irqsoff", 428 .name = "irqsoff",
412 .init = irqsoff_tracer_init, 429 .init = irqsoff_tracer_init,
413 .reset = irqsoff_tracer_reset, 430 .reset = irqsoff_tracer_reset,
431 .start = irqsoff_tracer_start,
432 .stop = irqsoff_tracer_stop,
414 .open = irqsoff_tracer_open, 433 .open = irqsoff_tracer_open,
415 .close = irqsoff_tracer_close, 434 .close = irqsoff_tracer_close,
416 .ctrl_update = irqsoff_tracer_ctrl_update,
417 .print_max = 1, 435 .print_max = 1,
418#ifdef CONFIG_FTRACE_SELFTEST 436#ifdef CONFIG_FTRACE_SELFTEST
419 .selftest = trace_selftest_startup_irqsoff, 437 .selftest = trace_selftest_startup_irqsoff,
@@ -425,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
425#endif 443#endif
426 444
427#ifdef CONFIG_PREEMPT_TRACER 445#ifdef CONFIG_PREEMPT_TRACER
428static void preemptoff_tracer_init(struct trace_array *tr) 446static int preemptoff_tracer_init(struct trace_array *tr)
429{ 447{
430 trace_type = TRACER_PREEMPT_OFF; 448 trace_type = TRACER_PREEMPT_OFF;
431 449
432 __irqsoff_tracer_init(tr); 450 __irqsoff_tracer_init(tr);
451 return 0;
433} 452}
434 453
435static struct tracer preemptoff_tracer __read_mostly = 454static struct tracer preemptoff_tracer __read_mostly =
@@ -437,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
437 .name = "preemptoff", 456 .name = "preemptoff",
438 .init = preemptoff_tracer_init, 457 .init = preemptoff_tracer_init,
439 .reset = irqsoff_tracer_reset, 458 .reset = irqsoff_tracer_reset,
459 .start = irqsoff_tracer_start,
460 .stop = irqsoff_tracer_stop,
440 .open = irqsoff_tracer_open, 461 .open = irqsoff_tracer_open,
441 .close = irqsoff_tracer_close, 462 .close = irqsoff_tracer_close,
442 .ctrl_update = irqsoff_tracer_ctrl_update,
443 .print_max = 1, 463 .print_max = 1,
444#ifdef CONFIG_FTRACE_SELFTEST 464#ifdef CONFIG_FTRACE_SELFTEST
445 .selftest = trace_selftest_startup_preemptoff, 465 .selftest = trace_selftest_startup_preemptoff,
@@ -453,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
453#if defined(CONFIG_IRQSOFF_TRACER) && \ 473#if defined(CONFIG_IRQSOFF_TRACER) && \
454 defined(CONFIG_PREEMPT_TRACER) 474 defined(CONFIG_PREEMPT_TRACER)
455 475
456static void preemptirqsoff_tracer_init(struct trace_array *tr) 476static int preemptirqsoff_tracer_init(struct trace_array *tr)
457{ 477{
458 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 478 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
459 479
460 __irqsoff_tracer_init(tr); 480 __irqsoff_tracer_init(tr);
481 return 0;
461} 482}
462 483
463static struct tracer preemptirqsoff_tracer __read_mostly = 484static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -465,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
465 .name = "preemptirqsoff", 486 .name = "preemptirqsoff",
466 .init = preemptirqsoff_tracer_init, 487 .init = preemptirqsoff_tracer_init,
467 .reset = irqsoff_tracer_reset, 488 .reset = irqsoff_tracer_reset,
489 .start = irqsoff_tracer_start,
490 .stop = irqsoff_tracer_stop,
468 .open = irqsoff_tracer_open, 491 .open = irqsoff_tracer_open,
469 .close = irqsoff_tracer_close, 492 .close = irqsoff_tracer_close,
470 .ctrl_update = irqsoff_tracer_ctrl_update,
471 .print_max = 1, 493 .print_max = 1,
472#ifdef CONFIG_FTRACE_SELFTEST 494#ifdef CONFIG_FTRACE_SELFTEST
473 .selftest = trace_selftest_startup_preemptirqsoff, 495 .selftest = trace_selftest_startup_preemptirqsoff,
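A change that recurs throughout this hunk is the calling convention of trace_function(): callers now capture the preemption count themselves and pass it as a new last argument, and per-cpu resets go through tracing_reset(tr, cpu). A hedged sketch of a call site after the change, with example_record() being an invented helper name:

/* Hypothetical call site using the post-change trace_function() signature. */
static void example_record(struct trace_array *tr, struct trace_array_cpu *data,
			   unsigned long ip, unsigned long parent_ip)
{
	unsigned long flags;
	int pc = preempt_count();	/* captured by the caller now */

	local_save_flags(flags);
	trace_function(tr, data, ip, parent_ip, flags, pc);
}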
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb4..fffcb069f1dc 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -18,46 +18,39 @@ struct header_iter {
18 18
19static struct trace_array *mmio_trace_array; 19static struct trace_array *mmio_trace_array;
20static bool overrun_detected; 20static bool overrun_detected;
21static unsigned long prev_overruns;
21 22
22static void mmio_reset_data(struct trace_array *tr) 23static void mmio_reset_data(struct trace_array *tr)
23{ 24{
24 int cpu;
25
26 overrun_detected = false; 25 overrun_detected = false;
27 tr->time_start = ftrace_now(tr->cpu); 26 prev_overruns = 0;
28 27
29 for_each_online_cpu(cpu) 28 tracing_reset_online_cpus(tr);
30 tracing_reset(tr->data[cpu]);
31} 29}
32 30
33static void mmio_trace_init(struct trace_array *tr) 31static int mmio_trace_init(struct trace_array *tr)
34{ 32{
35 pr_debug("in %s\n", __func__); 33 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr; 34 mmio_trace_array = tr;
37 if (tr->ctrl) { 35
38 mmio_reset_data(tr); 36 mmio_reset_data(tr);
39 enable_mmiotrace(); 37 enable_mmiotrace();
40 } 38 return 0;
41} 39}
42 40
43static void mmio_trace_reset(struct trace_array *tr) 41static void mmio_trace_reset(struct trace_array *tr)
44{ 42{
45 pr_debug("in %s\n", __func__); 43 pr_debug("in %s\n", __func__);
46 if (tr->ctrl) 44
47 disable_mmiotrace(); 45 disable_mmiotrace();
48 mmio_reset_data(tr); 46 mmio_reset_data(tr);
49 mmio_trace_array = NULL; 47 mmio_trace_array = NULL;
50} 48}
51 49
52static void mmio_trace_ctrl_update(struct trace_array *tr) 50static void mmio_trace_start(struct trace_array *tr)
53{ 51{
54 pr_debug("in %s\n", __func__); 52 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) { 53 mmio_reset_data(tr);
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61} 54}
62 55
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 56static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -128,12 +121,12 @@ static void mmio_close(struct trace_iterator *iter)
128 121
129static unsigned long count_overruns(struct trace_iterator *iter) 122static unsigned long count_overruns(struct trace_iterator *iter)
130{ 123{
131 int cpu;
132 unsigned long cnt = 0; 124 unsigned long cnt = 0;
133 for_each_online_cpu(cpu) { 125 unsigned long over = ring_buffer_overruns(iter->tr->buffer);
134 cnt += iter->overrun[cpu]; 126
135 iter->overrun[cpu] = 0; 127 if (over > prev_overruns)
136 } 128 cnt = over - prev_overruns;
129 prev_overruns = over;
137 return cnt; 130 return cnt;
138} 131}
139 132
@@ -171,17 +164,21 @@ print_out:
171 return (ret == -EBUSY) ? 0 : ret; 164 return (ret == -EBUSY) ? 0 : ret;
172} 165}
173 166
174static int mmio_print_rw(struct trace_iterator *iter) 167static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
175{ 168{
176 struct trace_entry *entry = iter->ent; 169 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw; 170 struct trace_mmiotrace_rw *field;
171 struct mmiotrace_rw *rw;
178 struct trace_seq *s = &iter->seq; 172 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t); 173 unsigned long long t = ns2usecs(iter->ts);
180 unsigned long usec_rem = do_div(t, 1000000ULL); 174 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t; 175 unsigned secs = (unsigned long)t;
182 int ret = 1; 176 int ret = 1;
183 177
184 switch (entry->mmiorw.opcode) { 178 trace_assign_type(field, entry);
179 rw = &field->rw;
180
181 switch (rw->opcode) {
185 case MMIO_READ: 182 case MMIO_READ:
186 ret = trace_seq_printf(s, 183 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 184 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +206,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
209 break; 206 break;
210 } 207 }
211 if (ret) 208 if (ret)
212 return 1; 209 return TRACE_TYPE_HANDLED;
213 return 0; 210 return TRACE_TYPE_PARTIAL_LINE;
214} 211}
215 212
216static int mmio_print_map(struct trace_iterator *iter) 213static enum print_line_t mmio_print_map(struct trace_iterator *iter)
217{ 214{
218 struct trace_entry *entry = iter->ent; 215 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap; 216 struct trace_mmiotrace_map *field;
217 struct mmiotrace_map *m;
220 struct trace_seq *s = &iter->seq; 218 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t); 219 unsigned long long t = ns2usecs(iter->ts);
222 unsigned long usec_rem = do_div(t, 1000000ULL); 220 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t; 221 unsigned secs = (unsigned long)t;
224 int ret = 1; 222 int ret;
225 223
226 switch (entry->mmiorw.opcode) { 224 trace_assign_type(field, entry);
225 m = &field->map;
226
227 switch (m->opcode) {
227 case MMIO_PROBE: 228 case MMIO_PROBE:
228 ret = trace_seq_printf(s, 229 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 230 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +242,43 @@ static int mmio_print_map(struct trace_iterator *iter)
241 break; 242 break;
242 } 243 }
243 if (ret) 244 if (ret)
244 return 1; 245 return TRACE_TYPE_HANDLED;
245 return 0; 246 return TRACE_TYPE_PARTIAL_LINE;
247}
248
249static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
250{
251 struct trace_entry *entry = iter->ent;
252 struct print_entry *print = (struct print_entry *)entry;
253 const char *msg = print->buf;
254 struct trace_seq *s = &iter->seq;
255 unsigned long long t = ns2usecs(iter->ts);
256 unsigned long usec_rem = do_div(t, 1000000ULL);
257 unsigned secs = (unsigned long)t;
258 int ret;
259
260 /* The trailing newline must be in the message. */
261 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
262 if (!ret)
263 return TRACE_TYPE_PARTIAL_LINE;
264
265 if (entry->flags & TRACE_FLAG_CONT)
266 trace_seq_print_cont(s, iter);
267
268 return TRACE_TYPE_HANDLED;
246} 269}
247 270
248/* return 0 to abort printing without consuming current entry in pipe mode */ 271static enum print_line_t mmio_print_line(struct trace_iterator *iter)
249static int mmio_print_line(struct trace_iterator *iter)
250{ 272{
251 switch (iter->ent->type) { 273 switch (iter->ent->type) {
252 case TRACE_MMIO_RW: 274 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter); 275 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP: 276 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter); 277 return mmio_print_map(iter);
278 case TRACE_PRINT:
279 return mmio_print_mark(iter);
256 default: 280 default:
257 return 1; /* ignore unknown entries */ 281 return TRACE_TYPE_HANDLED; /* ignore unknown entries */
258 } 282 }
259} 283}
260 284
@@ -263,10 +287,10 @@ static struct tracer mmio_tracer __read_mostly =
263 .name = "mmiotrace", 287 .name = "mmiotrace",
264 .init = mmio_trace_init, 288 .init = mmio_trace_init,
265 .reset = mmio_trace_reset, 289 .reset = mmio_trace_reset,
290 .start = mmio_trace_start,
266 .pipe_open = mmio_pipe_open, 291 .pipe_open = mmio_pipe_open,
267 .close = mmio_close, 292 .close = mmio_close,
268 .read = mmio_read, 293 .read = mmio_read,
269 .ctrl_update = mmio_trace_ctrl_update,
270 .print_line = mmio_print_line, 294 .print_line = mmio_print_line,
271}; 295};
272 296
@@ -276,6 +300,27 @@ __init static int init_mmio_trace(void)
276} 300}
277device_initcall(init_mmio_trace); 301device_initcall(init_mmio_trace);
278 302
303static void __trace_mmiotrace_rw(struct trace_array *tr,
304 struct trace_array_cpu *data,
305 struct mmiotrace_rw *rw)
306{
307 struct ring_buffer_event *event;
308 struct trace_mmiotrace_rw *entry;
309 unsigned long irq_flags;
310
311 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
312 &irq_flags);
313 if (!event)
314 return;
315 entry = ring_buffer_event_data(event);
316 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
317 entry->ent.type = TRACE_MMIO_RW;
318 entry->rw = *rw;
319 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
320
321 trace_wake_up();
322}
323
279void mmio_trace_rw(struct mmiotrace_rw *rw) 324void mmio_trace_rw(struct mmiotrace_rw *rw)
280{ 325{
281 struct trace_array *tr = mmio_trace_array; 326 struct trace_array *tr = mmio_trace_array;
@@ -283,6 +328,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
283 __trace_mmiotrace_rw(tr, data, rw); 328 __trace_mmiotrace_rw(tr, data, rw);
284} 329}
285 330
331static void __trace_mmiotrace_map(struct trace_array *tr,
332 struct trace_array_cpu *data,
333 struct mmiotrace_map *map)
334{
335 struct ring_buffer_event *event;
336 struct trace_mmiotrace_map *entry;
337 unsigned long irq_flags;
338
339 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
340 &irq_flags);
341 if (!event)
342 return;
343 entry = ring_buffer_event_data(event);
344 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
345 entry->ent.type = TRACE_MMIO_MAP;
346 entry->map = *map;
347 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
348
349 trace_wake_up();
350}
351
286void mmio_trace_mapping(struct mmiotrace_map *map) 352void mmio_trace_mapping(struct mmiotrace_map *map)
287{ 353{
288 struct trace_array *tr = mmio_trace_array; 354 struct trace_array *tr = mmio_trace_array;
@@ -293,3 +359,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
293 __trace_mmiotrace_map(tr, data, map); 359 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable(); 360 preempt_enable();
295} 361}
362
363int mmio_trace_printk(const char *fmt, va_list args)
364{
365 return trace_vprintk(0, -1, fmt, args);
366}
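The mmio_trace_printk() helper added at the end of this file simply forwards to trace_vprintk(), so marker strings land in the same ring buffer and come back out through mmio_print_mark() as "MARK" lines. A varargs front end would presumably look like the sketch below; mmiotrace_example_printk() is an invented name, only mmio_trace_printk() itself comes from this hunk.

/* Hypothetical varargs wrapper around the helper added above. */
static int mmiotrace_example_printk(const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = mmio_trace_printk(fmt, args);	/* records a TRACE_PRINT entry */
	va_end(args);
	return ret;
}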
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 000000000000..b9767acd30ac
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,105 @@
1/*
2 * nop tracer
3 *
4 * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12
13#include "trace.h"
14
15/* Our two options */
16enum {
17 TRACE_NOP_OPT_ACCEPT = 0x1,
18 TRACE_NOP_OPT_REFUSE = 0x2
19};
20
21/* Options for the tracer (see trace_options file) */
22static struct tracer_opt nop_opts[] = {
23 /* Option that will be accepted by set_flag callback */
24 { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
25 /* Option that will be refused by set_flag callback */
26 { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
27 { } /* Always set a last empty entry */
28};
29
30static struct tracer_flags nop_flags = {
31 /* You can check your flags value here when you want. */
32 .val = 0, /* By default: all flags disabled */
33 .opts = nop_opts
34};
35
36static struct trace_array *ctx_trace;
37
38static void start_nop_trace(struct trace_array *tr)
39{
40 /* Nothing to do! */
41}
42
43static void stop_nop_trace(struct trace_array *tr)
44{
45 /* Nothing to do! */
46}
47
48static int nop_trace_init(struct trace_array *tr)
49{
50 int cpu;
51 ctx_trace = tr;
52
53 for_each_online_cpu(cpu)
54 tracing_reset(tr, cpu);
55
56 start_nop_trace(tr);
57 return 0;
58}
59
60static void nop_trace_reset(struct trace_array *tr)
61{
62 stop_nop_trace(tr);
63}
64
65/* This callback's only purpose is to accept or refuse
66 * the setting of a flag.
67 * If you don't implement it, then the flag setting will be
68 * automatically accepted.
69 */
70static int nop_set_flag(u32 old_flags, u32 bit, int set)
71{
72 /*
73 * Note that you don't need to update nop_flags.val yourself.
74 * The tracing API will do it automatically if you return 0.
75 */
76 if (bit == TRACE_NOP_OPT_ACCEPT) {
77 printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
78 " Now cat trace_options to see the result\n",
79 set);
80 return 0;
81 }
82
83 if (bit == TRACE_NOP_OPT_REFUSE) {
84 printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
85 "Now cat trace_options to see the result\n",
86 set);
87 return -EINVAL;
88 }
89
90 return 0;
91}
92
93
94struct tracer nop_trace __read_mostly =
95{
96 .name = "nop",
97 .init = nop_trace_init,
98 .reset = nop_trace_reset,
99#ifdef CONFIG_FTRACE_SELFTEST
100 .selftest = trace_selftest_startup_nop,
101#endif
102 .flags = &nop_flags,
103 .set_flag = nop_set_flag
104};
105
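Besides giving the selftest something harmless to run, the nop tracer doubles as a template for the new tracer_flags/set_flag interface: the core only flips a bit in nop_flags.val when set_flag() returns 0, and the tracer can then branch on that bit at run time. Roughly, with nop_example_event() being an invented function:

/* Hypothetical runtime check of the accepted option bit. */
static void nop_example_event(void)
{
	if (nop_flags.val & TRACE_NOP_OPT_ACCEPT)
		pr_debug("test_nop_accept is currently set\n");
}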
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 000000000000..a7172a352f62
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <linux/ftrace.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19
20static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled;
22
23
24static void start_power_trace(struct trace_array *tr)
25{
26 trace_power_enabled = 1;
27}
28
29static void stop_power_trace(struct trace_array *tr)
30{
31 trace_power_enabled = 0;
32}
33
34
35static int power_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 power_trace = tr;
39
40 trace_power_enabled = 1;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44 return 0;
45}
46
47static enum print_line_t power_print_line(struct trace_iterator *iter)
48{
49 int ret = 0;
50 struct trace_entry *entry = iter->ent;
51 struct trace_power *field;
52 struct power_trace *it;
53 struct trace_seq *s = &iter->seq;
54 struct timespec stamp;
55 struct timespec duration;
56
57 trace_assign_type(field, entry);
58 it = &field->state_data;
59 stamp = ktime_to_timespec(it->stamp);
60 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
61
62 if (entry->type == TRACE_POWER) {
63 if (it->type == POWER_CSTATE)
64 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
65 stamp.tv_sec,
66 stamp.tv_nsec,
67 it->state, iter->cpu,
68 duration.tv_sec,
69 duration.tv_nsec);
70 if (it->type == POWER_PSTATE)
71 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
72 stamp.tv_sec,
73 stamp.tv_nsec,
74 it->state, iter->cpu);
75 if (!ret)
76 return TRACE_TYPE_PARTIAL_LINE;
77 return TRACE_TYPE_HANDLED;
78 }
79 return TRACE_TYPE_UNHANDLED;
80}
81
82static struct tracer power_tracer __read_mostly =
83{
84 .name = "power",
85 .init = power_trace_init,
86 .start = start_power_trace,
87 .stop = stop_power_trace,
88 .reset = stop_power_trace,
89 .print_line = power_print_line,
90};
91
92static int init_power_trace(void)
93{
94 return register_tracer(&power_tracer);
95}
96device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
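The three exported hooks are meant to bracket a power-state transition: trace_power_start() stamps the beginning, trace_power_end() records the end timestamp and commits a TRACE_POWER entry, and trace_power_mark() does both at once for instantaneous events such as P-state changes. A caller in an idle path would presumably use them as in this sketch; example_enter_cstate() and the hardware step are placeholders.

/* Hypothetical call site bracketing a C-state entry. */
static void example_enter_cstate(int cstate)
{
	struct power_trace it;

	trace_power_start(&it, POWER_CSTATE, cstate);
	/* ... platform code actually enters and leaves the C-state here ... */
	trace_power_end(&it);	/* stamps the end and commits the entry */
}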
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..df175cb4564f 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,25 +9,27 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
23 struct task_struct *next) 24 struct task_struct *next)
24{ 25{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data; 26 struct trace_array_cpu *data;
28 unsigned long flags; 27 unsigned long flags;
29 long disabled;
30 int cpu; 28 int cpu;
29 int pc;
30
31 if (!sched_ref)
32 return;
31 33
32 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next); 35 tracing_record_cmdline(next);
@@ -35,183 +37,95 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
35 if (!tracer_enabled) 37 if (!tracer_enabled)
36 return; 38 return;
37 39
40 pc = preempt_count();
38 local_irq_save(flags); 41 local_irq_save(flags);
39 cpu = raw_smp_processor_id(); 42 cpu = raw_smp_processor_id();
40 data = tr->data[cpu]; 43 data = ctx_trace->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42 44
43 if (likely(disabled == 1)) 45 if (likely(!atomic_read(&data->disabled)))
44 tracing_sched_switch_trace(tr, data, prev, next, flags); 46 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
45 47
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags); 48 local_irq_restore(flags);
48} 49}
49 50
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void 51static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct 52probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
78 task_struct *curr)
79{ 53{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
83 unsigned long flags; 55 unsigned long flags;
84 long disabled; 56 int cpu, pc;
85 int cpu;
86 57
87 if (!tracer_enabled) 58 if (!likely(tracer_enabled))
88 return; 59 return;
89 60
90 tracing_record_cmdline(curr); 61 pc = preempt_count();
62 tracing_record_cmdline(current);
91 63
92 local_irq_save(flags); 64 local_irq_save(flags);
93 cpu = raw_smp_processor_id(); 65 cpu = raw_smp_processor_id();
94 data = tr->data[cpu]; 66 data = ctx_trace->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96 67
97 if (likely(disabled == 1)) 68 if (likely(!atomic_read(&data->disabled)))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); 69 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
70 flags, pc);
99 71
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags); 72 local_irq_restore(flags);
102} 73}
103 74
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr)
130{
131 int cpu;
132
133 tr->time_start = ftrace_now(tr->cpu);
134
135 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]);
137}
138
139static int tracing_sched_register(void) 75static int tracing_sched_register(void)
140{ 76{
141 int ret; 77 int ret;
142 78
143 ret = marker_probe_register("kernel_sched_wakeup", 79 ret = register_trace_sched_wakeup(probe_sched_wakeup);
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) { 80 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker" 81 pr_info("wakeup trace: Couldn't activate tracepoint"
149 " probe to kernel_sched_wakeup\n"); 82 " probe to kernel_sched_wakeup\n");
150 return ret; 83 return ret;
151 } 84 }
152 85
153 ret = marker_probe_register("kernel_sched_wakeup_new", 86 ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) { 87 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker" 88 pr_info("wakeup trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_wakeup_new\n"); 89 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe; 90 goto fail_deprobe;
161 } 91 }
162 92
163 ret = marker_probe_register("kernel_sched_schedule", 93 ret = register_trace_sched_switch(probe_sched_switch);
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) { 94 if (ret) {
169 pr_info("sched trace: Couldn't add marker" 95 pr_info("sched trace: Couldn't activate tracepoint"
170 " probe to kernel_sched_schedule\n"); 96 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new; 97 goto fail_deprobe_wake_new;
172 } 98 }
173 99
174 return ret; 100 return ret;
175fail_deprobe_wake_new: 101fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new", 102 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe: 103fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup", 104 unregister_trace_sched_wakeup(probe_sched_wakeup);
181 wake_up_callback,
182 &ctx_trace);
183 return ret; 105 return ret;
184} 106}
185 107
186static void tracing_sched_unregister(void) 108static void tracing_sched_unregister(void)
187{ 109{
188 marker_probe_unregister("kernel_sched_schedule", 110 unregister_trace_sched_switch(probe_sched_switch);
189 sched_switch_callback, 111 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
190 &ctx_trace); 112 unregister_trace_sched_wakeup(probe_sched_wakeup);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197} 113}
198 114
199static void tracing_start_sched_switch(void) 115static void tracing_start_sched_switch(void)
200{ 116{
201 long ref; 117 mutex_lock(&sched_register_mutex);
202 118 if (!(sched_ref++))
203 ref = atomic_inc_return(&sched_ref);
204 if (ref == 1)
205 tracing_sched_register(); 119 tracing_sched_register();
120 mutex_unlock(&sched_register_mutex);
206} 121}
207 122
208static void tracing_stop_sched_switch(void) 123static void tracing_stop_sched_switch(void)
209{ 124{
210 long ref; 125 mutex_lock(&sched_register_mutex);
211 126 if (!(--sched_ref))
212 ref = atomic_dec_and_test(&sched_ref);
213 if (ref)
214 tracing_sched_unregister(); 127 tracing_sched_unregister();
128 mutex_unlock(&sched_register_mutex);
215} 129}
216 130
217void tracing_start_cmdline_record(void) 131void tracing_start_cmdline_record(void)
@@ -224,40 +138,86 @@ void tracing_stop_cmdline_record(void)
224 tracing_stop_sched_switch(); 138 tracing_stop_sched_switch();
225} 139}
226 140
141/**
142 * tracing_start_sched_switch_record - start tracing context switches
143 *
144 * Turns on context switch tracing for a tracer.
145 */
146void tracing_start_sched_switch_record(void)
147{
148 if (unlikely(!ctx_trace)) {
149 WARN_ON(1);
150 return;
151 }
152
153 tracing_start_sched_switch();
154
155 mutex_lock(&sched_register_mutex);
156 tracer_enabled++;
157 mutex_unlock(&sched_register_mutex);
158}
159
160/**
161 * tracing_stop_sched_switch_record - stop tracing context switches
162 *
163 * Turns off context switch tracing for a tracer.
164 */
165void tracing_stop_sched_switch_record(void)
166{
167 mutex_lock(&sched_register_mutex);
168 tracer_enabled--;
169 WARN_ON(tracer_enabled < 0);
170 mutex_unlock(&sched_register_mutex);
171
172 tracing_stop_sched_switch();
173}
174
175/**
176 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
177 * @tr: trace array pointer to assign
178 *
179 * Some tracers might want to record the context switches in their
180 * trace. This function lets those tracers assign the trace array
181 * to use.
182 */
183void tracing_sched_switch_assign_trace(struct trace_array *tr)
184{
185 ctx_trace = tr;
186}
187
227static void start_sched_trace(struct trace_array *tr) 188static void start_sched_trace(struct trace_array *tr)
228{ 189{
229 sched_switch_reset(tr); 190 tracing_reset_online_cpus(tr);
230 tracing_start_cmdline_record(); 191 tracing_start_sched_switch_record();
231 tracer_enabled = 1;
232} 192}
233 193
234static void stop_sched_trace(struct trace_array *tr) 194static void stop_sched_trace(struct trace_array *tr)
235{ 195{
236 tracer_enabled = 0; 196 tracing_stop_sched_switch_record();
237 tracing_stop_cmdline_record();
238} 197}
239 198
240static void sched_switch_trace_init(struct trace_array *tr) 199static int sched_switch_trace_init(struct trace_array *tr)
241{ 200{
242 ctx_trace = tr; 201 ctx_trace = tr;
243 202 start_sched_trace(tr);
244 if (tr->ctrl) 203 return 0;
245 start_sched_trace(tr);
246} 204}
247 205
248static void sched_switch_trace_reset(struct trace_array *tr) 206static void sched_switch_trace_reset(struct trace_array *tr)
249{ 207{
250 if (tr->ctrl) 208 if (sched_ref)
251 stop_sched_trace(tr); 209 stop_sched_trace(tr);
252} 210}
253 211
254static void sched_switch_trace_ctrl_update(struct trace_array *tr) 212static void sched_switch_trace_start(struct trace_array *tr)
255{ 213{
256 /* When starting a new trace, reset the buffers */ 214 tracing_reset_online_cpus(tr);
257 if (tr->ctrl) 215 tracing_start_sched_switch();
258 start_sched_trace(tr); 216}
259 else 217
260 stop_sched_trace(tr); 218static void sched_switch_trace_stop(struct trace_array *tr)
219{
220 tracing_stop_sched_switch();
261} 221}
262 222
263static struct tracer sched_switch_trace __read_mostly = 223static struct tracer sched_switch_trace __read_mostly =
@@ -265,7 +225,8 @@ static struct tracer sched_switch_trace __read_mostly =
265 .name = "sched_switch", 225 .name = "sched_switch",
266 .init = sched_switch_trace_init, 226 .init = sched_switch_trace_init,
267 .reset = sched_switch_trace_reset, 227 .reset = sched_switch_trace_reset,
268 .ctrl_update = sched_switch_trace_ctrl_update, 228 .start = sched_switch_trace_start,
229 .stop = sched_switch_trace_stop,
269#ifdef CONFIG_FTRACE_SELFTEST 230#ifdef CONFIG_FTRACE_SELFTEST
270 .selftest = trace_selftest_startup_sched_switch, 231 .selftest = trace_selftest_startup_sched_switch,
271#endif 232#endif
@@ -273,14 +234,7 @@ static struct tracer sched_switch_trace __read_mostly =
273 234
274__init static int init_sched_switch_trace(void) 235__init static int init_sched_switch_trace(void)
275{ 236{
276 int ret = 0;
277
278 if (atomic_read(&sched_ref))
279 ret = tracing_sched_register();
280 if (ret) {
281 pr_info("error registering scheduler trace\n");
282 return ret;
283 }
284 return register_tracer(&sched_switch_trace); 237 return register_tracer(&sched_switch_trace);
285} 238}
286device_initcall(init_sched_switch_trace); 239device_initcall(init_sched_switch_trace);
240
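The new tracing_sched_switch_assign_trace() / tracing_start_sched_switch_record() pair lets another tracer fold context-switch events into its own buffer without touching the tracepoints directly. A hedged sketch with invented example_* names:

/* Hypothetical tracer pulling sched-switch events into its own trace array. */
static int example_init(struct trace_array *tr)
{
	tracing_sched_switch_assign_trace(tr);	/* route switch events into tr */
	tracing_start_sched_switch_record();
	return 0;
}

static void example_reset(struct trace_array *tr)
{
	tracing_stop_sched_switch_record();
}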
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..43586b689e31 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/marker.h> 18#include <trace/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
31 31
32static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
33 33
34#ifdef CONFIG_FTRACE 34#ifdef CONFIG_FUNCTION_TRACER
35/* 35/*
36 * irqsoff uses its own tracer function to keep the overhead down: 36 * irqsoff uses its own tracer function to keep the overhead down:
37 */ 37 */
@@ -44,12 +44,13 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
44 long disabled; 44 long disabled;
45 int resched; 45 int resched;
46 int cpu; 46 int cpu;
47 int pc;
47 48
48 if (likely(!wakeup_task)) 49 if (likely(!wakeup_task))
49 return; 50 return;
50 51
51 resched = need_resched(); 52 pc = preempt_count();
52 preempt_disable_notrace(); 53 resched = ftrace_preempt_disable();
53 54
54 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
55 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -70,7 +71,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
70 if (task_cpu(wakeup_task) != cpu) 71 if (task_cpu(wakeup_task) != cpu)
71 goto unlock; 72 goto unlock;
72 73
73 trace_function(tr, data, ip, parent_ip, flags); 74 trace_function(tr, data, ip, parent_ip, flags, pc);
74 75
75 unlock: 76 unlock:
76 __raw_spin_unlock(&wakeup_lock); 77 __raw_spin_unlock(&wakeup_lock);
@@ -79,22 +80,14 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
79 out: 80 out:
80 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
81 82
82 /* 83 ftrace_preempt_enable(resched);
83 * To prevent recursion from the scheduler, if the
84 * resched flag was set before we entered, then
85 * don't reschedule.
86 */
87 if (resched)
88 preempt_enable_no_resched_notrace();
89 else
90 preempt_enable_notrace();
91} 84}
92 85
93static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
94{ 87{
95 .func = wakeup_tracer_call, 88 .func = wakeup_tracer_call,
96}; 89};
97#endif /* CONFIG_FTRACE */ 90#endif /* CONFIG_FUNCTION_TRACER */
98 91
99/* 92/*
100 * Should this new latency be reported/recorded? 93 * Should this new latency be reported/recorded?
@@ -112,17 +105,18 @@ static int report_latency(cycle_t delta)
112} 105}
113 106
114static void notrace 107static void notrace
115wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, 108probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
116 struct task_struct *next) 109 struct task_struct *next)
117{ 110{
118 unsigned long latency = 0, t0 = 0, t1 = 0; 111 unsigned long latency = 0, t0 = 0, t1 = 0;
119 struct trace_array **ptr = private;
120 struct trace_array *tr = *ptr;
121 struct trace_array_cpu *data; 112 struct trace_array_cpu *data;
122 cycle_t T0, T1, delta; 113 cycle_t T0, T1, delta;
123 unsigned long flags; 114 unsigned long flags;
124 long disabled; 115 long disabled;
125 int cpu; 116 int cpu;
117 int pc;
118
119 tracing_record_cmdline(prev);
126 120
127 if (unlikely(!tracer_enabled)) 121 if (unlikely(!tracer_enabled))
128 return; 122 return;
@@ -139,12 +133,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
139 if (next != wakeup_task) 133 if (next != wakeup_task)
140 return; 134 return;
141 135
136 pc = preempt_count();
137
142 /* The task we are waiting for is waking up */ 138 /* The task we are waiting for is waking up */
143 data = tr->data[wakeup_cpu]; 139 data = wakeup_trace->data[wakeup_cpu];
144 140
145 /* disable local data, not wakeup_cpu data */ 141 /* disable local data, not wakeup_cpu data */
146 cpu = raw_smp_processor_id(); 142 cpu = raw_smp_processor_id();
147 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 143 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
148 if (likely(disabled != 1)) 144 if (likely(disabled != 1))
149 goto out; 145 goto out;
150 146
@@ -155,7 +151,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
155 if (unlikely(!tracer_enabled || next != wakeup_task)) 151 if (unlikely(!tracer_enabled || next != wakeup_task))
156 goto out_unlock; 152 goto out_unlock;
157 153
158 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); 154 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
159 155
160 /* 156 /*
161 * usecs conversion is slow so we try to delay the conversion 157 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +170,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
174 t0 = nsecs_to_usecs(T0); 170 t0 = nsecs_to_usecs(T0);
175 t1 = nsecs_to_usecs(T1); 171 t1 = nsecs_to_usecs(T1);
176 172
177 update_max_tr(tr, wakeup_task, wakeup_cpu); 173 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 174
179out_unlock: 175out_unlock:
180 __wakeup_reset(tr); 176 __wakeup_reset(wakeup_trace);
181 __raw_spin_unlock(&wakeup_lock); 177 __raw_spin_unlock(&wakeup_lock);
182 local_irq_restore(flags); 178 local_irq_restore(flags);
183out: 179out:
184 atomic_dec(&tr->data[cpu]->disabled); 180 atomic_dec(&wakeup_trace->data[cpu]->disabled);
185}
186
187static notrace void
188sched_switch_callback(void *probe_data, void *call_data,
189 const char *format, va_list *args)
190{
191 struct task_struct *prev;
192 struct task_struct *next;
193 struct rq *__rq;
194
195 /* skip prev_pid %d next_pid %d prev_state %ld */
196 (void)va_arg(*args, int);
197 (void)va_arg(*args, int);
198 (void)va_arg(*args, long);
199 __rq = va_arg(*args, typeof(__rq));
200 prev = va_arg(*args, typeof(prev));
201 next = va_arg(*args, typeof(next));
202
203 tracing_record_cmdline(prev);
204
205 /*
206 * If tracer_switch_func only points to the local
207 * switch func, it still needs the ptr passed to it.
208 */
209 wakeup_sched_switch(probe_data, __rq, prev, next);
210} 181}
211 182
212static void __wakeup_reset(struct trace_array *tr) 183static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +187,7 @@ static void __wakeup_reset(struct trace_array *tr)
216 187
217 for_each_possible_cpu(cpu) { 188 for_each_possible_cpu(cpu) {
218 data = tr->data[cpu]; 189 data = tr->data[cpu];
219 tracing_reset(data); 190 tracing_reset(tr, cpu);
220 } 191 }
221 192
222 wakeup_cpu = -1; 193 wakeup_cpu = -1;
@@ -240,19 +211,26 @@ static void wakeup_reset(struct trace_array *tr)
240} 211}
241 212
242static void 213static void
243wakeup_check_start(struct trace_array *tr, struct task_struct *p, 214probe_wakeup(struct rq *rq, struct task_struct *p, int success)
244 struct task_struct *curr)
245{ 215{
246 int cpu = smp_processor_id(); 216 int cpu = smp_processor_id();
247 unsigned long flags; 217 unsigned long flags;
248 long disabled; 218 long disabled;
219 int pc;
220
221 if (likely(!tracer_enabled))
222 return;
223
224 tracing_record_cmdline(p);
225 tracing_record_cmdline(current);
249 226
250 if (likely(!rt_task(p)) || 227 if (likely(!rt_task(p)) ||
251 p->prio >= wakeup_prio || 228 p->prio >= wakeup_prio ||
252 p->prio >= curr->prio) 229 p->prio >= current->prio)
253 return; 230 return;
254 231
255 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 232 pc = preempt_count();
233 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
256 if (unlikely(disabled != 1)) 234 if (unlikely(disabled != 1))
257 goto out; 235 goto out;
258 236
@@ -264,7 +242,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
264 goto out_locked; 242 goto out_locked;
265 243
266 /* reset the trace */ 244 /* reset the trace */
267 __wakeup_reset(tr); 245 __wakeup_reset(wakeup_trace);
268 246
269 wakeup_cpu = task_cpu(p); 247 wakeup_cpu = task_cpu(p);
270 wakeup_prio = p->prio; 248 wakeup_prio = p->prio;
@@ -274,74 +252,43 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
274 252
275 local_save_flags(flags); 253 local_save_flags(flags);
276 254
277 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 255 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
278 trace_function(tr, tr->data[wakeup_cpu], 256 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
279 CALLER_ADDR1, CALLER_ADDR2, flags); 257 CALLER_ADDR1, CALLER_ADDR2, flags, pc);
280 258
281out_locked: 259out_locked:
282 __raw_spin_unlock(&wakeup_lock); 260 __raw_spin_unlock(&wakeup_lock);
283out: 261out:
284 atomic_dec(&tr->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
285} 263}
286 264
287static notrace void 265/*
288wake_up_callback(void *probe_data, void *call_data, 266 * save_tracer_enabled is used to save the state of the tracer_enabled
289 const char *format, va_list *args) 267 * variable when we disable it on opening a trace output file.
290{ 268 */
291 struct trace_array **ptr = probe_data; 269static int save_tracer_enabled;
292 struct trace_array *tr = *ptr;
293 struct task_struct *curr;
294 struct task_struct *task;
295 struct rq *__rq;
296
297 if (likely(!tracer_enabled))
298 return;
299
300 /* Skip pid %d state %ld */
301 (void)va_arg(*args, int);
302 (void)va_arg(*args, long);
303 /* now get the meat: "rq %p task %p rq->curr %p" */
304 __rq = va_arg(*args, typeof(__rq));
305 task = va_arg(*args, typeof(task));
306 curr = va_arg(*args, typeof(curr));
307
308 tracing_record_cmdline(task);
309 tracing_record_cmdline(curr);
310
311 wakeup_check_start(tr, task, curr);
312}
313 270
314static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
315{ 272{
316 int ret; 273 int ret;
317 274
318 ret = marker_probe_register("kernel_sched_wakeup", 275 ret = register_trace_sched_wakeup(probe_wakeup);
319 "pid %d state %ld ## rq %p task %p rq->curr %p",
320 wake_up_callback,
321 &wakeup_trace);
322 if (ret) { 276 if (ret) {
323 pr_info("wakeup trace: Couldn't add marker" 277 pr_info("wakeup trace: Couldn't activate tracepoint"
324 " probe to kernel_sched_wakeup\n"); 278 " probe to kernel_sched_wakeup\n");
325 return; 279 return;
326 } 280 }
327 281
328 ret = marker_probe_register("kernel_sched_wakeup_new", 282 ret = register_trace_sched_wakeup_new(probe_wakeup);
329 "pid %d state %ld ## rq %p task %p rq->curr %p",
330 wake_up_callback,
331 &wakeup_trace);
332 if (ret) { 283 if (ret) {
333 pr_info("wakeup trace: Couldn't add marker" 284 pr_info("wakeup trace: Couldn't activate tracepoint"
334 " probe to kernel_sched_wakeup_new\n"); 285 " probe to kernel_sched_wakeup_new\n");
335 goto fail_deprobe; 286 goto fail_deprobe;
336 } 287 }
337 288
338 ret = marker_probe_register("kernel_sched_schedule", 289 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
339 "prev_pid %d next_pid %d prev_state %ld "
340 "## rq %p prev %p next %p",
341 sched_switch_callback,
342 &wakeup_trace);
343 if (ret) { 290 if (ret) {
344 pr_info("sched trace: Couldn't add marker" 291 pr_info("sched trace: Couldn't activate tracepoint"
345 " probe to kernel_sched_schedule\n"); 292 " probe to kernel_sched_schedule\n");
346 goto fail_deprobe_wake_new; 293 goto fail_deprobe_wake_new;
347 } 294 }
@@ -359,71 +306,71 @@ static void start_wakeup_tracer(struct trace_array *tr)
359 306
360 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
361 308
362 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
363 316
364 return; 317 return;
365fail_deprobe_wake_new: 318fail_deprobe_wake_new:
366 marker_probe_unregister("kernel_sched_wakeup_new", 319 unregister_trace_sched_wakeup_new(probe_wakeup);
367 wake_up_callback,
368 &wakeup_trace);
369fail_deprobe: 320fail_deprobe:
370 marker_probe_unregister("kernel_sched_wakeup", 321 unregister_trace_sched_wakeup(probe_wakeup);
371 wake_up_callback,
372 &wakeup_trace);
373} 322}
374 323
375static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
376{ 325{
377 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
379 marker_probe_unregister("kernel_sched_schedule", 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
380 sched_switch_callback, 330 unregister_trace_sched_wakeup_new(probe_wakeup);
381 &wakeup_trace); 331 unregister_trace_sched_wakeup(probe_wakeup);
382 marker_probe_unregister("kernel_sched_wakeup_new",
383 wake_up_callback,
384 &wakeup_trace);
385 marker_probe_unregister("kernel_sched_wakeup",
386 wake_up_callback,
387 &wakeup_trace);
388} 332}
389 333
390static void wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
391{ 335{
392 wakeup_trace = tr; 336 wakeup_trace = tr;
393 337 start_wakeup_tracer(tr);
394 if (tr->ctrl) 338 return 0;
395 start_wakeup_tracer(tr);
396} 339}
397 340
398static void wakeup_tracer_reset(struct trace_array *tr) 341static void wakeup_tracer_reset(struct trace_array *tr)
399{ 342{
400 if (tr->ctrl) { 343 stop_wakeup_tracer(tr);
401 stop_wakeup_tracer(tr); 344 /* make sure we put back any tasks we are tracing */
402 /* make sure we put back any tasks we are tracing */ 345 wakeup_reset(tr);
403 wakeup_reset(tr);
404 }
405} 346}
406 347
407static void wakeup_tracer_ctrl_update(struct trace_array *tr) 348static void wakeup_tracer_start(struct trace_array *tr)
408{ 349{
409 if (tr->ctrl) 350 wakeup_reset(tr);
410 start_wakeup_tracer(tr); 351 tracer_enabled = 1;
411 else 352 save_tracer_enabled = 1;
412 stop_wakeup_tracer(tr); 353}
354
355static void wakeup_tracer_stop(struct trace_array *tr)
356{
357 tracer_enabled = 0;
358 save_tracer_enabled = 0;
413} 359}
414 360
415static void wakeup_tracer_open(struct trace_iterator *iter) 361static void wakeup_tracer_open(struct trace_iterator *iter)
416{ 362{
417 /* stop the trace while dumping */ 363 /* stop the trace while dumping */
418 if (iter->tr->ctrl) 364 tracer_enabled = 0;
419 stop_wakeup_tracer(iter->tr);
420} 365}
421 366
422static void wakeup_tracer_close(struct trace_iterator *iter) 367static void wakeup_tracer_close(struct trace_iterator *iter)
423{ 368{
424 /* forget about any processes we were recording */ 369 /* forget about any processes we were recording */
425 if (iter->tr->ctrl) 370 if (save_tracer_enabled) {
426 start_wakeup_tracer(iter->tr); 371 wakeup_reset(iter->tr);
372 tracer_enabled = 1;
373 }
427} 374}
428 375
429static struct tracer wakeup_tracer __read_mostly = 376static struct tracer wakeup_tracer __read_mostly =
@@ -431,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
431 .name = "wakeup", 378 .name = "wakeup",
432 .init = wakeup_tracer_init, 379 .init = wakeup_tracer_init,
433 .reset = wakeup_tracer_reset, 380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
434 .open = wakeup_tracer_open, 383 .open = wakeup_tracer_open,
435 .close = wakeup_tracer_close, 384 .close = wakeup_tracer_close,
436 .ctrl_update = wakeup_tracer_ctrl_update,
437 .print_max = 1, 385 .print_max = 1,
438#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
439 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
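Both scheduler tracers now attach typed tracepoint probes instead of format-string markers, so the va_list unpacking callbacks disappear and each probe receives the event arguments directly. The registration pattern reduces to something like this sketch; probe_example() and example_register()/example_unregister() are invented names, the probe signature matches register_trace_sched_wakeup() as used above.

/* Hypothetical probe using the typed tracepoint interface. */
static void probe_example(struct rq *rq, struct task_struct *p, int success)
{
	tracing_record_cmdline(p);	/* arguments arrive already typed */
}

static int example_register(void)
{
	return register_trace_sched_wakeup(probe_example);
}

static void example_unregister(void)
{
	unregister_trace_sched_wakeup(probe_example);
}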
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073bf..88c8eb70f54a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,30 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
12 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT:
13 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH:
14 return 1; 17 return 1;
15 } 18 }
16 return 0; 19 return 0;
17} 20}
18 21
19static int 22static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{ 23{
22 struct trace_entry *entries; 24 struct ring_buffer_event *event;
23 struct page *page; 25 struct trace_entry *entry;
24 int idx = 0;
25 int i;
26 26
27 BUG_ON(list_empty(&data->trace_pages)); 27 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 page = list_entry(data->trace_pages.next, struct page, lru); 28 entry = ring_buffer_event_data(event);
29 entries = page_address(page);
30 29
31 check_pages(data); 30 if (!trace_valid_entry(entry)) {
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ", 31 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type); 32 entry->type);
46 goto failed; 33 goto failed;
47 } 34 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 } 35 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 }
70
71 return 0; 36 return 0;
72 37
73 failed: 38 failed:
@@ -87,20 +52,18 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
87 int cpu, ret = 0; 52 int cpu, ret = 0;
88 53
89 /* Don't allow flipping of max traces now */ 54 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags); 55 local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock); 56 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95 57
96 cnt += tr->data[cpu]->trace_idx; 58 cnt = ring_buffer_entries(tr->buffer);
97 59
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]); 60 for_each_possible_cpu(cpu) {
61 ret = trace_test_buffer_cpu(tr, cpu);
99 if (ret) 62 if (ret)
100 break; 63 break;
101 } 64 }
102 __raw_spin_unlock(&ftrace_max_lock); 65 __raw_spin_unlock(&ftrace_max_lock);
103 raw_local_irq_restore(flags); 66 local_irq_restore(flags);
104 67
105 if (count) 68 if (count)
106 *count = cnt; 69 *count = cnt;
@@ -108,7 +71,12 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
108 return ret; 71 return ret;
109} 72}
110 73
111#ifdef CONFIG_FTRACE 74static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
75{
76 printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
77 trace->name, init_ret);
78}
79#ifdef CONFIG_FUNCTION_TRACER
112 80
113#ifdef CONFIG_DYNAMIC_FTRACE 81#ifdef CONFIG_DYNAMIC_FTRACE
114 82
@@ -120,11 +88,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr, 88 struct trace_array *tr,
121 int (*func)(void)) 89 int (*func)(void))
122{ 90{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled; 91 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled; 92 int save_tracer_enabled = tracer_enabled;
93 unsigned long count;
127 char *func_name; 94 char *func_name;
95 int ret;
128 96
129 /* The ftrace test PASSED */ 97 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n"); 98 printk(KERN_CONT "PASSED\n");
@@ -137,13 +105,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
137 /* passed in by parameter to fool gcc from optimizing */ 105 /* passed in by parameter to fool gcc from optimizing */
138 func(); 106 func();
139 107
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /* 108 /*
148 * Some archs *cough*PowerPC*cough* add characters to the 109
149 * start of the function names. We simply put a '*' to 110 * start of the function names. We simply put a '*' to
@@ -155,8 +116,12 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
155 ftrace_set_filter(func_name, strlen(func_name), 1); 116 ftrace_set_filter(func_name, strlen(func_name), 1);
156 117
157 /* enable tracing */ 118 /* enable tracing */
158 tr->ctrl = 1; 119 ret = trace->init(tr);
159 trace->init(tr); 120 if (ret) {
121 warn_failed_init_tracer(trace, ret);
122 goto out;
123 }
124
160 /* Sleep for a 1/10 of a second */ 125 /* Sleep for a 1/10 of a second */
161 msleep(100); 126 msleep(100);
162 127
@@ -178,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
178 msleep(100); 143 msleep(100);
179 144
180 /* stop the tracing. */ 145 /* stop the tracing. */
181 tr->ctrl = 0; 146 tracing_stop();
182 trace->ctrl_update(tr);
183 ftrace_enabled = 0; 147 ftrace_enabled = 0;
184 148
185 /* check the trace buffer */ 149 /* check the trace buffer */
186 ret = trace_test_buffer(tr, &count); 150 ret = trace_test_buffer(tr, &count);
187 trace->reset(tr); 151 trace->reset(tr);
152 tracing_start();
188 153
189 /* we should only have one item */ 154 /* we should only have one item */
190 if (!ret && count != 1) { 155 if (!ret && count != 1) {
@@ -192,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
192 ret = -1; 157 ret = -1;
193 goto out; 158 goto out;
194 } 159 }
160
195 out: 161 out:
196 ftrace_enabled = save_ftrace_enabled; 162 ftrace_enabled = save_ftrace_enabled;
197 tracer_enabled = save_tracer_enabled; 163 tracer_enabled = save_tracer_enabled;
@@ -212,37 +178,34 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
212int 178int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) 179trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{ 180{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled; 181 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled; 182 int save_tracer_enabled = tracer_enabled;
183 unsigned long count;
184 int ret;
219 185
220 /* make sure msleep has been recorded */ 186 /* make sure msleep has been recorded */
221 msleep(1); 187 msleep(1);
222 188
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */ 189 /* start the tracing */
231 ftrace_enabled = 1; 190 ftrace_enabled = 1;
232 tracer_enabled = 1; 191 tracer_enabled = 1;
233 192
234 tr->ctrl = 1; 193 ret = trace->init(tr);
235 trace->init(tr); 194 if (ret) {
195 warn_failed_init_tracer(trace, ret);
196 goto out;
197 }
198
236 /* Sleep for a 1/10 of a second */ 199 /* Sleep for a 1/10 of a second */
237 msleep(100); 200 msleep(100);
238 /* stop the tracing. */ 201 /* stop the tracing. */
239 tr->ctrl = 0; 202 tracing_stop();
240 trace->ctrl_update(tr);
241 ftrace_enabled = 0; 203 ftrace_enabled = 0;
242 204
243 /* check the trace buffer */ 205 /* check the trace buffer */
244 ret = trace_test_buffer(tr, &count); 206 ret = trace_test_buffer(tr, &count);
245 trace->reset(tr); 207 trace->reset(tr);
208 tracing_start();
246 209
247 if (!ret && !count) { 210 if (!ret && !count) {
248 printk(KERN_CONT ".. no entries found .."); 211 printk(KERN_CONT ".. no entries found ..");
@@ -263,7 +226,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
263 226
264 return ret; 227 return ret;
265} 228}
266#endif /* CONFIG_FTRACE */ 229#endif /* CONFIG_FUNCTION_TRACER */
267 230
268#ifdef CONFIG_IRQSOFF_TRACER 231#ifdef CONFIG_IRQSOFF_TRACER
269int 232int
@@ -274,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
274 int ret; 237 int ret;
275 238
276 /* start the tracing */ 239 /* start the tracing */
277 tr->ctrl = 1; 240 ret = trace->init(tr);
278 trace->init(tr); 241 if (ret) {
242 warn_failed_init_tracer(trace, ret);
243 return ret;
244 }
245
279 /* reset the max latency */ 246 /* reset the max latency */
280 tracing_max_latency = 0; 247 tracing_max_latency = 0;
281 /* disable interrupts for a bit */ 248 /* disable interrupts for a bit */
@@ -283,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
283 udelay(100); 250 udelay(100);
284 local_irq_enable(); 251 local_irq_enable();
285 /* stop the tracing. */ 252 /* stop the tracing. */
286 tr->ctrl = 0; 253 tracing_stop();
287 trace->ctrl_update(tr);
288 /* check both trace buffers */ 254 /* check both trace buffers */
289 ret = trace_test_buffer(tr, NULL); 255 ret = trace_test_buffer(tr, NULL);
290 if (!ret) 256 if (!ret)
291 ret = trace_test_buffer(&max_tr, &count); 257 ret = trace_test_buffer(&max_tr, &count);
292 trace->reset(tr); 258 trace->reset(tr);
259 tracing_start();
293 260
294 if (!ret && !count) { 261 if (!ret && !count) {
295 printk(KERN_CONT ".. no entries found .."); 262 printk(KERN_CONT ".. no entries found ..");
@@ -310,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
310 unsigned long count; 277 unsigned long count;
311 int ret; 278 int ret;
312 279
280 /*
281 * Now that the big kernel lock is no longer preemptible,
282 * and this is called with the BKL held, it will always
283 * fail. If preemption is already disabled, simply
284 * pass the test. When the BKL is removed, or becomes
285 * preemptible again, we will once again test this,
286 * so keep it in.
287 */
288 if (preempt_count()) {
289 printk(KERN_CONT "can not test ... force ");
290 return 0;
291 }
292
313 /* start the tracing */ 293 /* start the tracing */
314 tr->ctrl = 1; 294 ret = trace->init(tr);
315 trace->init(tr); 295 if (ret) {
296 warn_failed_init_tracer(trace, ret);
297 return ret;
298 }
299
316 /* reset the max latency */ 300 /* reset the max latency */
317 tracing_max_latency = 0; 301 tracing_max_latency = 0;
318 /* disable preemption for a bit */ 302 /* disable preemption for a bit */
@@ -320,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
320 udelay(100); 304 udelay(100);
321 preempt_enable(); 305 preempt_enable();
322 /* stop the tracing. */ 306 /* stop the tracing. */
323 tr->ctrl = 0; 307 tracing_stop();
324 trace->ctrl_update(tr);
325 /* check both trace buffers */ 308 /* check both trace buffers */
326 ret = trace_test_buffer(tr, NULL); 309 ret = trace_test_buffer(tr, NULL);
327 if (!ret) 310 if (!ret)
328 ret = trace_test_buffer(&max_tr, &count); 311 ret = trace_test_buffer(&max_tr, &count);
329 trace->reset(tr); 312 trace->reset(tr);
313 tracing_start();
330 314
331 if (!ret && !count) { 315 if (!ret && !count) {
332 printk(KERN_CONT ".. no entries found .."); 316 printk(KERN_CONT ".. no entries found ..");
@@ -347,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
347 unsigned long count; 331 unsigned long count;
348 int ret; 332 int ret;
349 333
334 /*
335 * Now that the big kernel lock is no longer preemptible,
336 * and this is called with the BKL held, it will always
337 * fail. If preemption is already disabled, simply
338 * pass the test. When the BKL is removed, or becomes
339 * preemptible again, we will once again test this,
340 * so keep it in.
341 */
342 if (preempt_count()) {
343 printk(KERN_CONT "can not test ... force ");
344 return 0;
345 }
346
350 /* start the tracing */ 347 /* start the tracing */
351 tr->ctrl = 1; 348 ret = trace->init(tr);
352 trace->init(tr); 349 if (ret) {
350 warn_failed_init_tracer(trace, ret);
351 goto out;
352 }
353 353
354 /* reset the max latency */ 354 /* reset the max latency */
355 tracing_max_latency = 0; 355 tracing_max_latency = 0;
@@ -363,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
363 local_irq_enable(); 363 local_irq_enable();
364 364
365 /* stop the tracing. */ 365 /* stop the tracing. */
366 tr->ctrl = 0; 366 tracing_stop();
367 trace->ctrl_update(tr);
368 /* check both trace buffers */ 367 /* check both trace buffers */
369 ret = trace_test_buffer(tr, NULL); 368 ret = trace_test_buffer(tr, NULL);
370 if (ret) 369 if (ret) {
370 tracing_start();
371 goto out; 371 goto out;
372 }
372 373
373 ret = trace_test_buffer(&max_tr, &count); 374 ret = trace_test_buffer(&max_tr, &count);
374 if (ret) 375 if (ret) {
376 tracing_start();
375 goto out; 377 goto out;
378 }
376 379
377 if (!ret && !count) { 380 if (!ret && !count) {
378 printk(KERN_CONT ".. no entries found .."); 381 printk(KERN_CONT ".. no entries found ..");
379 ret = -1; 382 ret = -1;
383 tracing_start();
380 goto out; 384 goto out;
381 } 385 }
382 386
383 /* do the test by disabling interrupts first this time */ 387 /* do the test by disabling interrupts first this time */
384 tracing_max_latency = 0; 388 tracing_max_latency = 0;
385 tr->ctrl = 1; 389 tracing_start();
386 trace->ctrl_update(tr);
387 preempt_disable(); 390 preempt_disable();
388 local_irq_disable(); 391 local_irq_disable();
389 udelay(100); 392 udelay(100);
@@ -392,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
392 local_irq_enable(); 395 local_irq_enable();
393 396
394 /* stop the tracing. */ 397 /* stop the tracing. */
395 tr->ctrl = 0; 398 tracing_stop();
396 trace->ctrl_update(tr);
397 /* check both trace buffers */ 399 /* check both trace buffers */
398 ret = trace_test_buffer(tr, NULL); 400 ret = trace_test_buffer(tr, NULL);
399 if (ret) 401 if (ret)
@@ -409,12 +411,22 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
409 411
410 out: 412 out:
411 trace->reset(tr); 413 trace->reset(tr);
414 tracing_start();
412 tracing_max_latency = save_max; 415 tracing_max_latency = save_max;
413 416
414 return ret; 417 return ret;
415} 418}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ 419#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417 420
421#ifdef CONFIG_NOP_TRACER
422int
423trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
424{
425 /* What could possibly go wrong? */
426 return 0;
427}
428#endif
429
418#ifdef CONFIG_SCHED_TRACER 430#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data) 431static int trace_wakeup_test_thread(void *data)
420{ 432{
@@ -465,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
465 wait_for_completion(&isrt); 477 wait_for_completion(&isrt);
466 478
467 /* start the tracing */ 479 /* start the tracing */
468 tr->ctrl = 1; 480 ret = trace->init(tr);
469 trace->init(tr); 481 if (ret) {
482 warn_failed_init_tracer(trace, ret);
483 return ret;
484 }
485
470 /* reset the max latency */ 486 /* reset the max latency */
471 tracing_max_latency = 0; 487 tracing_max_latency = 0;
472 488
@@ -486,9 +502,11 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
486 502
487 wake_up_process(p); 503 wake_up_process(p);
488 504
505 /* give a little time to let the thread wake up */
506 msleep(100);
507
489 /* stop the tracing. */ 508 /* stop the tracing. */
490 tr->ctrl = 0; 509 tracing_stop();
491 trace->ctrl_update(tr);
492 /* check both trace buffers */ 510 /* check both trace buffers */
493 ret = trace_test_buffer(tr, NULL); 511 ret = trace_test_buffer(tr, NULL);
494 if (!ret) 512 if (!ret)
@@ -496,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
496 514
497 515
498 trace->reset(tr); 516 trace->reset(tr);
517 tracing_start();
499 518
500 tracing_max_latency = save_max; 519 tracing_max_latency = save_max;
501 520
@@ -519,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
519 int ret; 538 int ret;
520 539
521 /* start the tracing */ 540 /* start the tracing */
522 tr->ctrl = 1; 541 ret = trace->init(tr);
523 trace->init(tr); 542 if (ret) {
543 warn_failed_init_tracer(trace, ret);
544 return ret;
545 }
546
524 /* Sleep for a 1/10 of a second */ 547 /* Sleep for a 1/10 of a second */
525 msleep(100); 548 msleep(100);
526 /* stop the tracing. */ 549 /* stop the tracing. */
527 tr->ctrl = 0; 550 tracing_stop();
528 trace->ctrl_update(tr);
529 /* check the trace buffer */ 551 /* check the trace buffer */
530 ret = trace_test_buffer(tr, &count); 552 ret = trace_test_buffer(tr, &count);
531 trace->reset(tr); 553 trace->reset(tr);
554 tracing_start();
532 555
533 if (!ret && !count) { 556 if (!ret && !count) {
534 printk(KERN_CONT ".. no entries found .."); 557 printk(KERN_CONT ".. no entries found ..");
@@ -547,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
547 int ret; 570 int ret;
548 571
549 /* start the tracing */ 572 /* start the tracing */
550 tr->ctrl = 1; 573 ret = trace->init(tr);
551 trace->init(tr); 574 if (ret) {
575 warn_failed_init_tracer(trace, ret);
576 return 0;
577 }
578
552 /* Sleep for a 1/10 of a second */ 579 /* Sleep for a 1/10 of a second */
553 msleep(100); 580 msleep(100);
554 /* stop the tracing. */ 581 /* stop the tracing. */
555 tr->ctrl = 0; 582 tracing_stop();
556 trace->ctrl_update(tr);
557 /* check the trace buffer */ 583 /* check the trace buffer */
558 ret = trace_test_buffer(tr, &count); 584 ret = trace_test_buffer(tr, &count);
559 trace->reset(tr); 585 trace->reset(tr);
586 tracing_start();
560 587
561 return ret; 588 return ret;
562} 589}
563#endif /* CONFIG_SYSPROF_TRACER */ 590#endif /* CONFIG_SYSPROF_TRACER */
591
592#ifdef CONFIG_BRANCH_TRACER
593int
594trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
595{
596 unsigned long count;
597 int ret;
598
599 /* start the tracing */
600 ret = trace->init(tr);
601 if (ret) {
602 warn_failed_init_tracer(trace, ret);
603 return ret;
604 }
605
606 /* Sleep for a 1/10 of a second */
607 msleep(100);
608 /* stop the tracing. */
609 tracing_stop();
610 /* check the trace buffer */
611 ret = trace_test_buffer(tr, &count);
612 trace->reset(tr);
613 tracing_start();
614
615 return ret;
616}
617#endif /* CONFIG_BRANCH_TRACER */
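
The selftests converted above all settle on one skeleton: trace->init() can now fail and its return value is checked, and the ring buffer is frozen with tracing_stop() while trace_test_buffer() walks it, then re-enabled with tracing_start(). A condensed sketch of that pattern, assembled from the hunks above; the selftest_skeleton name is for illustration only and is not a literal excerpt of any single selftest:

static int selftest_skeleton(struct tracer *trace, struct trace_array *tr)
{
	unsigned long count;
	int ret;

	/* init may now fail; report and bail out instead of tracing blindly */
	ret = trace->init(tr);
	if (ret) {
		warn_failed_init_tracer(trace, ret);
		return ret;
	}

	/* let the tracer record something */
	msleep(100);

	/* freeze the ring buffer while it is inspected */
	tracing_stop();
	ret = trace_test_buffer(tr, &count);
	trace->reset(tr);
	tracing_start();

	if (!ret && !count) {
		printk(KERN_CONT ".. no entries found ..");
		ret = -1;
	}
	return ret;
}
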
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 000000000000..d0871bc0aca5
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,360 @@
1/*
2 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
3 *
4 */
5#include <linux/stacktrace.h>
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/uaccess.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/sysctl.h>
14#include <linux/init.h>
15#include <linux/fs.h>
16#include "trace.h"
17
18#define STACK_TRACE_ENTRIES 500
19
20static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
21 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
22static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
23
24static struct stack_trace max_stack_trace = {
25 .max_entries = STACK_TRACE_ENTRIES,
26 .entries = stack_dump_trace,
27};
28
29static unsigned long max_stack_size;
30static raw_spinlock_t max_stack_lock =
31 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
32
33static int stack_trace_disabled __read_mostly;
34static DEFINE_PER_CPU(int, trace_active);
35static DEFINE_MUTEX(stack_sysctl_mutex);
36
37int stack_tracer_enabled;
38static int last_stack_tracer_enabled;
39
40static inline void check_stack(void)
41{
42 unsigned long this_size, flags;
43 unsigned long *p, *top, *start;
44 int i;
45
46 this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
47 this_size = THREAD_SIZE - this_size;
48
49 if (this_size <= max_stack_size)
50 return;
51
52 /* we do not handle interrupt stacks yet */
53 if (!object_is_on_stack(&this_size))
54 return;
55
56 local_irq_save(flags);
57 __raw_spin_lock(&max_stack_lock);
58
59 /* a race could have already updated it */
60 if (this_size <= max_stack_size)
61 goto out;
62
63 max_stack_size = this_size;
64
65 max_stack_trace.nr_entries = 0;
66 max_stack_trace.skip = 3;
67
68 save_stack_trace(&max_stack_trace);
69
70 /*
71 * Now find where in the stack these are.
72 */
73 i = 0;
74 start = &this_size;
75 top = (unsigned long *)
76 (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
77
78 /*
79 * Loop through all the entries. One of the entries may
80 * for some reason be missing from the stack, so we may
81 * have to account for that. If they are all there, this
82 * loop will only run once. This code only runs when a
83 * new max is recorded, so it is far from a fast path.
84 */
85 while (i < max_stack_trace.nr_entries) {
86 int found = 0;
87
88 stack_dump_index[i] = this_size;
89 p = start;
90
91 for (; p < top && i < max_stack_trace.nr_entries; p++) {
92 if (*p == stack_dump_trace[i]) {
93 this_size = stack_dump_index[i++] =
94 (top - p) * sizeof(unsigned long);
95 found = 1;
96 /* Start the search from here */
97 start = p + 1;
98 }
99 }
100
101 if (!found)
102 i++;
103 }
104
105 out:
106 __raw_spin_unlock(&max_stack_lock);
107 local_irq_restore(flags);
108}
109
110static void
111stack_trace_call(unsigned long ip, unsigned long parent_ip)
112{
113 int cpu, resched;
114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return;
117
118 resched = ftrace_preempt_disable();
119
120 cpu = raw_smp_processor_id();
121 /* no atomic needed; this variable is only modified by this cpu */
122 if (per_cpu(trace_active, cpu)++ != 0)
123 goto out;
124
125 check_stack();
126
127 out:
128 per_cpu(trace_active, cpu)--;
129 /* prevent recursion in schedule */
130 ftrace_preempt_enable(resched);
131}
132
133static struct ftrace_ops trace_ops __read_mostly =
134{
135 .func = stack_trace_call,
136};
137
138static ssize_t
139stack_max_size_read(struct file *filp, char __user *ubuf,
140 size_t count, loff_t *ppos)
141{
142 unsigned long *ptr = filp->private_data;
143 char buf[64];
144 int r;
145
146 r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
147 if (r > sizeof(buf))
148 r = sizeof(buf);
149 return simple_read_from_buffer(ubuf, count, ppos, buf, r);
150}
151
152static ssize_t
153stack_max_size_write(struct file *filp, const char __user *ubuf,
154 size_t count, loff_t *ppos)
155{
156 long *ptr = filp->private_data;
157 unsigned long val, flags;
158 char buf[64];
159 int ret;
160
161 if (count >= sizeof(buf))
162 return -EINVAL;
163
164 if (copy_from_user(&buf, ubuf, count))
165 return -EFAULT;
166
167 buf[count] = 0;
168
169 ret = strict_strtoul(buf, 10, &val);
170 if (ret < 0)
171 return ret;
172
173 local_irq_save(flags);
174 __raw_spin_lock(&max_stack_lock);
175 *ptr = val;
176 __raw_spin_unlock(&max_stack_lock);
177 local_irq_restore(flags);
178
179 return count;
180}
181
182static const struct file_operations stack_max_size_fops = {
183 .open = tracing_open_generic,
184 .read = stack_max_size_read,
185 .write = stack_max_size_write,
186};
187
188static void *
189t_next(struct seq_file *m, void *v, loff_t *pos)
190{
191 long i;
192
193 (*pos)++;
194
195 if (v == SEQ_START_TOKEN)
196 i = 0;
197 else {
198 i = *(long *)v;
199 i++;
200 }
201
202 if (i >= max_stack_trace.nr_entries ||
203 stack_dump_trace[i] == ULONG_MAX)
204 return NULL;
205
206 m->private = (void *)i;
207
208 return &m->private;
209}
210
211static void *t_start(struct seq_file *m, loff_t *pos)
212{
213 void *t = SEQ_START_TOKEN;
214 loff_t l = 0;
215
216 local_irq_disable();
217 __raw_spin_lock(&max_stack_lock);
218
219 if (*pos == 0)
220 return SEQ_START_TOKEN;
221
222 for (; t && l < *pos; t = t_next(m, t, &l))
223 ;
224
225 return t;
226}
227
228static void t_stop(struct seq_file *m, void *p)
229{
230 __raw_spin_unlock(&max_stack_lock);
231 local_irq_enable();
232}
233
234static int trace_lookup_stack(struct seq_file *m, long i)
235{
236 unsigned long addr = stack_dump_trace[i];
237#ifdef CONFIG_KALLSYMS
238 char str[KSYM_SYMBOL_LEN];
239
240 sprint_symbol(str, addr);
241
242 return seq_printf(m, "%s\n", str);
243#else
244 return seq_printf(m, "%p\n", (void*)addr);
245#endif
246}
247
248static int t_show(struct seq_file *m, void *v)
249{
250 long i;
251 int size;
252
253 if (v == SEQ_START_TOKEN) {
254 seq_printf(m, " Depth Size Location"
255 " (%d entries)\n"
256 " ----- ---- --------\n",
257 max_stack_trace.nr_entries);
258 return 0;
259 }
260
261 i = *(long *)v;
262
263 if (i >= max_stack_trace.nr_entries ||
264 stack_dump_trace[i] == ULONG_MAX)
265 return 0;
266
267 if (i+1 == max_stack_trace.nr_entries ||
268 stack_dump_trace[i+1] == ULONG_MAX)
269 size = stack_dump_index[i];
270 else
271 size = stack_dump_index[i] - stack_dump_index[i+1];
272
273 seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
274
275 trace_lookup_stack(m, i);
276
277 return 0;
278}
279
280static const struct seq_operations stack_trace_seq_ops = {
281 .start = t_start,
282 .next = t_next,
283 .stop = t_stop,
284 .show = t_show,
285};
286
287static int stack_trace_open(struct inode *inode, struct file *file)
288{
289 int ret;
290
291 ret = seq_open(file, &stack_trace_seq_ops);
292
293 return ret;
294}
295
296static const struct file_operations stack_trace_fops = {
297 .open = stack_trace_open,
298 .read = seq_read,
299 .llseek = seq_lseek,
300};
301
302int
303stack_trace_sysctl(struct ctl_table *table, int write,
304 struct file *file, void __user *buffer, size_t *lenp,
305 loff_t *ppos)
306{
307 int ret;
308
309 mutex_lock(&stack_sysctl_mutex);
310
311 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
312
313 if (ret || !write ||
314 (last_stack_tracer_enabled == stack_tracer_enabled))
315 goto out;
316
317 last_stack_tracer_enabled = stack_tracer_enabled;
318
319 if (stack_tracer_enabled)
320 register_ftrace_function(&trace_ops);
321 else
322 unregister_ftrace_function(&trace_ops);
323
324 out:
325 mutex_unlock(&stack_sysctl_mutex);
326 return ret;
327}
328
329static __init int enable_stacktrace(char *str)
330{
331 stack_tracer_enabled = 1;
332 last_stack_tracer_enabled = 1;
333 return 1;
334}
335__setup("stacktrace", enable_stacktrace);
336
337static __init int stack_trace_init(void)
338{
339 struct dentry *d_tracer;
340 struct dentry *entry;
341
342 d_tracer = tracing_init_dentry();
343
344 entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
345 &max_stack_size, &stack_max_size_fops);
346 if (!entry)
347 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
348
349 entry = debugfs_create_file("stack_trace", 0444, d_tracer,
350 NULL, &stack_trace_fops);
351 if (!entry)
352 pr_warning("Could not create debugfs 'stack_trace' entry\n");
353
354 if (stack_tracer_enabled)
355 register_ftrace_function(&trace_ops);
356
357 return 0;
358}
359
360device_initcall(stack_trace_init);
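
In the new trace_stack.c above, check_stack() records the depth at which each return address of the deepest stack was found (stack_dump_index[]), and t_show() reports the per-entry Size as the gap between consecutive depths, with the last entry keeping its full depth. A small standalone sketch of that arithmetic, using made-up depth values:

#include <stdio.h>

int main(void)
{
	/* hypothetical depths, deepest entry first, as stack_dump_index[] would hold them */
	unsigned int index[] = { 4096, 3800, 3200, 900 };
	int n = sizeof(index) / sizeof(index[0]);
	int i;

	printf("        Depth    Size\n");
	for (i = 0; i < n; i++) {
		/* the last entry has no shallower neighbour, so it keeps its full depth */
		unsigned int size = (i + 1 == n) ? index[i]
						 : index[i] - index[i + 1];

		printf("%3d) %8u %7u\n", i, index[i], size);
	}
	return 0;
}
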
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb66a135..a5779bd975db 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,6 @@ static void start_stack_timer(int cpu)
202 202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn; 204 hrtimer->function = stack_trace_timer_fn;
205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
206 205
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); 206 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
208} 207}
@@ -234,20 +233,10 @@ static void stop_stack_timers(void)
234 stop_stack_timer(cpu); 233 stop_stack_timer(cpu);
235} 234}
236 235
237static void stack_reset(struct trace_array *tr)
238{
239 int cpu;
240
241 tr->time_start = ftrace_now(tr->cpu);
242
243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]);
245}
246
247static void start_stack_trace(struct trace_array *tr) 236static void start_stack_trace(struct trace_array *tr)
248{ 237{
249 mutex_lock(&sample_timer_lock); 238 mutex_lock(&sample_timer_lock);
250 stack_reset(tr); 239 tracing_reset_online_cpus(tr);
251 start_stack_timers(); 240 start_stack_timers();
252 tracer_enabled = 1; 241 tracer_enabled = 1;
253 mutex_unlock(&sample_timer_lock); 242 mutex_unlock(&sample_timer_lock);
@@ -261,27 +250,17 @@ static void stop_stack_trace(struct trace_array *tr)
261 mutex_unlock(&sample_timer_lock); 250 mutex_unlock(&sample_timer_lock);
262} 251}
263 252
264static void stack_trace_init(struct trace_array *tr) 253static int stack_trace_init(struct trace_array *tr)
265{ 254{
266 sysprof_trace = tr; 255 sysprof_trace = tr;
267 256
268 if (tr->ctrl) 257 start_stack_trace(tr);
269 start_stack_trace(tr); 258 return 0;
270} 259}
271 260
272static void stack_trace_reset(struct trace_array *tr) 261static void stack_trace_reset(struct trace_array *tr)
273{ 262{
274 if (tr->ctrl) 263 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 264}
286 265
287static struct tracer stack_trace __read_mostly = 266static struct tracer stack_trace __read_mostly =
@@ -289,7 +268,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 268 .name = "sysprof",
290 .init = stack_trace_init, 269 .init = stack_trace_init,
291 .reset = stack_trace_reset, 270 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 271#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 272 .selftest = trace_selftest_startup_sysprof,
295#endif 273#endif
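
The deleted stack_reset() helper in trace_sysprof.c is replaced by the shared tracing_reset_online_cpus(). Its implementation lives in trace.c and is not part of this diff, but judging from the code it replaces it presumably amounts to something like the sketch below; the exact tracing_reset() signature may differ at this point in the series:

static void tracing_reset_online_cpus(struct trace_array *tr)
{
	int cpu;

	tr->time_start = ftrace_now(tr->cpu);

	/* clear the per-cpu buffers of every online cpu */
	for_each_online_cpu(cpu)
		tracing_reset(tr, cpu);
}
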