author    Thomas Gleixner <tglx@linutronix.de>    2008-11-24 13:54:37 -0500
committer Thomas Gleixner <tglx@linutronix.de>    2008-11-24 13:54:37 -0500
commit    3e1d7a6219ab64e13b10b1a77c0625db9a8bd8db
tree      c682da7317845d7b1336e3d8498cf83bdf8f5900 /kernel/trace
parent    42569c39917a08e8de1e8b5685463be7b74baebd
parent    13d428afc007fcfcd6deeb215618f54cf9c0cae6

    Merge branch 'linus' into core/futexes
Diffstat (limited to 'kernel/trace')
-rw-r--r--   kernel/trace/Kconfig               |   87
-rw-r--r--   kernel/trace/Makefile              |   10
-rw-r--r--   kernel/trace/ftrace.c              |  826
-rw-r--r--   kernel/trace/ring_buffer.c         | 2186
-rw-r--r--   kernel/trace/trace.c               | 1902
-rw-r--r--   kernel/trace/trace.h               |  215
-rw-r--r--   kernel/trace/trace_boot.c          |  126
-rw-r--r--   kernel/trace/trace_functions.c     |    4
-rw-r--r--   kernel/trace/trace_irqsoff.c       |   23
-rw-r--r--   kernel/trace/trace_mmiotrace.c     |  116
-rw-r--r--   kernel/trace/trace_nop.c           |   64
-rw-r--r--   kernel/trace/trace_sched_switch.c  |  137
-rw-r--r--   kernel/trace/trace_sched_wakeup.c  |  152
-rw-r--r--   kernel/trace/trace_selftest.c      |  101
-rw-r--r--   kernel/trace/trace_stack.c         |  314
-rw-r--r--   kernel/trace/trace_sysprof.c       |    4
16 files changed, 4410 insertions, 1857 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..33dbefd471e8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,40 @@
 #
-# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
+# Architectures that offer an FUNCTION_TRACER implementation should
+# select HAVE_FUNCTION_TRACER:
 #
-config HAVE_FTRACE
+
+config NOP_TRACER
+	bool
+
+config HAVE_FUNCTION_TRACER
 	bool
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+	bool
+
 config TRACER_MAX_TRACE
 	bool
 
+config RING_BUFFER
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
-	select STACKTRACE
+	select RING_BUFFER
+	select STACKTRACE if STACKTRACE_SUPPORT
+	select TRACEPOINTS
+	select NOP_TRACER
 
-config FTRACE
+menu "Tracers"
+
+config FUNCTION_TRACER
 	bool "Kernel Function Tracer"
-	depends on HAVE_FTRACE
+	depends on HAVE_FUNCTION_TRACER
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
@@ -35,7 +52,7 @@ config IRQSOFF_TRACER
 	default n
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACE_IRQFLAGS
 	select TRACING
 	select TRACER_MAX_TRACE
@@ -58,7 +75,7 @@ config PREEMPT_TRACER
 	default n
 	depends on GENERIC_TIME
 	depends on PREEMPT
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select TRACER_MAX_TRACE
 	help
@@ -85,7 +102,7 @@ config SYSPROF_TRACER
 
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
@@ -95,17 +112,56 @@ config SCHED_TRACER
 
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
-	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select MARKERS
 	help
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config BOOT_TRACER
+	bool "Trace boot initcalls"
+	depends on DEBUG_KERNEL
+	select TRACING
+	select CONTEXT_SWITCH_TRACER
+	help
+	  This tracer helps developers to optimize boot times: it records
+	  the timings of the initcalls and traces key events and the identity
+	  of tasks that can cause boot delays, such as context-switches.
+
+	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  produce pretty graphics about boot inefficiencies, giving a visual
+	  representation of the delays during initcalls - but the raw
+	  /debug/tracing/trace text output is readable too.
+
+	  ( Note that tracing self tests can't be enabled if this tracer is
+	    selected, because the self-tests are an initcall as well and that
+	    would invalidate the boot trace. )
+
+config STACK_TRACER
+	bool "Trace max stack"
+	depends on HAVE_FUNCTION_TRACER
+	depends on DEBUG_KERNEL
+	select FUNCTION_TRACER
+	select STACKTRACE
+	help
+	  This special tracer records the maximum stack footprint of the
+	  kernel and displays it in debugfs/tracing/stack_trace.
+
+	  This tracer works by hooking into every function call that the
+	  kernel executes, and keeping a maximum stack depth value and
+	  stack-trace saved. Because this logic has to execute in every
+	  kernel function, all the time, this option can slow down the
+	  kernel measurably and is generally intended for kernel
+	  developers only.
+
+	  Say N if unsure.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
-	depends on FTRACE
+	depends on FUNCTION_TRACER
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on DEBUG_KERNEL
 	default y
 	help
 	  This option will modify all the calls to ftrace dynamically
@@ -113,7 +169,7 @@ config DYNAMIC_FTRACE
 	  with a No-Op instruction) as they are called. A table is
 	  created to dynamically enable them again.
 
-	  This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
 	  has native performance as long as no tracing is active.
 
 	  The changes to the code are done by a kernel thread that
@@ -121,15 +177,22 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
 	bool
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
 	  a series of tests are made to verify that the tracer is
 	  functioning properly. It will do tests on all the configured
 	  tracers of ftrace.
+
+endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..c8228b1a49e9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
 
 # Do not instrument the tracer itself:
 
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
 
@@ -10,15 +10,19 @@ CFLAGS_trace_selftest_dynamic.o = -pg
 obj-y += trace_selftest_dynamic.o
 endif
 
-obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
-obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..78db083390f0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,13 +25,24 @@
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h> 28#include <linux/list.h>
30 29
31#include <asm/ftrace.h> 30#include <asm/ftrace.h>
32 31
33#include "trace.h" 32#include "trace.h"
34 33
34#define FTRACE_WARN_ON(cond) \
35 do { \
36 if (WARN_ON(cond)) \
37 ftrace_kill(); \
38 } while (0)
39
40#define FTRACE_WARN_ON_ONCE(cond) \
41 do { \
42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \
44 } while (0)
45
35/* ftrace_enabled is a method to turn ftrace on or off */ 46/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled; 48static int last_ftrace_enabled;
@@ -81,7 +92,7 @@ void clear_ftrace_function(void)
81 92
82static int __register_ftrace_function(struct ftrace_ops *ops) 93static int __register_ftrace_function(struct ftrace_ops *ops)
83{ 94{
84 /* Should never be called by interrupts */ 95 /* should not be called from interrupt context */
85 spin_lock(&ftrace_lock); 96 spin_lock(&ftrace_lock);
86 97
87 ops->next = ftrace_list; 98 ops->next = ftrace_list;
@@ -115,6 +126,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
115 struct ftrace_ops **p; 126 struct ftrace_ops **p;
116 int ret = 0; 127 int ret = 0;
117 128
129 /* should not be called from interrupt context */
118 spin_lock(&ftrace_lock); 130 spin_lock(&ftrace_lock);
119 131
120 /* 132 /*
@@ -152,8 +164,17 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152} 164}
153 165
154#ifdef CONFIG_DYNAMIC_FTRACE 166#ifdef CONFIG_DYNAMIC_FTRACE
167#ifndef CONFIG_FTRACE_MCOUNT_RECORD
168# error Dynamic ftrace depends on MCOUNT_RECORD
169#endif
155 170
156static struct task_struct *ftraced_task; 171/*
172 * Since MCOUNT_ADDR may point to mcount itself, we do not want
173 * to get it confused by reading a reference in the code as we
174 * are parsing on objcopy output of text. Use a variable for
175 * it instead.
176 */
177static unsigned long mcount_addr = MCOUNT_ADDR;
157 178
158enum { 179enum {
159 FTRACE_ENABLE_CALLS = (1 << 0), 180 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -164,15 +185,9 @@ enum {
164}; 185};
165 186
166static int ftrace_filtered; 187static int ftrace_filtered;
167static int tracing_on;
168static int frozen_record_count;
169
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171 188
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); 189static LIST_HEAD(ftrace_new_addrs);
173 190
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock); 191static DEFINE_MUTEX(ftrace_regex_lock);
177 192
178struct ftrace_page { 193struct ftrace_page {
@@ -190,16 +205,13 @@ struct ftrace_page {
190static struct ftrace_page *ftrace_pages_start; 205static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages; 206static struct ftrace_page *ftrace_pages;
192 207
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records; 208static struct dyn_ftrace *ftrace_free_records;
200 209
201 210
202#ifdef CONFIG_KPROBES 211#ifdef CONFIG_KPROBES
212
213static int frozen_record_count;
214
203static inline void freeze_record(struct dyn_ftrace *rec) 215static inline void freeze_record(struct dyn_ftrace *rec)
204{ 216{
205 if (!(rec->flags & FTRACE_FL_FROZEN)) { 217 if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -226,79 +238,36 @@ static inline int record_frozen(struct dyn_ftrace *rec)
226# define record_frozen(rec) ({ 0; }) 238# define record_frozen(rec) ({ 0; })
227#endif /* CONFIG_KPROBES */ 239#endif /* CONFIG_KPROBES */
228 240
229int skip_trace(unsigned long ip) 241static void ftrace_free_rec(struct dyn_ftrace *rec)
230{ 242{
231 unsigned long fl; 243 rec->ip = (unsigned long)ftrace_free_records;
232 struct dyn_ftrace *rec; 244 ftrace_free_records = rec;
233 struct hlist_node *t; 245 rec->flags |= FTRACE_FL_FREE;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264} 246}
265 247
266static inline int 248void ftrace_release(void *start, unsigned long size)
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{ 249{
269 struct dyn_ftrace *p; 250 struct dyn_ftrace *rec;
270 struct hlist_node *t; 251 struct ftrace_page *pg;
271 int found = 0; 252 unsigned long s = (unsigned long)start;
272 253 unsigned long e = s + size;
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { 254 int i;
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282 255
283static inline void 256 if (ftrace_disabled || !start)
284ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) 257 return;
285{
286 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
287}
288 258
289/* called from kstop_machine */ 259 /* should not be called from interrupt context */
290static inline void ftrace_del_hash(struct dyn_ftrace *node) 260 spin_lock(&ftrace_lock);
291{
292 hlist_del(&node->node);
293}
294 261
295static void ftrace_free_rec(struct dyn_ftrace *rec) 262 for (pg = ftrace_pages_start; pg; pg = pg->next) {
296{ 263 for (i = 0; i < pg->index; i++) {
297 /* no locking, only called from kstop_machine */ 264 rec = &pg->records[i];
298 265
299 rec->ip = (unsigned long)ftrace_free_records; 266 if ((rec->ip >= s) && (rec->ip < e))
300 ftrace_free_records = rec; 267 ftrace_free_rec(rec);
301 rec->flags |= FTRACE_FL_FREE; 268 }
269 }
270 spin_unlock(&ftrace_lock);
302} 271}
303 272
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 273static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -310,10 +279,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
310 rec = ftrace_free_records; 279 rec = ftrace_free_records;
311 280
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 281 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1); 282 FTRACE_WARN_ON_ONCE(1);
314 ftrace_free_records = NULL; 283 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL; 284 return NULL;
318 } 285 }
319 286
@@ -323,175 +290,125 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
323 } 290 }
324 291
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 292 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next) 293 if (!ftrace_pages->next) {
327 return NULL; 294 /* allocate another page */
295 ftrace_pages->next =
296 (void *)get_zeroed_page(GFP_KERNEL);
297 if (!ftrace_pages->next)
298 return NULL;
299 }
328 ftrace_pages = ftrace_pages->next; 300 ftrace_pages = ftrace_pages->next;
329 } 301 }
330 302
331 return &ftrace_pages->records[ftrace_pages->index++]; 303 return &ftrace_pages->records[ftrace_pages->index++];
332} 304}
333 305
334static void 306static struct dyn_ftrace *
335ftrace_record_ip(unsigned long ip) 307ftrace_record_ip(unsigned long ip)
336{ 308{
337 struct dyn_ftrace *node; 309 struct dyn_ftrace *rec;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343 310
344 if (!ftrace_enabled || ftrace_disabled) 311 if (!ftrace_enabled || ftrace_disabled)
345 return; 312 return NULL;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367 313
368 if (ftrace_ip_in_hash(ip, key)) 314 rec = ftrace_alloc_dyn_node(ip);
369 goto out; 315 if (!rec)
370 316 return NULL;
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374
375 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key))
377 goto out_unlock;
378
379 node = ftrace_alloc_dyn_node(ip);
380 if (!node)
381 goto out_unlock;
382
383 node->ip = ip;
384
385 ftrace_add_hash(node, key);
386 317
387 ftraced_trigger = 1; 318 rec->ip = ip;
388 319
389 out_unlock: 320 list_add(&rec->list, &ftrace_new_addrs);
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
391 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393 321
394 /* prevent recursion with scheduler */ 322 return rec;
395 if (resched)
396 preempt_enable_no_resched_notrace();
397 else
398 preempt_enable_notrace();
399} 323}
400 324
401#define FTRACE_ADDR ((long)(ftrace_caller)) 325#define FTRACE_ADDR ((long)(ftrace_caller))
402 326
403static int 327static int
404__ftrace_replace_code(struct dyn_ftrace *rec, 328__ftrace_replace_code(struct dyn_ftrace *rec,
405 unsigned char *old, unsigned char *new, int enable) 329 unsigned char *nop, int enable)
406{ 330{
407 unsigned long ip, fl; 331 unsigned long ip, fl;
332 unsigned char *call, *old, *new;
408 333
409 ip = rec->ip; 334 ip = rec->ip;
410 335
411 if (ftrace_filtered && enable) { 336 /*
337 * If this record is not to be traced and
338 * it is not enabled then do nothing.
339 *
340 * If this record is not to be traced and
341 * it is enabled then disabled it.
342 *
343 */
344 if (rec->flags & FTRACE_FL_NOTRACE) {
345 if (rec->flags & FTRACE_FL_ENABLED)
346 rec->flags &= ~FTRACE_FL_ENABLED;
347 else
348 return 0;
349
350 } else if (ftrace_filtered && enable) {
412 /* 351 /*
413 * If filtering is on: 352 * Filtering is on:
414 *
415 * If this record is set to be filtered and
416 * is enabled then do nothing.
417 *
418 * If this record is set to be filtered and
419 * it is not enabled, enable it.
420 *
421 * If this record is not set to be filtered
422 * and it is not enabled do nothing.
423 *
424 * If this record is set not to trace then
425 * do nothing.
426 *
427 * If this record is set not to trace and
428 * it is enabled then disable it.
429 *
430 * If this record is not set to be filtered and
431 * it is enabled, disable it.
432 */ 353 */
433 354
434 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | 355 fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
435 FTRACE_FL_ENABLED);
436 356
437 if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || 357 /* Record is filtered and enabled, do nothing */
438 (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || 358 if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
439 !fl || (fl == FTRACE_FL_NOTRACE))
440 return 0; 359 return 0;
441 360
442 /* 361 /* Record is not filtered and is not enabled do nothing */
443 * If it is enabled disable it, 362 if (!fl)
444 * otherwise enable it! 363 return 0;
445 */ 364
446 if (fl & FTRACE_FL_ENABLED) { 365 /* Record is not filtered but enabled, disable it */
447 /* swap new and old */ 366 if (fl == FTRACE_FL_ENABLED)
448 new = old;
449 old = ftrace_call_replace(ip, FTRACE_ADDR);
450 rec->flags &= ~FTRACE_FL_ENABLED; 367 rec->flags &= ~FTRACE_FL_ENABLED;
451 } else { 368 else
452 new = ftrace_call_replace(ip, FTRACE_ADDR); 369 /* Otherwise record is filtered but not enabled, enable it */
453 rec->flags |= FTRACE_FL_ENABLED; 370 rec->flags |= FTRACE_FL_ENABLED;
454 }
455 } else { 371 } else {
372 /* Disable or not filtered */
456 373
457 if (enable) { 374 if (enable) {
458 /* 375 /* if record is enabled, do nothing */
459 * If this record is set not to trace and is
460 * not enabled, do nothing.
461 */
462 fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
463 if (fl == FTRACE_FL_NOTRACE)
464 return 0;
465
466 new = ftrace_call_replace(ip, FTRACE_ADDR);
467 } else
468 old = ftrace_call_replace(ip, FTRACE_ADDR);
469
470 if (enable) {
471 if (rec->flags & FTRACE_FL_ENABLED) 376 if (rec->flags & FTRACE_FL_ENABLED)
472 return 0; 377 return 0;
378
473 rec->flags |= FTRACE_FL_ENABLED; 379 rec->flags |= FTRACE_FL_ENABLED;
380
474 } else { 381 } else {
382
383 /* if record is not enabled do nothing */
475 if (!(rec->flags & FTRACE_FL_ENABLED)) 384 if (!(rec->flags & FTRACE_FL_ENABLED))
476 return 0; 385 return 0;
386
477 rec->flags &= ~FTRACE_FL_ENABLED; 387 rec->flags &= ~FTRACE_FL_ENABLED;
478 } 388 }
479 } 389 }
480 390
391 call = ftrace_call_replace(ip, FTRACE_ADDR);
392
393 if (rec->flags & FTRACE_FL_ENABLED) {
394 old = nop;
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
481 return ftrace_modify_code(ip, old, new); 401 return ftrace_modify_code(ip, old, new);
482} 402}
483 403
484static void ftrace_replace_code(int enable) 404static void ftrace_replace_code(int enable)
485{ 405{
486 int i, failed; 406 int i, failed;
487 unsigned char *new = NULL, *old = NULL; 407 unsigned char *nop = NULL;
488 struct dyn_ftrace *rec; 408 struct dyn_ftrace *rec;
489 struct ftrace_page *pg; 409 struct ftrace_page *pg;
490 410
491 if (enable) 411 nop = ftrace_nop_replace();
492 old = ftrace_nop_replace();
493 else
494 new = ftrace_nop_replace();
495 412
496 for (pg = ftrace_pages_start; pg; pg = pg->next) { 413 for (pg = ftrace_pages_start; pg; pg = pg->next) {
497 for (i = 0; i < pg->index; i++) { 414 for (i = 0; i < pg->index; i++) {
@@ -509,12 +426,11 @@ static void ftrace_replace_code(int enable)
509 unfreeze_record(rec); 426 unfreeze_record(rec);
510 } 427 }
511 428
512 failed = __ftrace_replace_code(rec, old, new, enable); 429 failed = __ftrace_replace_code(rec, nop, enable);
513 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
514 rec->flags |= FTRACE_FL_FAILED; 431 rec->flags |= FTRACE_FL_FAILED;
515 if ((system_state == SYSTEM_BOOTING) || 432 if ((system_state == SYSTEM_BOOTING) ||
516 !core_kernel_text(rec->ip)) { 433 !core_kernel_text(rec->ip)) {
517 ftrace_del_hash(rec);
518 ftrace_free_rec(rec); 434 ftrace_free_rec(rec);
519 } 435 }
520 } 436 }
@@ -522,13 +438,14 @@ static void ftrace_replace_code(int enable)
522 } 438 }
523} 439}
524 440
525static void ftrace_shutdown_replenish(void) 441static void print_ip_ins(const char *fmt, unsigned char *p)
526{ 442{
527 if (ftrace_pages->next) 443 int i;
528 return;
529 444
530 /* allocate another page */ 445 printk(KERN_CONT "%s", fmt);
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); 446
447 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
448 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
532} 449}
533 450
534static int 451static int
@@ -536,52 +453,59 @@ ftrace_code_disable(struct dyn_ftrace *rec)
536{ 453{
537 unsigned long ip; 454 unsigned long ip;
538 unsigned char *nop, *call; 455 unsigned char *nop, *call;
539 int failed; 456 int ret;
540 457
541 ip = rec->ip; 458 ip = rec->ip;
542 459
543 nop = ftrace_nop_replace(); 460 nop = ftrace_nop_replace();
544 call = ftrace_call_replace(ip, MCOUNT_ADDR); 461 call = ftrace_call_replace(ip, mcount_addr);
462
463 ret = ftrace_modify_code(ip, call, nop);
464 if (ret) {
465 switch (ret) {
466 case -EFAULT:
467 FTRACE_WARN_ON_ONCE(1);
468 pr_info("ftrace faulted on modifying ");
469 print_ip_sym(ip);
470 break;
471 case -EINVAL:
472 FTRACE_WARN_ON_ONCE(1);
473 pr_info("ftrace failed to modify ");
474 print_ip_sym(ip);
475 print_ip_ins(" expected: ", call);
476 print_ip_ins(" actual: ", (unsigned char *)ip);
477 print_ip_ins(" replace: ", nop);
478 printk(KERN_CONT "\n");
479 break;
480 case -EPERM:
481 FTRACE_WARN_ON_ONCE(1);
482 pr_info("ftrace faulted on writing ");
483 print_ip_sym(ip);
484 break;
485 default:
486 FTRACE_WARN_ON_ONCE(1);
487 pr_info("ftrace faulted on unknown error ");
488 print_ip_sym(ip);
489 }
545 490
546 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) {
548 rec->flags |= FTRACE_FL_FAILED; 491 rec->flags |= FTRACE_FL_FAILED;
549 return 0; 492 return 0;
550 } 493 }
551 return 1; 494 return 1;
552} 495}
553 496
554static int __ftrace_update_code(void *ignore);
555
556static int __ftrace_modify_code(void *data) 497static int __ftrace_modify_code(void *data)
557{ 498{
558 unsigned long addr;
559 int *command = data; 499 int *command = data;
560 500
561 if (*command & FTRACE_ENABLE_CALLS) { 501 if (*command & FTRACE_ENABLE_CALLS)
562 /*
563 * Update any recorded ips now that we have the
564 * machine stopped
565 */
566 __ftrace_update_code(NULL);
567 ftrace_replace_code(1); 502 ftrace_replace_code(1);
568 tracing_on = 1; 503 else if (*command & FTRACE_DISABLE_CALLS)
569 } else if (*command & FTRACE_DISABLE_CALLS) {
570 ftrace_replace_code(0); 504 ftrace_replace_code(0);
571 tracing_on = 0;
572 }
573 505
574 if (*command & FTRACE_UPDATE_TRACE_FUNC) 506 if (*command & FTRACE_UPDATE_TRACE_FUNC)
575 ftrace_update_ftrace_func(ftrace_trace_function); 507 ftrace_update_ftrace_func(ftrace_trace_function);
576 508
577 if (*command & FTRACE_ENABLE_MCOUNT) {
578 addr = (unsigned long)ftrace_record_ip;
579 ftrace_mcount_set(&addr);
580 } else if (*command & FTRACE_DISABLE_MCOUNT) {
581 addr = (unsigned long)ftrace_stub;
582 ftrace_mcount_set(&addr);
583 }
584
585 return 0; 509 return 0;
586} 510}
587 511
@@ -590,26 +514,9 @@ static void ftrace_run_update_code(int command)
590 stop_machine(__ftrace_modify_code, &command, NULL); 514 stop_machine(__ftrace_modify_code, &command, NULL);
591} 515}
592 516
593void ftrace_disable_daemon(void)
594{
595 /* Stop the daemon from calling kstop_machine */
596 mutex_lock(&ftraced_lock);
597 ftraced_stop = 1;
598 mutex_unlock(&ftraced_lock);
599
600 ftrace_force_update();
601}
602
603void ftrace_enable_daemon(void)
604{
605 mutex_lock(&ftraced_lock);
606 ftraced_stop = 0;
607 mutex_unlock(&ftraced_lock);
608
609 ftrace_force_update();
610}
611
612static ftrace_func_t saved_ftrace_func; 517static ftrace_func_t saved_ftrace_func;
518static int ftrace_start;
519static DEFINE_MUTEX(ftrace_start_lock);
613 520
614static void ftrace_startup(void) 521static void ftrace_startup(void)
615{ 522{
@@ -618,10 +525,9 @@ static void ftrace_startup(void)
618 if (unlikely(ftrace_disabled)) 525 if (unlikely(ftrace_disabled))
619 return; 526 return;
620 527
621 mutex_lock(&ftraced_lock); 528 mutex_lock(&ftrace_start_lock);
622 ftraced_suspend++; 529 ftrace_start++;
623 if (ftraced_suspend == 1) 530 command |= FTRACE_ENABLE_CALLS;
624 command |= FTRACE_ENABLE_CALLS;
625 531
626 if (saved_ftrace_func != ftrace_trace_function) { 532 if (saved_ftrace_func != ftrace_trace_function) {
627 saved_ftrace_func = ftrace_trace_function; 533 saved_ftrace_func = ftrace_trace_function;
@@ -633,7 +539,7 @@ static void ftrace_startup(void)
633 539
634 ftrace_run_update_code(command); 540 ftrace_run_update_code(command);
635 out: 541 out:
636 mutex_unlock(&ftraced_lock); 542 mutex_unlock(&ftrace_start_lock);
637} 543}
638 544
639static void ftrace_shutdown(void) 545static void ftrace_shutdown(void)
@@ -643,9 +549,9 @@ static void ftrace_shutdown(void)
643 if (unlikely(ftrace_disabled)) 549 if (unlikely(ftrace_disabled))
644 return; 550 return;
645 551
646 mutex_lock(&ftraced_lock); 552 mutex_lock(&ftrace_start_lock);
647 ftraced_suspend--; 553 ftrace_start--;
648 if (!ftraced_suspend) 554 if (!ftrace_start)
649 command |= FTRACE_DISABLE_CALLS; 555 command |= FTRACE_DISABLE_CALLS;
650 556
651 if (saved_ftrace_func != ftrace_trace_function) { 557 if (saved_ftrace_func != ftrace_trace_function) {
@@ -658,7 +564,7 @@ static void ftrace_shutdown(void)
658 564
659 ftrace_run_update_code(command); 565 ftrace_run_update_code(command);
660 out: 566 out:
661 mutex_unlock(&ftraced_lock); 567 mutex_unlock(&ftrace_start_lock);
662} 568}
663 569
664static void ftrace_startup_sysctl(void) 570static void ftrace_startup_sysctl(void)
@@ -668,15 +574,15 @@ static void ftrace_startup_sysctl(void)
668 if (unlikely(ftrace_disabled)) 574 if (unlikely(ftrace_disabled))
669 return; 575 return;
670 576
671 mutex_lock(&ftraced_lock); 577 mutex_lock(&ftrace_start_lock);
672 /* Force update next time */ 578 /* Force update next time */
673 saved_ftrace_func = NULL; 579 saved_ftrace_func = NULL;
674 /* ftraced_suspend is true if we want ftrace running */ 580 /* ftrace_start is true if we want ftrace running */
675 if (ftraced_suspend) 581 if (ftrace_start)
676 command |= FTRACE_ENABLE_CALLS; 582 command |= FTRACE_ENABLE_CALLS;
677 583
678 ftrace_run_update_code(command); 584 ftrace_run_update_code(command);
679 mutex_unlock(&ftraced_lock); 585 mutex_unlock(&ftrace_start_lock);
680} 586}
681 587
682static void ftrace_shutdown_sysctl(void) 588static void ftrace_shutdown_sysctl(void)
@@ -686,153 +592,51 @@ static void ftrace_shutdown_sysctl(void)
686 if (unlikely(ftrace_disabled)) 592 if (unlikely(ftrace_disabled))
687 return; 593 return;
688 594
689 mutex_lock(&ftraced_lock); 595 mutex_lock(&ftrace_start_lock);
690 /* ftraced_suspend is true if ftrace is running */ 596 /* ftrace_start is true if ftrace is running */
691 if (ftraced_suspend) 597 if (ftrace_start)
692 command |= FTRACE_DISABLE_CALLS; 598 command |= FTRACE_DISABLE_CALLS;
693 599
694 ftrace_run_update_code(command); 600 ftrace_run_update_code(command);
695 mutex_unlock(&ftraced_lock); 601 mutex_unlock(&ftrace_start_lock);
696} 602}
697 603
698static cycle_t ftrace_update_time; 604static cycle_t ftrace_update_time;
699static unsigned long ftrace_update_cnt; 605static unsigned long ftrace_update_cnt;
700unsigned long ftrace_update_tot_cnt; 606unsigned long ftrace_update_tot_cnt;
701 607
702static int __ftrace_update_code(void *ignore) 608static int ftrace_update_code(void)
703{ 609{
704 int i, save_ftrace_enabled; 610 struct dyn_ftrace *p, *t;
705 cycle_t start, stop; 611 cycle_t start, stop;
706 struct dyn_ftrace *p;
707 struct hlist_node *t, *n;
708 struct hlist_head *head, temp_list;
709
710 /* Don't be recording funcs now */
711 ftrace_record_suspend++;
712 save_ftrace_enabled = ftrace_enabled;
713 ftrace_enabled = 0;
714 612
715 start = ftrace_now(raw_smp_processor_id()); 613 start = ftrace_now(raw_smp_processor_id());
716 ftrace_update_cnt = 0; 614 ftrace_update_cnt = 0;
717 615
718 /* No locks needed, the machine is stopped! */ 616 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
719 for (i = 0; i < FTRACE_HASHSIZE; i++) {
720 INIT_HLIST_HEAD(&temp_list);
721 head = &ftrace_hash[i];
722
723 /* all CPUS are stopped, we are safe to modify code */
724 hlist_for_each_entry_safe(p, t, n, head, node) {
725 /* Skip over failed records which have not been
726 * freed. */
727 if (p->flags & FTRACE_FL_FAILED)
728 continue;
729
730 /* Unconverted records are always at the head of the
731 * hash bucket. Once we encounter a converted record,
732 * simply skip over to the next bucket. Saves ftraced
733 * some processor cycles (ftrace does its bid for
734 * global warming :-p ). */
735 if (p->flags & (FTRACE_FL_CONVERTED))
736 break;
737 617
738 /* Ignore updates to this record's mcount site. 618 /* If something went wrong, bail without enabling anything */
739 * Reintroduce this record at the head of this 619 if (unlikely(ftrace_disabled))
740 * bucket to attempt to "convert" it again if 620 return -1;
741 * the kprobe on it is unregistered before the
742 * next run. */
743 if (get_kprobe((void *)p->ip)) {
744 ftrace_del_hash(p);
745 INIT_HLIST_NODE(&p->node);
746 hlist_add_head(&p->node, &temp_list);
747 freeze_record(p);
748 continue;
749 } else {
750 unfreeze_record(p);
751 }
752 621
753 /* convert record (i.e, patch mcount-call with NOP) */ 622 list_del_init(&p->list);
754 if (ftrace_code_disable(p)) {
755 p->flags |= FTRACE_FL_CONVERTED;
756 ftrace_update_cnt++;
757 } else {
758 if ((system_state == SYSTEM_BOOTING) ||
759 !core_kernel_text(p->ip)) {
760 ftrace_del_hash(p);
761 ftrace_free_rec(p);
762 }
763 }
764 }
765 623
766 hlist_for_each_entry_safe(p, t, n, &temp_list, node) { 624 /* convert record (i.e, patch mcount-call with NOP) */
767 hlist_del(&p->node); 625 if (ftrace_code_disable(p)) {
768 INIT_HLIST_NODE(&p->node); 626 p->flags |= FTRACE_FL_CONVERTED;
769 hlist_add_head(&p->node, head); 627 ftrace_update_cnt++;
770 } 628 } else
629 ftrace_free_rec(p);
771 } 630 }
772 631
773 stop = ftrace_now(raw_smp_processor_id()); 632 stop = ftrace_now(raw_smp_processor_id());
774 ftrace_update_time = stop - start; 633 ftrace_update_time = stop - start;
775 ftrace_update_tot_cnt += ftrace_update_cnt; 634 ftrace_update_tot_cnt += ftrace_update_cnt;
776 ftraced_trigger = 0;
777
778 ftrace_enabled = save_ftrace_enabled;
779 ftrace_record_suspend--;
780
781 return 0;
782}
783 635
784static int ftrace_update_code(void)
785{
786 if (unlikely(ftrace_disabled) ||
787 !ftrace_enabled || !ftraced_trigger)
788 return 0;
789
790 stop_machine(__ftrace_update_code, NULL, NULL);
791
792 return 1;
793}
794
795static int ftraced(void *ignore)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0; 636 return 0;
833} 637}
834 638
835static int __init ftrace_dyn_table_alloc(void) 639static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
836{ 640{
837 struct ftrace_page *pg; 641 struct ftrace_page *pg;
838 int cnt; 642 int cnt;
@@ -859,7 +663,9 @@ static int __init ftrace_dyn_table_alloc(void)
859 663
860 pg = ftrace_pages = ftrace_pages_start; 664 pg = ftrace_pages = ftrace_pages_start;
861 665
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE; 666 cnt = num_to_init / ENTRIES_PER_PAGE;
667 pr_info("ftrace: allocating %ld entries in %d pages\n",
668 num_to_init, cnt + 1);
863 669
864 for (i = 0; i < cnt; i++) { 670 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 671 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +707,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
901 707
902 (*pos)++; 708 (*pos)++;
903 709
710 /* should not be called from interrupt context */
711 spin_lock(&ftrace_lock);
904 retry: 712 retry:
905 if (iter->idx >= iter->pg->index) { 713 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) { 714 if (iter->pg->next) {
@@ -910,12 +718,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
910 } 718 }
911 } else { 719 } else {
912 rec = &iter->pg->records[iter->idx++]; 720 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) && 721 if ((rec->flags & FTRACE_FL_FREE) ||
722
723 (!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) || 724 (rec->flags & FTRACE_FL_FAILED)) ||
915 725
916 ((iter->flags & FTRACE_ITER_FAILURES) && 726 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) || 727 !(rec->flags & FTRACE_FL_FAILED)) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919 728
920 ((iter->flags & FTRACE_ITER_FILTER) && 729 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) || 730 !(rec->flags & FTRACE_FL_FILTER)) ||
@@ -926,6 +735,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
926 goto retry; 735 goto retry;
927 } 736 }
928 } 737 }
738 spin_unlock(&ftrace_lock);
929 739
930 iter->pos = *pos; 740 iter->pos = *pos;
931 741
@@ -938,13 +748,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
938 void *p = NULL; 748 void *p = NULL;
939 loff_t l = -1; 749 loff_t l = -1;
940 750
941 if (*pos != iter->pos) { 751 if (*pos > iter->pos)
942 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l)) 752 *pos = iter->pos;
943 ; 753
944 } else { 754 l = *pos;
945 l = *pos; 755 p = t_next(m, p, &l);
946 p = t_next(m, p, &l);
947 }
948 756
949 return p; 757 return p;
950} 758}
@@ -955,15 +763,21 @@ static void t_stop(struct seq_file *m, void *p)
955 763
956static int t_show(struct seq_file *m, void *v) 764static int t_show(struct seq_file *m, void *v)
957{ 765{
766 struct ftrace_iterator *iter = m->private;
958 struct dyn_ftrace *rec = v; 767 struct dyn_ftrace *rec = v;
959 char str[KSYM_SYMBOL_LEN]; 768 char str[KSYM_SYMBOL_LEN];
769 int ret = 0;
960 770
961 if (!rec) 771 if (!rec)
962 return 0; 772 return 0;
963 773
964 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 774 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
965 775
966 seq_printf(m, "%s\n", str); 776 ret = seq_printf(m, "%s\n", str);
777 if (ret < 0) {
778 iter->pos--;
779 iter->idx--;
780 }
967 781
968 return 0; 782 return 0;
969} 783}
@@ -989,7 +803,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
989 return -ENOMEM; 803 return -ENOMEM;
990 804
991 iter->pg = ftrace_pages_start; 805 iter->pg = ftrace_pages_start;
992 iter->pos = -1; 806 iter->pos = 0;
993 807
994 ret = seq_open(file, &show_ftrace_seq_ops); 808 ret = seq_open(file, &show_ftrace_seq_ops);
995 if (!ret) { 809 if (!ret) {
@@ -1039,8 +853,8 @@ static void ftrace_filter_reset(int enable)
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 853 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i; 854 unsigned i;
1041 855
1042 /* keep kstop machine from running */ 856 /* should not be called from interrupt context */
1043 preempt_disable(); 857 spin_lock(&ftrace_lock);
1044 if (enable) 858 if (enable)
1045 ftrace_filtered = 0; 859 ftrace_filtered = 0;
1046 pg = ftrace_pages_start; 860 pg = ftrace_pages_start;
@@ -1053,7 +867,7 @@ static void ftrace_filter_reset(int enable)
1053 } 867 }
1054 pg = pg->next; 868 pg = pg->next;
1055 } 869 }
1056 preempt_enable(); 870 spin_unlock(&ftrace_lock);
1057} 871}
1058 872
1059static int 873static int
@@ -1076,7 +890,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1076 890
1077 if (file->f_mode & FMODE_READ) { 891 if (file->f_mode & FMODE_READ) {
1078 iter->pg = ftrace_pages_start; 892 iter->pg = ftrace_pages_start;
1079 iter->pos = -1; 893 iter->pos = 0;
1080 iter->flags = enable ? FTRACE_ITER_FILTER : 894 iter->flags = enable ? FTRACE_ITER_FILTER :
1081 FTRACE_ITER_NOTRACE; 895 FTRACE_ITER_NOTRACE;
1082 896
@@ -1165,8 +979,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
1165 } 979 }
1166 } 980 }
1167 981
1168 /* keep kstop machine from running */ 982 /* should not be called from interrupt context */
1169 preempt_disable(); 983 spin_lock(&ftrace_lock);
1170 if (enable) 984 if (enable)
1171 ftrace_filtered = 1; 985 ftrace_filtered = 1;
1172 pg = ftrace_pages_start; 986 pg = ftrace_pages_start;
@@ -1203,7 +1017,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
1203 } 1017 }
1204 pg = pg->next; 1018 pg = pg->next;
1205 } 1019 }
1206 preempt_enable(); 1020 spin_unlock(&ftrace_lock);
1207} 1021}
1208 1022
1209static ssize_t 1023static ssize_t
@@ -1366,10 +1180,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1366 } 1180 }
1367 1181
1368 mutex_lock(&ftrace_sysctl_lock); 1182 mutex_lock(&ftrace_sysctl_lock);
1369 mutex_lock(&ftraced_lock); 1183 mutex_lock(&ftrace_start_lock);
1370 if (iter->filtered && ftraced_suspend && ftrace_enabled) 1184 if (ftrace_start && ftrace_enabled)
1371 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1372 mutex_unlock(&ftraced_lock); 1186 mutex_unlock(&ftrace_start_lock);
1373 mutex_unlock(&ftrace_sysctl_lock); 1187 mutex_unlock(&ftrace_sysctl_lock);
1374 1188
1375 kfree(iter); 1189 kfree(iter);
@@ -1389,55 +1203,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1389 return ftrace_regex_release(inode, file, 0); 1203 return ftrace_regex_release(inode, file, 0);
1390} 1204}
1391 1205
1392static ssize_t
1393ftraced_read(struct file *filp, char __user *ubuf,
1394 size_t cnt, loff_t *ppos)
1395{
1396 /* don't worry about races */
1397 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1398 int r = strlen(buf);
1399
1400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1401}
1402
1403static ssize_t
1404ftraced_write(struct file *filp, const char __user *ubuf,
1405 size_t cnt, loff_t *ppos)
1406{
1407 char buf[64];
1408 long val;
1409 int ret;
1410
1411 if (cnt >= sizeof(buf))
1412 return -EINVAL;
1413
1414 if (copy_from_user(&buf, ubuf, cnt))
1415 return -EFAULT;
1416
1417 if (strncmp(buf, "enable", 6) == 0)
1418 val = 1;
1419 else if (strncmp(buf, "disable", 7) == 0)
1420 val = 0;
1421 else {
1422 buf[cnt] = 0;
1423
1424 ret = strict_strtoul(buf, 10, &val);
1425 if (ret < 0)
1426 return ret;
1427
1428 val = !!val;
1429 }
1430
1431 if (val)
1432 ftrace_enable_daemon();
1433 else
1434 ftrace_disable_daemon();
1435
1436 filp->f_pos += cnt;
1437
1438 return cnt;
1439}
1440
1441static struct file_operations ftrace_avail_fops = { 1206static struct file_operations ftrace_avail_fops = {
1442 .open = ftrace_avail_open, 1207 .open = ftrace_avail_open,
1443 .read = seq_read, 1208 .read = seq_read,
@@ -1468,54 +1233,6 @@ static struct file_operations ftrace_notrace_fops = {
1468 .release = ftrace_notrace_release, 1233 .release = ftrace_notrace_release,
1469}; 1234};
1470 1235
1471static struct file_operations ftraced_fops = {
1472 .open = tracing_open_generic,
1473 .read = ftraced_read,
1474 .write = ftraced_write,
1475};
1476
1477/**
1478 * ftrace_force_update - force an update to all recording ftrace functions
1479 */
1480int ftrace_force_update(void)
1481{
1482 int ret = 0;
1483
1484 if (unlikely(ftrace_disabled))
1485 return -ENODEV;
1486
1487 mutex_lock(&ftrace_sysctl_lock);
1488 mutex_lock(&ftraced_lock);
1489
1490 /*
1491 * If ftraced_trigger is not set, then there is nothing
1492 * to update.
1493 */
1494 if (ftraced_trigger && !ftrace_update_code())
1495 ret = -EBUSY;
1496
1497 mutex_unlock(&ftraced_lock);
1498 mutex_unlock(&ftrace_sysctl_lock);
1499
1500 return ret;
1501}
1502
1503static void ftrace_force_shutdown(void)
1504{
1505 struct task_struct *task;
1506 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1507
1508 mutex_lock(&ftraced_lock);
1509 task = ftraced_task;
1510 ftraced_task = NULL;
1511 ftraced_suspend = -1;
1512 ftrace_run_update_code(command);
1513 mutex_unlock(&ftraced_lock);
1514
1515 if (task)
1516 kthread_stop(task);
1517}
1518
1519static __init int ftrace_init_debugfs(void) 1236static __init int ftrace_init_debugfs(void)
1520{ 1237{
1521 struct dentry *d_tracer; 1238 struct dentry *d_tracer;
@@ -1546,97 +1263,103 @@ static __init int ftrace_init_debugfs(void)
1546 pr_warning("Could not create debugfs " 1263 pr_warning("Could not create debugfs "
1547 "'set_ftrace_notrace' entry\n"); 1264 "'set_ftrace_notrace' entry\n");
1548 1265
1549 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1550 NULL, &ftraced_fops);
1551 if (!entry)
1552 pr_warning("Could not create debugfs "
1553 "'ftraced_enabled' entry\n");
1554 return 0; 1266 return 0;
1555} 1267}
1556 1268
1557fs_initcall(ftrace_init_debugfs); 1269fs_initcall(ftrace_init_debugfs);
1558 1270
1559static int __init ftrace_dynamic_init(void) 1271static int ftrace_convert_nops(unsigned long *start,
1272 unsigned long *end)
1560{ 1273{
1561 struct task_struct *p; 1274 unsigned long *p;
1562 unsigned long addr; 1275 unsigned long addr;
1276 unsigned long flags;
1277
1278 mutex_lock(&ftrace_start_lock);
1279 p = start;
1280 while (p < end) {
1281 addr = ftrace_call_adjust(*p++);
1282 ftrace_record_ip(addr);
1283 }
1284
1285 /* disable interrupts to prevent kstop machine */
1286 local_irq_save(flags);
1287 ftrace_update_code();
1288 local_irq_restore(flags);
1289 mutex_unlock(&ftrace_start_lock);
1290
1291 return 0;
1292}
1293
1294void ftrace_init_module(unsigned long *start, unsigned long *end)
1295{
1296 if (ftrace_disabled || start == end)
1297 return;
1298 ftrace_convert_nops(start, end);
1299}
1300
1301extern unsigned long __start_mcount_loc[];
1302extern unsigned long __stop_mcount_loc[];
1303
1304void __init ftrace_init(void)
1305{
1306 unsigned long count, addr, flags;
1563 int ret; 1307 int ret;
1564 1308
1565 addr = (unsigned long)ftrace_record_ip; 1309 /* Keep the ftrace pointer to the stub */
1310 addr = (unsigned long)ftrace_stub;
1566 1311
1567 stop_machine(ftrace_dyn_arch_init, &addr, NULL); 1312 local_irq_save(flags);
1313 ftrace_dyn_arch_init(&addr);
1314 local_irq_restore(flags);
1568 1315
1569 /* ftrace_dyn_arch_init places the return code in addr */ 1316 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) { 1317 if (addr)
1571 ret = (int)addr;
1572 goto failed; 1318 goto failed;
1573 }
1574 1319
1575 ret = ftrace_dyn_table_alloc(); 1320 count = __stop_mcount_loc - __start_mcount_loc;
1576 if (ret)
1577 goto failed;
1578 1321
1579 p = kthread_run(ftraced, NULL, "ftraced"); 1322 ret = ftrace_dyn_table_alloc(count);
1580 if (IS_ERR(p)) { 1323 if (ret)
1581 ret = -1;
1582 goto failed; 1324 goto failed;
1583 }
1584 1325
1585 last_ftrace_enabled = ftrace_enabled = 1; 1326 last_ftrace_enabled = ftrace_enabled = 1;
1586 ftraced_task = p;
1587 1327
1588 return 0; 1328 ret = ftrace_convert_nops(__start_mcount_loc,
1329 __stop_mcount_loc);
1589 1330
1331 return;
1590 failed: 1332 failed:
1591 ftrace_disabled = 1; 1333 ftrace_disabled = 1;
1592 return ret;
1593} 1334}
1594 1335
1595core_initcall(ftrace_dynamic_init);
1596#else 1336#else
1337
1338static int __init ftrace_nodyn_init(void)
1339{
1340 ftrace_enabled = 1;
1341 return 0;
1342}
1343device_initcall(ftrace_nodyn_init);
1344
1597# define ftrace_startup() do { } while (0) 1345# define ftrace_startup() do { } while (0)
1598# define ftrace_shutdown() do { } while (0) 1346# define ftrace_shutdown() do { } while (0)
1599# define ftrace_startup_sysctl() do { } while (0) 1347# define ftrace_startup_sysctl() do { } while (0)
1600# define ftrace_shutdown_sysctl() do { } while (0) 1348# define ftrace_shutdown_sysctl() do { } while (0)
1601# define ftrace_force_shutdown() do { } while (0)
1602#endif /* CONFIG_DYNAMIC_FTRACE */ 1349#endif /* CONFIG_DYNAMIC_FTRACE */
1603 1350
1604/** 1351/**
1605 * ftrace_kill_atomic - kill ftrace from critical sections 1352 * ftrace_kill - kill ftrace
1606 * 1353 *
1607 * This function should be used by panic code. It stops ftrace 1354 * This function should be used by panic code. It stops ftrace
1608 * but in a not so nice way. If you need to simply kill ftrace 1355 * but in a not so nice way. If you need to simply kill ftrace
1609 * from a non-atomic section, use ftrace_kill. 1356 * from a non-atomic section, use ftrace_kill.
1610 */ 1357 */
1611void ftrace_kill_atomic(void)
1612{
1613 ftrace_disabled = 1;
1614 ftrace_enabled = 0;
1615#ifdef CONFIG_DYNAMIC_FTRACE
1616 ftraced_suspend = -1;
1617#endif
1618 clear_ftrace_function();
1619}
1620
1621/**
1622 * ftrace_kill - totally shutdown ftrace
1623 *
1624 * This is a safety measure. If something was detected that seems
1625 * wrong, calling this function will keep ftrace from doing
1626 * any more modifications, and updates.
1627 * used when something went wrong.
1628 */
1629void ftrace_kill(void) 1358void ftrace_kill(void)
1630{ 1359{
1631 mutex_lock(&ftrace_sysctl_lock);
1632 ftrace_disabled = 1; 1360 ftrace_disabled = 1;
1633 ftrace_enabled = 0; 1361 ftrace_enabled = 0;
1634
1635 clear_ftrace_function(); 1362 clear_ftrace_function();
1636 mutex_unlock(&ftrace_sysctl_lock);
1637
1638 /* Try to totally disable ftrace */
1639 ftrace_force_shutdown();
1640} 1363}
1641 1364
1642/** 1365/**
@@ -1725,3 +1448,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1725 mutex_unlock(&ftrace_sysctl_lock); 1448 mutex_unlock(&ftrace_sysctl_lock);
1726 return ret; 1449 return ret;
1727} 1450}
1451
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 000000000000..f780e9552f91
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2186 @@
1/*
2 * Generic ring buffer
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/ring_buffer.h>
7#include <linux/spinlock.h>
8#include <linux/debugfs.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/percpu.h>
12#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h>
15#include <linux/hash.h>
16#include <linux/list.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21/* Global flag to disable all recording to ring buffers */
22static int ring_buffers_off __read_mostly;
23
24/**
25 * tracing_on - enable all tracing buffers
26 *
27 * This function enables all tracing buffers that may have been
28 * disabled with tracing_off.
29 */
30void tracing_on(void)
31{
32 ring_buffers_off = 0;
33}
34
35/**
36 * tracing_off - turn off all tracing buffers
37 *
38 * This function stops all tracing buffers from recording data.
39 * It does not disable any overhead the tracers themselves may
40 * be causing. This function simply causes all recording to
41 * the ring buffers to fail.
42 */
43void tracing_off(void)
44{
45 ring_buffers_off = 1;
46}
47
48/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0
50
51/* FIXME!!! */
52u64 ring_buffer_time_stamp(int cpu)
53{
54 u64 time;
55
56 preempt_disable_notrace();
57 /* shift to debug/test normalization and TIME_EXTENTS */
58 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace();
60
61 return time;
62}
63
64void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
65{
66 /* Just stupid testing the normalize function and deltas */
67 *ts >>= DEBUG_SHIFT;
68}
69
70#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
71#define RB_ALIGNMENT_SHIFT 2
72#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
73#define RB_MAX_SMALL_DATA 28
74
75enum {
76 RB_LEN_TIME_EXTEND = 8,
77 RB_LEN_TIME_STAMP = 16,
78};
79
80/* inline for ring buffer fast paths */
81static inline unsigned
82rb_event_length(struct ring_buffer_event *event)
83{
84 unsigned length;
85
86 switch (event->type) {
87 case RINGBUF_TYPE_PADDING:
88 /* undefined */
89 return -1;
90
91 case RINGBUF_TYPE_TIME_EXTEND:
92 return RB_LEN_TIME_EXTEND;
93
94 case RINGBUF_TYPE_TIME_STAMP:
95 return RB_LEN_TIME_STAMP;
96
97 case RINGBUF_TYPE_DATA:
98 if (event->len)
99 length = event->len << RB_ALIGNMENT_SHIFT;
100 else
101 length = event->array[0];
102 return length + RB_EVNT_HDR_SIZE;
103 default:
104 BUG();
105 }
106 /* not hit */
107 return 0;
108}
109
110/**
111 * ring_buffer_event_length - return the length of the event
112 * @event: the event to get the length of
113 */
114unsigned ring_buffer_event_length(struct ring_buffer_event *event)
115{
116 return rb_event_length(event);
117}
118
119/* inline for ring buffer fast paths */
120static inline void *
121rb_event_data(struct ring_buffer_event *event)
122{
123 BUG_ON(event->type != RINGBUF_TYPE_DATA);
124 /* If length is in len field, then array[0] has the data */
125 if (event->len)
126 return (void *)&event->array[0];
127 /* Otherwise length is in array[0] and array[1] has the data */
128 return (void *)&event->array[1];
129}
130
131/**
132 * ring_buffer_event_data - return the data of the event
133 * @event: the event to get the data from
134 */
135void *ring_buffer_event_data(struct ring_buffer_event *event)
136{
137 return rb_event_data(event);
138}
139
140#define for_each_buffer_cpu(buffer, cpu) \
141 for_each_cpu_mask(cpu, buffer->cpumask)
142
143#define TS_SHIFT 27
144#define TS_MASK ((1ULL << TS_SHIFT) - 1)
145#define TS_DELTA_TEST (~TS_MASK)
146
147/*
148 * This hack stolen from mm/slob.c.
149 * We can store per page timing information in the page frame of the page.
150 * Thanks to Peter Zijlstra for suggesting this idea.
151 */
152struct buffer_page {
153 u64 time_stamp; /* page time stamp */
154 local_t write; /* index for next write */
155 local_t commit; /* write commited index */
156 unsigned read; /* index for next read */
157 struct list_head list; /* list of free pages */
158 void *page; /* Actual data page */
159};
160
161/*
162 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
163 * this issue out.
164 */
165static inline void free_buffer_page(struct buffer_page *bpage)
166{
167 if (bpage->page)
168 free_page((unsigned long)bpage->page);
169 kfree(bpage);
170}
171
172/*
173 * We need to fit the time_stamp delta into 27 bits.
174 */
175static inline int test_time_stamp(u64 delta)
176{
177 if (delta & TS_DELTA_TEST)
178 return 1;
179 return 0;
180}
181
182#define BUF_PAGE_SIZE PAGE_SIZE
183
184/*
185 * head_page == tail_page && head == tail then buffer is empty.
186 */
187struct ring_buffer_per_cpu {
188 int cpu;
189 struct ring_buffer *buffer;
190 spinlock_t lock;
191 struct lock_class_key lock_key;
192 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */
194 struct buffer_page *tail_page; /* write to tail */
195 struct buffer_page *commit_page; /* commited pages */
196 struct buffer_page *reader_page;
197 unsigned long overrun;
198 unsigned long entries;
199 u64 write_stamp;
200 u64 read_stamp;
201 atomic_t record_disabled;
202};
203
204struct ring_buffer {
205 unsigned long size;
206 unsigned pages;
207 unsigned flags;
208 int cpus;
209 cpumask_t cpumask;
210 atomic_t record_disabled;
211
212 struct mutex mutex;
213
214 struct ring_buffer_per_cpu **buffers;
215};
216
217struct ring_buffer_iter {
218 struct ring_buffer_per_cpu *cpu_buffer;
219 unsigned long head;
220 struct buffer_page *head_page;
221 u64 read_stamp;
222};
223
224#define RB_WARN_ON(buffer, cond) \
225 do { \
226 if (unlikely(cond)) { \
227 atomic_inc(&buffer->record_disabled); \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \
248 } \
249 } while (0)
250
251/**
 252 * rb_check_pages - integrity check of buffer pages
253 * @cpu_buffer: CPU buffer with pages to test
254 *
 255 * As a safety measure we check to make sure the data pages have not
256 * been corrupted.
257 */
258static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
259{
260 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp;
262
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
265
266 list_for_each_entry_safe(page, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer,
268 page->list.next->prev != &page->list);
269 RB_WARN_ON_RET(cpu_buffer,
270 page->list.prev->next != &page->list);
271 }
272
273 return 0;
274}
275
276static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
277 unsigned nr_pages)
278{
279 struct list_head *head = &cpu_buffer->pages;
280 struct buffer_page *page, *tmp;
281 unsigned long addr;
282 LIST_HEAD(pages);
283 unsigned i;
284
285 for (i = 0; i < nr_pages; i++) {
286 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
287 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
288 if (!page)
289 goto free_pages;
290 list_add(&page->list, &pages);
291
292 addr = __get_free_page(GFP_KERNEL);
293 if (!addr)
294 goto free_pages;
295 page->page = (void *)addr;
296 }
297
298 list_splice(&pages, head);
299
300 rb_check_pages(cpu_buffer);
301
302 return 0;
303
304 free_pages:
305 list_for_each_entry_safe(page, tmp, &pages, list) {
306 list_del_init(&page->list);
307 free_buffer_page(page);
308 }
309 return -ENOMEM;
310}
311
312static struct ring_buffer_per_cpu *
313rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
314{
315 struct ring_buffer_per_cpu *cpu_buffer;
316 struct buffer_page *page;
317 unsigned long addr;
318 int ret;
319
320 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
321 GFP_KERNEL, cpu_to_node(cpu));
322 if (!cpu_buffer)
323 return NULL;
324
325 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock);
328 INIT_LIST_HEAD(&cpu_buffer->pages);
329
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
331 GFP_KERNEL, cpu_to_node(cpu));
332 if (!page)
333 goto fail_free_buffer;
334
335 cpu_buffer->reader_page = page;
336 addr = __get_free_page(GFP_KERNEL);
337 if (!addr)
338 goto fail_free_reader;
339 page->page = (void *)addr;
340
341 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
342
343 ret = rb_allocate_pages(cpu_buffer, buffer->pages);
344 if (ret < 0)
345 goto fail_free_reader;
346
347 cpu_buffer->head_page
348 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
349 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
350
351 return cpu_buffer;
352
353 fail_free_reader:
354 free_buffer_page(cpu_buffer->reader_page);
355
356 fail_free_buffer:
357 kfree(cpu_buffer);
358 return NULL;
359}
360
361static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
362{
363 struct list_head *head = &cpu_buffer->pages;
364 struct buffer_page *page, *tmp;
365
366 list_del_init(&cpu_buffer->reader_page->list);
367 free_buffer_page(cpu_buffer->reader_page);
368
369 list_for_each_entry_safe(page, tmp, head, list) {
370 list_del_init(&page->list);
371 free_buffer_page(page);
372 }
373 kfree(cpu_buffer);
374}
375
376/*
377 * Causes compile errors if the struct buffer_page gets bigger
378 * than the struct page.
379 */
380extern int ring_buffer_page_too_big(void);
381
382/**
383 * ring_buffer_alloc - allocate a new ring_buffer
384 * @size: the size in bytes that is needed.
385 * @flags: attributes to set for the ring buffer.
386 *
387 * Currently the only flag that is available is the RB_FL_OVERWRITE
388 * flag. This flag means that the buffer will overwrite old data
389 * when the buffer wraps. If this flag is not set, the buffer will
390 * drop data when the tail hits the head.
391 */
392struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
393{
394 struct ring_buffer *buffer;
395 int bsize;
396 int cpu;
397
398 /* Paranoid! Optimizes out when all is well */
399 if (sizeof(struct buffer_page) > sizeof(struct page))
400 ring_buffer_page_too_big();
401
402
403 /* keep it in its own cache line */
404 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
405 GFP_KERNEL);
406 if (!buffer)
407 return NULL;
408
409 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
410 buffer->flags = flags;
411
412 /* need at least two pages */
413 if (buffer->pages == 1)
414 buffer->pages++;
415
416 buffer->cpumask = cpu_possible_map;
417 buffer->cpus = nr_cpu_ids;
418
419 bsize = sizeof(void *) * nr_cpu_ids;
420 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
421 GFP_KERNEL);
422 if (!buffer->buffers)
423 goto fail_free_buffer;
424
425 for_each_buffer_cpu(buffer, cpu) {
426 buffer->buffers[cpu] =
427 rb_allocate_cpu_buffer(buffer, cpu);
428 if (!buffer->buffers[cpu])
429 goto fail_free_buffers;
430 }
431
432 mutex_init(&buffer->mutex);
433
434 return buffer;
435
436 fail_free_buffers:
437 for_each_buffer_cpu(buffer, cpu) {
438 if (buffer->buffers[cpu])
439 rb_free_cpu_buffer(buffer->buffers[cpu]);
440 }
441 kfree(buffer->buffers);
442
443 fail_free_buffer:
444 kfree(buffer);
445 return NULL;
446}
447
448/**
449 * ring_buffer_free - free a ring buffer.
450 * @buffer: the buffer to free.
451 */
452void
453ring_buffer_free(struct ring_buffer *buffer)
454{
455 int cpu;
456
457 for_each_buffer_cpu(buffer, cpu)
458 rb_free_cpu_buffer(buffer->buffers[cpu]);
459
460 kfree(buffer);
461}
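/*
 * Editor's sketch (illustrative, not part of the original file): minimal
 * usage of the allocation/free pair above via the public API declared in
 * linux/ring_buffer.h. "example_buffer", "example_setup" and
 * "example_teardown" are made-up names used only for illustration.
 */
static struct ring_buffer *example_buffer;

static int example_setup(void)
{
	/* one megabyte, overwrite the oldest data when the buffer wraps */
	example_buffer = ring_buffer_alloc(1024 * 1024, RB_FL_OVERWRITE);
	if (!example_buffer)
		return -ENOMEM;
	return 0;
}

static void example_teardown(void)
{
	ring_buffer_free(example_buffer);
}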
462
463static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
464
465static void
466rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
467{
468 struct buffer_page *page;
469 struct list_head *p;
470 unsigned i;
471
472 atomic_inc(&cpu_buffer->record_disabled);
473 synchronize_sched();
474
475 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages));
477 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list);
480 free_buffer_page(page);
481 }
482 BUG_ON(list_empty(&cpu_buffer->pages));
483
484 rb_reset_cpu(cpu_buffer);
485
486 rb_check_pages(cpu_buffer);
487
488 atomic_dec(&cpu_buffer->record_disabled);
489
490}
491
492static void
493rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
494 struct list_head *pages, unsigned nr_pages)
495{
496 struct buffer_page *page;
497 struct list_head *p;
498 unsigned i;
499
500 atomic_inc(&cpu_buffer->record_disabled);
501 synchronize_sched();
502
503 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages));
505 p = pages->next;
506 page = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list);
508 list_add_tail(&page->list, &cpu_buffer->pages);
509 }
510 rb_reset_cpu(cpu_buffer);
511
512 rb_check_pages(cpu_buffer);
513
514 atomic_dec(&cpu_buffer->record_disabled);
515}
516
517/**
518 * ring_buffer_resize - resize the ring buffer
519 * @buffer: the buffer to resize.
520 * @size: the new size.
521 *
522 * The tracer is responsible for making sure that the buffer is
523 * not being used while changing the size.
524 * Note: We may be able to change the above requirement by using
525 * RCU synchronizations.
526 *
527 * Minimum size is 2 * BUF_PAGE_SIZE.
528 *
 529 * Returns the new size on success, -ENOMEM on failure.
530 */
531int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
532{
533 struct ring_buffer_per_cpu *cpu_buffer;
534 unsigned nr_pages, rm_pages, new_pages;
535 struct buffer_page *page, *tmp;
536 unsigned long buffer_size;
537 unsigned long addr;
538 LIST_HEAD(pages);
539 int i, cpu;
540
541 /*
542 * Always succeed at resizing a non-existent buffer:
543 */
544 if (!buffer)
545 return size;
546
547 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
548 size *= BUF_PAGE_SIZE;
549 buffer_size = buffer->pages * BUF_PAGE_SIZE;
550
551 /* we need a minimum of two pages */
552 if (size < BUF_PAGE_SIZE * 2)
553 size = BUF_PAGE_SIZE * 2;
554
555 if (size == buffer_size)
556 return size;
557
558 mutex_lock(&buffer->mutex);
559
560 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
561
562 if (size < buffer_size) {
563
564 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages);
566
567 rm_pages = buffer->pages - nr_pages;
568
569 for_each_buffer_cpu(buffer, cpu) {
570 cpu_buffer = buffer->buffers[cpu];
571 rb_remove_pages(cpu_buffer, rm_pages);
572 }
573 goto out;
574 }
575
576 /*
577 * This is a bit more difficult. We only want to add pages
578 * when we can allocate enough for all CPUs. We do this
579 * by allocating all the pages and storing them on a local
580 * link list. If we succeed in our allocation, then we
581 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM;
583 */
584 BUG_ON(nr_pages <= buffer->pages);
585 new_pages = nr_pages - buffer->pages;
586
587 for_each_buffer_cpu(buffer, cpu) {
588 for (i = 0; i < new_pages; i++) {
589 page = kzalloc_node(ALIGN(sizeof(*page),
590 cache_line_size()),
591 GFP_KERNEL, cpu_to_node(cpu));
592 if (!page)
593 goto free_pages;
594 list_add(&page->list, &pages);
595 addr = __get_free_page(GFP_KERNEL);
596 if (!addr)
597 goto free_pages;
598 page->page = (void *)addr;
599 }
600 }
601
602 for_each_buffer_cpu(buffer, cpu) {
603 cpu_buffer = buffer->buffers[cpu];
604 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 }
606
607 BUG_ON(!list_empty(&pages));
608
609 out:
610 buffer->pages = nr_pages;
611 mutex_unlock(&buffer->mutex);
612
613 return size;
614
615 free_pages:
616 list_for_each_entry_safe(page, tmp, &pages, list) {
617 list_del_init(&page->list);
618 free_buffer_page(page);
619 }
620 mutex_unlock(&buffer->mutex);
621 return -ENOMEM;
622}
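/*
 * Editor's sketch (illustrative only): doubling a buffer with
 * ring_buffer_resize(). The caller must guarantee the buffer is idle;
 * the return value is the new size in bytes or -ENOMEM.
 * "example_double_buffer" is a made-up name.
 */
static int example_double_buffer(struct ring_buffer *buffer)
{
	unsigned long new_size = ring_buffer_size(buffer) * 2;
	int ret;

	ret = ring_buffer_resize(buffer, new_size);
	if (ret < 0)
		return ret;
	return 0;
}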
623
624static inline int rb_null_event(struct ring_buffer_event *event)
625{
626 return event->type == RINGBUF_TYPE_PADDING;
627}
628
629static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
630{
631 return page->page + index;
632}
633
634static inline struct ring_buffer_event *
635rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
636{
637 return __rb_page_index(cpu_buffer->reader_page,
638 cpu_buffer->reader_page->read);
639}
640
641static inline struct ring_buffer_event *
642rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
643{
644 return __rb_page_index(cpu_buffer->head_page,
645 cpu_buffer->head_page->read);
646}
647
648static inline struct ring_buffer_event *
649rb_iter_head_event(struct ring_buffer_iter *iter)
650{
651 return __rb_page_index(iter->head_page, iter->head);
652}
653
654static inline unsigned rb_page_write(struct buffer_page *bpage)
655{
656 return local_read(&bpage->write);
657}
658
659static inline unsigned rb_page_commit(struct buffer_page *bpage)
660{
661 return local_read(&bpage->commit);
662}
663
 664/* Size is determined by what has been committed */
665static inline unsigned rb_page_size(struct buffer_page *bpage)
666{
667 return rb_page_commit(bpage);
668}
669
670static inline unsigned
671rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
672{
673 return rb_page_commit(cpu_buffer->commit_page);
674}
675
676static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
677{
678 return rb_page_commit(cpu_buffer->head_page);
679}
680
681/*
682 * When the tail hits the head and the buffer is in overwrite mode,
683 * the head jumps to the next page and all content on the previous
684 * page is discarded. But before doing so, we update the overrun
685 * variable of the buffer.
686 */
687static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
688{
689 struct ring_buffer_event *event;
690 unsigned long head;
691
692 for (head = 0; head < rb_head_size(cpu_buffer);
693 head += rb_event_length(event)) {
694
695 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event));
697 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA)
699 continue;
700 cpu_buffer->overrun++;
701 cpu_buffer->entries--;
702 }
703}
704
705static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
706 struct buffer_page **page)
707{
708 struct list_head *p = (*page)->list.next;
709
710 if (p == &cpu_buffer->pages)
711 p = p->next;
712
713 *page = list_entry(p, struct buffer_page, list);
714}
715
716static inline unsigned
717rb_event_index(struct ring_buffer_event *event)
718{
719 unsigned long addr = (unsigned long)event;
720
721 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
722}
723
724static inline int
725rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
726 struct ring_buffer_event *event)
727{
728 unsigned long addr = (unsigned long)event;
729 unsigned long index;
730
731 index = rb_event_index(event);
732 addr &= PAGE_MASK;
733
734 return cpu_buffer->commit_page->page == (void *)addr &&
735 rb_commit_index(cpu_buffer) == index;
736}
737
738static inline void
739rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
740 struct ring_buffer_event *event)
741{
742 unsigned long addr = (unsigned long)event;
743 unsigned long index;
744
745 index = rb_event_index(event);
746 addr &= PAGE_MASK;
747
748 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page);
751 cpu_buffer->commit_page->commit =
752 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
754 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
755 }
756
757 /* Now set the commit to the event's index */
758 local_set(&cpu_buffer->commit_page->commit, index);
759}
760
761static inline void
762rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
763{
764 /*
765 * We only race with interrupts and NMIs on this CPU.
766 * If we own the commit event, then we can commit
767 * all others that interrupted us, since the interruptions
768 * are in stack format (they finish before they come
769 * back to us). This allows us to do a simple loop to
770 * assign the commit to the tail.
771 */
772 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
773 cpu_buffer->commit_page->commit =
774 cpu_buffer->commit_page->write;
775 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
776 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
777 /* add barrier to keep gcc from optimizing too much */
778 barrier();
779 }
780 while (rb_commit_index(cpu_buffer) !=
781 rb_page_write(cpu_buffer->commit_page)) {
782 cpu_buffer->commit_page->commit =
783 cpu_buffer->commit_page->write;
784 barrier();
785 }
786}
787
788static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
789{
790 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
791 cpu_buffer->reader_page->read = 0;
792}
793
794static inline void rb_inc_iter(struct ring_buffer_iter *iter)
795{
796 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
797
798 /*
799 * The iterator could be on the reader page (it starts there).
800 * But the head could have moved, since the reader was
801 * found. Check for this case and assign the iterator
802 * to the head page instead of next.
803 */
804 if (iter->head_page == cpu_buffer->reader_page)
805 iter->head_page = cpu_buffer->head_page;
806 else
807 rb_inc_page(cpu_buffer, &iter->head_page);
808
809 iter->read_stamp = iter->head_page->time_stamp;
810 iter->head = 0;
811}
812
813/**
 814 * rb_update_event - update event type and data
 815 * @event: the event to update
816 * @type: the type of event
817 * @length: the size of the event field in the ring buffer
818 *
819 * Update the type and data fields of the event. The length
820 * is the actual size that is written to the ring buffer,
821 * and with this, we can determine what to place into the
822 * data field.
823 */
824static inline void
825rb_update_event(struct ring_buffer_event *event,
826 unsigned type, unsigned length)
827{
828 event->type = type;
829
830 switch (type) {
831
832 case RINGBUF_TYPE_PADDING:
833 break;
834
835 case RINGBUF_TYPE_TIME_EXTEND:
836 event->len =
837 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
838 >> RB_ALIGNMENT_SHIFT;
839 break;
840
841 case RINGBUF_TYPE_TIME_STAMP:
842 event->len =
843 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
844 >> RB_ALIGNMENT_SHIFT;
845 break;
846
847 case RINGBUF_TYPE_DATA:
848 length -= RB_EVNT_HDR_SIZE;
849 if (length > RB_MAX_SMALL_DATA) {
850 event->len = 0;
851 event->array[0] = length;
852 } else
853 event->len =
854 (length + (RB_ALIGNMENT-1))
855 >> RB_ALIGNMENT_SHIFT;
856 break;
857 default:
858 BUG();
859 }
860}
861
862static inline unsigned rb_calculate_event_length(unsigned length)
863{
864 struct ring_buffer_event event; /* Used only for sizeof array */
865
 866 /* zero length can cause confusion */
867 if (!length)
868 length = 1;
869
870 if (length > RB_MAX_SMALL_DATA)
871 length += sizeof(event.array[0]);
872
873 length += RB_EVNT_HDR_SIZE;
874 length = ALIGN(length, RB_ALIGNMENT);
875
876 return length;
877}
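/*
 * Editor's note (illustrative, not part of the original file): assuming
 * the 4-byte event header and 4-byte alignment used by this file, and a
 * payload small enough for the len field, a request for 10 bytes becomes
 * 10 + 4 = 14 bytes and is rounded up to 16, which is what actually gets
 * reserved on the page.
 */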
878
879static struct ring_buffer_event *
880__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
881 unsigned type, unsigned long length, u64 *ts)
882{
883 struct buffer_page *tail_page, *head_page, *reader_page;
884 unsigned long tail, write;
885 struct ring_buffer *buffer = cpu_buffer->buffer;
886 struct ring_buffer_event *event;
887 unsigned long flags;
888
889 tail_page = cpu_buffer->tail_page;
890 write = local_add_return(length, &tail_page->write);
891 tail = write - length;
892
 893 /* See if we shot past the end of this buffer page */
894 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page;
896
897 spin_lock_irqsave(&cpu_buffer->lock, flags);
898
899 rb_inc_page(cpu_buffer, &next_page);
900
901 head_page = cpu_buffer->head_page;
902 reader_page = cpu_buffer->reader_page;
903
904 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page);
906
907 /*
 908 * If, for some reason, we had an interrupt storm that made
 909 * it all the way around the buffer, bail and warn
910 * about it.
911 */
912 if (unlikely(next_page == cpu_buffer->commit_page)) {
913 WARN_ON_ONCE(1);
914 goto out_unlock;
915 }
916
917 if (next_page == head_page) {
918 if (!(buffer->flags & RB_FL_OVERWRITE)) {
919 /* reset write */
920 if (tail <= BUF_PAGE_SIZE)
921 local_set(&tail_page->write, tail);
922 goto out_unlock;
923 }
924
925 /* tail_page has not moved yet? */
926 if (tail_page == cpu_buffer->tail_page) {
927 /* count overflows */
928 rb_update_overflow(cpu_buffer);
929
930 rb_inc_page(cpu_buffer, &head_page);
931 cpu_buffer->head_page = head_page;
932 cpu_buffer->head_page->read = 0;
933 }
934 }
935
936 /*
937 * If the tail page is still the same as what we think
938 * it is, then it is up to us to update the tail
939 * pointer.
940 */
941 if (tail_page == cpu_buffer->tail_page) {
942 local_set(&next_page->write, 0);
943 local_set(&next_page->commit, 0);
944 cpu_buffer->tail_page = next_page;
945
946 /* reread the time stamp */
947 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
948 cpu_buffer->tail_page->time_stamp = *ts;
949 }
950
951 /*
952 * The actual tail page has moved forward.
953 */
954 if (tail < BUF_PAGE_SIZE) {
955 /* Mark the rest of the page with padding */
956 event = __rb_page_index(tail_page, tail);
957 event->type = RINGBUF_TYPE_PADDING;
958 }
959
960 if (tail <= BUF_PAGE_SIZE)
961 /* Set the write back to the previous setting */
962 local_set(&tail_page->write, tail);
963
964 /*
965 * If this was a commit entry that failed,
966 * increment that too
967 */
968 if (tail_page == cpu_buffer->commit_page &&
969 tail == rb_commit_index(cpu_buffer)) {
970 rb_set_commit_to_write(cpu_buffer);
971 }
972
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
974
975 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN);
977 }
978
979 /* We reserved something on the buffer */
980
981 BUG_ON(write > BUF_PAGE_SIZE);
982
983 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length);
985
986 /*
987 * If this is a commit and the tail is zero, then update
988 * this page's time stamp.
989 */
990 if (!tail && rb_is_commit(cpu_buffer, event))
991 cpu_buffer->commit_page->time_stamp = *ts;
992
993 return event;
994
995 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
997 return NULL;
998}
999
1000static int
1001rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1002 u64 *ts, u64 *delta)
1003{
1004 struct ring_buffer_event *event;
1005 static int once;
1006 int ret;
1007
1008 if (unlikely(*delta > (1ULL << 59) && !once++)) {
1009 printk(KERN_WARNING "Delta way too big! %llu"
1010 " ts=%llu write stamp = %llu\n",
1011 (unsigned long long)*delta,
1012 (unsigned long long)*ts,
1013 (unsigned long long)cpu_buffer->write_stamp);
1014 WARN_ON(1);
1015 }
1016
1017 /*
 1018 * The delta is too big; we need to add a
 1019 * new timestamp.
1020 */
1021 event = __rb_reserve_next(cpu_buffer,
1022 RINGBUF_TYPE_TIME_EXTEND,
1023 RB_LEN_TIME_EXTEND,
1024 ts);
1025 if (!event)
1026 return -EBUSY;
1027
1028 if (PTR_ERR(event) == -EAGAIN)
1029 return -EAGAIN;
1030
 1031 /* Only a committed time event can update the write stamp */
1032 if (rb_is_commit(cpu_buffer, event)) {
1033 /*
1034 * If this is the first on the page, then we need to
1035 * update the page itself, and just put in a zero.
1036 */
1037 if (rb_event_index(event)) {
1038 event->time_delta = *delta & TS_MASK;
1039 event->array[0] = *delta >> TS_SHIFT;
1040 } else {
1041 cpu_buffer->commit_page->time_stamp = *ts;
1042 event->time_delta = 0;
1043 event->array[0] = 0;
1044 }
1045 cpu_buffer->write_stamp = *ts;
1046 /* let the caller know this was the commit */
1047 ret = 1;
1048 } else {
1049 /* Darn, this is just wasted space */
1050 event->time_delta = 0;
1051 event->array[0] = 0;
1052 ret = 0;
1053 }
1054
1055 *delta = 0;
1056
1057 return ret;
1058}
1059
1060static struct ring_buffer_event *
1061rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1062 unsigned type, unsigned long length)
1063{
1064 struct ring_buffer_event *event;
1065 u64 ts, delta;
1066 int commit = 0;
1067 int nr_loops = 0;
1068
1069 again:
1070 /*
1071 * We allow for interrupts to reenter here and do a trace.
1072 * If one does, it will cause this original code to loop
1073 * back here. Even with heavy interrupts happening, this
1074 * should only happen a few times in a row. If this happens
1075 * 1000 times in a row, there must be either an interrupt
1076 * storm or we have something buggy.
1077 * Bail!
1078 */
1079 if (unlikely(++nr_loops > 1000)) {
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL;
1082 }
1083
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085
1086 /*
1087 * Only the first commit can update the timestamp.
1088 * Yes there is a race here. If an interrupt comes in
1089 * just after the conditional and it traces too, then it
1090 * will also check the deltas. More than one timestamp may
1091 * also be made. But only the entry that did the actual
1092 * commit will be something other than zero.
1093 */
1094 if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1095 rb_page_write(cpu_buffer->tail_page) ==
1096 rb_commit_index(cpu_buffer)) {
1097
1098 delta = ts - cpu_buffer->write_stamp;
1099
1100 /* make sure this delta is calculated here */
1101 barrier();
1102
1103 /* Did the write stamp get updated already? */
1104 if (unlikely(ts < cpu_buffer->write_stamp))
1105 delta = 0;
1106
1107 if (test_time_stamp(delta)) {
1108
1109 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1110
1111 if (commit == -EBUSY)
1112 return NULL;
1113
1114 if (commit == -EAGAIN)
1115 goto again;
1116
1117 RB_WARN_ON(cpu_buffer, commit < 0);
1118 }
1119 } else
1120 /* Non commits have zero deltas */
1121 delta = 0;
1122
1123 event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1124 if (PTR_ERR(event) == -EAGAIN)
1125 goto again;
1126
1127 if (!event) {
1128 if (unlikely(commit))
1129 /*
 1130 * Ouch! We needed a timestamp and it was committed. But
1131 * we didn't get our event reserved.
1132 */
1133 rb_set_commit_to_write(cpu_buffer);
1134 return NULL;
1135 }
1136
1137 /*
 1138 * If the timestamp was committed, make the commit our entry
1139 * now so that we will update it when needed.
1140 */
1141 if (commit)
1142 rb_set_commit_event(cpu_buffer, event);
1143 else if (!rb_is_commit(cpu_buffer, event))
1144 delta = 0;
1145
1146 event->time_delta = delta;
1147
1148 return event;
1149}
1150
1151static DEFINE_PER_CPU(int, rb_need_resched);
1152
1153/**
1154 * ring_buffer_lock_reserve - reserve a part of the buffer
1155 * @buffer: the ring buffer to reserve from
1156 * @length: the length of the data to reserve (excluding event header)
1157 * @flags: a pointer to save the interrupt flags
1158 *
 1159 * Returns a reserved event on the ring buffer to copy data directly into.
1160 * The user of this interface will need to get the body to write into
1161 * and can use the ring_buffer_event_data() interface.
1162 *
1163 * The length is the length of the data needed, not the event length
1164 * which also includes the event header.
1165 *
1166 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1167 * If NULL is returned, then nothing has been allocated or locked.
1168 */
1169struct ring_buffer_event *
1170ring_buffer_lock_reserve(struct ring_buffer *buffer,
1171 unsigned long length,
1172 unsigned long *flags)
1173{
1174 struct ring_buffer_per_cpu *cpu_buffer;
1175 struct ring_buffer_event *event;
1176 int cpu, resched;
1177
1178 if (ring_buffers_off)
1179 return NULL;
1180
1181 if (atomic_read(&buffer->record_disabled))
1182 return NULL;
1183
1184 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched();
1186 preempt_disable_notrace();
1187
1188 cpu = raw_smp_processor_id();
1189
1190 if (!cpu_isset(cpu, buffer->cpumask))
1191 goto out;
1192
1193 cpu_buffer = buffer->buffers[cpu];
1194
1195 if (atomic_read(&cpu_buffer->record_disabled))
1196 goto out;
1197
1198 length = rb_calculate_event_length(length);
1199 if (length > BUF_PAGE_SIZE)
1200 goto out;
1201
1202 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1203 if (!event)
1204 goto out;
1205
1206 /*
1207 * Need to store resched state on this cpu.
1208 * Only the first needs to.
1209 */
1210
1211 if (preempt_count() == 1)
1212 per_cpu(rb_need_resched, cpu) = resched;
1213
1214 return event;
1215
1216 out:
1217 if (resched)
 1218 preempt_enable_no_resched_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL;
1222}
1223
1224static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1225 struct ring_buffer_event *event)
1226{
1227 cpu_buffer->entries++;
1228
1229 /* Only process further if we own the commit */
1230 if (!rb_is_commit(cpu_buffer, event))
1231 return;
1232
1233 cpu_buffer->write_stamp += event->time_delta;
1234
1235 rb_set_commit_to_write(cpu_buffer);
1236}
1237
1238/**
 1239 * ring_buffer_unlock_commit - commit a reserved event
1240 * @buffer: The buffer to commit to
1241 * @event: The event pointer to commit.
1242 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1243 *
1244 * This commits the data to the ring buffer, and releases any locks held.
1245 *
1246 * Must be paired with ring_buffer_lock_reserve.
1247 */
1248int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1249 struct ring_buffer_event *event,
1250 unsigned long flags)
1251{
1252 struct ring_buffer_per_cpu *cpu_buffer;
1253 int cpu = raw_smp_processor_id();
1254
1255 cpu_buffer = buffer->buffers[cpu];
1256
1257 rb_commit(cpu_buffer, event);
1258
1259 /*
1260 * Only the last preempt count needs to restore preemption.
1261 */
1262 if (preempt_count() == 1) {
1263 if (per_cpu(rb_need_resched, cpu))
1264 preempt_enable_no_resched_notrace();
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace();
1269
1270 return 0;
1271}
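/*
 * Editor's sketch (illustrative, not part of the original file): the
 * expected pairing of ring_buffer_lock_reserve() and
 * ring_buffer_unlock_commit(). "struct example_entry" and
 * "example_record" are made-up names used only for illustration.
 */
struct example_entry {
	unsigned long	ip;
	unsigned long	parent_ip;
};

static int example_record(struct ring_buffer *buffer,
			  unsigned long ip, unsigned long parent_ip)
{
	struct ring_buffer_event *event;
	struct example_entry *entry;
	unsigned long irq_flags;

	/* length is the payload size only; the event header is added for us */
	event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &irq_flags);
	if (!event)
		return -EBUSY;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

	return ring_buffer_unlock_commit(buffer, event, irq_flags);
}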
1272
1273/**
1274 * ring_buffer_write - write data to the buffer without reserving
1275 * @buffer: The ring buffer to write to.
1276 * @length: The length of the data being written (excluding the event header)
1277 * @data: The data to write to the buffer.
1278 *
1279 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1280 * one function. If you already have the data to write to the buffer, it
1281 * may be easier to simply call this function.
1282 *
1283 * Note, like ring_buffer_lock_reserve, the length is the length of the data
1284 * and not the length of the event which would hold the header.
1285 */
1286int ring_buffer_write(struct ring_buffer *buffer,
1287 unsigned long length,
1288 void *data)
1289{
1290 struct ring_buffer_per_cpu *cpu_buffer;
1291 struct ring_buffer_event *event;
1292 unsigned long event_length;
1293 void *body;
1294 int ret = -EBUSY;
1295 int cpu, resched;
1296
1297 if (ring_buffers_off)
1298 return -EBUSY;
1299
1300 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY;
1302
1303 resched = need_resched();
1304 preempt_disable_notrace();
1305
1306 cpu = raw_smp_processor_id();
1307
1308 if (!cpu_isset(cpu, buffer->cpumask))
1309 goto out;
1310
1311 cpu_buffer = buffer->buffers[cpu];
1312
1313 if (atomic_read(&cpu_buffer->record_disabled))
1314 goto out;
1315
1316 event_length = rb_calculate_event_length(length);
1317 event = rb_reserve_next_event(cpu_buffer,
1318 RINGBUF_TYPE_DATA, event_length);
1319 if (!event)
1320 goto out;
1321
1322 body = rb_event_data(event);
1323
1324 memcpy(body, data, length);
1325
1326 rb_commit(cpu_buffer, event);
1327
1328 ret = 0;
1329 out:
1330 if (resched)
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334
1335 return ret;
1336}
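/*
 * Editor's sketch (illustrative only): the one-shot variant above copies
 * the payload for the caller, so no reserve/commit pairing is needed.
 * "example_log_marker" is a made-up name.
 */
static int example_log_marker(struct ring_buffer *buffer, u64 value)
{
	/* the payload is copied into the reserved event by ring_buffer_write() */
	return ring_buffer_write(buffer, sizeof(value), &value);
}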
1337
1338static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1339{
1340 struct buffer_page *reader = cpu_buffer->reader_page;
1341 struct buffer_page *head = cpu_buffer->head_page;
1342 struct buffer_page *commit = cpu_buffer->commit_page;
1343
1344 return reader->read == rb_page_commit(reader) &&
1345 (commit == reader ||
1346 (commit == head &&
1347 head->read == rb_page_commit(commit)));
1348}
1349
1350/**
1351 * ring_buffer_record_disable - stop all writes into the buffer
1352 * @buffer: The ring buffer to stop writes to.
1353 *
1354 * This prevents all writes to the buffer. Any attempt to write
1355 * to the buffer after this will fail and return NULL.
1356 *
1357 * The caller should call synchronize_sched() after this.
1358 */
1359void ring_buffer_record_disable(struct ring_buffer *buffer)
1360{
1361 atomic_inc(&buffer->record_disabled);
1362}
1363
1364/**
1365 * ring_buffer_record_enable - enable writes to the buffer
1366 * @buffer: The ring buffer to enable writes
1367 *
1368 * Note, multiple disables will need the same number of enables
 1369 * to truly enable the writing (much like preempt_disable).
1370 */
1371void ring_buffer_record_enable(struct ring_buffer *buffer)
1372{
1373 atomic_dec(&buffer->record_disabled);
1374}
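/*
 * Editor's sketch (illustrative only): quiescing writers around a
 * non-consuming inspection, per the comments above. synchronize_sched()
 * waits for writers already inside the reserve/commit path to finish.
 * "example_quiesce" is a made-up name.
 */
static void example_quiesce(struct ring_buffer *buffer)
{
	ring_buffer_record_disable(buffer);
	synchronize_sched();

	/* ... inspect or reset the buffer here ... */

	ring_buffer_record_enable(buffer);
}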
1375
1376/**
1377 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1378 * @buffer: The ring buffer to stop writes to.
1379 * @cpu: The CPU buffer to stop
1380 *
1381 * This prevents all writes to the buffer. Any attempt to write
1382 * to the buffer after this will fail and return NULL.
1383 *
1384 * The caller should call synchronize_sched() after this.
1385 */
1386void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1387{
1388 struct ring_buffer_per_cpu *cpu_buffer;
1389
1390 if (!cpu_isset(cpu, buffer->cpumask))
1391 return;
1392
1393 cpu_buffer = buffer->buffers[cpu];
1394 atomic_inc(&cpu_buffer->record_disabled);
1395}
1396
1397/**
1398 * ring_buffer_record_enable_cpu - enable writes to the buffer
1399 * @buffer: The ring buffer to enable writes
1400 * @cpu: The CPU to enable.
1401 *
1402 * Note, multiple disables will need the same number of enables
 1403 * to truly enable the writing (much like preempt_disable).
1404 */
1405void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1406{
1407 struct ring_buffer_per_cpu *cpu_buffer;
1408
1409 if (!cpu_isset(cpu, buffer->cpumask))
1410 return;
1411
1412 cpu_buffer = buffer->buffers[cpu];
1413 atomic_dec(&cpu_buffer->record_disabled);
1414}
1415
1416/**
1417 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1418 * @buffer: The ring buffer
1419 * @cpu: The per CPU buffer to get the entries from.
1420 */
1421unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1422{
1423 struct ring_buffer_per_cpu *cpu_buffer;
1424
1425 if (!cpu_isset(cpu, buffer->cpumask))
1426 return 0;
1427
1428 cpu_buffer = buffer->buffers[cpu];
1429 return cpu_buffer->entries;
1430}
1431
1432/**
1433 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1434 * @buffer: The ring buffer
1435 * @cpu: The per CPU buffer to get the number of overruns from
1436 */
1437unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1438{
1439 struct ring_buffer_per_cpu *cpu_buffer;
1440
1441 if (!cpu_isset(cpu, buffer->cpumask))
1442 return 0;
1443
1444 cpu_buffer = buffer->buffers[cpu];
1445 return cpu_buffer->overrun;
1446}
1447
1448/**
1449 * ring_buffer_entries - get the number of entries in a buffer
1450 * @buffer: The ring buffer
1451 *
1452 * Returns the total number of entries in the ring buffer
1453 * (all CPU entries)
1454 */
1455unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1456{
1457 struct ring_buffer_per_cpu *cpu_buffer;
1458 unsigned long entries = 0;
1459 int cpu;
1460
1461 /* if you care about this being correct, lock the buffer */
1462 for_each_buffer_cpu(buffer, cpu) {
1463 cpu_buffer = buffer->buffers[cpu];
1464 entries += cpu_buffer->entries;
1465 }
1466
1467 return entries;
1468}
1469
1470/**
 1471 * ring_buffer_overruns - get the number of overruns in the buffer
1472 * @buffer: The ring buffer
1473 *
1474 * Returns the total number of overruns in the ring buffer
1475 * (all CPU entries)
1476 */
1477unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1478{
1479 struct ring_buffer_per_cpu *cpu_buffer;
1480 unsigned long overruns = 0;
1481 int cpu;
1482
1483 /* if you care about this being correct, lock the buffer */
1484 for_each_buffer_cpu(buffer, cpu) {
1485 cpu_buffer = buffer->buffers[cpu];
1486 overruns += cpu_buffer->overrun;
1487 }
1488
1489 return overruns;
1490}
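/*
 * Editor's sketch (illustrative only): reporting the global counters.
 * As noted above, the sums are racy unless the buffer is locked.
 * "example_report_stats" is a made-up name.
 */
static void example_report_stats(struct ring_buffer *buffer)
{
	printk(KERN_INFO "ring buffer: %lu entries, %lu overruns\n",
	       ring_buffer_entries(buffer), ring_buffer_overruns(buffer));
}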
1491
1492/**
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502
1503 /* Iterator usage is expected to have record disabled */
1504 if (list_empty(&cpu_buffer->reader_page->list)) {
1505 iter->head_page = cpu_buffer->head_page;
1506 iter->head = cpu_buffer->head_page->read;
1507 } else {
1508 iter->head_page = cpu_buffer->reader_page;
1509 iter->head = cpu_buffer->reader_page->read;
1510 }
1511 if (iter->head)
1512 iter->read_stamp = cpu_buffer->read_stamp;
1513 else
1514 iter->read_stamp = iter->head_page->time_stamp;
1515}
1516
1517/**
1518 * ring_buffer_iter_empty - check if an iterator has no more to read
1519 * @iter: The iterator to check
1520 */
1521int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1522{
1523 struct ring_buffer_per_cpu *cpu_buffer;
1524
1525 cpu_buffer = iter->cpu_buffer;
1526
1527 return iter->head_page == cpu_buffer->commit_page &&
1528 iter->head == rb_commit_index(cpu_buffer);
1529}
1530
1531static void
1532rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1533 struct ring_buffer_event *event)
1534{
1535 u64 delta;
1536
1537 switch (event->type) {
1538 case RINGBUF_TYPE_PADDING:
1539 return;
1540
1541 case RINGBUF_TYPE_TIME_EXTEND:
1542 delta = event->array[0];
1543 delta <<= TS_SHIFT;
1544 delta += event->time_delta;
1545 cpu_buffer->read_stamp += delta;
1546 return;
1547
1548 case RINGBUF_TYPE_TIME_STAMP:
1549 /* FIXME: not implemented */
1550 return;
1551
1552 case RINGBUF_TYPE_DATA:
1553 cpu_buffer->read_stamp += event->time_delta;
1554 return;
1555
1556 default:
1557 BUG();
1558 }
1559 return;
1560}
1561
1562static void
1563rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1564 struct ring_buffer_event *event)
1565{
1566 u64 delta;
1567
1568 switch (event->type) {
1569 case RINGBUF_TYPE_PADDING:
1570 return;
1571
1572 case RINGBUF_TYPE_TIME_EXTEND:
1573 delta = event->array[0];
1574 delta <<= TS_SHIFT;
1575 delta += event->time_delta;
1576 iter->read_stamp += delta;
1577 return;
1578
1579 case RINGBUF_TYPE_TIME_STAMP:
1580 /* FIXME: not implemented */
1581 return;
1582
1583 case RINGBUF_TYPE_DATA:
1584 iter->read_stamp += event->time_delta;
1585 return;
1586
1587 default:
1588 BUG();
1589 }
1590 return;
1591}
1592
1593static struct buffer_page *
1594rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1595{
1596 struct buffer_page *reader = NULL;
1597 unsigned long flags;
1598 int nr_loops = 0;
1599
1600 spin_lock_irqsave(&cpu_buffer->lock, flags);
1601
1602 again:
1603 /*
1604 * This should normally only loop twice. But because the
1605 * start of the reader inserts an empty page, it causes
1606 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of).
1608 */
1609 if (unlikely(++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL;
1612 goto out;
1613 }
1614
1615 reader = cpu_buffer->reader_page;
1616
1617 /* If there's more to read, return this page */
1618 if (cpu_buffer->reader_page->read < rb_page_size(reader))
1619 goto out;
1620
1621 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader));
1624
1625 /* check if we caught up to the tail */
1626 reader = NULL;
1627 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1628 goto out;
1629
1630 /*
1631 * Splice the empty reader page into the list around the head.
1632 * Reset the reader page to size zero.
1633 */
1634
1635 reader = cpu_buffer->head_page;
1636 cpu_buffer->reader_page->list.next = reader->list.next;
1637 cpu_buffer->reader_page->list.prev = reader->list.prev;
1638
1639 local_set(&cpu_buffer->reader_page->write, 0);
1640 local_set(&cpu_buffer->reader_page->commit, 0);
1641
1642 /* Make the reader page now replace the head */
1643 reader->list.prev->next = &cpu_buffer->reader_page->list;
1644 reader->list.next->prev = &cpu_buffer->reader_page->list;
1645
1646 /*
1647 * If the tail is on the reader, then we must set the head
1648 * to the inserted page, otherwise we set it one before.
1649 */
1650 cpu_buffer->head_page = cpu_buffer->reader_page;
1651
1652 if (cpu_buffer->commit_page != reader)
1653 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1654
1655 /* Finally update the reader page to the new head */
1656 cpu_buffer->reader_page = reader;
1657 rb_reset_reader_page(cpu_buffer);
1658
1659 goto again;
1660
1661 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1663
1664 return reader;
1665}
1666
1667static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1668{
1669 struct ring_buffer_event *event;
1670 struct buffer_page *reader;
1671 unsigned length;
1672
1673 reader = rb_get_reader_page(cpu_buffer);
1674
1675 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader);
1677
1678 event = rb_reader_event(cpu_buffer);
1679
1680 if (event->type == RINGBUF_TYPE_DATA)
1681 cpu_buffer->entries--;
1682
1683 rb_update_read_stamp(cpu_buffer, event);
1684
1685 length = rb_event_length(event);
1686 cpu_buffer->reader_page->read += length;
1687}
1688
1689static void rb_advance_iter(struct ring_buffer_iter *iter)
1690{
1691 struct ring_buffer *buffer;
1692 struct ring_buffer_per_cpu *cpu_buffer;
1693 struct ring_buffer_event *event;
1694 unsigned length;
1695
1696 cpu_buffer = iter->cpu_buffer;
1697 buffer = cpu_buffer->buffer;
1698
1699 /*
1700 * Check if we are at the end of the buffer.
1701 */
1702 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page);
1704 rb_inc_iter(iter);
1705 return;
1706 }
1707
1708 event = rb_iter_head_event(iter);
1709
1710 length = rb_event_length(event);
1711
1712 /*
1713 * This should not be called to advance the header if we are
1714 * at the tail of the buffer.
1715 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
1717 (iter->head + length > rb_commit_index(cpu_buffer)));
1718
1719 rb_update_iter_read_stamp(iter, event);
1720
1721 iter->head += length;
1722
1723 /* check for end of page padding */
1724 if ((iter->head >= rb_page_size(iter->head_page)) &&
1725 (iter->head_page != cpu_buffer->commit_page))
1726 rb_advance_iter(iter);
1727}
1728
1729/**
1730 * ring_buffer_peek - peek at the next event to be read
1731 * @buffer: The ring buffer to read
 1732 * @cpu: The cpu to peek at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{
1741 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event;
1743 struct buffer_page *reader;
1744 int nr_loops = 0;
1745
1746 if (!cpu_isset(cpu, buffer->cpumask))
1747 return NULL;
1748
1749 cpu_buffer = buffer->buffers[cpu];
1750
1751 again:
1752 /*
1753 * We repeat when a timestamp is encountered. It is possible
1754 * to get multiple timestamps from an interrupt entering just
1755 * as one timestamp is about to be written. The max times
1756 * that this can happen is the number of nested interrupts we
1757 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly.
1759 */
1760 if (unlikely(++nr_loops > 10)) {
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL;
1763 }
1764
1765 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader)
1767 return NULL;
1768
1769 event = rb_reader_event(cpu_buffer);
1770
1771 switch (event->type) {
1772 case RINGBUF_TYPE_PADDING:
1773 RB_WARN_ON(cpu_buffer, 1);
1774 rb_advance_reader(cpu_buffer);
1775 return NULL;
1776
1777 case RINGBUF_TYPE_TIME_EXTEND:
1778 /* Internal data, OK to advance */
1779 rb_advance_reader(cpu_buffer);
1780 goto again;
1781
1782 case RINGBUF_TYPE_TIME_STAMP:
1783 /* FIXME: not implemented */
1784 rb_advance_reader(cpu_buffer);
1785 goto again;
1786
1787 case RINGBUF_TYPE_DATA:
1788 if (ts) {
1789 *ts = cpu_buffer->read_stamp + event->time_delta;
1790 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1791 }
1792 return event;
1793
1794 default:
1795 BUG();
1796 }
1797
1798 return NULL;
1799}
1800
1801/**
1802 * ring_buffer_iter_peek - peek at the next event to be read
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{
1812 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer;
1814 struct ring_buffer_event *event;
1815 int nr_loops = 0;
1816
1817 if (ring_buffer_iter_empty(iter))
1818 return NULL;
1819
1820 cpu_buffer = iter->cpu_buffer;
1821 buffer = cpu_buffer->buffer;
1822
1823 again:
1824 /*
1825 * We repeat when a timestamp is encountered. It is possible
1826 * to get multiple timestamps from an interrupt entering just
1827 * as one timestamp is about to be written. The max times
1828 * that this can happen is the number of nested interrupts we
1829 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly.
1831 */
1832 if (unlikely(++nr_loops > 10)) {
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL;
1835 }
1836
1837 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL;
1839
1840 event = rb_iter_head_event(iter);
1841
1842 switch (event->type) {
1843 case RINGBUF_TYPE_PADDING:
1844 rb_inc_iter(iter);
1845 goto again;
1846
1847 case RINGBUF_TYPE_TIME_EXTEND:
1848 /* Internal data, OK to advance */
1849 rb_advance_iter(iter);
1850 goto again;
1851
1852 case RINGBUF_TYPE_TIME_STAMP:
1853 /* FIXME: not implemented */
1854 rb_advance_iter(iter);
1855 goto again;
1856
1857 case RINGBUF_TYPE_DATA:
1858 if (ts) {
1859 *ts = iter->read_stamp + event->time_delta;
1860 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1861 }
1862 return event;
1863
1864 default:
1865 BUG();
1866 }
1867
1868 return NULL;
1869}
1870
1871/**
1872 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from
1874 *
1875 * Returns the next event in the ring buffer, and that event is consumed.
 1876 * Meaning that sequential reads will keep returning a different event,
1877 * and eventually empty the ring buffer if the producer is slower.
1878 */
1879struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{
1882 struct ring_buffer_per_cpu *cpu_buffer;
1883 struct ring_buffer_event *event;
1884
1885 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL;
1887
1888 event = ring_buffer_peek(buffer, cpu, ts);
1889 if (!event)
1890 return NULL;
1891
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer);
1894
1895 return event;
1896}
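/*
 * Editor's sketch (illustrative only): draining one CPU's buffer with the
 * consuming read interface. Each call returns the next unread event until
 * the per-cpu buffer is empty. "example_drain_cpu" is a made-up name.
 */
static void example_drain_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
		void *data = ring_buffer_event_data(event);
		unsigned len = ring_buffer_event_length(event);

		printk(KERN_INFO "cpu %d: %u byte event at %llu\n",
		       cpu, len, (unsigned long long)ts);
		(void)data; /* a real reader would decode the payload here */
	}
}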
1897
1898/**
1899 * ring_buffer_read_start - start a non consuming read of the buffer
1900 * @buffer: The ring buffer to read from
1901 * @cpu: The cpu buffer to iterate over
1902 *
1903 * This starts up an iteration through the buffer. It also disables
1904 * the recording to the buffer until the reading is finished.
1905 * This prevents the reading from being corrupted. This is not
1906 * a consuming read, so a producer is not expected.
1907 *
 1908 * Must be paired with ring_buffer_read_finish.
1909 */
1910struct ring_buffer_iter *
1911ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1912{
1913 struct ring_buffer_per_cpu *cpu_buffer;
1914 struct ring_buffer_iter *iter;
1915 unsigned long flags;
1916
1917 if (!cpu_isset(cpu, buffer->cpumask))
1918 return NULL;
1919
1920 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1921 if (!iter)
1922 return NULL;
1923
1924 cpu_buffer = buffer->buffers[cpu];
1925
1926 iter->cpu_buffer = cpu_buffer;
1927
1928 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched();
1930
1931 spin_lock_irqsave(&cpu_buffer->lock, flags);
1932 ring_buffer_iter_reset(iter);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1934
1935 return iter;
1936}
1937
1938/**
1939 * ring_buffer_finish - finish reading the iterator of the buffer
 1940 * @iter: The iterator retrieved by ring_buffer_read_start
1941 *
1942 * This re-enables the recording to the buffer, and frees the
1943 * iterator.
1944 */
1945void
1946ring_buffer_read_finish(struct ring_buffer_iter *iter)
1947{
1948 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1949
1950 atomic_dec(&cpu_buffer->record_disabled);
1951 kfree(iter);
1952}
1953
1954/**
1955 * ring_buffer_read - read the next item in the ring buffer by the iterator
1956 * @iter: The ring buffer iterator
1957 * @ts: The time stamp of the event read.
1958 *
1959 * This reads the next event in the ring buffer and increments the iterator.
1960 */
1961struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{
1964 struct ring_buffer_event *event;
1965
1966 event = ring_buffer_iter_peek(iter, ts);
1967 if (!event)
1968 return NULL;
1969
1970 rb_advance_iter(iter);
1971
1972 return event;
1973}
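/*
 * Editor's sketch (illustrative only): a non-consuming walk of one CPU
 * buffer using the iterator interface. Recording on that CPU buffer is
 * disabled between read_start and read_finish. "example_walk_cpu" is a
 * made-up name.
 */
static void example_walk_cpu(struct ring_buffer *buffer, int cpu)
{
	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_start(buffer, cpu);
	if (!iter)
		return;

	while ((event = ring_buffer_read(iter, &ts)))
		printk(KERN_INFO "cpu %d: event at %llu\n",
		       cpu, (unsigned long long)ts);

	ring_buffer_read_finish(iter);
}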
1974
1975/**
1976 * ring_buffer_size - return the size of the ring buffer (in bytes)
1977 * @buffer: The ring buffer.
1978 */
1979unsigned long ring_buffer_size(struct ring_buffer *buffer)
1980{
1981 return BUF_PAGE_SIZE * buffer->pages;
1982}
1983
1984static void
1985rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1986{
1987 cpu_buffer->head_page
1988 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1989 local_set(&cpu_buffer->head_page->write, 0);
1990 local_set(&cpu_buffer->head_page->commit, 0);
1991
1992 cpu_buffer->head_page->read = 0;
1993
1994 cpu_buffer->tail_page = cpu_buffer->head_page;
1995 cpu_buffer->commit_page = cpu_buffer->head_page;
1996
1997 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1998 local_set(&cpu_buffer->reader_page->write, 0);
1999 local_set(&cpu_buffer->reader_page->commit, 0);
2000 cpu_buffer->reader_page->read = 0;
2001
2002 cpu_buffer->overrun = 0;
2003 cpu_buffer->entries = 0;
2004}
2005
2006/**
2007 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
2008 * @buffer: The ring buffer to reset a per cpu buffer of
2009 * @cpu: The CPU buffer to be reset
2010 */
2011void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2012{
2013 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2014 unsigned long flags;
2015
2016 if (!cpu_isset(cpu, buffer->cpumask))
2017 return;
2018
2019 spin_lock_irqsave(&cpu_buffer->lock, flags);
2020
2021 rb_reset_cpu(cpu_buffer);
2022
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
2024}
2025
2026/**
2027 * ring_buffer_reset - reset a ring buffer
2028 * @buffer: The ring buffer to reset all cpu buffers
2029 */
2030void ring_buffer_reset(struct ring_buffer *buffer)
2031{
2032 int cpu;
2033
2034 for_each_buffer_cpu(buffer, cpu)
2035 ring_buffer_reset_cpu(buffer, cpu);
2036}
2037
2038/**
 2039 * ring_buffer_empty - is the ring buffer empty?
2040 * @buffer: The ring buffer to test
2041 */
2042int ring_buffer_empty(struct ring_buffer *buffer)
2043{
2044 struct ring_buffer_per_cpu *cpu_buffer;
2045 int cpu;
2046
2047 /* yes this is racy, but if you don't like the race, lock the buffer */
2048 for_each_buffer_cpu(buffer, cpu) {
2049 cpu_buffer = buffer->buffers[cpu];
2050 if (!rb_per_cpu_empty(cpu_buffer))
2051 return 0;
2052 }
2053 return 1;
2054}
2055
2056/**
2057 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
2058 * @buffer: The ring buffer
2059 * @cpu: The CPU buffer to test
2060 */
2061int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2062{
2063 struct ring_buffer_per_cpu *cpu_buffer;
2064
2065 if (!cpu_isset(cpu, buffer->cpumask))
2066 return 1;
2067
2068 cpu_buffer = buffer->buffers[cpu];
2069 return rb_per_cpu_empty(cpu_buffer);
2070}
2071
2072/**
2073 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2074 * @buffer_a: One buffer to swap with
2075 * @buffer_b: The other buffer to swap with
2076 *
2077 * This function is useful for tracers that want to take a "snapshot"
 2078 * of a CPU buffer and have another backup buffer lying around.
 2079 * It is expected that the tracer handles the cpu buffer not being
2080 * used at the moment.
2081 */
2082int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2083 struct ring_buffer *buffer_b, int cpu)
2084{
2085 struct ring_buffer_per_cpu *cpu_buffer_a;
2086 struct ring_buffer_per_cpu *cpu_buffer_b;
2087
2088 if (!cpu_isset(cpu, buffer_a->cpumask) ||
2089 !cpu_isset(cpu, buffer_b->cpumask))
2090 return -EINVAL;
2091
2092 /* At least make sure the two buffers are somewhat the same */
2093 if (buffer_a->size != buffer_b->size ||
2094 buffer_a->pages != buffer_b->pages)
2095 return -EINVAL;
2096
2097 cpu_buffer_a = buffer_a->buffers[cpu];
2098 cpu_buffer_b = buffer_b->buffers[cpu];
2099
2100 /*
2101 * We can't do a synchronize_sched here because this
2102 * function can be called in atomic context.
2103 * Normally this will be called from the same CPU as cpu.
2104 * If not it's up to the caller to protect this.
2105 */
2106 atomic_inc(&cpu_buffer_a->record_disabled);
2107 atomic_inc(&cpu_buffer_b->record_disabled);
2108
2109 buffer_a->buffers[cpu] = cpu_buffer_b;
2110 buffer_b->buffers[cpu] = cpu_buffer_a;
2111
2112 cpu_buffer_b->buffer = buffer_a;
2113 cpu_buffer_a->buffer = buffer_b;
2114
2115 atomic_dec(&cpu_buffer_a->record_disabled);
2116 atomic_dec(&cpu_buffer_b->record_disabled);
2117
2118 return 0;
2119}
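/*
 * Editor's sketch (illustrative only): the "snapshot" use mentioned above.
 * "snapshot_buffer" is assumed to be a spare buffer of the same size and
 * page count as the live one; "example_snapshot_cpu" is a made-up name.
 */
static int example_snapshot_cpu(struct ring_buffer *live,
				struct ring_buffer *snapshot_buffer, int cpu)
{
	/* after the swap, recording continues into the former spare pages */
	return ring_buffer_swap_cpu(live, snapshot_buffer, cpu);
}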
2120
2121static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos)
2124{
2125 int *p = filp->private_data;
2126 char buf[64];
2127 int r;
2128
2129 /* !ring_buffers_off == tracing_on */
2130 r = sprintf(buf, "%d\n", !*p);
2131
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133}
2134
2135static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos)
2138{
2139 int *p = filp->private_data;
2140 char buf[64];
2141 long val;
2142 int ret;
2143
2144 if (cnt >= sizeof(buf))
2145 return -EINVAL;
2146
2147 if (copy_from_user(&buf, ubuf, cnt))
2148 return -EFAULT;
2149
2150 buf[cnt] = 0;
2151
2152 ret = strict_strtoul(buf, 10, &val);
2153 if (ret < 0)
2154 return ret;
2155
2156 /* !ring_buffers_off == tracing_on */
2157 *p = !val;
2158
2159 (*ppos)++;
2160
2161 return cnt;
2162}
2163
2164static struct file_operations rb_simple_fops = {
2165 .open = tracing_open_generic,
2166 .read = rb_simple_read,
2167 .write = rb_simple_write,
2168};
2169
2170
2171static __init int rb_init_debugfs(void)
2172{
2173 struct dentry *d_tracer;
2174 struct dentry *entry;
2175
2176 d_tracer = tracing_init_dentry();
2177
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops);
2180 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182
2183 return 0;
2184}
2185
2186fs_initcall(rb_init_debugfs);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c3..d86e3252f300 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
14#include <linux/utsrelease.h> 14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 16#include <linux/seq_file.h>
17#include <linux/notifier.h>
17#include <linux/debugfs.h> 18#include <linux/debugfs.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
@@ -22,6 +23,7 @@
22#include <linux/ftrace.h> 23#include <linux/ftrace.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/kdebug.h>
25#include <linux/ctype.h> 27#include <linux/ctype.h>
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/poll.h> 29#include <linux/poll.h>
@@ -31,25 +33,37 @@
31#include <linux/writeback.h> 33#include <linux/writeback.h>
32 34
33#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
34 38
35#include "trace.h" 39#include "trace.h"
36 40
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
39 45
40static unsigned long __read_mostly tracing_nr_buffers; 46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47
48static inline void ftrace_disable_cpu(void)
49{
50 preempt_disable();
51 local_inc(&__get_cpu_var(ftrace_cpu_disabled));
52}
53
54static inline void ftrace_enable_cpu(void)
55{
56 local_dec(&__get_cpu_var(ftrace_cpu_disabled));
57 preempt_enable();
58}
59
41static cpumask_t __read_mostly tracing_buffer_mask; 60static cpumask_t __read_mostly tracing_buffer_mask;
42 61
43#define for_each_tracing_cpu(cpu) \ 62#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask) 63 for_each_cpu_mask(cpu, tracing_buffer_mask)
45 64
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1; 65static int tracing_disabled = 1;
50 66
51static unsigned long tracing_pages_allocated;
52
53long 67long
54ns2usecs(cycle_t nsec) 68ns2usecs(cycle_t nsec)
55{ 69{
@@ -60,7 +74,9 @@ ns2usecs(cycle_t nsec)
60 74
61cycle_t ftrace_now(int cpu) 75cycle_t ftrace_now(int cpu)
62{ 76{
63 return cpu_clock(cpu); 77 u64 ts = ring_buffer_time_stamp(cpu);
78 ring_buffer_normalize_time_stamp(cpu, &ts);
79 return ts;
64} 80}
65 81
66/* 82/*
@@ -100,11 +116,18 @@ static int tracer_enabled = 1;
100int ftrace_function_enabled; 116int ftrace_function_enabled;
101 117
102/* 118/*
103 * trace_nr_entries is the number of entries that is allocated 119 * trace_buf_size is the size in bytes that is allocated
104 * for a buffer. Note, the number of entries is always rounded 120 * for a buffer. Note, the number of bytes is always rounded
105 * to ENTRIES_PER_PAGE. 121 * to page size.
122 *
123 * This number is purposely set to a low number of 16384.
124 * If the dump on oops happens, it will be much appreciated
125 * to not have to wait for all that output. Anyway this can be
126 * boot time and run time configurable.
106 */ 127 */
107static unsigned long trace_nr_entries = 65536UL; 128#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
129
130static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
108 131
109/* trace_types holds a link list of available tracers. */ 132/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly; 133static struct tracer *trace_types __read_mostly;
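The TRACE_BUF_SIZE_DEFAULT above is the old 16384-entry default expressed in bytes: 16384 entries x 88 bytes per entry = 1,441,792 bytes, which the allocator then rounds to whole pages as the comment notes. Whether that comes out to roughly 1.4 MB per per-CPU buffer depends on how the ring buffer divides the request, so treat the per-CPU figure as an estimate rather than a guarantee.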
@@ -133,24 +156,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
133/* trace_flags holds iter_ctrl options */ 156/* trace_flags holds iter_ctrl options */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
135 158
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153
154/** 159/**
155 * trace_wake_up - wake up tasks waiting for trace input 160 * trace_wake_up - wake up tasks waiting for trace input
156 * 161 *
@@ -167,23 +172,21 @@ void trace_wake_up(void)
167 wake_up(&trace_wait); 172 wake_up(&trace_wait);
168} 173}
169 174
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) 175static int __init set_buf_size(char *str)
171
172static int __init set_nr_entries(char *str)
173{ 176{
174 unsigned long nr_entries; 177 unsigned long buf_size;
175 int ret; 178 int ret;
176 179
177 if (!str) 180 if (!str)
178 return 0; 181 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries); 182 ret = strict_strtoul(str, 0, &buf_size);
180 /* nr_entries can not be zero */ 183 /* nr_entries can not be zero */
181 if (ret < 0 || nr_entries == 0) 184 if (ret < 0 || buf_size == 0)
182 return 0; 185 return 0;
183 trace_nr_entries = nr_entries; 186 trace_buf_size = buf_size;
184 return 1; 187 return 1;
185} 188}
186__setup("trace_entries=", set_nr_entries); 189__setup("trace_buf_size=", set_buf_size);
187 190
188unsigned long nsecs_to_usecs(unsigned long nsecs) 191unsigned long nsecs_to_usecs(unsigned long nsecs)
189{ 192{
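With the rename from trace_entries= to trace_buf_size=, the boot parameter now takes a byte count rather than an entry count. For example, a command line containing

    trace_buf_size=1441792

would request the default size explicitly; since set_buf_size() parses the string with strict_strtoul(..., 0, ...), the hexadecimal equivalent 0x160000 should be accepted as well, with the final rounding to pages left to the ring-buffer code.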
@@ -191,21 +194,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
191} 194}
192 195
193/* 196/*
194 * trace_flag_type is an enumeration that holds different
195 * states when a trace occurs. These are:
196 * IRQS_OFF - interrupts were disabled
197 * NEED_RESCED - reschedule is requested
198 * HARDIRQ - inside an interrupt handler
199 * SOFTIRQ - inside a softirq handler
200 */
201enum trace_flag_type {
202 TRACE_FLAG_IRQS_OFF = 0x01,
203 TRACE_FLAG_NEED_RESCHED = 0x02,
204 TRACE_FLAG_HARDIRQ = 0x04,
205 TRACE_FLAG_SOFTIRQ = 0x08,
206};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that 197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols. 198 * control the output of kernel symbols.
211 */ 199 */
@@ -224,6 +212,7 @@ static const char *trace_options[] = {
224 "block", 212 "block",
225 "stacktrace", 213 "stacktrace",
226 "sched-tree", 214 "sched-tree",
215 "ftrace_printk",
227 NULL 216 NULL
228}; 217};
229 218
@@ -266,54 +255,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
266 tracing_record_cmdline(current); 255 tracing_record_cmdline(current);
267} 256}
268 257
269#define CHECK_COND(cond) \
270 if (unlikely(cond)) { \
271 tracing_disabled = 1; \
272 WARN_ON(1); \
273 return -1; \
274 }
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safety measure we check to make sure the data pages have not

280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also performs various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/** 258/**
318 * trace_seq_printf - sequence printing of trace information 259 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor 260 * @s: trace sequence descriptor
@@ -395,28 +336,23 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
395 return len; 336 return len;
396} 337}
397 338
398#define HEX_CHARS 17 339#define MAX_MEMHEX_BYTES 8
399static const char hex2asc[] = "0123456789abcdef"; 340#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
400 341
401static int 342static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 343trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{ 344{
404 unsigned char hex[HEX_CHARS]; 345 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem; 346 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j; 347 int i, j;
408 348
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN 349#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) { 350 for (i = 0, j = 0; i < len; i++) {
413#else 351#else
414 for (i = len-1, j = 0; i >= 0; i--) { 352 for (i = len-1, j = 0; i >= 0; i--) {
415#endif 353#endif
416 byte = data[i]; 354 hex[j++] = hex_asc_hi(data[i]);
417 355 hex[j++] = hex_asc_lo(data[i]);
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 } 356 }
421 hex[j++] = ' '; 357 hex[j++] = ' ';
422 358
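The rewrite above drops the private hex2asc[] table and the runtime BUG_ON in favour of the kernel's hex_asc_hi()/hex_asc_lo() helpers plus a compile-time bound (MAX_MEMHEX_BYTES, enforced with a BUILD_BUG_ON at the call site further down). A small userspace sketch of the same byte-to-hex conversion, following the little-endian branch that walks the bytes from len-1 down to 0; hex_asc_hi/hex_asc_lo are redefined locally here and putmem_hex is an invented name:

    #include <stdio.h>

    static const char hex_asc[] = "0123456789abcdef";
    #define hex_asc_lo(x) hex_asc[(x) & 0x0f]
    #define hex_asc_hi(x) hex_asc[((x) >> 4) & 0x0f]

    #define MAX_MEMHEX_BYTES 8
    #define HEX_CHARS (MAX_MEMHEX_BYTES * 2 + 1)

    /* Convert up to 8 bytes to hex, most-significant byte first on a
     * little-endian host, mirroring the "for (i = len-1; ...)" branch. */
    static void putmem_hex(const void *mem, size_t len, char *out)
    {
        const unsigned char *data = mem;
        int i, j = 0;

        for (i = (int)len - 1; i >= 0; i--) {
            out[j++] = hex_asc_hi(data[i]);
            out[j++] = hex_asc_lo(data[i]);
        }
        out[j] = '\0';
    }

    int main(void)
    {
        unsigned int pid = 4660;            /* 0x1234 */
        char buf[HEX_CHARS];

        putmem_hex(&pid, sizeof(pid), buf);
        printf("%s\n", buf);                /* "00001234" on little-endian hosts */
        return 0;
    }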
@@ -460,34 +396,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
460 trace_seq_reset(s); 396 trace_seq_reset(s);
461} 397}
462 398
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/** 399/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 400 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer 401 * @tr: tracer
@@ -500,17 +408,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
500void 408void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 409update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{ 410{
503 struct trace_array_cpu *data; 411 struct ring_buffer *buf = tr->buffer;
504 int i;
505 412
506 WARN_ON_ONCE(!irqs_disabled()); 413 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock); 414 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */ 415
509 for_each_tracing_cpu(i) { 416 tr->buffer = max_tr.buffer;
510 data = tr->data[i]; 417 max_tr.buffer = buf;
511 flip_trace(max_tr.data[i], data); 418
512 tracing_reset(data); 419 ftrace_disable_cpu();
513 } 420 ring_buffer_reset(tr->buffer);
421 ftrace_enable_cpu();
514 422
515 __update_max_tr(tr, tsk, cpu); 423 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock); 424 __raw_spin_unlock(&ftrace_max_lock);
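update_max_tr() no longer flips page lists per CPU; the snapshot is now a swap of the two struct ring_buffer pointers followed by a reset of the buffer that becomes live again. A toy sketch of that pointer-swap snapshot (the struct layouts and the update_max() name are simplifications, not the kernel's types):

    #include <stdio.h>

    struct buffer { const char *tag; };

    struct trace_array {
        struct buffer *buffer;
    };

    /* Snapshot: the live array takes over the stale max buffer, while the
     * max array keeps the buffer holding the interesting trace. */
    static void update_max(struct trace_array *live, struct trace_array *max)
    {
        struct buffer *buf = live->buffer;

        live->buffer = max->buffer;
        max->buffer  = buf;

        /* the kernel then resets live->buffer so new events start clean */
    }

    int main(void)
    {
        struct buffer a = { "current trace" }, b = { "old max" };
        struct trace_array live = { &a }, max = { &b };

        update_max(&live, &max);
        printf("max now holds:  %s\n", max.buffer->tag);   /* "current trace" */
        printf("live now holds: %s\n", live.buffer->tag);  /* "old max"       */
        return 0;
    }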
@@ -527,16 +435,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
527void 435void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 436update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{ 437{
530 struct trace_array_cpu *data = tr->data[cpu]; 438 int ret;
531 int i;
532 439
533 WARN_ON_ONCE(!irqs_disabled()); 440 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock); 441 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537 442
538 flip_trace(max_tr.data[cpu], data); 443 ftrace_disable_cpu();
539 tracing_reset(data); 444
445 ring_buffer_reset(max_tr.buffer);
446 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
447
448 ftrace_enable_cpu();
449
450 WARN_ON_ONCE(ret);
540 451
541 __update_max_tr(tr, tsk, cpu); 452 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock); 453 __raw_spin_unlock(&ftrace_max_lock);
@@ -573,7 +484,6 @@ int register_tracer(struct tracer *type)
573#ifdef CONFIG_FTRACE_STARTUP_TEST 484#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) { 485 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace; 486 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace; 487 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl; 488 int saved_ctrl = tr->ctrl;
579 int i; 489 int i;
@@ -585,10 +495,7 @@ int register_tracer(struct tracer *type)
585 * If we fail, we do not register this tracer. 495 * If we fail, we do not register this tracer.
586 */ 496 */
587 for_each_tracing_cpu(i) { 497 for_each_tracing_cpu(i) {
588 data = tr->data[i]; 498 tracing_reset(tr, i);
589 if (!head_page(data))
590 continue;
591 tracing_reset(data);
592 } 499 }
593 current_trace = type; 500 current_trace = type;
594 tr->ctrl = 0; 501 tr->ctrl = 0;
@@ -604,10 +511,7 @@ int register_tracer(struct tracer *type)
604 } 511 }
605 /* Only reset on passing, to avoid touching corrupted buffers */ 512 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) { 513 for_each_tracing_cpu(i) {
607 data = tr->data[i]; 514 tracing_reset(tr, i);
608 if (!head_page(data))
609 continue;
610 tracing_reset(data);
611 } 515 }
612 printk(KERN_CONT "PASSED\n"); 516 printk(KERN_CONT "PASSED\n");
613 } 517 }
@@ -653,13 +557,11 @@ void unregister_tracer(struct tracer *type)
653 mutex_unlock(&trace_types_lock); 557 mutex_unlock(&trace_types_lock);
654} 558}
655 559
656void tracing_reset(struct trace_array_cpu *data) 560void tracing_reset(struct trace_array *tr, int cpu)
657{ 561{
658 data->trace_idx = 0; 562 ftrace_disable_cpu();
659 data->overrun = 0; 563 ring_buffer_reset_cpu(tr->buffer, cpu);
660 data->trace_head = data->trace_tail = head_page(data); 564 ftrace_enable_cpu();
661 data->trace_head_idx = 0;
662 data->trace_tail_idx = 0;
663} 565}
664 566
665#define SAVED_CMDLINES 128 567#define SAVED_CMDLINES 128
@@ -745,82 +647,20 @@ void tracing_record_cmdline(struct task_struct *tsk)
745 trace_save_cmdline(tsk); 647 trace_save_cmdline(tsk);
746} 648}
747 649
748static inline struct list_head * 650void
749trace_next_list(struct trace_array_cpu *data, struct list_head *next) 651tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
750{ 652 int pc)
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{ 653{
815 struct task_struct *tsk = current; 654 struct task_struct *tsk = current;
816 unsigned long pc;
817
818 pc = preempt_count();
819 655
820 entry->preempt_count = pc & 0xff; 656 entry->preempt_count = pc & 0xff;
821 entry->pid = (tsk) ? tsk->pid : 0; 657 entry->pid = (tsk) ? tsk->pid : 0;
822 entry->t = ftrace_now(raw_smp_processor_id()); 658 entry->flags =
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
661#else
662 TRACE_FLAG_IRQS_NOSUPPORT |
663#endif
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 664 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 665 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 666 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
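tracing_generic_entry_update() now receives the preempt count as an explicit pc argument and builds the flag byte from the interrupt and scheduling state; on architectures without TRACE_IRQFLAGS_SUPPORT it sets TRACE_FLAG_IRQS_NOSUPPORT instead of inspecting the saved flags. A compact sketch of the flag composition, using the bit values from the enum this patch moves out of trace.c; the entry_flags() helper is invented for illustration:

    #include <stdio.h>

    enum trace_flag_type {                 /* values from the enum now in trace.h */
        TRACE_FLAG_IRQS_OFF     = 0x01,
        TRACE_FLAG_NEED_RESCHED = 0x02,
        TRACE_FLAG_HARDIRQ      = 0x04,
        TRACE_FLAG_SOFTIRQ      = 0x08
    };

    /* Compose the per-entry flag byte the way tracing_generic_entry_update()
     * does, from booleans the kernel derives from flags and pc. */
    static unsigned char entry_flags(int irqs_off, int in_hardirq,
                                     int in_softirq, int need_resched)
    {
        return (irqs_off     ? TRACE_FLAG_IRQS_OFF     : 0) |
               (in_hardirq   ? TRACE_FLAG_HARDIRQ      : 0) |
               (in_softirq   ? TRACE_FLAG_SOFTIRQ      : 0) |
               (need_resched ? TRACE_FLAG_NEED_RESCHED : 0);
    }

    int main(void)
    {
        /* an event taken with IRQs off inside a softirq, resched pending */
        printf("flags = 0x%02x\n", entry_flags(1, 0, 1, 1));  /* 0x0b */
        return 0;
    }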
@@ -828,145 +668,141 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
828 668
829void 669void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data, 670trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags) 671 unsigned long ip, unsigned long parent_ip, unsigned long flags,
672 int pc)
832{ 673{
833 struct trace_entry *entry; 674 struct ring_buffer_event *event;
675 struct ftrace_entry *entry;
834 unsigned long irq_flags; 676 unsigned long irq_flags;
835 677
836 raw_local_irq_save(irq_flags); 678 /* If we are reading the ring buffer, don't trace */
837 __raw_spin_lock(&data->lock); 679 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
838 entry = tracing_get_trace_entry(tr, data); 680 return;
839 tracing_generic_entry_update(entry, flags); 681
840 entry->type = TRACE_FN; 682 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
841 entry->fn.ip = ip; 683 &irq_flags);
842 entry->fn.parent_ip = parent_ip; 684 if (!event)
843 __raw_spin_unlock(&data->lock); 685 return;
844 raw_local_irq_restore(irq_flags); 686 entry = ring_buffer_event_data(event);
687 tracing_generic_entry_update(&entry->ent, flags, pc);
688 entry->ent.type = TRACE_FN;
689 entry->ip = ip;
690 entry->parent_ip = parent_ip;
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
845} 692}
846 693
847void 694void
848ftrace(struct trace_array *tr, struct trace_array_cpu *data, 695ftrace(struct trace_array *tr, struct trace_array_cpu *data,
849 unsigned long ip, unsigned long parent_ip, unsigned long flags) 696 unsigned long ip, unsigned long parent_ip, unsigned long flags,
697 int pc)
850{ 698{
851 if (likely(!atomic_read(&data->disabled))) 699 if (likely(!atomic_read(&data->disabled)))
852 trace_function(tr, data, ip, parent_ip, flags); 700 trace_function(tr, data, ip, parent_ip, flags, pc);
853} 701}
854 702
855#ifdef CONFIG_MMIOTRACE 703static void ftrace_trace_stack(struct trace_array *tr,
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, 704 struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw) 705 unsigned long flags,
706 int skip, int pc)
858{ 707{
859 struct trace_entry *entry; 708#ifdef CONFIG_STACKTRACE
709 struct ring_buffer_event *event;
710 struct stack_entry *entry;
711 struct stack_trace trace;
860 unsigned long irq_flags; 712 unsigned long irq_flags;
861 713
862 raw_local_irq_save(irq_flags); 714 if (!(trace_flags & TRACE_ITER_STACKTRACE))
863 __raw_spin_lock(&data->lock); 715 return;
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869
870 __raw_spin_unlock(&data->lock);
871 raw_local_irq_restore(irq_flags);
872
873 trace_wake_up();
874}
875
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
877 struct mmiotrace_map *map)
878{
879 struct trace_entry *entry;
880 unsigned long irq_flags;
881 716
882 raw_local_irq_save(irq_flags); 717 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
883 __raw_spin_lock(&data->lock); 718 &irq_flags);
719 if (!event)
720 return;
721 entry = ring_buffer_event_data(event);
722 tracing_generic_entry_update(&entry->ent, flags, pc);
723 entry->ent.type = TRACE_STACK;
884 724
885 entry = tracing_get_trace_entry(tr, data); 725 memset(&entry->caller, 0, sizeof(entry->caller));
886 tracing_generic_entry_update(entry, 0);
887 entry->type = TRACE_MMIO_MAP;
888 entry->mmiomap = *map;
889 726
890 __raw_spin_unlock(&data->lock); 727 trace.nr_entries = 0;
891 raw_local_irq_restore(irq_flags); 728 trace.max_entries = FTRACE_STACK_ENTRIES;
729 trace.skip = skip;
730 trace.entries = entry->caller;
892 731
893 trace_wake_up(); 732 save_stack_trace(&trace);
894} 733 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
895#endif 734#endif
735}
896 736
897void __trace_stack(struct trace_array *tr, 737void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data, 738 struct trace_array_cpu *data,
899 unsigned long flags, 739 unsigned long flags,
900 int skip) 740 int skip)
901{ 741{
902 struct trace_entry *entry; 742 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
903 struct stack_trace trace;
904
905 if (!(trace_flags & TRACE_ITER_STACKTRACE))
906 return;
907
908 entry = tracing_get_trace_entry(tr, data);
909 tracing_generic_entry_update(entry, flags);
910 entry->type = TRACE_STACK;
911
912 memset(&entry->stack, 0, sizeof(entry->stack));
913
914 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip;
917 trace.entries = entry->stack.caller;
918
919 save_stack_trace(&trace);
920} 743}
921 744
922void 745static void
923__trace_special(void *__tr, void *__data, 746ftrace_trace_special(void *__tr, void *__data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3) 747 unsigned long arg1, unsigned long arg2, unsigned long arg3,
748 int pc)
925{ 749{
750 struct ring_buffer_event *event;
926 struct trace_array_cpu *data = __data; 751 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr; 752 struct trace_array *tr = __tr;
928 struct trace_entry *entry; 753 struct special_entry *entry;
929 unsigned long irq_flags; 754 unsigned long irq_flags;
930 755
931 raw_local_irq_save(irq_flags); 756 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
932 __raw_spin_lock(&data->lock); 757 &irq_flags);
933 entry = tracing_get_trace_entry(tr, data); 758 if (!event)
934 tracing_generic_entry_update(entry, 0); 759 return;
935 entry->type = TRACE_SPECIAL; 760 entry = ring_buffer_event_data(event);
936 entry->special.arg1 = arg1; 761 tracing_generic_entry_update(&entry->ent, 0, pc);
937 entry->special.arg2 = arg2; 762 entry->ent.type = TRACE_SPECIAL;
938 entry->special.arg3 = arg3; 763 entry->arg1 = arg1;
939 __trace_stack(tr, data, irq_flags, 4); 764 entry->arg2 = arg2;
940 __raw_spin_unlock(&data->lock); 765 entry->arg3 = arg3;
941 raw_local_irq_restore(irq_flags); 766 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
767 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
942 768
943 trace_wake_up(); 769 trace_wake_up();
944} 770}
945 771
946void 772void
773__trace_special(void *__tr, void *__data,
774 unsigned long arg1, unsigned long arg2, unsigned long arg3)
775{
776 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
777}
778
779void
947tracing_sched_switch_trace(struct trace_array *tr, 780tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data, 781 struct trace_array_cpu *data,
949 struct task_struct *prev, 782 struct task_struct *prev,
950 struct task_struct *next, 783 struct task_struct *next,
951 unsigned long flags) 784 unsigned long flags, int pc)
952{ 785{
953 struct trace_entry *entry; 786 struct ring_buffer_event *event;
787 struct ctx_switch_entry *entry;
954 unsigned long irq_flags; 788 unsigned long irq_flags;
955 789
956 raw_local_irq_save(irq_flags); 790 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
957 __raw_spin_lock(&data->lock); 791 &irq_flags);
958 entry = tracing_get_trace_entry(tr, data); 792 if (!event)
959 tracing_generic_entry_update(entry, flags); 793 return;
960 entry->type = TRACE_CTX; 794 entry = ring_buffer_event_data(event);
961 entry->ctx.prev_pid = prev->pid; 795 tracing_generic_entry_update(&entry->ent, flags, pc);
962 entry->ctx.prev_prio = prev->prio; 796 entry->ent.type = TRACE_CTX;
963 entry->ctx.prev_state = prev->state; 797 entry->prev_pid = prev->pid;
964 entry->ctx.next_pid = next->pid; 798 entry->prev_prio = prev->prio;
965 entry->ctx.next_prio = next->prio; 799 entry->prev_state = prev->state;
966 entry->ctx.next_state = next->state; 800 entry->next_pid = next->pid;
967 __trace_stack(tr, data, flags, 5); 801 entry->next_prio = next->prio;
968 __raw_spin_unlock(&data->lock); 802 entry->next_state = next->state;
969 raw_local_irq_restore(irq_flags); 803 entry->next_cpu = task_cpu(next);
804 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
805 ftrace_trace_stack(tr, data, flags, 5, pc);
970} 806}
971 807
972void 808void
@@ -974,25 +810,28 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data, 810 struct trace_array_cpu *data,
975 struct task_struct *wakee, 811 struct task_struct *wakee,
976 struct task_struct *curr, 812 struct task_struct *curr,
977 unsigned long flags) 813 unsigned long flags, int pc)
978{ 814{
979 struct trace_entry *entry; 815 struct ring_buffer_event *event;
816 struct ctx_switch_entry *entry;
980 unsigned long irq_flags; 817 unsigned long irq_flags;
981 818
982 raw_local_irq_save(irq_flags); 819 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
983 __raw_spin_lock(&data->lock); 820 &irq_flags);
984 entry = tracing_get_trace_entry(tr, data); 821 if (!event)
985 tracing_generic_entry_update(entry, flags); 822 return;
986 entry->type = TRACE_WAKE; 823 entry = ring_buffer_event_data(event);
987 entry->ctx.prev_pid = curr->pid; 824 tracing_generic_entry_update(&entry->ent, flags, pc);
988 entry->ctx.prev_prio = curr->prio; 825 entry->ent.type = TRACE_WAKE;
989 entry->ctx.prev_state = curr->state; 826 entry->prev_pid = curr->pid;
990 entry->ctx.next_pid = wakee->pid; 827 entry->prev_prio = curr->prio;
991 entry->ctx.next_prio = wakee->prio; 828 entry->prev_state = curr->state;
992 entry->ctx.next_state = wakee->state; 829 entry->next_pid = wakee->pid;
993 __trace_stack(tr, data, flags, 6); 830 entry->next_prio = wakee->prio;
994 __raw_spin_unlock(&data->lock); 831 entry->next_state = wakee->state;
995 raw_local_irq_restore(irq_flags); 832 entry->next_cpu = task_cpu(wakee);
833 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
834 ftrace_trace_stack(tr, data, flags, 6, pc);
996 835
997 trace_wake_up(); 836 trace_wake_up();
998} 837}
@@ -1002,26 +841,24 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1002{ 841{
1003 struct trace_array *tr = &global_trace; 842 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data; 843 struct trace_array_cpu *data;
1005 unsigned long flags;
1006 long disabled;
1007 int cpu; 844 int cpu;
845 int pc;
1008 846
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) 847 if (tracing_disabled || !tr->ctrl)
1010 return; 848 return;
1011 849
1012 local_irq_save(flags); 850 pc = preempt_count();
851 preempt_disable_notrace();
1013 cpu = raw_smp_processor_id(); 852 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu]; 853 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled);
1016 854
1017 if (likely(disabled == 1)) 855 if (likely(!atomic_read(&data->disabled)))
1018 __trace_special(tr, data, arg1, arg2, arg3); 856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1019 857
1020 atomic_dec(&data->disabled); 858 preempt_enable_notrace();
1021 local_irq_restore(flags);
1022} 859}
1023 860
1024#ifdef CONFIG_FTRACE 861#ifdef CONFIG_FUNCTION_TRACER
1025static void 862static void
1026function_trace_call(unsigned long ip, unsigned long parent_ip) 863function_trace_call(unsigned long ip, unsigned long parent_ip)
1027{ 864{
@@ -1029,24 +866,28 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1029 struct trace_array_cpu *data; 866 struct trace_array_cpu *data;
1030 unsigned long flags; 867 unsigned long flags;
1031 long disabled; 868 long disabled;
1032 int cpu; 869 int cpu, resched;
870 int pc;
1033 871
1034 if (unlikely(!ftrace_function_enabled)) 872 if (unlikely(!ftrace_function_enabled))
1035 return; 873 return;
1036 874
1037 if (skip_trace(ip)) 875 pc = preempt_count();
1038 return; 876 resched = need_resched();
1039 877 preempt_disable_notrace();
1040 local_irq_save(flags); 878 local_save_flags(flags);
1041 cpu = raw_smp_processor_id(); 879 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu]; 880 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled); 881 disabled = atomic_inc_return(&data->disabled);
1044 882
1045 if (likely(disabled == 1)) 883 if (likely(disabled == 1))
1046 trace_function(tr, data, ip, parent_ip, flags); 884 trace_function(tr, data, ip, parent_ip, flags, pc);
1047 885
1048 atomic_dec(&data->disabled); 886 atomic_dec(&data->disabled);
1049 local_irq_restore(flags); 887 if (resched)
888 preempt_enable_no_resched_notrace();
889 else
890 preempt_enable_notrace();
1050} 891}
1051 892
1052static struct ftrace_ops trace_ops __read_mostly = 893static struct ftrace_ops trace_ops __read_mostly =
@@ -1073,111 +914,96 @@ enum trace_file_type {
1073 TRACE_FILE_LAT_FMT = 1, 914 TRACE_FILE_LAT_FMT = 1,
1074}; 915};
1075 916
1076static struct trace_entry * 917static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{ 918{
1080 struct page *page; 919 /* Don't allow ftrace to trace into the ring buffers */
1081 struct trace_entry *array; 920 ftrace_disable_cpu();
1082 921
1083 if (iter->next_idx[cpu] >= tr->entries || 922 iter->idx++;
1084 iter->next_idx[cpu] >= data->trace_idx || 923 if (iter->buffer_iter[iter->cpu])
1085 (data->trace_head == data->trace_tail && 924 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088 925
1089 if (!iter->next_page[cpu]) { 926 ftrace_enable_cpu();
1090 /* Initialize the iterator for this cpu trace buffer */ 927}
1091 WARN_ON(!data->trace_tail);
1092 page = virt_to_page(data->trace_tail);
1093 iter->next_page[cpu] = &page->lru;
1094 iter->next_page_idx[cpu] = data->trace_tail_idx;
1095 }
1096 928
1097 page = list_entry(iter->next_page[cpu], struct page, lru); 929static struct trace_entry *
1098 BUG_ON(&data->trace_pages == &page->lru); 930peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
931{
932 struct ring_buffer_event *event;
933 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
934
935 /* Don't allow ftrace to trace into the ring buffers */
936 ftrace_disable_cpu();
937
938 if (buf_iter)
939 event = ring_buffer_iter_peek(buf_iter, ts);
940 else
941 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1099 942
1100 array = page_address(page); 943 ftrace_enable_cpu();
1101 944
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); 945 return event ? ring_buffer_event_data(event) : NULL;
1103 return &array[iter->next_page_idx[cpu]];
1104} 946}
1105 947
1106static struct trace_entry * 948static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu) 949__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1108{ 950{
1109 struct trace_array *tr = iter->tr; 951 struct ring_buffer *buffer = iter->tr->buffer;
1110 struct trace_entry *ent, *next = NULL; 952 struct trace_entry *ent, *next = NULL;
953 u64 next_ts = 0, ts;
1111 int next_cpu = -1; 954 int next_cpu = -1;
1112 int cpu; 955 int cpu;
1113 956
1114 for_each_tracing_cpu(cpu) { 957 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu])) 958
959 if (ring_buffer_empty_cpu(buffer, cpu))
1116 continue; 960 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); 961
962 ent = peek_next_entry(iter, cpu, &ts);
963
1118 /* 964 /*
1119 * Pick the entry with the smallest timestamp: 965 * Pick the entry with the smallest timestamp:
1120 */ 966 */
1121 if (ent && (!next || ent->t < next->t)) { 967 if (ent && (!next || ts < next_ts)) {
1122 next = ent; 968 next = ent;
1123 next_cpu = cpu; 969 next_cpu = cpu;
970 next_ts = ts;
1124 } 971 }
1125 } 972 }
1126 973
1127 if (ent_cpu) 974 if (ent_cpu)
1128 *ent_cpu = next_cpu; 975 *ent_cpu = next_cpu;
1129 976
977 if (ent_ts)
978 *ent_ts = next_ts;
979
1130 return next; 980 return next;
1131} 981}
1132 982
1133static void trace_iterator_increment(struct trace_iterator *iter) 983/* Find the next real entry, without updating the iterator itself */
984static struct trace_entry *
985find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1134{ 986{
1135 iter->idx++; 987 return __find_next_entry(iter, ent_cpu, ent_ts);
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146} 988}
1147 989
1148static void trace_consume(struct trace_iterator *iter) 990/* Find the next real entry, and increment the iterator to the next entry */
991static void *find_next_entry_inc(struct trace_iterator *iter)
1149{ 992{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu]; 993 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1151 994
1152 data->trace_tail_idx++; 995 if (iter->ent)
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { 996 trace_iterator_increment(iter, iter->cpu);
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157 997
1158 /* Check if we empty it, then reset the index */ 998 return iter->ent ? iter : NULL;
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162} 999}
1163 1000
1164static void *find_next_entry_inc(struct trace_iterator *iter) 1001static void trace_consume(struct trace_iterator *iter)
1165{ 1002{
1166 struct trace_entry *next; 1003 /* Don't allow ftrace to trace into the ring buffers */
1167 int next_cpu = -1; 1004 ftrace_disable_cpu();
1168 1005 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1169 next = find_next_entry(iter, &next_cpu); 1006 ftrace_enable_cpu();
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181} 1007}
1182 1008
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1009static void *s_next(struct seq_file *m, void *v, loff_t *pos)
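With one ring buffer per CPU, the iterator above merges the streams by peeking at each CPU's next event and taking the one with the smallest timestamp. A self-contained sketch of that selection; the static arrays stand in for the per-CPU ring-buffer iterators, and the sample data and NR_CPUS value are made up:

    #include <stdio.h>

    #define NR_CPUS 3

    struct entry { unsigned long long ts; const char *msg; };

    /* per-CPU event streams, already sorted by timestamp within a CPU */
    static struct entry stream[NR_CPUS][2] = {
        { { 100, "cpu0: fn A" }, { 400, "cpu0: fn B" } },
        { {  50, "cpu1: fn C" }, { 300, "cpu1: fn D" } },
        { { 200, "cpu2: fn E" }, { 999, "cpu2: fn F" } },
    };
    static int pos[NR_CPUS];

    /* peek the next undelivered entry of one CPU, or NULL when exhausted */
    static struct entry *peek_next_entry(int cpu)
    {
        return pos[cpu] < 2 ? &stream[cpu][pos[cpu]] : NULL;
    }

    /* Pick the entry with the smallest timestamp across all CPUs, like
     * __find_next_entry(), and advance that CPU's position. */
    static struct entry *find_next_entry(void)
    {
        struct entry *ent, *next = NULL;
        int cpu, next_cpu = -1;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
            ent = peek_next_entry(cpu);
            if (ent && (!next || ent->ts < next->ts)) {
                next = ent;
                next_cpu = cpu;
            }
        }
        if (next)
            pos[next_cpu]++;
        return next;
    }

    int main(void)
    {
        struct entry *e;

        while ((e = find_next_entry()))
            printf("%llu %s\n", e->ts, e->msg);  /* 50, 100, 200, 300, 400, 999 */
        return 0;
    }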
@@ -1210,7 +1036,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1210 struct trace_iterator *iter = m->private; 1036 struct trace_iterator *iter = m->private;
1211 void *p = NULL; 1037 void *p = NULL;
1212 loff_t l = 0; 1038 loff_t l = 0;
1213 int i; 1039 int cpu;
1214 1040
1215 mutex_lock(&trace_types_lock); 1041 mutex_lock(&trace_types_lock);
1216 1042
@@ -1229,14 +1055,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1229 iter->ent = NULL; 1055 iter->ent = NULL;
1230 iter->cpu = 0; 1056 iter->cpu = 0;
1231 iter->idx = -1; 1057 iter->idx = -1;
1232 iter->prev_ent = NULL;
1233 iter->prev_cpu = -1;
1234 1058
1235 for_each_tracing_cpu(i) { 1059 ftrace_disable_cpu();
1236 iter->next_idx[i] = 0; 1060
1237 iter->next_page[i] = NULL; 1061 for_each_tracing_cpu(cpu) {
1062 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1238 } 1063 }
1239 1064
1065 ftrace_enable_cpu();
1066
1240 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1067 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1241 ; 1068 ;
1242 1069
@@ -1261,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
1261 mutex_unlock(&trace_types_lock); 1088 mutex_unlock(&trace_types_lock);
1262} 1089}
1263 1090
1264#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1265
1266#ifdef CONFIG_KRETPROBES 1091#ifdef CONFIG_KRETPROBES
1267static inline int kretprobed(unsigned long addr) 1092static inline const char *kretprobed(const char *name)
1268{ 1093{
1269 return addr == (unsigned long)kretprobe_trampoline; 1094 static const char tramp_name[] = "kretprobe_trampoline";
1095 int size = sizeof(tramp_name);
1096
1097 if (strncmp(tramp_name, name, size) == 0)
1098 return "[unknown/kretprobe'd]";
1099 return name;
1270} 1100}
1271#else 1101#else
1272static inline int kretprobed(unsigned long addr) 1102static inline const char *kretprobed(const char *name)
1273{ 1103{
1274 return 0; 1104 return name;
1275} 1105}
1276#endif /* CONFIG_KRETPROBES */ 1106#endif /* CONFIG_KRETPROBES */
1277 1107
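kretprobed() now works on the resolved symbol name rather than the raw trampoline address, so the lookup helpers in the following hunks can substitute the "[unknown/kretprobe'd]" placeholder after kallsyms has produced a string. A userspace sketch of that substitution, with the same strncmp check and a stub main for demonstration:

    #include <stdio.h>
    #include <string.h>

    /* replace the kretprobe trampoline symbol with a friendlier tag */
    static const char *kretprobed(const char *name)
    {
        static const char tramp_name[] = "kretprobe_trampoline";

        if (strncmp(tramp_name, name, sizeof(tramp_name)) == 0)
            return "[unknown/kretprobe'd]";
        return name;
    }

    int main(void)
    {
        printf("%s\n", kretprobed("kretprobe_trampoline"));  /* rewritten  */
        printf("%s\n", kretprobed("schedule"));               /* left alone */
        return 0;
    }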
@@ -1280,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1280{ 1110{
1281#ifdef CONFIG_KALLSYMS 1111#ifdef CONFIG_KALLSYMS
1282 char str[KSYM_SYMBOL_LEN]; 1112 char str[KSYM_SYMBOL_LEN];
1113 const char *name;
1283 1114
1284 kallsyms_lookup(address, NULL, NULL, NULL, str); 1115 kallsyms_lookup(address, NULL, NULL, NULL, str);
1285 1116
1286 return trace_seq_printf(s, fmt, str); 1117 name = kretprobed(str);
1118
1119 return trace_seq_printf(s, fmt, name);
1287#endif 1120#endif
1288 return 1; 1121 return 1;
1289} 1122}
@@ -1294,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1294{ 1127{
1295#ifdef CONFIG_KALLSYMS 1128#ifdef CONFIG_KALLSYMS
1296 char str[KSYM_SYMBOL_LEN]; 1129 char str[KSYM_SYMBOL_LEN];
1130 const char *name;
1297 1131
1298 sprint_symbol(str, address); 1132 sprint_symbol(str, address);
1299 return trace_seq_printf(s, fmt, str); 1133 name = kretprobed(str);
1134
1135 return trace_seq_printf(s, fmt, name);
1300#endif 1136#endif
1301 return 1; 1137 return 1;
1302} 1138}
@@ -1330,21 +1166,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1330 1166
1331static void print_lat_help_header(struct seq_file *m) 1167static void print_lat_help_header(struct seq_file *m)
1332{ 1168{
1333 seq_puts(m, "# _------=> CPU# \n"); 1169 seq_puts(m, "# _------=> CPU# \n");
1334 seq_puts(m, "# / _-----=> irqs-off \n"); 1170 seq_puts(m, "# / _-----=> irqs-off \n");
1335 seq_puts(m, "# | / _----=> need-resched \n"); 1171 seq_puts(m, "# | / _----=> need-resched \n");
1336 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1172 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1337 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1173 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1338 seq_puts(m, "# |||| / \n"); 1174 seq_puts(m, "# |||| / \n");
1339 seq_puts(m, "# ||||| delay \n"); 1175 seq_puts(m, "# ||||| delay \n");
1340 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1176 seq_puts(m, "# cmd pid ||||| time | caller \n");
1341 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1177 seq_puts(m, "# \\ / ||||| \\ | / \n");
1342} 1178}
1343 1179
1344static void print_func_help_header(struct seq_file *m) 1180static void print_func_help_header(struct seq_file *m)
1345{ 1181{
1346 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1182 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1347 seq_puts(m, "# | | | | |\n"); 1183 seq_puts(m, "# | | | | |\n");
1348} 1184}
1349 1185
1350 1186
@@ -1355,23 +1191,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1355 struct trace_array *tr = iter->tr; 1191 struct trace_array *tr = iter->tr;
1356 struct trace_array_cpu *data = tr->data[tr->cpu]; 1192 struct trace_array_cpu *data = tr->data[tr->cpu];
1357 struct tracer *type = current_trace; 1193 struct tracer *type = current_trace;
1358 unsigned long total = 0; 1194 unsigned long total;
1359 unsigned long entries = 0; 1195 unsigned long entries;
1360 int cpu;
1361 const char *name = "preemption"; 1196 const char *name = "preemption";
1362 1197
1363 if (type) 1198 if (type)
1364 name = type->name; 1199 name = type->name;
1365 1200
1366 for_each_tracing_cpu(cpu) { 1201 entries = ring_buffer_entries(iter->tr->buffer);
1367 if (head_page(tr->data[cpu])) { 1202 total = entries +
1368 total += tr->data[cpu]->trace_idx; 1203 ring_buffer_overruns(iter->tr->buffer);
1369 if (tr->data[cpu]->trace_idx > tr->entries)
1370 entries += tr->entries;
1371 else
1372 entries += tr->data[cpu]->trace_idx;
1373 }
1374 }
1375 1204
1376 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1205 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1377 name, UTS_RELEASE); 1206 name, UTS_RELEASE);
@@ -1428,9 +1257,10 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1428 comm = trace_find_cmdline(entry->pid); 1257 comm = trace_find_cmdline(entry->pid);
1429 1258
1430 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1259 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1431 trace_seq_printf(s, "%d", cpu); 1260 trace_seq_printf(s, "%3d", cpu);
1432 trace_seq_printf(s, "%c%c", 1261 trace_seq_printf(s, "%c%c",
1433 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1262 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1263 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1434 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1264 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1435 1265
1436 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 1266 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1457,7 +1287,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1457unsigned long preempt_mark_thresh = 100; 1287unsigned long preempt_mark_thresh = 100;
1458 1288
1459static void 1289static void
1460lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, 1290lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1461 unsigned long rel_usecs) 1291 unsigned long rel_usecs)
1462{ 1292{
1463 trace_seq_printf(s, " %4lldus", abs_usecs); 1293 trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1301,76 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1471 1301
1472static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1302static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1473 1303
1474static int 1304/*
1305 * The message is supposed to contain an ending newline.
1306 * If the printing stops prematurely, try to add a newline of our own.
1307 */
1308void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1309{
1310 struct trace_entry *ent;
1311 struct trace_field_cont *cont;
1312 bool ok = true;
1313
1314 ent = peek_next_entry(iter, iter->cpu, NULL);
1315 if (!ent || ent->type != TRACE_CONT) {
1316 trace_seq_putc(s, '\n');
1317 return;
1318 }
1319
1320 do {
1321 cont = (struct trace_field_cont *)ent;
1322 if (ok)
1323 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1324
1325 ftrace_disable_cpu();
1326
1327 if (iter->buffer_iter[iter->cpu])
1328 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1329 else
1330 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1331
1332 ftrace_enable_cpu();
1333
1334 ent = peek_next_entry(iter, iter->cpu, NULL);
1335 } while (ent && ent->type == TRACE_CONT);
1336
1337 if (!ok)
1338 trace_seq_putc(s, '\n');
1339}
1340
1341static enum print_line_t
1475print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1476{ 1343{
1477 struct trace_seq *s = &iter->seq; 1344 struct trace_seq *s = &iter->seq;
1478 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1345 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1479 struct trace_entry *next_entry = find_next_entry(iter, NULL); 1346 struct trace_entry *next_entry;
1480 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 1347 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1481 struct trace_entry *entry = iter->ent; 1348 struct trace_entry *entry = iter->ent;
1482 unsigned long abs_usecs; 1349 unsigned long abs_usecs;
1483 unsigned long rel_usecs; 1350 unsigned long rel_usecs;
1351 u64 next_ts;
1484 char *comm; 1352 char *comm;
1485 int S, T; 1353 int S, T;
1486 int i; 1354 int i;
1487 unsigned state; 1355 unsigned state;
1488 1356
1357 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED;
1359
1360 next_entry = find_next_entry(iter, NULL, &next_ts);
1489 if (!next_entry) 1361 if (!next_entry)
1490 next_entry = entry; 1362 next_ts = iter->ts;
1491 rel_usecs = ns2usecs(next_entry->t - entry->t); 1363 rel_usecs = ns2usecs(next_ts - iter->ts);
1492 abs_usecs = ns2usecs(entry->t - iter->tr->time_start); 1364 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1493 1365
1494 if (verbose) { 1366 if (verbose) {
1495 comm = trace_find_cmdline(entry->pid); 1367 comm = trace_find_cmdline(entry->pid);
1496 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" 1368 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1497 " %ld.%03ldms (+%ld.%03ldms): ", 1369 " %ld.%03ldms (+%ld.%03ldms): ",
1498 comm, 1370 comm,
1499 entry->pid, cpu, entry->flags, 1371 entry->pid, cpu, entry->flags,
1500 entry->preempt_count, trace_idx, 1372 entry->preempt_count, trace_idx,
1501 ns2usecs(entry->t), 1373 ns2usecs(iter->ts),
1502 abs_usecs/1000, 1374 abs_usecs/1000,
1503 abs_usecs % 1000, rel_usecs/1000, 1375 abs_usecs % 1000, rel_usecs/1000,
1504 rel_usecs % 1000); 1376 rel_usecs % 1000);
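The new trace_seq_print_cont() handles ftrace_printk() messages that spill across TRACE_CONT continuation records: it keeps consuming entries while they are of type TRACE_CONT, and appends a newline of its own if the printed output was cut short. A simplified sketch of that stitching loop; the record array and the numeric type values are illustrative, not the kernel's layout:

    #include <stdio.h>
    #include <string.h>

    enum { TRACE_PRINT = 1, TRACE_CONT = 2, TRACE_FN = 3 };

    struct rec { int type; const char *buf; };

    /* a PRINT record followed by CONT chunks, then an unrelated entry */
    static struct rec stream[] = {
        { TRACE_PRINT, "long message part 1, " },
        { TRACE_CONT,  "part 2, " },
        { TRACE_CONT,  "part 3\n" },
        { TRACE_FN,    NULL },
    };

    int main(void)
    {
        char line[128] = "";
        int i = 0;

        /* emit the head of the message ... */
        strcat(line, stream[i++].buf);

        /* ... then keep appending while the next record is a continuation,
         * the way trace_seq_print_cont() walks the iterator */
        while (stream[i].type == TRACE_CONT)
            strcat(line, stream[i++].buf);

        /* if nothing supplied the trailing newline, add one ourselves */
        if (line[strlen(line) - 1] != '\n')
            strcat(line, "\n");

        fputs(line, stdout);
        return 0;
    }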
@@ -1507,52 +1379,82 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1507 lat_print_timestamp(s, abs_usecs, rel_usecs); 1379 lat_print_timestamp(s, abs_usecs, rel_usecs);
1508 } 1380 }
1509 switch (entry->type) { 1381 switch (entry->type) {
1510 case TRACE_FN: 1382 case TRACE_FN: {
1511 seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1383 struct ftrace_entry *field;
1384
1385 trace_assign_type(field, entry);
1386
1387 seq_print_ip_sym(s, field->ip, sym_flags);
1512 trace_seq_puts(s, " ("); 1388 trace_seq_puts(s, " (");
1513 if (kretprobed(entry->fn.parent_ip)) 1389 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1514 trace_seq_puts(s, KRETPROBE_MSG);
1515 else
1516 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1517 trace_seq_puts(s, ")\n"); 1390 trace_seq_puts(s, ")\n");
1518 break; 1391 break;
1392 }
1519 case TRACE_CTX: 1393 case TRACE_CTX:
1520 case TRACE_WAKE: 1394 case TRACE_WAKE: {
1521 T = entry->ctx.next_state < sizeof(state_to_char) ? 1395 struct ctx_switch_entry *field;
1522 state_to_char[entry->ctx.next_state] : 'X'; 1396
1397 trace_assign_type(field, entry);
1398
1399 T = field->next_state < sizeof(state_to_char) ?
1400 state_to_char[field->next_state] : 'X';
1523 1401
1524 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; 1402 state = field->prev_state ?
1403 __ffs(field->prev_state) + 1 : 0;
1525 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; 1404 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1526 comm = trace_find_cmdline(entry->ctx.next_pid); 1405 comm = trace_find_cmdline(field->next_pid);
1527 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", 1406 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1528 entry->ctx.prev_pid, 1407 field->prev_pid,
1529 entry->ctx.prev_prio, 1408 field->prev_prio,
1530 S, entry->type == TRACE_CTX ? "==>" : " +", 1409 S, entry->type == TRACE_CTX ? "==>" : " +",
1531 entry->ctx.next_pid, 1410 field->next_cpu,
1532 entry->ctx.next_prio, 1411 field->next_pid,
1412 field->next_prio,
1533 T, comm); 1413 T, comm);
1534 break; 1414 break;
1535 case TRACE_SPECIAL: 1415 }
1416 case TRACE_SPECIAL: {
1417 struct special_entry *field;
1418
1419 trace_assign_type(field, entry);
1420
1536 trace_seq_printf(s, "# %ld %ld %ld\n", 1421 trace_seq_printf(s, "# %ld %ld %ld\n",
1537 entry->special.arg1, 1422 field->arg1,
1538 entry->special.arg2, 1423 field->arg2,
1539 entry->special.arg3); 1424 field->arg3);
1540 break; 1425 break;
1541 case TRACE_STACK: 1426 }
1427 case TRACE_STACK: {
1428 struct stack_entry *field;
1429
1430 trace_assign_type(field, entry);
1431
1542 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1432 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1543 if (i) 1433 if (i)
1544 trace_seq_puts(s, " <= "); 1434 trace_seq_puts(s, " <= ");
1545 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); 1435 seq_print_ip_sym(s, field->caller[i], sym_flags);
1546 } 1436 }
1547 trace_seq_puts(s, "\n"); 1437 trace_seq_puts(s, "\n");
1548 break; 1438 break;
1439 }
1440 case TRACE_PRINT: {
1441 struct print_entry *field;
1442
1443 trace_assign_type(field, entry);
1444
1445 seq_print_ip_sym(s, field->ip, sym_flags);
1446 trace_seq_printf(s, ": %s", field->buf);
1447 if (entry->flags & TRACE_FLAG_CONT)
1448 trace_seq_print_cont(s, iter);
1449 break;
1450 }
1549 default: 1451 default:
1550 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1452 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1551 } 1453 }
1552 return 1; 1454 return TRACE_TYPE_HANDLED;
1553} 1455}
1554 1456
1555static int print_trace_fmt(struct trace_iterator *iter) 1457static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1556{ 1458{
1557 struct trace_seq *s = &iter->seq; 1459 struct trace_seq *s = &iter->seq;
1558 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1460 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1469,123 @@ static int print_trace_fmt(struct trace_iterator *iter)
1567 1469
1568 entry = iter->ent; 1470 entry = iter->ent;
1569 1471
1472 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED;
1474
1570 comm = trace_find_cmdline(iter->ent->pid); 1475 comm = trace_find_cmdline(iter->ent->pid);
1571 1476
1572 t = ns2usecs(entry->t); 1477 t = ns2usecs(iter->ts);
1573 usec_rem = do_div(t, 1000000ULL); 1478 usec_rem = do_div(t, 1000000ULL);
1574 secs = (unsigned long)t; 1479 secs = (unsigned long)t;
1575 1480
1576 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1481 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1577 if (!ret) 1482 if (!ret)
1578 return 0; 1483 return TRACE_TYPE_PARTIAL_LINE;
1579 ret = trace_seq_printf(s, "[%02d] ", iter->cpu); 1484 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1580 if (!ret) 1485 if (!ret)
1581 return 0; 1486 return TRACE_TYPE_PARTIAL_LINE;
1582 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); 1487 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1583 if (!ret) 1488 if (!ret)
1584 return 0; 1489 return TRACE_TYPE_PARTIAL_LINE;
1585 1490
1586 switch (entry->type) { 1491 switch (entry->type) {
1587 case TRACE_FN: 1492 case TRACE_FN: {
1588 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1493 struct ftrace_entry *field;
1494
1495 trace_assign_type(field, entry);
1496
1497 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1589 if (!ret) 1498 if (!ret)
1590 return 0; 1499 return TRACE_TYPE_PARTIAL_LINE;
1591 if ((sym_flags & TRACE_ITER_PRINT_PARENT) && 1500 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1592 entry->fn.parent_ip) { 1501 field->parent_ip) {
1593 ret = trace_seq_printf(s, " <-"); 1502 ret = trace_seq_printf(s, " <-");
1594 if (!ret) 1503 if (!ret)
1595 return 0; 1504 return TRACE_TYPE_PARTIAL_LINE;
1596 if (kretprobed(entry->fn.parent_ip)) 1505 ret = seq_print_ip_sym(s,
1597 ret = trace_seq_puts(s, KRETPROBE_MSG); 1506 field->parent_ip,
1598 else 1507 sym_flags);
1599 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1600 sym_flags);
1601 if (!ret) 1508 if (!ret)
1602 return 0; 1509 return TRACE_TYPE_PARTIAL_LINE;
1603 } 1510 }
1604 ret = trace_seq_printf(s, "\n"); 1511 ret = trace_seq_printf(s, "\n");
1605 if (!ret) 1512 if (!ret)
1606 return 0; 1513 return TRACE_TYPE_PARTIAL_LINE;
1607 break; 1514 break;
1515 }
1608 case TRACE_CTX: 1516 case TRACE_CTX:
1609 case TRACE_WAKE: 1517 case TRACE_WAKE: {
1610 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1518 struct ctx_switch_entry *field;
1611 state_to_char[entry->ctx.prev_state] : 'X'; 1519
1612 T = entry->ctx.next_state < sizeof(state_to_char) ? 1520 trace_assign_type(field, entry);
1613 state_to_char[entry->ctx.next_state] : 'X'; 1521
1614 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", 1522 S = field->prev_state < sizeof(state_to_char) ?
1615 entry->ctx.prev_pid, 1523 state_to_char[field->prev_state] : 'X';
1616 entry->ctx.prev_prio, 1524 T = field->next_state < sizeof(state_to_char) ?
1525 state_to_char[field->next_state] : 'X';
1526 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1527 field->prev_pid,
1528 field->prev_prio,
1617 S, 1529 S,
1618 entry->type == TRACE_CTX ? "==>" : " +", 1530 entry->type == TRACE_CTX ? "==>" : " +",
1619 entry->ctx.next_pid, 1531 field->next_cpu,
1620 entry->ctx.next_prio, 1532 field->next_pid,
1533 field->next_prio,
1621 T); 1534 T);
1622 if (!ret) 1535 if (!ret)
1623 return 0; 1536 return TRACE_TYPE_PARTIAL_LINE;
1624 break; 1537 break;
1625 case TRACE_SPECIAL: 1538 }
1539 case TRACE_SPECIAL: {
1540 struct special_entry *field;
1541
1542 trace_assign_type(field, entry);
1543
1626 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1544 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1627 entry->special.arg1, 1545 field->arg1,
1628 entry->special.arg2, 1546 field->arg2,
1629 entry->special.arg3); 1547 field->arg3);
1630 if (!ret) 1548 if (!ret)
1631 return 0; 1549 return TRACE_TYPE_PARTIAL_LINE;
1632 break; 1550 break;
1633 case TRACE_STACK: 1551 }
1552 case TRACE_STACK: {
1553 struct stack_entry *field;
1554
1555 trace_assign_type(field, entry);
1556
1634 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1557 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1635 if (i) { 1558 if (i) {
1636 ret = trace_seq_puts(s, " <= "); 1559 ret = trace_seq_puts(s, " <= ");
1637 if (!ret) 1560 if (!ret)
1638 return 0; 1561 return TRACE_TYPE_PARTIAL_LINE;
1639 } 1562 }
1640 ret = seq_print_ip_sym(s, entry->stack.caller[i], 1563 ret = seq_print_ip_sym(s, field->caller[i],
1641 sym_flags); 1564 sym_flags);
1642 if (!ret) 1565 if (!ret)
1643 return 0; 1566 return TRACE_TYPE_PARTIAL_LINE;
1644 } 1567 }
1645 ret = trace_seq_puts(s, "\n"); 1568 ret = trace_seq_puts(s, "\n");
1646 if (!ret) 1569 if (!ret)
1647 return 0; 1570 return TRACE_TYPE_PARTIAL_LINE;
1648 break; 1571 break;
1649 } 1572 }
1650 return 1; 1573 case TRACE_PRINT: {
1574 struct print_entry *field;
1575
1576 trace_assign_type(field, entry);
1577
1578 seq_print_ip_sym(s, field->ip, sym_flags);
1579 trace_seq_printf(s, ": %s", field->buf);
1580 if (entry->flags & TRACE_FLAG_CONT)
1581 trace_seq_print_cont(s, iter);
1582 break;
1583 }
1584 }
1585 return TRACE_TYPE_HANDLED;
1651} 1586}
1652 1587
1653static int print_raw_fmt(struct trace_iterator *iter) 1588static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1654{ 1589{
1655 struct trace_seq *s = &iter->seq; 1590 struct trace_seq *s = &iter->seq;
1656 struct trace_entry *entry; 1591 struct trace_entry *entry;
@@ -1659,47 +1594,77 @@ static int print_raw_fmt(struct trace_iterator *iter)
1659 1594
1660 entry = iter->ent; 1595 entry = iter->ent;
1661 1596
1597 if (entry->type == TRACE_CONT)
1598 return TRACE_TYPE_HANDLED;
1599
1662 ret = trace_seq_printf(s, "%d %d %llu ", 1600 ret = trace_seq_printf(s, "%d %d %llu ",
1663 entry->pid, iter->cpu, entry->t); 1601 entry->pid, iter->cpu, iter->ts);
1664 if (!ret) 1602 if (!ret)
1665 return 0; 1603 return TRACE_TYPE_PARTIAL_LINE;
1666 1604
1667 switch (entry->type) { 1605 switch (entry->type) {
1668 case TRACE_FN: 1606 case TRACE_FN: {
1607 struct ftrace_entry *field;
1608
1609 trace_assign_type(field, entry);
1610
1669 ret = trace_seq_printf(s, "%x %x\n", 1611 ret = trace_seq_printf(s, "%x %x\n",
1670 entry->fn.ip, entry->fn.parent_ip); 1612 field->ip,
1613 field->parent_ip);
1671 if (!ret) 1614 if (!ret)
1672 return 0; 1615 return TRACE_TYPE_PARTIAL_LINE;
1673 break; 1616 break;
1617 }
1674 case TRACE_CTX: 1618 case TRACE_CTX:
1675 case TRACE_WAKE: 1619 case TRACE_WAKE: {
1676 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1620 struct ctx_switch_entry *field;
1677 state_to_char[entry->ctx.prev_state] : 'X'; 1621
1678 T = entry->ctx.next_state < sizeof(state_to_char) ? 1622 trace_assign_type(field, entry);
1679 state_to_char[entry->ctx.next_state] : 'X'; 1623
1624 S = field->prev_state < sizeof(state_to_char) ?
1625 state_to_char[field->prev_state] : 'X';
1626 T = field->next_state < sizeof(state_to_char) ?
1627 state_to_char[field->next_state] : 'X';
1680 if (entry->type == TRACE_WAKE) 1628 if (entry->type == TRACE_WAKE)
1681 S = '+'; 1629 S = '+';
1682 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", 1630 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1683 entry->ctx.prev_pid, 1631 field->prev_pid,
1684 entry->ctx.prev_prio, 1632 field->prev_prio,
1685 S, 1633 S,
1686 entry->ctx.next_pid, 1634 field->next_cpu,
1687 entry->ctx.next_prio, 1635 field->next_pid,
1636 field->next_prio,
1688 T); 1637 T);
1689 if (!ret) 1638 if (!ret)
1690 return 0; 1639 return TRACE_TYPE_PARTIAL_LINE;
1691 break; 1640 break;
1641 }
1692 case TRACE_SPECIAL: 1642 case TRACE_SPECIAL:
1693 case TRACE_STACK: 1643 case TRACE_STACK: {
1644 struct special_entry *field;
1645
1646 trace_assign_type(field, entry);
1647
1694 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1648 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1695 entry->special.arg1, 1649 field->arg1,
1696 entry->special.arg2, 1650 field->arg2,
1697 entry->special.arg3); 1651 field->arg3);
1698 if (!ret) 1652 if (!ret)
1699 return 0; 1653 return TRACE_TYPE_PARTIAL_LINE;
1700 break; 1654 break;
1701 } 1655 }
1702 return 1; 1656 case TRACE_PRINT: {
1657 struct print_entry *field;
1658
1659 trace_assign_type(field, entry);
1660
1661 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
1662 if (entry->flags & TRACE_FLAG_CONT)
1663 trace_seq_print_cont(s, iter);
1664 break;
1665 }
1666 }
1667 return TRACE_TYPE_HANDLED;
1703} 1668}
1704 1669
1705#define SEQ_PUT_FIELD_RET(s, x) \ 1670#define SEQ_PUT_FIELD_RET(s, x) \
@@ -1710,11 +1675,12 @@ do { \
1710 1675
1711#define SEQ_PUT_HEX_FIELD_RET(s, x) \ 1676#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1712do { \ 1677do { \
1678 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
1713 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ 1679 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1714 return 0; \ 1680 return 0; \
1715} while (0) 1681} while (0)
1716 1682
1717static int print_hex_fmt(struct trace_iterator *iter) 1683static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1718{ 1684{
1719 struct trace_seq *s = &iter->seq; 1685 struct trace_seq *s = &iter->seq;
1720 unsigned char newline = '\n'; 1686 unsigned char newline = '\n';
@@ -1723,97 +1689,139 @@ static int print_hex_fmt(struct trace_iterator *iter)
1723 1689
1724 entry = iter->ent; 1690 entry = iter->ent;
1725 1691
1692 if (entry->type == TRACE_CONT)
1693 return TRACE_TYPE_HANDLED;
1694
1726 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1695 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1727 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 1696 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1728 SEQ_PUT_HEX_FIELD_RET(s, entry->t); 1697 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1729 1698
1730 switch (entry->type) { 1699 switch (entry->type) {
1731 case TRACE_FN: 1700 case TRACE_FN: {
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); 1701 struct ftrace_entry *field;
1733 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1702
1703 trace_assign_type(field, entry);
1704
1705 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
1706 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1734 break; 1707 break;
1708 }
1735 case TRACE_CTX: 1709 case TRACE_CTX:
1736 case TRACE_WAKE: 1710 case TRACE_WAKE: {
1737 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1711 struct ctx_switch_entry *field;
1738 state_to_char[entry->ctx.prev_state] : 'X'; 1712
1739 T = entry->ctx.next_state < sizeof(state_to_char) ? 1713 trace_assign_type(field, entry);
1740 state_to_char[entry->ctx.next_state] : 'X'; 1714
1715 S = field->prev_state < sizeof(state_to_char) ?
1716 state_to_char[field->prev_state] : 'X';
1717 T = field->next_state < sizeof(state_to_char) ?
1718 state_to_char[field->next_state] : 'X';
1741 if (entry->type == TRACE_WAKE) 1719 if (entry->type == TRACE_WAKE)
1742 S = '+'; 1720 S = '+';
1743 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); 1721 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1744 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); 1722 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1745 SEQ_PUT_HEX_FIELD_RET(s, S); 1723 SEQ_PUT_HEX_FIELD_RET(s, S);
1746 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); 1724 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); 1725 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1726 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1749 SEQ_PUT_HEX_FIELD_RET(s, T); 1727 SEQ_PUT_HEX_FIELD_RET(s, T);
1750 break; 1728 break;
1729 }
1751 case TRACE_SPECIAL: 1730 case TRACE_SPECIAL:
1752 case TRACE_STACK: 1731 case TRACE_STACK: {
1753 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); 1732 struct special_entry *field;
1754 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); 1733
1755 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); 1734 trace_assign_type(field, entry);
1735
1736 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1737 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1738 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1756 break; 1739 break;
1757 } 1740 }
1741 }
1758 SEQ_PUT_FIELD_RET(s, newline); 1742 SEQ_PUT_FIELD_RET(s, newline);
1759 1743
1760 return 1; 1744 return TRACE_TYPE_HANDLED;
1761} 1745}
1762 1746
1763static int print_bin_fmt(struct trace_iterator *iter) 1747static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1764{ 1748{
1765 struct trace_seq *s = &iter->seq; 1749 struct trace_seq *s = &iter->seq;
1766 struct trace_entry *entry; 1750 struct trace_entry *entry;
1767 1751
1768 entry = iter->ent; 1752 entry = iter->ent;
1769 1753
1754 if (entry->type == TRACE_CONT)
1755 return TRACE_TYPE_HANDLED;
1756
1770 SEQ_PUT_FIELD_RET(s, entry->pid); 1757 SEQ_PUT_FIELD_RET(s, entry->pid);
1771 SEQ_PUT_FIELD_RET(s, entry->cpu); 1758 SEQ_PUT_FIELD_RET(s, entry->cpu);
1772 SEQ_PUT_FIELD_RET(s, entry->t); 1759 SEQ_PUT_FIELD_RET(s, iter->ts);
1773 1760
1774 switch (entry->type) { 1761 switch (entry->type) {
1775 case TRACE_FN: 1762 case TRACE_FN: {
1776 SEQ_PUT_FIELD_RET(s, entry->fn.ip); 1763 struct ftrace_entry *field;
1777 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); 1764
1765 trace_assign_type(field, entry);
1766
1767 SEQ_PUT_FIELD_RET(s, field->ip);
1768 SEQ_PUT_FIELD_RET(s, field->parent_ip);
1778 break; 1769 break;
1779 case TRACE_CTX: 1770 }
1780 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); 1771 case TRACE_CTX: {
1781 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); 1772 struct ctx_switch_entry *field;
1782 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); 1773
1783 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); 1774 trace_assign_type(field, entry);
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); 1775
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); 1776 SEQ_PUT_FIELD_RET(s, field->prev_pid);
1777 SEQ_PUT_FIELD_RET(s, field->prev_prio);
1778 SEQ_PUT_FIELD_RET(s, field->prev_state);
1779 SEQ_PUT_FIELD_RET(s, field->next_pid);
1780 SEQ_PUT_FIELD_RET(s, field->next_prio);
1781 SEQ_PUT_FIELD_RET(s, field->next_state);
1786 break; 1782 break;
1783 }
1787 case TRACE_SPECIAL: 1784 case TRACE_SPECIAL:
1788 case TRACE_STACK: 1785 case TRACE_STACK: {
1789 SEQ_PUT_FIELD_RET(s, entry->special.arg1); 1786 struct special_entry *field;
1790 SEQ_PUT_FIELD_RET(s, entry->special.arg2); 1787
1791 SEQ_PUT_FIELD_RET(s, entry->special.arg3); 1788 trace_assign_type(field, entry);
1789
1790 SEQ_PUT_FIELD_RET(s, field->arg1);
1791 SEQ_PUT_FIELD_RET(s, field->arg2);
1792 SEQ_PUT_FIELD_RET(s, field->arg3);
1792 break; 1793 break;
1793 } 1794 }
1795 }
1794 return 1; 1796 return 1;
1795} 1797}
1796 1798
1797static int trace_empty(struct trace_iterator *iter) 1799static int trace_empty(struct trace_iterator *iter)
1798{ 1800{
1799 struct trace_array_cpu *data;
1800 int cpu; 1801 int cpu;
1801 1802
1802 for_each_tracing_cpu(cpu) { 1803 for_each_tracing_cpu(cpu) {
1803 data = iter->tr->data[cpu]; 1804 if (iter->buffer_iter[cpu]) {
1804 1805 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1805 if (head_page(data) && data->trace_idx && 1806 return 0;
1806 (data->trace_tail != data->trace_head || 1807 } else {
1807 data->trace_tail_idx != data->trace_head_idx)) 1808 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1808 return 0; 1809 return 0;
1810 }
1809 } 1811 }
1812
1810 return 1; 1813 return 1;
1811} 1814}
1812 1815
1813static int print_trace_line(struct trace_iterator *iter) 1816static enum print_line_t print_trace_line(struct trace_iterator *iter)
1814{ 1817{
1815 if (iter->trace && iter->trace->print_line) 1818 enum print_line_t ret;
1816 return iter->trace->print_line(iter); 1819
1820 if (iter->trace && iter->trace->print_line) {
1821 ret = iter->trace->print_line(iter);
1822 if (ret != TRACE_TYPE_UNHANDLED)
1823 return ret;
1824 }
1817 1825
1818 if (trace_flags & TRACE_ITER_BIN) 1826 if (trace_flags & TRACE_ITER_BIN)
1819 return print_bin_fmt(iter); 1827 return print_bin_fmt(iter);
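print_trace_line() now asks the current tracer first and only falls back to the bin/hex/raw/default formatters when the callback answers TRACE_TYPE_UNHANDLED. A sketch of a callback honouring that contract (my_print_line is hypothetical; it assumes the declarations in kernel/trace/trace.h):

    static enum print_line_t my_print_line(struct trace_iterator *iter)
    {
            struct trace_entry *entry = iter->ent;

            /* claim only the entry types this tracer understands */
            if (entry->type != TRACE_PRINT)
                    return TRACE_TYPE_UNHANDLED;    /* let the default formatters run */

            if (!trace_seq_printf(&iter->seq, "custom print entry\n"))
                    return TRACE_TYPE_PARTIAL_LINE; /* seq full: caller flushes and retries */

            return TRACE_TYPE_HANDLED;
    }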
@@ -1869,6 +1877,8 @@ static struct trace_iterator *
1869__tracing_open(struct inode *inode, struct file *file, int *ret) 1877__tracing_open(struct inode *inode, struct file *file, int *ret)
1870{ 1878{
1871 struct trace_iterator *iter; 1879 struct trace_iterator *iter;
1880 struct seq_file *m;
1881 int cpu;
1872 1882
1873 if (tracing_disabled) { 1883 if (tracing_disabled) {
1874 *ret = -ENODEV; 1884 *ret = -ENODEV;
@@ -1889,28 +1899,46 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1889 iter->trace = current_trace; 1899 iter->trace = current_trace;
1890 iter->pos = -1; 1900 iter->pos = -1;
1891 1901
1902 for_each_tracing_cpu(cpu) {
1903
1904 iter->buffer_iter[cpu] =
1905 ring_buffer_read_start(iter->tr->buffer, cpu);
1906
1907 if (!iter->buffer_iter[cpu])
1908 goto fail_buffer;
1909 }
1910
1892 /* TODO stop tracer */ 1911 /* TODO stop tracer */
1893 *ret = seq_open(file, &tracer_seq_ops); 1912 *ret = seq_open(file, &tracer_seq_ops);
1894 if (!*ret) { 1913 if (*ret)
1895 struct seq_file *m = file->private_data; 1914 goto fail_buffer;
1896 m->private = iter;
1897 1915
1898 /* stop the trace while dumping */ 1916 m = file->private_data;
1899 if (iter->tr->ctrl) { 1917 m->private = iter;
1900 tracer_enabled = 0;
1901 ftrace_function_enabled = 0;
1902 }
1903 1918
1904 if (iter->trace && iter->trace->open) 1919 /* stop the trace while dumping */
1905 iter->trace->open(iter); 1920 if (iter->tr->ctrl) {
1906 } else { 1921 tracer_enabled = 0;
1907 kfree(iter); 1922 ftrace_function_enabled = 0;
1908 iter = NULL;
1909 } 1923 }
1924
1925 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter);
1927
1910 mutex_unlock(&trace_types_lock); 1928 mutex_unlock(&trace_types_lock);
1911 1929
1912 out: 1930 out:
1913 return iter; 1931 return iter;
1932
1933 fail_buffer:
1934 for_each_tracing_cpu(cpu) {
1935 if (iter->buffer_iter[cpu])
1936 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1937 }
1938 mutex_unlock(&trace_types_lock);
1939 kfree(iter);
1940
1941 return ERR_PTR(-ENOMEM);
1914} 1942}
1915 1943
1916int tracing_open_generic(struct inode *inode, struct file *filp) 1944int tracing_open_generic(struct inode *inode, struct file *filp)
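__tracing_open() now owns one ring_buffer_iter per possible CPU, and the new fail_buffer label exists because a half-finished allocation has to release the iterators it already started. The pairing, condensed into a hypothetical helper that uses the same ring buffer calls as the patch:

    static int open_buffer_iters(struct trace_iterator *iter)
    {
            int cpu;

            for_each_tracing_cpu(cpu) {
                    iter->buffer_iter[cpu] =
                            ring_buffer_read_start(iter->tr->buffer, cpu);
                    if (!iter->buffer_iter[cpu])
                            goto fail;
            }
            return 0;

    fail:
            /* release whatever was started before the failure */
            for_each_tracing_cpu(cpu) {
                    if (iter->buffer_iter[cpu])
                            ring_buffer_read_finish(iter->buffer_iter[cpu]);
            }
            return -ENOMEM;
    }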
@@ -1926,8 +1954,14 @@ int tracing_release(struct inode *inode, struct file *file)
1926{ 1954{
1927 struct seq_file *m = (struct seq_file *)file->private_data; 1955 struct seq_file *m = (struct seq_file *)file->private_data;
1928 struct trace_iterator *iter = m->private; 1956 struct trace_iterator *iter = m->private;
1957 int cpu;
1929 1958
1930 mutex_lock(&trace_types_lock); 1959 mutex_lock(&trace_types_lock);
1960 for_each_tracing_cpu(cpu) {
1961 if (iter->buffer_iter[cpu])
1962 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1963 }
1964
1931 if (iter->trace && iter->trace->close) 1965 if (iter->trace && iter->trace->close)
1932 iter->trace->close(iter); 1966 iter->trace->close(iter);
1933 1967
@@ -2352,6 +2386,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2352 struct tracer *t; 2386 struct tracer *t;
2353 char buf[max_tracer_type_len+1]; 2387 char buf[max_tracer_type_len+1];
2354 int i; 2388 int i;
2389 size_t ret;
2390
2391 ret = cnt;
2355 2392
2356 if (cnt > max_tracer_type_len) 2393 if (cnt > max_tracer_type_len)
2357 cnt = max_tracer_type_len; 2394 cnt = max_tracer_type_len;
@@ -2370,7 +2407,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2370 if (strcmp(t->name, buf) == 0) 2407 if (strcmp(t->name, buf) == 0)
2371 break; 2408 break;
2372 } 2409 }
2373 if (!t || t == current_trace) 2410 if (!t) {
2411 ret = -EINVAL;
2412 goto out;
2413 }
2414 if (t == current_trace)
2374 goto out; 2415 goto out;
2375 2416
2376 if (current_trace && current_trace->reset) 2417 if (current_trace && current_trace->reset)
@@ -2383,9 +2424,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 out: 2424 out:
2384 mutex_unlock(&trace_types_lock); 2425 mutex_unlock(&trace_types_lock);
2385 2426
2386 filp->f_pos += cnt; 2427 if (ret > 0)
2428 filp->f_pos += ret;
2387 2429
2388 return cnt; 2430 return ret;
2389} 2431}
2390 2432
2391static ssize_t 2433static ssize_t
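The reworked tracing_set_trace_write() is mostly about the write(2) contract: return either a negative errno or the number of bytes consumed, and advance the file position only in the success case, so an unknown tracer name no longer looks like a successful write. The shape of that convention, reduced to a hypothetical handler (example_lookup stands in for the tracer-name search):

    static ssize_t example_write(struct file *filp, const char __user *ubuf,
                                 size_t cnt, loff_t *ppos)
    {
            ssize_t ret = cnt;              /* on success, claim every byte */

            if (!example_lookup(ubuf, cnt)) /* hypothetical parse/lookup step */
                    ret = -EINVAL;          /* nothing consumed: report the error */

            if (ret > 0)
                    filp->f_pos += ret;     /* move the position only on success */

            return ret;
    }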
@@ -2500,20 +2542,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2500 size_t cnt, loff_t *ppos) 2542 size_t cnt, loff_t *ppos)
2501{ 2543{
2502 struct trace_iterator *iter = filp->private_data; 2544 struct trace_iterator *iter = filp->private_data;
2503 struct trace_array_cpu *data;
2504 static cpumask_t mask;
2505 unsigned long flags;
2506#ifdef CONFIG_FTRACE
2507 int ftrace_save;
2508#endif
2509 int cpu;
2510 ssize_t sret; 2545 ssize_t sret;
2511 2546
2512 /* return any leftover data */ 2547 /* return any leftover data */
2513 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2548 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2514 if (sret != -EBUSY) 2549 if (sret != -EBUSY)
2515 return sret; 2550 return sret;
2516 sret = 0;
2517 2551
2518 trace_seq_reset(&iter->seq); 2552 trace_seq_reset(&iter->seq);
2519 2553
@@ -2524,6 +2558,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2524 goto out; 2558 goto out;
2525 } 2559 }
2526 2560
2561waitagain:
2562 sret = 0;
2527 while (trace_empty(iter)) { 2563 while (trace_empty(iter)) {
2528 2564
2529 if ((filp->f_flags & O_NONBLOCK)) { 2565 if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +2624,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2588 offsetof(struct trace_iterator, seq)); 2624 offsetof(struct trace_iterator, seq));
2589 iter->pos = -1; 2625 iter->pos = -1;
2590 2626
2591 /*
2592 * We need to stop all tracing on all CPUS to read the
2593 * the next buffer. This is a bit expensive, but is
2594 * not done often. We fill all what we can read,
2595 * and then release the locks again.
2596 */
2597
2598 cpus_clear(mask);
2599 local_irq_save(flags);
2600#ifdef CONFIG_FTRACE
2601 ftrace_save = ftrace_enabled;
2602 ftrace_enabled = 0;
2603#endif
2604 smp_wmb();
2605 for_each_tracing_cpu(cpu) {
2606 data = iter->tr->data[cpu];
2607
2608 if (!head_page(data) || !data->trace_idx)
2609 continue;
2610
2611 atomic_inc(&data->disabled);
2612 cpu_set(cpu, mask);
2613 }
2614
2615 for_each_cpu_mask(cpu, mask) {
2616 data = iter->tr->data[cpu];
2617 __raw_spin_lock(&data->lock);
2618
2619 if (data->overrun > iter->last_overrun[cpu])
2620 iter->overrun[cpu] +=
2621 data->overrun - iter->last_overrun[cpu];
2622 iter->last_overrun[cpu] = data->overrun;
2623 }
2624
2625 while (find_next_entry_inc(iter) != NULL) { 2627 while (find_next_entry_inc(iter) != NULL) {
2626 int ret; 2628 enum print_line_t ret;
2627 int len = iter->seq.len; 2629 int len = iter->seq.len;
2628 2630
2629 ret = print_trace_line(iter); 2631 ret = print_trace_line(iter);
2630 if (!ret) { 2632 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2631 /* don't print partial lines */ 2633 /* don't print partial lines */
2632 iter->seq.len = len; 2634 iter->seq.len = len;
2633 break; 2635 break;
@@ -2639,26 +2641,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2639 break; 2641 break;
2640 } 2642 }
2641 2643
2642 for_each_cpu_mask(cpu, mask) {
2643 data = iter->tr->data[cpu];
2644 __raw_spin_unlock(&data->lock);
2645 }
2646
2647 for_each_cpu_mask(cpu, mask) {
2648 data = iter->tr->data[cpu];
2649 atomic_dec(&data->disabled);
2650 }
2651#ifdef CONFIG_FTRACE
2652 ftrace_enabled = ftrace_save;
2653#endif
2654 local_irq_restore(flags);
2655
2656 /* Now copy what we have to the user */ 2644 /* Now copy what we have to the user */
2657 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2645 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2658 if (iter->seq.readpos >= iter->seq.len) 2646 if (iter->seq.readpos >= iter->seq.len)
2659 trace_seq_reset(&iter->seq); 2647 trace_seq_reset(&iter->seq);
2648
2649 /*
 2650	 * If there was nothing to send to user, in spite of consuming trace
2651 * entries, go back to wait for more entries.
2652 */
2660 if (sret == -EBUSY) 2653 if (sret == -EBUSY)
2661 sret = 0; 2654 goto waitagain;
2662 2655
2663out: 2656out:
2664 mutex_unlock(&trace_types_lock); 2657 mutex_unlock(&trace_types_lock);
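With the ring buffer handling its own locking, the interesting addition to tracing_read_pipe() is the waitagain label: when a pass over the buffer consumed entries but produced nothing copyable (sret == -EBUSY on an empty seq), the reader loops back to wait rather than returning 0, which user space would treat as end of file. The control flow as a sketch with stand-in helpers (none of these names are real tracer API):

    struct my_iter;
    extern void wait_until_not_empty(struct my_iter *iter);
    extern void fill_seq_from_buffer(struct my_iter *iter);
    extern ssize_t copy_seq_to_user_sketch(struct my_iter *iter,
                                           char __user *ubuf, size_t cnt);

    static ssize_t pipe_read_sketch(struct my_iter *iter,
                                    char __user *ubuf, size_t cnt)
    {
            ssize_t sret;

    waitagain:
            wait_until_not_empty(iter);     /* block until entries arrive */
            fill_seq_from_buffer(iter);     /* format as many lines as fit */

            sret = copy_seq_to_user_sketch(iter, ubuf, cnt);
            if (sret == -EBUSY)             /* formatted nothing usable */
                    goto waitagain;         /* wait for more instead of EOF */

            return sret;
    }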
@@ -2684,7 +2677,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2684{ 2677{
2685 unsigned long val; 2678 unsigned long val;
2686 char buf[64]; 2679 char buf[64];
2687 int i, ret; 2680 int ret, cpu;
2681 struct trace_array *tr = filp->private_data;
2688 2682
2689 if (cnt >= sizeof(buf)) 2683 if (cnt >= sizeof(buf))
2690 return -EINVAL; 2684 return -EINVAL;
@@ -2704,71 +2698,111 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2704 2698
2705 mutex_lock(&trace_types_lock); 2699 mutex_lock(&trace_types_lock);
2706 2700
2707 if (current_trace != &no_tracer) { 2701 if (tr->ctrl) {
2708 cnt = -EBUSY; 2702 cnt = -EBUSY;
2709 pr_info("ftrace: set current_tracer to none" 2703 pr_info("ftrace: please disable tracing"
2710 " before modifying buffer size\n"); 2704 " before modifying buffer size\n");
2711 goto out; 2705 goto out;
2712 } 2706 }
2713 2707
2714 if (val > global_trace.entries) { 2708 /* disable all cpu buffers */
2715 long pages_requested; 2709 for_each_tracing_cpu(cpu) {
2716 unsigned long freeable_pages; 2710 if (global_trace.data[cpu])
2717 2711 atomic_inc(&global_trace.data[cpu]->disabled);
2718 /* make sure we have enough memory before mapping */ 2712 if (max_tr.data[cpu])
2719 pages_requested = 2713 atomic_inc(&max_tr.data[cpu]->disabled);
2720 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE; 2714 }
2721
2722 /* account for each buffer (and max_tr) */
2723 pages_requested *= tracing_nr_buffers * 2;
2724
2725 /* Check for overflow */
2726 if (pages_requested < 0) {
2727 cnt = -ENOMEM;
2728 goto out;
2729 }
2730
2731 freeable_pages = determine_dirtyable_memory();
2732 2715
2733 /* we only allow to request 1/4 of useable memory */ 2716 if (val != global_trace.entries) {
2734 if (pages_requested > 2717 ret = ring_buffer_resize(global_trace.buffer, val);
2735 ((freeable_pages + tracing_pages_allocated) / 4)) { 2718 if (ret < 0) {
2736 cnt = -ENOMEM; 2719 cnt = ret;
2737 goto out; 2720 goto out;
2738 } 2721 }
2739 2722
2740 while (global_trace.entries < val) { 2723 ret = ring_buffer_resize(max_tr.buffer, val);
2741 if (trace_alloc_page()) { 2724 if (ret < 0) {
2742 cnt = -ENOMEM; 2725 int r;
2743 goto out; 2726 cnt = ret;
2727 r = ring_buffer_resize(global_trace.buffer,
2728 global_trace.entries);
2729 if (r < 0) {
2730 /* AARGH! We are left with different
2731 * size max buffer!!!! */
2732 WARN_ON(1);
2733 tracing_disabled = 1;
2744 } 2734 }
2745 /* double check that we don't go over the known pages */ 2735 goto out;
2746 if (tracing_pages_allocated > pages_requested)
2747 break;
2748 } 2736 }
2749 2737
2750 } else { 2738 global_trace.entries = val;
2751 /* include the number of entries in val (inc of page entries) */
2752 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2753 trace_free_page();
2754 } 2739 }
2755 2740
2756 /* check integrity */
2757 for_each_tracing_cpu(i)
2758 check_pages(global_trace.data[i]);
2759
2760 filp->f_pos += cnt; 2741 filp->f_pos += cnt;
2761 2742
2762 /* If check pages failed, return ENOMEM */ 2743 /* If check pages failed, return ENOMEM */
2763 if (tracing_disabled) 2744 if (tracing_disabled)
2764 cnt = -ENOMEM; 2745 cnt = -ENOMEM;
2765 out: 2746 out:
2747 for_each_tracing_cpu(cpu) {
2748 if (global_trace.data[cpu])
2749 atomic_dec(&global_trace.data[cpu]->disabled);
2750 if (max_tr.data[cpu])
2751 atomic_dec(&max_tr.data[cpu]->disabled);
2752 }
2753
2766 max_tr.entries = global_trace.entries; 2754 max_tr.entries = global_trace.entries;
2767 mutex_unlock(&trace_types_lock); 2755 mutex_unlock(&trace_types_lock);
2768 2756
2769 return cnt; 2757 return cnt;
2770} 2758}
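trace_entries now resizes two ring buffers that must stay the same size. If growing max_tr fails after global_trace has already grown, the code shrinks global_trace back, and only when that second resize also fails does it warn and disable tracing, since the snapshot machinery cannot cope with mismatched buffers. A stripped-down sketch of the pairing (resize_both is a hypothetical condensation of the logic above):

    static int resize_both(struct trace_array *tr, struct trace_array *max,
                           unsigned long val)
    {
            int ret = ring_buffer_resize(tr->buffer, val);

            if (ret < 0)
                    return ret;                     /* nothing changed yet */

            ret = ring_buffer_resize(max->buffer, val);
            if (ret < 0) {
                    /* put the first buffer back so the pair stays in sync */
                    if (ring_buffer_resize(tr->buffer, tr->entries) < 0) {
                            WARN_ON(1);             /* sizes now differ */
                            tracing_disabled = 1;
                    }
                    return ret;
            }

            tr->entries = val;
            return 0;
    }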
2771 2759
2760static int mark_printk(const char *fmt, ...)
2761{
2762 int ret;
2763 va_list args;
2764 va_start(args, fmt);
2765 ret = trace_vprintk(0, fmt, args);
2766 va_end(args);
2767 return ret;
2768}
2769
2770static ssize_t
2771tracing_mark_write(struct file *filp, const char __user *ubuf,
2772 size_t cnt, loff_t *fpos)
2773{
2774 char *buf;
2775 char *end;
2776 struct trace_array *tr = &global_trace;
2777
2778 if (!tr->ctrl || tracing_disabled)
2779 return -EINVAL;
2780
2781 if (cnt > TRACE_BUF_SIZE)
2782 cnt = TRACE_BUF_SIZE;
2783
2784 buf = kmalloc(cnt + 1, GFP_KERNEL);
2785 if (buf == NULL)
2786 return -ENOMEM;
2787
2788 if (copy_from_user(buf, ubuf, cnt)) {
2789 kfree(buf);
2790 return -EFAULT;
2791 }
2792
2793 /* Cut from the first nil or newline. */
2794 buf[cnt] = '\0';
2795 end = strchr(buf, '\n');
2796 if (end)
2797 *end = '\0';
2798
2799 cnt = mark_printk("%s\n", buf);
2800 kfree(buf);
2801 *fpos += cnt;
2802
2803 return cnt;
2804}
2805
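tracing_mark_write() backs the new trace_marker file, which lets user space drop its own annotations into the same buffer as kernel events (they show up as TRACE_PRINT entries). A minimal user-space sketch, assuming debugfs is mounted at /sys/kernel/debug and a tracer is enabled:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            const char *msg = "myapp: entering phase 2\n";
            int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);

            if (fd < 0)
                    return 1;
            /* the text appears inline in the trace, timestamped like any event */
            write(fd, msg, strlen(msg));
            close(fd);
            return 0;
    }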
2772static struct file_operations tracing_max_lat_fops = { 2806static struct file_operations tracing_max_lat_fops = {
2773 .open = tracing_open_generic, 2807 .open = tracing_open_generic,
2774 .read = tracing_max_lat_read, 2808 .read = tracing_max_lat_read,
@@ -2800,6 +2834,11 @@ static struct file_operations tracing_entries_fops = {
2800 .write = tracing_entries_write, 2834 .write = tracing_entries_write,
2801}; 2835};
2802 2836
2837static struct file_operations tracing_mark_fops = {
2838 .open = tracing_open_generic,
2839 .write = tracing_mark_write,
2840};
2841
2803#ifdef CONFIG_DYNAMIC_FTRACE 2842#ifdef CONFIG_DYNAMIC_FTRACE
2804 2843
2805static ssize_t 2844static ssize_t
@@ -2846,7 +2885,7 @@ struct dentry *tracing_init_dentry(void)
2846#include "trace_selftest.c" 2885#include "trace_selftest.c"
2847#endif 2886#endif
2848 2887
2849static __init void tracer_init_debugfs(void) 2888static __init int tracer_init_debugfs(void)
2850{ 2889{
2851 struct dentry *d_tracer; 2890 struct dentry *d_tracer;
2852 struct dentry *entry; 2891 struct dentry *entry;
@@ -2881,12 +2920,12 @@ static __init void tracer_init_debugfs(void)
2881 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 2920 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2882 &global_trace, &show_traces_fops); 2921 &global_trace, &show_traces_fops);
2883 if (!entry) 2922 if (!entry)
2884 pr_warning("Could not create debugfs 'trace' entry\n"); 2923 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2885 2924
2886 entry = debugfs_create_file("current_tracer", 0444, d_tracer, 2925 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2887 &global_trace, &set_tracer_fops); 2926 &global_trace, &set_tracer_fops);
2888 if (!entry) 2927 if (!entry)
2889 pr_warning("Could not create debugfs 'trace' entry\n"); 2928 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2890 2929
2891 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, 2930 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2892 &tracing_max_latency, 2931 &tracing_max_latency,
@@ -2899,7 +2938,7 @@ static __init void tracer_init_debugfs(void)
2899 &tracing_thresh, &tracing_max_lat_fops); 2938 &tracing_thresh, &tracing_max_lat_fops);
2900 if (!entry) 2939 if (!entry)
2901 pr_warning("Could not create debugfs " 2940 pr_warning("Could not create debugfs "
2902 "'tracing_threash' entry\n"); 2941 "'tracing_thresh' entry\n");
2903 entry = debugfs_create_file("README", 0644, d_tracer, 2942 entry = debugfs_create_file("README", 0644, d_tracer,
2904 NULL, &tracing_readme_fops); 2943 NULL, &tracing_readme_fops);
2905 if (!entry) 2944 if (!entry)
@@ -2909,13 +2948,19 @@ static __init void tracer_init_debugfs(void)
2909 NULL, &tracing_pipe_fops); 2948 NULL, &tracing_pipe_fops);
2910 if (!entry) 2949 if (!entry)
2911 pr_warning("Could not create debugfs " 2950 pr_warning("Could not create debugfs "
2912 "'tracing_threash' entry\n"); 2951 "'trace_pipe' entry\n");
2913 2952
2914 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 2953 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2915 &global_trace, &tracing_entries_fops); 2954 &global_trace, &tracing_entries_fops);
2916 if (!entry) 2955 if (!entry)
2917 pr_warning("Could not create debugfs " 2956 pr_warning("Could not create debugfs "
2918 "'tracing_threash' entry\n"); 2957 "'trace_entries' entry\n");
2958
2959 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2960 NULL, &tracing_mark_fops);
2961 if (!entry)
2962 pr_warning("Could not create debugfs "
2963 "'trace_marker' entry\n");
2919 2964
2920#ifdef CONFIG_DYNAMIC_FTRACE 2965#ifdef CONFIG_DYNAMIC_FTRACE
2921 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 2966 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
@@ -2928,230 +2973,263 @@ static __init void tracer_init_debugfs(void)
2928#ifdef CONFIG_SYSPROF_TRACER 2973#ifdef CONFIG_SYSPROF_TRACER
2929 init_tracer_sysprof_debugfs(d_tracer); 2974 init_tracer_sysprof_debugfs(d_tracer);
2930#endif 2975#endif
2976 return 0;
2931} 2977}
2932 2978
2933static int trace_alloc_page(void) 2979int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2934{ 2980{
2981 static DEFINE_SPINLOCK(trace_buf_lock);
2982 static char trace_buf[TRACE_BUF_SIZE];
2983
2984 struct ring_buffer_event *event;
2985 struct trace_array *tr = &global_trace;
2935 struct trace_array_cpu *data; 2986 struct trace_array_cpu *data;
2936 struct page *page, *tmp; 2987 struct print_entry *entry;
2937 LIST_HEAD(pages); 2988 unsigned long flags, irq_flags;
2938 void *array; 2989 int cpu, len = 0, size, pc;
2939 unsigned pages_allocated = 0;
2940 int i;
2941 2990
2942 /* first allocate a page for each CPU */ 2991 if (!tr->ctrl || tracing_disabled)
2943 for_each_tracing_cpu(i) { 2992 return 0;
2944 array = (void *)__get_free_page(GFP_KERNEL);
2945 if (array == NULL) {
2946 printk(KERN_ERR "tracer: failed to allocate page"
2947 "for trace buffer!\n");
2948 goto free_pages;
2949 }
2950 2993
2951 pages_allocated++; 2994 pc = preempt_count();
2952 page = virt_to_page(array); 2995 preempt_disable_notrace();
2953 list_add(&page->lru, &pages); 2996 cpu = raw_smp_processor_id();
2997 data = tr->data[cpu];
2954 2998
2955/* Only allocate if we are actually using the max trace */ 2999 if (unlikely(atomic_read(&data->disabled)))
2956#ifdef CONFIG_TRACER_MAX_TRACE 3000 goto out;
2957 array = (void *)__get_free_page(GFP_KERNEL);
2958 if (array == NULL) {
2959 printk(KERN_ERR "tracer: failed to allocate page"
2960 "for trace buffer!\n");
2961 goto free_pages;
2962 }
2963 pages_allocated++;
2964 page = virt_to_page(array);
2965 list_add(&page->lru, &pages);
2966#endif
2967 }
2968 3001
2969 /* Now that we successfully allocate a page per CPU, add them */ 3002 spin_lock_irqsave(&trace_buf_lock, flags);
2970 for_each_tracing_cpu(i) { 3003 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2971 data = global_trace.data[i];
2972 page = list_entry(pages.next, struct page, lru);
2973 list_del_init(&page->lru);
2974 list_add_tail(&page->lru, &data->trace_pages);
2975 ClearPageLRU(page);
2976 3004
2977#ifdef CONFIG_TRACER_MAX_TRACE 3005 len = min(len, TRACE_BUF_SIZE-1);
2978 data = max_tr.data[i]; 3006 trace_buf[len] = 0;
2979 page = list_entry(pages.next, struct page, lru);
2980 list_del_init(&page->lru);
2981 list_add_tail(&page->lru, &data->trace_pages);
2982 SetPageLRU(page);
2983#endif
2984 }
2985 tracing_pages_allocated += pages_allocated;
2986 global_trace.entries += ENTRIES_PER_PAGE;
2987 3007
2988 return 0; 3008 size = sizeof(*entry) + len + 1;
3009 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
3010 if (!event)
3011 goto out_unlock;
3012 entry = ring_buffer_event_data(event);
3013 tracing_generic_entry_update(&entry->ent, flags, pc);
3014 entry->ent.type = TRACE_PRINT;
3015 entry->ip = ip;
2989 3016
2990 free_pages: 3017 memcpy(&entry->buf, trace_buf, len);
2991 list_for_each_entry_safe(page, tmp, &pages, lru) { 3018 entry->buf[len] = 0;
2992 list_del_init(&page->lru); 3019 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
2993 __free_page(page); 3020
2994 } 3021 out_unlock:
2995 return -ENOMEM; 3022 spin_unlock_irqrestore(&trace_buf_lock, flags);
3023
3024 out:
3025 preempt_enable_notrace();
3026
3027 return len;
2996} 3028}
3029EXPORT_SYMBOL_GPL(trace_vprintk);
2997 3030
2998static int trace_free_page(void) 3031int __ftrace_printk(unsigned long ip, const char *fmt, ...)
2999{ 3032{
3000 struct trace_array_cpu *data; 3033 int ret;
3001 struct page *page; 3034 va_list ap;
3002 struct list_head *p;
3003 int i;
3004 int ret = 0;
3005 3035
3006 /* free one page from each buffer */ 3036 if (!(trace_flags & TRACE_ITER_PRINTK))
3007 for_each_tracing_cpu(i) { 3037 return 0;
3008 data = global_trace.data[i];
3009 p = data->trace_pages.next;
3010 if (p == &data->trace_pages) {
3011 /* should never happen */
3012 WARN_ON(1);
3013 tracing_disabled = 1;
3014 ret = -1;
3015 break;
3016 }
3017 page = list_entry(p, struct page, lru);
3018 ClearPageLRU(page);
3019 list_del(&page->lru);
3020 tracing_pages_allocated--;
3021 tracing_pages_allocated--;
3022 __free_page(page);
3023 3038
3024 tracing_reset(data); 3039 va_start(ap, fmt);
3040 ret = trace_vprintk(ip, fmt, ap);
3041 va_end(ap);
3042 return ret;
3043}
3044EXPORT_SYMBOL_GPL(__ftrace_printk);
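__ftrace_printk() funnels a formatted string into the trace buffer through trace_vprintk(); callers normally go through the ftrace_printk() wrapper from linux/ftrace.h so the call site's instruction pointer is recorded for them. A hedged example of how instrumented code might use it (my_driver_op is made up):

    #include <linux/ftrace.h>

    static int my_driver_op(int unit, int len)
    {
            /* interleaved with other trace entries, gated by TRACE_ITER_PRINTK */
            ftrace_printk("my_driver: unit %d handling %d bytes\n", unit, len);
            return 0;
    }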
3025 3045
3026#ifdef CONFIG_TRACER_MAX_TRACE 3046static int trace_panic_handler(struct notifier_block *this,
3027 data = max_tr.data[i]; 3047 unsigned long event, void *unused)
3028 p = data->trace_pages.next; 3048{
3029 if (p == &data->trace_pages) { 3049 ftrace_dump();
3030 /* should never happen */ 3050 return NOTIFY_OK;
3031 WARN_ON(1); 3051}
3032 tracing_disabled = 1;
3033 ret = -1;
3034 break;
3035 }
3036 page = list_entry(p, struct page, lru);
3037 ClearPageLRU(page);
3038 list_del(&page->lru);
3039 __free_page(page);
3040 3052
3041 tracing_reset(data); 3053static struct notifier_block trace_panic_notifier = {
3042#endif 3054 .notifier_call = trace_panic_handler,
3043 } 3055 .next = NULL,
3044 global_trace.entries -= ENTRIES_PER_PAGE; 3056 .priority = 150 /* priority: INT_MAX >= x >= 0 */
3057};
3045 3058
3046 return ret; 3059static int trace_die_handler(struct notifier_block *self,
3060 unsigned long val,
3061 void *data)
3062{
3063 switch (val) {
3064 case DIE_OOPS:
3065 ftrace_dump();
3066 break;
3067 default:
3068 break;
3069 }
3070 return NOTIFY_OK;
3047} 3071}
3048 3072
3049__init static int tracer_alloc_buffers(void) 3073static struct notifier_block trace_die_notifier = {
3074 .notifier_call = trace_die_handler,
3075 .priority = 200
3076};
3077
3078/*
3079 * printk is set to max of 1024, we really don't need it that big.
3080 * Nothing should be printing 1000 characters anyway.
3081 */
3082#define TRACE_MAX_PRINT 1000
3083
3084/*
3085 * Define here KERN_TRACE so that we have one place to modify
3086 * it if we decide to change what log level the ftrace dump
3087 * should be at.
3088 */
3089#define KERN_TRACE KERN_INFO
3090
3091static void
3092trace_printk_seq(struct trace_seq *s)
3050{ 3093{
3051 struct trace_array_cpu *data; 3094 /* Probably should print a warning here. */
3052 void *array; 3095 if (s->len >= 1000)
3053 struct page *page; 3096 s->len = 1000;
3054 int pages = 0;
3055 int ret = -ENOMEM;
3056 int i;
3057 3097
3058 /* TODO: make the number of buffers hot pluggable with CPUS */ 3098 /* should be zero ended, but we are paranoid. */
3059 tracing_nr_buffers = num_possible_cpus(); 3099 s->buffer[s->len] = 0;
3060 tracing_buffer_mask = cpu_possible_map;
3061 3100
3062 /* Allocate the first page for all buffers */ 3101 printk(KERN_TRACE "%s", s->buffer);
3063 for_each_tracing_cpu(i) { 3102
3064 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i); 3103 trace_seq_reset(s);
3065 max_tr.data[i] = &per_cpu(max_data, i); 3104}
3066 3105
3067 array = (void *)__get_free_page(GFP_KERNEL);
3068 if (array == NULL) {
3069 printk(KERN_ERR "tracer: failed to allocate page"
3070 "for trace buffer!\n");
3071 goto free_buffers;
3072 }
3073 3106
3074 /* set the array to the list */ 3107void ftrace_dump(void)
3075 INIT_LIST_HEAD(&data->trace_pages); 3108{
3076 page = virt_to_page(array); 3109 static DEFINE_SPINLOCK(ftrace_dump_lock);
3077 list_add(&page->lru, &data->trace_pages); 3110 /* use static because iter can be a bit big for the stack */
3078 /* use the LRU flag to differentiate the two buffers */ 3111 static struct trace_iterator iter;
3079 ClearPageLRU(page); 3112 static cpumask_t mask;
3113 static int dump_ran;
3114 unsigned long flags;
3115 int cnt = 0, cpu;
3080 3116
3081 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 3117 /* only one dump */
3082 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 3118 spin_lock_irqsave(&ftrace_dump_lock, flags);
3119 if (dump_ran)
3120 goto out;
3083 3121
3084/* Only allocate if we are actually using the max trace */ 3122 dump_ran = 1;
3085#ifdef CONFIG_TRACER_MAX_TRACE
3086 array = (void *)__get_free_page(GFP_KERNEL);
3087 if (array == NULL) {
3088 printk(KERN_ERR "tracer: failed to allocate page"
3089 "for trace buffer!\n");
3090 goto free_buffers;
3091 }
3092 3123
3093 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); 3124 /* No turning back! */
3094 page = virt_to_page(array); 3125 ftrace_kill();
3095 list_add(&page->lru, &max_tr.data[i]->trace_pages); 3126
3096 SetPageLRU(page); 3127 for_each_tracing_cpu(cpu) {
3097#endif 3128 atomic_inc(&global_trace.data[cpu]->disabled);
3098 } 3129 }
3099 3130
3131 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3132
3133 iter.tr = &global_trace;
3134 iter.trace = current_trace;
3135
3100 /* 3136 /*
3101 * Since we allocate by orders of pages, we may be able to 3137 * We need to stop all tracing on all CPUS to read the
3102 * round up a bit. 3138 * the next buffer. This is a bit expensive, but is
3139 * not done often. We fill all what we can read,
3140 * and then release the locks again.
3103 */ 3141 */
3104 global_trace.entries = ENTRIES_PER_PAGE;
3105 pages++;
3106 3142
3107 while (global_trace.entries < trace_nr_entries) { 3143 cpus_clear(mask);
3108 if (trace_alloc_page()) 3144
3109 break; 3145 while (!trace_empty(&iter)) {
3110 pages++; 3146
3147 if (!cnt)
3148 printk(KERN_TRACE "---------------------------------\n");
3149
3150 cnt++;
3151
3152 /* reset all but tr, trace, and overruns */
3153 memset(&iter.seq, 0,
3154 sizeof(struct trace_iterator) -
3155 offsetof(struct trace_iterator, seq));
3156 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3157 iter.pos = -1;
3158
3159 if (find_next_entry_inc(&iter) != NULL) {
3160 print_trace_line(&iter);
3161 trace_consume(&iter);
3162 }
3163
3164 trace_printk_seq(&iter.seq);
3111 } 3165 }
3112 max_tr.entries = global_trace.entries;
3113 3166
3114 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n", 3167 if (!cnt)
3115 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE); 3168 printk(KERN_TRACE " (ftrace buffer empty)\n");
3116 pr_info(" actual entries %ld\n", global_trace.entries); 3169 else
3170 printk(KERN_TRACE "---------------------------------\n");
3171
3172 out:
3173 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3174}
3175
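ftrace_dump() is the one-shot escape hatch the new panic and die notifiers call: it kills further tracing, then walks the buffer with the normal iterator machinery and pushes every formatted line out through printk. Other last-resort debugging paths can call it the same way; a hedged sketch (my_watchdog_expired is hypothetical, and it assumes a visible declaration of ftrace_dump()):

    static void my_watchdog_expired(void)
    {
            printk(KERN_ERR "watchdog: deadline missed, dumping trace buffer\n");
            /* one-shot: dump_ran is set and tracing stays disabled afterwards */
            ftrace_dump();
    }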
3176__init static int tracer_alloc_buffers(void)
3177{
3178 struct trace_array_cpu *data;
3179 int i;
3180
3181 /* TODO: make the number of buffers hot pluggable with CPUS */
3182 tracing_buffer_mask = cpu_possible_map;
3183
3184 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3185 TRACE_BUFFER_FLAGS);
3186 if (!global_trace.buffer) {
3187 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3188 WARN_ON(1);
3189 return 0;
3190 }
3191 global_trace.entries = ring_buffer_size(global_trace.buffer);
3192
3193#ifdef CONFIG_TRACER_MAX_TRACE
3194 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3195 TRACE_BUFFER_FLAGS);
3196 if (!max_tr.buffer) {
3197 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3198 WARN_ON(1);
3199 ring_buffer_free(global_trace.buffer);
3200 return 0;
3201 }
3202 max_tr.entries = ring_buffer_size(max_tr.buffer);
3203 WARN_ON(max_tr.entries != global_trace.entries);
3204#endif
3117 3205
3118 tracer_init_debugfs(); 3206 /* Allocate the first page for all buffers */
3207 for_each_tracing_cpu(i) {
3208 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3209 max_tr.data[i] = &per_cpu(max_data, i);
3210 }
3119 3211
3120 trace_init_cmdlines(); 3212 trace_init_cmdlines();
3121 3213
3122 register_tracer(&no_tracer); 3214 register_tracer(&nop_trace);
3123 current_trace = &no_tracer; 3215#ifdef CONFIG_BOOT_TRACER
3216 register_tracer(&boot_tracer);
3217 current_trace = &boot_tracer;
3218 current_trace->init(&global_trace);
3219#else
3220 current_trace = &nop_trace;
3221#endif
3124 3222
3125 /* All seems OK, enable tracing */ 3223 /* All seems OK, enable tracing */
3126 global_trace.ctrl = tracer_enabled; 3224 global_trace.ctrl = tracer_enabled;
3127 tracing_disabled = 0; 3225 tracing_disabled = 0;
3128 3226
3129 return 0; 3227 atomic_notifier_chain_register(&panic_notifier_list,
3130 3228 &trace_panic_notifier);
3131 free_buffers:
3132 for (i-- ; i >= 0; i--) {
3133 struct page *page, *tmp;
3134 struct trace_array_cpu *data = global_trace.data[i];
3135 3229
3136 if (data) { 3230 register_die_notifier(&trace_die_notifier);
3137 list_for_each_entry_safe(page, tmp,
3138 &data->trace_pages, lru) {
3139 list_del_init(&page->lru);
3140 __free_page(page);
3141 }
3142 }
3143 3231
3144#ifdef CONFIG_TRACER_MAX_TRACE 3232 return 0;
3145 data = max_tr.data[i];
3146 if (data) {
3147 list_for_each_entry_safe(page, tmp,
3148 &data->trace_pages, lru) {
3149 list_del_init(&page->lru);
3150 __free_page(page);
3151 }
3152 }
3153#endif
3154 }
3155 return ret;
3156} 3233}
3157fs_initcall(tracer_alloc_buffers); 3234early_initcall(tracer_alloc_buffers);
3235fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2b..8465ad052707 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,9 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h>
8#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h>
9 11
10enum trace_type { 12enum trace_type {
11 __TRACE_FIRST_TYPE = 0, 13 __TRACE_FIRST_TYPE = 0,
@@ -13,38 +15,60 @@ enum trace_type {
13 TRACE_FN, 15 TRACE_FN,
14 TRACE_CTX, 16 TRACE_CTX,
15 TRACE_WAKE, 17 TRACE_WAKE,
18 TRACE_CONT,
16 TRACE_STACK, 19 TRACE_STACK,
20 TRACE_PRINT,
17 TRACE_SPECIAL, 21 TRACE_SPECIAL,
18 TRACE_MMIO_RW, 22 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP, 23 TRACE_MMIO_MAP,
24 TRACE_BOOT,
20 25
21 __TRACE_LAST_TYPE 26 __TRACE_LAST_TYPE
22}; 27};
23 28
24/* 29/*
30 * The trace entry - the most basic unit of tracing. This is what
31 * is printed in the end as a single line in the trace output, such as:
32 *
33 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
34 */
35struct trace_entry {
36 unsigned char type;
37 unsigned char cpu;
38 unsigned char flags;
39 unsigned char preempt_count;
40 int pid;
41};
42
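Every record type now embeds struct trace_entry as its first member instead of sharing one fixed-size union, which is what allows differently sized records to coexist in the ring buffer. A hypothetical additional record type would follow the same shape (example_entry and TRACE_EXAMPLE are invented for illustration):

    struct example_entry {
            struct trace_entry      ent;    /* common header, must come first */
            unsigned long           ip;
            unsigned long           value;
    };

    /*
     * It would then get its own TRACE_EXAMPLE id in enum trace_type and a
     * matching line in trace_assign_type():
     *
     *      IF_ASSIGN(var, ent, struct example_entry, TRACE_EXAMPLE);
     */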
43/*
25 * Function trace entry - function address and parent function addres: 44 * Function trace entry - function address and parent function addres:
26 */ 45 */
27struct ftrace_entry { 46struct ftrace_entry {
47 struct trace_entry ent;
28 unsigned long ip; 48 unsigned long ip;
29 unsigned long parent_ip; 49 unsigned long parent_ip;
30}; 50};
51extern struct tracer boot_tracer;
31 52
32/* 53/*
33 * Context switch trace entry - which task (and prio) we switched from/to: 54 * Context switch trace entry - which task (and prio) we switched from/to:
34 */ 55 */
35struct ctx_switch_entry { 56struct ctx_switch_entry {
57 struct trace_entry ent;
36 unsigned int prev_pid; 58 unsigned int prev_pid;
37 unsigned char prev_prio; 59 unsigned char prev_prio;
38 unsigned char prev_state; 60 unsigned char prev_state;
39 unsigned int next_pid; 61 unsigned int next_pid;
40 unsigned char next_prio; 62 unsigned char next_prio;
41 unsigned char next_state; 63 unsigned char next_state;
64 unsigned int next_cpu;
42}; 65};
43 66
44/* 67/*
45 * Special (free-form) trace entry: 68 * Special (free-form) trace entry:
46 */ 69 */
47struct special_entry { 70struct special_entry {
71 struct trace_entry ent;
48 unsigned long arg1; 72 unsigned long arg1;
49 unsigned long arg2; 73 unsigned long arg2;
50 unsigned long arg3; 74 unsigned long arg3;
@@ -57,33 +81,62 @@ struct special_entry {
57#define FTRACE_STACK_ENTRIES 8 81#define FTRACE_STACK_ENTRIES 8
58 82
59struct stack_entry { 83struct stack_entry {
84 struct trace_entry ent;
60 unsigned long caller[FTRACE_STACK_ENTRIES]; 85 unsigned long caller[FTRACE_STACK_ENTRIES];
61}; 86};
62 87
63/* 88/*
64 * The trace entry - the most basic unit of tracing. This is what 89 * ftrace_printk entry:
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */ 90 */
69struct trace_entry { 91struct print_entry {
70 char type; 92 struct trace_entry ent;
71 char cpu; 93 unsigned long ip;
72 char flags; 94 char buf[];
73 char preempt_count; 95};
74 int pid; 96
75 cycle_t t; 97#define TRACE_OLD_SIZE 88
76 union { 98
77 struct ftrace_entry fn; 99struct trace_field_cont {
78 struct ctx_switch_entry ctx; 100 unsigned char type;
79 struct special_entry special; 101 /* Temporary till we get rid of this completely */
80 struct stack_entry stack; 102 char buf[TRACE_OLD_SIZE - 1];
81 struct mmiotrace_rw mmiorw; 103};
82 struct mmiotrace_map mmiomap; 104
83 }; 105struct trace_mmiotrace_rw {
106 struct trace_entry ent;
107 struct mmiotrace_rw rw;
108};
109
110struct trace_mmiotrace_map {
111 struct trace_entry ent;
112 struct mmiotrace_map map;
113};
114
115struct trace_boot {
116 struct trace_entry ent;
117 struct boot_trace initcall;
118};
119
120/*
121 * trace_flag_type is an enumeration that holds different
122 * states when a trace occurs. These are:
123 * IRQS_OFF - interrupts were disabled
124 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
 125 * NEED_RESCHED - reschedule is requested
126 * HARDIRQ - inside an interrupt handler
127 * SOFTIRQ - inside a softirq handler
128 * CONT - multiple entries hold the trace item
129 */
130enum trace_flag_type {
131 TRACE_FLAG_IRQS_OFF = 0x01,
132 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
133 TRACE_FLAG_NEED_RESCHED = 0x04,
134 TRACE_FLAG_HARDIRQ = 0x08,
135 TRACE_FLAG_SOFTIRQ = 0x10,
136 TRACE_FLAG_CONT = 0x20,
84}; 137};
85 138
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) 139#define TRACE_BUF_SIZE 1024
87 140
88/* 141/*
89 * The CPU trace array - it consists of thousands of trace entries 142 * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +144,9 @@ struct trace_entry {
91 * the trace, etc.) 144 * the trace, etc.)
92 */ 145 */
93struct trace_array_cpu { 146struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled; 147 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98 148
99 /* these fields get copied into max-trace: */ 149 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx; 150 unsigned long trace_idx;
105 unsigned long overrun; 151 unsigned long overrun;
106 unsigned long saved_latency; 152 unsigned long saved_latency;
@@ -124,6 +170,7 @@ struct trace_iterator;
124 * They have on/off state as well: 170 * They have on/off state as well:
125 */ 171 */
126struct trace_array { 172struct trace_array {
173 struct ring_buffer *buffer;
127 unsigned long entries; 174 unsigned long entries;
128 long ctrl; 175 long ctrl;
129 int cpu; 176 int cpu;
@@ -132,6 +179,56 @@ struct trace_array {
132 struct trace_array_cpu *data[NR_CPUS]; 179 struct trace_array_cpu *data[NR_CPUS];
133}; 180};
134 181
182#define FTRACE_CMP_TYPE(var, type) \
183 __builtin_types_compatible_p(typeof(var), type *)
184
185#undef IF_ASSIGN
186#define IF_ASSIGN(var, entry, etype, id) \
187 if (FTRACE_CMP_TYPE(var, etype)) { \
188 var = (typeof(var))(entry); \
189 WARN_ON(id && (entry)->type != id); \
190 break; \
191 }
192
193/* Will cause compile errors if type is not found. */
194extern void __ftrace_bad_type(void);
195
196/*
197 * The trace_assign_type is a verifier that the entry type is
198 * the same as the type being assigned. To add new types simply
199 * add a line with the following format:
200 *
201 * IF_ASSIGN(var, ent, type, id);
202 *
203 * Where "type" is the trace type that includes the trace_entry
204 * as the "ent" item. And "id" is the trace identifier that is
205 * used in the trace_type enum.
206 *
207 * If the type can have more than one id, then use zero.
208 */
209#define trace_assign_type(var, ent) \
210 do { \
211 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
212 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
213 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
214 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
215 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
216 IF_ASSIGN(var, ent, struct special_entry, 0); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
218 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
222 __ftrace_bad_type(); \
223 } while (0)
224
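trace_assign_type() leans on __builtin_types_compatible_p: the IF_ASSIGN chain selects the branch whose struct type matches the destination pointer, optionally checks the runtime type id with WARN_ON, and if no branch matches, the call to the undefined __ftrace_bad_type() survives and the build fails. In use it looks roughly like this (example_use is illustrative):

    static void example_use(struct trace_entry *entry)
    {
            struct stack_entry *field;

            /*
             * Matches the struct stack_entry branch at compile time;
             * WARN_ON fires at run time if entry->type != TRACE_STACK.
             */
            trace_assign_type(field, entry);
            (void)field;

            /*
             * A struct that is not listed never matches any branch, so
             * __ftrace_bad_type() is reached and the build breaks:
             *
             *      struct unknown_entry *bad;
             *      trace_assign_type(bad, entry);
             */
    }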
225/* Return values for print_line callback */
226enum print_line_t {
227 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
228 TRACE_TYPE_HANDLED = 1,
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230};
231
135/* 232/*
136 * A specific tracer, represented by methods that operate on a trace array: 233 * A specific tracer, represented by methods that operate on a trace array:
137 */ 234 */
@@ -152,7 +249,7 @@ struct tracer {
152 int (*selftest)(struct tracer *trace, 249 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr); 250 struct trace_array *tr);
154#endif 251#endif
155 int (*print_line)(struct trace_iterator *iter); 252 enum print_line_t (*print_line)(struct trace_iterator *iter);
156 struct tracer *next; 253 struct tracer *next;
157 int print_max; 254 int print_max;
158}; 255};
@@ -171,57 +268,58 @@ struct trace_iterator {
171 struct trace_array *tr; 268 struct trace_array *tr;
172 struct tracer *trace; 269 struct tracer *trace;
173 void *private; 270 void *private;
174 long last_overrun[NR_CPUS]; 271 struct ring_buffer_iter *buffer_iter[NR_CPUS];
175 long overrun[NR_CPUS];
176 272
177 /* The below is zeroed out in pipe_read */ 273 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq; 274 struct trace_seq seq;
179 struct trace_entry *ent; 275 struct trace_entry *ent;
180 int cpu; 276 int cpu;
181 277 u64 ts;
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184 278
185 unsigned long iter_flags; 279 unsigned long iter_flags;
186 loff_t pos; 280 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx; 281 long idx;
191}; 282};
192 283
193void tracing_reset(struct trace_array_cpu *data); 284void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu);
194int tracing_open_generic(struct inode *inode, struct file *filp); 286int tracing_open_generic(struct inode *inode, struct file *filp);
195struct dentry *tracing_init_dentry(void); 287struct dentry *tracing_init_dentry(void);
196void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 288void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197 289
290struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
291 struct trace_array_cpu *data);
292void tracing_generic_entry_update(struct trace_entry *entry,
293 unsigned long flags,
294 int pc);
295
198void ftrace(struct trace_array *tr, 296void ftrace(struct trace_array *tr,
199 struct trace_array_cpu *data, 297 struct trace_array_cpu *data,
200 unsigned long ip, 298 unsigned long ip,
201 unsigned long parent_ip, 299 unsigned long parent_ip,
202 unsigned long flags); 300 unsigned long flags, int pc);
203void tracing_sched_switch_trace(struct trace_array *tr, 301void tracing_sched_switch_trace(struct trace_array *tr,
204 struct trace_array_cpu *data, 302 struct trace_array_cpu *data,
205 struct task_struct *prev, 303 struct task_struct *prev,
206 struct task_struct *next, 304 struct task_struct *next,
207 unsigned long flags); 305 unsigned long flags, int pc);
208void tracing_record_cmdline(struct task_struct *tsk); 306void tracing_record_cmdline(struct task_struct *tsk);
209 307
210void tracing_sched_wakeup_trace(struct trace_array *tr, 308void tracing_sched_wakeup_trace(struct trace_array *tr,
211 struct trace_array_cpu *data, 309 struct trace_array_cpu *data,
212 struct task_struct *wakee, 310 struct task_struct *wakee,
213 struct task_struct *cur, 311 struct task_struct *cur,
214 unsigned long flags); 312 unsigned long flags, int pc);
215void trace_special(struct trace_array *tr, 313void trace_special(struct trace_array *tr,
216 struct trace_array_cpu *data, 314 struct trace_array_cpu *data,
217 unsigned long arg1, 315 unsigned long arg1,
218 unsigned long arg2, 316 unsigned long arg2,
219 unsigned long arg3); 317 unsigned long arg3, int pc);
220void trace_function(struct trace_array *tr, 318void trace_function(struct trace_array *tr,
221 struct trace_array_cpu *data, 319 struct trace_array_cpu *data,
222 unsigned long ip, 320 unsigned long ip,
223 unsigned long parent_ip, 321 unsigned long parent_ip,
224 unsigned long flags); 322 unsigned long flags, int pc);
225 323
226void tracing_start_cmdline_record(void); 324void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void); 325void tracing_stop_cmdline_record(void);
@@ -239,7 +337,7 @@ void update_max_tr_single(struct trace_array *tr,
239 337
240extern cycle_t ftrace_now(int cpu); 338extern cycle_t ftrace_now(int cpu);
241 339
242#ifdef CONFIG_FTRACE 340#ifdef CONFIG_FUNCTION_TRACER
243void tracing_start_function_trace(void); 341void tracing_start_function_trace(void);
244void tracing_stop_function_trace(void); 342void tracing_stop_function_trace(void);
245#else 343#else
@@ -268,51 +366,33 @@ extern unsigned long ftrace_update_tot_cnt;
268extern int DYN_FTRACE_TEST_NAME(void); 366extern int DYN_FTRACE_TEST_NAME(void);
269#endif 367#endif
270 368
271#ifdef CONFIG_MMIOTRACE
272extern void __trace_mmiotrace_rw(struct trace_array *tr,
273 struct trace_array_cpu *data,
274 struct mmiotrace_rw *rw);
275extern void __trace_mmiotrace_map(struct trace_array *tr,
276 struct trace_array_cpu *data,
277 struct mmiotrace_map *map);
278#endif
279
280#ifdef CONFIG_FTRACE_STARTUP_TEST 369#ifdef CONFIG_FTRACE_STARTUP_TEST
281#ifdef CONFIG_FTRACE
282extern int trace_selftest_startup_function(struct tracer *trace, 370extern int trace_selftest_startup_function(struct tracer *trace,
283 struct trace_array *tr); 371 struct trace_array *tr);
284#endif
285#ifdef CONFIG_IRQSOFF_TRACER
286extern int trace_selftest_startup_irqsoff(struct tracer *trace, 372extern int trace_selftest_startup_irqsoff(struct tracer *trace,
287 struct trace_array *tr); 373 struct trace_array *tr);
288#endif
289#ifdef CONFIG_PREEMPT_TRACER
290extern int trace_selftest_startup_preemptoff(struct tracer *trace, 374extern int trace_selftest_startup_preemptoff(struct tracer *trace,
291 struct trace_array *tr); 375 struct trace_array *tr);
292#endif
293#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
294extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, 376extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
295 struct trace_array *tr); 377 struct trace_array *tr);
296#endif
297#ifdef CONFIG_SCHED_TRACER
298extern int trace_selftest_startup_wakeup(struct tracer *trace, 378extern int trace_selftest_startup_wakeup(struct tracer *trace,
299 struct trace_array *tr); 379 struct trace_array *tr);
300#endif 380extern int trace_selftest_startup_nop(struct tracer *trace,
301#ifdef CONFIG_CONTEXT_SWITCH_TRACER 381 struct trace_array *tr);
302extern int trace_selftest_startup_sched_switch(struct tracer *trace, 382extern int trace_selftest_startup_sched_switch(struct tracer *trace,
303 struct trace_array *tr); 383 struct trace_array *tr);
304#endif
305#ifdef CONFIG_SYSPROF_TRACER
306extern int trace_selftest_startup_sysprof(struct tracer *trace, 384extern int trace_selftest_startup_sysprof(struct tracer *trace,
307 struct trace_array *tr); 385 struct trace_array *tr);
308#endif
309#endif /* CONFIG_FTRACE_STARTUP_TEST */ 386#endif /* CONFIG_FTRACE_STARTUP_TEST */
310 387
311extern void *head_page(struct trace_array_cpu *data); 388extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt); 393 size_t cnt);
315extern long ns2usecs(cycle_t nsec); 394extern long ns2usecs(cycle_t nsec);
395extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
316 396
317extern unsigned long trace_flags; 397extern unsigned long trace_flags;
318 398
@@ -334,6 +414,9 @@ enum trace_iterator_flags {
334 TRACE_ITER_BLOCK = 0x80, 414 TRACE_ITER_BLOCK = 0x80,
335 TRACE_ITER_STACKTRACE = 0x100, 415 TRACE_ITER_STACKTRACE = 0x100,
336 TRACE_ITER_SCHED_TREE = 0x200, 416 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400,
337}; 418};
338 419
420extern struct tracer nop_trace;
421
339#endif /* _LINUX_KERNEL_TRACE_H */ 422#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 000000000000..d0a5e50eeff2
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,126 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12
13#include "trace.h"
14
15static struct trace_array *boot_trace;
16static int trace_boot_enabled;
17
18
19/* Should be started after do_pre_smp_initcalls() in init/main.c */
20void start_boot_trace(void)
21{
22 trace_boot_enabled = 1;
23}
24
25void stop_boot_trace(void)
26{
27 trace_boot_enabled = 0;
28}
29
30void reset_boot_trace(struct trace_array *tr)
31{
32 stop_boot_trace();
33}
34
35static void boot_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 boot_trace = tr;
39
40 trace_boot_enabled = 0;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44}
45
46static void boot_trace_ctrl_update(struct trace_array *tr)
47{
48 if (tr->ctrl)
49 start_boot_trace();
50 else
51 stop_boot_trace();
52}
53
54static enum print_line_t initcall_print_line(struct trace_iterator *iter)
55{
56 int ret;
57 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime);
62 struct timespec rettime = ktime_to_timespec(it->rettime);
63
64 if (entry->type == TRACE_BOOT) {
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
66 calltime.tv_sec,
67 calltime.tv_nsec,
68 it->func, it->caller);
69 if (!ret)
70 return TRACE_TYPE_PARTIAL_LINE;
71
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n",
74 rettime.tv_sec,
75 rettime.tv_nsec,
76 it->func, it->result, it->duration);
77
78 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE;
80 return TRACE_TYPE_HANDLED;
81 }
82 return TRACE_TYPE_UNHANDLED;
83}
84
85struct tracer boot_tracer __read_mostly =
86{
87 .name = "initcall",
88 .init = boot_trace_init,
89 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line,
92};
93
94void trace_boot(struct boot_trace *it, initcall_t fn)
95{
96 struct ring_buffer_event *event;
97 struct trace_boot *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace;
101
102 if (!trace_boot_enabled)
103 return;
104
105 /* Get its name now since this function could
106 * disappear because it is in the .init section.
107 */
108 sprint_symbol(it->func, (unsigned long)fn);
109 preempt_disable();
110 data = tr->data[smp_processor_id()];
111
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags);
114 if (!event)
115 goto out;
116 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT;
119 entry->initcall = *it;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121
122 trace_wake_up();
123
124 out:
125 preempt_enable();
126}
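trace_boot() demonstrates the write-side pattern every tracer now uses: reserve an event of exactly the record's size, fill in the embedded trace_entry header plus the type-specific payload, then commit. Condensed to its core with the same ring buffer calls (the example_entry/TRACE_EXAMPLE names are the invented ones from the trace.h sketch above):

    static void emit_example(struct trace_array *tr, unsigned long ip,
                             unsigned long val)
    {
            struct ring_buffer_event *event;
            struct example_entry *entry;
            unsigned long irq_flags;

            event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
                                             &irq_flags);
            if (!event)
                    return;                 /* buffer unavailable: drop the record */

            entry = ring_buffer_event_data(event);
            tracing_generic_entry_update(&entry->ent, 0, 0);
            entry->ent.type = TRACE_EXAMPLE;
            entry->ip       = ip;
            entry->value    = val;

            ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
            trace_wake_up();                /* poke readers blocked on trace_pipe */
    }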
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 312144897970..0f85a64003d3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
23 tr->time_start = ftrace_now(tr->cpu); 23 tr->time_start = ftrace_now(tr->cpu);
24 24
25 for_each_online_cpu(cpu) 25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]); 26 tracing_reset(tr, cpu);
27} 27}
28 28
29static void start_function_trace(struct trace_array *tr) 29static void start_function_trace(struct trace_array *tr)
@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
64 64
65static struct tracer function_trace __read_mostly = 65static struct tracer function_trace __read_mostly =
66{ 66{
67 .name = "ftrace", 67 .name = "function",
68 .init = function_trace_init, 68 .init = function_trace_init,
69 .reset = function_trace_reset, 69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 70 .ctrl_update = function_trace_ctrl_update,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649fa..9c74071c10e0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
63 */ 63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence; 64static __cacheline_aligned_in_smp unsigned long max_sequence;
65 65
66#ifdef CONFIG_FTRACE 66#ifdef CONFIG_FUNCTION_TRACER
67/* 67/*
68 * irqsoff uses its own tracer function to keep the overhead down: 68 * irqsoff uses its own tracer function to keep the overhead down:
69 */ 69 */
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags); 98 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
104{ 104{
105 .func = irqsoff_tracer_call, 105 .func = irqsoff_tracer_call,
106}; 106};
107#endif /* CONFIG_FTRACE */ 107#endif /* CONFIG_FUNCTION_TRACER */
108 108
109/* 109/*
110 * Should this new latency be reported/recorded? 110 * Should this new latency be reported/recorded?
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
130 unsigned long latency, t0, t1; 130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta; 131 cycle_t T0, T1, delta;
132 unsigned long flags; 132 unsigned long flags;
133 int pc;
133 134
134 /* 135 /*
135 * usecs conversion is slow so we try to delay the conversion 136 * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
141 142
142 local_save_flags(flags); 143 local_save_flags(flags);
143 144
145 pc = preempt_count();
146
144 if (!report_latency(delta)) 147 if (!report_latency(delta))
145 goto out; 148 goto out;
146 149
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 153 if (!report_latency(delta))
151 goto out_unlock; 154 goto out_unlock;
152 155
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
154 157
155 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
156 159
@@ -173,8 +176,8 @@ out_unlock:
173out: 176out:
174 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data); 179 tracing_reset(tr, cpu);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
178} 181}
179 182
180static inline void 183static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
203 data->critical_sequence = max_sequence; 206 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu); 207 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip; 208 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data); 209 tracing_reset(tr, cpu);
207 210
208 local_save_flags(flags); 211 local_save_flags(flags);
209 212
210 trace_function(tr, data, ip, parent_ip, flags); 213 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
211 214
212 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
213 216
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
234 237
235 data = tr->data[cpu]; 238 data = tr->data[cpu];
236 239
237 if (unlikely(!data) || unlikely(!head_page(data)) || 240 if (unlikely(!data) ||
238 !data->critical_start || atomic_read(&data->disabled)) 241 !data->critical_start || atomic_read(&data->disabled))
239 return; 242 return;
240 243
241 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
242 245
243 local_save_flags(flags); 246 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags); 247 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0; 249 data->critical_start = 0;
247 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
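
A recurring change in this file (and in the sched tracers below) is that trace_function() now takes the preempt count as a sixth argument, sampled up front rather than derived later. A small sketch of the call shape, with a hypothetical helper wrapped around the real trace_function():

	#include "trace.h"

	/* hypothetical helper; trace_function() and its new pc argument are real */
	static void example_trace_here(struct trace_array *tr,
				       struct trace_array_cpu *data,
				       unsigned long ip, unsigned long parent_ip)
	{
		unsigned long flags;
		int pc = preempt_count();	/* sample once, before any locking */

		local_save_flags(flags);
		trace_function(tr, data, ip, parent_ip, flags, pc);
	}
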
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb4..f28484618ff0 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
27 tr->time_start = ftrace_now(tr->cpu); 27 tr->time_start = ftrace_now(tr->cpu);
28 28
29 for_each_online_cpu(cpu) 29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]); 30 tracing_reset(tr, cpu);
31} 31}
32 32
33static void mmio_trace_init(struct trace_array *tr) 33static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
130{ 130{
131 int cpu; 131 int cpu;
132 unsigned long cnt = 0; 132 unsigned long cnt = 0;
133/* FIXME: */
134#if 0
133 for_each_online_cpu(cpu) { 135 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu]; 136 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0; 137 iter->overrun[cpu] = 0;
136 } 138 }
139#endif
140 (void)cpu;
137 return cnt; 141 return cnt;
138} 142}
139 143
@@ -171,17 +175,21 @@ print_out:
171 return (ret == -EBUSY) ? 0 : ret; 175 return (ret == -EBUSY) ? 0 : ret;
172} 176}
173 177
174static int mmio_print_rw(struct trace_iterator *iter) 178static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
175{ 179{
176 struct trace_entry *entry = iter->ent; 180 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw; 181 struct trace_mmiotrace_rw *field;
182 struct mmiotrace_rw *rw;
178 struct trace_seq *s = &iter->seq; 183 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t); 184 unsigned long long t = ns2usecs(iter->ts);
180 unsigned long usec_rem = do_div(t, 1000000ULL); 185 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t; 186 unsigned secs = (unsigned long)t;
182 int ret = 1; 187 int ret = 1;
183 188
184 switch (entry->mmiorw.opcode) { 189 trace_assign_type(field, entry);
190 rw = &field->rw;
191
192 switch (rw->opcode) {
185 case MMIO_READ: 193 case MMIO_READ:
186 ret = trace_seq_printf(s, 194 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 195 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +217,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
209 break; 217 break;
210 } 218 }
211 if (ret) 219 if (ret)
212 return 1; 220 return TRACE_TYPE_HANDLED;
213 return 0; 221 return TRACE_TYPE_PARTIAL_LINE;
214} 222}
215 223
216static int mmio_print_map(struct trace_iterator *iter) 224static enum print_line_t mmio_print_map(struct trace_iterator *iter)
217{ 225{
218 struct trace_entry *entry = iter->ent; 226 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap; 227 struct trace_mmiotrace_map *field;
228 struct mmiotrace_map *m;
220 struct trace_seq *s = &iter->seq; 229 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t); 230 unsigned long long t = ns2usecs(iter->ts);
222 unsigned long usec_rem = do_div(t, 1000000ULL); 231 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t; 232 unsigned secs = (unsigned long)t;
224 int ret = 1; 233 int ret;
225 234
226 switch (entry->mmiorw.opcode) { 235 trace_assign_type(field, entry);
236 m = &field->map;
237
238 switch (m->opcode) {
227 case MMIO_PROBE: 239 case MMIO_PROBE:
228 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 241 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +253,43 @@ static int mmio_print_map(struct trace_iterator *iter)
241 break; 253 break;
242 } 254 }
243 if (ret) 255 if (ret)
244 return 1; 256 return TRACE_TYPE_HANDLED;
245 return 0; 257 return TRACE_TYPE_PARTIAL_LINE;
258}
259
260static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
261{
262 struct trace_entry *entry = iter->ent;
263 struct print_entry *print = (struct print_entry *)entry;
264 const char *msg = print->buf;
265 struct trace_seq *s = &iter->seq;
266 unsigned long long t = ns2usecs(iter->ts);
267 unsigned long usec_rem = do_div(t, 1000000ULL);
268 unsigned secs = (unsigned long)t;
269 int ret;
270
271 /* The trailing newline must be in the message. */
272 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275
276 if (entry->flags & TRACE_FLAG_CONT)
277 trace_seq_print_cont(s, iter);
278
279 return TRACE_TYPE_HANDLED;
246} 280}
247 281
248/* return 0 to abort printing without consuming current entry in pipe mode */ 282static enum print_line_t mmio_print_line(struct trace_iterator *iter)
249static int mmio_print_line(struct trace_iterator *iter)
250{ 283{
251 switch (iter->ent->type) { 284 switch (iter->ent->type) {
252 case TRACE_MMIO_RW: 285 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter); 286 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP: 287 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter); 288 return mmio_print_map(iter);
289 case TRACE_PRINT:
290 return mmio_print_mark(iter);
256 default: 291 default:
257 return 1; /* ignore unknown entries */ 292 return TRACE_TYPE_HANDLED; /* ignore unknown entries */
258 } 293 }
259} 294}
260 295
@@ -276,6 +311,27 @@ __init static int init_mmio_trace(void)
276} 311}
277device_initcall(init_mmio_trace); 312device_initcall(init_mmio_trace);
278 313
314static void __trace_mmiotrace_rw(struct trace_array *tr,
315 struct trace_array_cpu *data,
316 struct mmiotrace_rw *rw)
317{
318 struct ring_buffer_event *event;
319 struct trace_mmiotrace_rw *entry;
320 unsigned long irq_flags;
321
322 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
323 &irq_flags);
324 if (!event)
325 return;
326 entry = ring_buffer_event_data(event);
327 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
328 entry->ent.type = TRACE_MMIO_RW;
329 entry->rw = *rw;
330 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
331
332 trace_wake_up();
333}
334
279void mmio_trace_rw(struct mmiotrace_rw *rw) 335void mmio_trace_rw(struct mmiotrace_rw *rw)
280{ 336{
281 struct trace_array *tr = mmio_trace_array; 337 struct trace_array *tr = mmio_trace_array;
@@ -283,6 +339,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
283 __trace_mmiotrace_rw(tr, data, rw); 339 __trace_mmiotrace_rw(tr, data, rw);
284} 340}
285 341
342static void __trace_mmiotrace_map(struct trace_array *tr,
343 struct trace_array_cpu *data,
344 struct mmiotrace_map *map)
345{
346 struct ring_buffer_event *event;
347 struct trace_mmiotrace_map *entry;
348 unsigned long irq_flags;
349
350 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
351 &irq_flags);
352 if (!event)
353 return;
354 entry = ring_buffer_event_data(event);
355 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
356 entry->ent.type = TRACE_MMIO_MAP;
357 entry->map = *map;
358 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
359
360 trace_wake_up();
361}
362
286void mmio_trace_mapping(struct mmiotrace_map *map) 363void mmio_trace_mapping(struct mmiotrace_map *map)
287{ 364{
288 struct trace_array *tr = mmio_trace_array; 365 struct trace_array *tr = mmio_trace_array;
@@ -293,3 +370,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
293 __trace_mmiotrace_map(tr, data, map); 370 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable(); 371 preempt_enable();
295} 372}
373
374int mmio_trace_printk(const char *fmt, va_list args)
375{
376 return trace_vprintk(0, fmt, args);
377}
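
mmio_trace_printk() added above simply forwards a va_list to trace_vprintk(), which is how mmiotrace marks become TRACE_PRINT entries handled by mmio_print_mark(). A sketch of a hypothetical variadic caller, assuming it sits next to mmio_trace_printk() (the real call site is in the mmiotrace core, outside this diff):

	static int example_mmio_mark(const char *fmt, ...)
	{
		va_list args;
		int ret;

		va_start(args, fmt);
		/* the trailing newline must already be in fmt;
		 * mmio_print_mark() expects it in the message */
		ret = mmio_trace_printk(fmt, args);
		va_end(args);

		return ret;
	}
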
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 000000000000..4592b4862515
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,64 @@
1/*
2 * nop tracer
3 *
4 * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12
13#include "trace.h"
14
15static struct trace_array *ctx_trace;
16
17static void start_nop_trace(struct trace_array *tr)
18{
19 /* Nothing to do! */
20}
21
22static void stop_nop_trace(struct trace_array *tr)
23{
24 /* Nothing to do! */
25}
26
27static void nop_trace_init(struct trace_array *tr)
28{
29 int cpu;
30 ctx_trace = tr;
31
32 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu);
34
35 if (tr->ctrl)
36 start_nop_trace(tr);
37}
38
39static void nop_trace_reset(struct trace_array *tr)
40{
41 if (tr->ctrl)
42 stop_nop_trace(tr);
43}
44
45static void nop_trace_ctrl_update(struct trace_array *tr)
46{
47 /* When starting a new trace, reset the buffers */
48 if (tr->ctrl)
49 start_nop_trace(tr);
50 else
51 stop_nop_trace(tr);
52}
53
54struct tracer nop_trace __read_mostly =
55{
56 .name = "nop",
57 .init = nop_trace_init,
58 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop,
62#endif
63};
64
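
The nop tracer carries no initcall of its own; the exported nop_trace is hooked up by the tracing core. What that registration amounts to, assuming the register_tracer() helper declared in kernel/trace/trace.h (an assumption, the call is not part of this hunk):

	#include "trace.h"

	/* sketch only: the real registration lives in kernel/trace/trace.c */
	static __init int example_register_nop(void)
	{
		return register_tracer(&nop_trace);
	}
	device_initcall(example_register_nop);
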
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..b8f56beb1a62 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
@@ -19,15 +19,16 @@ static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static atomic_t sched_ref;
20 20
21static void 21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev, 22probe_sched_switch(struct rq *__rq, struct task_struct *prev,
23 struct task_struct *next) 23 struct task_struct *next)
24{ 24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data; 25 struct trace_array_cpu *data;
28 unsigned long flags; 26 unsigned long flags;
29 long disabled;
30 int cpu; 27 int cpu;
28 int pc;
29
30 if (!atomic_read(&sched_ref))
31 return;
31 32
32 tracing_record_cmdline(prev); 33 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next); 34 tracing_record_cmdline(next);
@@ -35,97 +36,41 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
35 if (!tracer_enabled) 36 if (!tracer_enabled)
36 return; 37 return;
37 38
39 pc = preempt_count();
38 local_irq_save(flags); 40 local_irq_save(flags);
39 cpu = raw_smp_processor_id(); 41 cpu = raw_smp_processor_id();
40 data = tr->data[cpu]; 42 data = ctx_trace->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42 43
43 if (likely(disabled == 1)) 44 if (likely(!atomic_read(&data->disabled)))
44 tracing_sched_switch_trace(tr, data, prev, next, flags); 45 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
45 46
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags); 47 local_irq_restore(flags);
48} 48}
49 49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void 50static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct 51probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
78 task_struct *curr)
79{ 52{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data; 53 struct trace_array_cpu *data;
83 unsigned long flags; 54 unsigned long flags;
84 long disabled; 55 int cpu, pc;
85 int cpu;
86 56
87 if (!tracer_enabled) 57 if (!likely(tracer_enabled))
88 return; 58 return;
89 59
90 tracing_record_cmdline(curr); 60 pc = preempt_count();
61 tracing_record_cmdline(current);
91 62
92 local_irq_save(flags); 63 local_irq_save(flags);
93 cpu = raw_smp_processor_id(); 64 cpu = raw_smp_processor_id();
94 data = tr->data[cpu]; 65 data = ctx_trace->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96 66
97 if (likely(disabled == 1)) 67 if (likely(!atomic_read(&data->disabled)))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); 68 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
69 flags, pc);
99 70
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags); 71 local_irq_restore(flags);
102} 72}
103 73
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr) 74static void sched_switch_reset(struct trace_array *tr)
130{ 75{
131 int cpu; 76 int cpu;
@@ -133,67 +78,47 @@ static void sched_switch_reset(struct trace_array *tr)
133 tr->time_start = ftrace_now(tr->cpu); 78 tr->time_start = ftrace_now(tr->cpu);
134 79
135 for_each_online_cpu(cpu) 80 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]); 81 tracing_reset(tr, cpu);
137} 82}
138 83
139static int tracing_sched_register(void) 84static int tracing_sched_register(void)
140{ 85{
141 int ret; 86 int ret;
142 87
143 ret = marker_probe_register("kernel_sched_wakeup", 88 ret = register_trace_sched_wakeup(probe_sched_wakeup);
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) { 89 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker" 90 pr_info("wakeup trace: Couldn't activate tracepoint"
149 " probe to kernel_sched_wakeup\n"); 91 " probe to kernel_sched_wakeup\n");
150 return ret; 92 return ret;
151 } 93 }
152 94
153 ret = marker_probe_register("kernel_sched_wakeup_new", 95 ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) { 96 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker" 97 pr_info("wakeup trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_wakeup_new\n"); 98 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe; 99 goto fail_deprobe;
161 } 100 }
162 101
163 ret = marker_probe_register("kernel_sched_schedule", 102 ret = register_trace_sched_switch(probe_sched_switch);
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) { 103 if (ret) {
169 pr_info("sched trace: Couldn't add marker" 104 pr_info("sched trace: Couldn't activate tracepoint"
170 " probe to kernel_sched_schedule\n"); 105 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new; 106 goto fail_deprobe_wake_new;
172 } 107 }
173 108
174 return ret; 109 return ret;
175fail_deprobe_wake_new: 110fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new", 111 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe: 112fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup", 113 unregister_trace_sched_wakeup(probe_sched_wakeup);
181 wake_up_callback,
182 &ctx_trace);
183 return ret; 114 return ret;
184} 115}
185 116
186static void tracing_sched_unregister(void) 117static void tracing_sched_unregister(void)
187{ 118{
188 marker_probe_unregister("kernel_sched_schedule", 119 unregister_trace_sched_switch(probe_sched_switch);
189 sched_switch_callback, 120 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
190 &ctx_trace); 121 unregister_trace_sched_wakeup(probe_sched_wakeup);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197} 122}
198 123
199static void tracing_start_sched_switch(void) 124static void tracing_start_sched_switch(void)
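
The conversion above trades marker_probe_register(), which parsed a format string and handed probes a va_list, for the typed register_trace_sched_*() helpers from <trace/sched.h>. A minimal sketch of the same life cycle with a hypothetical probe, following the error unwinding order of tracing_sched_register():

	#include <linux/sched.h>
	#include <trace/sched.h>

	/* hypothetical probe: tracepoints pass typed arguments, no va_list parsing */
	static void example_probe_wakeup(struct rq *rq, struct task_struct *p)
	{
		/* runs from the scheduler wakeup path; keep the body short */
	}

	static int example_sched_register(void)
	{
		int ret;

		ret = register_trace_sched_wakeup(example_probe_wakeup);
		if (ret)
			return ret;

		ret = register_trace_sched_wakeup_new(example_probe_wakeup);
		if (ret)
			unregister_trace_sched_wakeup(example_probe_wakeup);

		return ret;
	}

	static void example_sched_unregister(void)
	{
		unregister_trace_sched_wakeup_new(example_probe_wakeup);
		unregister_trace_sched_wakeup(example_probe_wakeup);
	}
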
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..3ae93f16b565 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/marker.h> 18#include <trace/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
31 31
32static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
33 33
34#ifdef CONFIG_FTRACE 34#ifdef CONFIG_FUNCTION_TRACER
35/* 35/*
36 * irqsoff uses its own tracer function to keep the overhead down: 36 * irqsoff uses its own tracer function to keep the overhead down:
37 */ 37 */
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
44 long disabled; 44 long disabled;
45 int resched; 45 int resched;
46 int cpu; 46 int cpu;
47 int pc;
47 48
48 if (likely(!wakeup_task)) 49 if (likely(!wakeup_task))
49 return; 50 return;
50 51
52 pc = preempt_count();
51 resched = need_resched(); 53 resched = need_resched();
52 preempt_disable_notrace(); 54 preempt_disable_notrace();
53 55
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
70 if (task_cpu(wakeup_task) != cpu) 72 if (task_cpu(wakeup_task) != cpu)
71 goto unlock; 73 goto unlock;
72 74
73 trace_function(tr, data, ip, parent_ip, flags); 75 trace_function(tr, data, ip, parent_ip, flags, pc);
74 76
75 unlock: 77 unlock:
76 __raw_spin_unlock(&wakeup_lock); 78 __raw_spin_unlock(&wakeup_lock);
@@ -94,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
94{ 96{
95 .func = wakeup_tracer_call, 97 .func = wakeup_tracer_call,
96}; 98};
97#endif /* CONFIG_FTRACE */ 99#endif /* CONFIG_FUNCTION_TRACER */
98 100
99/* 101/*
100 * Should this new latency be reported/recorded? 102 * Should this new latency be reported/recorded?
@@ -112,17 +114,18 @@ static int report_latency(cycle_t delta)
112} 114}
113 115
114static void notrace 116static void notrace
115wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, 117probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
116 struct task_struct *next) 118 struct task_struct *next)
117{ 119{
118 unsigned long latency = 0, t0 = 0, t1 = 0; 120 unsigned long latency = 0, t0 = 0, t1 = 0;
119 struct trace_array **ptr = private;
120 struct trace_array *tr = *ptr;
121 struct trace_array_cpu *data; 121 struct trace_array_cpu *data;
122 cycle_t T0, T1, delta; 122 cycle_t T0, T1, delta;
123 unsigned long flags; 123 unsigned long flags;
124 long disabled; 124 long disabled;
125 int cpu; 125 int cpu;
126 int pc;
127
128 tracing_record_cmdline(prev);
126 129
127 if (unlikely(!tracer_enabled)) 130 if (unlikely(!tracer_enabled))
128 return; 131 return;
@@ -139,12 +142,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
139 if (next != wakeup_task) 142 if (next != wakeup_task)
140 return; 143 return;
141 144
145 pc = preempt_count();
146
142 /* The task we are waiting for is waking up */ 147 /* The task we are waiting for is waking up */
143 data = tr->data[wakeup_cpu]; 148 data = wakeup_trace->data[wakeup_cpu];
144 149
145 /* disable local data, not wakeup_cpu data */ 150 /* disable local data, not wakeup_cpu data */
146 cpu = raw_smp_processor_id(); 151 cpu = raw_smp_processor_id();
147 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 152 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
148 if (likely(disabled != 1)) 153 if (likely(disabled != 1))
149 goto out; 154 goto out;
150 155
@@ -155,7 +160,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
155 if (unlikely(!tracer_enabled || next != wakeup_task)) 160 if (unlikely(!tracer_enabled || next != wakeup_task))
156 goto out_unlock; 161 goto out_unlock;
157 162
158 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); 163 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
159 164
160 /* 165 /*
161 * usecs conversion is slow so we try to delay the conversion 166 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +179,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
174 t0 = nsecs_to_usecs(T0); 179 t0 = nsecs_to_usecs(T0);
175 t1 = nsecs_to_usecs(T1); 180 t1 = nsecs_to_usecs(T1);
176 181
177 update_max_tr(tr, wakeup_task, wakeup_cpu); 182 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 183
179out_unlock: 184out_unlock:
180 __wakeup_reset(tr); 185 __wakeup_reset(wakeup_trace);
181 __raw_spin_unlock(&wakeup_lock); 186 __raw_spin_unlock(&wakeup_lock);
182 local_irq_restore(flags); 187 local_irq_restore(flags);
183out: 188out:
184 atomic_dec(&tr->data[cpu]->disabled); 189 atomic_dec(&wakeup_trace->data[cpu]->disabled);
185}
186
187static notrace void
188sched_switch_callback(void *probe_data, void *call_data,
189 const char *format, va_list *args)
190{
191 struct task_struct *prev;
192 struct task_struct *next;
193 struct rq *__rq;
194
195 /* skip prev_pid %d next_pid %d prev_state %ld */
196 (void)va_arg(*args, int);
197 (void)va_arg(*args, int);
198 (void)va_arg(*args, long);
199 __rq = va_arg(*args, typeof(__rq));
200 prev = va_arg(*args, typeof(prev));
201 next = va_arg(*args, typeof(next));
202
203 tracing_record_cmdline(prev);
204
205 /*
206 * If tracer_switch_func only points to the local
207 * switch func, it still needs the ptr passed to it.
208 */
209 wakeup_sched_switch(probe_data, __rq, prev, next);
210} 190}
211 191
212static void __wakeup_reset(struct trace_array *tr) 192static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +196,7 @@ static void __wakeup_reset(struct trace_array *tr)
216 196
217 for_each_possible_cpu(cpu) { 197 for_each_possible_cpu(cpu) {
218 data = tr->data[cpu]; 198 data = tr->data[cpu];
219 tracing_reset(data); 199 tracing_reset(tr, cpu);
220 } 200 }
221 201
222 wakeup_cpu = -1; 202 wakeup_cpu = -1;
@@ -240,19 +220,26 @@ static void wakeup_reset(struct trace_array *tr)
240} 220}
241 221
242static void 222static void
243wakeup_check_start(struct trace_array *tr, struct task_struct *p, 223probe_wakeup(struct rq *rq, struct task_struct *p)
244 struct task_struct *curr)
245{ 224{
246 int cpu = smp_processor_id(); 225 int cpu = smp_processor_id();
247 unsigned long flags; 226 unsigned long flags;
248 long disabled; 227 long disabled;
228 int pc;
229
230 if (likely(!tracer_enabled))
231 return;
232
233 tracing_record_cmdline(p);
234 tracing_record_cmdline(current);
249 235
250 if (likely(!rt_task(p)) || 236 if (likely(!rt_task(p)) ||
251 p->prio >= wakeup_prio || 237 p->prio >= wakeup_prio ||
252 p->prio >= curr->prio) 238 p->prio >= current->prio)
253 return; 239 return;
254 240
255 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 241 pc = preempt_count();
242 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
256 if (unlikely(disabled != 1)) 243 if (unlikely(disabled != 1))
257 goto out; 244 goto out;
258 245
@@ -264,7 +251,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
264 goto out_locked; 251 goto out_locked;
265 252
266 /* reset the trace */ 253 /* reset the trace */
267 __wakeup_reset(tr); 254 __wakeup_reset(wakeup_trace);
268 255
269 wakeup_cpu = task_cpu(p); 256 wakeup_cpu = task_cpu(p);
270 wakeup_prio = p->prio; 257 wakeup_prio = p->prio;
@@ -274,74 +261,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
274 261
275 local_save_flags(flags); 262 local_save_flags(flags);
276 263
277 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 264 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
278 trace_function(tr, tr->data[wakeup_cpu], 265 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
279 CALLER_ADDR1, CALLER_ADDR2, flags); 266 CALLER_ADDR1, CALLER_ADDR2, flags, pc);
280 267
281out_locked: 268out_locked:
282 __raw_spin_unlock(&wakeup_lock); 269 __raw_spin_unlock(&wakeup_lock);
283out: 270out:
284 atomic_dec(&tr->data[cpu]->disabled); 271 atomic_dec(&wakeup_trace->data[cpu]->disabled);
285}
286
287static notrace void
288wake_up_callback(void *probe_data, void *call_data,
289 const char *format, va_list *args)
290{
291 struct trace_array **ptr = probe_data;
292 struct trace_array *tr = *ptr;
293 struct task_struct *curr;
294 struct task_struct *task;
295 struct rq *__rq;
296
297 if (likely(!tracer_enabled))
298 return;
299
300 /* Skip pid %d state %ld */
301 (void)va_arg(*args, int);
302 (void)va_arg(*args, long);
303 /* now get the meat: "rq %p task %p rq->curr %p" */
304 __rq = va_arg(*args, typeof(__rq));
305 task = va_arg(*args, typeof(task));
306 curr = va_arg(*args, typeof(curr));
307
308 tracing_record_cmdline(task);
309 tracing_record_cmdline(curr);
310
311 wakeup_check_start(tr, task, curr);
312} 272}
313 273
314static void start_wakeup_tracer(struct trace_array *tr) 274static void start_wakeup_tracer(struct trace_array *tr)
315{ 275{
316 int ret; 276 int ret;
317 277
318 ret = marker_probe_register("kernel_sched_wakeup", 278 ret = register_trace_sched_wakeup(probe_wakeup);
319 "pid %d state %ld ## rq %p task %p rq->curr %p",
320 wake_up_callback,
321 &wakeup_trace);
322 if (ret) { 279 if (ret) {
323 pr_info("wakeup trace: Couldn't add marker" 280 pr_info("wakeup trace: Couldn't activate tracepoint"
324 " probe to kernel_sched_wakeup\n"); 281 " probe to kernel_sched_wakeup\n");
325 return; 282 return;
326 } 283 }
327 284
328 ret = marker_probe_register("kernel_sched_wakeup_new", 285 ret = register_trace_sched_wakeup_new(probe_wakeup);
329 "pid %d state %ld ## rq %p task %p rq->curr %p",
330 wake_up_callback,
331 &wakeup_trace);
332 if (ret) { 286 if (ret) {
333 pr_info("wakeup trace: Couldn't add marker" 287 pr_info("wakeup trace: Couldn't activate tracepoint"
334 " probe to kernel_sched_wakeup_new\n"); 288 " probe to kernel_sched_wakeup_new\n");
335 goto fail_deprobe; 289 goto fail_deprobe;
336 } 290 }
337 291
338 ret = marker_probe_register("kernel_sched_schedule", 292 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
339 "prev_pid %d next_pid %d prev_state %ld "
340 "## rq %p prev %p next %p",
341 sched_switch_callback,
342 &wakeup_trace);
343 if (ret) { 293 if (ret) {
344 pr_info("sched trace: Couldn't add marker" 294 pr_info("sched trace: Couldn't activate tracepoint"
345 " probe to kernel_sched_schedule\n"); 295 " probe to kernel_sched_schedule\n");
346 goto fail_deprobe_wake_new; 296 goto fail_deprobe_wake_new;
347 } 297 }
@@ -363,28 +313,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
363 313
364 return; 314 return;
365fail_deprobe_wake_new: 315fail_deprobe_wake_new:
366 marker_probe_unregister("kernel_sched_wakeup_new", 316 unregister_trace_sched_wakeup_new(probe_wakeup);
367 wake_up_callback,
368 &wakeup_trace);
369fail_deprobe: 317fail_deprobe:
370 marker_probe_unregister("kernel_sched_wakeup", 318 unregister_trace_sched_wakeup(probe_wakeup);
371 wake_up_callback,
372 &wakeup_trace);
373} 319}
374 320
375static void stop_wakeup_tracer(struct trace_array *tr) 321static void stop_wakeup_tracer(struct trace_array *tr)
376{ 322{
377 tracer_enabled = 0; 323 tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 324 unregister_ftrace_function(&trace_ops);
379 marker_probe_unregister("kernel_sched_schedule", 325 unregister_trace_sched_switch(probe_wakeup_sched_switch);
380 sched_switch_callback, 326 unregister_trace_sched_wakeup_new(probe_wakeup);
381 &wakeup_trace); 327 unregister_trace_sched_wakeup(probe_wakeup);
382 marker_probe_unregister("kernel_sched_wakeup_new",
383 wake_up_callback,
384 &wakeup_trace);
385 marker_probe_unregister("kernel_sched_wakeup",
386 wake_up_callback,
387 &wakeup_trace);
388} 328}
389 329
390static void wakeup_tracer_init(struct trace_array *tr) 330static void wakeup_tracer_init(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073bf..90bc752a7580 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
12 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT:
13 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
14 return 1; 16 return 1;
15 } 17 }
16 return 0; 18 return 0;
17} 19}
18 20
19static int 21static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{ 22{
22 struct trace_entry *entries; 23 struct ring_buffer_event *event;
23 struct page *page; 24 struct trace_entry *entry;
24 int idx = 0;
25 int i;
26 25
27 BUG_ON(list_empty(&data->trace_pages)); 26 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 page = list_entry(data->trace_pages.next, struct page, lru); 27 entry = ring_buffer_event_data(event);
29 entries = page_address(page);
30 28
31 check_pages(data); 29 if (!trace_valid_entry(entry)) {
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ", 30 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type); 31 entry->type);
46 goto failed; 32 goto failed;
47 } 33 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 } 34 }
70
71 return 0; 35 return 0;
72 36
73 failed: 37 failed:
@@ -89,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
89 /* Don't allow flipping of max traces now */ 53 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags); 54 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock); 55 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95 56
96 cnt += tr->data[cpu]->trace_idx; 57 cnt = ring_buffer_entries(tr->buffer);
97 58
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]); 59 for_each_possible_cpu(cpu) {
60 ret = trace_test_buffer_cpu(tr, cpu);
99 if (ret) 61 if (ret)
100 break; 62 break;
101 } 63 }
@@ -108,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
108 return ret; 70 return ret;
109} 71}
110 72
111#ifdef CONFIG_FTRACE 73#ifdef CONFIG_FUNCTION_TRACER
112 74
113#ifdef CONFIG_DYNAMIC_FTRACE 75#ifdef CONFIG_DYNAMIC_FTRACE
114 76
@@ -120,11 +82,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr, 82 struct trace_array *tr,
121 int (*func)(void)) 83 int (*func)(void))
122{ 84{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled; 85 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled; 86 int save_tracer_enabled = tracer_enabled;
87 unsigned long count;
127 char *func_name; 88 char *func_name;
89 int ret;
128 90
129 /* The ftrace test PASSED */ 91 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n"); 92 printk(KERN_CONT "PASSED\n");
@@ -137,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
137 /* passed in by parameter to fool gcc from optimizing */ 99 /* passed in by parameter to fool gcc from optimizing */
138 func(); 100 func();
139 101
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /* 102 /*
148 * Some archs *cough*PowerPC*cough* add charachters to the 103 * Some archs *cough*PowerPC*cough* add charachters to the
149 * start of the function names. We simply put a '*' to 104 * start of the function names. We simply put a '*' to
@@ -157,6 +112,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
157 /* enable tracing */ 112 /* enable tracing */
158 tr->ctrl = 1; 113 tr->ctrl = 1;
159 trace->init(tr); 114 trace->init(tr);
115
160 /* Sleep for a 1/10 of a second */ 116 /* Sleep for a 1/10 of a second */
161 msleep(100); 117 msleep(100);
162 118
@@ -212,21 +168,14 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
212int 168int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) 169trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{ 170{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled; 171 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled; 172 int save_tracer_enabled = tracer_enabled;
173 unsigned long count;
174 int ret;
219 175
220 /* make sure msleep has been recorded */ 176 /* make sure msleep has been recorded */
221 msleep(1); 177 msleep(1);
222 178
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */ 179 /* start the tracing */
231 ftrace_enabled = 1; 180 ftrace_enabled = 1;
232 tracer_enabled = 1; 181 tracer_enabled = 1;
@@ -263,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
263 212
264 return ret; 213 return ret;
265} 214}
266#endif /* CONFIG_FTRACE */ 215#endif /* CONFIG_FUNCTION_TRACER */
267 216
268#ifdef CONFIG_IRQSOFF_TRACER 217#ifdef CONFIG_IRQSOFF_TRACER
269int 218int
@@ -415,6 +364,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
415} 364}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ 365#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417 366
367#ifdef CONFIG_NOP_TRACER
368int
369trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
370{
371 /* What could possibly go wrong? */
372 return 0;
373}
374#endif
375
418#ifdef CONFIG_SCHED_TRACER 376#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data) 377static int trace_wakeup_test_thread(void *data)
420{ 378{
@@ -486,6 +444,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
486 444
487 wake_up_process(p); 445 wake_up_process(p);
488 446
447 /* give a little time to let the thread wake up */
448 msleep(100);
449
489 /* stop the tracing. */ 450 /* stop the tracing. */
490 tr->ctrl = 0; 451 tr->ctrl = 0;
491 trace->ctrl_update(tr); 452 trace->ctrl_update(tr);
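
The selftest now reads back through the generic ring buffer: ring_buffer_consume() removes one event per call and ring_buffer_entries() reports the total, replacing the old page-list walk. A small sketch that drains and counts whatever remains in a trace_array's buffer, using only calls visible in this patch:

	#include "trace.h"

	static unsigned long example_drain_count(struct trace_array *tr)
	{
		struct ring_buffer_event *event;
		unsigned long count = 0;
		int cpu;

		for_each_possible_cpu(cpu) {
			/* consume one event at a time from this CPU's buffer */
			while ((event = ring_buffer_consume(tr->buffer, cpu, NULL)))
				count++;
		}

		return count;
	}
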
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 000000000000..be682b62fe58
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,314 @@
1/*
2 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
3 *
4 */
5#include <linux/stacktrace.h>
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/uaccess.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/fs.h>
15#include "trace.h"
16
17#define STACK_TRACE_ENTRIES 500
18
19static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
20 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
21static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
22
23static struct stack_trace max_stack_trace = {
24 .max_entries = STACK_TRACE_ENTRIES,
25 .entries = stack_dump_trace,
26};
27
28static unsigned long max_stack_size;
29static raw_spinlock_t max_stack_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31
32static int stack_trace_disabled __read_mostly;
33static DEFINE_PER_CPU(int, trace_active);
34
35static inline void check_stack(void)
36{
37 unsigned long this_size, flags;
38 unsigned long *p, *top, *start;
39 int i;
40
41 this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
42 this_size = THREAD_SIZE - this_size;
43
44 if (this_size <= max_stack_size)
45 return;
46
47 /* we do not handle interrupt stacks yet */
48 if (!object_is_on_stack(&this_size))
49 return;
50
51 raw_local_irq_save(flags);
52 __raw_spin_lock(&max_stack_lock);
53
54 /* a race could have already updated it */
55 if (this_size <= max_stack_size)
56 goto out;
57
58 max_stack_size = this_size;
59
60 max_stack_trace.nr_entries = 0;
61 max_stack_trace.skip = 3;
62
63 save_stack_trace(&max_stack_trace);
64
65 /*
66 * Now find where in the stack these are.
67 */
68 i = 0;
69 start = &this_size;
70 top = (unsigned long *)
71 (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
72
73 /*
74 * Loop through all the entries. One of the entries may
75 * for some reason be missed on the stack, so we may
76 * have to account for them. If they are all there, this
77 * loop will only happen once. This code only takes place
78 * on a new max, so it is far from a fast path.
79 */
80 while (i < max_stack_trace.nr_entries) {
81
82 stack_dump_index[i] = this_size;
83 p = start;
84
85 for (; p < top && i < max_stack_trace.nr_entries; p++) {
86 if (*p == stack_dump_trace[i]) {
87 this_size = stack_dump_index[i++] =
88 (top - p) * sizeof(unsigned long);
89 /* Start the search from here */
90 start = p + 1;
91 }
92 }
93
94 i++;
95 }
96
97 out:
98 __raw_spin_unlock(&max_stack_lock);
99 raw_local_irq_restore(flags);
100}
101
102static void
103stack_trace_call(unsigned long ip, unsigned long parent_ip)
104{
105 int cpu, resched;
106
107 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return;
109
110 resched = need_resched();
111 preempt_disable_notrace();
112
113 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */
115 if (per_cpu(trace_active, cpu)++ != 0)
116 goto out;
117
118 check_stack();
119
120 out:
121 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */
123 if (resched)
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127}
128
129static struct ftrace_ops trace_ops __read_mostly =
130{
131 .func = stack_trace_call,
132};
133
134static ssize_t
135stack_max_size_read(struct file *filp, char __user *ubuf,
136 size_t count, loff_t *ppos)
137{
138 unsigned long *ptr = filp->private_data;
139 char buf[64];
140 int r;
141
142 r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
143 if (r > sizeof(buf))
144 r = sizeof(buf);
145 return simple_read_from_buffer(ubuf, count, ppos, buf, r);
146}
147
148static ssize_t
149stack_max_size_write(struct file *filp, const char __user *ubuf,
150 size_t count, loff_t *ppos)
151{
152 long *ptr = filp->private_data;
153 unsigned long val, flags;
154 char buf[64];
155 int ret;
156
157 if (count >= sizeof(buf))
158 return -EINVAL;
159
160 if (copy_from_user(&buf, ubuf, count))
161 return -EFAULT;
162
163 buf[count] = 0;
164
165 ret = strict_strtoul(buf, 10, &val);
166 if (ret < 0)
167 return ret;
168
169 raw_local_irq_save(flags);
170 __raw_spin_lock(&max_stack_lock);
171 *ptr = val;
172 __raw_spin_unlock(&max_stack_lock);
173 raw_local_irq_restore(flags);
174
175 return count;
176}
177
178static struct file_operations stack_max_size_fops = {
179 .open = tracing_open_generic,
180 .read = stack_max_size_read,
181 .write = stack_max_size_write,
182};
183
184static void *
185t_next(struct seq_file *m, void *v, loff_t *pos)
186{
187 long i = (long)m->private;
188
189 (*pos)++;
190
191 i++;
192
193 if (i >= max_stack_trace.nr_entries ||
194 stack_dump_trace[i] == ULONG_MAX)
195 return NULL;
196
197 m->private = (void *)i;
198
199 return &m->private;
200}
201
202static void *t_start(struct seq_file *m, loff_t *pos)
203{
204 void *t = &m->private;
205 loff_t l = 0;
206
207 local_irq_disable();
208 __raw_spin_lock(&max_stack_lock);
209
210 for (; t && l < *pos; t = t_next(m, t, &l))
211 ;
212
213 return t;
214}
215
216static void t_stop(struct seq_file *m, void *p)
217{
218 __raw_spin_unlock(&max_stack_lock);
219 local_irq_enable();
220}
221
222static int trace_lookup_stack(struct seq_file *m, long i)
223{
224 unsigned long addr = stack_dump_trace[i];
225#ifdef CONFIG_KALLSYMS
226 char str[KSYM_SYMBOL_LEN];
227
228 sprint_symbol(str, addr);
229
230 return seq_printf(m, "%s\n", str);
231#else
232 return seq_printf(m, "%p\n", (void*)addr);
233#endif
234}
235
236static int t_show(struct seq_file *m, void *v)
237{
238 long i = *(long *)v;
239 int size;
240
241 if (i < 0) {
242 seq_printf(m, " Depth Size Location"
243 " (%d entries)\n"
244 " ----- ---- --------\n",
245 max_stack_trace.nr_entries);
246 return 0;
247 }
248
249 if (i >= max_stack_trace.nr_entries ||
250 stack_dump_trace[i] == ULONG_MAX)
251 return 0;
252
253 if (i+1 == max_stack_trace.nr_entries ||
254 stack_dump_trace[i+1] == ULONG_MAX)
255 size = stack_dump_index[i];
256 else
257 size = stack_dump_index[i] - stack_dump_index[i+1];
258
259 seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
260
261 trace_lookup_stack(m, i);
262
263 return 0;
264}
265
266static struct seq_operations stack_trace_seq_ops = {
267 .start = t_start,
268 .next = t_next,
269 .stop = t_stop,
270 .show = t_show,
271};
272
273static int stack_trace_open(struct inode *inode, struct file *file)
274{
275 int ret;
276
277 ret = seq_open(file, &stack_trace_seq_ops);
278 if (!ret) {
279 struct seq_file *m = file->private_data;
280 m->private = (void *)-1;
281 }
282
283 return ret;
284}
285
286static struct file_operations stack_trace_fops = {
287 .open = stack_trace_open,
288 .read = seq_read,
289 .llseek = seq_lseek,
290};
291
292static __init int stack_trace_init(void)
293{
294 struct dentry *d_tracer;
295 struct dentry *entry;
296
297 d_tracer = tracing_init_dentry();
298
299 entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
300 &max_stack_size, &stack_max_size_fops);
301 if (!entry)
302 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
303
304 entry = debugfs_create_file("stack_trace", 0444, d_tracer,
305 NULL, &stack_trace_fops);
306 if (!entry)
307 pr_warning("Could not create debugfs 'stack_trace' entry\n");
308
309 register_ftrace_function(&trace_ops);
310
311 return 0;
312}
313
314device_initcall(stack_trace_init);
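
Both new debugfs files are meant for userspace consumption: stack_max_size reports the deepest kernel stack usage seen (and can be written to reset it), while stack_trace dumps the backtrace that produced it. A sketch of a reader, assuming debugfs is mounted at /sys/kernel/debug and the tracing directory sits directly under it (the mount point is an assumption, not part of this patch):

	#include <stdio.h>

	/* userspace sketch: print the deepest recorded kernel stack usage */
	int main(void)
	{
		char buf[64];
		FILE *f = fopen("/sys/kernel/debug/tracing/stack_max_size", "r");

		if (!f)
			return 1;
		if (fgets(buf, sizeof(buf), f))
			printf("max stack usage: %s", buf);
		fclose(f);

		return 0;
	}
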
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index bb948e52ce20..9587d3bcba55 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
202 202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn; 204 hrtimer->function = stack_trace_timer_fn;
205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; 205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
206 206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); 207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
208} 208}
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
241 tr->time_start = ftrace_now(tr->cpu); 241 tr->time_start = ftrace_now(tr->cpu);
242 242
243 for_each_online_cpu(cpu) 243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]); 244 tracing_reset(tr, cpu);
245} 245}
246 246
247static void start_stack_trace(struct trace_array *tr) 247static void start_stack_trace(struct trace_array *tr)