about summary refs log tree commit diff stats
path: root/kernel/trace
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-11-10 03:10:44 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-11-10 03:10:44 -0500
commit a5a64498c194c82ecad3a2d67cff6231cda8d3dd (patch)
tree 723d5d81419f9960b8d30ed9a2ece8a58d6c4328 /kernel/trace
parent bb93d802ae5c1949977cc6da247b218240677f11 (diff)
parent f7160c7573615ec82c691e294cf80d920b5d588d (diff)
Merge commit 'v2.6.28-rc4' into timers/rtc
Conflicts: drivers/rtc/rtc-cmos.c
Diffstat (limited to 'kernel/trace')
-rw-r--r-- kernel/trace/Kconfig 87
-rw-r--r-- kernel/trace/Makefile 10
-rw-r--r-- kernel/trace/ftrace.c 691
-rw-r--r-- kernel/trace/ring_buffer.c 2072
-rw-r--r-- kernel/trace/trace.c 1890
-rw-r--r-- kernel/trace/trace.h 215
-rw-r--r-- kernel/trace/trace_boot.c 126
-rw-r--r-- kernel/trace/trace_functions.c 4
-rw-r--r-- kernel/trace/trace_irqsoff.c 23
-rw-r--r-- kernel/trace/trace_mmiotrace.c 116
-rw-r--r-- kernel/trace/trace_nop.c 64
-rw-r--r-- kernel/trace/trace_sched_switch.c 137
-rw-r--r-- kernel/trace/trace_sched_wakeup.c 152
-rw-r--r-- kernel/trace/trace_selftest.c 101
-rw-r--r-- kernel/trace/trace_stack.c 314
-rw-r--r-- kernel/trace/trace_sysprof.c 2
16 files changed, 4216 insertions, 1788 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..33dbefd471e8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,40 @@
1# 1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: 2# Architectures that offer an FUNCTION_TRACER implementation should
3# select HAVE_FUNCTION_TRACER:
3# 4#
4config HAVE_FTRACE 5
6config NOP_TRACER
7 bool
8
9config HAVE_FUNCTION_TRACER
5 bool 10 bool
6 11
7config HAVE_DYNAMIC_FTRACE 12config HAVE_DYNAMIC_FTRACE
8 bool 13 bool
9 14
15config HAVE_FTRACE_MCOUNT_RECORD
16 bool
17
10config TRACER_MAX_TRACE 18config TRACER_MAX_TRACE
11 bool 19 bool
12 20
21config RING_BUFFER
22 bool
23
13config TRACING 24config TRACING
14 bool 25 bool
15 select DEBUG_FS 26 select DEBUG_FS
16 select STACKTRACE 27 select RING_BUFFER
28 select STACKTRACE if STACKTRACE_SUPPORT
29 select TRACEPOINTS
30 select NOP_TRACER
17 31
18config FTRACE 32menu "Tracers"
33
34config FUNCTION_TRACER
19 bool "Kernel Function Tracer" 35 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE 36 depends on HAVE_FUNCTION_TRACER
37 depends on DEBUG_KERNEL
21 select FRAME_POINTER 38 select FRAME_POINTER
22 select TRACING 39 select TRACING
23 select CONTEXT_SWITCH_TRACER 40 select CONTEXT_SWITCH_TRACER
@@ -35,7 +52,7 @@ config IRQSOFF_TRACER
35 default n 52 default n
36 depends on TRACE_IRQFLAGS_SUPPORT 53 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME 54 depends on GENERIC_TIME
38 depends on HAVE_FTRACE 55 depends on DEBUG_KERNEL
39 select TRACE_IRQFLAGS 56 select TRACE_IRQFLAGS
40 select TRACING 57 select TRACING
41 select TRACER_MAX_TRACE 58 select TRACER_MAX_TRACE
@@ -58,7 +75,7 @@ config PREEMPT_TRACER
58 default n 75 default n
59 depends on GENERIC_TIME 76 depends on GENERIC_TIME
60 depends on PREEMPT 77 depends on PREEMPT
61 depends on HAVE_FTRACE 78 depends on DEBUG_KERNEL
62 select TRACING 79 select TRACING
63 select TRACER_MAX_TRACE 80 select TRACER_MAX_TRACE
64 help 81 help
@@ -85,7 +102,7 @@ config SYSPROF_TRACER
85 102
86config SCHED_TRACER 103config SCHED_TRACER
87 bool "Scheduling Latency Tracer" 104 bool "Scheduling Latency Tracer"
88 depends on HAVE_FTRACE 105 depends on DEBUG_KERNEL
89 select TRACING 106 select TRACING
90 select CONTEXT_SWITCH_TRACER 107 select CONTEXT_SWITCH_TRACER
91 select TRACER_MAX_TRACE 108 select TRACER_MAX_TRACE
@@ -95,17 +112,56 @@ config SCHED_TRACER
95 112
96config CONTEXT_SWITCH_TRACER 113config CONTEXT_SWITCH_TRACER
97 bool "Trace process context switches" 114 bool "Trace process context switches"
98 depends on HAVE_FTRACE 115 depends on DEBUG_KERNEL
99 select TRACING 116 select TRACING
100 select MARKERS 117 select MARKERS
101 help 118 help
102 This tracer gets called from the context switch and records 119 This tracer gets called from the context switch and records
103 all switching of tasks. 120 all switching of tasks.
104 121
122config BOOT_TRACER
123 bool "Trace boot initcalls"
124 depends on DEBUG_KERNEL
125 select TRACING
126 select CONTEXT_SWITCH_TRACER
127 help
128 This tracer helps developers to optimize boot times: it records
129 the timings of the initcalls and traces key events and the identity
130 of tasks that can cause boot delays, such as context-switches.
131
132 Its aim is to be parsed by the /scripts/bootgraph.pl tool to
133 produce pretty graphics about boot inefficiencies, giving a visual
134 representation of the delays during initcalls - but the raw
135 /debug/tracing/trace text output is readable too.
136
137 ( Note that tracing self tests can't be enabled if this tracer is
138 selected, because the self-tests are an initcall as well and that
139 would invalidate the boot trace. )
140
141config STACK_TRACER
142 bool "Trace max stack"
143 depends on HAVE_FUNCTION_TRACER
144 depends on DEBUG_KERNEL
145 select FUNCTION_TRACER
146 select STACKTRACE
147 help
148 This special tracer records the maximum stack footprint of the
149 kernel and displays it in debugfs/tracing/stack_trace.
150
151 This tracer works by hooking into every function call that the
152 kernel executes, and keeping a maximum stack depth value and
153 stack-trace saved. Because this logic has to execute in every
154 kernel function, all the time, this option can slow down the
155 kernel measurably and is generally intended for kernel
156 developers only.
157
158 Say N if unsure.
159
105config DYNAMIC_FTRACE 160config DYNAMIC_FTRACE
106 bool "enable/disable ftrace tracepoints dynamically" 161 bool "enable/disable ftrace tracepoints dynamically"
107 depends on FTRACE 162 depends on FUNCTION_TRACER
108 depends on HAVE_DYNAMIC_FTRACE 163 depends on HAVE_DYNAMIC_FTRACE
164 depends on DEBUG_KERNEL
109 default y 165 default y
110 help 166 help
111 This option will modify all the calls to ftrace dynamically 167 This option will modify all the calls to ftrace dynamically
@@ -113,7 +169,7 @@ config DYNAMIC_FTRACE
113 with a No-Op instruction) as they are called. A table is 169 with a No-Op instruction) as they are called. A table is
114 created to dynamically enable them again. 170 created to dynamically enable them again.
115 171
116 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise 172 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
117 has native performance as long as no tracing is active. 173 has native performance as long as no tracing is active.
118 174
119 The changes to the code are done by a kernel thread that 175 The changes to the code are done by a kernel thread that
@@ -121,15 +177,22 @@ config DYNAMIC_FTRACE
121 were made. If so, it runs stop_machine (stops all CPUS) 177 were made. If so, it runs stop_machine (stops all CPUS)
122 and modifies the code to jump over the call to ftrace. 178 and modifies the code to jump over the call to ftrace.
123 179
180config FTRACE_MCOUNT_RECORD
181 def_bool y
182 depends on DYNAMIC_FTRACE
183 depends on HAVE_FTRACE_MCOUNT_RECORD
184
124config FTRACE_SELFTEST 185config FTRACE_SELFTEST
125 bool 186 bool
126 187
127config FTRACE_STARTUP_TEST 188config FTRACE_STARTUP_TEST
128 bool "Perform a startup test on ftrace" 189 bool "Perform a startup test on ftrace"
129 depends on TRACING 190 depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
130 select FTRACE_SELFTEST 191 select FTRACE_SELFTEST
131 help 192 help
132 This option performs a series of startup tests on ftrace. On bootup 193 This option performs a series of startup tests on ftrace. On bootup
133 a series of tests are made to verify that the tracer is 194 a series of tests are made to verify that the tracer is
134 functioning properly. It will do tests on all the configured 195 functioning properly. It will do tests on all the configured
135 tracers of ftrace. 196 tracers of ftrace.
197
198endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de17288..c8228b1a49e9 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
1 1
2# Do not instrument the tracer itself: 2# Do not instrument the tracer itself:
3 3
4ifdef CONFIG_FTRACE 4ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7 7
@@ -10,15 +10,19 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13obj-$(CONFIG_FTRACE) += libftrace.o 13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
14 15
15obj-$(CONFIG_TRACING) += trace.o 16obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
17obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
18obj-$(CONFIG_FTRACE) += trace_functions.o 19obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
19obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
23obj-$(CONFIG_NOP_TRACER) += trace_nop.o
24obj-$(CONFIG_STACK_TRACER) += trace_stack.o
22obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 25obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
26obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
23 27
24libftrace-y := ftrace.o 28libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..4a39d24568c8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,13 +25,24 @@
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h> 28#include <linux/list.h>
30 29
31#include <asm/ftrace.h> 30#include <asm/ftrace.h>
32 31
33#include "trace.h" 32#include "trace.h"
34 33
34#define FTRACE_WARN_ON(cond) \
35 do { \
36 if (WARN_ON(cond)) \
37 ftrace_kill(); \
38 } while (0)
39
40#define FTRACE_WARN_ON_ONCE(cond) \
41 do { \
42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \
44 } while (0)
45
35/* ftrace_enabled is a method to turn ftrace on or off */ 46/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled; 48static int last_ftrace_enabled;
@@ -81,7 +92,7 @@ void clear_ftrace_function(void)
81 92
82static int __register_ftrace_function(struct ftrace_ops *ops) 93static int __register_ftrace_function(struct ftrace_ops *ops)
83{ 94{
84 /* Should never be called by interrupts */ 95 /* should not be called from interrupt context */
85 spin_lock(&ftrace_lock); 96 spin_lock(&ftrace_lock);
86 97
87 ops->next = ftrace_list; 98 ops->next = ftrace_list;
@@ -115,6 +126,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
115 struct ftrace_ops **p; 126 struct ftrace_ops **p;
116 int ret = 0; 127 int ret = 0;
117 128
129 /* should not be called from interrupt context */
118 spin_lock(&ftrace_lock); 130 spin_lock(&ftrace_lock);
119 131
120 /* 132 /*
@@ -152,8 +164,17 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152} 164}
153 165
154#ifdef CONFIG_DYNAMIC_FTRACE 166#ifdef CONFIG_DYNAMIC_FTRACE
167#ifndef CONFIG_FTRACE_MCOUNT_RECORD
168# error Dynamic ftrace depends on MCOUNT_RECORD
169#endif
155 170
156static struct task_struct *ftraced_task; 171/*
172 * Since MCOUNT_ADDR may point to mcount itself, we do not want
173 * to get it confused by reading a reference in the code as we
174 * are parsing on objcopy output of text. Use a variable for
175 * it instead.
176 */
177static unsigned long mcount_addr = MCOUNT_ADDR;
157 178
158enum { 179enum {
159 FTRACE_ENABLE_CALLS = (1 << 0), 180 FTRACE_ENABLE_CALLS = (1 << 0),
@@ -165,14 +186,9 @@ enum {
165 186
166static int ftrace_filtered; 187static int ftrace_filtered;
167static int tracing_on; 188static int tracing_on;
168static int frozen_record_count;
169 189
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE]; 190static LIST_HEAD(ftrace_new_addrs);
171 191
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
173
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock); 192static DEFINE_MUTEX(ftrace_regex_lock);
177 193
178struct ftrace_page { 194struct ftrace_page {
@@ -190,16 +206,13 @@ struct ftrace_page {
190static struct ftrace_page *ftrace_pages_start; 206static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages; 207static struct ftrace_page *ftrace_pages;
192 208
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records; 209static struct dyn_ftrace *ftrace_free_records;
200 210
201 211
202#ifdef CONFIG_KPROBES 212#ifdef CONFIG_KPROBES
213
214static int frozen_record_count;
215
203static inline void freeze_record(struct dyn_ftrace *rec) 216static inline void freeze_record(struct dyn_ftrace *rec)
204{ 217{
205 if (!(rec->flags & FTRACE_FL_FROZEN)) { 218 if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -226,79 +239,36 @@ static inline int record_frozen(struct dyn_ftrace *rec)
226# define record_frozen(rec) ({ 0; }) 239# define record_frozen(rec) ({ 0; })
227#endif /* CONFIG_KPROBES */ 240#endif /* CONFIG_KPROBES */
228 241
229int skip_trace(unsigned long ip) 242static void ftrace_free_rec(struct dyn_ftrace *rec)
230{ 243{
231 unsigned long fl; 244 rec->ip = (unsigned long)ftrace_free_records;
232 struct dyn_ftrace *rec; 245 ftrace_free_records = rec;
233 struct hlist_node *t; 246 rec->flags |= FTRACE_FL_FREE;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264} 247}
265 248
266static inline int 249void ftrace_release(void *start, unsigned long size)
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{ 250{
269 struct dyn_ftrace *p; 251 struct dyn_ftrace *rec;
270 struct hlist_node *t; 252 struct ftrace_page *pg;
271 int found = 0; 253 unsigned long s = (unsigned long)start;
272 254 unsigned long e = s + size;
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) { 255 int i;
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282 256
283static inline void 257 if (ftrace_disabled || !start)
284ftrace_add_hash(struct dyn_ftrace *node, unsigned long key) 258 return;
285{
286 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
287}
288 259
289/* called from kstop_machine */ 260 /* should not be called from interrupt context */
290static inline void ftrace_del_hash(struct dyn_ftrace *node) 261 spin_lock(&ftrace_lock);
291{
292 hlist_del(&node->node);
293}
294 262
295static void ftrace_free_rec(struct dyn_ftrace *rec) 263 for (pg = ftrace_pages_start; pg; pg = pg->next) {
296{ 264 for (i = 0; i < pg->index; i++) {
297 /* no locking, only called from kstop_machine */ 265 rec = &pg->records[i];
298 266
299 rec->ip = (unsigned long)ftrace_free_records; 267 if ((rec->ip >= s) && (rec->ip < e))
300 ftrace_free_records = rec; 268 ftrace_free_rec(rec);
301 rec->flags |= FTRACE_FL_FREE; 269 }
270 }
271 spin_unlock(&ftrace_lock);
302} 272}
303 273
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 274static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -310,10 +280,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
310 rec = ftrace_free_records; 280 rec = ftrace_free_records;
311 281
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 282 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1); 283 FTRACE_WARN_ON_ONCE(1);
314 ftrace_free_records = NULL; 284 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL; 285 return NULL;
318 } 286 }
319 287
@@ -323,79 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
323 } 291 }
324 292
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 293 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next) 294 if (!ftrace_pages->next) {
327 return NULL; 295 /* allocate another page */
296 ftrace_pages->next =
297 (void *)get_zeroed_page(GFP_KERNEL);
298 if (!ftrace_pages->next)
299 return NULL;
300 }
328 ftrace_pages = ftrace_pages->next; 301 ftrace_pages = ftrace_pages->next;
329 } 302 }
330 303
331 return &ftrace_pages->records[ftrace_pages->index++]; 304 return &ftrace_pages->records[ftrace_pages->index++];
332} 305}
333 306
334static void 307static struct dyn_ftrace *
335ftrace_record_ip(unsigned long ip) 308ftrace_record_ip(unsigned long ip)
336{ 309{
337 struct dyn_ftrace *node; 310 struct dyn_ftrace *rec;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343 311
344 if (!ftrace_enabled || ftrace_disabled) 312 if (!ftrace_enabled || ftrace_disabled)
345 return; 313 return NULL;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367
368 if (ftrace_ip_in_hash(ip, key))
369 goto out;
370
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374
375 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key))
377 goto out_unlock;
378
379 node = ftrace_alloc_dyn_node(ip);
380 if (!node)
381 goto out_unlock;
382 314
383 node->ip = ip; 315 rec = ftrace_alloc_dyn_node(ip);
384 316 if (!rec)
385 ftrace_add_hash(node, key); 317 return NULL;
386 318
387 ftraced_trigger = 1; 319 rec->ip = ip;
388 320
389 out_unlock: 321 list_add(&rec->list, &ftrace_new_addrs);
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
391 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393 322
394 /* prevent recursion with scheduler */ 323 return rec;
395 if (resched)
396 preempt_enable_no_resched_notrace();
397 else
398 preempt_enable_notrace();
399} 324}
400 325
401#define FTRACE_ADDR ((long)(ftrace_caller)) 326#define FTRACE_ADDR ((long)(ftrace_caller))
@@ -514,7 +439,6 @@ static void ftrace_replace_code(int enable)
514 rec->flags |= FTRACE_FL_FAILED; 439 rec->flags |= FTRACE_FL_FAILED;
515 if ((system_state == SYSTEM_BOOTING) || 440 if ((system_state == SYSTEM_BOOTING) ||
516 !core_kernel_text(rec->ip)) { 441 !core_kernel_text(rec->ip)) {
517 ftrace_del_hash(rec);
518 ftrace_free_rec(rec); 442 ftrace_free_rec(rec);
519 } 443 }
520 } 444 }
@@ -522,13 +446,14 @@ static void ftrace_replace_code(int enable)
522 } 446 }
523} 447}
524 448
525static void ftrace_shutdown_replenish(void) 449static void print_ip_ins(const char *fmt, unsigned char *p)
526{ 450{
527 if (ftrace_pages->next) 451 int i;
528 return; 452
453 printk(KERN_CONT "%s", fmt);
529 454
530 /* allocate another page */ 455 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL); 456 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
532} 457}
533 458
534static int 459static int
@@ -536,34 +461,52 @@ ftrace_code_disable(struct dyn_ftrace *rec)
536{ 461{
537 unsigned long ip; 462 unsigned long ip;
538 unsigned char *nop, *call; 463 unsigned char *nop, *call;
539 int failed; 464 int ret;
540 465
541 ip = rec->ip; 466 ip = rec->ip;
542 467
543 nop = ftrace_nop_replace(); 468 nop = ftrace_nop_replace();
544 call = ftrace_call_replace(ip, MCOUNT_ADDR); 469 call = ftrace_call_replace(ip, mcount_addr);
470
471 ret = ftrace_modify_code(ip, call, nop);
472 if (ret) {
473 switch (ret) {
474 case -EFAULT:
475 FTRACE_WARN_ON_ONCE(1);
476 pr_info("ftrace faulted on modifying ");
477 print_ip_sym(ip);
478 break;
479 case -EINVAL:
480 FTRACE_WARN_ON_ONCE(1);
481 pr_info("ftrace failed to modify ");
482 print_ip_sym(ip);
483 print_ip_ins(" expected: ", call);
484 print_ip_ins(" actual: ", (unsigned char *)ip);
485 print_ip_ins(" replace: ", nop);
486 printk(KERN_CONT "\n");
487 break;
488 case -EPERM:
489 FTRACE_WARN_ON_ONCE(1);
490 pr_info("ftrace faulted on writing ");
491 print_ip_sym(ip);
492 break;
493 default:
494 FTRACE_WARN_ON_ONCE(1);
495 pr_info("ftrace faulted on unknown error ");
496 print_ip_sym(ip);
497 }
545 498
546 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) {
548 rec->flags |= FTRACE_FL_FAILED; 499 rec->flags |= FTRACE_FL_FAILED;
549 return 0; 500 return 0;
550 } 501 }
551 return 1; 502 return 1;
552} 503}
553 504
554static int __ftrace_update_code(void *ignore);
555
556static int __ftrace_modify_code(void *data) 505static int __ftrace_modify_code(void *data)
557{ 506{
558 unsigned long addr;
559 int *command = data; 507 int *command = data;
560 508
561 if (*command & FTRACE_ENABLE_CALLS) { 509 if (*command & FTRACE_ENABLE_CALLS) {
562 /*
563 * Update any recorded ips now that we have the
564 * machine stopped
565 */
566 __ftrace_update_code(NULL);
567 ftrace_replace_code(1); 510 ftrace_replace_code(1);
568 tracing_on = 1; 511 tracing_on = 1;
569 } else if (*command & FTRACE_DISABLE_CALLS) { 512 } else if (*command & FTRACE_DISABLE_CALLS) {
@@ -574,14 +517,6 @@ static int __ftrace_modify_code(void *data)
574 if (*command & FTRACE_UPDATE_TRACE_FUNC) 517 if (*command & FTRACE_UPDATE_TRACE_FUNC)
575 ftrace_update_ftrace_func(ftrace_trace_function); 518 ftrace_update_ftrace_func(ftrace_trace_function);
576 519
577 if (*command & FTRACE_ENABLE_MCOUNT) {
578 addr = (unsigned long)ftrace_record_ip;
579 ftrace_mcount_set(&addr);
580 } else if (*command & FTRACE_DISABLE_MCOUNT) {
581 addr = (unsigned long)ftrace_stub;
582 ftrace_mcount_set(&addr);
583 }
584
585 return 0; 520 return 0;
586} 521}
587 522
@@ -590,26 +525,9 @@ static void ftrace_run_update_code(int command)
590 stop_machine(__ftrace_modify_code, &command, NULL); 525 stop_machine(__ftrace_modify_code, &command, NULL);
591} 526}
592 527
593void ftrace_disable_daemon(void)
594{
595 /* Stop the daemon from calling kstop_machine */
596 mutex_lock(&ftraced_lock);
597 ftraced_stop = 1;
598 mutex_unlock(&ftraced_lock);
599
600 ftrace_force_update();
601}
602
603void ftrace_enable_daemon(void)
604{
605 mutex_lock(&ftraced_lock);
606 ftraced_stop = 0;
607 mutex_unlock(&ftraced_lock);
608
609 ftrace_force_update();
610}
611
612static ftrace_func_t saved_ftrace_func; 528static ftrace_func_t saved_ftrace_func;
529static int ftrace_start;
530static DEFINE_MUTEX(ftrace_start_lock);
613 531
614static void ftrace_startup(void) 532static void ftrace_startup(void)
615{ 533{
@@ -618,9 +536,9 @@ static void ftrace_startup(void)
618 if (unlikely(ftrace_disabled)) 536 if (unlikely(ftrace_disabled))
619 return; 537 return;
620 538
621 mutex_lock(&ftraced_lock); 539 mutex_lock(&ftrace_start_lock);
622 ftraced_suspend++; 540 ftrace_start++;
623 if (ftraced_suspend == 1) 541 if (ftrace_start == 1)
624 command |= FTRACE_ENABLE_CALLS; 542 command |= FTRACE_ENABLE_CALLS;
625 543
626 if (saved_ftrace_func != ftrace_trace_function) { 544 if (saved_ftrace_func != ftrace_trace_function) {
@@ -633,7 +551,7 @@ static void ftrace_startup(void)
633 551
634 ftrace_run_update_code(command); 552 ftrace_run_update_code(command);
635 out: 553 out:
636 mutex_unlock(&ftraced_lock); 554 mutex_unlock(&ftrace_start_lock);
637} 555}
638 556
639static void ftrace_shutdown(void) 557static void ftrace_shutdown(void)
@@ -643,9 +561,9 @@ static void ftrace_shutdown(void)
643 if (unlikely(ftrace_disabled)) 561 if (unlikely(ftrace_disabled))
644 return; 562 return;
645 563
646 mutex_lock(&ftraced_lock); 564 mutex_lock(&ftrace_start_lock);
647 ftraced_suspend--; 565 ftrace_start--;
648 if (!ftraced_suspend) 566 if (!ftrace_start)
649 command |= FTRACE_DISABLE_CALLS; 567 command |= FTRACE_DISABLE_CALLS;
650 568
651 if (saved_ftrace_func != ftrace_trace_function) { 569 if (saved_ftrace_func != ftrace_trace_function) {
@@ -658,7 +576,7 @@ static void ftrace_shutdown(void)
658 576
659 ftrace_run_update_code(command); 577 ftrace_run_update_code(command);
660 out: 578 out:
661 mutex_unlock(&ftraced_lock); 579 mutex_unlock(&ftrace_start_lock);
662} 580}
663 581
664static void ftrace_startup_sysctl(void) 582static void ftrace_startup_sysctl(void)
@@ -668,15 +586,15 @@ static void ftrace_startup_sysctl(void)
668 if (unlikely(ftrace_disabled)) 586 if (unlikely(ftrace_disabled))
669 return; 587 return;
670 588
671 mutex_lock(&ftraced_lock); 589 mutex_lock(&ftrace_start_lock);
672 /* Force update next time */ 590 /* Force update next time */
673 saved_ftrace_func = NULL; 591 saved_ftrace_func = NULL;
674 /* ftraced_suspend is true if we want ftrace running */ 592 /* ftrace_start is true if we want ftrace running */
675 if (ftraced_suspend) 593 if (ftrace_start)
676 command |= FTRACE_ENABLE_CALLS; 594 command |= FTRACE_ENABLE_CALLS;
677 595
678 ftrace_run_update_code(command); 596 ftrace_run_update_code(command);
679 mutex_unlock(&ftraced_lock); 597 mutex_unlock(&ftrace_start_lock);
680} 598}
681 599
682static void ftrace_shutdown_sysctl(void) 600static void ftrace_shutdown_sysctl(void)
@@ -686,153 +604,51 @@ static void ftrace_shutdown_sysctl(void)
686 if (unlikely(ftrace_disabled)) 604 if (unlikely(ftrace_disabled))
687 return; 605 return;
688 606
689 mutex_lock(&ftraced_lock); 607 mutex_lock(&ftrace_start_lock);
690 /* ftraced_suspend is true if ftrace is running */ 608 /* ftrace_start is true if ftrace is running */
691 if (ftraced_suspend) 609 if (ftrace_start)
692 command |= FTRACE_DISABLE_CALLS; 610 command |= FTRACE_DISABLE_CALLS;
693 611
694 ftrace_run_update_code(command); 612 ftrace_run_update_code(command);
695 mutex_unlock(&ftraced_lock); 613 mutex_unlock(&ftrace_start_lock);
696} 614}
697 615
698static cycle_t ftrace_update_time; 616static cycle_t ftrace_update_time;
699static unsigned long ftrace_update_cnt; 617static unsigned long ftrace_update_cnt;
700unsigned long ftrace_update_tot_cnt; 618unsigned long ftrace_update_tot_cnt;
701 619
702static int __ftrace_update_code(void *ignore) 620static int ftrace_update_code(void)
703{ 621{
704 int i, save_ftrace_enabled; 622 struct dyn_ftrace *p, *t;
705 cycle_t start, stop; 623 cycle_t start, stop;
706 struct dyn_ftrace *p;
707 struct hlist_node *t, *n;
708 struct hlist_head *head, temp_list;
709
710 /* Don't be recording funcs now */
711 ftrace_record_suspend++;
712 save_ftrace_enabled = ftrace_enabled;
713 ftrace_enabled = 0;
714 624
715 start = ftrace_now(raw_smp_processor_id()); 625 start = ftrace_now(raw_smp_processor_id());
716 ftrace_update_cnt = 0; 626 ftrace_update_cnt = 0;
717 627
718 /* No locks needed, the machine is stopped! */ 628 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
719 for (i = 0; i < FTRACE_HASHSIZE; i++) {
720 INIT_HLIST_HEAD(&temp_list);
721 head = &ftrace_hash[i];
722 629
723 /* all CPUS are stopped, we are safe to modify code */ 630 /* If something went wrong, bail without enabling anything */
724 hlist_for_each_entry_safe(p, t, n, head, node) { 631 if (unlikely(ftrace_disabled))
725 /* Skip over failed records which have not been 632 return -1;
726 * freed. */
727 if (p->flags & FTRACE_FL_FAILED)
728 continue;
729
730 /* Unconverted records are always at the head of the
731 * hash bucket. Once we encounter a converted record,
732 * simply skip over to the next bucket. Saves ftraced
733 * some processor cycles (ftrace does its bid for
734 * global warming :-p ). */
735 if (p->flags & (FTRACE_FL_CONVERTED))
736 break;
737 633
738 /* Ignore updates to this record's mcount site. 634 list_del_init(&p->list);
739 * Reintroduce this record at the head of this
740 * bucket to attempt to "convert" it again if
741 * the kprobe on it is unregistered before the
742 * next run. */
743 if (get_kprobe((void *)p->ip)) {
744 ftrace_del_hash(p);
745 INIT_HLIST_NODE(&p->node);
746 hlist_add_head(&p->node, &temp_list);
747 freeze_record(p);
748 continue;
749 } else {
750 unfreeze_record(p);
751 }
752 635
753 /* convert record (i.e, patch mcount-call with NOP) */ 636 /* convert record (i.e, patch mcount-call with NOP) */
754 if (ftrace_code_disable(p)) { 637 if (ftrace_code_disable(p)) {
755 p->flags |= FTRACE_FL_CONVERTED; 638 p->flags |= FTRACE_FL_CONVERTED;
756 ftrace_update_cnt++; 639 ftrace_update_cnt++;
757 } else { 640 } else
758 if ((system_state == SYSTEM_BOOTING) || 641 ftrace_free_rec(p);
759 !core_kernel_text(p->ip)) {
760 ftrace_del_hash(p);
761 ftrace_free_rec(p);
762 }
763 }
764 }
765
766 hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
767 hlist_del(&p->node);
768 INIT_HLIST_NODE(&p->node);
769 hlist_add_head(&p->node, head);
770 }
771 } 642 }
772 643
773 stop = ftrace_now(raw_smp_processor_id()); 644 stop = ftrace_now(raw_smp_processor_id());
774 ftrace_update_time = stop - start; 645 ftrace_update_time = stop - start;
775 ftrace_update_tot_cnt += ftrace_update_cnt; 646 ftrace_update_tot_cnt += ftrace_update_cnt;
776 ftraced_trigger = 0;
777
778 ftrace_enabled = save_ftrace_enabled;
779 ftrace_record_suspend--;
780 647
781 return 0; 648 return 0;
782} 649}
783 650
784static int ftrace_update_code(void) 651static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
785{
786 if (unlikely(ftrace_disabled) ||
787 !ftrace_enabled || !ftraced_trigger)
788 return 0;
789
790 stop_machine(__ftrace_update_code, NULL, NULL);
791
792 return 1;
793}
794
795static int ftraced(void *ignore)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0;
833}
834
835static int __init ftrace_dyn_table_alloc(void)
836{ 652{
837 struct ftrace_page *pg; 653 struct ftrace_page *pg;
838 int cnt; 654 int cnt;
@@ -859,7 +675,9 @@ static int __init ftrace_dyn_table_alloc(void)
859 675
860 pg = ftrace_pages = ftrace_pages_start; 676 pg = ftrace_pages = ftrace_pages_start;
861 677
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE; 678 cnt = num_to_init / ENTRIES_PER_PAGE;
679 pr_info("ftrace: allocating %ld entries in %d pages\n",
680 num_to_init, cnt);
863 681
864 for (i = 0; i < cnt; i++) { 682 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL); 683 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +719,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
901 719
902 (*pos)++; 720 (*pos)++;
903 721
722 /* should not be called from interrupt context */
723 spin_lock(&ftrace_lock);
904 retry: 724 retry:
905 if (iter->idx >= iter->pg->index) { 725 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) { 726 if (iter->pg->next) {
@@ -910,15 +730,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
910 } 730 }
911 } else { 731 } else {
912 rec = &iter->pg->records[iter->idx++]; 732 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) && 733 if ((rec->flags & FTRACE_FL_FREE) ||
734
735 (!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) || 736 (rec->flags & FTRACE_FL_FAILED)) ||
915 737
916 ((iter->flags & FTRACE_ITER_FAILURES) && 738 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) || 739 !(rec->flags & FTRACE_FL_FAILED)) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919
920 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) ||
922 740
923 ((iter->flags & FTRACE_ITER_NOTRACE) && 741 ((iter->flags & FTRACE_ITER_NOTRACE) &&
924 !(rec->flags & FTRACE_FL_NOTRACE))) { 742 !(rec->flags & FTRACE_FL_NOTRACE))) {
@@ -926,6 +744,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
926 goto retry; 744 goto retry;
927 } 745 }
928 } 746 }
747 spin_unlock(&ftrace_lock);
929 748
930 iter->pos = *pos; 749 iter->pos = *pos;
931 750
@@ -1039,8 +858,8 @@ static void ftrace_filter_reset(int enable)
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 858 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i; 859 unsigned i;
1041 860
1042 /* keep kstop machine from running */ 861 /* should not be called from interrupt context */
1043 preempt_disable(); 862 spin_lock(&ftrace_lock);
1044 if (enable) 863 if (enable)
1045 ftrace_filtered = 0; 864 ftrace_filtered = 0;
1046 pg = ftrace_pages_start; 865 pg = ftrace_pages_start;
@@ -1053,7 +872,7 @@ static void ftrace_filter_reset(int enable)
1053 } 872 }
1054 pg = pg->next; 873 pg = pg->next;
1055 } 874 }
1056 preempt_enable(); 875 spin_unlock(&ftrace_lock);
1057} 876}
1058 877
1059static int 878static int
@@ -1165,8 +984,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
1165 } 984 }
1166 } 985 }
1167 986
1168 /* keep kstop machine from running */ 987 /* should not be called from interrupt context */
1169 preempt_disable(); 988 spin_lock(&ftrace_lock);
1170 if (enable) 989 if (enable)
1171 ftrace_filtered = 1; 990 ftrace_filtered = 1;
1172 pg = ftrace_pages_start; 991 pg = ftrace_pages_start;
@@ -1203,7 +1022,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
1203 } 1022 }
1204 pg = pg->next; 1023 pg = pg->next;
1205 } 1024 }
1206 preempt_enable(); 1025 spin_unlock(&ftrace_lock);
1207} 1026}
1208 1027
1209static ssize_t 1028static ssize_t
@@ -1366,10 +1185,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1366 } 1185 }
1367 1186
1368 mutex_lock(&ftrace_sysctl_lock); 1187 mutex_lock(&ftrace_sysctl_lock);
1369 mutex_lock(&ftraced_lock); 1188 mutex_lock(&ftrace_start_lock);
1370 if (iter->filtered && ftraced_suspend && ftrace_enabled) 1189 if (iter->filtered && ftrace_start && ftrace_enabled)
1371 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1190 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1372 mutex_unlock(&ftraced_lock); 1191 mutex_unlock(&ftrace_start_lock);
1373 mutex_unlock(&ftrace_sysctl_lock); 1192 mutex_unlock(&ftrace_sysctl_lock);
1374 1193
1375 kfree(iter); 1194 kfree(iter);
@@ -1389,55 +1208,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1389 return ftrace_regex_release(inode, file, 0); 1208 return ftrace_regex_release(inode, file, 0);
1390} 1209}
1391 1210
1392static ssize_t
1393ftraced_read(struct file *filp, char __user *ubuf,
1394 size_t cnt, loff_t *ppos)
1395{
1396 /* don't worry about races */
1397 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1398 int r = strlen(buf);
1399
1400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1401}
1402
1403static ssize_t
1404ftraced_write(struct file *filp, const char __user *ubuf,
1405 size_t cnt, loff_t *ppos)
1406{
1407 char buf[64];
1408 long val;
1409 int ret;
1410
1411 if (cnt >= sizeof(buf))
1412 return -EINVAL;
1413
1414 if (copy_from_user(&buf, ubuf, cnt))
1415 return -EFAULT;
1416
1417 if (strncmp(buf, "enable", 6) == 0)
1418 val = 1;
1419 else if (strncmp(buf, "disable", 7) == 0)
1420 val = 0;
1421 else {
1422 buf[cnt] = 0;
1423
1424 ret = strict_strtoul(buf, 10, &val);
1425 if (ret < 0)
1426 return ret;
1427
1428 val = !!val;
1429 }
1430
1431 if (val)
1432 ftrace_enable_daemon();
1433 else
1434 ftrace_disable_daemon();
1435
1436 filp->f_pos += cnt;
1437
1438 return cnt;
1439}
1440
1441static struct file_operations ftrace_avail_fops = { 1211static struct file_operations ftrace_avail_fops = {
1442 .open = ftrace_avail_open, 1212 .open = ftrace_avail_open,
1443 .read = seq_read, 1213 .read = seq_read,
@@ -1468,54 +1238,6 @@ static struct file_operations ftrace_notrace_fops = {
1468 .release = ftrace_notrace_release, 1238 .release = ftrace_notrace_release,
1469}; 1239};
1470 1240
1471static struct file_operations ftraced_fops = {
1472 .open = tracing_open_generic,
1473 .read = ftraced_read,
1474 .write = ftraced_write,
1475};
1476
1477/**
1478 * ftrace_force_update - force an update to all recording ftrace functions
1479 */
1480int ftrace_force_update(void)
1481{
1482 int ret = 0;
1483
1484 if (unlikely(ftrace_disabled))
1485 return -ENODEV;
1486
1487 mutex_lock(&ftrace_sysctl_lock);
1488 mutex_lock(&ftraced_lock);
1489
1490 /*
1491 * If ftraced_trigger is not set, then there is nothing
1492 * to update.
1493 */
1494 if (ftraced_trigger && !ftrace_update_code())
1495 ret = -EBUSY;
1496
1497 mutex_unlock(&ftraced_lock);
1498 mutex_unlock(&ftrace_sysctl_lock);
1499
1500 return ret;
1501}
1502
1503static void ftrace_force_shutdown(void)
1504{
1505 struct task_struct *task;
1506 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1507
1508 mutex_lock(&ftraced_lock);
1509 task = ftraced_task;
1510 ftraced_task = NULL;
1511 ftraced_suspend = -1;
1512 ftrace_run_update_code(command);
1513 mutex_unlock(&ftraced_lock);
1514
1515 if (task)
1516 kthread_stop(task);
1517}
1518
1519static __init int ftrace_init_debugfs(void) 1241static __init int ftrace_init_debugfs(void)
1520{ 1242{
1521 struct dentry *d_tracer; 1243 struct dentry *d_tracer;
@@ -1546,97 +1268,103 @@ static __init int ftrace_init_debugfs(void)
1546 pr_warning("Could not create debugfs " 1268 pr_warning("Could not create debugfs "
1547 "'set_ftrace_notrace' entry\n"); 1269 "'set_ftrace_notrace' entry\n");
1548 1270
1549 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1550 NULL, &ftraced_fops);
1551 if (!entry)
1552 pr_warning("Could not create debugfs "
1553 "'ftraced_enabled' entry\n");
1554 return 0; 1271 return 0;
1555} 1272}
1556 1273
1557fs_initcall(ftrace_init_debugfs); 1274fs_initcall(ftrace_init_debugfs);
1558 1275
1559static int __init ftrace_dynamic_init(void) 1276static int ftrace_convert_nops(unsigned long *start,
1277 unsigned long *end)
1560{ 1278{
1561 struct task_struct *p; 1279 unsigned long *p;
1562 unsigned long addr; 1280 unsigned long addr;
1281 unsigned long flags;
1282
1283 mutex_lock(&ftrace_start_lock);
1284 p = start;
1285 while (p < end) {
1286 addr = ftrace_call_adjust(*p++);
1287 ftrace_record_ip(addr);
1288 }
1289
1290 /* disable interrupts to prevent kstop machine */
1291 local_irq_save(flags);
1292 ftrace_update_code();
1293 local_irq_restore(flags);
1294 mutex_unlock(&ftrace_start_lock);
1295
1296 return 0;
1297}
1298
1299void ftrace_init_module(unsigned long *start, unsigned long *end)
1300{
1301 if (ftrace_disabled || start == end)
1302 return;
1303 ftrace_convert_nops(start, end);
1304}
1305
1306extern unsigned long __start_mcount_loc[];
1307extern unsigned long __stop_mcount_loc[];
1308
1309void __init ftrace_init(void)
1310{
1311 unsigned long count, addr, flags;
1563 int ret; 1312 int ret;
1564 1313
1565 addr = (unsigned long)ftrace_record_ip; 1314 /* Keep the ftrace pointer to the stub */
1315 addr = (unsigned long)ftrace_stub;
1566 1316
1567 stop_machine(ftrace_dyn_arch_init, &addr, NULL); 1317 local_irq_save(flags);
1318 ftrace_dyn_arch_init(&addr);
1319 local_irq_restore(flags);
1568 1320
1569 /* ftrace_dyn_arch_init places the return code in addr */ 1321 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) { 1322 if (addr)
1571 ret = (int)addr;
1572 goto failed; 1323 goto failed;
1573 }
1574 1324
1575 ret = ftrace_dyn_table_alloc(); 1325 count = __stop_mcount_loc - __start_mcount_loc;
1576 if (ret)
1577 goto failed;
1578 1326
1579 p = kthread_run(ftraced, NULL, "ftraced"); 1327 ret = ftrace_dyn_table_alloc(count);
1580 if (IS_ERR(p)) { 1328 if (ret)
1581 ret = -1;
1582 goto failed; 1329 goto failed;
1583 }
1584 1330
1585 last_ftrace_enabled = ftrace_enabled = 1; 1331 last_ftrace_enabled = ftrace_enabled = 1;
1586 ftraced_task = p;
1587 1332
1588 return 0; 1333 ret = ftrace_convert_nops(__start_mcount_loc,
1334 __stop_mcount_loc);
1589 1335
1336 return;
1590 failed: 1337 failed:
1591 ftrace_disabled = 1; 1338 ftrace_disabled = 1;
1592 return ret;
1593} 1339}
1594 1340
1595core_initcall(ftrace_dynamic_init);
1596#else 1341#else
1342
1343static int __init ftrace_nodyn_init(void)
1344{
1345 ftrace_enabled = 1;
1346 return 0;
1347}
1348device_initcall(ftrace_nodyn_init);
1349
1597# define ftrace_startup() do { } while (0) 1350# define ftrace_startup() do { } while (0)
1598# define ftrace_shutdown() do { } while (0) 1351# define ftrace_shutdown() do { } while (0)
1599# define ftrace_startup_sysctl() do { } while (0) 1352# define ftrace_startup_sysctl() do { } while (0)
1600# define ftrace_shutdown_sysctl() do { } while (0) 1353# define ftrace_shutdown_sysctl() do { } while (0)
1601# define ftrace_force_shutdown() do { } while (0)
1602#endif /* CONFIG_DYNAMIC_FTRACE */ 1354#endif /* CONFIG_DYNAMIC_FTRACE */
1603 1355
1604/** 1356/**
1605 * ftrace_kill_atomic - kill ftrace from critical sections 1357 * ftrace_kill - kill ftrace
1606 * 1358 *
1607 * This function should be used by panic code. It stops ftrace 1359 * This function should be used by panic code. It stops ftrace
1608 * but in a not so nice way. If you need to simply kill ftrace 1360 * but in a not so nice way. If you need to simply kill ftrace
1609 * from a non-atomic section, use ftrace_kill. 1361 * from a non-atomic section, use ftrace_kill.
1610 */ 1362 */
1611void ftrace_kill_atomic(void)
1612{
1613 ftrace_disabled = 1;
1614 ftrace_enabled = 0;
1615#ifdef CONFIG_DYNAMIC_FTRACE
1616 ftraced_suspend = -1;
1617#endif
1618 clear_ftrace_function();
1619}
1620
1621/**
1622 * ftrace_kill - totally shutdown ftrace
1623 *
1624 * This is a safety measure. If something was detected that seems
1625 * wrong, calling this function will keep ftrace from doing
1626 * any more modifications, and updates.
1627 * used when something went wrong.
1628 */
1629void ftrace_kill(void) 1363void ftrace_kill(void)
1630{ 1364{
1631 mutex_lock(&ftrace_sysctl_lock);
1632 ftrace_disabled = 1; 1365 ftrace_disabled = 1;
1633 ftrace_enabled = 0; 1366 ftrace_enabled = 0;
1634
1635 clear_ftrace_function(); 1367 clear_ftrace_function();
1636 mutex_unlock(&ftrace_sysctl_lock);
1637
1638 /* Try to totally disable ftrace */
1639 ftrace_force_shutdown();
1640} 1368}
1641 1369
1642/** 1370/**
@@ -1725,3 +1453,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1725 mutex_unlock(&ftrace_sysctl_lock); 1453 mutex_unlock(&ftrace_sysctl_lock);
1726 return ret; 1454 return ret;
1727} 1455}
1456
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 000000000000..3f3380638646
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2072 @@
1/*
2 * Generic ring buffer
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/ring_buffer.h>
7#include <linux/spinlock.h>
8#include <linux/debugfs.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/percpu.h>
12#include <linux/mutex.h>
13#include <linux/sched.h> /* used for sched_clock() (for now) */
14#include <linux/init.h>
15#include <linux/hash.h>
16#include <linux/list.h>
17#include <linux/fs.h>
18
19/* Up this if you want to test the TIME_EXTENTS and normalization */
20#define DEBUG_SHIFT 0
21
22/* FIXME!!! */
23u64 ring_buffer_time_stamp(int cpu)
24{
25 /* shift to debug/test normalization and TIME_EXTENTS */
26 return sched_clock() << DEBUG_SHIFT;
27}
28
29void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
30{
31 /* Just stupid testing the normalize function and deltas */
32 *ts >>= DEBUG_SHIFT;
33}
34
/*
 * Event layout constants.
 * RB_EVNT_HDR_SIZE is added to the payload length of every data event.
 * Payload lengths up to RB_MAX_SMALL_DATA bytes are stored in the event's
 * len field in units of RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT = 4) bytes;
 * larger payloads store their byte length in array[0] instead.
 */
35#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
36#define RB_ALIGNMENT_SHIFT 2
37#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT)
38#define RB_MAX_SMALL_DATA 28
39
/* Fixed total lengths, in bytes, reported for the two time event types */
40enum {
41 RB_LEN_TIME_EXTEND = 8,
42 RB_LEN_TIME_STAMP = 16,
43};
44
45/* inline for ring buffer fast paths */
46static inline unsigned
47rb_event_length(struct ring_buffer_event *event)
48{
49 unsigned length;
50
51 switch (event->type) {
52 case RINGBUF_TYPE_PADDING:
53 /* undefined */
54 return -1;
55
56 case RINGBUF_TYPE_TIME_EXTEND:
57 return RB_LEN_TIME_EXTEND;
58
59 case RINGBUF_TYPE_TIME_STAMP:
60 return RB_LEN_TIME_STAMP;
61
62 case RINGBUF_TYPE_DATA:
63 if (event->len)
64 length = event->len << RB_ALIGNMENT_SHIFT;
65 else
66 length = event->array[0];
67 return length + RB_EVNT_HDR_SIZE;
68 default:
69 BUG();
70 }
71 /* not hit */
72 return 0;
73}
74
/**
 * ring_buffer_event_length - return the length of the event
 * @event: the event to get the length of
 *
 * Exported wrapper around the inline rb_event_length().
 */
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
	unsigned length = rb_event_length(event);

	return length;
}
83
84/* inline for ring buffer fast paths */
85static inline void *
86rb_event_data(struct ring_buffer_event *event)
87{
88 BUG_ON(event->type != RINGBUF_TYPE_DATA);
89 /* If length is in len field, then array[0] has the data */
90 if (event->len)
91 return (void *)&event->array[0];
92 /* Otherwise length is in array[0] and array[1] has the data */
93 return (void *)&event->array[1];
94}
95
/**
 * ring_buffer_event_data - return the data of the event
 * @event: the event to get the data from
 *
 * Exported wrapper around the inline rb_event_data().
 */
void *ring_buffer_event_data(struct ring_buffer_event *event)
{
	void *data = rb_event_data(event);

	return data;
}
104
/* Iterate @cpu over every CPU set in @buffer's cpumask */
105#define for_each_buffer_cpu(buffer, cpu) \
106 for_each_cpu_mask(cpu, buffer->cpumask)
107
/*
 * A time stamp delta must fit in TS_SHIFT (27) bits. TS_MASK selects those
 * low bits; TS_DELTA_TEST selects every bit above them.
 */
108#define TS_SHIFT 27
109#define TS_MASK ((1ULL << TS_SHIFT) - 1)
110#define TS_DELTA_TEST (~TS_MASK)
111
112/*
113 * This hack stolen from mm/slob.c.
114 * We can store per page timing information in the page frame of the page.
115 * Thanks to Peter Zijlstra for suggesting this idea.
116 */
/*
 * NOTE(review): in the code visible here a buffer_page is a separately
 * allocated descriptor whose ->page member points at the data page.
 */
117struct buffer_page {
118 u64 time_stamp; /* page time stamp */
119 local_t write; /* index for next write */
120 local_t commit; /* write committed index */
121 unsigned read; /* index for next read */
122 struct list_head list; /* links the page into a page list */
123 void *page; /* Actual data page */
124};
125
126/*
127 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
128 * this issue out.
129 */
130static inline void free_buffer_page(struct buffer_page *bpage)
131{
132 if (bpage->page)
133 free_page((unsigned long)bpage->page);
134 kfree(bpage);
135}
136
137/*
138 * We need to fit the time_stamp delta into 27 bits.
139 */
140static inline int test_time_stamp(u64 delta)
141{
142 if (delta & TS_DELTA_TEST)
143 return 1;
144 return 0;
145}
146
147#define BUF_PAGE_SIZE PAGE_SIZE
148
149/*
150 * head_page == tail_page && head == tail then buffer is empty.
151 */
152struct ring_buffer_per_cpu {
153 int cpu; /* CPU this buffer belongs to */
154 struct ring_buffer *buffer; /* owning ring buffer */
155 spinlock_t lock;
156 struct lock_class_key lock_key;
157 struct list_head pages; /* list of buffer pages */
158 struct buffer_page *head_page; /* read from head */
159 struct buffer_page *tail_page; /* write to tail */
160 struct buffer_page *commit_page; /* committed pages */
161 struct buffer_page *reader_page; /* allocated apart from the pages list */
162 unsigned long overrun; /* data entries discarded by overwrite */
163 unsigned long entries;
164 u64 write_stamp; /* taken from the commit page's time stamp */
165 u64 read_stamp; /* taken from the reader page's time stamp */
166 atomic_t record_disabled; /* raised while resizing and by RB_WARN_ON* */
167};
168
/* Top-level ring buffer: one ring_buffer_per_cpu for each CPU in cpumask */
169struct ring_buffer {
170 unsigned long size;
171 unsigned pages; /* number of pages in each per-cpu buffer */
172 unsigned flags; /* flags passed to ring_buffer_alloc() */
173 int cpus; /* set to nr_cpu_ids at allocation */
174 cpumask_t cpumask; /* set to cpu_possible_map at allocation */
175 atomic_t record_disabled;
176
177 struct mutex mutex; /* held while resizing */
178
179 struct ring_buffer_per_cpu **buffers; /* array indexed by cpu */
180};
181
/* Read iterator over a single per-cpu buffer */
182struct ring_buffer_iter {
183 struct ring_buffer_per_cpu *cpu_buffer; /* buffer being iterated */
184 unsigned long head; /* index into head_page */
185 struct buffer_page *head_page; /* page the iterator is on */
186 u64 read_stamp; /* reset from head_page's time stamp on page change */
187};
188
/*
 * Sanity-check helpers. When @cond is true each of them increments
 * @buffer->record_disabled and then issues WARN_ON(1).
 *
 * RB_WARN_ON      - warn and carry on.
 * RB_WARN_ON_RET  - warn and return -1 from the *calling* function, so it
 *                   may only be used in functions returning an int.
 * RB_WARN_ON_ONCE - warn only the first time the condition is seen at
 *                   this call site (tracked by a static flag).
 */
189#define RB_WARN_ON(buffer, cond) \
190 do { \
191 if (unlikely(cond)) { \
192 atomic_inc(&buffer->record_disabled); \
193 WARN_ON(1); \
194 } \
195 } while (0)
196
197#define RB_WARN_ON_RET(buffer, cond) \
198 do { \
199 if (unlikely(cond)) { \
200 atomic_inc(&buffer->record_disabled); \
201 WARN_ON(1); \
202 return -1; \
203 } \
204 } while (0)
205
206#define RB_WARN_ON_ONCE(buffer, cond) \
207 do { \
208 static int once; \
209 if (unlikely(cond) && !once) { \
210 once++; \
211 atomic_inc(&buffer->record_disabled); \
212 WARN_ON(1); \
213 } \
214 } while (0)
215
216/**
217 * check_pages - integrity check of buffer pages
218 * @cpu_buffer: CPU buffer with pages to test
219 *
220 * As a safty measure we check to make sure the data pages have not
221 * been corrupted.
222 */
223static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
224{
225 struct list_head *head = &cpu_buffer->pages;
226 struct buffer_page *page, *tmp;
227
228 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
229 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
230
231 list_for_each_entry_safe(page, tmp, head, list) {
232 RB_WARN_ON_RET(cpu_buffer,
233 page->list.next->prev != &page->list);
234 RB_WARN_ON_RET(cpu_buffer,
235 page->list.prev->next != &page->list);
236 }
237
238 return 0;
239}
240
241static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
242 unsigned nr_pages)
243{
244 struct list_head *head = &cpu_buffer->pages;
245 struct buffer_page *page, *tmp;
246 unsigned long addr;
247 LIST_HEAD(pages);
248 unsigned i;
249
250 for (i = 0; i < nr_pages; i++) {
251 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
252 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
253 if (!page)
254 goto free_pages;
255 list_add(&page->list, &pages);
256
257 addr = __get_free_page(GFP_KERNEL);
258 if (!addr)
259 goto free_pages;
260 page->page = (void *)addr;
261 }
262
263 list_splice(&pages, head);
264
265 rb_check_pages(cpu_buffer);
266
267 return 0;
268
269 free_pages:
270 list_for_each_entry_safe(page, tmp, &pages, list) {
271 list_del_init(&page->list);
272 free_buffer_page(page);
273 }
274 return -ENOMEM;
275}
276
277static struct ring_buffer_per_cpu *
278rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
279{
280 struct ring_buffer_per_cpu *cpu_buffer;
281 struct buffer_page *page;
282 unsigned long addr;
283 int ret;
284
285 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
286 GFP_KERNEL, cpu_to_node(cpu));
287 if (!cpu_buffer)
288 return NULL;
289
290 cpu_buffer->cpu = cpu;
291 cpu_buffer->buffer = buffer;
292 spin_lock_init(&cpu_buffer->lock);
293 INIT_LIST_HEAD(&cpu_buffer->pages);
294
295 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
296 GFP_KERNEL, cpu_to_node(cpu));
297 if (!page)
298 goto fail_free_buffer;
299
300 cpu_buffer->reader_page = page;
301 addr = __get_free_page(GFP_KERNEL);
302 if (!addr)
303 goto fail_free_reader;
304 page->page = (void *)addr;
305
306 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
307
308 ret = rb_allocate_pages(cpu_buffer, buffer->pages);
309 if (ret < 0)
310 goto fail_free_reader;
311
312 cpu_buffer->head_page
313 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
314 cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
315
316 return cpu_buffer;
317
318 fail_free_reader:
319 free_buffer_page(cpu_buffer->reader_page);
320
321 fail_free_buffer:
322 kfree(cpu_buffer);
323 return NULL;
324}
325
326static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
327{
328 struct list_head *head = &cpu_buffer->pages;
329 struct buffer_page *page, *tmp;
330
331 list_del_init(&cpu_buffer->reader_page->list);
332 free_buffer_page(cpu_buffer->reader_page);
333
334 list_for_each_entry_safe(page, tmp, head, list) {
335 list_del_init(&page->list);
336 free_buffer_page(page);
337 }
338 kfree(cpu_buffer);
339}
340
341/*
342 * Causes compile errors if the struct buffer_page gets bigger
343 * than the struct page.
344 */
345extern int ring_buffer_page_too_big(void);
346
347/**
348 * ring_buffer_alloc - allocate a new ring_buffer
349 * @size: the size in bytes that is needed.
350 * @flags: attributes to set for the ring buffer.
351 *
352 * Currently the only flag that is available is the RB_FL_OVERWRITE
353 * flag. This flag means that the buffer will overwrite old data
354 * when the buffer wraps. If this flag is not set, the buffer will
355 * drop data when the tail hits the head.
356 */
357struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
358{
359 struct ring_buffer *buffer;
360 int bsize;
361 int cpu;
362
363 /* Paranoid! Optimizes out when all is well */
364 if (sizeof(struct buffer_page) > sizeof(struct page))
365 ring_buffer_page_too_big();
366
367
368 /* keep it in its own cache line */
369 buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
370 GFP_KERNEL);
371 if (!buffer)
372 return NULL;
373
374 buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
375 buffer->flags = flags;
376
377 /* need at least two pages */
378 if (buffer->pages == 1)
379 buffer->pages++;
380
381 buffer->cpumask = cpu_possible_map;
382 buffer->cpus = nr_cpu_ids;
383
384 bsize = sizeof(void *) * nr_cpu_ids;
385 buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
386 GFP_KERNEL);
387 if (!buffer->buffers)
388 goto fail_free_buffer;
389
390 for_each_buffer_cpu(buffer, cpu) {
391 buffer->buffers[cpu] =
392 rb_allocate_cpu_buffer(buffer, cpu);
393 if (!buffer->buffers[cpu])
394 goto fail_free_buffers;
395 }
396
397 mutex_init(&buffer->mutex);
398
399 return buffer;
400
401 fail_free_buffers:
402 for_each_buffer_cpu(buffer, cpu) {
403 if (buffer->buffers[cpu])
404 rb_free_cpu_buffer(buffer->buffers[cpu]);
405 }
406 kfree(buffer->buffers);
407
408 fail_free_buffer:
409 kfree(buffer);
410 return NULL;
411}
412
413/**
414 * ring_buffer_free - free a ring buffer.
415 * @buffer: the buffer to free.
416 */
417void
418ring_buffer_free(struct ring_buffer *buffer)
419{
420 int cpu;
421
422 for_each_buffer_cpu(buffer, cpu)
423 rb_free_cpu_buffer(buffer->buffers[cpu]);
424
425 kfree(buffer);
426}
427
428static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
429
430static void
431rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
432{
433 struct buffer_page *page;
434 struct list_head *p;
435 unsigned i;
436
437 atomic_inc(&cpu_buffer->record_disabled);
438 synchronize_sched();
439
440 for (i = 0; i < nr_pages; i++) {
441 BUG_ON(list_empty(&cpu_buffer->pages));
442 p = cpu_buffer->pages.next;
443 page = list_entry(p, struct buffer_page, list);
444 list_del_init(&page->list);
445 free_buffer_page(page);
446 }
447 BUG_ON(list_empty(&cpu_buffer->pages));
448
449 rb_reset_cpu(cpu_buffer);
450
451 rb_check_pages(cpu_buffer);
452
453 atomic_dec(&cpu_buffer->record_disabled);
454
455}
456
457static void
458rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
459 struct list_head *pages, unsigned nr_pages)
460{
461 struct buffer_page *page;
462 struct list_head *p;
463 unsigned i;
464
465 atomic_inc(&cpu_buffer->record_disabled);
466 synchronize_sched();
467
468 for (i = 0; i < nr_pages; i++) {
469 BUG_ON(list_empty(pages));
470 p = pages->next;
471 page = list_entry(p, struct buffer_page, list);
472 list_del_init(&page->list);
473 list_add_tail(&page->list, &cpu_buffer->pages);
474 }
475 rb_reset_cpu(cpu_buffer);
476
477 rb_check_pages(cpu_buffer);
478
479 atomic_dec(&cpu_buffer->record_disabled);
480}
481
482/**
483 * ring_buffer_resize - resize the ring buffer
484 * @buffer: the buffer to resize.
485 * @size: the new size.
486 *
487 * The tracer is responsible for making sure that the buffer is
488 * not being used while changing the size.
489 * Note: We may be able to change the above requirement by using
490 * RCU synchronizations.
491 *
492 * Minimum size is 2 * BUF_PAGE_SIZE.
493 *
494 * Returns -1 on failure.
495 */
496int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
497{
498 struct ring_buffer_per_cpu *cpu_buffer;
499 unsigned nr_pages, rm_pages, new_pages;
500 struct buffer_page *page, *tmp;
501 unsigned long buffer_size;
502 unsigned long addr;
503 LIST_HEAD(pages);
504 int i, cpu;
505
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE;
509
510 /* we need a minimum of two pages */
511 if (size < BUF_PAGE_SIZE * 2)
512 size = BUF_PAGE_SIZE * 2;
513
514 if (size == buffer_size)
515 return size;
516
517 mutex_lock(&buffer->mutex);
518
519 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
520
521 if (size < buffer_size) {
522
523 /* easy case, just free pages */
524 BUG_ON(nr_pages >= buffer->pages);
525
526 rm_pages = buffer->pages - nr_pages;
527
528 for_each_buffer_cpu(buffer, cpu) {
529 cpu_buffer = buffer->buffers[cpu];
530 rb_remove_pages(cpu_buffer, rm_pages);
531 }
532 goto out;
533 }
534
535 /*
536 * This is a bit more difficult. We only want to add pages
537 * when we can allocate enough for all CPUs. We do this
538 * by allocating all the pages and storing them on a local
539 * link list. If we succeed in our allocation, then we
540 * add these pages to the cpu_buffers. Otherwise we just free
541 * them all and return -ENOMEM;
542 */
543 BUG_ON(nr_pages <= buffer->pages);
544 new_pages = nr_pages - buffer->pages;
545
546 for_each_buffer_cpu(buffer, cpu) {
547 for (i = 0; i < new_pages; i++) {
548 page = kzalloc_node(ALIGN(sizeof(*page),
549 cache_line_size()),
550 GFP_KERNEL, cpu_to_node(cpu));
551 if (!page)
552 goto free_pages;
553 list_add(&page->list, &pages);
554 addr = __get_free_page(GFP_KERNEL);
555 if (!addr)
556 goto free_pages;
557 page->page = (void *)addr;
558 }
559 }
560
561 for_each_buffer_cpu(buffer, cpu) {
562 cpu_buffer = buffer->buffers[cpu];
563 rb_insert_pages(cpu_buffer, &pages, new_pages);
564 }
565
566 BUG_ON(!list_empty(&pages));
567
568 out:
569 buffer->pages = nr_pages;
570 mutex_unlock(&buffer->mutex);
571
572 return size;
573
574 free_pages:
575 list_for_each_entry_safe(page, tmp, &pages, list) {
576 list_del_init(&page->list);
577 free_buffer_page(page);
578 }
579 return -ENOMEM;
580}
581
582static inline int rb_null_event(struct ring_buffer_event *event)
583{
584 return event->type == RINGBUF_TYPE_PADDING;
585}
586
587static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
588{
589 return page->page + index;
590}
591
592static inline struct ring_buffer_event *
593rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
594{
595 return __rb_page_index(cpu_buffer->reader_page,
596 cpu_buffer->reader_page->read);
597}
598
599static inline struct ring_buffer_event *
600rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
601{
602 return __rb_page_index(cpu_buffer->head_page,
603 cpu_buffer->head_page->read);
604}
605
606static inline struct ring_buffer_event *
607rb_iter_head_event(struct ring_buffer_iter *iter)
608{
609 return __rb_page_index(iter->head_page, iter->head);
610}
611
612static inline unsigned rb_page_write(struct buffer_page *bpage)
613{
614 return local_read(&bpage->write);
615}
616
617static inline unsigned rb_page_commit(struct buffer_page *bpage)
618{
619 return local_read(&bpage->commit);
620}
621
/* The size of a page is whatever has been committed to it so far */
static inline unsigned rb_page_size(struct buffer_page *bpage)
{
	unsigned committed = rb_page_commit(bpage);

	return committed;
}
627
628static inline unsigned
629rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
630{
631 return rb_page_commit(cpu_buffer->commit_page);
632}
633
634static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
635{
636 return rb_page_commit(cpu_buffer->head_page);
637}
638
639/*
640 * When the tail hits the head and the buffer is in overwrite mode,
641 * the head jumps to the next page and all content on the previous
642 * page is discarded. But before doing so, we update the overrun
643 * variable of the buffer.
644 */
645static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
646{
647 struct ring_buffer_event *event;
648 unsigned long head;
649
650 for (head = 0; head < rb_head_size(cpu_buffer);
651 head += rb_event_length(event)) {
652
653 event = __rb_page_index(cpu_buffer->head_page, head);
654 BUG_ON(rb_null_event(event));
655 /* Only count data entries */
656 if (event->type != RINGBUF_TYPE_DATA)
657 continue;
658 cpu_buffer->overrun++;
659 cpu_buffer->entries--;
660 }
661}
662
663static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
664 struct buffer_page **page)
665{
666 struct list_head *p = (*page)->list.next;
667
668 if (p == &cpu_buffer->pages)
669 p = p->next;
670
671 *page = list_entry(p, struct buffer_page, list);
672}
673
674static inline unsigned
675rb_event_index(struct ring_buffer_event *event)
676{
677 unsigned long addr = (unsigned long)event;
678
679 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
680}
681
682static inline int
683rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
684 struct ring_buffer_event *event)
685{
686 unsigned long addr = (unsigned long)event;
687 unsigned long index;
688
689 index = rb_event_index(event);
690 addr &= PAGE_MASK;
691
692 return cpu_buffer->commit_page->page == (void *)addr &&
693 rb_commit_index(cpu_buffer) == index;
694}
695
/*
 * Move the commit position to @event.
 * Every commit page before the page holding @event is marked fully
 * committed (commit = write) and the commit page pointer is advanced,
 * refreshing write_stamp from each new commit page. Finally the commit
 * index of that page is set to @event's index within it.
 */
696static inline void
697rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
698 struct ring_buffer_event *event)
699{
700 unsigned long addr = (unsigned long)event;
701 unsigned long index;
702
703 index = rb_event_index(event);
704 addr &= PAGE_MASK; /* start address of the page holding @event */
705
706 while (cpu_buffer->commit_page->page != (void *)addr) {
/* warn (and disable recording) if the commit page is already the tail page */
707 RB_WARN_ON(cpu_buffer,
708 cpu_buffer->commit_page == cpu_buffer->tail_page);
709 cpu_buffer->commit_page->commit =
710 cpu_buffer->commit_page->write;
711 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
712 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
713 }
714
715 /* Now set the commit to the event's index */
716 local_set(&cpu_buffer->commit_page->commit, index);
717}
718
/*
 * Bring the commit position up to the current write position: first walk
 * the commit page forward to the tail page, then set that page's commit
 * index to its write index.
 */
719static inline void
720rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
721{
722 /*
723 * We only race with interrupts and NMIs on this CPU.
724 * If we own the commit event, then we can commit
725 * all others that interrupted us, since the interruptions
726 * are in stack format (they finish before they come
727 * back to us). This allows us to do a simple loop to
728 * assign the commit to the tail.
729 */
730 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
731 cpu_buffer->commit_page->commit =
732 cpu_buffer->commit_page->write;
733 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
734 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
735 /* add barrier to keep gcc from optimizing too much */
736 barrier();
737 }
/* repeat until commit equals write on the (now final) commit page */
738 while (rb_commit_index(cpu_buffer) !=
739 rb_page_write(cpu_buffer->commit_page)) {
740 cpu_buffer->commit_page->commit =
741 cpu_buffer->commit_page->write;
742 barrier();
743 }
744}
745
746static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
747{
748 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
749 cpu_buffer->reader_page->read = 0;
750}
751
752static inline void rb_inc_iter(struct ring_buffer_iter *iter)
753{
754 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
755
756 /*
757 * The iterator could be on the reader page (it starts there).
758 * But the head could have moved, since the reader was
759 * found. Check for this case and assign the iterator
760 * to the head page instead of next.
761 */
762 if (iter->head_page == cpu_buffer->reader_page)
763 iter->head_page = cpu_buffer->head_page;
764 else
765 rb_inc_page(cpu_buffer, &iter->head_page);
766
767 iter->read_stamp = iter->head_page->time_stamp;
768 iter->head = 0;
769}
770
771/**
772 * ring_buffer_update_event - update event type and data
773 * @event: the even to update
774 * @type: the type of event
775 * @length: the size of the event field in the ring buffer
776 *
777 * Update the type and data fields of the event. The length
778 * is the actual size that is written to the ring buffer,
779 * and with this, we can determine what to place into the
780 * data field.
781 */
782static inline void
783rb_update_event(struct ring_buffer_event *event,
784 unsigned type, unsigned length)
785{
786 event->type = type;
787
788 switch (type) {
789
790 case RINGBUF_TYPE_PADDING:
791 break;
792
793 case RINGBUF_TYPE_TIME_EXTEND:
794 event->len =
795 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
796 >> RB_ALIGNMENT_SHIFT;
797 break;
798
799 case RINGBUF_TYPE_TIME_STAMP:
800 event->len =
801 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
802 >> RB_ALIGNMENT_SHIFT;
803 break;
804
805 case RINGBUF_TYPE_DATA:
806 length -= RB_EVNT_HDR_SIZE;
807 if (length > RB_MAX_SMALL_DATA) {
808 event->len = 0;
809 event->array[0] = length;
810 } else
811 event->len =
812 (length + (RB_ALIGNMENT-1))
813 >> RB_ALIGNMENT_SHIFT;
814 break;
815 default:
816 BUG();
817 }
818}
819
820static inline unsigned rb_calculate_event_length(unsigned length)
821{
822 struct ring_buffer_event event; /* Used only for sizeof array */
823
824 /* zero length can cause confusions */
825 if (!length)
826 length = 1;
827
828 if (length > RB_MAX_SMALL_DATA)
829 length += sizeof(event.array[0]);
830
831 length += RB_EVNT_HDR_SIZE;
832 length = ALIGN(length, RB_ALIGNMENT);
833
834 return length;
835}
836
837static struct ring_buffer_event *
838__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
839 unsigned type, unsigned long length, u64 *ts)
840{
841 struct buffer_page *tail_page, *head_page, *reader_page;
842 unsigned long tail, write;
843 struct ring_buffer *buffer = cpu_buffer->buffer;
844 struct ring_buffer_event *event;
845 unsigned long flags;
846
847 tail_page = cpu_buffer->tail_page;
848 write = local_add_return(length, &tail_page->write);
849 tail = write - length;
850
851 /* See if we shot pass the end of this buffer page */
852 if (write > BUF_PAGE_SIZE) {
853 struct buffer_page *next_page = tail_page;
854
855 spin_lock_irqsave(&cpu_buffer->lock, flags);
856
857 rb_inc_page(cpu_buffer, &next_page);
858
859 head_page = cpu_buffer->head_page;
860 reader_page = cpu_buffer->reader_page;
861
862 /* we grabbed the lock before incrementing */
863 RB_WARN_ON(cpu_buffer, next_page == reader_page);
864
865 /*
866 * If for some reason, we had an interrupt storm that made
867 * it all the way around the buffer, bail, and warn
868 * about it.
869 */
870 if (unlikely(next_page == cpu_buffer->commit_page)) {
871 WARN_ON_ONCE(1);
872 goto out_unlock;
873 }
874
875 if (next_page == head_page) {
876 if (!(buffer->flags & RB_FL_OVERWRITE)) {
877 /* reset write */
878 if (tail <= BUF_PAGE_SIZE)
879 local_set(&tail_page->write, tail);
880 goto out_unlock;
881 }
882
883 /* tail_page has not moved yet? */
884 if (tail_page == cpu_buffer->tail_page) {
885 /* count overflows */
886 rb_update_overflow(cpu_buffer);
887
888 rb_inc_page(cpu_buffer, &head_page);
889 cpu_buffer->head_page = head_page;
890 cpu_buffer->head_page->read = 0;
891 }
892 }
893
894 /*
895 * If the tail page is still the same as what we think
896 * it is, then it is up to us to update the tail
897 * pointer.
898 */
899 if (tail_page == cpu_buffer->tail_page) {
900 local_set(&next_page->write, 0);
901 local_set(&next_page->commit, 0);
902 cpu_buffer->tail_page = next_page;
903
904 /* reread the time stamp */
905 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
906 cpu_buffer->tail_page->time_stamp = *ts;
907 }
908
909 /*
910 * The actual tail page has moved forward.
911 */
912 if (tail < BUF_PAGE_SIZE) {
913 /* Mark the rest of the page with padding */
914 event = __rb_page_index(tail_page, tail);
915 event->type = RINGBUF_TYPE_PADDING;
916 }
917
918 if (tail <= BUF_PAGE_SIZE)
919 /* Set the write back to the previous setting */
920 local_set(&tail_page->write, tail);
921
922 /*
923 * If this was a commit entry that failed,
924 * increment that too
925 */
926 if (tail_page == cpu_buffer->commit_page &&
927 tail == rb_commit_index(cpu_buffer)) {
928 rb_set_commit_to_write(cpu_buffer);
929 }
930
931 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
932
933 /* fail and let the caller try again */
934 return ERR_PTR(-EAGAIN);
935 }
936
937 /* We reserved something on the buffer */
938
939 BUG_ON(write > BUF_PAGE_SIZE);
940
941 event = __rb_page_index(tail_page, tail);
942 rb_update_event(event, type, length);
943
944 /*
945 * If this is a commit and the tail is zero, then update
946 * this page's time stamp.
947 */
948 if (!tail && rb_is_commit(cpu_buffer, event))
949 cpu_buffer->commit_page->time_stamp = *ts;
950
951 return event;
952
953 out_unlock:
954 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
955 return NULL;
956}
957
958static int
959rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
960 u64 *ts, u64 *delta)
961{
962 struct ring_buffer_event *event;
963 static int once;
964 int ret;
965
966 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n",
969 (unsigned long long)*delta,
970 (unsigned long long)*ts,
971 (unsigned long long)cpu_buffer->write_stamp);
972 WARN_ON(1);
973 }
974
975 /*
976 * The delta is too big, we to add a
977 * new timestamp.
978 */
979 event = __rb_reserve_next(cpu_buffer,
980 RINGBUF_TYPE_TIME_EXTEND,
981 RB_LEN_TIME_EXTEND,
982 ts);
983 if (!event)
984 return -EBUSY;
985
986 if (PTR_ERR(event) == -EAGAIN)
987 return -EAGAIN;
988
989 /* Only a commited time event can update the write stamp */
990 if (rb_is_commit(cpu_buffer, event)) {
991 /*
992 * If this is the first on the page, then we need to
993 * update the page itself, and just put in a zero.
994 */
995 if (rb_event_index(event)) {
996 event->time_delta = *delta & TS_MASK;
997 event->array[0] = *delta >> TS_SHIFT;
998 } else {
999 cpu_buffer->commit_page->time_stamp = *ts;
1000 event->time_delta = 0;
1001 event->array[0] = 0;
1002 }
1003 cpu_buffer->write_stamp = *ts;
1004 /* let the caller know this was the commit */
1005 ret = 1;
1006 } else {
1007 /* Darn, this is just wasted space */
1008 event->time_delta = 0;
1009 event->array[0] = 0;
1010 ret = 0;
1011 }
1012
1013 *delta = 0;
1014
1015 return ret;
1016}
1017
1018static struct ring_buffer_event *
1019rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1020 unsigned type, unsigned long length)
1021{
1022 struct ring_buffer_event *event;
1023 u64 ts, delta;
1024 int commit = 0;
1025 int nr_loops = 0;
1026
1027 again:
1028 /*
1029 * We allow for interrupts to reenter here and do a trace.
1030 * If one does, it will cause this original code to loop
1031 * back here. Even with heavy interrupts happening, this
1032 * should only happen a few times in a row. If this happens
1033 * 1000 times in a row, there must be either an interrupt
1034 * storm or we have something buggy.
1035 * Bail!
1036 */
1037 if (unlikely(++nr_loops > 1000)) {
1038 RB_WARN_ON(cpu_buffer, 1);
1039 return NULL;
1040 }
1041
1042 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1043
1044 /*
1045 * Only the first commit can update the timestamp.
1046 * Yes there is a race here. If an interrupt comes in
1047 * just after the conditional and it traces too, then it
1048 * will also check the deltas. More than one timestamp may
1049 * also be made. But only the entry that did the actual
1050 * commit will be something other than zero.
1051 */
1052 if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1053 rb_page_write(cpu_buffer->tail_page) ==
1054 rb_commit_index(cpu_buffer)) {
1055
1056 delta = ts - cpu_buffer->write_stamp;
1057
1058 /* make sure this delta is calculated here */
1059 barrier();
1060
1061 /* Did the write stamp get updated already? */
1062 if (unlikely(ts < cpu_buffer->write_stamp))
1063 goto again;
1064
1065 if (test_time_stamp(delta)) {
1066
1067 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1068
1069 if (commit == -EBUSY)
1070 return NULL;
1071
1072 if (commit == -EAGAIN)
1073 goto again;
1074
1075 RB_WARN_ON(cpu_buffer, commit < 0);
1076 }
1077 } else
1078 /* Non commits have zero deltas */
1079 delta = 0;
1080
1081 event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1082 if (PTR_ERR(event) == -EAGAIN)
1083 goto again;
1084
1085 if (!event) {
1086 if (unlikely(commit))
1087 /*
1088 * Ouch! We needed a timestamp and it was commited. But
1089 * we didn't get our event reserved.
1090 */
1091 rb_set_commit_to_write(cpu_buffer);
1092 return NULL;
1093 }
1094
1095 /*
1096 * If the timestamp was commited, make the commit our entry
1097 * now so that we will update it when needed.
1098 */
1099 if (commit)
1100 rb_set_commit_event(cpu_buffer, event);
1101 else if (!rb_is_commit(cpu_buffer, event))
1102 delta = 0;
1103
1104 event->time_delta = delta;
1105
1106 return event;
1107}
1108
1109static DEFINE_PER_CPU(int, rb_need_resched);
1110
1111/**
1112 * ring_buffer_lock_reserve - reserve a part of the buffer
1113 * @buffer: the ring buffer to reserve from
1114 * @length: the length of the data to reserve (excluding event header)
1115 * @flags: a pointer to save the interrupt flags
1116 *
1117 * Returns a reseverd event on the ring buffer to copy directly to.
1118 * The user of this interface will need to get the body to write into
1119 * and can use the ring_buffer_event_data() interface.
1120 *
1121 * The length is the length of the data needed, not the event length
1122 * which also includes the event header.
1123 *
1124 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1125 * If NULL is returned, then nothing has been allocated or locked.
1126 */
1127struct ring_buffer_event *
1128ring_buffer_lock_reserve(struct ring_buffer *buffer,
1129 unsigned long length,
1130 unsigned long *flags)
1131{
1132 struct ring_buffer_per_cpu *cpu_buffer;
1133 struct ring_buffer_event *event;
1134 int cpu, resched;
1135
1136 if (atomic_read(&buffer->record_disabled))
1137 return NULL;
1138
1139 /* If we are tracing schedule, we don't want to recurse */
1140 resched = need_resched();
1141 preempt_disable_notrace();
1142
1143 cpu = raw_smp_processor_id();
1144
1145 if (!cpu_isset(cpu, buffer->cpumask))
1146 goto out;
1147
1148 cpu_buffer = buffer->buffers[cpu];
1149
1150 if (atomic_read(&cpu_buffer->record_disabled))
1151 goto out;
1152
1153 length = rb_calculate_event_length(length);
1154 if (length > BUF_PAGE_SIZE)
1155 goto out;
1156
1157 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1158 if (!event)
1159 goto out;
1160
1161 /*
1162 * Need to store resched state on this cpu.
1163 * Only the first needs to.
1164 */
1165
1166 if (preempt_count() == 1)
1167 per_cpu(rb_need_resched, cpu) = resched;
1168
1169 return event;
1170
1171 out:
1172 if (resched)
1173 preempt_enable_notrace();
1174 else
1175 preempt_enable_notrace();
1176 return NULL;
1177}
1178
1179static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1180 struct ring_buffer_event *event)
1181{
1182 cpu_buffer->entries++;
1183
1184 /* Only process further if we own the commit */
1185 if (!rb_is_commit(cpu_buffer, event))
1186 return;
1187
1188 cpu_buffer->write_stamp += event->time_delta;
1189
1190 rb_set_commit_to_write(cpu_buffer);
1191}
1192
1193/**
1194 * ring_buffer_unlock_commit - commit a reserved
1195 * @buffer: The buffer to commit to
1196 * @event: The event pointer to commit.
1197 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1198 *
1199 * This commits the data to the ring buffer, and releases any locks held.
1200 *
1201 * Must be paired with ring_buffer_lock_reserve.
1202 */
1203int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1204 struct ring_buffer_event *event,
1205 unsigned long flags)
1206{
1207 struct ring_buffer_per_cpu *cpu_buffer;
1208 int cpu = raw_smp_processor_id();
1209
1210 cpu_buffer = buffer->buffers[cpu];
1211
1212 rb_commit(cpu_buffer, event);
1213
1214 /*
1215 * Only the last preempt count needs to restore preemption.
1216 */
1217 if (preempt_count() == 1) {
1218 if (per_cpu(rb_need_resched, cpu))
1219 preempt_enable_no_resched_notrace();
1220 else
1221 preempt_enable_notrace();
1222 } else
1223 preempt_enable_no_resched_notrace();
1224
1225 return 0;
1226}
1227
1228/**
1229 * ring_buffer_write - write data to the buffer without reserving
1230 * @buffer: The ring buffer to write to.
1231 * @length: The length of the data being written (excluding the event header)
1232 * @data: The data to write to the buffer.
1233 *
1234 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1235 * one function. If you already have the data to write to the buffer, it
1236 * may be easier to simply call this function.
1237 *
1238 * Note, like ring_buffer_lock_reserve, the length is the length of the data
1239 * and not the length of the event which would hold the header.
1240 */
1241int ring_buffer_write(struct ring_buffer *buffer,
1242 unsigned long length,
1243 void *data)
1244{
1245 struct ring_buffer_per_cpu *cpu_buffer;
1246 struct ring_buffer_event *event;
1247 unsigned long event_length;
1248 void *body;
1249 int ret = -EBUSY;
1250 int cpu, resched;
1251
1252 if (atomic_read(&buffer->record_disabled))
1253 return -EBUSY;
1254
1255 resched = need_resched();
1256 preempt_disable_notrace();
1257
1258 cpu = raw_smp_processor_id();
1259
1260 if (!cpu_isset(cpu, buffer->cpumask))
1261 goto out;
1262
1263 cpu_buffer = buffer->buffers[cpu];
1264
1265 if (atomic_read(&cpu_buffer->record_disabled))
1266 goto out;
1267
1268 event_length = rb_calculate_event_length(length);
1269 event = rb_reserve_next_event(cpu_buffer,
1270 RINGBUF_TYPE_DATA, event_length);
1271 if (!event)
1272 goto out;
1273
1274 body = rb_event_data(event);
1275
1276 memcpy(body, data, length);
1277
1278 rb_commit(cpu_buffer, event);
1279
1280 ret = 0;
1281 out:
1282 if (resched)
1283 preempt_enable_no_resched_notrace();
1284 else
1285 preempt_enable_notrace();
1286
1287 return ret;
1288}
1289
1290static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1291{
1292 struct buffer_page *reader = cpu_buffer->reader_page;
1293 struct buffer_page *head = cpu_buffer->head_page;
1294 struct buffer_page *commit = cpu_buffer->commit_page;
1295
1296 return reader->read == rb_page_commit(reader) &&
1297 (commit == reader ||
1298 (commit == head &&
1299 head->read == rb_page_commit(commit)));
1300}
1301
1302/**
1303 * ring_buffer_record_disable - stop all writes into the buffer
1304 * @buffer: The ring buffer to stop writes to.
1305 *
1306 * This prevents all writes to the buffer. Any attempt to write
1307 * to the buffer after this will fail and return NULL.
1308 *
1309 * The caller should call synchronize_sched() after this.
1310 */
1311void ring_buffer_record_disable(struct ring_buffer *buffer)
1312{
1313 atomic_inc(&buffer->record_disabled);
1314}
1315
1316/**
1317 * ring_buffer_record_enable - enable writes to the buffer
1318 * @buffer: The ring buffer to enable writes
1319 *
1320 * Note, multiple disables will need the same number of enables
1321 * to truely enable the writing (much like preempt_disable).
1322 */
1323void ring_buffer_record_enable(struct ring_buffer *buffer)
1324{
1325 atomic_dec(&buffer->record_disabled);
1326}
1327
1328/**
1329 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1330 * @buffer: The ring buffer to stop writes to.
1331 * @cpu: The CPU buffer to stop
1332 *
1333 * This prevents all writes to the buffer. Any attempt to write
1334 * to the buffer after this will fail and return NULL.
1335 *
1336 * The caller should call synchronize_sched() after this.
1337 */
1338void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1339{
1340 struct ring_buffer_per_cpu *cpu_buffer;
1341
1342 if (!cpu_isset(cpu, buffer->cpumask))
1343 return;
1344
1345 cpu_buffer = buffer->buffers[cpu];
1346 atomic_inc(&cpu_buffer->record_disabled);
1347}
1348
1349/**
1350 * ring_buffer_record_enable_cpu - enable writes to the buffer
1351 * @buffer: The ring buffer to enable writes
1352 * @cpu: The CPU to enable.
1353 *
1354 * Note, multiple disables will need the same number of enables
1355 * to truely enable the writing (much like preempt_disable).
1356 */
1357void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1358{
1359 struct ring_buffer_per_cpu *cpu_buffer;
1360
1361 if (!cpu_isset(cpu, buffer->cpumask))
1362 return;
1363
1364 cpu_buffer = buffer->buffers[cpu];
1365 atomic_dec(&cpu_buffer->record_disabled);
1366}
1367
1368/**
1369 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1370 * @buffer: The ring buffer
1371 * @cpu: The per CPU buffer to get the entries from.
1372 */
1373unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1374{
1375 struct ring_buffer_per_cpu *cpu_buffer;
1376
1377 if (!cpu_isset(cpu, buffer->cpumask))
1378 return 0;
1379
1380 cpu_buffer = buffer->buffers[cpu];
1381 return cpu_buffer->entries;
1382}
1383
1384/**
1385 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1386 * @buffer: The ring buffer
1387 * @cpu: The per CPU buffer to get the number of overruns from
1388 */
1389unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1390{
1391 struct ring_buffer_per_cpu *cpu_buffer;
1392
1393 if (!cpu_isset(cpu, buffer->cpumask))
1394 return 0;
1395
1396 cpu_buffer = buffer->buffers[cpu];
1397 return cpu_buffer->overrun;
1398}
1399
1400/**
1401 * ring_buffer_entries - get the number of entries in a buffer
1402 * @buffer: The ring buffer
1403 *
1404 * Returns the total number of entries in the ring buffer
1405 * (all CPU entries)
1406 */
1407unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1408{
1409 struct ring_buffer_per_cpu *cpu_buffer;
1410 unsigned long entries = 0;
1411 int cpu;
1412
1413 /* if you care about this being correct, lock the buffer */
1414 for_each_buffer_cpu(buffer, cpu) {
1415 cpu_buffer = buffer->buffers[cpu];
1416 entries += cpu_buffer->entries;
1417 }
1418
1419 return entries;
1420}
1421
1422/**
1423 * ring_buffer_overrun_cpu - get the number of overruns in buffer
1424 * @buffer: The ring buffer
1425 *
1426 * Returns the total number of overruns in the ring buffer
1427 * (all CPU entries)
1428 */
1429unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1430{
1431 struct ring_buffer_per_cpu *cpu_buffer;
1432 unsigned long overruns = 0;
1433 int cpu;
1434
1435 /* if you care about this being correct, lock the buffer */
1436 for_each_buffer_cpu(buffer, cpu) {
1437 cpu_buffer = buffer->buffers[cpu];
1438 overruns += cpu_buffer->overrun;
1439 }
1440
1441 return overruns;
1442}
1443
1444/**
1445 * ring_buffer_iter_reset - reset an iterator
1446 * @iter: The iterator to reset
1447 *
1448 * Resets the iterator, so that it will start from the beginning
1449 * again.
1450 */
1451void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1452{
1453 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1454
1455 /* Iterator usage is expected to have record disabled */
1456 if (list_empty(&cpu_buffer->reader_page->list)) {
1457 iter->head_page = cpu_buffer->head_page;
1458 iter->head = cpu_buffer->head_page->read;
1459 } else {
1460 iter->head_page = cpu_buffer->reader_page;
1461 iter->head = cpu_buffer->reader_page->read;
1462 }
1463 if (iter->head)
1464 iter->read_stamp = cpu_buffer->read_stamp;
1465 else
1466 iter->read_stamp = iter->head_page->time_stamp;
1467}
1468
1469/**
1470 * ring_buffer_iter_empty - check if an iterator has no more to read
1471 * @iter: The iterator to check
1472 */
1473int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1474{
1475 struct ring_buffer_per_cpu *cpu_buffer;
1476
1477 cpu_buffer = iter->cpu_buffer;
1478
1479 return iter->head_page == cpu_buffer->commit_page &&
1480 iter->head == rb_commit_index(cpu_buffer);
1481}
1482
1483static void
1484rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1485 struct ring_buffer_event *event)
1486{
1487 u64 delta;
1488
1489 switch (event->type) {
1490 case RINGBUF_TYPE_PADDING:
1491 return;
1492
1493 case RINGBUF_TYPE_TIME_EXTEND:
1494 delta = event->array[0];
1495 delta <<= TS_SHIFT;
1496 delta += event->time_delta;
1497 cpu_buffer->read_stamp += delta;
1498 return;
1499
1500 case RINGBUF_TYPE_TIME_STAMP:
1501 /* FIXME: not implemented */
1502 return;
1503
1504 case RINGBUF_TYPE_DATA:
1505 cpu_buffer->read_stamp += event->time_delta;
1506 return;
1507
1508 default:
1509 BUG();
1510 }
1511 return;
1512}
1513
1514static void
1515rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1516 struct ring_buffer_event *event)
1517{
1518 u64 delta;
1519
1520 switch (event->type) {
1521 case RINGBUF_TYPE_PADDING:
1522 return;
1523
1524 case RINGBUF_TYPE_TIME_EXTEND:
1525 delta = event->array[0];
1526 delta <<= TS_SHIFT;
1527 delta += event->time_delta;
1528 iter->read_stamp += delta;
1529 return;
1530
1531 case RINGBUF_TYPE_TIME_STAMP:
1532 /* FIXME: not implemented */
1533 return;
1534
1535 case RINGBUF_TYPE_DATA:
1536 iter->read_stamp += event->time_delta;
1537 return;
1538
1539 default:
1540 BUG();
1541 }
1542 return;
1543}
1544
1545static struct buffer_page *
1546rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1547{
1548 struct buffer_page *reader = NULL;
1549 unsigned long flags;
1550 int nr_loops = 0;
1551
1552 spin_lock_irqsave(&cpu_buffer->lock, flags);
1553
1554 again:
1555 /*
1556 * This should normally only loop twice. But because the
1557 * start of the reader inserts an empty page, it causes
1558 * a case where we will loop three times. There should be no
1559 * reason to loop four times (that I know of).
1560 */
1561 if (unlikely(++nr_loops > 3)) {
1562 RB_WARN_ON(cpu_buffer, 1);
1563 reader = NULL;
1564 goto out;
1565 }
1566
1567 reader = cpu_buffer->reader_page;
1568
1569 /* If there's more to read, return this page */
1570 if (cpu_buffer->reader_page->read < rb_page_size(reader))
1571 goto out;
1572
1573 /* Never should we have an index greater than the size */
1574 RB_WARN_ON(cpu_buffer,
1575 cpu_buffer->reader_page->read > rb_page_size(reader));
1576
1577 /* check if we caught up to the tail */
1578 reader = NULL;
1579 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1580 goto out;
1581
1582 /*
1583 * Splice the empty reader page into the list around the head.
1584 * Reset the reader page to size zero.
1585 */
1586
1587 reader = cpu_buffer->head_page;
1588 cpu_buffer->reader_page->list.next = reader->list.next;
1589 cpu_buffer->reader_page->list.prev = reader->list.prev;
1590
1591 local_set(&cpu_buffer->reader_page->write, 0);
1592 local_set(&cpu_buffer->reader_page->commit, 0);
1593
1594 /* Make the reader page now replace the head */
1595 reader->list.prev->next = &cpu_buffer->reader_page->list;
1596 reader->list.next->prev = &cpu_buffer->reader_page->list;
1597
1598 /*
1599 * If the tail is on the reader, then we must set the head
1600 * to the inserted page, otherwise we set it one before.
1601 */
1602 cpu_buffer->head_page = cpu_buffer->reader_page;
1603
1604 if (cpu_buffer->commit_page != reader)
1605 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1606
1607 /* Finally update the reader page to the new head */
1608 cpu_buffer->reader_page = reader;
1609 rb_reset_reader_page(cpu_buffer);
1610
1611 goto again;
1612
1613 out:
1614 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1615
1616 return reader;
1617}
1618
1619static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1620{
1621 struct ring_buffer_event *event;
1622 struct buffer_page *reader;
1623 unsigned length;
1624
1625 reader = rb_get_reader_page(cpu_buffer);
1626
1627 /* This function should not be called when buffer is empty */
1628 BUG_ON(!reader);
1629
1630 event = rb_reader_event(cpu_buffer);
1631
1632 if (event->type == RINGBUF_TYPE_DATA)
1633 cpu_buffer->entries--;
1634
1635 rb_update_read_stamp(cpu_buffer, event);
1636
1637 length = rb_event_length(event);
1638 cpu_buffer->reader_page->read += length;
1639}
1640
1641static void rb_advance_iter(struct ring_buffer_iter *iter)
1642{
1643 struct ring_buffer *buffer;
1644 struct ring_buffer_per_cpu *cpu_buffer;
1645 struct ring_buffer_event *event;
1646 unsigned length;
1647
1648 cpu_buffer = iter->cpu_buffer;
1649 buffer = cpu_buffer->buffer;
1650
1651 /*
1652 * Check if we are at the end of the buffer.
1653 */
1654 if (iter->head >= rb_page_size(iter->head_page)) {
1655 BUG_ON(iter->head_page == cpu_buffer->commit_page);
1656 rb_inc_iter(iter);
1657 return;
1658 }
1659
1660 event = rb_iter_head_event(iter);
1661
1662 length = rb_event_length(event);
1663
1664 /*
1665 * This should not be called to advance the header if we are
1666 * at the tail of the buffer.
1667 */
1668 BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
1669 (iter->head + length > rb_commit_index(cpu_buffer)));
1670
1671 rb_update_iter_read_stamp(iter, event);
1672
1673 iter->head += length;
1674
1675 /* check for end of page padding */
1676 if ((iter->head >= rb_page_size(iter->head_page)) &&
1677 (iter->head_page != cpu_buffer->commit_page))
1678 rb_advance_iter(iter);
1679}
1680
1681/**
1682 * ring_buffer_peek - peek at the next event to be read
1683 * @buffer: The ring buffer to read
1684 * @cpu: The cpu to peak at
1685 * @ts: The timestamp counter of this event.
1686 *
1687 * This will return the event that will be read next, but does
1688 * not consume the data.
1689 */
1690struct ring_buffer_event *
1691ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1692{
1693 struct ring_buffer_per_cpu *cpu_buffer;
1694 struct ring_buffer_event *event;
1695 struct buffer_page *reader;
1696 int nr_loops = 0;
1697
1698 if (!cpu_isset(cpu, buffer->cpumask))
1699 return NULL;
1700
1701 cpu_buffer = buffer->buffers[cpu];
1702
1703 again:
1704 /*
1705 * We repeat when a timestamp is encountered. It is possible
1706 * to get multiple timestamps from an interrupt entering just
1707 * as one timestamp is about to be written. The max times
1708 * that this can happen is the number of nested interrupts we
1709 * can have. Nesting 10 deep of interrupts is clearly
1710 * an anomaly.
1711 */
1712 if (unlikely(++nr_loops > 10)) {
1713 RB_WARN_ON(cpu_buffer, 1);
1714 return NULL;
1715 }
1716
1717 reader = rb_get_reader_page(cpu_buffer);
1718 if (!reader)
1719 return NULL;
1720
1721 event = rb_reader_event(cpu_buffer);
1722
1723 switch (event->type) {
1724 case RINGBUF_TYPE_PADDING:
1725 RB_WARN_ON(cpu_buffer, 1);
1726 rb_advance_reader(cpu_buffer);
1727 return NULL;
1728
1729 case RINGBUF_TYPE_TIME_EXTEND:
1730 /* Internal data, OK to advance */
1731 rb_advance_reader(cpu_buffer);
1732 goto again;
1733
1734 case RINGBUF_TYPE_TIME_STAMP:
1735 /* FIXME: not implemented */
1736 rb_advance_reader(cpu_buffer);
1737 goto again;
1738
1739 case RINGBUF_TYPE_DATA:
1740 if (ts) {
1741 *ts = cpu_buffer->read_stamp + event->time_delta;
1742 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1743 }
1744 return event;
1745
1746 default:
1747 BUG();
1748 }
1749
1750 return NULL;
1751}
1752
1753/**
1754 * ring_buffer_iter_peek - peek at the next event to be read
1755 * @iter: The ring buffer iterator
1756 * @ts: The timestamp counter of this event.
1757 *
1758 * This will return the event that will be read next, but does
1759 * not increment the iterator.
1760 */
1761struct ring_buffer_event *
1762ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1763{
1764 struct ring_buffer *buffer;
1765 struct ring_buffer_per_cpu *cpu_buffer;
1766 struct ring_buffer_event *event;
1767 int nr_loops = 0;
1768
1769 if (ring_buffer_iter_empty(iter))
1770 return NULL;
1771
1772 cpu_buffer = iter->cpu_buffer;
1773 buffer = cpu_buffer->buffer;
1774
1775 again:
1776 /*
1777 * We repeat when a timestamp is encountered. It is possible
1778 * to get multiple timestamps from an interrupt entering just
1779 * as one timestamp is about to be written. The max times
1780 * that this can happen is the number of nested interrupts we
1781 * can have. Nesting 10 deep of interrupts is clearly
1782 * an anomaly.
1783 */
1784 if (unlikely(++nr_loops > 10)) {
1785 RB_WARN_ON(cpu_buffer, 1);
1786 return NULL;
1787 }
1788
1789 if (rb_per_cpu_empty(cpu_buffer))
1790 return NULL;
1791
1792 event = rb_iter_head_event(iter);
1793
1794 switch (event->type) {
1795 case RINGBUF_TYPE_PADDING:
1796 rb_inc_iter(iter);
1797 goto again;
1798
1799 case RINGBUF_TYPE_TIME_EXTEND:
1800 /* Internal data, OK to advance */
1801 rb_advance_iter(iter);
1802 goto again;
1803
1804 case RINGBUF_TYPE_TIME_STAMP:
1805 /* FIXME: not implemented */
1806 rb_advance_iter(iter);
1807 goto again;
1808
1809 case RINGBUF_TYPE_DATA:
1810 if (ts) {
1811 *ts = iter->read_stamp + event->time_delta;
1812 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1813 }
1814 return event;
1815
1816 default:
1817 BUG();
1818 }
1819
1820 return NULL;
1821}
1822
1823/**
1824 * ring_buffer_consume - return an event and consume it
1825 * @buffer: The ring buffer to get the next event from
1826 *
1827 * Returns the next event in the ring buffer, and that event is consumed.
1828 * Meaning, that sequential reads will keep returning a different event,
1829 * and eventually empty the ring buffer if the producer is slower.
1830 */
1831struct ring_buffer_event *
1832ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1833{
1834 struct ring_buffer_per_cpu *cpu_buffer;
1835 struct ring_buffer_event *event;
1836
1837 if (!cpu_isset(cpu, buffer->cpumask))
1838 return NULL;
1839
1840 event = ring_buffer_peek(buffer, cpu, ts);
1841 if (!event)
1842 return NULL;
1843
1844 cpu_buffer = buffer->buffers[cpu];
1845 rb_advance_reader(cpu_buffer);
1846
1847 return event;
1848}
1849
1850/**
1851 * ring_buffer_read_start - start a non consuming read of the buffer
1852 * @buffer: The ring buffer to read from
1853 * @cpu: The cpu buffer to iterate over
1854 *
1855 * This starts up an iteration through the buffer. It also disables
1856 * the recording to the buffer until the reading is finished.
1857 * This prevents the reading from being corrupted. This is not
1858 * a consuming read, so a producer is not expected.
1859 *
1860 * Must be paired with ring_buffer_finish.
1861 */
1862struct ring_buffer_iter *
1863ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1864{
1865 struct ring_buffer_per_cpu *cpu_buffer;
1866 struct ring_buffer_iter *iter;
1867 unsigned long flags;
1868
1869 if (!cpu_isset(cpu, buffer->cpumask))
1870 return NULL;
1871
1872 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1873 if (!iter)
1874 return NULL;
1875
1876 cpu_buffer = buffer->buffers[cpu];
1877
1878 iter->cpu_buffer = cpu_buffer;
1879
1880 atomic_inc(&cpu_buffer->record_disabled);
1881 synchronize_sched();
1882
1883 spin_lock_irqsave(&cpu_buffer->lock, flags);
1884 ring_buffer_iter_reset(iter);
1885 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1886
1887 return iter;
1888}
1889
1890/**
1891 * ring_buffer_finish - finish reading the iterator of the buffer
1892 * @iter: The iterator retrieved by ring_buffer_start
1893 *
1894 * This re-enables the recording to the buffer, and frees the
1895 * iterator.
1896 */
1897void
1898ring_buffer_read_finish(struct ring_buffer_iter *iter)
1899{
1900 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1901
1902 atomic_dec(&cpu_buffer->record_disabled);
1903 kfree(iter);
1904}
1905
1906/**
1907 * ring_buffer_read - read the next item in the ring buffer by the iterator
1908 * @iter: The ring buffer iterator
1909 * @ts: The time stamp of the event read (handed to ring_buffer_iter_peek).
1910 *
1911 * This reads the next event in the ring buffer and advances the iterator;
1912 * returns NULL, without advancing, when ring_buffer_iter_peek finds no event. */
1913struct ring_buffer_event *
1914ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1915{
1916 struct ring_buffer_event *event;
1917 /* peek at the next event; the iterator is only advanced further down */
1918 event = ring_buffer_iter_peek(iter, ts);
1919 if (!event)
1920 return NULL;
1921 /* got an event: step the iterator past it before handing it back */
1922 rb_advance_iter(iter);
1923
1924 return event;
1925}
1926
1927/**
1928 * ring_buffer_size - return the size of the ring buffer (in bytes)
1929 * @buffer: The ring buffer whose size is reported.
1930 */
1931unsigned long ring_buffer_size(struct ring_buffer *buffer)
1932{
1933 return BUF_PAGE_SIZE * buffer->pages; /* buffer->pages pages of BUF_PAGE_SIZE bytes each */
1934}
1935/* rb_reset_cpu - put one per-cpu buffer back into its empty state; ring_buffer_reset_cpu() calls this with cpu_buffer->lock held */
1936static void
1937rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1938{
1939 cpu_buffer->head_page
1940 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1941 local_set(&cpu_buffer->head_page->write, 0);
1942 local_set(&cpu_buffer->head_page->commit, 0);
1943 /* head is now the first page on the pages list, with nothing written or committed */
1944 cpu_buffer->head_page->read = 0;
1945 /* tail and commit start over on the same page as head */
1946 cpu_buffer->tail_page = cpu_buffer->head_page;
1947 cpu_buffer->commit_page = cpu_buffer->head_page;
1948 /* reader page: its list node is re-initialised and its offsets are zeroed */
1949 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1950 local_set(&cpu_buffer->reader_page->write, 0);
1951 local_set(&cpu_buffer->reader_page->commit, 0);
1952 cpu_buffer->reader_page->read = 0;
1953 /* clear the overrun and entry counters */
1954 cpu_buffer->overrun = 0;
1955 cpu_buffer->entries = 0;
1956}
1957
1958/**
1959 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
1960 * @buffer: The ring buffer to reset a per cpu buffer of
1961 * @cpu: The CPU buffer to be reset (nothing is done if not in @buffer's cpumask)
1962 */
1963void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
1964{
1965 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1966 unsigned long flags;
1967 /* NOTE(review): buffer->buffers[cpu] was already indexed above, before this check */
1968 if (!cpu_isset(cpu, buffer->cpumask))
1969 return;
1970 /* the reset itself runs under the per-cpu lock with interrupts disabled */
1971 spin_lock_irqsave(&cpu_buffer->lock, flags);
1972
1973 rb_reset_cpu(cpu_buffer);
1974
1975 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1976}
1977
1978/**
1979 * ring_buffer_reset - reset a ring buffer
1980 * @buffer: The ring buffer to reset all cpu buffers
1981 */
1982void ring_buffer_reset(struct ring_buffer *buffer)
1983{
1984 int cpu;
1985
1986 for_each_buffer_cpu(buffer, cpu)
1987 ring_buffer_reset_cpu(buffer, cpu);
1988}
1989
1990/**
1991 * ring_buffer_empty - is the ring buffer empty?
1992 * @buffer: The ring buffer to test
1993 */
1994int ring_buffer_empty(struct ring_buffer *buffer)
1995{
1996 struct ring_buffer_per_cpu *cpu_buffer;
1997 int cpu;
1998 /* returns 1 only if every per-cpu buffer is empty, 0 as soon as one is not */
1999 /* yes this is racy, but if you don't like the race, lock the buffer */
2000 for_each_buffer_cpu(buffer, cpu) {
2001 cpu_buffer = buffer->buffers[cpu];
2002 if (!rb_per_cpu_empty(cpu_buffer))
2003 return 0;
2004 }
2005 return 1;
2006}
2007
2008/**
2009 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
2010 * @buffer: The ring buffer
2011 * @cpu: The CPU buffer to test
2012 */
2013int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2014{
2015 struct ring_buffer_per_cpu *cpu_buffer;
2016 /* a cpu outside the buffer's cpumask is reported as empty (1) */
2017 if (!cpu_isset(cpu, buffer->cpumask))
2018 return 1;
2019 /* otherwise the answer comes straight from rb_per_cpu_empty() */
2020 cpu_buffer = buffer->buffers[cpu];
2021 return rb_per_cpu_empty(cpu_buffer);
2022}
2023
2024/**
2025 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2026 * @buffer_a: One buffer to swap with
2027 * @buffer_b: The other buffer to swap with
2028 * @cpu: The CPU whose per-cpu buffers are exchanged
2029 *
2030 * This function is useful for tracers that want to take a "snapshot" of a
2031 * CPU buffer and have another back up buffer lying around. It is expected
2032 * that the tracer handles the cpu buffer not being used at the moment.
2033 */
2034int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2035 struct ring_buffer *buffer_b, int cpu)
2036{
2037 struct ring_buffer_per_cpu *cpu_buffer_a;
2038 struct ring_buffer_per_cpu *cpu_buffer_b;
2039 /* -EINVAL unless @cpu is in the cpumask of both buffers */
2040 if (!cpu_isset(cpu, buffer_a->cpumask) ||
2041 !cpu_isset(cpu, buffer_b->cpumask))
2042 return -EINVAL;
2043
2044 /* At least make sure the two buffers are somewhat the same */
2045 if (buffer_a->size != buffer_b->size ||
2046 buffer_a->pages != buffer_b->pages)
2047 return -EINVAL;
2048
2049 cpu_buffer_a = buffer_a->buffers[cpu];
2050 cpu_buffer_b = buffer_b->buffers[cpu];
2051
2052 /*
2053 * We can't do a synchronize_sched here because this
2054 * function can be called in atomic context.
2055 * Normally this will be called from the same CPU as cpu.
2056 * If not it's up to the caller to protect this.
2057 */
2058 atomic_inc(&cpu_buffer_a->record_disabled);
2059 atomic_inc(&cpu_buffer_b->record_disabled);
2060 /* exchange the per-cpu slots of the two ring buffers */
2061 buffer_a->buffers[cpu] = cpu_buffer_b;
2062 buffer_b->buffers[cpu] = cpu_buffer_a;
2063 /* and point each per-cpu buffer back at its new owner */
2064 cpu_buffer_b->buffer = buffer_a;
2065 cpu_buffer_a->buffer = buffer_b;
2066 /* undo the record_disabled increments taken above */
2067 atomic_dec(&cpu_buffer_a->record_disabled);
2068 atomic_dec(&cpu_buffer_b->record_disabled);
2069
2070 return 0;
2071}
2072
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8f3fb3db61c3..9f3b478f9171 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
14#include <linux/utsrelease.h> 14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 16#include <linux/seq_file.h>
17#include <linux/notifier.h>
17#include <linux/debugfs.h> 18#include <linux/debugfs.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
@@ -22,6 +23,7 @@
22#include <linux/ftrace.h> 23#include <linux/ftrace.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/kdebug.h>
25#include <linux/ctype.h> 27#include <linux/ctype.h>
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/poll.h> 29#include <linux/poll.h>
@@ -31,25 +33,37 @@
31#include <linux/writeback.h> 33#include <linux/writeback.h>
32 34
33#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
34 38
35#include "trace.h" 39#include "trace.h"
36 40
41#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
42
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
39 45
40static unsigned long __read_mostly tracing_nr_buffers; 46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47
48static inline void ftrace_disable_cpu(void)
49{
50 preempt_disable();
51 local_inc(&__get_cpu_var(ftrace_cpu_disabled));
52}
53
54static inline void ftrace_enable_cpu(void)
55{
56 local_dec(&__get_cpu_var(ftrace_cpu_disabled));
57 preempt_enable();
58}
59
41static cpumask_t __read_mostly tracing_buffer_mask; 60static cpumask_t __read_mostly tracing_buffer_mask;
42 61
43#define for_each_tracing_cpu(cpu) \ 62#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask) 63 for_each_cpu_mask(cpu, tracing_buffer_mask)
45 64
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1; 65static int tracing_disabled = 1;
50 66
51static unsigned long tracing_pages_allocated;
52
53long 67long
54ns2usecs(cycle_t nsec) 68ns2usecs(cycle_t nsec)
55{ 69{
@@ -60,7 +74,9 @@ ns2usecs(cycle_t nsec)
60 74
61cycle_t ftrace_now(int cpu) 75cycle_t ftrace_now(int cpu)
62{ 76{
63 return cpu_clock(cpu); 77 u64 ts = ring_buffer_time_stamp(cpu);
78 ring_buffer_normalize_time_stamp(cpu, &ts);
79 return ts;
64} 80}
65 81
66/* 82/*
@@ -100,11 +116,18 @@ static int tracer_enabled = 1;
100int ftrace_function_enabled; 116int ftrace_function_enabled;
101 117
102/* 118/*
103 * trace_nr_entries is the number of entries that is allocated 119 * trace_buf_size is the size in bytes that is allocated
104 * for a buffer. Note, the number of entries is always rounded 120 * for a buffer. Note, the number of bytes is always rounded
105 * to ENTRIES_PER_PAGE. 121 * to page size.
122 *
123 * This number is purposely set to a low number of 16384.
124 * If the dump on oops happens, it will be much appreciated
125 * to not have to wait for all that output. Anyway this can be
126 * boot time and run time configurable.
106 */ 127 */
107static unsigned long trace_nr_entries = 65536UL; 128#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
129
130static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
108 131
109/* trace_types holds a link list of available tracers. */ 132/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly; 133static struct tracer *trace_types __read_mostly;
@@ -133,24 +156,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
133/* trace_flags holds iter_ctrl options */ 156/* trace_flags holds iter_ctrl options */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
135 158
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153
154/** 159/**
155 * trace_wake_up - wake up tasks waiting for trace input 160 * trace_wake_up - wake up tasks waiting for trace input
156 * 161 *
@@ -167,23 +172,21 @@ void trace_wake_up(void)
167 wake_up(&trace_wait); 172 wake_up(&trace_wait);
168} 173}
169 174
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) 175static int __init set_buf_size(char *str)
171
172static int __init set_nr_entries(char *str)
173{ 176{
174 unsigned long nr_entries; 177 unsigned long buf_size;
175 int ret; 178 int ret;
176 179
177 if (!str) 180 if (!str)
178 return 0; 181 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries); 182 ret = strict_strtoul(str, 0, &buf_size);
180 /* nr_entries can not be zero */ 183 /* nr_entries can not be zero */
181 if (ret < 0 || nr_entries == 0) 184 if (ret < 0 || buf_size == 0)
182 return 0; 185 return 0;
183 trace_nr_entries = nr_entries; 186 trace_buf_size = buf_size;
184 return 1; 187 return 1;
185} 188}
186__setup("trace_entries=", set_nr_entries); 189__setup("trace_buf_size=", set_buf_size);
187 190
188unsigned long nsecs_to_usecs(unsigned long nsecs) 191unsigned long nsecs_to_usecs(unsigned long nsecs)
189{ 192{
@@ -191,21 +194,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
191} 194}
192 195
193/* 196/*
194 * trace_flag_type is an enumeration that holds different
195 * states when a trace occurs. These are:
196 * IRQS_OFF - interrupts were disabled
197 * NEED_RESCED - reschedule is requested
198 * HARDIRQ - inside an interrupt handler
199 * SOFTIRQ - inside a softirq handler
200 */
201enum trace_flag_type {
202 TRACE_FLAG_IRQS_OFF = 0x01,
203 TRACE_FLAG_NEED_RESCHED = 0x02,
204 TRACE_FLAG_HARDIRQ = 0x04,
205 TRACE_FLAG_SOFTIRQ = 0x08,
206};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that 197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols. 198 * control the output of kernel symbols.
211 */ 199 */
@@ -224,6 +212,7 @@ static const char *trace_options[] = {
224 "block", 212 "block",
225 "stacktrace", 213 "stacktrace",
226 "sched-tree", 214 "sched-tree",
215 "ftrace_printk",
227 NULL 216 NULL
228}; 217};
229 218
@@ -266,54 +255,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
266 tracing_record_cmdline(current); 255 tracing_record_cmdline(current);
267} 256}
268 257
269#define CHECK_COND(cond) \
270 if (unlikely(cond)) { \
271 tracing_disabled = 1; \
272 WARN_ON(1); \
273 return -1; \
274 }
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safty measure we check to make sure the data pages have not
280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also preforms various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/** 258/**
318 * trace_seq_printf - sequence printing of trace information 259 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor 260 * @s: trace sequence descriptor
@@ -395,28 +336,23 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
395 return len; 336 return len;
396} 337}
397 338
398#define HEX_CHARS 17 339#define MAX_MEMHEX_BYTES 8
399static const char hex2asc[] = "0123456789abcdef"; 340#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
400 341
401static int 342static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 343trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{ 344{
404 unsigned char hex[HEX_CHARS]; 345 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem; 346 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j; 347 int i, j;
408 348
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN 349#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) { 350 for (i = 0, j = 0; i < len; i++) {
413#else 351#else
414 for (i = len-1, j = 0; i >= 0; i--) { 352 for (i = len-1, j = 0; i >= 0; i--) {
415#endif 353#endif
416 byte = data[i]; 354 hex[j++] = hex_asc_hi(data[i]);
417 355 hex[j++] = hex_asc_lo(data[i]);
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 } 356 }
421 hex[j++] = ' '; 357 hex[j++] = ' ';
422 358
@@ -460,34 +396,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
460 trace_seq_reset(s); 396 trace_seq_reset(s);
461} 397}
462 398
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/** 399/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 400 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer 401 * @tr: tracer
@@ -500,17 +408,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
500void 408void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 409update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{ 410{
503 struct trace_array_cpu *data; 411 struct ring_buffer *buf = tr->buffer;
504 int i;
505 412
506 WARN_ON_ONCE(!irqs_disabled()); 413 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock); 414 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */ 415
509 for_each_tracing_cpu(i) { 416 tr->buffer = max_tr.buffer;
510 data = tr->data[i]; 417 max_tr.buffer = buf;
511 flip_trace(max_tr.data[i], data); 418
512 tracing_reset(data); 419 ftrace_disable_cpu();
513 } 420 ring_buffer_reset(tr->buffer);
421 ftrace_enable_cpu();
514 422
515 __update_max_tr(tr, tsk, cpu); 423 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock); 424 __raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +435,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
527void 435void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 436update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{ 437{
530 struct trace_array_cpu *data = tr->data[cpu]; 438 int ret;
531 int i;
532 439
533 WARN_ON_ONCE(!irqs_disabled()); 440 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock); 441 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537 442
538 flip_trace(max_tr.data[cpu], data); 443 ftrace_disable_cpu();
539 tracing_reset(data); 444
445 ring_buffer_reset(max_tr.buffer);
446 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
447
448 ftrace_enable_cpu();
449
450 WARN_ON_ONCE(ret);
540 451
541 __update_max_tr(tr, tsk, cpu); 452 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock); 453 __raw_spin_unlock(&ftrace_max_lock);
@@ -573,7 +484,6 @@ int register_tracer(struct tracer *type)
573#ifdef CONFIG_FTRACE_STARTUP_TEST 484#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) { 485 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace; 486 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace; 487 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl; 488 int saved_ctrl = tr->ctrl;
579 int i; 489 int i;
@@ -585,10 +495,7 @@ int register_tracer(struct tracer *type)
585 * If we fail, we do not register this tracer. 495 * If we fail, we do not register this tracer.
586 */ 496 */
587 for_each_tracing_cpu(i) { 497 for_each_tracing_cpu(i) {
588 data = tr->data[i]; 498 tracing_reset(tr, i);
589 if (!head_page(data))
590 continue;
591 tracing_reset(data);
592 } 499 }
593 current_trace = type; 500 current_trace = type;
594 tr->ctrl = 0; 501 tr->ctrl = 0;
@@ -604,10 +511,7 @@ int register_tracer(struct tracer *type)
604 } 511 }
605 /* Only reset on passing, to avoid touching corrupted buffers */ 512 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) { 513 for_each_tracing_cpu(i) {
607 data = tr->data[i]; 514 tracing_reset(tr, i);
608 if (!head_page(data))
609 continue;
610 tracing_reset(data);
611 } 515 }
612 printk(KERN_CONT "PASSED\n"); 516 printk(KERN_CONT "PASSED\n");
613 } 517 }
@@ -653,13 +557,11 @@ void unregister_tracer(struct tracer *type)
653 mutex_unlock(&trace_types_lock); 557 mutex_unlock(&trace_types_lock);
654} 558}
655 559
656void tracing_reset(struct trace_array_cpu *data) 560void tracing_reset(struct trace_array *tr, int cpu)
657{ 561{
658 data->trace_idx = 0; 562 ftrace_disable_cpu();
659 data->overrun = 0; 563 ring_buffer_reset_cpu(tr->buffer, cpu);
660 data->trace_head = data->trace_tail = head_page(data); 564 ftrace_enable_cpu();
661 data->trace_head_idx = 0;
662 data->trace_tail_idx = 0;
663} 565}
664 566
665#define SAVED_CMDLINES 128 567#define SAVED_CMDLINES 128
@@ -745,82 +647,20 @@ void tracing_record_cmdline(struct task_struct *tsk)
745 trace_save_cmdline(tsk); 647 trace_save_cmdline(tsk);
746} 648}
747 649
748static inline struct list_head * 650void
749trace_next_list(struct trace_array_cpu *data, struct list_head *next) 651tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
750{ 652 int pc)
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{ 653{
815 struct task_struct *tsk = current; 654 struct task_struct *tsk = current;
816 unsigned long pc;
817 655
818 pc = preempt_count(); 656 entry->preempt_count = pc & 0xff;
819 657 entry->pid = (tsk) ? tsk->pid : 0;
820 entry->preempt_count = pc & 0xff; 658 entry->flags =
821 entry->pid = (tsk) ? tsk->pid : 0; 659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
822 entry->t = ftrace_now(raw_smp_processor_id()); 660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 661#else
662 TRACE_FLAG_IRQS_NOSUPPORT |
663#endif
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 664 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 665 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 666 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +668,141 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
828 668
829void 669void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data, 670trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags) 671 unsigned long ip, unsigned long parent_ip, unsigned long flags,
672 int pc)
832{ 673{
833 struct trace_entry *entry; 674 struct ring_buffer_event *event;
675 struct ftrace_entry *entry;
834 unsigned long irq_flags; 676 unsigned long irq_flags;
835 677
836 raw_local_irq_save(irq_flags); 678 /* If we are reading the ring buffer, don't trace */
837 __raw_spin_lock(&data->lock); 679 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
838 entry = tracing_get_trace_entry(tr, data); 680 return;
839 tracing_generic_entry_update(entry, flags); 681
840 entry->type = TRACE_FN; 682 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
841 entry->fn.ip = ip; 683 &irq_flags);
842 entry->fn.parent_ip = parent_ip; 684 if (!event)
843 __raw_spin_unlock(&data->lock); 685 return;
844 raw_local_irq_restore(irq_flags); 686 entry = ring_buffer_event_data(event);
687 tracing_generic_entry_update(&entry->ent, flags, pc);
688 entry->ent.type = TRACE_FN;
689 entry->ip = ip;
690 entry->parent_ip = parent_ip;
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
845} 692}
846 693
847void 694void
848ftrace(struct trace_array *tr, struct trace_array_cpu *data, 695ftrace(struct trace_array *tr, struct trace_array_cpu *data,
849 unsigned long ip, unsigned long parent_ip, unsigned long flags) 696 unsigned long ip, unsigned long parent_ip, unsigned long flags,
697 int pc)
850{ 698{
851 if (likely(!atomic_read(&data->disabled))) 699 if (likely(!atomic_read(&data->disabled)))
852 trace_function(tr, data, ip, parent_ip, flags); 700 trace_function(tr, data, ip, parent_ip, flags, pc);
853} 701}
854 702
855#ifdef CONFIG_MMIOTRACE 703static void ftrace_trace_stack(struct trace_array *tr,
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, 704 struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw) 705 unsigned long flags,
706 int skip, int pc)
858{ 707{
859 struct trace_entry *entry; 708#ifdef CONFIG_STACKTRACE
709 struct ring_buffer_event *event;
710 struct stack_entry *entry;
711 struct stack_trace trace;
860 unsigned long irq_flags; 712 unsigned long irq_flags;
861 713
862 raw_local_irq_save(irq_flags); 714 if (!(trace_flags & TRACE_ITER_STACKTRACE))
863 __raw_spin_lock(&data->lock); 715 return;
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869
870 __raw_spin_unlock(&data->lock);
871 raw_local_irq_restore(irq_flags);
872
873 trace_wake_up();
874}
875
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
877 struct mmiotrace_map *map)
878{
879 struct trace_entry *entry;
880 unsigned long irq_flags;
881 716
882 raw_local_irq_save(irq_flags); 717 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
883 __raw_spin_lock(&data->lock); 718 &irq_flags);
719 if (!event)
720 return;
721 entry = ring_buffer_event_data(event);
722 tracing_generic_entry_update(&entry->ent, flags, pc);
723 entry->ent.type = TRACE_STACK;
884 724
885 entry = tracing_get_trace_entry(tr, data); 725 memset(&entry->caller, 0, sizeof(entry->caller));
886 tracing_generic_entry_update(entry, 0);
887 entry->type = TRACE_MMIO_MAP;
888 entry->mmiomap = *map;
889 726
890 __raw_spin_unlock(&data->lock); 727 trace.nr_entries = 0;
891 raw_local_irq_restore(irq_flags); 728 trace.max_entries = FTRACE_STACK_ENTRIES;
729 trace.skip = skip;
730 trace.entries = entry->caller;
892 731
893 trace_wake_up(); 732 save_stack_trace(&trace);
894} 733 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
895#endif 734#endif
735}
896 736
897void __trace_stack(struct trace_array *tr, 737void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data, 738 struct trace_array_cpu *data,
899 unsigned long flags, 739 unsigned long flags,
900 int skip) 740 int skip)
901{ 741{
902 struct trace_entry *entry; 742 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
903 struct stack_trace trace;
904
905 if (!(trace_flags & TRACE_ITER_STACKTRACE))
906 return;
907
908 entry = tracing_get_trace_entry(tr, data);
909 tracing_generic_entry_update(entry, flags);
910 entry->type = TRACE_STACK;
911
912 memset(&entry->stack, 0, sizeof(entry->stack));
913
914 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip;
917 trace.entries = entry->stack.caller;
918
919 save_stack_trace(&trace);
920} 743}
921 744
922void 745static void
923__trace_special(void *__tr, void *__data, 746ftrace_trace_special(void *__tr, void *__data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3) 747 unsigned long arg1, unsigned long arg2, unsigned long arg3,
748 int pc)
925{ 749{
750 struct ring_buffer_event *event;
926 struct trace_array_cpu *data = __data; 751 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr; 752 struct trace_array *tr = __tr;
928 struct trace_entry *entry; 753 struct special_entry *entry;
929 unsigned long irq_flags; 754 unsigned long irq_flags;
930 755
931 raw_local_irq_save(irq_flags); 756 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
932 __raw_spin_lock(&data->lock); 757 &irq_flags);
933 entry = tracing_get_trace_entry(tr, data); 758 if (!event)
934 tracing_generic_entry_update(entry, 0); 759 return;
935 entry->type = TRACE_SPECIAL; 760 entry = ring_buffer_event_data(event);
936 entry->special.arg1 = arg1; 761 tracing_generic_entry_update(&entry->ent, 0, pc);
937 entry->special.arg2 = arg2; 762 entry->ent.type = TRACE_SPECIAL;
938 entry->special.arg3 = arg3; 763 entry->arg1 = arg1;
939 __trace_stack(tr, data, irq_flags, 4); 764 entry->arg2 = arg2;
940 __raw_spin_unlock(&data->lock); 765 entry->arg3 = arg3;
941 raw_local_irq_restore(irq_flags); 766 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
767 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
942 768
943 trace_wake_up(); 769 trace_wake_up();
944} 770}
945 771
946void 772void
773__trace_special(void *__tr, void *__data,
774 unsigned long arg1, unsigned long arg2, unsigned long arg3)
775{
776 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
777}
778
779void
947tracing_sched_switch_trace(struct trace_array *tr, 780tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data, 781 struct trace_array_cpu *data,
949 struct task_struct *prev, 782 struct task_struct *prev,
950 struct task_struct *next, 783 struct task_struct *next,
951 unsigned long flags) 784 unsigned long flags, int pc)
952{ 785{
953 struct trace_entry *entry; 786 struct ring_buffer_event *event;
787 struct ctx_switch_entry *entry;
954 unsigned long irq_flags; 788 unsigned long irq_flags;
955 789
956 raw_local_irq_save(irq_flags); 790 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
957 __raw_spin_lock(&data->lock); 791 &irq_flags);
958 entry = tracing_get_trace_entry(tr, data); 792 if (!event)
959 tracing_generic_entry_update(entry, flags); 793 return;
960 entry->type = TRACE_CTX; 794 entry = ring_buffer_event_data(event);
961 entry->ctx.prev_pid = prev->pid; 795 tracing_generic_entry_update(&entry->ent, flags, pc);
962 entry->ctx.prev_prio = prev->prio; 796 entry->ent.type = TRACE_CTX;
963 entry->ctx.prev_state = prev->state; 797 entry->prev_pid = prev->pid;
964 entry->ctx.next_pid = next->pid; 798 entry->prev_prio = prev->prio;
965 entry->ctx.next_prio = next->prio; 799 entry->prev_state = prev->state;
966 entry->ctx.next_state = next->state; 800 entry->next_pid = next->pid;
967 __trace_stack(tr, data, flags, 5); 801 entry->next_prio = next->prio;
968 __raw_spin_unlock(&data->lock); 802 entry->next_state = next->state;
969 raw_local_irq_restore(irq_flags); 803 entry->next_cpu = task_cpu(next);
804 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
805 ftrace_trace_stack(tr, data, flags, 5, pc);
970} 806}
971 807
972void 808void
@@ -974,25 +810,28 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data, 810 struct trace_array_cpu *data,
975 struct task_struct *wakee, 811 struct task_struct *wakee,
976 struct task_struct *curr, 812 struct task_struct *curr,
977 unsigned long flags) 813 unsigned long flags, int pc)
978{ 814{
979 struct trace_entry *entry; 815 struct ring_buffer_event *event;
816 struct ctx_switch_entry *entry;
980 unsigned long irq_flags; 817 unsigned long irq_flags;
981 818
982 raw_local_irq_save(irq_flags); 819 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
983 __raw_spin_lock(&data->lock); 820 &irq_flags);
984 entry = tracing_get_trace_entry(tr, data); 821 if (!event)
985 tracing_generic_entry_update(entry, flags); 822 return;
986 entry->type = TRACE_WAKE; 823 entry = ring_buffer_event_data(event);
987 entry->ctx.prev_pid = curr->pid; 824 tracing_generic_entry_update(&entry->ent, flags, pc);
988 entry->ctx.prev_prio = curr->prio; 825 entry->ent.type = TRACE_WAKE;
989 entry->ctx.prev_state = curr->state; 826 entry->prev_pid = curr->pid;
990 entry->ctx.next_pid = wakee->pid; 827 entry->prev_prio = curr->prio;
991 entry->ctx.next_prio = wakee->prio; 828 entry->prev_state = curr->state;
992 entry->ctx.next_state = wakee->state; 829 entry->next_pid = wakee->pid;
993 __trace_stack(tr, data, flags, 6); 830 entry->next_prio = wakee->prio;
994 __raw_spin_unlock(&data->lock); 831 entry->next_state = wakee->state;
995 raw_local_irq_restore(irq_flags); 832 entry->next_cpu = task_cpu(wakee);
833 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
834 ftrace_trace_stack(tr, data, flags, 6, pc);
996 835
997 trace_wake_up(); 836 trace_wake_up();
998} 837}
@@ -1002,26 +841,24 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1002{ 841{
1003 struct trace_array *tr = &global_trace; 842 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data; 843 struct trace_array_cpu *data;
1005 unsigned long flags;
1006 long disabled;
1007 int cpu; 844 int cpu;
845 int pc;
1008 846
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) 847 if (tracing_disabled || !tr->ctrl)
1010 return; 848 return;
1011 849
1012 local_irq_save(flags); 850 pc = preempt_count();
851 preempt_disable_notrace();
1013 cpu = raw_smp_processor_id(); 852 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu]; 853 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled);
1016 854
1017 if (likely(disabled == 1)) 855 if (likely(!atomic_read(&data->disabled)))
1018 __trace_special(tr, data, arg1, arg2, arg3); 856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1019 857
1020 atomic_dec(&data->disabled); 858 preempt_enable_notrace();
1021 local_irq_restore(flags);
1022} 859}
1023 860
1024#ifdef CONFIG_FTRACE 861#ifdef CONFIG_FUNCTION_TRACER
1025static void 862static void
1026function_trace_call(unsigned long ip, unsigned long parent_ip) 863function_trace_call(unsigned long ip, unsigned long parent_ip)
1027{ 864{
@@ -1029,24 +866,28 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1029 struct trace_array_cpu *data; 866 struct trace_array_cpu *data;
1030 unsigned long flags; 867 unsigned long flags;
1031 long disabled; 868 long disabled;
1032 int cpu; 869 int cpu, resched;
870 int pc;
1033 871
1034 if (unlikely(!ftrace_function_enabled)) 872 if (unlikely(!ftrace_function_enabled))
1035 return; 873 return;
1036 874
1037 if (skip_trace(ip)) 875 pc = preempt_count();
1038 return; 876 resched = need_resched();
1039 877 preempt_disable_notrace();
1040 local_irq_save(flags); 878 local_save_flags(flags);
1041 cpu = raw_smp_processor_id(); 879 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu]; 880 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled); 881 disabled = atomic_inc_return(&data->disabled);
1044 882
1045 if (likely(disabled == 1)) 883 if (likely(disabled == 1))
1046 trace_function(tr, data, ip, parent_ip, flags); 884 trace_function(tr, data, ip, parent_ip, flags, pc);
1047 885
1048 atomic_dec(&data->disabled); 886 atomic_dec(&data->disabled);
1049 local_irq_restore(flags); 887 if (resched)
888 preempt_enable_no_resched_notrace();
889 else
890 preempt_enable_notrace();
1050} 891}
1051 892
1052static struct ftrace_ops trace_ops __read_mostly = 893static struct ftrace_ops trace_ops __read_mostly =
@@ -1073,111 +914,96 @@ enum trace_file_type {
1073 TRACE_FILE_LAT_FMT = 1, 914 TRACE_FILE_LAT_FMT = 1,
1074}; 915};
1075 916
1076static struct trace_entry * 917static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{ 918{
1080 struct page *page; 919 /* Don't allow ftrace to trace into the ring buffers */
1081 struct trace_entry *array; 920 ftrace_disable_cpu();
1082 921
1083 if (iter->next_idx[cpu] >= tr->entries || 922 iter->idx++;
1084 iter->next_idx[cpu] >= data->trace_idx || 923 if (iter->buffer_iter[iter->cpu])
1085 (data->trace_head == data->trace_tail && 924 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088 925
1089 if (!iter->next_page[cpu]) { 926 ftrace_enable_cpu();
1090 /* Initialize the iterator for this cpu trace buffer */ 927}
1091 WARN_ON(!data->trace_tail); 928
1092 page = virt_to_page(data->trace_tail); 929static struct trace_entry *
1093 iter->next_page[cpu] = &page->lru; 930peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1094 iter->next_page_idx[cpu] = data->trace_tail_idx; 931{
1095 } 932 struct ring_buffer_event *event;
933 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1096 934
1097 page = list_entry(iter->next_page[cpu], struct page, lru); 935 /* Don't allow ftrace to trace into the ring buffers */
1098 BUG_ON(&data->trace_pages == &page->lru); 936 ftrace_disable_cpu();
1099 937
1100 array = page_address(page); 938 if (buf_iter)
939 event = ring_buffer_iter_peek(buf_iter, ts);
940 else
941 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
942
943 ftrace_enable_cpu();
1101 944
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); 945 return event ? ring_buffer_event_data(event) : NULL;
1103 return &array[iter->next_page_idx[cpu]];
1104} 946}
1105 947
1106static struct trace_entry * 948static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu) 949__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1108{ 950{
1109 struct trace_array *tr = iter->tr; 951 struct ring_buffer *buffer = iter->tr->buffer;
1110 struct trace_entry *ent, *next = NULL; 952 struct trace_entry *ent, *next = NULL;
953 u64 next_ts = 0, ts;
1111 int next_cpu = -1; 954 int next_cpu = -1;
1112 int cpu; 955 int cpu;
1113 956
1114 for_each_tracing_cpu(cpu) { 957 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu])) 958
959 if (ring_buffer_empty_cpu(buffer, cpu))
1116 continue; 960 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); 961
962 ent = peek_next_entry(iter, cpu, &ts);
963
1118 /* 964 /*
1119 * Pick the entry with the smallest timestamp: 965 * Pick the entry with the smallest timestamp:
1120 */ 966 */
1121 if (ent && (!next || ent->t < next->t)) { 967 if (ent && (!next || ts < next_ts)) {
1122 next = ent; 968 next = ent;
1123 next_cpu = cpu; 969 next_cpu = cpu;
970 next_ts = ts;
1124 } 971 }
1125 } 972 }
1126 973
1127 if (ent_cpu) 974 if (ent_cpu)
1128 *ent_cpu = next_cpu; 975 *ent_cpu = next_cpu;
1129 976
977 if (ent_ts)
978 *ent_ts = next_ts;
979
1130 return next; 980 return next;
1131} 981}
1132 982
1133static void trace_iterator_increment(struct trace_iterator *iter) 983/* Find the next real entry, without updating the iterator itself */
984static struct trace_entry *
985find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1134{ 986{
1135 iter->idx++; 987 return __find_next_entry(iter, ent_cpu, ent_ts);
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146} 988}
1147 989
1148static void trace_consume(struct trace_iterator *iter) 990/* Find the next real entry, and increment the iterator to the next entry */
991static void *find_next_entry_inc(struct trace_iterator *iter)
1149{ 992{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu]; 993 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1151 994
1152 data->trace_tail_idx++; 995 if (iter->ent)
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { 996 trace_iterator_increment(iter, iter->cpu);
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157 997
1158 /* Check if we empty it, then reset the index */ 998 return iter->ent ? iter : NULL;
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162} 999}
1163 1000
1164static void *find_next_entry_inc(struct trace_iterator *iter) 1001static void trace_consume(struct trace_iterator *iter)
1165{ 1002{
1166 struct trace_entry *next; 1003 /* Don't allow ftrace to trace into the ring buffers */
1167 int next_cpu = -1; 1004 ftrace_disable_cpu();
1168 1005 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1169 next = find_next_entry(iter, &next_cpu); 1006 ftrace_enable_cpu();
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181} 1007}
1182 1008
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1009static void *s_next(struct seq_file *m, void *v, loff_t *pos)
@@ -1210,7 +1036,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1210 struct trace_iterator *iter = m->private; 1036 struct trace_iterator *iter = m->private;
1211 void *p = NULL; 1037 void *p = NULL;
1212 loff_t l = 0; 1038 loff_t l = 0;
1213 int i; 1039 int cpu;
1214 1040
1215 mutex_lock(&trace_types_lock); 1041 mutex_lock(&trace_types_lock);
1216 1042
@@ -1229,14 +1055,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1229 iter->ent = NULL; 1055 iter->ent = NULL;
1230 iter->cpu = 0; 1056 iter->cpu = 0;
1231 iter->idx = -1; 1057 iter->idx = -1;
1232 iter->prev_ent = NULL;
1233 iter->prev_cpu = -1;
1234 1058
1235 for_each_tracing_cpu(i) { 1059 ftrace_disable_cpu();
1236 iter->next_idx[i] = 0; 1060
1237 iter->next_page[i] = NULL; 1061 for_each_tracing_cpu(cpu) {
1062 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1238 } 1063 }
1239 1064
1065 ftrace_enable_cpu();
1066
1240 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1067 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1241 ; 1068 ;
1242 1069
@@ -1261,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
1261 mutex_unlock(&trace_types_lock); 1088 mutex_unlock(&trace_types_lock);
1262} 1089}
1263 1090
1264#define KRETPROBE_MSG "[unknown/kretprobe'd]"
1265
1266#ifdef CONFIG_KRETPROBES 1091#ifdef CONFIG_KRETPROBES
1267static inline int kretprobed(unsigned long addr) 1092static inline const char *kretprobed(const char *name)
1268{ 1093{
1269 return addr == (unsigned long)kretprobe_trampoline; 1094 static const char tramp_name[] = "kretprobe_trampoline";
1095 int size = sizeof(tramp_name);
1096
1097 if (strncmp(tramp_name, name, size) == 0)
1098 return "[unknown/kretprobe'd]";
1099 return name;
1270} 1100}
1271#else 1101#else
1272static inline int kretprobed(unsigned long addr) 1102static inline const char *kretprobed(const char *name)
1273{ 1103{
1274 return 0; 1104 return name;
1275} 1105}
1276#endif /* CONFIG_KRETPROBES */ 1106#endif /* CONFIG_KRETPROBES */
1277 1107
@@ -1280,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1280{ 1110{
1281#ifdef CONFIG_KALLSYMS 1111#ifdef CONFIG_KALLSYMS
1282 char str[KSYM_SYMBOL_LEN]; 1112 char str[KSYM_SYMBOL_LEN];
1113 const char *name;
1283 1114
1284 kallsyms_lookup(address, NULL, NULL, NULL, str); 1115 kallsyms_lookup(address, NULL, NULL, NULL, str);
1285 1116
1286 return trace_seq_printf(s, fmt, str); 1117 name = kretprobed(str);
1118
1119 return trace_seq_printf(s, fmt, name);
1287#endif 1120#endif
1288 return 1; 1121 return 1;
1289} 1122}
@@ -1294,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1294{ 1127{
1295#ifdef CONFIG_KALLSYMS 1128#ifdef CONFIG_KALLSYMS
1296 char str[KSYM_SYMBOL_LEN]; 1129 char str[KSYM_SYMBOL_LEN];
1130 const char *name;
1297 1131
1298 sprint_symbol(str, address); 1132 sprint_symbol(str, address);
1299 return trace_seq_printf(s, fmt, str); 1133 name = kretprobed(str);
1134
1135 return trace_seq_printf(s, fmt, name);
1300#endif 1136#endif
1301 return 1; 1137 return 1;
1302} 1138}
@@ -1330,21 +1166,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1330 1166
1331static void print_lat_help_header(struct seq_file *m) 1167static void print_lat_help_header(struct seq_file *m)
1332{ 1168{
1333 seq_puts(m, "# _------=> CPU# \n"); 1169 seq_puts(m, "# _------=> CPU# \n");
1334 seq_puts(m, "# / _-----=> irqs-off \n"); 1170 seq_puts(m, "# / _-----=> irqs-off \n");
1335 seq_puts(m, "# | / _----=> need-resched \n"); 1171 seq_puts(m, "# | / _----=> need-resched \n");
1336 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1172 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1337 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1173 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1338 seq_puts(m, "# |||| / \n"); 1174 seq_puts(m, "# |||| / \n");
1339 seq_puts(m, "# ||||| delay \n"); 1175 seq_puts(m, "# ||||| delay \n");
1340 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1176 seq_puts(m, "# cmd pid ||||| time | caller \n");
1341 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1177 seq_puts(m, "# \\ / ||||| \\ | / \n");
1342} 1178}
1343 1179
1344static void print_func_help_header(struct seq_file *m) 1180static void print_func_help_header(struct seq_file *m)
1345{ 1181{
1346 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1182 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1347 seq_puts(m, "# | | | | |\n"); 1183 seq_puts(m, "# | | | | |\n");
1348} 1184}
1349 1185
1350 1186
@@ -1355,23 +1191,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1355 struct trace_array *tr = iter->tr; 1191 struct trace_array *tr = iter->tr;
1356 struct trace_array_cpu *data = tr->data[tr->cpu]; 1192 struct trace_array_cpu *data = tr->data[tr->cpu];
1357 struct tracer *type = current_trace; 1193 struct tracer *type = current_trace;
1358 unsigned long total = 0; 1194 unsigned long total;
1359 unsigned long entries = 0; 1195 unsigned long entries;
1360 int cpu;
1361 const char *name = "preemption"; 1196 const char *name = "preemption";
1362 1197
1363 if (type) 1198 if (type)
1364 name = type->name; 1199 name = type->name;
1365 1200
1366 for_each_tracing_cpu(cpu) { 1201 entries = ring_buffer_entries(iter->tr->buffer);
1367 if (head_page(tr->data[cpu])) { 1202 total = entries +
1368 total += tr->data[cpu]->trace_idx; 1203 ring_buffer_overruns(iter->tr->buffer);
1369 if (tr->data[cpu]->trace_idx > tr->entries)
1370 entries += tr->entries;
1371 else
1372 entries += tr->data[cpu]->trace_idx;
1373 }
1374 }
1375 1204
1376 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1205 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1377 name, UTS_RELEASE); 1206 name, UTS_RELEASE);
@@ -1428,9 +1257,10 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1428 comm = trace_find_cmdline(entry->pid); 1257 comm = trace_find_cmdline(entry->pid);
1429 1258
1430 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1259 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1431 trace_seq_printf(s, "%d", cpu); 1260 trace_seq_printf(s, "%3d", cpu);
1432 trace_seq_printf(s, "%c%c", 1261 trace_seq_printf(s, "%c%c",
1433 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1262 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
1263 (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
1434 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1264 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1435 1265
1436 hardirq = entry->flags & TRACE_FLAG_HARDIRQ; 1266 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1457,7 +1287,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1457unsigned long preempt_mark_thresh = 100; 1287unsigned long preempt_mark_thresh = 100;
1458 1288
1459static void 1289static void
1460lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, 1290lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1461 unsigned long rel_usecs) 1291 unsigned long rel_usecs)
1462{ 1292{
1463 trace_seq_printf(s, " %4lldus", abs_usecs); 1293 trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1471,34 +1301,76 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1471 1301
1472static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1302static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1473 1303
1474static int 1304/*
1305 * The message is supposed to contain an ending newline.
1306 * If the printing stops prematurely, try to add a newline of our own.
1307 */
1308void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1309{
1310 struct trace_entry *ent;
1311 struct trace_field_cont *cont;
1312 bool ok = true;
1313
1314 ent = peek_next_entry(iter, iter->cpu, NULL);
1315 if (!ent || ent->type != TRACE_CONT) {
1316 trace_seq_putc(s, '\n');
1317 return;
1318 }
1319
1320 do {
1321 cont = (struct trace_field_cont *)ent;
1322 if (ok)
1323 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1324
1325 ftrace_disable_cpu();
1326
1327 if (iter->buffer_iter[iter->cpu])
1328 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1329 else
1330 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1331
1332 ftrace_enable_cpu();
1333
1334 ent = peek_next_entry(iter, iter->cpu, NULL);
1335 } while (ent && ent->type == TRACE_CONT);
1336
1337 if (!ok)
1338 trace_seq_putc(s, '\n');
1339}
1340
1341static enum print_line_t
1475print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1476{ 1343{
1477 struct trace_seq *s = &iter->seq; 1344 struct trace_seq *s = &iter->seq;
1478 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1345 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1479 struct trace_entry *next_entry = find_next_entry(iter, NULL); 1346 struct trace_entry *next_entry;
1480 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 1347 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1481 struct trace_entry *entry = iter->ent; 1348 struct trace_entry *entry = iter->ent;
1482 unsigned long abs_usecs; 1349 unsigned long abs_usecs;
1483 unsigned long rel_usecs; 1350 unsigned long rel_usecs;
1351 u64 next_ts;
1484 char *comm; 1352 char *comm;
1485 int S, T; 1353 int S, T;
1486 int i; 1354 int i;
1487 unsigned state; 1355 unsigned state;
1488 1356
1357 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED;
1359
1360 next_entry = find_next_entry(iter, NULL, &next_ts);
1489 if (!next_entry) 1361 if (!next_entry)
1490 next_entry = entry; 1362 next_ts = iter->ts;
1491 rel_usecs = ns2usecs(next_entry->t - entry->t); 1363 rel_usecs = ns2usecs(next_ts - iter->ts);
1492 abs_usecs = ns2usecs(entry->t - iter->tr->time_start); 1364 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1493 1365
1494 if (verbose) { 1366 if (verbose) {
1495 comm = trace_find_cmdline(entry->pid); 1367 comm = trace_find_cmdline(entry->pid);
1496 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" 1368 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1497 " %ld.%03ldms (+%ld.%03ldms): ", 1369 " %ld.%03ldms (+%ld.%03ldms): ",
1498 comm, 1370 comm,
1499 entry->pid, cpu, entry->flags, 1371 entry->pid, cpu, entry->flags,
1500 entry->preempt_count, trace_idx, 1372 entry->preempt_count, trace_idx,
1501 ns2usecs(entry->t), 1373 ns2usecs(iter->ts),
1502 abs_usecs/1000, 1374 abs_usecs/1000,
1503 abs_usecs % 1000, rel_usecs/1000, 1375 abs_usecs % 1000, rel_usecs/1000,
1504 rel_usecs % 1000); 1376 rel_usecs % 1000);
@@ -1507,52 +1379,82 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1507 lat_print_timestamp(s, abs_usecs, rel_usecs); 1379 lat_print_timestamp(s, abs_usecs, rel_usecs);
1508 } 1380 }
1509 switch (entry->type) { 1381 switch (entry->type) {
1510 case TRACE_FN: 1382 case TRACE_FN: {
1511 seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1383 struct ftrace_entry *field;
1384
1385 trace_assign_type(field, entry);
1386
1387 seq_print_ip_sym(s, field->ip, sym_flags);
1512 trace_seq_puts(s, " ("); 1388 trace_seq_puts(s, " (");
1513 if (kretprobed(entry->fn.parent_ip)) 1389 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1514 trace_seq_puts(s, KRETPROBE_MSG);
1515 else
1516 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1517 trace_seq_puts(s, ")\n"); 1390 trace_seq_puts(s, ")\n");
1518 break; 1391 break;
1392 }
1519 case TRACE_CTX: 1393 case TRACE_CTX:
1520 case TRACE_WAKE: 1394 case TRACE_WAKE: {
1521 T = entry->ctx.next_state < sizeof(state_to_char) ? 1395 struct ctx_switch_entry *field;
1522 state_to_char[entry->ctx.next_state] : 'X';
1523 1396
1524 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; 1397 trace_assign_type(field, entry);
1398
1399 T = field->next_state < sizeof(state_to_char) ?
1400 state_to_char[field->next_state] : 'X';
1401
1402 state = field->prev_state ?
1403 __ffs(field->prev_state) + 1 : 0;
1525 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; 1404 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1526 comm = trace_find_cmdline(entry->ctx.next_pid); 1405 comm = trace_find_cmdline(field->next_pid);
1527 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", 1406 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1528 entry->ctx.prev_pid, 1407 field->prev_pid,
1529 entry->ctx.prev_prio, 1408 field->prev_prio,
1530 S, entry->type == TRACE_CTX ? "==>" : " +", 1409 S, entry->type == TRACE_CTX ? "==>" : " +",
1531 entry->ctx.next_pid, 1410 field->next_cpu,
1532 entry->ctx.next_prio, 1411 field->next_pid,
1412 field->next_prio,
1533 T, comm); 1413 T, comm);
1534 break; 1414 break;
1535 case TRACE_SPECIAL: 1415 }
1416 case TRACE_SPECIAL: {
1417 struct special_entry *field;
1418
1419 trace_assign_type(field, entry);
1420
1536 trace_seq_printf(s, "# %ld %ld %ld\n", 1421 trace_seq_printf(s, "# %ld %ld %ld\n",
1537 entry->special.arg1, 1422 field->arg1,
1538 entry->special.arg2, 1423 field->arg2,
1539 entry->special.arg3); 1424 field->arg3);
1540 break; 1425 break;
1541 case TRACE_STACK: 1426 }
1427 case TRACE_STACK: {
1428 struct stack_entry *field;
1429
1430 trace_assign_type(field, entry);
1431
1542 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1432 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1543 if (i) 1433 if (i)
1544 trace_seq_puts(s, " <= "); 1434 trace_seq_puts(s, " <= ");
1545 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); 1435 seq_print_ip_sym(s, field->caller[i], sym_flags);
1546 } 1436 }
1547 trace_seq_puts(s, "\n"); 1437 trace_seq_puts(s, "\n");
1548 break; 1438 break;
1439 }
1440 case TRACE_PRINT: {
1441 struct print_entry *field;
1442
1443 trace_assign_type(field, entry);
1444
1445 seq_print_ip_sym(s, field->ip, sym_flags);
1446 trace_seq_printf(s, ": %s", field->buf);
1447 if (entry->flags & TRACE_FLAG_CONT)
1448 trace_seq_print_cont(s, iter);
1449 break;
1450 }
1549 default: 1451 default:
1550 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1452 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1551 } 1453 }
1552 return 1; 1454 return TRACE_TYPE_HANDLED;
1553} 1455}
1554 1456
1555static int print_trace_fmt(struct trace_iterator *iter) 1457static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1556{ 1458{
1557 struct trace_seq *s = &iter->seq; 1459 struct trace_seq *s = &iter->seq;
1558 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1460 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1567,90 +1469,123 @@ static int print_trace_fmt(struct trace_iterator *iter)
1567 1469
1568 entry = iter->ent; 1470 entry = iter->ent;
1569 1471
1472 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED;
1474
1570 comm = trace_find_cmdline(iter->ent->pid); 1475 comm = trace_find_cmdline(iter->ent->pid);
1571 1476
1572 t = ns2usecs(entry->t); 1477 t = ns2usecs(iter->ts);
1573 usec_rem = do_div(t, 1000000ULL); 1478 usec_rem = do_div(t, 1000000ULL);
1574 secs = (unsigned long)t; 1479 secs = (unsigned long)t;
1575 1480
1576 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1481 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1577 if (!ret) 1482 if (!ret)
1578 return 0; 1483 return TRACE_TYPE_PARTIAL_LINE;
1579 ret = trace_seq_printf(s, "[%02d] ", iter->cpu); 1484 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1580 if (!ret) 1485 if (!ret)
1581 return 0; 1486 return TRACE_TYPE_PARTIAL_LINE;
1582 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); 1487 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1583 if (!ret) 1488 if (!ret)
1584 return 0; 1489 return TRACE_TYPE_PARTIAL_LINE;
1585 1490
1586 switch (entry->type) { 1491 switch (entry->type) {
1587 case TRACE_FN: 1492 case TRACE_FN: {
1588 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1493 struct ftrace_entry *field;
1494
1495 trace_assign_type(field, entry);
1496
1497 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1589 if (!ret) 1498 if (!ret)
1590 return 0; 1499 return TRACE_TYPE_PARTIAL_LINE;
1591 if ((sym_flags & TRACE_ITER_PRINT_PARENT) && 1500 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1592 entry->fn.parent_ip) { 1501 field->parent_ip) {
1593 ret = trace_seq_printf(s, " <-"); 1502 ret = trace_seq_printf(s, " <-");
1594 if (!ret) 1503 if (!ret)
1595 return 0; 1504 return TRACE_TYPE_PARTIAL_LINE;
1596 if (kretprobed(entry->fn.parent_ip)) 1505 ret = seq_print_ip_sym(s,
1597 ret = trace_seq_puts(s, KRETPROBE_MSG); 1506 field->parent_ip,
1598 else 1507 sym_flags);
1599 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1600 sym_flags);
1601 if (!ret) 1508 if (!ret)
1602 return 0; 1509 return TRACE_TYPE_PARTIAL_LINE;
1603 } 1510 }
1604 ret = trace_seq_printf(s, "\n"); 1511 ret = trace_seq_printf(s, "\n");
1605 if (!ret) 1512 if (!ret)
1606 return 0; 1513 return TRACE_TYPE_PARTIAL_LINE;
1607 break; 1514 break;
1515 }
1608 case TRACE_CTX: 1516 case TRACE_CTX:
1609 case TRACE_WAKE: 1517 case TRACE_WAKE: {
1610 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1518 struct ctx_switch_entry *field;
1611 state_to_char[entry->ctx.prev_state] : 'X'; 1519
1612 T = entry->ctx.next_state < sizeof(state_to_char) ? 1520 trace_assign_type(field, entry);
1613 state_to_char[entry->ctx.next_state] : 'X'; 1521
1614 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", 1522 S = field->prev_state < sizeof(state_to_char) ?
1615 entry->ctx.prev_pid, 1523 state_to_char[field->prev_state] : 'X';
1616 entry->ctx.prev_prio, 1524 T = field->next_state < sizeof(state_to_char) ?
1525 state_to_char[field->next_state] : 'X';
1526 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1527 field->prev_pid,
1528 field->prev_prio,
1617 S, 1529 S,
1618 entry->type == TRACE_CTX ? "==>" : " +", 1530 entry->type == TRACE_CTX ? "==>" : " +",
1619 entry->ctx.next_pid, 1531 field->next_cpu,
1620 entry->ctx.next_prio, 1532 field->next_pid,
1533 field->next_prio,
1621 T); 1534 T);
1622 if (!ret) 1535 if (!ret)
1623 return 0; 1536 return TRACE_TYPE_PARTIAL_LINE;
1624 break; 1537 break;
1625 case TRACE_SPECIAL: 1538 }
1539 case TRACE_SPECIAL: {
1540 struct special_entry *field;
1541
1542 trace_assign_type(field, entry);
1543
1626 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1544 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1627 entry->special.arg1, 1545 field->arg1,
1628 entry->special.arg2, 1546 field->arg2,
1629 entry->special.arg3); 1547 field->arg3);
1630 if (!ret) 1548 if (!ret)
1631 return 0; 1549 return TRACE_TYPE_PARTIAL_LINE;
1632 break; 1550 break;
1633 case TRACE_STACK: 1551 }
1552 case TRACE_STACK: {
1553 struct stack_entry *field;
1554
1555 trace_assign_type(field, entry);
1556
1634 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1557 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1635 if (i) { 1558 if (i) {
1636 ret = trace_seq_puts(s, " <= "); 1559 ret = trace_seq_puts(s, " <= ");
1637 if (!ret) 1560 if (!ret)
1638 return 0; 1561 return TRACE_TYPE_PARTIAL_LINE;
1639 } 1562 }
1640 ret = seq_print_ip_sym(s, entry->stack.caller[i], 1563 ret = seq_print_ip_sym(s, field->caller[i],
1641 sym_flags); 1564 sym_flags);
1642 if (!ret) 1565 if (!ret)
1643 return 0; 1566 return TRACE_TYPE_PARTIAL_LINE;
1644 } 1567 }
1645 ret = trace_seq_puts(s, "\n"); 1568 ret = trace_seq_puts(s, "\n");
1646 if (!ret) 1569 if (!ret)
1647 return 0; 1570 return TRACE_TYPE_PARTIAL_LINE;
1648 break; 1571 break;
1649 } 1572 }
1650 return 1; 1573 case TRACE_PRINT: {
1574 struct print_entry *field;
1575
1576 trace_assign_type(field, entry);
1577
1578 seq_print_ip_sym(s, field->ip, sym_flags);
1579 trace_seq_printf(s, ": %s", field->buf);
1580 if (entry->flags & TRACE_FLAG_CONT)
1581 trace_seq_print_cont(s, iter);
1582 break;
1583 }
1584 }
1585 return TRACE_TYPE_HANDLED;
1651} 1586}
1652 1587
1653static int print_raw_fmt(struct trace_iterator *iter) 1588static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1654{ 1589{
1655 struct trace_seq *s = &iter->seq; 1590 struct trace_seq *s = &iter->seq;
1656 struct trace_entry *entry; 1591 struct trace_entry *entry;
@@ -1659,47 +1594,77 @@ static int print_raw_fmt(struct trace_iterator *iter)
1659 1594
1660 entry = iter->ent; 1595 entry = iter->ent;
1661 1596
1597 if (entry->type == TRACE_CONT)
1598 return TRACE_TYPE_HANDLED;
1599
1662 ret = trace_seq_printf(s, "%d %d %llu ", 1600 ret = trace_seq_printf(s, "%d %d %llu ",
1663 entry->pid, iter->cpu, entry->t); 1601 entry->pid, iter->cpu, iter->ts);
1664 if (!ret) 1602 if (!ret)
1665 return 0; 1603 return TRACE_TYPE_PARTIAL_LINE;
1666 1604
1667 switch (entry->type) { 1605 switch (entry->type) {
1668 case TRACE_FN: 1606 case TRACE_FN: {
1607 struct ftrace_entry *field;
1608
1609 trace_assign_type(field, entry);
1610
1669 ret = trace_seq_printf(s, "%x %x\n", 1611 ret = trace_seq_printf(s, "%x %x\n",
1670 entry->fn.ip, entry->fn.parent_ip); 1612 field->ip,
1613 field->parent_ip);
1671 if (!ret) 1614 if (!ret)
1672 return 0; 1615 return TRACE_TYPE_PARTIAL_LINE;
1673 break; 1616 break;
1617 }
1674 case TRACE_CTX: 1618 case TRACE_CTX:
1675 case TRACE_WAKE: 1619 case TRACE_WAKE: {
1676 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1620 struct ctx_switch_entry *field;
1677 state_to_char[entry->ctx.prev_state] : 'X'; 1621
1678 T = entry->ctx.next_state < sizeof(state_to_char) ? 1622 trace_assign_type(field, entry);
1679 state_to_char[entry->ctx.next_state] : 'X'; 1623
1624 S = field->prev_state < sizeof(state_to_char) ?
1625 state_to_char[field->prev_state] : 'X';
1626 T = field->next_state < sizeof(state_to_char) ?
1627 state_to_char[field->next_state] : 'X';
1680 if (entry->type == TRACE_WAKE) 1628 if (entry->type == TRACE_WAKE)
1681 S = '+'; 1629 S = '+';
1682 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", 1630 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1683 entry->ctx.prev_pid, 1631 field->prev_pid,
1684 entry->ctx.prev_prio, 1632 field->prev_prio,
1685 S, 1633 S,
1686 entry->ctx.next_pid, 1634 field->next_cpu,
1687 entry->ctx.next_prio, 1635 field->next_pid,
1636 field->next_prio,
1688 T); 1637 T);
1689 if (!ret) 1638 if (!ret)
1690 return 0; 1639 return TRACE_TYPE_PARTIAL_LINE;
1691 break; 1640 break;
1641 }
1692 case TRACE_SPECIAL: 1642 case TRACE_SPECIAL:
1693 case TRACE_STACK: 1643 case TRACE_STACK: {
1644 struct special_entry *field;
1645
1646 trace_assign_type(field, entry);
1647
1694 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1648 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1695 entry->special.arg1, 1649 field->arg1,
1696 entry->special.arg2, 1650 field->arg2,
1697 entry->special.arg3); 1651 field->arg3);
1698 if (!ret) 1652 if (!ret)
1699 return 0; 1653 return TRACE_TYPE_PARTIAL_LINE;
1700 break; 1654 break;
1701 } 1655 }
1702 return 1; 1656 case TRACE_PRINT: {
1657 struct print_entry *field;
1658
1659 trace_assign_type(field, entry);
1660
1661 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
1662 if (entry->flags & TRACE_FLAG_CONT)
1663 trace_seq_print_cont(s, iter);
1664 break;
1665 }
1666 }
1667 return TRACE_TYPE_HANDLED;
1703} 1668}
1704 1669
1705#define SEQ_PUT_FIELD_RET(s, x) \ 1670#define SEQ_PUT_FIELD_RET(s, x) \
@@ -1710,11 +1675,12 @@ do { \
1710 1675
1711#define SEQ_PUT_HEX_FIELD_RET(s, x) \ 1676#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1712do { \ 1677do { \
1678 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
1713 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ 1679 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1714 return 0; \ 1680 return 0; \
1715} while (0) 1681} while (0)
1716 1682
1717static int print_hex_fmt(struct trace_iterator *iter) 1683static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1718{ 1684{
1719 struct trace_seq *s = &iter->seq; 1685 struct trace_seq *s = &iter->seq;
1720 unsigned char newline = '\n'; 1686 unsigned char newline = '\n';
@@ -1723,97 +1689,139 @@ static int print_hex_fmt(struct trace_iterator *iter)
1723 1689
1724 entry = iter->ent; 1690 entry = iter->ent;
1725 1691
1692 if (entry->type == TRACE_CONT)
1693 return TRACE_TYPE_HANDLED;
1694
1726 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1695 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1727 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 1696 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1728 SEQ_PUT_HEX_FIELD_RET(s, entry->t); 1697 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1729 1698
1730 switch (entry->type) { 1699 switch (entry->type) {
1731 case TRACE_FN: 1700 case TRACE_FN: {
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); 1701 struct ftrace_entry *field;
1733 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1702
1703 trace_assign_type(field, entry);
1704
1705 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
1706 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1734 break; 1707 break;
1708 }
1735 case TRACE_CTX: 1709 case TRACE_CTX:
1736 case TRACE_WAKE: 1710 case TRACE_WAKE: {
1737 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1711 struct ctx_switch_entry *field;
1738 state_to_char[entry->ctx.prev_state] : 'X'; 1712
1739 T = entry->ctx.next_state < sizeof(state_to_char) ? 1713 trace_assign_type(field, entry);
1740 state_to_char[entry->ctx.next_state] : 'X'; 1714
1715 S = field->prev_state < sizeof(state_to_char) ?
1716 state_to_char[field->prev_state] : 'X';
1717 T = field->next_state < sizeof(state_to_char) ?
1718 state_to_char[field->next_state] : 'X';
1741 if (entry->type == TRACE_WAKE) 1719 if (entry->type == TRACE_WAKE)
1742 S = '+'; 1720 S = '+';
1743 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); 1721 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1744 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); 1722 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1745 SEQ_PUT_HEX_FIELD_RET(s, S); 1723 SEQ_PUT_HEX_FIELD_RET(s, S);
1746 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); 1724 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); 1725 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1726 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1749 SEQ_PUT_HEX_FIELD_RET(s, T); 1727 SEQ_PUT_HEX_FIELD_RET(s, T);
1750 break; 1728 break;
1729 }
1751 case TRACE_SPECIAL: 1730 case TRACE_SPECIAL:
1752 case TRACE_STACK: 1731 case TRACE_STACK: {
1753 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); 1732 struct special_entry *field;
1754 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); 1733
1755 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); 1734 trace_assign_type(field, entry);
1735
1736 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1737 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1738 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1756 break; 1739 break;
1757 } 1740 }
1741 }
1758 SEQ_PUT_FIELD_RET(s, newline); 1742 SEQ_PUT_FIELD_RET(s, newline);
1759 1743
1760 return 1; 1744 return TRACE_TYPE_HANDLED;
1761} 1745}
1762 1746
1763static int print_bin_fmt(struct trace_iterator *iter) 1747static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1764{ 1748{
1765 struct trace_seq *s = &iter->seq; 1749 struct trace_seq *s = &iter->seq;
1766 struct trace_entry *entry; 1750 struct trace_entry *entry;
1767 1751
1768 entry = iter->ent; 1752 entry = iter->ent;
1769 1753
1754 if (entry->type == TRACE_CONT)
1755 return TRACE_TYPE_HANDLED;
1756
1770 SEQ_PUT_FIELD_RET(s, entry->pid); 1757 SEQ_PUT_FIELD_RET(s, entry->pid);
1771 SEQ_PUT_FIELD_RET(s, entry->cpu); 1758 SEQ_PUT_FIELD_RET(s, iter->cpu);
1772 SEQ_PUT_FIELD_RET(s, entry->t); 1759 SEQ_PUT_FIELD_RET(s, iter->ts);
1773 1760
1774 switch (entry->type) { 1761 switch (entry->type) {
1775 case TRACE_FN: 1762 case TRACE_FN: {
1776 SEQ_PUT_FIELD_RET(s, entry->fn.ip); 1763 struct ftrace_entry *field;
1777 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); 1764
1765 trace_assign_type(field, entry);
1766
1767 SEQ_PUT_FIELD_RET(s, field->ip);
1768 SEQ_PUT_FIELD_RET(s, field->parent_ip);
1778 break; 1769 break;
1779 case TRACE_CTX: 1770 }
1780 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); 1771 case TRACE_CTX: {
1781 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); 1772 struct ctx_switch_entry *field;
1782 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); 1773
1783 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); 1774 trace_assign_type(field, entry);
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); 1775
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); 1776 SEQ_PUT_FIELD_RET(s, field->prev_pid);
1777 SEQ_PUT_FIELD_RET(s, field->prev_prio);
1778 SEQ_PUT_FIELD_RET(s, field->prev_state);
1779 SEQ_PUT_FIELD_RET(s, field->next_pid);
1780 SEQ_PUT_FIELD_RET(s, field->next_prio);
1781 SEQ_PUT_FIELD_RET(s, field->next_state);
1786 break; 1782 break;
1783 }
1787 case TRACE_SPECIAL: 1784 case TRACE_SPECIAL:
1788 case TRACE_STACK: 1785 case TRACE_STACK: {
1789 SEQ_PUT_FIELD_RET(s, entry->special.arg1); 1786 struct special_entry *field;
1790 SEQ_PUT_FIELD_RET(s, entry->special.arg2); 1787
1791 SEQ_PUT_FIELD_RET(s, entry->special.arg3); 1788 trace_assign_type(field, entry);
1789
1790 SEQ_PUT_FIELD_RET(s, field->arg1);
1791 SEQ_PUT_FIELD_RET(s, field->arg2);
1792 SEQ_PUT_FIELD_RET(s, field->arg3);
1792 break; 1793 break;
1793 } 1794 }
1795 }
1794 return 1; 1796 return 1;
1795} 1797}
1796 1798
1797static int trace_empty(struct trace_iterator *iter) 1799static int trace_empty(struct trace_iterator *iter)
1798{ 1800{
1799 struct trace_array_cpu *data;
1800 int cpu; 1801 int cpu;
1801 1802
1802 for_each_tracing_cpu(cpu) { 1803 for_each_tracing_cpu(cpu) {
1803 data = iter->tr->data[cpu]; 1804 if (iter->buffer_iter[cpu]) {
1804 1805 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1805 if (head_page(data) && data->trace_idx && 1806 return 0;
1806 (data->trace_tail != data->trace_head || 1807 } else {
1807 data->trace_tail_idx != data->trace_head_idx)) 1808 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1808 return 0; 1809 return 0;
1810 }
1809 } 1811 }
1812
1810 return 1; 1813 return 1;
1811} 1814}
1812 1815
1813static int print_trace_line(struct trace_iterator *iter) 1816static enum print_line_t print_trace_line(struct trace_iterator *iter)
1814{ 1817{
1815 if (iter->trace && iter->trace->print_line) 1818 enum print_line_t ret;
1816 return iter->trace->print_line(iter); 1819
1820 if (iter->trace && iter->trace->print_line) {
1821 ret = iter->trace->print_line(iter);
1822 if (ret != TRACE_TYPE_UNHANDLED)
1823 return ret;
1824 }
1817 1825
1818 if (trace_flags & TRACE_ITER_BIN) 1826 if (trace_flags & TRACE_ITER_BIN)
1819 return print_bin_fmt(iter); 1827 return print_bin_fmt(iter);
@@ -1869,6 +1877,8 @@ static struct trace_iterator *
1869__tracing_open(struct inode *inode, struct file *file, int *ret) 1877__tracing_open(struct inode *inode, struct file *file, int *ret)
1870{ 1878{
1871 struct trace_iterator *iter; 1879 struct trace_iterator *iter;
1880 struct seq_file *m;
1881 int cpu;
1872 1882
1873 if (tracing_disabled) { 1883 if (tracing_disabled) {
1874 *ret = -ENODEV; 1884 *ret = -ENODEV;
@@ -1889,28 +1899,45 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1889 iter->trace = current_trace; 1899 iter->trace = current_trace;
1890 iter->pos = -1; 1900 iter->pos = -1;
1891 1901
1902 for_each_tracing_cpu(cpu) {
1903
1904 iter->buffer_iter[cpu] =
1905 ring_buffer_read_start(iter->tr->buffer, cpu);
1906
1907 if (!iter->buffer_iter[cpu])
1908 goto fail_buffer;
1909 }
1910
1892 /* TODO stop tracer */ 1911 /* TODO stop tracer */
1893 *ret = seq_open(file, &tracer_seq_ops); 1912 *ret = seq_open(file, &tracer_seq_ops);
1894 if (!*ret) { 1913 if (*ret)
1895 struct seq_file *m = file->private_data; 1914 goto fail_buffer;
1896 m->private = iter;
1897 1915
1898 /* stop the trace while dumping */ 1916 m = file->private_data;
1899 if (iter->tr->ctrl) { 1917 m->private = iter;
1900 tracer_enabled = 0;
1901 ftrace_function_enabled = 0;
1902 }
1903 1918
1904 if (iter->trace && iter->trace->open) 1919 /* stop the trace while dumping */
1905 iter->trace->open(iter); 1920 if (iter->tr->ctrl) {
1906 } else { 1921 tracer_enabled = 0;
1907 kfree(iter); 1922 ftrace_function_enabled = 0;
1908 iter = NULL;
1909 } 1923 }
1924
1925 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter);
1927
1910 mutex_unlock(&trace_types_lock); 1928 mutex_unlock(&trace_types_lock);
1911 1929
1912 out: 1930 out:
1913 return iter; 1931 return iter;
1932
1933 fail_buffer:
1934 for_each_tracing_cpu(cpu) {
1935 if (iter->buffer_iter[cpu])
1936 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1937 }
1938 mutex_unlock(&trace_types_lock);
1939
1940 return ERR_PTR(-ENOMEM);
1914} 1941}
1915 1942
1916int tracing_open_generic(struct inode *inode, struct file *filp) 1943int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1926,8 +1953,14 @@ int tracing_release(struct inode *inode, struct file *file)
1926{ 1953{
1927 struct seq_file *m = (struct seq_file *)file->private_data; 1954 struct seq_file *m = (struct seq_file *)file->private_data;
1928 struct trace_iterator *iter = m->private; 1955 struct trace_iterator *iter = m->private;
1956 int cpu;
1929 1957
1930 mutex_lock(&trace_types_lock); 1958 mutex_lock(&trace_types_lock);
1959 for_each_tracing_cpu(cpu) {
1960 if (iter->buffer_iter[cpu])
1961 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1962 }
1963
1931 if (iter->trace && iter->trace->close) 1964 if (iter->trace && iter->trace->close)
1932 iter->trace->close(iter); 1965 iter->trace->close(iter);
1933 1966
@@ -2352,6 +2385,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2352 struct tracer *t; 2385 struct tracer *t;
2353 char buf[max_tracer_type_len+1]; 2386 char buf[max_tracer_type_len+1];
2354 int i; 2387 int i;
2388 size_t ret;
2389
2390 ret = cnt;
2355 2391
2356 if (cnt > max_tracer_type_len) 2392 if (cnt > max_tracer_type_len)
2357 cnt = max_tracer_type_len; 2393 cnt = max_tracer_type_len;
@@ -2370,7 +2406,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2370 if (strcmp(t->name, buf) == 0) 2406 if (strcmp(t->name, buf) == 0)
2371 break; 2407 break;
2372 } 2408 }
2373 if (!t || t == current_trace) 2409 if (!t) {
2410 ret = -EINVAL;
2411 goto out;
2412 }
2413 if (t == current_trace)
2374 goto out; 2414 goto out;
2375 2415
2376 if (current_trace && current_trace->reset) 2416 if (current_trace && current_trace->reset)
@@ -2383,9 +2423,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 out: 2423 out:
2384 mutex_unlock(&trace_types_lock); 2424 mutex_unlock(&trace_types_lock);
2385 2425
2386 filp->f_pos += cnt; 2426 if (ret > 0)
2427 filp->f_pos += ret;
2387 2428
2388 return cnt; 2429 return ret;
2389} 2430}
2390 2431
2391static ssize_t 2432static ssize_t
@@ -2500,20 +2541,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2500 size_t cnt, loff_t *ppos) 2541 size_t cnt, loff_t *ppos)
2501{ 2542{
2502 struct trace_iterator *iter = filp->private_data; 2543 struct trace_iterator *iter = filp->private_data;
2503 struct trace_array_cpu *data;
2504 static cpumask_t mask;
2505 unsigned long flags;
2506#ifdef CONFIG_FTRACE
2507 int ftrace_save;
2508#endif
2509 int cpu;
2510 ssize_t sret; 2544 ssize_t sret;
2511 2545
2512 /* return any leftover data */ 2546 /* return any leftover data */
2513 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2547 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2514 if (sret != -EBUSY) 2548 if (sret != -EBUSY)
2515 return sret; 2549 return sret;
2516 sret = 0;
2517 2550
2518 trace_seq_reset(&iter->seq); 2551 trace_seq_reset(&iter->seq);
2519 2552
@@ -2524,6 +2557,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2524 goto out; 2557 goto out;
2525 } 2558 }
2526 2559
2560waitagain:
2561 sret = 0;
2527 while (trace_empty(iter)) { 2562 while (trace_empty(iter)) {
2528 2563
2529 if ((filp->f_flags & O_NONBLOCK)) { 2564 if ((filp->f_flags & O_NONBLOCK)) {
@@ -2588,46 +2623,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2588 offsetof(struct trace_iterator, seq)); 2623 offsetof(struct trace_iterator, seq));
2589 iter->pos = -1; 2624 iter->pos = -1;
2590 2625
2591 /*
2592 * We need to stop all tracing on all CPUS to read the
2593 * next buffer. This is a bit expensive, but is
2594 * not done often. We fill all what we can read,
2595 * and then release the locks again.
2596 */
2597
2598 cpus_clear(mask);
2599 local_irq_save(flags);
2600#ifdef CONFIG_FTRACE
2601 ftrace_save = ftrace_enabled;
2602 ftrace_enabled = 0;
2603#endif
2604 smp_wmb();
2605 for_each_tracing_cpu(cpu) {
2606 data = iter->tr->data[cpu];
2607
2608 if (!head_page(data) || !data->trace_idx)
2609 continue;
2610
2611 atomic_inc(&data->disabled);
2612 cpu_set(cpu, mask);
2613 }
2614
2615 for_each_cpu_mask(cpu, mask) {
2616 data = iter->tr->data[cpu];
2617 __raw_spin_lock(&data->lock);
2618
2619 if (data->overrun > iter->last_overrun[cpu])
2620 iter->overrun[cpu] +=
2621 data->overrun - iter->last_overrun[cpu];
2622 iter->last_overrun[cpu] = data->overrun;
2623 }
2624
2625 while (find_next_entry_inc(iter) != NULL) { 2626 while (find_next_entry_inc(iter) != NULL) {
2626 int ret; 2627 enum print_line_t ret;
2627 int len = iter->seq.len; 2628 int len = iter->seq.len;
2628 2629
2629 ret = print_trace_line(iter); 2630 ret = print_trace_line(iter);
2630 if (!ret) { 2631 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2631 /* don't print partial lines */ 2632 /* don't print partial lines */
2632 iter->seq.len = len; 2633 iter->seq.len = len;
2633 break; 2634 break;
@@ -2639,26 +2640,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2639 break; 2640 break;
2640 } 2641 }
2641 2642
2642 for_each_cpu_mask(cpu, mask) {
2643 data = iter->tr->data[cpu];
2644 __raw_spin_unlock(&data->lock);
2645 }
2646
2647 for_each_cpu_mask(cpu, mask) {
2648 data = iter->tr->data[cpu];
2649 atomic_dec(&data->disabled);
2650 }
2651#ifdef CONFIG_FTRACE
2652 ftrace_enabled = ftrace_save;
2653#endif
2654 local_irq_restore(flags);
2655
2656 /* Now copy what we have to the user */ 2643 /* Now copy what we have to the user */
2657 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2644 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2658 if (iter->seq.readpos >= iter->seq.len) 2645 if (iter->seq.readpos >= iter->seq.len)
2659 trace_seq_reset(&iter->seq); 2646 trace_seq_reset(&iter->seq);
2647
2648 /*
2649 * If there was nothing to send to user, in spite of consuming trace
2650 * entries, go back to wait for more entries.
2651 */
2660 if (sret == -EBUSY) 2652 if (sret == -EBUSY)
2661 sret = 0; 2653 goto waitagain;
2662 2654
2663out: 2655out:
2664 mutex_unlock(&trace_types_lock); 2656 mutex_unlock(&trace_types_lock);
@@ -2684,7 +2676,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2684{ 2676{
2685 unsigned long val; 2677 unsigned long val;
2686 char buf[64]; 2678 char buf[64];
2687 int i, ret; 2679 int ret;
2680 struct trace_array *tr = filp->private_data;
2688 2681
2689 if (cnt >= sizeof(buf)) 2682 if (cnt >= sizeof(buf))
2690 return -EINVAL; 2683 return -EINVAL;
@@ -2704,59 +2697,38 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2704 2697
2705 mutex_lock(&trace_types_lock); 2698 mutex_lock(&trace_types_lock);
2706 2699
2707 if (current_trace != &no_tracer) { 2700 if (tr->ctrl) {
2708 cnt = -EBUSY; 2701 cnt = -EBUSY;
2709 pr_info("ftrace: set current_tracer to none" 2702 pr_info("ftrace: please disable tracing"
2710 " before modifying buffer size\n"); 2703 " before modifying buffer size\n");
2711 goto out; 2704 goto out;
2712 } 2705 }
2713 2706
2714 if (val > global_trace.entries) { 2707 if (val != global_trace.entries) {
2715 long pages_requested; 2708 ret = ring_buffer_resize(global_trace.buffer, val);
2716 unsigned long freeable_pages; 2709 if (ret < 0) {
2717 2710 cnt = ret;
2718 /* make sure we have enough memory before mapping */
2719 pages_requested =
2720 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2721
2722 /* account for each buffer (and max_tr) */
2723 pages_requested *= tracing_nr_buffers * 2;
2724
2725 /* Check for overflow */
2726 if (pages_requested < 0) {
2727 cnt = -ENOMEM;
2728 goto out; 2711 goto out;
2729 } 2712 }
2730 2713
2731 freeable_pages = determine_dirtyable_memory(); 2714 ret = ring_buffer_resize(max_tr.buffer, val);
2732 2715 if (ret < 0) {
2733 /* we only allow to request 1/4 of useable memory */ 2716 int r;
2734 if (pages_requested > 2717 cnt = ret;
2735 ((freeable_pages + tracing_pages_allocated) / 4)) { 2718 r = ring_buffer_resize(global_trace.buffer,
2736 cnt = -ENOMEM; 2719 global_trace.entries);
2737 goto out; 2720 if (r < 0) {
2738 } 2721 /* AARGH! We are left with different
2739 2722 * size max buffer!!!! */
2740 while (global_trace.entries < val) { 2723 WARN_ON(1);
2741 if (trace_alloc_page()) { 2724 tracing_disabled = 1;
2742 cnt = -ENOMEM;
2743 goto out;
2744 } 2725 }
2745 /* double check that we don't go over the known pages */ 2726 goto out;
2746 if (tracing_pages_allocated > pages_requested)
2747 break;
2748 } 2727 }
2749 2728
2750 } else { 2729 global_trace.entries = val;
2751 /* include the number of entries in val (inc of page entries) */
2752 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2753 trace_free_page();
2754 } 2730 }
2755 2731
2756 /* check integrity */
2757 for_each_tracing_cpu(i)
2758 check_pages(global_trace.data[i]);
2759
2760 filp->f_pos += cnt; 2732 filp->f_pos += cnt;
2761 2733
2762 /* If check pages failed, return ENOMEM */ 2734 /* If check pages failed, return ENOMEM */
@@ -2769,6 +2741,52 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2769 return cnt; 2741 return cnt;
2770} 2742}
2771 2743
2744static int mark_printk(const char *fmt, ...)
2745{
2746 int ret;
2747 va_list args;
2748 va_start(args, fmt);
2749 ret = trace_vprintk(0, fmt, args);
2750 va_end(args);
2751 return ret;
2752}
2753
2754static ssize_t
2755tracing_mark_write(struct file *filp, const char __user *ubuf,
2756 size_t cnt, loff_t *fpos)
2757{
2758 char *buf;
2759 char *end;
2760 struct trace_array *tr = &global_trace;
2761
2762 if (!tr->ctrl || tracing_disabled)
2763 return -EINVAL;
2764
2765 if (cnt > TRACE_BUF_SIZE)
2766 cnt = TRACE_BUF_SIZE;
2767
2768 buf = kmalloc(cnt + 1, GFP_KERNEL);
2769 if (buf == NULL)
2770 return -ENOMEM;
2771
2772 if (copy_from_user(buf, ubuf, cnt)) {
2773 kfree(buf);
2774 return -EFAULT;
2775 }
2776
2777 /* Cut from the first nil or newline. */
2778 buf[cnt] = '\0';
2779 end = strchr(buf, '\n');
2780 if (end)
2781 *end = '\0';
2782
2783 cnt = mark_printk("%s\n", buf);
2784 kfree(buf);
2785 *fpos += cnt;
2786
2787 return cnt;
2788}
2789
2772static struct file_operations tracing_max_lat_fops = { 2790static struct file_operations tracing_max_lat_fops = {
2773 .open = tracing_open_generic, 2791 .open = tracing_open_generic,
2774 .read = tracing_max_lat_read, 2792 .read = tracing_max_lat_read,
@@ -2800,6 +2818,11 @@ static struct file_operations tracing_entries_fops = {
2800 .write = tracing_entries_write, 2818 .write = tracing_entries_write,
2801}; 2819};
2802 2820
2821static struct file_operations tracing_mark_fops = {
2822 .open = tracing_open_generic,
2823 .write = tracing_mark_write,
2824};
2825
2803#ifdef CONFIG_DYNAMIC_FTRACE 2826#ifdef CONFIG_DYNAMIC_FTRACE
2804 2827
2805static ssize_t 2828static ssize_t
@@ -2846,7 +2869,7 @@ struct dentry *tracing_init_dentry(void)
2846#include "trace_selftest.c" 2869#include "trace_selftest.c"
2847#endif 2870#endif
2848 2871
2849static __init void tracer_init_debugfs(void) 2872static __init int tracer_init_debugfs(void)
2850{ 2873{
2851 struct dentry *d_tracer; 2874 struct dentry *d_tracer;
2852 struct dentry *entry; 2875 struct dentry *entry;
@@ -2881,12 +2904,12 @@ static __init void tracer_init_debugfs(void)
2881 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 2904 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2882 &global_trace, &show_traces_fops); 2905 &global_trace, &show_traces_fops);
2883 if (!entry) 2906 if (!entry)
2884 pr_warning("Could not create debugfs 'trace' entry\n"); 2907 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2885 2908
2886 entry = debugfs_create_file("current_tracer", 0444, d_tracer, 2909 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2887 &global_trace, &set_tracer_fops); 2910 &global_trace, &set_tracer_fops);
2888 if (!entry) 2911 if (!entry)
2889 pr_warning("Could not create debugfs 'trace' entry\n"); 2912 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2890 2913
2891 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, 2914 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2892 &tracing_max_latency, 2915 &tracing_max_latency,
@@ -2899,7 +2922,7 @@ static __init void tracer_init_debugfs(void)
2899 &tracing_thresh, &tracing_max_lat_fops); 2922 &tracing_thresh, &tracing_max_lat_fops);
2900 if (!entry) 2923 if (!entry)
2901 pr_warning("Could not create debugfs " 2924 pr_warning("Could not create debugfs "
2902 "'tracing_threash' entry\n"); 2925 "'tracing_thresh' entry\n");
2903 entry = debugfs_create_file("README", 0644, d_tracer, 2926 entry = debugfs_create_file("README", 0644, d_tracer,
2904 NULL, &tracing_readme_fops); 2927 NULL, &tracing_readme_fops);
2905 if (!entry) 2928 if (!entry)
@@ -2909,13 +2932,19 @@ static __init void tracer_init_debugfs(void)
2909 NULL, &tracing_pipe_fops); 2932 NULL, &tracing_pipe_fops);
2910 if (!entry) 2933 if (!entry)
2911 pr_warning("Could not create debugfs " 2934 pr_warning("Could not create debugfs "
2912 "'tracing_threash' entry\n"); 2935 "'trace_pipe' entry\n");
2913 2936
2914 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 2937 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2915 &global_trace, &tracing_entries_fops); 2938 &global_trace, &tracing_entries_fops);
2916 if (!entry) 2939 if (!entry)
2917 pr_warning("Could not create debugfs " 2940 pr_warning("Could not create debugfs "
2918 "'tracing_threash' entry\n"); 2941 "'trace_entries' entry\n");
2942
2943 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2944 NULL, &tracing_mark_fops);
2945 if (!entry)
2946 pr_warning("Could not create debugfs "
2947 "'trace_marker' entry\n");
2919 2948
2920#ifdef CONFIG_DYNAMIC_FTRACE 2949#ifdef CONFIG_DYNAMIC_FTRACE
2921 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 2950 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
@@ -2928,230 +2957,263 @@ static __init void tracer_init_debugfs(void)
2928#ifdef CONFIG_SYSPROF_TRACER 2957#ifdef CONFIG_SYSPROF_TRACER
2929 init_tracer_sysprof_debugfs(d_tracer); 2958 init_tracer_sysprof_debugfs(d_tracer);
2930#endif 2959#endif
2960 return 0;
2931} 2961}
2932 2962
2933static int trace_alloc_page(void) 2963int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2934{ 2964{
2965 static DEFINE_SPINLOCK(trace_buf_lock);
2966 static char trace_buf[TRACE_BUF_SIZE];
2967
2968 struct ring_buffer_event *event;
2969 struct trace_array *tr = &global_trace;
2935 struct trace_array_cpu *data; 2970 struct trace_array_cpu *data;
2936 struct page *page, *tmp; 2971 struct print_entry *entry;
2937 LIST_HEAD(pages); 2972 unsigned long flags, irq_flags;
2938 void *array; 2973 int cpu, len = 0, size, pc;
2939 unsigned pages_allocated = 0;
2940 int i;
2941 2974
2942 /* first allocate a page for each CPU */ 2975 if (!tr->ctrl || tracing_disabled)
2943 for_each_tracing_cpu(i) { 2976 return 0;
2944 array = (void *)__get_free_page(GFP_KERNEL);
2945 if (array == NULL) {
2946 printk(KERN_ERR "tracer: failed to allocate page"
2947 "for trace buffer!\n");
2948 goto free_pages;
2949 }
2950 2977
2951 pages_allocated++; 2978 pc = preempt_count();
2952 page = virt_to_page(array); 2979 preempt_disable_notrace();
2953 list_add(&page->lru, &pages); 2980 cpu = raw_smp_processor_id();
2981 data = tr->data[cpu];
2954 2982
2955/* Only allocate if we are actually using the max trace */ 2983 if (unlikely(atomic_read(&data->disabled)))
2956#ifdef CONFIG_TRACER_MAX_TRACE 2984 goto out;
2957 array = (void *)__get_free_page(GFP_KERNEL);
2958 if (array == NULL) {
2959 printk(KERN_ERR "tracer: failed to allocate page"
2960 "for trace buffer!\n");
2961 goto free_pages;
2962 }
2963 pages_allocated++;
2964 page = virt_to_page(array);
2965 list_add(&page->lru, &pages);
2966#endif
2967 }
2968 2985
2969 /* Now that we successfully allocate a page per CPU, add them */ 2986 spin_lock_irqsave(&trace_buf_lock, flags);
2970 for_each_tracing_cpu(i) { 2987 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2971 data = global_trace.data[i];
2972 page = list_entry(pages.next, struct page, lru);
2973 list_del_init(&page->lru);
2974 list_add_tail(&page->lru, &data->trace_pages);
2975 ClearPageLRU(page);
2976 2988
2977#ifdef CONFIG_TRACER_MAX_TRACE 2989 len = min(len, TRACE_BUF_SIZE-1);
2978 data = max_tr.data[i]; 2990 trace_buf[len] = 0;
2979 page = list_entry(pages.next, struct page, lru);
2980 list_del_init(&page->lru);
2981 list_add_tail(&page->lru, &data->trace_pages);
2982 SetPageLRU(page);
2983#endif
2984 }
2985 tracing_pages_allocated += pages_allocated;
2986 global_trace.entries += ENTRIES_PER_PAGE;
2987 2991
2988 return 0; 2992 size = sizeof(*entry) + len + 1;
2993 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
2994 if (!event)
2995 goto out_unlock;
2996 entry = ring_buffer_event_data(event);
2997 tracing_generic_entry_update(&entry->ent, flags, pc);
2998 entry->ent.type = TRACE_PRINT;
2999 entry->ip = ip;
2989 3000
2990 free_pages: 3001 memcpy(&entry->buf, trace_buf, len);
2991 list_for_each_entry_safe(page, tmp, &pages, lru) { 3002 entry->buf[len] = 0;
2992 list_del_init(&page->lru); 3003 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
2993 __free_page(page); 3004
2994 } 3005 out_unlock:
2995 return -ENOMEM; 3006 spin_unlock_irqrestore(&trace_buf_lock, flags);
3007
3008 out:
3009 preempt_enable_notrace();
3010
3011 return len;
2996} 3012}
3013EXPORT_SYMBOL_GPL(trace_vprintk);
2997 3014
2998static int trace_free_page(void) 3015int __ftrace_printk(unsigned long ip, const char *fmt, ...)
2999{ 3016{
3000 struct trace_array_cpu *data; 3017 int ret;
3001 struct page *page; 3018 va_list ap;
3002 struct list_head *p;
3003 int i;
3004 int ret = 0;
3005 3019
3006 /* free one page from each buffer */ 3020 if (!(trace_flags & TRACE_ITER_PRINTK))
3007 for_each_tracing_cpu(i) { 3021 return 0;
3008 data = global_trace.data[i];
3009 p = data->trace_pages.next;
3010 if (p == &data->trace_pages) {
3011 /* should never happen */
3012 WARN_ON(1);
3013 tracing_disabled = 1;
3014 ret = -1;
3015 break;
3016 }
3017 page = list_entry(p, struct page, lru);
3018 ClearPageLRU(page);
3019 list_del(&page->lru);
3020 tracing_pages_allocated--;
3021 tracing_pages_allocated--;
3022 __free_page(page);
3023 3022
3024 tracing_reset(data); 3023 va_start(ap, fmt);
3024 ret = trace_vprintk(ip, fmt, ap);
3025 va_end(ap);
3026 return ret;
3027}
3028EXPORT_SYMBOL_GPL(__ftrace_printk);
3025 3029
3026#ifdef CONFIG_TRACER_MAX_TRACE 3030static int trace_panic_handler(struct notifier_block *this,
3027 data = max_tr.data[i]; 3031 unsigned long event, void *unused)
3028 p = data->trace_pages.next; 3032{
3029 if (p == &data->trace_pages) { 3033 ftrace_dump();
3030 /* should never happen */ 3034 return NOTIFY_OK;
3031 WARN_ON(1); 3035}
3032 tracing_disabled = 1;
3033 ret = -1;
3034 break;
3035 }
3036 page = list_entry(p, struct page, lru);
3037 ClearPageLRU(page);
3038 list_del(&page->lru);
3039 __free_page(page);
3040 3036
3041 tracing_reset(data); 3037static struct notifier_block trace_panic_notifier = {
3042#endif 3038 .notifier_call = trace_panic_handler,
3043 } 3039 .next = NULL,
3044 global_trace.entries -= ENTRIES_PER_PAGE; 3040 .priority = 150 /* priority: INT_MAX >= x >= 0 */
3041};
3045 3042
3046 return ret; 3043static int trace_die_handler(struct notifier_block *self,
3044 unsigned long val,
3045 void *data)
3046{
3047 switch (val) {
3048 case DIE_OOPS:
3049 ftrace_dump();
3050 break;
3051 default:
3052 break;
3053 }
3054 return NOTIFY_OK;
3047} 3055}
3048 3056
3049__init static int tracer_alloc_buffers(void) 3057static struct notifier_block trace_die_notifier = {
3058 .notifier_call = trace_die_handler,
3059 .priority = 200
3060};
3061
3062/*
3063 * printk is set to max of 1024, we really don't need it that big.
3064 * Nothing should be printing 1000 characters anyway.
3065 */
3066#define TRACE_MAX_PRINT 1000
3067
3068/*
3069 * Define here KERN_TRACE so that we have one place to modify
3070 * it if we decide to change what log level the ftrace dump
3071 * should be at.
3072 */
3073#define KERN_TRACE KERN_INFO
3074
3075static void
3076trace_printk_seq(struct trace_seq *s)
3050{ 3077{
3051 struct trace_array_cpu *data; 3078 /* Probably should print a warning here. */
3052 void *array; 3079 if (s->len >= 1000)
3053 struct page *page; 3080 s->len = 1000;
3054 int pages = 0;
3055 int ret = -ENOMEM;
3056 int i;
3057 3081
3058 /* TODO: make the number of buffers hot pluggable with CPUS */ 3082 /* should be zero ended, but we are paranoid. */
3059 tracing_nr_buffers = num_possible_cpus(); 3083 s->buffer[s->len] = 0;
3060 tracing_buffer_mask = cpu_possible_map;
3061 3084
3062 /* Allocate the first page for all buffers */ 3085 printk(KERN_TRACE "%s", s->buffer);
3063 for_each_tracing_cpu(i) {
3064 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3065 max_tr.data[i] = &per_cpu(max_data, i);
3066 3086
3067 array = (void *)__get_free_page(GFP_KERNEL); 3087 trace_seq_reset(s);
3068 if (array == NULL) { 3088}
3069 printk(KERN_ERR "tracer: failed to allocate page" 3089
3070 "for trace buffer!\n"); 3090
3071 goto free_buffers; 3091void ftrace_dump(void)
3072 } 3092{
3093 static DEFINE_SPINLOCK(ftrace_dump_lock);
3094 /* use static because iter can be a bit big for the stack */
3095 static struct trace_iterator iter;
3096 static cpumask_t mask;
3097 static int dump_ran;
3098 unsigned long flags;
3099 int cnt = 0, cpu;
3073 3100
3074 /* set the array to the list */ 3101 /* only one dump */
3075 INIT_LIST_HEAD(&data->trace_pages); 3102 spin_lock_irqsave(&ftrace_dump_lock, flags);
3076 page = virt_to_page(array); 3103 if (dump_ran)
3077 list_add(&page->lru, &data->trace_pages); 3104 goto out;
3078 /* use the LRU flag to differentiate the two buffers */
3079 ClearPageLRU(page);
3080 3105
3081 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 3106 dump_ran = 1;
3082 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3083 3107
3084/* Only allocate if we are actually using the max trace */ 3108 /* No turning back! */
3085#ifdef CONFIG_TRACER_MAX_TRACE 3109 ftrace_kill();
3086 array = (void *)__get_free_page(GFP_KERNEL);
3087 if (array == NULL) {
3088 printk(KERN_ERR "tracer: failed to allocate page"
3089 "for trace buffer!\n");
3090 goto free_buffers;
3091 }
3092 3110
3093 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); 3111 for_each_tracing_cpu(cpu) {
3094 page = virt_to_page(array); 3112 atomic_inc(&global_trace.data[cpu]->disabled);
3095 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3096 SetPageLRU(page);
3097#endif
3098 } 3113 }
3099 3114
3115 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3116
3117 iter.tr = &global_trace;
3118 iter.trace = current_trace;
3119
3100 /* 3120 /*
3101 * Since we allocate by orders of pages, we may be able to 3121 * We need to stop all tracing on all CPUS to read the
3102 * round up a bit. 3122 * next buffer. This is a bit expensive, but is
3123 * not done often. We fill all what we can read,
3124 * and then release the locks again.
3103 */ 3125 */
3104 global_trace.entries = ENTRIES_PER_PAGE;
3105 pages++;
3106 3126
3107 while (global_trace.entries < trace_nr_entries) { 3127 cpus_clear(mask);
3108 if (trace_alloc_page()) 3128
3109 break; 3129 while (!trace_empty(&iter)) {
3110 pages++; 3130
3131 if (!cnt)
3132 printk(KERN_TRACE "---------------------------------\n");
3133
3134 cnt++;
3135
3136 /* reset all but tr, trace, and overruns */
3137 memset(&iter.seq, 0,
3138 sizeof(struct trace_iterator) -
3139 offsetof(struct trace_iterator, seq));
3140 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3141 iter.pos = -1;
3142
3143 if (find_next_entry_inc(&iter) != NULL) {
3144 print_trace_line(&iter);
3145 trace_consume(&iter);
3146 }
3147
3148 trace_printk_seq(&iter.seq);
3111 } 3149 }
3112 max_tr.entries = global_trace.entries;
3113 3150
3114 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n", 3151 if (!cnt)
3115 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE); 3152 printk(KERN_TRACE " (ftrace buffer empty)\n");
3116 pr_info(" actual entries %ld\n", global_trace.entries); 3153 else
3154 printk(KERN_TRACE "---------------------------------\n");
3117 3155
3118 tracer_init_debugfs(); 3156 out:
3157 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3158}
3159
3160__init static int tracer_alloc_buffers(void)
3161{
3162 struct trace_array_cpu *data;
3163 int i;
3164
3165 /* TODO: make the number of buffers hot pluggable with CPUS */
3166 tracing_buffer_mask = cpu_possible_map;
3167
3168 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3169 TRACE_BUFFER_FLAGS);
3170 if (!global_trace.buffer) {
3171 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3172 WARN_ON(1);
3173 return 0;
3174 }
3175 global_trace.entries = ring_buffer_size(global_trace.buffer);
3176
3177#ifdef CONFIG_TRACER_MAX_TRACE
3178 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3179 TRACE_BUFFER_FLAGS);
3180 if (!max_tr.buffer) {
3181 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3182 WARN_ON(1);
3183 ring_buffer_free(global_trace.buffer);
3184 return 0;
3185 }
3186 max_tr.entries = ring_buffer_size(max_tr.buffer);
3187 WARN_ON(max_tr.entries != global_trace.entries);
3188#endif
3189
3190 /* Allocate the first page for all buffers */
3191 for_each_tracing_cpu(i) {
3192 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3193 max_tr.data[i] = &per_cpu(max_data, i);
3194 }
3119 3195
3120 trace_init_cmdlines(); 3196 trace_init_cmdlines();
3121 3197
3122 register_tracer(&no_tracer); 3198 register_tracer(&nop_trace);
3123 current_trace = &no_tracer; 3199#ifdef CONFIG_BOOT_TRACER
3200 register_tracer(&boot_tracer);
3201 current_trace = &boot_tracer;
3202 current_trace->init(&global_trace);
3203#else
3204 current_trace = &nop_trace;
3205#endif
3124 3206
3125 /* All seems OK, enable tracing */ 3207 /* All seems OK, enable tracing */
3126 global_trace.ctrl = tracer_enabled; 3208 global_trace.ctrl = tracer_enabled;
3127 tracing_disabled = 0; 3209 tracing_disabled = 0;
3128 3210
3129 return 0; 3211 atomic_notifier_chain_register(&panic_notifier_list,
3212 &trace_panic_notifier);
3130 3213
3131 free_buffers: 3214 register_die_notifier(&trace_die_notifier);
3132 for (i-- ; i >= 0; i--) {
3133 struct page *page, *tmp;
3134 struct trace_array_cpu *data = global_trace.data[i];
3135 3215
3136 if (data) { 3216 return 0;
3137 list_for_each_entry_safe(page, tmp,
3138 &data->trace_pages, lru) {
3139 list_del_init(&page->lru);
3140 __free_page(page);
3141 }
3142 }
3143
3144#ifdef CONFIG_TRACER_MAX_TRACE
3145 data = max_tr.data[i];
3146 if (data) {
3147 list_for_each_entry_safe(page, tmp,
3148 &data->trace_pages, lru) {
3149 list_del_init(&page->lru);
3150 __free_page(page);
3151 }
3152 }
3153#endif
3154 }
3155 return ret;
3156} 3217}
3157fs_initcall(tracer_alloc_buffers); 3218early_initcall(tracer_alloc_buffers);
3219fs_initcall(tracer_init_debugfs);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2b..8465ad052707 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,9 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h>
8#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h>
9 11
10enum trace_type { 12enum trace_type {
11 __TRACE_FIRST_TYPE = 0, 13 __TRACE_FIRST_TYPE = 0,
@@ -13,38 +15,60 @@ enum trace_type {
13 TRACE_FN, 15 TRACE_FN,
14 TRACE_CTX, 16 TRACE_CTX,
15 TRACE_WAKE, 17 TRACE_WAKE,
18 TRACE_CONT,
16 TRACE_STACK, 19 TRACE_STACK,
20 TRACE_PRINT,
17 TRACE_SPECIAL, 21 TRACE_SPECIAL,
18 TRACE_MMIO_RW, 22 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP, 23 TRACE_MMIO_MAP,
24 TRACE_BOOT,
20 25
21 __TRACE_LAST_TYPE 26 __TRACE_LAST_TYPE
22}; 27};
23 28
24/* 29/*
30 * The trace entry - the most basic unit of tracing. This is what
31 * is printed in the end as a single line in the trace output, such as:
32 *
33 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
34 */
35struct trace_entry {
36 unsigned char type;
37 unsigned char cpu;
38 unsigned char flags;
39 unsigned char preempt_count;
40 int pid;
41};
42
43/*
25 * Function trace entry - function address and parent function addres: 44 * Function trace entry - function address and parent function addres:
26 */ 45 */
27struct ftrace_entry { 46struct ftrace_entry {
47 struct trace_entry ent;
28 unsigned long ip; 48 unsigned long ip;
29 unsigned long parent_ip; 49 unsigned long parent_ip;
30}; 50};
51extern struct tracer boot_tracer;
31 52
32/* 53/*
33 * Context switch trace entry - which task (and prio) we switched from/to: 54 * Context switch trace entry - which task (and prio) we switched from/to:
34 */ 55 */
35struct ctx_switch_entry { 56struct ctx_switch_entry {
57 struct trace_entry ent;
36 unsigned int prev_pid; 58 unsigned int prev_pid;
37 unsigned char prev_prio; 59 unsigned char prev_prio;
38 unsigned char prev_state; 60 unsigned char prev_state;
39 unsigned int next_pid; 61 unsigned int next_pid;
40 unsigned char next_prio; 62 unsigned char next_prio;
41 unsigned char next_state; 63 unsigned char next_state;
64 unsigned int next_cpu;
42}; 65};
43 66
44/* 67/*
45 * Special (free-form) trace entry: 68 * Special (free-form) trace entry:
46 */ 69 */
47struct special_entry { 70struct special_entry {
71 struct trace_entry ent;
48 unsigned long arg1; 72 unsigned long arg1;
49 unsigned long arg2; 73 unsigned long arg2;
50 unsigned long arg3; 74 unsigned long arg3;
@@ -57,33 +81,62 @@ struct special_entry {
57#define FTRACE_STACK_ENTRIES 8 81#define FTRACE_STACK_ENTRIES 8
58 82
59struct stack_entry { 83struct stack_entry {
84 struct trace_entry ent;
60 unsigned long caller[FTRACE_STACK_ENTRIES]; 85 unsigned long caller[FTRACE_STACK_ENTRIES];
61}; 86};
62 87
63/* 88/*
64 * The trace entry - the most basic unit of tracing. This is what 89 * ftrace_printk entry:
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */ 90 */
69struct trace_entry { 91struct print_entry {
70 char type; 92 struct trace_entry ent;
71 char cpu; 93 unsigned long ip;
72 char flags; 94 char buf[];
73 char preempt_count; 95};
74 int pid; 96
75 cycle_t t; 97#define TRACE_OLD_SIZE 88
76 union { 98
77 struct ftrace_entry fn; 99struct trace_field_cont {
78 struct ctx_switch_entry ctx; 100 unsigned char type;
79 struct special_entry special; 101 /* Temporary till we get rid of this completely */
80 struct stack_entry stack; 102 char buf[TRACE_OLD_SIZE - 1];
81 struct mmiotrace_rw mmiorw; 103};
82 struct mmiotrace_map mmiomap; 104
83 }; 105struct trace_mmiotrace_rw {
106 struct trace_entry ent;
107 struct mmiotrace_rw rw;
108};
109
110struct trace_mmiotrace_map {
111 struct trace_entry ent;
112 struct mmiotrace_map map;
113};
114
115struct trace_boot {
116 struct trace_entry ent;
117 struct boot_trace initcall;
118};
119
120/*
121 * trace_flag_type is an enumeration that holds different
122 * states when a trace occurs. These are:
123 * IRQS_OFF - interrupts were disabled
124 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
125 * NEED_RESCHED - reschedule is requested
126 * HARDIRQ - inside an interrupt handler
127 * SOFTIRQ - inside a softirq handler
128 * CONT - multiple entries hold the trace item
129 */
130enum trace_flag_type {
131 TRACE_FLAG_IRQS_OFF = 0x01,
132 TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
133 TRACE_FLAG_NEED_RESCHED = 0x04,
134 TRACE_FLAG_HARDIRQ = 0x08,
135 TRACE_FLAG_SOFTIRQ = 0x10,
136 TRACE_FLAG_CONT = 0x20,
84}; 137};
85 138
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) 139#define TRACE_BUF_SIZE 1024
87 140
88/* 141/*
89 * The CPU trace array - it consists of thousands of trace entries 142 * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +144,9 @@ struct trace_entry {
91 * the trace, etc.) 144 * the trace, etc.)
92 */ 145 */
93struct trace_array_cpu { 146struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled; 147 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98 148
99 /* these fields get copied into max-trace: */ 149 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx; 150 unsigned long trace_idx;
105 unsigned long overrun; 151 unsigned long overrun;
106 unsigned long saved_latency; 152 unsigned long saved_latency;
@@ -124,6 +170,7 @@ struct trace_iterator;
124 * They have on/off state as well: 170 * They have on/off state as well:
125 */ 171 */
126struct trace_array { 172struct trace_array {
173 struct ring_buffer *buffer;
127 unsigned long entries; 174 unsigned long entries;
128 long ctrl; 175 long ctrl;
129 int cpu; 176 int cpu;
@@ -132,6 +179,56 @@ struct trace_array {
132 struct trace_array_cpu *data[NR_CPUS]; 179 struct trace_array_cpu *data[NR_CPUS];
133}; 180};
134 181
182#define FTRACE_CMP_TYPE(var, type) \
183 __builtin_types_compatible_p(typeof(var), type *)
184
185#undef IF_ASSIGN
186#define IF_ASSIGN(var, entry, etype, id) \
187 if (FTRACE_CMP_TYPE(var, etype)) { \
188 var = (typeof(var))(entry); \
189 WARN_ON(id && (entry)->type != id); \
190 break; \
191 }
192
193/* Will cause compile errors if type is not found. */
194extern void __ftrace_bad_type(void);
195
196/*
197 * The trace_assign_type is a verifier that the entry type is
198 * the same as the type being assigned. To add new types simply
199 * add a line with the following format:
200 *
201 * IF_ASSIGN(var, ent, type, id);
202 *
203 * Where "type" is the trace type that includes the trace_entry
204 * as the "ent" item. And "id" is the trace identifier that is
205 * used in the trace_type enum.
206 *
207 * If the type can have more than one id, then use zero.
208 */
209#define trace_assign_type(var, ent) \
210 do { \
211 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
212 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
213 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
214 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
215 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
216 IF_ASSIGN(var, ent, struct special_entry, 0); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
218 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
222 __ftrace_bad_type(); \
223 } while (0)
224
225/* Return values for print_line callback */
226enum print_line_t {
227 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
228 TRACE_TYPE_HANDLED = 1,
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230};
231
135/* 232/*
136 * A specific tracer, represented by methods that operate on a trace array: 233 * A specific tracer, represented by methods that operate on a trace array:
137 */ 234 */
@@ -152,7 +249,7 @@ struct tracer {
152 int (*selftest)(struct tracer *trace, 249 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr); 250 struct trace_array *tr);
154#endif 251#endif
155 int (*print_line)(struct trace_iterator *iter); 252 enum print_line_t (*print_line)(struct trace_iterator *iter);
156 struct tracer *next; 253 struct tracer *next;
157 int print_max; 254 int print_max;
158}; 255};
@@ -171,57 +268,58 @@ struct trace_iterator {
171 struct trace_array *tr; 268 struct trace_array *tr;
172 struct tracer *trace; 269 struct tracer *trace;
173 void *private; 270 void *private;
174 long last_overrun[NR_CPUS]; 271 struct ring_buffer_iter *buffer_iter[NR_CPUS];
175 long overrun[NR_CPUS];
176 272
177 /* The below is zeroed out in pipe_read */ 273 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq; 274 struct trace_seq seq;
179 struct trace_entry *ent; 275 struct trace_entry *ent;
180 int cpu; 276 int cpu;
181 277 u64 ts;
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184 278
185 unsigned long iter_flags; 279 unsigned long iter_flags;
186 loff_t pos; 280 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx; 281 long idx;
191}; 282};
192 283
193void tracing_reset(struct trace_array_cpu *data); 284void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu);
194int tracing_open_generic(struct inode *inode, struct file *filp); 286int tracing_open_generic(struct inode *inode, struct file *filp);
195struct dentry *tracing_init_dentry(void); 287struct dentry *tracing_init_dentry(void);
196void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 288void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197 289
290struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
291 struct trace_array_cpu *data);
292void tracing_generic_entry_update(struct trace_entry *entry,
293 unsigned long flags,
294 int pc);
295
198void ftrace(struct trace_array *tr, 296void ftrace(struct trace_array *tr,
199 struct trace_array_cpu *data, 297 struct trace_array_cpu *data,
200 unsigned long ip, 298 unsigned long ip,
201 unsigned long parent_ip, 299 unsigned long parent_ip,
202 unsigned long flags); 300 unsigned long flags, int pc);
203void tracing_sched_switch_trace(struct trace_array *tr, 301void tracing_sched_switch_trace(struct trace_array *tr,
204 struct trace_array_cpu *data, 302 struct trace_array_cpu *data,
205 struct task_struct *prev, 303 struct task_struct *prev,
206 struct task_struct *next, 304 struct task_struct *next,
207 unsigned long flags); 305 unsigned long flags, int pc);
208void tracing_record_cmdline(struct task_struct *tsk); 306void tracing_record_cmdline(struct task_struct *tsk);
209 307
210void tracing_sched_wakeup_trace(struct trace_array *tr, 308void tracing_sched_wakeup_trace(struct trace_array *tr,
211 struct trace_array_cpu *data, 309 struct trace_array_cpu *data,
212 struct task_struct *wakee, 310 struct task_struct *wakee,
213 struct task_struct *cur, 311 struct task_struct *cur,
214 unsigned long flags); 312 unsigned long flags, int pc);
215void trace_special(struct trace_array *tr, 313void trace_special(struct trace_array *tr,
216 struct trace_array_cpu *data, 314 struct trace_array_cpu *data,
217 unsigned long arg1, 315 unsigned long arg1,
218 unsigned long arg2, 316 unsigned long arg2,
219 unsigned long arg3); 317 unsigned long arg3, int pc);
220void trace_function(struct trace_array *tr, 318void trace_function(struct trace_array *tr,
221 struct trace_array_cpu *data, 319 struct trace_array_cpu *data,
222 unsigned long ip, 320 unsigned long ip,
223 unsigned long parent_ip, 321 unsigned long parent_ip,
224 unsigned long flags); 322 unsigned long flags, int pc);
225 323
226void tracing_start_cmdline_record(void); 324void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void); 325void tracing_stop_cmdline_record(void);
@@ -239,7 +337,7 @@ void update_max_tr_single(struct trace_array *tr,
239 337
240extern cycle_t ftrace_now(int cpu); 338extern cycle_t ftrace_now(int cpu);
241 339
242#ifdef CONFIG_FTRACE 340#ifdef CONFIG_FUNCTION_TRACER
243void tracing_start_function_trace(void); 341void tracing_start_function_trace(void);
244void tracing_stop_function_trace(void); 342void tracing_stop_function_trace(void);
245#else 343#else
@@ -268,51 +366,33 @@ extern unsigned long ftrace_update_tot_cnt;
268extern int DYN_FTRACE_TEST_NAME(void); 366extern int DYN_FTRACE_TEST_NAME(void);
269#endif 367#endif
270 368
271#ifdef CONFIG_MMIOTRACE
272extern void __trace_mmiotrace_rw(struct trace_array *tr,
273 struct trace_array_cpu *data,
274 struct mmiotrace_rw *rw);
275extern void __trace_mmiotrace_map(struct trace_array *tr,
276 struct trace_array_cpu *data,
277 struct mmiotrace_map *map);
278#endif
279
280#ifdef CONFIG_FTRACE_STARTUP_TEST 369#ifdef CONFIG_FTRACE_STARTUP_TEST
281#ifdef CONFIG_FTRACE
282extern int trace_selftest_startup_function(struct tracer *trace, 370extern int trace_selftest_startup_function(struct tracer *trace,
283 struct trace_array *tr); 371 struct trace_array *tr);
284#endif
285#ifdef CONFIG_IRQSOFF_TRACER
286extern int trace_selftest_startup_irqsoff(struct tracer *trace, 372extern int trace_selftest_startup_irqsoff(struct tracer *trace,
287 struct trace_array *tr); 373 struct trace_array *tr);
288#endif
289#ifdef CONFIG_PREEMPT_TRACER
290extern int trace_selftest_startup_preemptoff(struct tracer *trace, 374extern int trace_selftest_startup_preemptoff(struct tracer *trace,
291 struct trace_array *tr); 375 struct trace_array *tr);
292#endif
293#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
294extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, 376extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
295 struct trace_array *tr); 377 struct trace_array *tr);
296#endif
297#ifdef CONFIG_SCHED_TRACER
298extern int trace_selftest_startup_wakeup(struct tracer *trace, 378extern int trace_selftest_startup_wakeup(struct tracer *trace,
299 struct trace_array *tr); 379 struct trace_array *tr);
300#endif 380extern int trace_selftest_startup_nop(struct tracer *trace,
301#ifdef CONFIG_CONTEXT_SWITCH_TRACER 381 struct trace_array *tr);
302extern int trace_selftest_startup_sched_switch(struct tracer *trace, 382extern int trace_selftest_startup_sched_switch(struct tracer *trace,
303 struct trace_array *tr); 383 struct trace_array *tr);
304#endif
305#ifdef CONFIG_SYSPROF_TRACER
306extern int trace_selftest_startup_sysprof(struct tracer *trace, 384extern int trace_selftest_startup_sysprof(struct tracer *trace,
307 struct trace_array *tr); 385 struct trace_array *tr);
308#endif
309#endif /* CONFIG_FTRACE_STARTUP_TEST */ 386#endif /* CONFIG_FTRACE_STARTUP_TEST */
310 387
311extern void *head_page(struct trace_array_cpu *data); 388extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt); 393 size_t cnt);
315extern long ns2usecs(cycle_t nsec); 394extern long ns2usecs(cycle_t nsec);
395extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
316 396
317extern unsigned long trace_flags; 397extern unsigned long trace_flags;
318 398
@@ -334,6 +414,9 @@ enum trace_iterator_flags {
334 TRACE_ITER_BLOCK = 0x80, 414 TRACE_ITER_BLOCK = 0x80,
335 TRACE_ITER_STACKTRACE = 0x100, 415 TRACE_ITER_STACKTRACE = 0x100,
336 TRACE_ITER_SCHED_TREE = 0x200, 416 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400,
337}; 418};
338 419
420extern struct tracer nop_trace;
421
339#endif /* _LINUX_KERNEL_TRACE_H */ 422#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 000000000000..d0a5e50eeff2
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,126 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12
13#include "trace.h"
14
/* Trace array the boot tracer records into; assigned in boot_trace_init(). */
15static struct trace_array *boot_trace;
/* Recording switch: trace_boot() returns immediately while this is 0. */
16static int trace_boot_enabled;
17
18
19/*
 * Turn initcall recording on by setting trace_boot_enabled.
 * Should be started after do_pre_smp_initcalls() in init/main.c
 */
20void start_boot_trace(void)
21{
22 trace_boot_enabled = 1;
23}
24
/* Turn initcall recording off by clearing trace_boot_enabled. */
25void stop_boot_trace(void)
26{
27 trace_boot_enabled = 0;
28}
29
/*
 * Installed as the .reset callback of boot_tracer.
 * Only stops recording; @tr is not used.
 */
30void reset_boot_trace(struct trace_array *tr)
31{
32 stop_boot_trace();
33}
34
/*
 * Installed as the .init callback of boot_tracer.
 *
 * Remembers @tr as the array trace_boot() writes to, leaves recording
 * switched off, and calls tracing_reset() for every CPU in
 * cpu_possible_map.
 */
35static void boot_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 boot_trace = tr;
39
	/* Recording stays off until start_boot_trace() is called. */
40 trace_boot_enabled = 0;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44}
45
/*
 * Installed as the .ctrl_update callback of boot_tracer.
 * Starts recording when tr->ctrl is non-zero, stops it otherwise.
 */
46static void boot_trace_ctrl_update(struct trace_array *tr)
47{
48 if (tr->ctrl)
49 start_boot_trace();
50 else
51 stop_boot_trace();
52}
53
54static enum print_line_t initcall_print_line(struct trace_iterator *iter)
55{
56 int ret;
57 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime);
62 struct timespec rettime = ktime_to_timespec(it->rettime);
63
64 if (entry->type == TRACE_BOOT) {
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
66 calltime.tv_sec,
67 calltime.tv_nsec,
68 it->func, it->caller);
69 if (!ret)
70 return TRACE_TYPE_PARTIAL_LINE;
71
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n",
74 rettime.tv_sec,
75 rettime.tv_nsec,
76 it->func, it->result, it->duration);
77
78 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE;
80 return TRACE_TYPE_HANDLED;
81 }
82 return TRACE_TYPE_UNHANDLED;
83}
84
/*
 * Tracer descriptor for the initcall tracer, registered under the name
 * "initcall". Wires the init/reset/ctrl_update callbacks defined above
 * and uses initcall_print_line() to format entries.
 */
85struct tracer boot_tracer __read_mostly =
86{
87 .name = "initcall",
88 .init = boot_trace_init,
89 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line,
92};
93
94void trace_boot(struct boot_trace *it, initcall_t fn)
95{
96 struct ring_buffer_event *event;
97 struct trace_boot *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace;
101
102 if (!trace_boot_enabled)
103 return;
104
105 /* Get its name now since this function could
106 * disappear because it is in the .init section.
107 */
108 sprint_symbol(it->func, (unsigned long)fn);
109 preempt_disable();
110 data = tr->data[smp_processor_id()];
111
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags);
114 if (!event)
115 goto out;
116 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT;
119 entry->initcall = *it;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121
122 trace_wake_up();
123
124 out:
125 preempt_enable();
126}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 312144897970..0f85a64003d3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
23 tr->time_start = ftrace_now(tr->cpu); 23 tr->time_start = ftrace_now(tr->cpu);
24 24
25 for_each_online_cpu(cpu) 25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]); 26 tracing_reset(tr, cpu);
27} 27}
28 28
29static void start_function_trace(struct trace_array *tr) 29static void start_function_trace(struct trace_array *tr)
@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
64 64
65static struct tracer function_trace __read_mostly = 65static struct tracer function_trace __read_mostly =
66{ 66{
67 .name = "ftrace", 67 .name = "function",
68 .init = function_trace_init, 68 .init = function_trace_init,
69 .reset = function_trace_reset, 69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 70 .ctrl_update = function_trace_ctrl_update,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ece6cfb649fa..9c74071c10e0 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
63 */ 63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence; 64static __cacheline_aligned_in_smp unsigned long max_sequence;
65 65
66#ifdef CONFIG_FTRACE 66#ifdef CONFIG_FUNCTION_TRACER
67/* 67/*
68 * irqsoff uses its own tracer function to keep the overhead down: 68 * irqsoff uses its own tracer function to keep the overhead down:
69 */ 69 */
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags); 98 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
104{ 104{
105 .func = irqsoff_tracer_call, 105 .func = irqsoff_tracer_call,
106}; 106};
107#endif /* CONFIG_FTRACE */ 107#endif /* CONFIG_FUNCTION_TRACER */
108 108
109/* 109/*
110 * Should this new latency be reported/recorded? 110 * Should this new latency be reported/recorded?
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
130 unsigned long latency, t0, t1; 130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta; 131 cycle_t T0, T1, delta;
132 unsigned long flags; 132 unsigned long flags;
133 int pc;
133 134
134 /* 135 /*
135 * usecs conversion is slow so we try to delay the conversion 136 * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
141 142
142 local_save_flags(flags); 143 local_save_flags(flags);
143 144
145 pc = preempt_count();
146
144 if (!report_latency(delta)) 147 if (!report_latency(delta))
145 goto out; 148 goto out;
146 149
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 153 if (!report_latency(delta))
151 goto out_unlock; 154 goto out_unlock;
152 155
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
154 157
155 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
156 159
@@ -173,8 +176,8 @@ out_unlock:
173out: 176out:
174 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data); 179 tracing_reset(tr, cpu);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
178} 181}
179 182
180static inline void 183static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
203 data->critical_sequence = max_sequence; 206 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu); 207 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip; 208 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data); 209 tracing_reset(tr, cpu);
207 210
208 local_save_flags(flags); 211 local_save_flags(flags);
209 212
210 trace_function(tr, data, ip, parent_ip, flags); 213 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
211 214
212 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
213 216
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
234 237
235 data = tr->data[cpu]; 238 data = tr->data[cpu];
236 239
237 if (unlikely(!data) || unlikely(!head_page(data)) || 240 if (unlikely(!data) ||
238 !data->critical_start || atomic_read(&data->disabled)) 241 !data->critical_start || atomic_read(&data->disabled))
239 return; 242 return;
240 243
241 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
242 245
243 local_save_flags(flags); 246 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags); 247 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0; 249 data->critical_start = 0;
247 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb4..f28484618ff0 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
27 tr->time_start = ftrace_now(tr->cpu); 27 tr->time_start = ftrace_now(tr->cpu);
28 28
29 for_each_online_cpu(cpu) 29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]); 30 tracing_reset(tr, cpu);
31} 31}
32 32
33static void mmio_trace_init(struct trace_array *tr) 33static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
130{ 130{
131 int cpu; 131 int cpu;
132 unsigned long cnt = 0; 132 unsigned long cnt = 0;
133/* FIXME: */
134#if 0
133 for_each_online_cpu(cpu) { 135 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu]; 136 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0; 137 iter->overrun[cpu] = 0;
136 } 138 }
139#endif
140 (void)cpu;
137 return cnt; 141 return cnt;
138} 142}
139 143
@@ -171,17 +175,21 @@ print_out:
171 return (ret == -EBUSY) ? 0 : ret; 175 return (ret == -EBUSY) ? 0 : ret;
172} 176}
173 177
174static int mmio_print_rw(struct trace_iterator *iter) 178static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
175{ 179{
176 struct trace_entry *entry = iter->ent; 180 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw; 181 struct trace_mmiotrace_rw *field;
182 struct mmiotrace_rw *rw;
178 struct trace_seq *s = &iter->seq; 183 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t); 184 unsigned long long t = ns2usecs(iter->ts);
180 unsigned long usec_rem = do_div(t, 1000000ULL); 185 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t; 186 unsigned secs = (unsigned long)t;
182 int ret = 1; 187 int ret = 1;
183 188
184 switch (entry->mmiorw.opcode) { 189 trace_assign_type(field, entry);
190 rw = &field->rw;
191
192 switch (rw->opcode) {
185 case MMIO_READ: 193 case MMIO_READ:
186 ret = trace_seq_printf(s, 194 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 195 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +217,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
209 break; 217 break;
210 } 218 }
211 if (ret) 219 if (ret)
212 return 1; 220 return TRACE_TYPE_HANDLED;
213 return 0; 221 return TRACE_TYPE_PARTIAL_LINE;
214} 222}
215 223
216static int mmio_print_map(struct trace_iterator *iter) 224static enum print_line_t mmio_print_map(struct trace_iterator *iter)
217{ 225{
218 struct trace_entry *entry = iter->ent; 226 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap; 227 struct trace_mmiotrace_map *field;
228 struct mmiotrace_map *m;
220 struct trace_seq *s = &iter->seq; 229 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t); 230 unsigned long long t = ns2usecs(iter->ts);
222 unsigned long usec_rem = do_div(t, 1000000ULL); 231 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t; 232 unsigned secs = (unsigned long)t;
224 int ret = 1; 233 int ret;
225 234
226 switch (entry->mmiorw.opcode) { 235 trace_assign_type(field, entry);
236 m = &field->map;
237
238 switch (m->opcode) {
227 case MMIO_PROBE: 239 case MMIO_PROBE:
228 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 241 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +253,43 @@ static int mmio_print_map(struct trace_iterator *iter)
241 break; 253 break;
242 } 254 }
243 if (ret) 255 if (ret)
244 return 1; 256 return TRACE_TYPE_HANDLED;
245 return 0; 257 return TRACE_TYPE_PARTIAL_LINE;
258}
259
260static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
261{
262 struct trace_entry *entry = iter->ent;
263 struct print_entry *print = (struct print_entry *)entry;
264 const char *msg = print->buf;
265 struct trace_seq *s = &iter->seq;
266 unsigned long long t = ns2usecs(iter->ts);
267 unsigned long usec_rem = do_div(t, 1000000ULL);
268 unsigned secs = (unsigned long)t;
269 int ret;
270
271 /* The trailing newline must be in the message. */
272 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275
276 if (entry->flags & TRACE_FLAG_CONT)
277 trace_seq_print_cont(s, iter);
278
279 return TRACE_TYPE_HANDLED;
246} 280}
247 281
248/* return 0 to abort printing without consuming current entry in pipe mode */ 282static enum print_line_t mmio_print_line(struct trace_iterator *iter)
249static int mmio_print_line(struct trace_iterator *iter)
250{ 283{
251 switch (iter->ent->type) { 284 switch (iter->ent->type) {
252 case TRACE_MMIO_RW: 285 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter); 286 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP: 287 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter); 288 return mmio_print_map(iter);
289 case TRACE_PRINT:
290 return mmio_print_mark(iter);
256 default: 291 default:
257 return 1; /* ignore unknown entries */ 292 return TRACE_TYPE_HANDLED; /* ignore unknown entries */
258 } 293 }
259} 294}
260 295
@@ -276,6 +311,27 @@ __init static int init_mmio_trace(void)
276} 311}
277device_initcall(init_mmio_trace); 312device_initcall(init_mmio_trace);
278 313
/*
 * Write one MMIO read/write record into tr->buffer.
 *
 * Reserves sizeof(struct trace_mmiotrace_rw) in the ring buffer, fills the
 * generic header (flags 0, current preempt_count()), tags the entry as
 * TRACE_MMIO_RW, copies *rw into it, commits it and calls trace_wake_up().
 * If the reservation fails the record is dropped without any report.
 *
 * @data is not used in this function.
 */
314static void __trace_mmiotrace_rw(struct trace_array *tr,
315 struct trace_array_cpu *data,
316 struct mmiotrace_rw *rw)
317{
318 struct ring_buffer_event *event;
319 struct trace_mmiotrace_rw *entry;
320 unsigned long irq_flags;
321
322 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
323 &irq_flags);
324 if (!event)
325 return;
326 entry = ring_buffer_event_data(event);
327 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
328 entry->ent.type = TRACE_MMIO_RW;
329 entry->rw = *rw;
330 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
331
332 trace_wake_up();
333}
334
279void mmio_trace_rw(struct mmiotrace_rw *rw) 335void mmio_trace_rw(struct mmiotrace_rw *rw)
280{ 336{
281 struct trace_array *tr = mmio_trace_array; 337 struct trace_array *tr = mmio_trace_array;
@@ -283,6 +339,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
283 __trace_mmiotrace_rw(tr, data, rw); 339 __trace_mmiotrace_rw(tr, data, rw);
284} 340}
285 341
/*
 * Write one MMIO mapping record into tr->buffer.
 *
 * Reserves sizeof(struct trace_mmiotrace_map) in the ring buffer, fills
 * the generic header (flags 0, current preempt_count()), tags the entry as
 * TRACE_MMIO_MAP, copies *map into it, commits it and calls
 * trace_wake_up(). If the reservation fails the record is dropped without
 * any report.
 *
 * @data is not used in this function.
 */
342static void __trace_mmiotrace_map(struct trace_array *tr,
343 struct trace_array_cpu *data,
344 struct mmiotrace_map *map)
345{
346 struct ring_buffer_event *event;
347 struct trace_mmiotrace_map *entry;
348 unsigned long irq_flags;
349
350 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
351 &irq_flags);
352 if (!event)
353 return;
354 entry = ring_buffer_event_data(event);
355 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
356 entry->ent.type = TRACE_MMIO_MAP;
357 entry->map = *map;
358 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
359
360 trace_wake_up();
361}
361}
362
286void mmio_trace_mapping(struct mmiotrace_map *map) 363void mmio_trace_mapping(struct mmiotrace_map *map)
287{ 364{
288 struct trace_array *tr = mmio_trace_array; 365 struct trace_array *tr = mmio_trace_array;
@@ -293,3 +370,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
293 __trace_mmiotrace_map(tr, data, map); 370 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable(); 371 preempt_enable();
295} 372}
373
374int mmio_trace_printk(const char *fmt, va_list args)
375{
376 return trace_vprintk(0, fmt, args);
377}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 000000000000..4592b4862515
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,64 @@
1/*
2 * nop tracer
3 *
4 * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12
13#include "trace.h"
14
15static struct trace_array *ctx_trace;
16
17static void start_nop_trace(struct trace_array *tr)
18{
19 /* Nothing to do! */
20}
21
22static void stop_nop_trace(struct trace_array *tr)
23{
24 /* Nothing to do! */
25}
26
27static void nop_trace_init(struct trace_array *tr)
28{
29 int cpu;
30 ctx_trace = tr;
31
32 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu);
34
35 if (tr->ctrl)
36 start_nop_trace(tr);
37}
38
39static void nop_trace_reset(struct trace_array *tr)
40{
41 if (tr->ctrl)
42 stop_nop_trace(tr);
43}
44
45static void nop_trace_ctrl_update(struct trace_array *tr)
46{
47 /* When starting a new trace, reset the buffers */
48 if (tr->ctrl)
49 start_nop_trace(tr);
50 else
51 stop_nop_trace(tr);
52}
53
54struct tracer nop_trace __read_mostly =
55{
56 .name = "nop",
57 .init = nop_trace_init,
58 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop,
62#endif
63};
64
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..b8f56beb1a62 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
@@ -19,15 +19,16 @@ static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static atomic_t sched_ref;
20 20
21static void 21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev, 22probe_sched_switch(struct rq *__rq, struct task_struct *prev,
23 struct task_struct *next) 23 struct task_struct *next)
24{ 24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data; 25 struct trace_array_cpu *data;
28 unsigned long flags; 26 unsigned long flags;
29 long disabled;
30 int cpu; 27 int cpu;
28 int pc;
29
30 if (!atomic_read(&sched_ref))
31 return;
31 32
32 tracing_record_cmdline(prev); 33 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next); 34 tracing_record_cmdline(next);
@@ -35,97 +36,41 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
35 if (!tracer_enabled) 36 if (!tracer_enabled)
36 return; 37 return;
37 38
39 pc = preempt_count();
38 local_irq_save(flags); 40 local_irq_save(flags);
39 cpu = raw_smp_processor_id(); 41 cpu = raw_smp_processor_id();
40 data = tr->data[cpu]; 42 data = ctx_trace->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42 43
43 if (likely(disabled == 1)) 44 if (likely(!atomic_read(&data->disabled)))
44 tracing_sched_switch_trace(tr, data, prev, next, flags); 45 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
45 46
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags); 47 local_irq_restore(flags);
48} 48}
49 49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void 50static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct 51probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
78 task_struct *curr)
79{ 52{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data; 53 struct trace_array_cpu *data;
83 unsigned long flags; 54 unsigned long flags;
84 long disabled; 55 int cpu, pc;
85 int cpu;
86 56
87 if (!tracer_enabled) 57 if (!likely(tracer_enabled))
88 return; 58 return;
89 59
90 tracing_record_cmdline(curr); 60 pc = preempt_count();
61 tracing_record_cmdline(current);
91 62
92 local_irq_save(flags); 63 local_irq_save(flags);
93 cpu = raw_smp_processor_id(); 64 cpu = raw_smp_processor_id();
94 data = tr->data[cpu]; 65 data = ctx_trace->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96 66
97 if (likely(disabled == 1)) 67 if (likely(!atomic_read(&data->disabled)))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); 68 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
69 flags, pc);
99 70
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags); 71 local_irq_restore(flags);
102} 72}
103 73
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr) 74static void sched_switch_reset(struct trace_array *tr)
130{ 75{
131 int cpu; 76 int cpu;
@@ -133,67 +78,47 @@ static void sched_switch_reset(struct trace_array *tr)
133 tr->time_start = ftrace_now(tr->cpu); 78 tr->time_start = ftrace_now(tr->cpu);
134 79
135 for_each_online_cpu(cpu) 80 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]); 81 tracing_reset(tr, cpu);
137} 82}
138 83
139static int tracing_sched_register(void) 84static int tracing_sched_register(void)
140{ 85{
141 int ret; 86 int ret;
142 87
143 ret = marker_probe_register("kernel_sched_wakeup", 88 ret = register_trace_sched_wakeup(probe_sched_wakeup);
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) { 89 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker" 90 pr_info("wakeup trace: Couldn't activate tracepoint"
149 " probe to kernel_sched_wakeup\n"); 91 " probe to kernel_sched_wakeup\n");
150 return ret; 92 return ret;
151 } 93 }
152 94
153 ret = marker_probe_register("kernel_sched_wakeup_new", 95 ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) { 96 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker" 97 pr_info("wakeup trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_wakeup_new\n"); 98 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe; 99 goto fail_deprobe;
161 } 100 }
162 101
163 ret = marker_probe_register("kernel_sched_schedule", 102 ret = register_trace_sched_switch(probe_sched_switch);
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) { 103 if (ret) {
169 pr_info("sched trace: Couldn't add marker" 104 pr_info("sched trace: Couldn't activate tracepoint"
170 " probe to kernel_sched_schedule\n"); 105 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new; 106 goto fail_deprobe_wake_new;
172 } 107 }
173 108
174 return ret; 109 return ret;
175fail_deprobe_wake_new: 110fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new", 111 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe: 112fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup", 113 unregister_trace_sched_wakeup(probe_sched_wakeup);
181 wake_up_callback,
182 &ctx_trace);
183 return ret; 114 return ret;
184} 115}
185 116
186static void tracing_sched_unregister(void) 117static void tracing_sched_unregister(void)
187{ 118{
188 marker_probe_unregister("kernel_sched_schedule", 119 unregister_trace_sched_switch(probe_sched_switch);
189 sched_switch_callback, 120 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
190 &ctx_trace); 121 unregister_trace_sched_wakeup(probe_sched_wakeup);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197} 122}
198 123
199static void tracing_start_sched_switch(void) 124static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..3ae93f16b565 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/marker.h> 18#include <trace/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
31 31
32static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
33 33
34#ifdef CONFIG_FTRACE 34#ifdef CONFIG_FUNCTION_TRACER
35/* 35/*
36 * irqsoff uses its own tracer function to keep the overhead down: 36 * irqsoff uses its own tracer function to keep the overhead down:
37 */ 37 */
@@ -44,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
44 long disabled; 44 long disabled;
45 int resched; 45 int resched;
46 int cpu; 46 int cpu;
47 int pc;
47 48
48 if (likely(!wakeup_task)) 49 if (likely(!wakeup_task))
49 return; 50 return;
50 51
52 pc = preempt_count();
51 resched = need_resched(); 53 resched = need_resched();
52 preempt_disable_notrace(); 54 preempt_disable_notrace();
53 55
@@ -70,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
70 if (task_cpu(wakeup_task) != cpu) 72 if (task_cpu(wakeup_task) != cpu)
71 goto unlock; 73 goto unlock;
72 74
73 trace_function(tr, data, ip, parent_ip, flags); 75 trace_function(tr, data, ip, parent_ip, flags, pc);
74 76
75 unlock: 77 unlock:
76 __raw_spin_unlock(&wakeup_lock); 78 __raw_spin_unlock(&wakeup_lock);
@@ -94,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
94{ 96{
95 .func = wakeup_tracer_call, 97 .func = wakeup_tracer_call,
96}; 98};
97#endif /* CONFIG_FTRACE */ 99#endif /* CONFIG_FUNCTION_TRACER */
98 100
99/* 101/*
100 * Should this new latency be reported/recorded? 102 * Should this new latency be reported/recorded?
@@ -112,17 +114,18 @@ static int report_latency(cycle_t delta)
112} 114}
113 115
114static void notrace 116static void notrace
115wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, 117probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
116 struct task_struct *next) 118 struct task_struct *next)
117{ 119{
118 unsigned long latency = 0, t0 = 0, t1 = 0; 120 unsigned long latency = 0, t0 = 0, t1 = 0;
119 struct trace_array **ptr = private;
120 struct trace_array *tr = *ptr;
121 struct trace_array_cpu *data; 121 struct trace_array_cpu *data;
122 cycle_t T0, T1, delta; 122 cycle_t T0, T1, delta;
123 unsigned long flags; 123 unsigned long flags;
124 long disabled; 124 long disabled;
125 int cpu; 125 int cpu;
126 int pc;
127
128 tracing_record_cmdline(prev);
126 129
127 if (unlikely(!tracer_enabled)) 130 if (unlikely(!tracer_enabled))
128 return; 131 return;
@@ -139,12 +142,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
139 if (next != wakeup_task) 142 if (next != wakeup_task)
140 return; 143 return;
141 144
145 pc = preempt_count();
146
142 /* The task we are waiting for is waking up */ 147 /* The task we are waiting for is waking up */
143 data = tr->data[wakeup_cpu]; 148 data = wakeup_trace->data[wakeup_cpu];
144 149
145 /* disable local data, not wakeup_cpu data */ 150 /* disable local data, not wakeup_cpu data */
146 cpu = raw_smp_processor_id(); 151 cpu = raw_smp_processor_id();
147 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 152 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
148 if (likely(disabled != 1)) 153 if (likely(disabled != 1))
149 goto out; 154 goto out;
150 155
@@ -155,7 +160,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
155 if (unlikely(!tracer_enabled || next != wakeup_task)) 160 if (unlikely(!tracer_enabled || next != wakeup_task))
156 goto out_unlock; 161 goto out_unlock;
157 162
158 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); 163 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
159 164
160 /* 165 /*
161 * usecs conversion is slow so we try to delay the conversion 166 * usecs conversion is slow so we try to delay the conversion
@@ -174,39 +179,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
174 t0 = nsecs_to_usecs(T0); 179 t0 = nsecs_to_usecs(T0);
175 t1 = nsecs_to_usecs(T1); 180 t1 = nsecs_to_usecs(T1);
176 181
177 update_max_tr(tr, wakeup_task, wakeup_cpu); 182 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
178 183
179out_unlock: 184out_unlock:
180 __wakeup_reset(tr); 185 __wakeup_reset(wakeup_trace);
181 __raw_spin_unlock(&wakeup_lock); 186 __raw_spin_unlock(&wakeup_lock);
182 local_irq_restore(flags); 187 local_irq_restore(flags);
183out: 188out:
184 atomic_dec(&tr->data[cpu]->disabled); 189 atomic_dec(&wakeup_trace->data[cpu]->disabled);
185}
186
187static notrace void
188sched_switch_callback(void *probe_data, void *call_data,
189 const char *format, va_list *args)
190{
191 struct task_struct *prev;
192 struct task_struct *next;
193 struct rq *__rq;
194
195 /* skip prev_pid %d next_pid %d prev_state %ld */
196 (void)va_arg(*args, int);
197 (void)va_arg(*args, int);
198 (void)va_arg(*args, long);
199 __rq = va_arg(*args, typeof(__rq));
200 prev = va_arg(*args, typeof(prev));
201 next = va_arg(*args, typeof(next));
202
203 tracing_record_cmdline(prev);
204
205 /*
206 * If tracer_switch_func only points to the local
207 * switch func, it still needs the ptr passed to it.
208 */
209 wakeup_sched_switch(probe_data, __rq, prev, next);
210} 190}
211 191
212static void __wakeup_reset(struct trace_array *tr) 192static void __wakeup_reset(struct trace_array *tr)
@@ -216,7 +196,7 @@ static void __wakeup_reset(struct trace_array *tr)
216 196
217 for_each_possible_cpu(cpu) { 197 for_each_possible_cpu(cpu) {
218 data = tr->data[cpu]; 198 data = tr->data[cpu];
219 tracing_reset(data); 199 tracing_reset(tr, cpu);
220 } 200 }
221 201
222 wakeup_cpu = -1; 202 wakeup_cpu = -1;
@@ -240,19 +220,26 @@ static void wakeup_reset(struct trace_array *tr)
240} 220}
241 221
242static void 222static void
243wakeup_check_start(struct trace_array *tr, struct task_struct *p, 223probe_wakeup(struct rq *rq, struct task_struct *p)
244 struct task_struct *curr)
245{ 224{
246 int cpu = smp_processor_id(); 225 int cpu = smp_processor_id();
247 unsigned long flags; 226 unsigned long flags;
248 long disabled; 227 long disabled;
228 int pc;
229
230 if (likely(!tracer_enabled))
231 return;
232
233 tracing_record_cmdline(p);
234 tracing_record_cmdline(current);
249 235
250 if (likely(!rt_task(p)) || 236 if (likely(!rt_task(p)) ||
251 p->prio >= wakeup_prio || 237 p->prio >= wakeup_prio ||
252 p->prio >= curr->prio) 238 p->prio >= current->prio)
253 return; 239 return;
254 240
255 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 241 pc = preempt_count();
242 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
256 if (unlikely(disabled != 1)) 243 if (unlikely(disabled != 1))
257 goto out; 244 goto out;
258 245
@@ -264,7 +251,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
264 goto out_locked; 251 goto out_locked;
265 252
266 /* reset the trace */ 253 /* reset the trace */
267 __wakeup_reset(tr); 254 __wakeup_reset(wakeup_trace);
268 255
269 wakeup_cpu = task_cpu(p); 256 wakeup_cpu = task_cpu(p);
270 wakeup_prio = p->prio; 257 wakeup_prio = p->prio;
@@ -274,74 +261,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
274 261
275 local_save_flags(flags); 262 local_save_flags(flags);
276 263
277 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 264 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
278 trace_function(tr, tr->data[wakeup_cpu], 265 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
279 CALLER_ADDR1, CALLER_ADDR2, flags); 266 CALLER_ADDR1, CALLER_ADDR2, flags, pc);
280 267
281out_locked: 268out_locked:
282 __raw_spin_unlock(&wakeup_lock); 269 __raw_spin_unlock(&wakeup_lock);
283out: 270out:
284 atomic_dec(&tr->data[cpu]->disabled); 271 atomic_dec(&wakeup_trace->data[cpu]->disabled);
285}
286
287static notrace void
288wake_up_callback(void *probe_data, void *call_data,
289 const char *format, va_list *args)
290{
291 struct trace_array **ptr = probe_data;
292 struct trace_array *tr = *ptr;
293 struct task_struct *curr;
294 struct task_struct *task;
295 struct rq *__rq;
296
297 if (likely(!tracer_enabled))
298 return;
299
300 /* Skip pid %d state %ld */
301 (void)va_arg(*args, int);
302 (void)va_arg(*args, long);
303 /* now get the meat: "rq %p task %p rq->curr %p" */
304 __rq = va_arg(*args, typeof(__rq));
305 task = va_arg(*args, typeof(task));
306 curr = va_arg(*args, typeof(curr));
307
308 tracing_record_cmdline(task);
309 tracing_record_cmdline(curr);
310
311 wakeup_check_start(tr, task, curr);
312} 272}
313 273
314static void start_wakeup_tracer(struct trace_array *tr) 274static void start_wakeup_tracer(struct trace_array *tr)
315{ 275{
316 int ret; 276 int ret;
317 277
318 ret = marker_probe_register("kernel_sched_wakeup", 278 ret = register_trace_sched_wakeup(probe_wakeup);
319 "pid %d state %ld ## rq %p task %p rq->curr %p",
320 wake_up_callback,
321 &wakeup_trace);
322 if (ret) { 279 if (ret) {
323 pr_info("wakeup trace: Couldn't add marker" 280 pr_info("wakeup trace: Couldn't activate tracepoint"
324 " probe to kernel_sched_wakeup\n"); 281 " probe to kernel_sched_wakeup\n");
325 return; 282 return;
326 } 283 }
327 284
328 ret = marker_probe_register("kernel_sched_wakeup_new", 285 ret = register_trace_sched_wakeup_new(probe_wakeup);
329 "pid %d state %ld ## rq %p task %p rq->curr %p",
330 wake_up_callback,
331 &wakeup_trace);
332 if (ret) { 286 if (ret) {
333 pr_info("wakeup trace: Couldn't add marker" 287 pr_info("wakeup trace: Couldn't activate tracepoint"
334 " probe to kernel_sched_wakeup_new\n"); 288 " probe to kernel_sched_wakeup_new\n");
335 goto fail_deprobe; 289 goto fail_deprobe;
336 } 290 }
337 291
338 ret = marker_probe_register("kernel_sched_schedule", 292 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
339 "prev_pid %d next_pid %d prev_state %ld "
340 "## rq %p prev %p next %p",
341 sched_switch_callback,
342 &wakeup_trace);
343 if (ret) { 293 if (ret) {
344 pr_info("sched trace: Couldn't add marker" 294 pr_info("sched trace: Couldn't activate tracepoint"
345 " probe to kernel_sched_schedule\n"); 295 " probe to kernel_sched_schedule\n");
346 goto fail_deprobe_wake_new; 296 goto fail_deprobe_wake_new;
347 } 297 }
@@ -363,28 +313,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
363 313
364 return; 314 return;
365fail_deprobe_wake_new: 315fail_deprobe_wake_new:
366 marker_probe_unregister("kernel_sched_wakeup_new", 316 unregister_trace_sched_wakeup_new(probe_wakeup);
367 wake_up_callback,
368 &wakeup_trace);
369fail_deprobe: 317fail_deprobe:
370 marker_probe_unregister("kernel_sched_wakeup", 318 unregister_trace_sched_wakeup(probe_wakeup);
371 wake_up_callback,
372 &wakeup_trace);
373} 319}
374 320
375static void stop_wakeup_tracer(struct trace_array *tr) 321static void stop_wakeup_tracer(struct trace_array *tr)
376{ 322{
377 tracer_enabled = 0; 323 tracer_enabled = 0;
378 unregister_ftrace_function(&trace_ops); 324 unregister_ftrace_function(&trace_ops);
379 marker_probe_unregister("kernel_sched_schedule", 325 unregister_trace_sched_switch(probe_wakeup_sched_switch);
380 sched_switch_callback, 326 unregister_trace_sched_wakeup_new(probe_wakeup);
381 &wakeup_trace); 327 unregister_trace_sched_wakeup(probe_wakeup);
382 marker_probe_unregister("kernel_sched_wakeup_new",
383 wake_up_callback,
384 &wakeup_trace);
385 marker_probe_unregister("kernel_sched_wakeup",
386 wake_up_callback,
387 &wakeup_trace);
388} 328}
389 329
390static void wakeup_tracer_init(struct trace_array *tr) 330static void wakeup_tracer_init(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073bf..90bc752a7580 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
12 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT:
13 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
14 return 1; 16 return 1;
15 } 17 }
16 return 0; 18 return 0;
17} 19}
18 20
19static int 21static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{ 22{
22 struct trace_entry *entries; 23 struct ring_buffer_event *event;
23 struct page *page; 24 struct trace_entry *entry;
24 int idx = 0;
25 int i;
26 25
27 BUG_ON(list_empty(&data->trace_pages)); 26 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 page = list_entry(data->trace_pages.next, struct page, lru); 27 entry = ring_buffer_event_data(event);
29 entries = page_address(page);
30 28
31 check_pages(data); 29 if (!trace_valid_entry(entry)) {
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ", 30 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type); 31 entry->type);
46 goto failed; 32 goto failed;
47 } 33 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 } 34 }
70
71 return 0; 35 return 0;
72 36
73 failed: 37 failed:
@@ -89,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
89 /* Don't allow flipping of max traces now */ 53 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags); 54 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock); 55 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95 56
96 cnt += tr->data[cpu]->trace_idx; 57 cnt = ring_buffer_entries(tr->buffer);
97 58
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]); 59 for_each_possible_cpu(cpu) {
60 ret = trace_test_buffer_cpu(tr, cpu);
99 if (ret) 61 if (ret)
100 break; 62 break;
101 } 63 }
@@ -108,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
108 return ret; 70 return ret;
109} 71}
110 72
111#ifdef CONFIG_FTRACE 73#ifdef CONFIG_FUNCTION_TRACER
112 74
113#ifdef CONFIG_DYNAMIC_FTRACE 75#ifdef CONFIG_DYNAMIC_FTRACE
114 76
@@ -120,11 +82,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr, 82 struct trace_array *tr,
121 int (*func)(void)) 83 int (*func)(void))
122{ 84{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled; 85 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled; 86 int save_tracer_enabled = tracer_enabled;
87 unsigned long count;
127 char *func_name; 88 char *func_name;
89 int ret;
128 90
129 /* The ftrace test PASSED */ 91 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n"); 92 printk(KERN_CONT "PASSED\n");
@@ -137,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
137 /* passed in by parameter to fool gcc from optimizing */ 99 /* passed in by parameter to fool gcc from optimizing */
138 func(); 100 func();
139 101
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /* 102 /*
148 * Some archs *cough*PowerPC*cough* add charachters to the 103 * Some archs *cough*PowerPC*cough* add charachters to the
149 * start of the function names. We simply put a '*' to 104 * start of the function names. We simply put a '*' to
@@ -157,6 +112,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
157 /* enable tracing */ 112 /* enable tracing */
158 tr->ctrl = 1; 113 tr->ctrl = 1;
159 trace->init(tr); 114 trace->init(tr);
115
160 /* Sleep for a 1/10 of a second */ 116 /* Sleep for a 1/10 of a second */
161 msleep(100); 117 msleep(100);
162 118
@@ -212,21 +168,14 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
212int 168int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) 169trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{ 170{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled; 171 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled; 172 int save_tracer_enabled = tracer_enabled;
173 unsigned long count;
174 int ret;
219 175
220 /* make sure msleep has been recorded */ 176 /* make sure msleep has been recorded */
221 msleep(1); 177 msleep(1);
222 178
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */ 179 /* start the tracing */
231 ftrace_enabled = 1; 180 ftrace_enabled = 1;
232 tracer_enabled = 1; 181 tracer_enabled = 1;
@@ -263,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
263 212
264 return ret; 213 return ret;
265} 214}
266#endif /* CONFIG_FTRACE */ 215#endif /* CONFIG_FUNCTION_TRACER */
267 216
268#ifdef CONFIG_IRQSOFF_TRACER 217#ifdef CONFIG_IRQSOFF_TRACER
269int 218int
@@ -415,6 +364,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
415} 364}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ 365#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417 366
367#ifdef CONFIG_NOP_TRACER
368int
369trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
370{
371 /* What could possibly go wrong? */
372 return 0;
373}
374#endif
375
418#ifdef CONFIG_SCHED_TRACER 376#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data) 377static int trace_wakeup_test_thread(void *data)
420{ 378{
@@ -486,6 +444,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
486 444
487 wake_up_process(p); 445 wake_up_process(p);
488 446
447 /* give a little time to let the thread wake up */
448 msleep(100);
449
489 /* stop the tracing. */ 450 /* stop the tracing. */
490 tr->ctrl = 0; 451 tr->ctrl = 0;
491 trace->ctrl_update(tr); 452 trace->ctrl_update(tr);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 000000000000..be682b62fe58
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,314 @@
1/*
2 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
3 *
4 */
5#include <linux/stacktrace.h>
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/uaccess.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/fs.h>
15#include "trace.h"
16
17#define STACK_TRACE_ENTRIES 500
18
19static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
20 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
21static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
22
23static struct stack_trace max_stack_trace = {
24 .max_entries = STACK_TRACE_ENTRIES,
25 .entries = stack_dump_trace,
26};
27
28static unsigned long max_stack_size;
29static raw_spinlock_t max_stack_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31
32static int stack_trace_disabled __read_mostly;
33static DEFINE_PER_CPU(int, trace_active);
34
35static inline void check_stack(void)
36{
37 unsigned long this_size, flags;
38 unsigned long *p, *top, *start;
39 int i;
40
41 this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
42 this_size = THREAD_SIZE - this_size;
43
44 if (this_size <= max_stack_size)
45 return;
46
47 /* we do not handle interrupt stacks yet */
48 if (!object_is_on_stack(&this_size))
49 return;
50
51 raw_local_irq_save(flags);
52 __raw_spin_lock(&max_stack_lock);
53
54 /* a race could have already updated it */
55 if (this_size <= max_stack_size)
56 goto out;
57
58 max_stack_size = this_size;
59
60 max_stack_trace.nr_entries = 0;
61 max_stack_trace.skip = 3;
62
63 save_stack_trace(&max_stack_trace);
64
65 /*
66 * Now find where in the stack these are.
67 */
68 i = 0;
69 start = &this_size;
70 top = (unsigned long *)
71 (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
72
73 /*
74 * Loop through all the entries. One of the entries may
75 * for some reason be missed on the stack, so we may
76 * have to account for them. If they are all there, this
77 * loop will only happen once. This code only takes place
78 * on a new max, so it is far from a fast path.
79 */
80 while (i < max_stack_trace.nr_entries) {
81
82 stack_dump_index[i] = this_size;
83 p = start;
84
85 for (; p < top && i < max_stack_trace.nr_entries; p++) {
86 if (*p == stack_dump_trace[i]) {
87 this_size = stack_dump_index[i++] =
88 (top - p) * sizeof(unsigned long);
89 /* Start the search from here */
90 start = p + 1;
91 }
92 }
93
94 i++;
95 }
96
97 out:
98 __raw_spin_unlock(&max_stack_lock);
99 raw_local_irq_restore(flags);
100}
101
102static void
103stack_trace_call(unsigned long ip, unsigned long parent_ip)
104{
105 int cpu, resched;
106
107 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return;
109
110 resched = need_resched();
111 preempt_disable_notrace();
112
113 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */
115 if (per_cpu(trace_active, cpu)++ != 0)
116 goto out;
117
118 check_stack();
119
120 out:
121 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */
123 if (resched)
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127}
128
129static struct ftrace_ops trace_ops __read_mostly =
130{
131 .func = stack_trace_call,
132};
133
134static ssize_t
135stack_max_size_read(struct file *filp, char __user *ubuf,
136 size_t count, loff_t *ppos)
137{
138 unsigned long *ptr = filp->private_data;
139 char buf[64];
140 int r;
141
142 r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
143 if (r > sizeof(buf))
144 r = sizeof(buf);
145 return simple_read_from_buffer(ubuf, count, ppos, buf, r);
146}
147
148static ssize_t
149stack_max_size_write(struct file *filp, const char __user *ubuf,
150 size_t count, loff_t *ppos)
151{
152 long *ptr = filp->private_data;
153 unsigned long val, flags;
154 char buf[64];
155 int ret;
156
157 if (count >= sizeof(buf))
158 return -EINVAL;
159
160 if (copy_from_user(&buf, ubuf, count))
161 return -EFAULT;
162
163 buf[count] = 0;
164
165 ret = strict_strtoul(buf, 10, &val);
166 if (ret < 0)
167 return ret;
168
169 raw_local_irq_save(flags);
170 __raw_spin_lock(&max_stack_lock);
171 *ptr = val;
172 __raw_spin_unlock(&max_stack_lock);
173 raw_local_irq_restore(flags);
174
175 return count;
176}
177
178static struct file_operations stack_max_size_fops = {
179 .open = tracing_open_generic,
180 .read = stack_max_size_read,
181 .write = stack_max_size_write,
182};
183
184static void *
185t_next(struct seq_file *m, void *v, loff_t *pos)
186{
187 long i = (long)m->private;
188
189 (*pos)++;
190
191 i++;
192
193 if (i >= max_stack_trace.nr_entries ||
194 stack_dump_trace[i] == ULONG_MAX)
195 return NULL;
196
197 m->private = (void *)i;
198
199 return &m->private;
200}
201
202static void *t_start(struct seq_file *m, loff_t *pos)
203{
204 void *t = &m->private;
205 loff_t l = 0;
206
207 local_irq_disable();
208 __raw_spin_lock(&max_stack_lock);
209
210 for (; t && l < *pos; t = t_next(m, t, &l))
211 ;
212
213 return t;
214}
215
216static void t_stop(struct seq_file *m, void *p)
217{
218 __raw_spin_unlock(&max_stack_lock);
219 local_irq_enable();
220}
221
222static int trace_lookup_stack(struct seq_file *m, long i)
223{
224 unsigned long addr = stack_dump_trace[i];
225#ifdef CONFIG_KALLSYMS
226 char str[KSYM_SYMBOL_LEN];
227
228 sprint_symbol(str, addr);
229
230 return seq_printf(m, "%s\n", str);
231#else
232 return seq_printf(m, "%p\n", (void*)addr);
233#endif
234}
235
236static int t_show(struct seq_file *m, void *v)
237{
238 long i = *(long *)v;
239 int size;
240
241 if (i < 0) {
242 seq_printf(m, " Depth Size Location"
243 " (%d entries)\n"
244 " ----- ---- --------\n",
245 max_stack_trace.nr_entries);
246 return 0;
247 }
248
249 if (i >= max_stack_trace.nr_entries ||
250 stack_dump_trace[i] == ULONG_MAX)
251 return 0;
252
253 if (i+1 == max_stack_trace.nr_entries ||
254 stack_dump_trace[i+1] == ULONG_MAX)
255 size = stack_dump_index[i];
256 else
257 size = stack_dump_index[i] - stack_dump_index[i+1];
258
259 seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
260
261 trace_lookup_stack(m, i);
262
263 return 0;
264}
265
266static struct seq_operations stack_trace_seq_ops = {
267 .start = t_start,
268 .next = t_next,
269 .stop = t_stop,
270 .show = t_show,
271};
272
273static int stack_trace_open(struct inode *inode, struct file *file)
274{
275 int ret;
276
277 ret = seq_open(file, &stack_trace_seq_ops);
278 if (!ret) {
279 struct seq_file *m = file->private_data;
280 m->private = (void *)-1;
281 }
282
283 return ret;
284}
285
286static struct file_operations stack_trace_fops = {
287 .open = stack_trace_open,
288 .read = seq_read,
289 .llseek = seq_lseek,
290};
291
292static __init int stack_trace_init(void)
293{
294 struct dentry *d_tracer;
295 struct dentry *entry;
296
297 d_tracer = tracing_init_dentry();
298
299 entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
300 &max_stack_size, &stack_max_size_fops);
301 if (!entry)
302 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
303
304 entry = debugfs_create_file("stack_trace", 0444, d_tracer,
305 NULL, &stack_trace_fops);
306 if (!entry)
307 pr_warning("Could not create debugfs 'stack_trace' entry\n");
308
309 register_ftrace_function(&trace_ops);
310
311 return 0;
312}
313
314device_initcall(stack_trace_init);
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index db58fb66a135..9587d3bcba55 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
241 tr->time_start = ftrace_now(tr->cpu); 241 tr->time_start = ftrace_now(tr->cpu);
242 242
243 for_each_online_cpu(cpu) 243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]); 244 tracing_reset(tr, cpu);
245} 245}
246 246
247static void start_stack_trace(struct trace_array *tr) 247static void start_stack_trace(struct trace_array *tr)