Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                  115
-rw-r--r--  kernel/trace/Makefile                   9
-rw-r--r--  kernel/trace/ftrace.c                 929
-rw-r--r--  kernel/trace/ring_buffer.c            690
-rw-r--r--  kernel/trace/trace.c                  927
-rw-r--r--  kernel/trace/trace.h                  264
-rw-r--r--  kernel/trace/trace_boot.c             166
-rw-r--r--  kernel/trace/trace_branch.c           342
-rw-r--r--  kernel/trace/trace_functions.c         18
-rw-r--r--  kernel/trace/trace_functions_graph.c  669
-rw-r--r--  kernel/trace/trace_hw_branches.c      205
-rw-r--r--  kernel/trace/trace_irqsoff.c           61
-rw-r--r--  kernel/trace/trace_mmiotrace.c         27
-rw-r--r--  kernel/trace/trace_nop.c               65
-rw-r--r--  kernel/trace/trace_power.c            179
-rw-r--r--  kernel/trace/trace_sched_switch.c     107
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      70
-rw-r--r--  kernel/trace/trace_selftest.c         173
-rw-r--r--  kernel/trace/trace_stack.c             70
-rw-r--r--  kernel/trace/trace_sysprof.c           19
20 files changed, 4368 insertions, 737 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..e2a4ff6fc3a6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -3,18 +3,34 @@
3# select HAVE_FUNCTION_TRACER: 3# select HAVE_FUNCTION_TRACER:
4# 4#
5 5
6config USER_STACKTRACE_SUPPORT
7 bool
8
6config NOP_TRACER 9config NOP_TRACER
7 bool 10 bool
8 11
9config HAVE_FUNCTION_TRACER 12config HAVE_FUNCTION_TRACER
10 bool 13 bool
11 14
15config HAVE_FUNCTION_GRAPH_TRACER
16 bool
17
18config HAVE_FUNCTION_TRACE_MCOUNT_TEST
19 bool
20 help
21 This gets selected when the arch tests the function_trace_stop
22 variable at the mcount call site. Otherwise, this variable
23 is tested by the called function.
24
12config HAVE_DYNAMIC_FTRACE 25config HAVE_DYNAMIC_FTRACE
13 bool 26 bool
14 27
15config HAVE_FTRACE_MCOUNT_RECORD 28config HAVE_FTRACE_MCOUNT_RECORD
16 bool 29 bool
17 30
31config HAVE_HW_BRANCH_TRACER
32 bool
33
18config TRACER_MAX_TRACE 34config TRACER_MAX_TRACE
19 bool 35 bool
20 36
@@ -47,6 +63,20 @@ config FUNCTION_TRACER
47 (the bootup default), then the overhead of the instructions is very 63 (the bootup default), then the overhead of the instructions is very
48 small and not measurable even in micro-benchmarks. 64 small and not measurable even in micro-benchmarks.
49 65
66config FUNCTION_GRAPH_TRACER
67 bool "Kernel Function Graph Tracer"
68 depends on HAVE_FUNCTION_GRAPH_TRACER
69 depends on FUNCTION_TRACER
70 default y
71 help
72 Enable the kernel to trace a function at both its return
73 and its entry.
 74 Its first purpose is to trace the duration of functions and
 75 draw a call graph for each thread with some information like
76 the return value.
77 This is done by setting the current return address on the current
78 task structure into a stack of calls.
79
50config IRQSOFF_TRACER 80config IRQSOFF_TRACER
51 bool "Interrupts-off Latency Tracer" 81 bool "Interrupts-off Latency Tracer"
52 default n 82 default n
@@ -138,6 +168,70 @@ config BOOT_TRACER
138 selected, because the self-tests are an initcall as well and that 168 selected, because the self-tests are an initcall as well and that
139 would invalidate the boot trace. ) 169 would invalidate the boot trace. )
140 170
171config TRACE_BRANCH_PROFILING
172 bool "Trace likely/unlikely profiler"
173 depends on DEBUG_KERNEL
174 select TRACING
175 help
 176 This tracer profiles all the likely and unlikely macros
177 in the kernel. It will display the results in:
178
179 /debugfs/tracing/profile_annotated_branch
180
 181 Note: this will add a significant overhead; only turn this
182 on if you need to profile the system's use of these macros.
183
184 Say N if unsure.
185
186config PROFILE_ALL_BRANCHES
187 bool "Profile all if conditionals"
188 depends on TRACE_BRANCH_PROFILING
189 help
190 This tracer profiles all branch conditions. Every if ()
 191 taken in the kernel is recorded, whether it hit or missed.
192 The results will be displayed in:
193
194 /debugfs/tracing/profile_branch
195
196 This configuration, when enabled, will impose a great overhead
197 on the system. This should only be enabled when the system
 198 is to be analyzed.
199
200 Say N if unsure.
201
202config TRACING_BRANCHES
203 bool
204 help
205 Selected by tracers that will trace the likely and unlikely
206 conditions. This prevents the tracers themselves from being
207 profiled. Profiling the tracing infrastructure can only happen
 208 when the likely and unlikely annotations are not being traced.
209
210config BRANCH_TRACER
211 bool "Trace likely/unlikely instances"
212 depends on TRACE_BRANCH_PROFILING
213 select TRACING_BRANCHES
214 help
215 This traces the events of likely and unlikely condition
216 calls in the kernel. The difference between this and the
217 "Trace likely/unlikely profiler" is that this is not a
218 histogram of the callers, but actually places the calling
219 events into a running trace buffer to see when and where the
220 events happened, as well as their results.
221
222 Say N if unsure.
223
224config POWER_TRACER
225 bool "Trace power consumption behavior"
226 depends on DEBUG_KERNEL
227 depends on X86
228 select TRACING
229 help
 230 This tracer helps developers analyze and optimize the kernel's
231 power management decisions, specifically the C-state and P-state
232 behavior.
233
234
141config STACK_TRACER 235config STACK_TRACER
142 bool "Trace max stack" 236 bool "Trace max stack"
143 depends on HAVE_FUNCTION_TRACER 237 depends on HAVE_FUNCTION_TRACER
@@ -150,13 +244,26 @@ config STACK_TRACER
150 244
151 This tracer works by hooking into every function call that the 245 This tracer works by hooking into every function call that the
152 kernel executes, and keeping a maximum stack depth value and 246 kernel executes, and keeping a maximum stack depth value and
153 stack-trace saved. Because this logic has to execute in every 247 stack-trace saved. If this is configured with DYNAMIC_FTRACE
154 kernel function, all the time, this option can slow down the 248 then it will not have any overhead while the stack tracer
155 kernel measurably and is generally intended for kernel 249 is disabled.
156 developers only. 250
251 To enable the stack tracer on bootup, pass in 'stacktrace'
252 on the kernel command line.
253
254 The stack tracer can also be enabled or disabled via the
255 sysctl kernel.stack_tracer_enabled
157 256
158 Say N if unsure. 257 Say N if unsure.
159 258
259config HW_BRANCH_TRACER
260 depends on HAVE_HW_BRANCH_TRACER
261 bool "Trace hw branches"
262 select TRACING
263 help
264 This tracer records all branches on the system in a circular
265 buffer giving access to the last N branches for each cpu.
266
160config DYNAMIC_FTRACE 267config DYNAMIC_FTRACE
161 bool "enable/disable ftrace tracepoints dynamically" 268 bool "enable/disable ftrace tracepoints dynamically"
162 depends on FUNCTION_TRACER 269 depends on FUNCTION_TRACER
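
To make the branch-profiling options above concrete, here is a small, purely illustrative C fragment; the struct and function names are invented for this example and are not part of the patch. Every likely()/unlikely() annotation of this kind is what TRACE_BRANCH_PROFILING accounts per call site in /debugfs/tracing/profile_annotated_branch, and PROFILE_ALL_BRANCHES extends the accounting to every if ().

#include <linux/compiler.h>	/* likely()/unlikely() */
#include <linux/errno.h>

/* Invented example types -- only here to show annotated branches. */
struct pkt_queue { unsigned int len, limit; };

static int pkt_queue_add(struct pkt_queue *q, void *pkt)
{
	if (unlikely(!pkt))			/* predicted not taken; a hit counts as "incorrect" */
		return -EINVAL;

	if (likely(q->len < q->limit)) {	/* predicted taken; counts as "correct" when it is */
		q->len++;
		return 0;
	}
	return -ENOBUFS;
}
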
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1a49e9..349d5a93653f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13# If unlikely tracing is enabled, do not trace these files
14ifdef CONFIG_TRACING_BRANCHES
15KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
16endif
17
13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o 18obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 19obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 20
@@ -24,5 +29,9 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
24obj-$(CONFIG_STACK_TRACER) += trace_stack.o 29obj-$(CONFIG_STACK_TRACER) += trace_stack.o
25obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 30obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
26obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 31obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
32obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
33obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
34obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
35obj-$(CONFIG_POWER_TRACER) += trace_power.o
27 36
28libftrace-y := ftrace.o 37libftrace-y := ftrace.o
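
The -DDISABLE_BRANCH_PROFILING define added above exists to keep the tracing code itself out of the branch profiler, since profiling the profiler would recurse. A simplified sketch of the gating this relies on in include/linux/compiler.h; the real macro bodies are more involved, so treat this as an approximation only:

/* Approximation of the include/linux/compiler.h logic; not verbatim. */
#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING)
/* instrumented likely()/unlikely() that record each branch outcome */
#else
# define likely(x)	__builtin_expect(!!(x), 1)
# define unlikely(x)	__builtin_expect(!!(x), 0)
#endif
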
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 78db083390f0..2f32969c09df 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,13 @@
47int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
48static int last_ftrace_enabled; 48static int last_ftrace_enabled;
49 49
50/* set when tracing only a pid */
51struct pid *ftrace_pid_trace;
52static struct pid * const ftrace_swapper_pid = &init_struct_pid;
53
54/* Quick disabling of function tracer. */
55int function_trace_stop;
56
50/* 57/*
51 * ftrace_disabled is set when an anomaly is discovered. 58 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 59 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -55,6 +62,7 @@ static int ftrace_disabled __read_mostly;
55 62
56static DEFINE_SPINLOCK(ftrace_lock); 63static DEFINE_SPINLOCK(ftrace_lock);
57static DEFINE_MUTEX(ftrace_sysctl_lock); 64static DEFINE_MUTEX(ftrace_sysctl_lock);
65static DEFINE_MUTEX(ftrace_start_lock);
58 66
59static struct ftrace_ops ftrace_list_end __read_mostly = 67static struct ftrace_ops ftrace_list_end __read_mostly =
60{ 68{
@@ -63,6 +71,8 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
63 71
64static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 72static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 73ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
74ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
75ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
66 76
67static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 77static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68{ 78{
@@ -79,6 +89,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
79 }; 89 };
80} 90}
81 91
92static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
93{
94 if (!test_tsk_trace_trace(current))
95 return;
96
97 ftrace_pid_function(ip, parent_ip);
98}
99
100static void set_ftrace_pid_function(ftrace_func_t func)
101{
102 /* do not set ftrace_pid_function to itself! */
103 if (func != ftrace_pid_func)
104 ftrace_pid_function = func;
105}
106
82/** 107/**
83 * clear_ftrace_function - reset the ftrace function 108 * clear_ftrace_function - reset the ftrace function
84 * 109 *
@@ -88,7 +113,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
88void clear_ftrace_function(void) 113void clear_ftrace_function(void)
89{ 114{
90 ftrace_trace_function = ftrace_stub; 115 ftrace_trace_function = ftrace_stub;
116 __ftrace_trace_function = ftrace_stub;
117 ftrace_pid_function = ftrace_stub;
118}
119
120#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
121/*
122 * For those archs that do not test ftrace_trace_stop in their
123 * mcount call site, we need to do it from C.
124 */
125static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
126{
127 if (function_trace_stop)
128 return;
129
130 __ftrace_trace_function(ip, parent_ip);
91} 131}
132#endif
92 133
93static int __register_ftrace_function(struct ftrace_ops *ops) 134static int __register_ftrace_function(struct ftrace_ops *ops)
94{ 135{
@@ -106,14 +147,28 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
106 ftrace_list = ops; 147 ftrace_list = ops;
107 148
108 if (ftrace_enabled) { 149 if (ftrace_enabled) {
150 ftrace_func_t func;
151
152 if (ops->next == &ftrace_list_end)
153 func = ops->func;
154 else
155 func = ftrace_list_func;
156
157 if (ftrace_pid_trace) {
158 set_ftrace_pid_function(func);
159 func = ftrace_pid_func;
160 }
161
109 /* 162 /*
110 * For one func, simply call it directly. 163 * For one func, simply call it directly.
111 * For more than one func, call the chain. 164 * For more than one func, call the chain.
112 */ 165 */
113 if (ops->next == &ftrace_list_end) 166#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
114 ftrace_trace_function = ops->func; 167 ftrace_trace_function = func;
115 else 168#else
116 ftrace_trace_function = ftrace_list_func; 169 __ftrace_trace_function = func;
170 ftrace_trace_function = ftrace_test_stop_func;
171#endif
117 } 172 }
118 173
119 spin_unlock(&ftrace_lock); 174 spin_unlock(&ftrace_lock);
@@ -152,9 +207,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152 207
153 if (ftrace_enabled) { 208 if (ftrace_enabled) {
154 /* If we only have one func left, then call that directly */ 209 /* If we only have one func left, then call that directly */
155 if (ftrace_list == &ftrace_list_end || 210 if (ftrace_list->next == &ftrace_list_end) {
156 ftrace_list->next == &ftrace_list_end) 211 ftrace_func_t func = ftrace_list->func;
157 ftrace_trace_function = ftrace_list->func; 212
213 if (ftrace_pid_trace) {
214 set_ftrace_pid_function(func);
215 func = ftrace_pid_func;
216 }
217#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
218 ftrace_trace_function = func;
219#else
220 __ftrace_trace_function = func;
221#endif
222 }
158 } 223 }
159 224
160 out: 225 out:
@@ -163,6 +228,36 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
163 return ret; 228 return ret;
164} 229}
165 230
231static void ftrace_update_pid_func(void)
232{
233 ftrace_func_t func;
234
235 /* should not be called from interrupt context */
236 spin_lock(&ftrace_lock);
237
238 if (ftrace_trace_function == ftrace_stub)
239 goto out;
240
241 func = ftrace_trace_function;
242
243 if (ftrace_pid_trace) {
244 set_ftrace_pid_function(func);
245 func = ftrace_pid_func;
246 } else {
247 if (func == ftrace_pid_func)
248 func = ftrace_pid_function;
249 }
250
251#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
252 ftrace_trace_function = func;
253#else
254 __ftrace_trace_function = func;
255#endif
256
257 out:
258 spin_unlock(&ftrace_lock);
259}
260
166#ifdef CONFIG_DYNAMIC_FTRACE 261#ifdef CONFIG_DYNAMIC_FTRACE
167#ifndef CONFIG_FTRACE_MCOUNT_RECORD 262#ifndef CONFIG_FTRACE_MCOUNT_RECORD
168# error Dynamic ftrace depends on MCOUNT_RECORD 263# error Dynamic ftrace depends on MCOUNT_RECORD
@@ -182,6 +277,8 @@ enum {
182 FTRACE_UPDATE_TRACE_FUNC = (1 << 2), 277 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
183 FTRACE_ENABLE_MCOUNT = (1 << 3), 278 FTRACE_ENABLE_MCOUNT = (1 << 3),
184 FTRACE_DISABLE_MCOUNT = (1 << 4), 279 FTRACE_DISABLE_MCOUNT = (1 << 4),
280 FTRACE_START_FUNC_RET = (1 << 5),
281 FTRACE_STOP_FUNC_RET = (1 << 6),
185}; 282};
186 283
187static int ftrace_filtered; 284static int ftrace_filtered;
@@ -308,7 +405,7 @@ ftrace_record_ip(unsigned long ip)
308{ 405{
309 struct dyn_ftrace *rec; 406 struct dyn_ftrace *rec;
310 407
311 if (!ftrace_enabled || ftrace_disabled) 408 if (ftrace_disabled)
312 return NULL; 409 return NULL;
313 410
314 rec = ftrace_alloc_dyn_node(ip); 411 rec = ftrace_alloc_dyn_node(ip);
@@ -322,14 +419,51 @@ ftrace_record_ip(unsigned long ip)
322 return rec; 419 return rec;
323} 420}
324 421
325#define FTRACE_ADDR ((long)(ftrace_caller)) 422static void print_ip_ins(const char *fmt, unsigned char *p)
423{
424 int i;
425
426 printk(KERN_CONT "%s", fmt);
427
428 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
429 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
430}
431
432static void ftrace_bug(int failed, unsigned long ip)
433{
434 switch (failed) {
435 case -EFAULT:
436 FTRACE_WARN_ON_ONCE(1);
437 pr_info("ftrace faulted on modifying ");
438 print_ip_sym(ip);
439 break;
440 case -EINVAL:
441 FTRACE_WARN_ON_ONCE(1);
442 pr_info("ftrace failed to modify ");
443 print_ip_sym(ip);
444 print_ip_ins(" actual: ", (unsigned char *)ip);
445 printk(KERN_CONT "\n");
446 break;
447 case -EPERM:
448 FTRACE_WARN_ON_ONCE(1);
449 pr_info("ftrace faulted on writing ");
450 print_ip_sym(ip);
451 break;
452 default:
453 FTRACE_WARN_ON_ONCE(1);
454 pr_info("ftrace faulted on unknown error ");
455 print_ip_sym(ip);
456 }
457}
458
326 459
327static int 460static int
328__ftrace_replace_code(struct dyn_ftrace *rec, 461__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
329 unsigned char *nop, int enable)
330{ 462{
331 unsigned long ip, fl; 463 unsigned long ip, fl;
332 unsigned char *call, *old, *new; 464 unsigned long ftrace_addr;
465
466 ftrace_addr = (unsigned long)ftrace_caller;
333 467
334 ip = rec->ip; 468 ip = rec->ip;
335 469
@@ -388,34 +522,28 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
388 } 522 }
389 } 523 }
390 524
391 call = ftrace_call_replace(ip, FTRACE_ADDR); 525 if (rec->flags & FTRACE_FL_ENABLED)
392 526 return ftrace_make_call(rec, ftrace_addr);
393 if (rec->flags & FTRACE_FL_ENABLED) { 527 else
394 old = nop; 528 return ftrace_make_nop(NULL, rec, ftrace_addr);
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
401 return ftrace_modify_code(ip, old, new);
402} 529}
403 530
404static void ftrace_replace_code(int enable) 531static void ftrace_replace_code(int enable)
405{ 532{
406 int i, failed; 533 int i, failed;
407 unsigned char *nop = NULL;
408 struct dyn_ftrace *rec; 534 struct dyn_ftrace *rec;
409 struct ftrace_page *pg; 535 struct ftrace_page *pg;
410 536
411 nop = ftrace_nop_replace();
412
413 for (pg = ftrace_pages_start; pg; pg = pg->next) { 537 for (pg = ftrace_pages_start; pg; pg = pg->next) {
414 for (i = 0; i < pg->index; i++) { 538 for (i = 0; i < pg->index; i++) {
415 rec = &pg->records[i]; 539 rec = &pg->records[i];
416 540
417 /* don't modify code that has already faulted */ 541 /*
418 if (rec->flags & FTRACE_FL_FAILED) 542 * Skip over free records and records that have
543 * failed.
544 */
545 if (rec->flags & FTRACE_FL_FREE ||
546 rec->flags & FTRACE_FL_FAILED)
419 continue; 547 continue;
420 548
421 /* ignore updates to this record's mcount site */ 549 /* ignore updates to this record's mcount site */
@@ -426,68 +554,30 @@ static void ftrace_replace_code(int enable)
426 unfreeze_record(rec); 554 unfreeze_record(rec);
427 } 555 }
428 556
429 failed = __ftrace_replace_code(rec, nop, enable); 557 failed = __ftrace_replace_code(rec, enable);
430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 558 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
431 rec->flags |= FTRACE_FL_FAILED; 559 rec->flags |= FTRACE_FL_FAILED;
432 if ((system_state == SYSTEM_BOOTING) || 560 if ((system_state == SYSTEM_BOOTING) ||
433 !core_kernel_text(rec->ip)) { 561 !core_kernel_text(rec->ip)) {
434 ftrace_free_rec(rec); 562 ftrace_free_rec(rec);
435 } 563 } else
564 ftrace_bug(failed, rec->ip);
436 } 565 }
437 } 566 }
438 } 567 }
439} 568}
440 569
441static void print_ip_ins(const char *fmt, unsigned char *p)
442{
443 int i;
444
445 printk(KERN_CONT "%s", fmt);
446
447 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
448 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
449}
450
451static int 570static int
452ftrace_code_disable(struct dyn_ftrace *rec) 571ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
453{ 572{
454 unsigned long ip; 573 unsigned long ip;
455 unsigned char *nop, *call;
456 int ret; 574 int ret;
457 575
458 ip = rec->ip; 576 ip = rec->ip;
459 577
460 nop = ftrace_nop_replace(); 578 ret = ftrace_make_nop(mod, rec, mcount_addr);
461 call = ftrace_call_replace(ip, mcount_addr);
462
463 ret = ftrace_modify_code(ip, call, nop);
464 if (ret) { 579 if (ret) {
465 switch (ret) { 580 ftrace_bug(ret, ip);
466 case -EFAULT:
467 FTRACE_WARN_ON_ONCE(1);
468 pr_info("ftrace faulted on modifying ");
469 print_ip_sym(ip);
470 break;
471 case -EINVAL:
472 FTRACE_WARN_ON_ONCE(1);
473 pr_info("ftrace failed to modify ");
474 print_ip_sym(ip);
475 print_ip_ins(" expected: ", call);
476 print_ip_ins(" actual: ", (unsigned char *)ip);
477 print_ip_ins(" replace: ", nop);
478 printk(KERN_CONT "\n");
479 break;
480 case -EPERM:
481 FTRACE_WARN_ON_ONCE(1);
482 pr_info("ftrace faulted on writing ");
483 print_ip_sym(ip);
484 break;
485 default:
486 FTRACE_WARN_ON_ONCE(1);
487 pr_info("ftrace faulted on unknown error ");
488 print_ip_sym(ip);
489 }
490
491 rec->flags |= FTRACE_FL_FAILED; 581 rec->flags |= FTRACE_FL_FAILED;
492 return 0; 582 return 0;
493 } 583 }
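
The rewrite above replaces the old ftrace_modify_code(ip, old, new) calls with two per-architecture hooks, ftrace_make_nop() and ftrace_make_call(), and funnels their error codes (-EFAULT, -EINVAL, -EPERM) through ftrace_bug(). A rough sketch of what an architecture is expected to provide; arch_patch_text(), arch_nop_insn() and arch_call_insn() are hypothetical stand-ins for whatever helpers the architecture actually has:

/* Hypothetical arch-side implementations of the new hooks. */
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
		    unsigned long addr)
{
	/* turn the call to addr (the mcount site) at rec->ip into a nop */
	return arch_patch_text(rec->ip, arch_nop_insn(), MCOUNT_INSN_SIZE);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	/* turn the nop at rec->ip into a call to addr (ftrace_caller) */
	return arch_patch_text(rec->ip, arch_call_insn(rec->ip, addr),
			       MCOUNT_INSN_SIZE);
}

Both return 0 on success or one of the error codes above, which ftrace_bug() decodes when a record cannot be converted.
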
@@ -506,6 +596,11 @@ static int __ftrace_modify_code(void *data)
506 if (*command & FTRACE_UPDATE_TRACE_FUNC) 596 if (*command & FTRACE_UPDATE_TRACE_FUNC)
507 ftrace_update_ftrace_func(ftrace_trace_function); 597 ftrace_update_ftrace_func(ftrace_trace_function);
508 598
599 if (*command & FTRACE_START_FUNC_RET)
600 ftrace_enable_ftrace_graph_caller();
601 else if (*command & FTRACE_STOP_FUNC_RET)
602 ftrace_disable_ftrace_graph_caller();
603
509 return 0; 604 return 0;
510} 605}
511 606
@@ -515,43 +610,43 @@ static void ftrace_run_update_code(int command)
515} 610}
516 611
517static ftrace_func_t saved_ftrace_func; 612static ftrace_func_t saved_ftrace_func;
518static int ftrace_start; 613static int ftrace_start_up;
519static DEFINE_MUTEX(ftrace_start_lock);
520 614
521static void ftrace_startup(void) 615static void ftrace_startup_enable(int command)
522{ 616{
523 int command = 0;
524
525 if (unlikely(ftrace_disabled))
526 return;
527
528 mutex_lock(&ftrace_start_lock);
529 ftrace_start++;
530 command |= FTRACE_ENABLE_CALLS;
531
532 if (saved_ftrace_func != ftrace_trace_function) { 617 if (saved_ftrace_func != ftrace_trace_function) {
533 saved_ftrace_func = ftrace_trace_function; 618 saved_ftrace_func = ftrace_trace_function;
534 command |= FTRACE_UPDATE_TRACE_FUNC; 619 command |= FTRACE_UPDATE_TRACE_FUNC;
535 } 620 }
536 621
537 if (!command || !ftrace_enabled) 622 if (!command || !ftrace_enabled)
538 goto out; 623 return;
539 624
540 ftrace_run_update_code(command); 625 ftrace_run_update_code(command);
541 out:
542 mutex_unlock(&ftrace_start_lock);
543} 626}
544 627
545static void ftrace_shutdown(void) 628static void ftrace_startup(int command)
546{ 629{
547 int command = 0; 630 if (unlikely(ftrace_disabled))
631 return;
632
633 mutex_lock(&ftrace_start_lock);
634 ftrace_start_up++;
635 command |= FTRACE_ENABLE_CALLS;
548 636
637 ftrace_startup_enable(command);
638
639 mutex_unlock(&ftrace_start_lock);
640}
641
642static void ftrace_shutdown(int command)
643{
549 if (unlikely(ftrace_disabled)) 644 if (unlikely(ftrace_disabled))
550 return; 645 return;
551 646
552 mutex_lock(&ftrace_start_lock); 647 mutex_lock(&ftrace_start_lock);
553 ftrace_start--; 648 ftrace_start_up--;
554 if (!ftrace_start) 649 if (!ftrace_start_up)
555 command |= FTRACE_DISABLE_CALLS; 650 command |= FTRACE_DISABLE_CALLS;
556 651
557 if (saved_ftrace_func != ftrace_trace_function) { 652 if (saved_ftrace_func != ftrace_trace_function) {
@@ -577,8 +672,8 @@ static void ftrace_startup_sysctl(void)
577 mutex_lock(&ftrace_start_lock); 672 mutex_lock(&ftrace_start_lock);
578 /* Force update next time */ 673 /* Force update next time */
579 saved_ftrace_func = NULL; 674 saved_ftrace_func = NULL;
580 /* ftrace_start is true if we want ftrace running */ 675 /* ftrace_start_up is true if we want ftrace running */
581 if (ftrace_start) 676 if (ftrace_start_up)
582 command |= FTRACE_ENABLE_CALLS; 677 command |= FTRACE_ENABLE_CALLS;
583 678
584 ftrace_run_update_code(command); 679 ftrace_run_update_code(command);
@@ -593,8 +688,8 @@ static void ftrace_shutdown_sysctl(void)
593 return; 688 return;
594 689
595 mutex_lock(&ftrace_start_lock); 690 mutex_lock(&ftrace_start_lock);
596 /* ftrace_start is true if ftrace is running */ 691 /* ftrace_start_up is true if ftrace is running */
597 if (ftrace_start) 692 if (ftrace_start_up)
598 command |= FTRACE_DISABLE_CALLS; 693 command |= FTRACE_DISABLE_CALLS;
599 694
600 ftrace_run_update_code(command); 695 ftrace_run_update_code(command);
@@ -605,7 +700,7 @@ static cycle_t ftrace_update_time;
605static unsigned long ftrace_update_cnt; 700static unsigned long ftrace_update_cnt;
606unsigned long ftrace_update_tot_cnt; 701unsigned long ftrace_update_tot_cnt;
607 702
608static int ftrace_update_code(void) 703static int ftrace_update_code(struct module *mod)
609{ 704{
610 struct dyn_ftrace *p, *t; 705 struct dyn_ftrace *p, *t;
611 cycle_t start, stop; 706 cycle_t start, stop;
@@ -622,7 +717,7 @@ static int ftrace_update_code(void)
622 list_del_init(&p->list); 717 list_del_init(&p->list);
623 718
624 /* convert record (i.e, patch mcount-call with NOP) */ 719 /* convert record (i.e, patch mcount-call with NOP) */
625 if (ftrace_code_disable(p)) { 720 if (ftrace_code_disable(mod, p)) {
626 p->flags |= FTRACE_FL_CONVERTED; 721 p->flags |= FTRACE_FL_CONVERTED;
627 ftrace_update_cnt++; 722 ftrace_update_cnt++;
628 } else 723 } else
@@ -690,7 +785,6 @@ enum {
690#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ 785#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
691 786
692struct ftrace_iterator { 787struct ftrace_iterator {
693 loff_t pos;
694 struct ftrace_page *pg; 788 struct ftrace_page *pg;
695 unsigned idx; 789 unsigned idx;
696 unsigned flags; 790 unsigned flags;
@@ -715,6 +809,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
715 iter->pg = iter->pg->next; 809 iter->pg = iter->pg->next;
716 iter->idx = 0; 810 iter->idx = 0;
717 goto retry; 811 goto retry;
812 } else {
813 iter->idx = -1;
718 } 814 }
719 } else { 815 } else {
720 rec = &iter->pg->records[iter->idx++]; 816 rec = &iter->pg->records[iter->idx++];
@@ -737,8 +833,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
737 } 833 }
738 spin_unlock(&ftrace_lock); 834 spin_unlock(&ftrace_lock);
739 835
740 iter->pos = *pos;
741
742 return rec; 836 return rec;
743} 837}
744 838
@@ -746,13 +840,15 @@ static void *t_start(struct seq_file *m, loff_t *pos)
746{ 840{
747 struct ftrace_iterator *iter = m->private; 841 struct ftrace_iterator *iter = m->private;
748 void *p = NULL; 842 void *p = NULL;
749 loff_t l = -1;
750 843
751 if (*pos > iter->pos) 844 if (*pos > 0) {
752 *pos = iter->pos; 845 if (iter->idx < 0)
846 return p;
847 (*pos)--;
848 iter->idx--;
849 }
753 850
754 l = *pos; 851 p = t_next(m, p, pos);
755 p = t_next(m, p, &l);
756 852
757 return p; 853 return p;
758} 854}
@@ -763,21 +859,15 @@ static void t_stop(struct seq_file *m, void *p)
763 859
764static int t_show(struct seq_file *m, void *v) 860static int t_show(struct seq_file *m, void *v)
765{ 861{
766 struct ftrace_iterator *iter = m->private;
767 struct dyn_ftrace *rec = v; 862 struct dyn_ftrace *rec = v;
768 char str[KSYM_SYMBOL_LEN]; 863 char str[KSYM_SYMBOL_LEN];
769 int ret = 0;
770 864
771 if (!rec) 865 if (!rec)
772 return 0; 866 return 0;
773 867
774 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); 868 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
775 869
776 ret = seq_printf(m, "%s\n", str); 870 seq_printf(m, "%s\n", str);
777 if (ret < 0) {
778 iter->pos--;
779 iter->idx--;
780 }
781 871
782 return 0; 872 return 0;
783} 873}
@@ -803,7 +893,6 @@ ftrace_avail_open(struct inode *inode, struct file *file)
803 return -ENOMEM; 893 return -ENOMEM;
804 894
805 iter->pg = ftrace_pages_start; 895 iter->pg = ftrace_pages_start;
806 iter->pos = 0;
807 896
808 ret = seq_open(file, &show_ftrace_seq_ops); 897 ret = seq_open(file, &show_ftrace_seq_ops);
809 if (!ret) { 898 if (!ret) {
@@ -890,7 +979,6 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
890 979
891 if (file->f_mode & FMODE_READ) { 980 if (file->f_mode & FMODE_READ) {
892 iter->pg = ftrace_pages_start; 981 iter->pg = ftrace_pages_start;
893 iter->pos = 0;
894 iter->flags = enable ? FTRACE_ITER_FILTER : 982 iter->flags = enable ? FTRACE_ITER_FILTER :
895 FTRACE_ITER_NOTRACE; 983 FTRACE_ITER_NOTRACE;
896 984
@@ -959,6 +1047,13 @@ ftrace_match(unsigned char *buff, int len, int enable)
959 int type = MATCH_FULL; 1047 int type = MATCH_FULL;
960 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; 1048 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
961 unsigned i, match = 0, search_len = 0; 1049 unsigned i, match = 0, search_len = 0;
1050 int not = 0;
1051
1052 if (buff[0] == '!') {
1053 not = 1;
1054 buff++;
1055 len--;
1056 }
962 1057
963 for (i = 0; i < len; i++) { 1058 for (i = 0; i < len; i++) {
964 if (buff[i] == '*') { 1059 if (buff[i] == '*') {
@@ -1012,8 +1107,12 @@ ftrace_match(unsigned char *buff, int len, int enable)
1012 matched = 1; 1107 matched = 1;
1013 break; 1108 break;
1014 } 1109 }
1015 if (matched) 1110 if (matched) {
1016 rec->flags |= flag; 1111 if (not)
1112 rec->flags &= ~flag;
1113 else
1114 rec->flags |= flag;
1115 }
1017 } 1116 }
1018 pg = pg->next; 1117 pg = pg->next;
1019 } 1118 }
@@ -1181,7 +1280,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1181 1280
1182 mutex_lock(&ftrace_sysctl_lock); 1281 mutex_lock(&ftrace_sysctl_lock);
1183 mutex_lock(&ftrace_start_lock); 1282 mutex_lock(&ftrace_start_lock);
1184 if (ftrace_start && ftrace_enabled) 1283 if (ftrace_start_up && ftrace_enabled)
1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1284 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1186 mutex_unlock(&ftrace_start_lock); 1285 mutex_unlock(&ftrace_start_lock);
1187 mutex_unlock(&ftrace_sysctl_lock); 1286 mutex_unlock(&ftrace_sysctl_lock);
@@ -1233,12 +1332,233 @@ static struct file_operations ftrace_notrace_fops = {
1233 .release = ftrace_notrace_release, 1332 .release = ftrace_notrace_release,
1234}; 1333};
1235 1334
1236static __init int ftrace_init_debugfs(void) 1335#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1336
1337static DEFINE_MUTEX(graph_lock);
1338
1339int ftrace_graph_count;
1340unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
1341
1342static void *
1343g_next(struct seq_file *m, void *v, loff_t *pos)
1237{ 1344{
1238 struct dentry *d_tracer; 1345 unsigned long *array = m->private;
1239 struct dentry *entry; 1346 int index = *pos;
1240 1347
1241 d_tracer = tracing_init_dentry(); 1348 (*pos)++;
1349
1350 if (index >= ftrace_graph_count)
1351 return NULL;
1352
1353 return &array[index];
1354}
1355
1356static void *g_start(struct seq_file *m, loff_t *pos)
1357{
1358 void *p = NULL;
1359
1360 mutex_lock(&graph_lock);
1361
1362 p = g_next(m, p, pos);
1363
1364 return p;
1365}
1366
1367static void g_stop(struct seq_file *m, void *p)
1368{
1369 mutex_unlock(&graph_lock);
1370}
1371
1372static int g_show(struct seq_file *m, void *v)
1373{
1374 unsigned long *ptr = v;
1375 char str[KSYM_SYMBOL_LEN];
1376
1377 if (!ptr)
1378 return 0;
1379
1380 kallsyms_lookup(*ptr, NULL, NULL, NULL, str);
1381
1382 seq_printf(m, "%s\n", str);
1383
1384 return 0;
1385}
1386
1387static struct seq_operations ftrace_graph_seq_ops = {
1388 .start = g_start,
1389 .next = g_next,
1390 .stop = g_stop,
1391 .show = g_show,
1392};
1393
1394static int
1395ftrace_graph_open(struct inode *inode, struct file *file)
1396{
1397 int ret = 0;
1398
1399 if (unlikely(ftrace_disabled))
1400 return -ENODEV;
1401
1402 mutex_lock(&graph_lock);
1403 if ((file->f_mode & FMODE_WRITE) &&
1404 !(file->f_flags & O_APPEND)) {
1405 ftrace_graph_count = 0;
1406 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
1407 }
1408
1409 if (file->f_mode & FMODE_READ) {
1410 ret = seq_open(file, &ftrace_graph_seq_ops);
1411 if (!ret) {
1412 struct seq_file *m = file->private_data;
1413 m->private = ftrace_graph_funcs;
1414 }
1415 } else
1416 file->private_data = ftrace_graph_funcs;
1417 mutex_unlock(&graph_lock);
1418
1419 return ret;
1420}
1421
1422static ssize_t
1423ftrace_graph_read(struct file *file, char __user *ubuf,
1424 size_t cnt, loff_t *ppos)
1425{
1426 if (file->f_mode & FMODE_READ)
1427 return seq_read(file, ubuf, cnt, ppos);
1428 else
1429 return -EPERM;
1430}
1431
1432static int
1433ftrace_set_func(unsigned long *array, int idx, char *buffer)
1434{
1435 char str[KSYM_SYMBOL_LEN];
1436 struct dyn_ftrace *rec;
1437 struct ftrace_page *pg;
1438 int found = 0;
1439 int i, j;
1440
1441 if (ftrace_disabled)
1442 return -ENODEV;
1443
1444 /* should not be called from interrupt context */
1445 spin_lock(&ftrace_lock);
1446
1447 for (pg = ftrace_pages_start; pg; pg = pg->next) {
1448 for (i = 0; i < pg->index; i++) {
1449 rec = &pg->records[i];
1450
1451 if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
1452 continue;
1453
1454 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1455 if (strcmp(str, buffer) == 0) {
1456 found = 1;
1457 for (j = 0; j < idx; j++)
1458 if (array[j] == rec->ip) {
1459 found = 0;
1460 break;
1461 }
1462 if (found)
1463 array[idx] = rec->ip;
1464 break;
1465 }
1466 }
1467 }
1468 spin_unlock(&ftrace_lock);
1469
1470 return found ? 0 : -EINVAL;
1471}
1472
1473static ssize_t
1474ftrace_graph_write(struct file *file, const char __user *ubuf,
1475 size_t cnt, loff_t *ppos)
1476{
1477 unsigned char buffer[FTRACE_BUFF_MAX+1];
1478 unsigned long *array;
1479 size_t read = 0;
1480 ssize_t ret;
1481 int index = 0;
1482 char ch;
1483
1484 if (!cnt || cnt < 0)
1485 return 0;
1486
1487 mutex_lock(&graph_lock);
1488
1489 if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
1490 ret = -EBUSY;
1491 goto out;
1492 }
1493
1494 if (file->f_mode & FMODE_READ) {
1495 struct seq_file *m = file->private_data;
1496 array = m->private;
1497 } else
1498 array = file->private_data;
1499
1500 ret = get_user(ch, ubuf++);
1501 if (ret)
1502 goto out;
1503 read++;
1504 cnt--;
1505
1506 /* skip white space */
1507 while (cnt && isspace(ch)) {
1508 ret = get_user(ch, ubuf++);
1509 if (ret)
1510 goto out;
1511 read++;
1512 cnt--;
1513 }
1514
1515 if (isspace(ch)) {
1516 *ppos += read;
1517 ret = read;
1518 goto out;
1519 }
1520
1521 while (cnt && !isspace(ch)) {
1522 if (index < FTRACE_BUFF_MAX)
1523 buffer[index++] = ch;
1524 else {
1525 ret = -EINVAL;
1526 goto out;
1527 }
1528 ret = get_user(ch, ubuf++);
1529 if (ret)
1530 goto out;
1531 read++;
1532 cnt--;
1533 }
1534 buffer[index] = 0;
1535
1536 /* we allow only one at a time */
1537 ret = ftrace_set_func(array, ftrace_graph_count, buffer);
1538 if (ret)
1539 goto out;
1540
1541 ftrace_graph_count++;
1542
1543 file->f_pos += read;
1544
1545 ret = read;
1546 out:
1547 mutex_unlock(&graph_lock);
1548
1549 return ret;
1550}
1551
1552static const struct file_operations ftrace_graph_fops = {
1553 .open = ftrace_graph_open,
1554 .read = ftrace_graph_read,
1555 .write = ftrace_graph_write,
1556};
1557#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1558
1559static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
1560{
1561 struct dentry *entry;
1242 1562
1243 entry = debugfs_create_file("available_filter_functions", 0444, 1563 entry = debugfs_create_file("available_filter_functions", 0444,
1244 d_tracer, NULL, &ftrace_avail_fops); 1564 d_tracer, NULL, &ftrace_avail_fops);
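
For orientation, the set_graph_function plumbing above only fills ftrace_graph_funcs[] and ftrace_graph_count; the graph tracer consults that array when deciding whether to trace a function's entry. The real lookup helper lives elsewhere in this series (kernel/trace/trace.h), so the following is only an approximation of that check:

/* Approximate filter check: an empty list means "trace everything". */
static int graph_addr_wanted(unsigned long addr)
{
	int i;

	if (!ftrace_graph_count)
		return 1;

	for (i = 0; i < ftrace_graph_count; i++) {
		if (ftrace_graph_funcs[i] == addr)
			return 1;
	}
	return 0;
}
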
@@ -1263,12 +1583,20 @@ static __init int ftrace_init_debugfs(void)
1263 pr_warning("Could not create debugfs " 1583 pr_warning("Could not create debugfs "
1264 "'set_ftrace_notrace' entry\n"); 1584 "'set_ftrace_notrace' entry\n");
1265 1585
1586#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1587 entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
1588 NULL,
1589 &ftrace_graph_fops);
1590 if (!entry)
1591 pr_warning("Could not create debugfs "
1592 "'set_graph_function' entry\n");
1593#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1594
1266 return 0; 1595 return 0;
1267} 1596}
1268 1597
1269fs_initcall(ftrace_init_debugfs); 1598static int ftrace_convert_nops(struct module *mod,
1270 1599 unsigned long *start,
1271static int ftrace_convert_nops(unsigned long *start,
1272 unsigned long *end) 1600 unsigned long *end)
1273{ 1601{
1274 unsigned long *p; 1602 unsigned long *p;
@@ -1279,23 +1607,32 @@ static int ftrace_convert_nops(unsigned long *start,
1279 p = start; 1607 p = start;
1280 while (p < end) { 1608 while (p < end) {
1281 addr = ftrace_call_adjust(*p++); 1609 addr = ftrace_call_adjust(*p++);
1610 /*
1611 * Some architecture linkers will pad between
1612 * the different mcount_loc sections of different
1613 * object files to satisfy alignments.
1614 * Skip any NULL pointers.
1615 */
1616 if (!addr)
1617 continue;
1282 ftrace_record_ip(addr); 1618 ftrace_record_ip(addr);
1283 } 1619 }
1284 1620
1285 /* disable interrupts to prevent kstop machine */ 1621 /* disable interrupts to prevent kstop machine */
1286 local_irq_save(flags); 1622 local_irq_save(flags);
1287 ftrace_update_code(); 1623 ftrace_update_code(mod);
1288 local_irq_restore(flags); 1624 local_irq_restore(flags);
1289 mutex_unlock(&ftrace_start_lock); 1625 mutex_unlock(&ftrace_start_lock);
1290 1626
1291 return 0; 1627 return 0;
1292} 1628}
1293 1629
1294void ftrace_init_module(unsigned long *start, unsigned long *end) 1630void ftrace_init_module(struct module *mod,
1631 unsigned long *start, unsigned long *end)
1295{ 1632{
1296 if (ftrace_disabled || start == end) 1633 if (ftrace_disabled || start == end)
1297 return; 1634 return;
1298 ftrace_convert_nops(start, end); 1635 ftrace_convert_nops(mod, start, end);
1299} 1636}
1300 1637
1301extern unsigned long __start_mcount_loc[]; 1638extern unsigned long __start_mcount_loc[];
@@ -1325,7 +1662,8 @@ void __init ftrace_init(void)
1325 1662
1326 last_ftrace_enabled = ftrace_enabled = 1; 1663 last_ftrace_enabled = ftrace_enabled = 1;
1327 1664
1328 ret = ftrace_convert_nops(__start_mcount_loc, 1665 ret = ftrace_convert_nops(NULL,
1666 __start_mcount_loc,
1329 __stop_mcount_loc); 1667 __stop_mcount_loc);
1330 1668
1331 return; 1669 return;
@@ -1342,12 +1680,186 @@ static int __init ftrace_nodyn_init(void)
1342} 1680}
1343device_initcall(ftrace_nodyn_init); 1681device_initcall(ftrace_nodyn_init);
1344 1682
1345# define ftrace_startup() do { } while (0) 1683static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
1346# define ftrace_shutdown() do { } while (0) 1684static inline void ftrace_startup_enable(int command) { }
1685/* Keep as macros so we do not need to define the commands */
1686# define ftrace_startup(command) do { } while (0)
1687# define ftrace_shutdown(command) do { } while (0)
1347# define ftrace_startup_sysctl() do { } while (0) 1688# define ftrace_startup_sysctl() do { } while (0)
1348# define ftrace_shutdown_sysctl() do { } while (0) 1689# define ftrace_shutdown_sysctl() do { } while (0)
1349#endif /* CONFIG_DYNAMIC_FTRACE */ 1690#endif /* CONFIG_DYNAMIC_FTRACE */
1350 1691
1692static ssize_t
1693ftrace_pid_read(struct file *file, char __user *ubuf,
1694 size_t cnt, loff_t *ppos)
1695{
1696 char buf[64];
1697 int r;
1698
1699 if (ftrace_pid_trace == ftrace_swapper_pid)
1700 r = sprintf(buf, "swapper tasks\n");
1701 else if (ftrace_pid_trace)
1702 r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace));
1703 else
1704 r = sprintf(buf, "no pid\n");
1705
1706 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1707}
1708
1709static void clear_ftrace_swapper(void)
1710{
1711 struct task_struct *p;
1712 int cpu;
1713
1714 get_online_cpus();
1715 for_each_online_cpu(cpu) {
1716 p = idle_task(cpu);
1717 clear_tsk_trace_trace(p);
1718 }
1719 put_online_cpus();
1720}
1721
1722static void set_ftrace_swapper(void)
1723{
1724 struct task_struct *p;
1725 int cpu;
1726
1727 get_online_cpus();
1728 for_each_online_cpu(cpu) {
1729 p = idle_task(cpu);
1730 set_tsk_trace_trace(p);
1731 }
1732 put_online_cpus();
1733}
1734
1735static void clear_ftrace_pid(struct pid *pid)
1736{
1737 struct task_struct *p;
1738
1739 do_each_pid_task(pid, PIDTYPE_PID, p) {
1740 clear_tsk_trace_trace(p);
1741 } while_each_pid_task(pid, PIDTYPE_PID, p);
1742 put_pid(pid);
1743}
1744
1745static void set_ftrace_pid(struct pid *pid)
1746{
1747 struct task_struct *p;
1748
1749 do_each_pid_task(pid, PIDTYPE_PID, p) {
1750 set_tsk_trace_trace(p);
1751 } while_each_pid_task(pid, PIDTYPE_PID, p);
1752}
1753
1754static void clear_ftrace_pid_task(struct pid **pid)
1755{
1756 if (*pid == ftrace_swapper_pid)
1757 clear_ftrace_swapper();
1758 else
1759 clear_ftrace_pid(*pid);
1760
1761 *pid = NULL;
1762}
1763
1764static void set_ftrace_pid_task(struct pid *pid)
1765{
1766 if (pid == ftrace_swapper_pid)
1767 set_ftrace_swapper();
1768 else
1769 set_ftrace_pid(pid);
1770}
1771
1772static ssize_t
1773ftrace_pid_write(struct file *filp, const char __user *ubuf,
1774 size_t cnt, loff_t *ppos)
1775{
1776 struct pid *pid;
1777 char buf[64];
1778 long val;
1779 int ret;
1780
1781 if (cnt >= sizeof(buf))
1782 return -EINVAL;
1783
1784 if (copy_from_user(&buf, ubuf, cnt))
1785 return -EFAULT;
1786
1787 buf[cnt] = 0;
1788
1789 ret = strict_strtol(buf, 10, &val);
1790 if (ret < 0)
1791 return ret;
1792
1793 mutex_lock(&ftrace_start_lock);
1794 if (val < 0) {
1795 /* disable pid tracing */
1796 if (!ftrace_pid_trace)
1797 goto out;
1798
1799 clear_ftrace_pid_task(&ftrace_pid_trace);
1800
1801 } else {
1802 /* swapper task is special */
1803 if (!val) {
1804 pid = ftrace_swapper_pid;
1805 if (pid == ftrace_pid_trace)
1806 goto out;
1807 } else {
1808 pid = find_get_pid(val);
1809
1810 if (pid == ftrace_pid_trace) {
1811 put_pid(pid);
1812 goto out;
1813 }
1814 }
1815
1816 if (ftrace_pid_trace)
1817 clear_ftrace_pid_task(&ftrace_pid_trace);
1818
1819 if (!pid)
1820 goto out;
1821
1822 ftrace_pid_trace = pid;
1823
1824 set_ftrace_pid_task(ftrace_pid_trace);
1825 }
1826
1827 /* update the function call */
1828 ftrace_update_pid_func();
1829 ftrace_startup_enable(0);
1830
1831 out:
1832 mutex_unlock(&ftrace_start_lock);
1833
1834 return cnt;
1835}
1836
1837static struct file_operations ftrace_pid_fops = {
1838 .read = ftrace_pid_read,
1839 .write = ftrace_pid_write,
1840};
1841
1842static __init int ftrace_init_debugfs(void)
1843{
1844 struct dentry *d_tracer;
1845 struct dentry *entry;
1846
1847 d_tracer = tracing_init_dentry();
1848 if (!d_tracer)
1849 return 0;
1850
1851 ftrace_init_dyn_debugfs(d_tracer);
1852
1853 entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
1854 NULL, &ftrace_pid_fops);
1855 if (!entry)
1856 pr_warning("Could not create debugfs "
1857 "'set_ftrace_pid' entry\n");
1858 return 0;
1859}
1860
1861fs_initcall(ftrace_init_debugfs);
1862
1351/** 1863/**
1352 * ftrace_kill - kill ftrace 1864 * ftrace_kill - kill ftrace
1353 * 1865 *
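
As a usage note for the new set_ftrace_pid file registered above: per ftrace_pid_write(), writing a pid restricts function tracing to that task, writing 0 selects the idle (swapper) tasks, and writing a negative value clears the filter. A tiny userspace sketch; the /debug mount point is an assumption, use wherever debugfs is mounted:

#include <stdio.h>

int main(int argc, char **argv)
{
	/* assumed debugfs mount point; adjust as needed */
	FILE *f = fopen("/debug/tracing/set_ftrace_pid", "w");

	if (!f)
		return 1;
	/* "-1" clears the filter, "0" means the idle tasks, else a pid */
	fprintf(f, "%s\n", argc > 1 ? argv[1] : "-1");
	return fclose(f) ? 1 : 0;
}
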
@@ -1381,10 +1893,11 @@ int register_ftrace_function(struct ftrace_ops *ops)
1381 return -1; 1893 return -1;
1382 1894
1383 mutex_lock(&ftrace_sysctl_lock); 1895 mutex_lock(&ftrace_sysctl_lock);
1896
1384 ret = __register_ftrace_function(ops); 1897 ret = __register_ftrace_function(ops);
1385 ftrace_startup(); 1898 ftrace_startup(0);
1386 mutex_unlock(&ftrace_sysctl_lock);
1387 1899
1900 mutex_unlock(&ftrace_sysctl_lock);
1388 return ret; 1901 return ret;
1389} 1902}
1390 1903
@@ -1400,7 +1913,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
1400 1913
1401 mutex_lock(&ftrace_sysctl_lock); 1914 mutex_lock(&ftrace_sysctl_lock);
1402 ret = __unregister_ftrace_function(ops); 1915 ret = __unregister_ftrace_function(ops);
1403 ftrace_shutdown(); 1916 ftrace_shutdown(0);
1404 mutex_unlock(&ftrace_sysctl_lock); 1917 mutex_unlock(&ftrace_sysctl_lock);
1405 1918
1406 return ret; 1919 return ret;
@@ -1449,3 +1962,153 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1449 return ret; 1962 return ret;
1450} 1963}
1451 1964
1965#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1966
1967static atomic_t ftrace_graph_active;
1968
1969int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
1970{
1971 return 0;
1972}
1973
1974/* The callbacks that hook a function */
1975trace_func_graph_ret_t ftrace_graph_return =
1976 (trace_func_graph_ret_t)ftrace_stub;
1977trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
1978
1979/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
1980static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
1981{
1982 int i;
1983 int ret = 0;
1984 unsigned long flags;
1985 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
1986 struct task_struct *g, *t;
1987
1988 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
1989 ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH
1990 * sizeof(struct ftrace_ret_stack),
1991 GFP_KERNEL);
1992 if (!ret_stack_list[i]) {
1993 start = 0;
1994 end = i;
1995 ret = -ENOMEM;
1996 goto free;
1997 }
1998 }
1999
2000 read_lock_irqsave(&tasklist_lock, flags);
2001 do_each_thread(g, t) {
2002 if (start == end) {
2003 ret = -EAGAIN;
2004 goto unlock;
2005 }
2006
2007 if (t->ret_stack == NULL) {
2008 t->curr_ret_stack = -1;
2009 /* Make sure IRQs see the -1 first: */
2010 barrier();
2011 t->ret_stack = ret_stack_list[start++];
2012 atomic_set(&t->tracing_graph_pause, 0);
2013 atomic_set(&t->trace_overrun, 0);
2014 }
2015 } while_each_thread(g, t);
2016
2017unlock:
2018 read_unlock_irqrestore(&tasklist_lock, flags);
2019free:
2020 for (i = start; i < end; i++)
2021 kfree(ret_stack_list[i]);
2022 return ret;
2023}
2024
2025/* Allocate a return stack for each task */
2026static int start_graph_tracing(void)
2027{
2028 struct ftrace_ret_stack **ret_stack_list;
2029 int ret;
2030
2031 ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
2032 sizeof(struct ftrace_ret_stack *),
2033 GFP_KERNEL);
2034
2035 if (!ret_stack_list)
2036 return -ENOMEM;
2037
2038 do {
2039 ret = alloc_retstack_tasklist(ret_stack_list);
2040 } while (ret == -EAGAIN);
2041
2042 kfree(ret_stack_list);
2043 return ret;
2044}
2045
2046int register_ftrace_graph(trace_func_graph_ret_t retfunc,
2047 trace_func_graph_ent_t entryfunc)
2048{
2049 int ret = 0;
2050
2051 mutex_lock(&ftrace_sysctl_lock);
2052
2053 atomic_inc(&ftrace_graph_active);
2054 ret = start_graph_tracing();
2055 if (ret) {
2056 atomic_dec(&ftrace_graph_active);
2057 goto out;
2058 }
2059
2060 ftrace_graph_return = retfunc;
2061 ftrace_graph_entry = entryfunc;
2062
2063 ftrace_startup(FTRACE_START_FUNC_RET);
2064
2065out:
2066 mutex_unlock(&ftrace_sysctl_lock);
2067 return ret;
2068}
2069
2070void unregister_ftrace_graph(void)
2071{
2072 mutex_lock(&ftrace_sysctl_lock);
2073
2074 atomic_dec(&ftrace_graph_active);
2075 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
2076 ftrace_graph_entry = ftrace_graph_entry_stub;
2077 ftrace_shutdown(FTRACE_STOP_FUNC_RET);
2078
2079 mutex_unlock(&ftrace_sysctl_lock);
2080}
2081
2082/* Allocate a return stack for newly created task */
2083void ftrace_graph_init_task(struct task_struct *t)
2084{
2085 if (atomic_read(&ftrace_graph_active)) {
2086 t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
2087 * sizeof(struct ftrace_ret_stack),
2088 GFP_KERNEL);
2089 if (!t->ret_stack)
2090 return;
2091 t->curr_ret_stack = -1;
2092 atomic_set(&t->tracing_graph_pause, 0);
2093 atomic_set(&t->trace_overrun, 0);
2094 } else
2095 t->ret_stack = NULL;
2096}
2097
2098void ftrace_graph_exit_task(struct task_struct *t)
2099{
2100 struct ftrace_ret_stack *ret_stack = t->ret_stack;
2101
2102 t->ret_stack = NULL;
2103 /* NULL must become visible to IRQs before we free it: */
2104 barrier();
2105
2106 kfree(ret_stack);
2107}
2108
2109void ftrace_graph_stop(void)
2110{
2111 ftrace_stop();
2112}
2113#endif
2114
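
Finally, the graph-tracer registration API introduced at the end of ftrace.c takes the return hook first and the entry hook second. A minimal client sketch, assuming the callback typedefs from this series (an int entry handler on a struct ftrace_graph_ent *, a return handler on a struct ftrace_graph_ret *); how the entry handler's return value is interpreted is up to the graph tracer itself:

#include <linux/ftrace.h>
#include <linux/init.h>

/* Hypothetical client of register_ftrace_graph()/unregister_ftrace_graph(). */
static int my_graph_entry(struct ftrace_graph_ent *trace)
{
	return 1;	/* see trace_functions_graph.c for how this is used */
}

static void my_graph_return(struct ftrace_graph_ret *trace)
{
	/* entry/exit timestamps and the traced function are reported here */
}

static int __init my_graph_init(void)
{
	return register_ftrace_graph(my_graph_return, my_graph_entry);
}
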
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 668bbb5ef2bd..bb6922a931b1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -18,8 +18,46 @@
18 18
19#include "trace.h" 19#include "trace.h"
20 20
21/* Global flag to disable all recording to ring buffers */ 21/*
22static int ring_buffers_off __read_mostly; 22 * A fast way to enable or disable all ring buffers is to
23 * call tracing_on or tracing_off. Turning off the ring buffers
24 * prevents all ring buffers from being recorded to.
 25 * Turning this switch on makes it OK to write to the
26 * ring buffer, if the ring buffer is enabled itself.
27 *
 28 * There are three layers that must be on in order to write
29 * to the ring buffer.
30 *
31 * 1) This global flag must be set.
32 * 2) The ring buffer must be enabled for recording.
33 * 3) The per cpu buffer must be enabled for recording.
34 *
35 * In case of an anomaly, this global flag has a bit set that
 36 * will permanently disable all ring buffers.
37 */
38
39/*
40 * Global flag to disable all recording to ring buffers
41 * This has two bits: ON, DISABLED
42 *
43 * ON DISABLED
44 * ---- ----------
45 * 0 0 : ring buffers are off
46 * 1 0 : ring buffers are on
47 * X 1 : ring buffers are permanently disabled
48 */
49
50enum {
51 RB_BUFFERS_ON_BIT = 0,
52 RB_BUFFERS_DISABLED_BIT = 1,
53};
54
55enum {
56 RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
57 RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
58};
59
60static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
23 61
24/** 62/**
25 * tracing_on - enable all tracing buffers 63 * tracing_on - enable all tracing buffers
@@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly;
29 */ 67 */
30void tracing_on(void) 68void tracing_on(void)
31{ 69{
32 ring_buffers_off = 0; 70 set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
33} 71}
34 72
35/** 73/**
@@ -42,9 +80,22 @@ void tracing_on(void)
42 */ 80 */
43void tracing_off(void) 81void tracing_off(void)
44{ 82{
45 ring_buffers_off = 1; 83 clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
46} 84}
47 85
86/**
87 * tracing_off_permanent - permanently disable ring buffers
88 *
89 * This function, once called, will disable all ring buffers
90 * permanenty.
91 */
92void tracing_off_permanent(void)
93{
94 set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
95}
96
97#include "trace.h"
98
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 99/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 100#define DEBUG_SHIFT 0
50 101
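
Tying the layering comment added above to code: a write is only allowed when all three levels agree. A condensed sketch of that gate; rb_recording_allowed() is a hypothetical helper, the real checks are spread across ring_buffer_lock_reserve() and the per-cpu write path:

/* Hypothetical helper condensing the three enable checks. */
static int rb_recording_allowed(struct ring_buffer *buffer,
				struct ring_buffer_per_cpu *cpu_buffer)
{
	if (ring_buffer_flags != RB_BUFFERS_ON)		/* 1) global switch on, not disabled */
		return 0;
	if (atomic_read(&buffer->record_disabled))	/* 2) this ring buffer enabled */
		return 0;
	if (atomic_read(&cpu_buffer->record_disabled))	/* 3) this per-cpu buffer enabled */
		return 0;
	return 1;
}
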
@@ -56,7 +107,7 @@ u64 ring_buffer_time_stamp(int cpu)
56 preempt_disable_notrace(); 107 preempt_disable_notrace();
57 /* shift to debug/test normalization and TIME_EXTENTS */ 108 /* shift to debug/test normalization and TIME_EXTENTS */
58 time = sched_clock() << DEBUG_SHIFT; 109 time = sched_clock() << DEBUG_SHIFT;
59 preempt_enable_notrace(); 110 preempt_enable_no_resched_notrace();
60 111
61 return time; 112 return time;
62} 113}
@@ -144,20 +195,24 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
144#define TS_MASK ((1ULL << TS_SHIFT) - 1) 195#define TS_MASK ((1ULL << TS_SHIFT) - 1)
145#define TS_DELTA_TEST (~TS_MASK) 196#define TS_DELTA_TEST (~TS_MASK)
146 197
147/* 198struct buffer_data_page {
148 * This hack stolen from mm/slob.c.
149 * We can store per page timing information in the page frame of the page.
150 * Thanks to Peter Zijlstra for suggesting this idea.
151 */
152struct buffer_page {
153 u64 time_stamp; /* page time stamp */ 199 u64 time_stamp; /* page time stamp */
154 local_t write; /* index for next write */
155 local_t commit; /* write commited index */ 200 local_t commit; /* write commited index */
201 unsigned char data[]; /* data of buffer page */
202};
203
204struct buffer_page {
205 local_t write; /* index for next write */
156 unsigned read; /* index for next read */ 206 unsigned read; /* index for next read */
157 struct list_head list; /* list of free pages */ 207 struct list_head list; /* list of free pages */
158 void *page; /* Actual data page */ 208 struct buffer_data_page *page; /* Actual data page */
159}; 209};
160 210
211static void rb_init_page(struct buffer_data_page *bpage)
212{
213 local_set(&bpage->commit, 0);
214}
215
161/* 216/*
162 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing 217 * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
163 * this issue out. 218 * this issue out.
@@ -179,7 +234,7 @@ static inline int test_time_stamp(u64 delta)
179 return 0; 234 return 0;
180} 235}
181 236
182#define BUF_PAGE_SIZE PAGE_SIZE 237#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page))
183 238
184/* 239/*
185 * head_page == tail_page && head == tail then buffer is empty. 240 * head_page == tail_page && head == tail then buffer is empty.
@@ -187,7 +242,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 242struct ring_buffer_per_cpu {
188 int cpu; 243 int cpu;
189 struct ring_buffer *buffer; 244 struct ring_buffer *buffer;
190 spinlock_t lock; 245 spinlock_t reader_lock; /* serialize readers */
246 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 247 struct lock_class_key lock_key;
192 struct list_head pages; 248 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 249 struct buffer_page *head_page; /* read from head */
@@ -202,7 +258,6 @@ struct ring_buffer_per_cpu {
202}; 258};
203 259
204struct ring_buffer { 260struct ring_buffer {
205 unsigned long size;
206 unsigned pages; 261 unsigned pages;
207 unsigned flags; 262 unsigned flags;
208 int cpus; 263 int cpus;
@@ -221,32 +276,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 276 u64 read_stamp;
222}; 277};
223 278
279/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 280#define RB_WARN_ON(buffer, cond) \
225 do { \ 281 ({ \
226 if (unlikely(cond)) { \ 282 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 283 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 284 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 285 WARN_ON(1); \
248 } \ 286 } \
249 } while (0) 287 _____ret; \
288 })
250 289
251/** 290/**
252 * check_pages - integrity check of buffer pages 291 * check_pages - integrity check of buffer pages
@@ -258,16 +297,20 @@ struct ring_buffer_iter {
258static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 297static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
259{ 298{
260 struct list_head *head = &cpu_buffer->pages; 299 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 300 struct buffer_page *bpage, *tmp;
262 301
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 302 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 303 return -1;
304 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
305 return -1;
265 306
266 list_for_each_entry_safe(page, tmp, head, list) { 307 list_for_each_entry_safe(bpage, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 308 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 309 bpage->list.next->prev != &bpage->list))
269 RB_WARN_ON_RET(cpu_buffer, 310 return -1;
270 page->list.prev->next != &page->list); 311 if (RB_WARN_ON(cpu_buffer,
312 bpage->list.prev->next != &bpage->list))
313 return -1;
271 } 314 }
272 315
273 return 0; 316 return 0;
@@ -277,22 +320,23 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
277 unsigned nr_pages) 320 unsigned nr_pages)
278{ 321{
279 struct list_head *head = &cpu_buffer->pages; 322 struct list_head *head = &cpu_buffer->pages;
280 struct buffer_page *page, *tmp; 323 struct buffer_page *bpage, *tmp;
281 unsigned long addr; 324 unsigned long addr;
282 LIST_HEAD(pages); 325 LIST_HEAD(pages);
283 unsigned i; 326 unsigned i;
284 327
285 for (i = 0; i < nr_pages; i++) { 328 for (i = 0; i < nr_pages; i++) {
286 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 329 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
287 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 330 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
288 if (!page) 331 if (!bpage)
289 goto free_pages; 332 goto free_pages;
290 list_add(&page->list, &pages); 333 list_add(&bpage->list, &pages);
291 334
292 addr = __get_free_page(GFP_KERNEL); 335 addr = __get_free_page(GFP_KERNEL);
293 if (!addr) 336 if (!addr)
294 goto free_pages; 337 goto free_pages;
295 page->page = (void *)addr; 338 bpage->page = (void *)addr;
339 rb_init_page(bpage->page);
296 } 340 }
297 341
298 list_splice(&pages, head); 342 list_splice(&pages, head);
@@ -302,9 +346,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
302 return 0; 346 return 0;
303 347
304 free_pages: 348 free_pages:
305 list_for_each_entry_safe(page, tmp, &pages, list) { 349 list_for_each_entry_safe(bpage, tmp, &pages, list) {
306 list_del_init(&page->list); 350 list_del_init(&bpage->list);
307 free_buffer_page(page); 351 free_buffer_page(bpage);
308 } 352 }
309 return -ENOMEM; 353 return -ENOMEM;
310} 354}
@@ -313,7 +357,7 @@ static struct ring_buffer_per_cpu *
313rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) 357rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
314{ 358{
315 struct ring_buffer_per_cpu *cpu_buffer; 359 struct ring_buffer_per_cpu *cpu_buffer;
316 struct buffer_page *page; 360 struct buffer_page *bpage;
317 unsigned long addr; 361 unsigned long addr;
318 int ret; 362 int ret;
319 363
@@ -324,19 +368,21 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 368
325 cpu_buffer->cpu = cpu; 369 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 370 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 371 spin_lock_init(&cpu_buffer->reader_lock);
372 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 373 INIT_LIST_HEAD(&cpu_buffer->pages);
329 374
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 375 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
331 GFP_KERNEL, cpu_to_node(cpu)); 376 GFP_KERNEL, cpu_to_node(cpu));
332 if (!page) 377 if (!bpage)
333 goto fail_free_buffer; 378 goto fail_free_buffer;
334 379
335 cpu_buffer->reader_page = page; 380 cpu_buffer->reader_page = bpage;
336 addr = __get_free_page(GFP_KERNEL); 381 addr = __get_free_page(GFP_KERNEL);
337 if (!addr) 382 if (!addr)
338 goto fail_free_reader; 383 goto fail_free_reader;
339 page->page = (void *)addr; 384 bpage->page = (void *)addr;
385 rb_init_page(bpage->page);
340 386
341 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 387 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
342 388
@@ -361,14 +407,14 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
361static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 407static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
362{ 408{
363 struct list_head *head = &cpu_buffer->pages; 409 struct list_head *head = &cpu_buffer->pages;
364 struct buffer_page *page, *tmp; 410 struct buffer_page *bpage, *tmp;
365 411
366 list_del_init(&cpu_buffer->reader_page->list); 412 list_del_init(&cpu_buffer->reader_page->list);
367 free_buffer_page(cpu_buffer->reader_page); 413 free_buffer_page(cpu_buffer->reader_page);
368 414
369 list_for_each_entry_safe(page, tmp, head, list) { 415 list_for_each_entry_safe(bpage, tmp, head, list) {
370 list_del_init(&page->list); 416 list_del_init(&bpage->list);
371 free_buffer_page(page); 417 free_buffer_page(bpage);
372 } 418 }
373 kfree(cpu_buffer); 419 kfree(cpu_buffer);
374} 420}
@@ -465,7 +511,7 @@ static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
465static void 511static void
466rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) 512rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
467{ 513{
468 struct buffer_page *page; 514 struct buffer_page *bpage;
469 struct list_head *p; 515 struct list_head *p;
470 unsigned i; 516 unsigned i;
471 517
@@ -473,13 +519,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 519 synchronize_sched();
474 520
475 for (i = 0; i < nr_pages; i++) { 521 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 522 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
523 return;
477 p = cpu_buffer->pages.next; 524 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 525 bpage = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 526 list_del_init(&bpage->list);
480 free_buffer_page(page); 527 free_buffer_page(bpage);
481 } 528 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 529 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
530 return;
483 531
484 rb_reset_cpu(cpu_buffer); 532 rb_reset_cpu(cpu_buffer);
485 533
@@ -493,7 +541,7 @@ static void
493rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, 541rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
494 struct list_head *pages, unsigned nr_pages) 542 struct list_head *pages, unsigned nr_pages)
495{ 543{
496 struct buffer_page *page; 544 struct buffer_page *bpage;
497 struct list_head *p; 545 struct list_head *p;
498 unsigned i; 546 unsigned i;
499 547
@@ -501,11 +549,12 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 549 synchronize_sched();
502 550
503 for (i = 0; i < nr_pages; i++) { 551 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 552 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
553 return;
505 p = pages->next; 554 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 555 bpage = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 556 list_del_init(&bpage->list);
508 list_add_tail(&page->list, &cpu_buffer->pages); 557 list_add_tail(&bpage->list, &cpu_buffer->pages);
509 } 558 }
510 rb_reset_cpu(cpu_buffer); 559 rb_reset_cpu(cpu_buffer);
511 560
@@ -532,7 +581,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
532{ 581{
533 struct ring_buffer_per_cpu *cpu_buffer; 582 struct ring_buffer_per_cpu *cpu_buffer;
534 unsigned nr_pages, rm_pages, new_pages; 583 unsigned nr_pages, rm_pages, new_pages;
535 struct buffer_page *page, *tmp; 584 struct buffer_page *bpage, *tmp;
536 unsigned long buffer_size; 585 unsigned long buffer_size;
537 unsigned long addr; 586 unsigned long addr;
538 LIST_HEAD(pages); 587 LIST_HEAD(pages);
@@ -562,7 +611,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 611 if (size < buffer_size) {
563 612
564 /* easy case, just free pages */ 613 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 614 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
615 mutex_unlock(&buffer->mutex);
616 return -1;
617 }
566 618
567 rm_pages = buffer->pages - nr_pages; 619 rm_pages = buffer->pages - nr_pages;
568 620
@@ -581,21 +633,26 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 633 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 634 * them all and return -ENOMEM;
583 */ 635 */
584 BUG_ON(nr_pages <= buffer->pages); 636 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
637 mutex_unlock(&buffer->mutex);
638 return -1;
639 }
640
585 new_pages = nr_pages - buffer->pages; 641 new_pages = nr_pages - buffer->pages;
586 642
587 for_each_buffer_cpu(buffer, cpu) { 643 for_each_buffer_cpu(buffer, cpu) {
588 for (i = 0; i < new_pages; i++) { 644 for (i = 0; i < new_pages; i++) {
589 page = kzalloc_node(ALIGN(sizeof(*page), 645 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
590 cache_line_size()), 646 cache_line_size()),
591 GFP_KERNEL, cpu_to_node(cpu)); 647 GFP_KERNEL, cpu_to_node(cpu));
592 if (!page) 648 if (!bpage)
593 goto free_pages; 649 goto free_pages;
594 list_add(&page->list, &pages); 650 list_add(&bpage->list, &pages);
595 addr = __get_free_page(GFP_KERNEL); 651 addr = __get_free_page(GFP_KERNEL);
596 if (!addr) 652 if (!addr)
597 goto free_pages; 653 goto free_pages;
598 page->page = (void *)addr; 654 bpage->page = (void *)addr;
655 rb_init_page(bpage->page);
599 } 656 }
600 } 657 }
601 658
@@ -604,7 +661,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 661 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 662 }
606 663
607 BUG_ON(!list_empty(&pages)); 664 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
665 mutex_unlock(&buffer->mutex);
666 return -1;
667 }
608 668
609 out: 669 out:
610 buffer->pages = nr_pages; 670 buffer->pages = nr_pages;
@@ -613,9 +673,9 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
613 return size; 673 return size;
614 674
615 free_pages: 675 free_pages:
616 list_for_each_entry_safe(page, tmp, &pages, list) { 676 list_for_each_entry_safe(bpage, tmp, &pages, list) {
617 list_del_init(&page->list); 677 list_del_init(&bpage->list);
618 free_buffer_page(page); 678 free_buffer_page(bpage);
619 } 679 }
620 mutex_unlock(&buffer->mutex); 680 mutex_unlock(&buffer->mutex);
621 return -ENOMEM; 681 return -ENOMEM;
@@ -626,9 +686,15 @@ static inline int rb_null_event(struct ring_buffer_event *event)
626 return event->type == RINGBUF_TYPE_PADDING; 686 return event->type == RINGBUF_TYPE_PADDING;
627} 687}
628 688
629static inline void *__rb_page_index(struct buffer_page *page, unsigned index) 689static inline void *
690__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
691{
692 return bpage->data + index;
693}
694
695static inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
630{ 696{
631 return page->page + index; 697 return bpage->page->data + index;
632} 698}
633 699
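Most of the mechanical churn in this file (bpage->page->commit, bpage->page->time_stamp, the new rb_init_page() calls, and __rb_data_page_index() above) comes from moving the per-page metadata into a struct buffer_data_page that sits at the start of the data page itself, so a whole page can later be handed to a reader. The definitions live near the top of ring_buffer.c, outside this diff; judging from the accesses shown here they are presumably close to:

/* Reconstructed from usage in this diff, not copied from the source. */
struct buffer_data_page {
	u64		 time_stamp;	/* page time stamp */
	local_t		 commit;	/* committed write index */
	unsigned char	 data[];	/* event records start here */
};

struct buffer_page {
	local_t		 write;		/* index of next write */
	unsigned	 read;		/* index of next read */
	struct list_head list;		/* list of buffer pages */
	struct buffer_data_page *page;	/* the actual data page */
};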
634static inline struct ring_buffer_event * 700static inline struct ring_buffer_event *
@@ -658,7 +724,7 @@ static inline unsigned rb_page_write(struct buffer_page *bpage)
658 724
659static inline unsigned rb_page_commit(struct buffer_page *bpage) 725static inline unsigned rb_page_commit(struct buffer_page *bpage)
660{ 726{
661 return local_read(&bpage->commit); 727 return local_read(&bpage->page->commit);
662} 728}
663 729
664/* Size is determined by what has been commited */ 730/* Size is determined by what has been commited */
@@ -693,7 +759,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 759 head += rb_event_length(event)) {
694 760
695 event = __rb_page_index(cpu_buffer->head_page, head); 761 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 762 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
763 return;
697 /* Only count data entries */ 764 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 765 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 766 continue;
@@ -703,14 +770,14 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
703} 770}
704 771
705static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, 772static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
706 struct buffer_page **page) 773 struct buffer_page **bpage)
707{ 774{
708 struct list_head *p = (*page)->list.next; 775 struct list_head *p = (*bpage)->list.next;
709 776
710 if (p == &cpu_buffer->pages) 777 if (p == &cpu_buffer->pages)
711 p = p->next; 778 p = p->next;
712 779
713 *page = list_entry(p, struct buffer_page, list); 780 *bpage = list_entry(p, struct buffer_page, list);
714} 781}
715 782
716static inline unsigned 783static inline unsigned
@@ -746,16 +813,18 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 813 addr &= PAGE_MASK;
747 814
748 while (cpu_buffer->commit_page->page != (void *)addr) { 815 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 816 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 817 cpu_buffer->commit_page == cpu_buffer->tail_page))
751 cpu_buffer->commit_page->commit = 818 return;
819 cpu_buffer->commit_page->page->commit =
752 cpu_buffer->commit_page->write; 820 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 821 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
754 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 822 cpu_buffer->write_stamp =
823 cpu_buffer->commit_page->page->time_stamp;
755 } 824 }
756 825
757 /* Now set the commit to the event's index */ 826 /* Now set the commit to the event's index */
758 local_set(&cpu_buffer->commit_page->commit, index); 827 local_set(&cpu_buffer->commit_page->page->commit, index);
759} 828}
760 829
761static inline void 830static inline void
@@ -770,16 +839,17 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
770 * assign the commit to the tail. 839 * assign the commit to the tail.
771 */ 840 */
772 while (cpu_buffer->commit_page != cpu_buffer->tail_page) { 841 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
773 cpu_buffer->commit_page->commit = 842 cpu_buffer->commit_page->page->commit =
774 cpu_buffer->commit_page->write; 843 cpu_buffer->commit_page->write;
775 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 844 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
776 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp; 845 cpu_buffer->write_stamp =
846 cpu_buffer->commit_page->page->time_stamp;
777 /* add barrier to keep gcc from optimizing too much */ 847 /* add barrier to keep gcc from optimizing too much */
778 barrier(); 848 barrier();
779 } 849 }
780 while (rb_commit_index(cpu_buffer) != 850 while (rb_commit_index(cpu_buffer) !=
781 rb_page_write(cpu_buffer->commit_page)) { 851 rb_page_write(cpu_buffer->commit_page)) {
782 cpu_buffer->commit_page->commit = 852 cpu_buffer->commit_page->page->commit =
783 cpu_buffer->commit_page->write; 853 cpu_buffer->commit_page->write;
784 barrier(); 854 barrier();
785 } 855 }
@@ -787,7 +857,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
787 857
788static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) 858static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
789{ 859{
790 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp; 860 cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
791 cpu_buffer->reader_page->read = 0; 861 cpu_buffer->reader_page->read = 0;
792} 862}
793 863
@@ -806,7 +876,7 @@ static inline void rb_inc_iter(struct ring_buffer_iter *iter)
806 else 876 else
807 rb_inc_page(cpu_buffer, &iter->head_page); 877 rb_inc_page(cpu_buffer, &iter->head_page);
808 878
809 iter->read_stamp = iter->head_page->time_stamp; 879 iter->read_stamp = iter->head_page->page->time_stamp;
810 iter->head = 0; 880 iter->head = 0;
811} 881}
812 882
@@ -894,7 +964,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 964 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 965 struct buffer_page *next_page = tail_page;
896 966
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 967 local_irq_save(flags);
968 __raw_spin_lock(&cpu_buffer->lock);
898 969
899 rb_inc_page(cpu_buffer, &next_page); 970 rb_inc_page(cpu_buffer, &next_page);
900 971
@@ -902,7 +973,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 973 reader_page = cpu_buffer->reader_page;
903 974
904 /* we grabbed the lock before incrementing */ 975 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 976 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
977 goto out_unlock;
906 978
907 /* 979 /*
908 * If for some reason, we had an interrupt storm that made 980 * If for some reason, we had an interrupt storm that made
@@ -940,12 +1012,12 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
940 */ 1012 */
941 if (tail_page == cpu_buffer->tail_page) { 1013 if (tail_page == cpu_buffer->tail_page) {
942 local_set(&next_page->write, 0); 1014 local_set(&next_page->write, 0);
943 local_set(&next_page->commit, 0); 1015 local_set(&next_page->page->commit, 0);
944 cpu_buffer->tail_page = next_page; 1016 cpu_buffer->tail_page = next_page;
945 1017
946 /* reread the time stamp */ 1018 /* reread the time stamp */
947 *ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1019 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
948 cpu_buffer->tail_page->time_stamp = *ts; 1020 cpu_buffer->tail_page->page->time_stamp = *ts;
949 } 1021 }
950 1022
951 /* 1023 /*
@@ -970,7 +1042,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 1042 rb_set_commit_to_write(cpu_buffer);
971 } 1043 }
972 1044
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1045 __raw_spin_unlock(&cpu_buffer->lock);
1046 local_irq_restore(flags);
974 1047
975 /* fail and let the caller try again */ 1048 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 1049 return ERR_PTR(-EAGAIN);
@@ -978,7 +1051,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 1051
979 /* We reserved something on the buffer */ 1052 /* We reserved something on the buffer */
980 1053
981 BUG_ON(write > BUF_PAGE_SIZE); 1054 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
1055 return NULL;
982 1056
983 event = __rb_page_index(tail_page, tail); 1057 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 1058 rb_update_event(event, type, length);
@@ -988,12 +1062,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
988 * this page's time stamp. 1062 * this page's time stamp.
989 */ 1063 */
990 if (!tail && rb_is_commit(cpu_buffer, event)) 1064 if (!tail && rb_is_commit(cpu_buffer, event))
991 cpu_buffer->commit_page->time_stamp = *ts; 1065 cpu_buffer->commit_page->page->time_stamp = *ts;
992 1066
993 return event; 1067 return event;
994 1068
995 out_unlock: 1069 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1070 __raw_spin_unlock(&cpu_buffer->lock);
1071 local_irq_restore(flags);
997 return NULL; 1072 return NULL;
998} 1073}
999 1074
@@ -1038,7 +1113,7 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1038 event->time_delta = *delta & TS_MASK; 1113 event->time_delta = *delta & TS_MASK;
1039 event->array[0] = *delta >> TS_SHIFT; 1114 event->array[0] = *delta >> TS_SHIFT;
1040 } else { 1115 } else {
1041 cpu_buffer->commit_page->time_stamp = *ts; 1116 cpu_buffer->commit_page->page->time_stamp = *ts;
1042 event->time_delta = 0; 1117 event->time_delta = 0;
1043 event->array[0] = 0; 1118 event->array[0] = 0;
1044 } 1119 }
@@ -1076,10 +1151,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1151 * storm or we have something buggy.
1077 * Bail! 1152 * Bail!
1078 */ 1153 */
1079 if (unlikely(++nr_loops > 1000)) { 1154 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1155 return NULL;
1082 }
1083 1156
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1157 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1158
@@ -1175,15 +1248,14 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1175 struct ring_buffer_event *event; 1248 struct ring_buffer_event *event;
1176 int cpu, resched; 1249 int cpu, resched;
1177 1250
1178 if (ring_buffers_off) 1251 if (ring_buffer_flags != RB_BUFFERS_ON)
1179 return NULL; 1252 return NULL;
1180 1253
1181 if (atomic_read(&buffer->record_disabled)) 1254 if (atomic_read(&buffer->record_disabled))
1182 return NULL; 1255 return NULL;
1183 1256
1184 /* If we are tracing schedule, we don't want to recurse */ 1257 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1258 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1259
1188 cpu = raw_smp_processor_id(); 1260 cpu = raw_smp_processor_id();
1189 1261
@@ -1214,10 +1286,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1286 return event;
1215 1287
1216 out: 1288 out:
1217 if (resched) 1289 ftrace_preempt_enable(resched);
1218 preempt_enable_no_resched_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1290 return NULL;
1222} 1291}
1223 1292
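Both write paths now call ftrace_preempt_disable()/ftrace_preempt_enable() instead of open-coding the need_resched()/preempt_disable_notrace() pair that the removed lines show. The helpers themselves are defined in kernel/trace/trace.h and are not part of this diff; judging only from the code they replace, they presumably amount to the sketch below (the stub functions exist solely so the fragment stands alone outside the kernel):

/* Userspace stand-ins for the kernel primitives the helpers wrap. */
static int  need_resched(void)                        { return 0; }
static void preempt_disable_notrace(void)             { }
static void preempt_enable_no_resched_notrace(void)   { }
static void preempt_enable_notrace(void)              { }

/* Presumed shape of the helpers used by the new code above. */
#define ftrace_preempt_disable()			\
	({						\
		int ___resched = need_resched();	\
		preempt_disable_notrace();		\
		___resched;				\
	})

#define ftrace_preempt_enable(resched)			\
	do {						\
		if (resched)				\
			preempt_enable_no_resched_notrace(); \
		else					\
			preempt_enable_notrace();	\
	} while (0)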
@@ -1259,12 +1328,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1328 /*
1260 * Only the last preempt count needs to restore preemption. 1329 * Only the last preempt count needs to restore preemption.
1261 */ 1330 */
1262 if (preempt_count() == 1) { 1331 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1332 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1333 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1334 preempt_enable_no_resched_notrace();
1269 1335
1270 return 0; 1336 return 0;
@@ -1294,14 +1360,13 @@ int ring_buffer_write(struct ring_buffer *buffer,
1294 int ret = -EBUSY; 1360 int ret = -EBUSY;
1295 int cpu, resched; 1361 int cpu, resched;
1296 1362
1297 if (ring_buffers_off) 1363 if (ring_buffer_flags != RB_BUFFERS_ON)
1298 return -EBUSY; 1364 return -EBUSY;
1299 1365
1300 if (atomic_read(&buffer->record_disabled)) 1366 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1367 return -EBUSY;
1302 1368
1303 resched = need_resched(); 1369 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1370
1306 cpu = raw_smp_processor_id(); 1371 cpu = raw_smp_processor_id();
1307 1372
@@ -1327,10 +1392,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1392
1328 ret = 0; 1393 ret = 0;
1329 out: 1394 out:
1330 if (resched) 1395 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1396
1335 return ret; 1397 return ret;
1336} 1398}
@@ -1489,14 +1551,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1551 return overruns;
1490} 1552}
1491 1553
1492/** 1554static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1555{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1556 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1557
@@ -1511,7 +1566,24 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1511 if (iter->head) 1566 if (iter->head)
1512 iter->read_stamp = cpu_buffer->read_stamp; 1567 iter->read_stamp = cpu_buffer->read_stamp;
1513 else 1568 else
1514 iter->read_stamp = iter->head_page->time_stamp; 1569 iter->read_stamp = iter->head_page->page->time_stamp;
1570}
1571
1572/**
1573 * ring_buffer_iter_reset - reset an iterator
1574 * @iter: The iterator to reset
1575 *
1576 * Resets the iterator, so that it will start from the beginning
1577 * again.
1578 */
1579void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1580{
1581 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1582 unsigned long flags;
1583
1584 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1585 rb_iter_reset(iter);
1586 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1515} 1587}
1516 1588
1517/** 1589/**
@@ -1597,7 +1669,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1669 unsigned long flags;
1598 int nr_loops = 0; 1670 int nr_loops = 0;
1599 1671
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1672 local_irq_save(flags);
1673 __raw_spin_lock(&cpu_buffer->lock);
1601 1674
1602 again: 1675 again:
1603 /* 1676 /*
@@ -1606,8 +1679,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1679 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1680 * reason to loop four times (that I know of).
1608 */ 1681 */
1609 if (unlikely(++nr_loops > 3)) { 1682 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1683 reader = NULL;
1612 goto out; 1684 goto out;
1613 } 1685 }
@@ -1619,8 +1691,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1691 goto out;
1620 1692
1621 /* Never should we have an index greater than the size */ 1693 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1694 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1695 cpu_buffer->reader_page->read > rb_page_size(reader)))
1696 goto out;
1624 1697
1625 /* check if we caught up to the tail */ 1698 /* check if we caught up to the tail */
1626 reader = NULL; 1699 reader = NULL;
@@ -1637,7 +1710,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1637 cpu_buffer->reader_page->list.prev = reader->list.prev; 1710 cpu_buffer->reader_page->list.prev = reader->list.prev;
1638 1711
1639 local_set(&cpu_buffer->reader_page->write, 0); 1712 local_set(&cpu_buffer->reader_page->write, 0);
1640 local_set(&cpu_buffer->reader_page->commit, 0); 1713 local_set(&cpu_buffer->reader_page->page->commit, 0);
1641 1714
1642 /* Make the reader page now replace the head */ 1715 /* Make the reader page now replace the head */
1643 reader->list.prev->next = &cpu_buffer->reader_page->list; 1716 reader->list.prev->next = &cpu_buffer->reader_page->list;
@@ -1659,7 +1732,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1732 goto again;
1660 1733
1661 out: 1734 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1735 __raw_spin_unlock(&cpu_buffer->lock);
1736 local_irq_restore(flags);
1663 1737
1664 return reader; 1738 return reader;
1665} 1739}
@@ -1673,7 +1747,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1747 reader = rb_get_reader_page(cpu_buffer);
1674 1748
1675 /* This function should not be called when buffer is empty */ 1749 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1750 if (RB_WARN_ON(cpu_buffer, !reader))
1751 return;
1677 1752
1678 event = rb_reader_event(cpu_buffer); 1753 event = rb_reader_event(cpu_buffer);
1679 1754
@@ -1700,7 +1775,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1775 * Check if we are at the end of the buffer.
1701 */ 1776 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1777 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1778 if (RB_WARN_ON(buffer,
1779 iter->head_page == cpu_buffer->commit_page))
1780 return;
1704 rb_inc_iter(iter); 1781 rb_inc_iter(iter);
1705 return; 1782 return;
1706 } 1783 }
@@ -1713,8 +1790,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1790 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1791 * at the tail of the buffer.
1715 */ 1792 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1793 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1794 (iter->head_page == cpu_buffer->commit_page) &&
1795 (iter->head + length > rb_commit_index(cpu_buffer))))
1796 return;
1718 1797
1719 rb_update_iter_read_stamp(iter, event); 1798 rb_update_iter_read_stamp(iter, event);
1720 1799
@@ -1726,17 +1805,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1805 rb_advance_iter(iter);
1727} 1806}
1728 1807
1729/** 1808static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1809rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
1732 * @cpu: The cpu to peak at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1810{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1811 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1812 struct ring_buffer_event *event;
@@ -1757,10 +1827,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1827 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1828 * an anomaly.
1759 */ 1829 */
1760 if (unlikely(++nr_loops > 10)) { 1830 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1831 return NULL;
1763 }
1764 1832
1765 reader = rb_get_reader_page(cpu_buffer); 1833 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1834 if (!reader)
@@ -1798,16 +1866,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1866 return NULL;
1799} 1867}
1800 1868
1801/** 1869static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1870rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1871{
1812 struct ring_buffer *buffer; 1872 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1873 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1889,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1889 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1890 * an anomaly.
1831 */ 1891 */
1832 if (unlikely(++nr_loops > 10)) { 1892 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1893 return NULL;
1835 }
1836 1894
1837 if (rb_per_cpu_empty(cpu_buffer)) 1895 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1896 return NULL;
@@ -1869,6 +1927,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1927}
1870 1928
1871/** 1929/**
1930 * ring_buffer_peek - peek at the next event to be read
1931 * @buffer: The ring buffer to read
 1932 * @cpu: The cpu to peek at
1933 * @ts: The timestamp counter of this event.
1934 *
1935 * This will return the event that will be read next, but does
1936 * not consume the data.
1937 */
1938struct ring_buffer_event *
1939ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1940{
1941 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1942 struct ring_buffer_event *event;
1943 unsigned long flags;
1944
1945 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1946 event = rb_buffer_peek(buffer, cpu, ts);
1947 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1948
1949 return event;
1950}
1951
1952/**
1953 * ring_buffer_iter_peek - peek at the next event to be read
1954 * @iter: The ring buffer iterator
1955 * @ts: The timestamp counter of this event.
1956 *
1957 * This will return the event that will be read next, but does
1958 * not increment the iterator.
1959 */
1960struct ring_buffer_event *
1961ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1962{
1963 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1964 struct ring_buffer_event *event;
1965 unsigned long flags;
1966
1967 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1968 event = rb_iter_peek(iter, ts);
1969 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1970
1971 return event;
1972}
1973
1974/**
1872 * ring_buffer_consume - return an event and consume it 1975 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1976 * @buffer: The ring buffer to get the next event from
1874 * 1977 *
@@ -1879,19 +1982,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1982struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1983ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1984{
1882 struct ring_buffer_per_cpu *cpu_buffer; 1985 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 1986 struct ring_buffer_event *event;
1987 unsigned long flags;
1884 1988
1885 if (!cpu_isset(cpu, buffer->cpumask)) 1989 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 1990 return NULL;
1887 1991
1888 event = ring_buffer_peek(buffer, cpu, ts); 1992 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1993
1994 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 1995 if (!event)
1890 return NULL; 1996 goto out;
1891 1997
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 1998 rb_advance_reader(cpu_buffer);
1894 1999
2000 out:
2001 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2002
1895 return event; 2003 return event;
1896} 2004}
1897 2005
@@ -1928,9 +2036,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 2036 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 2037 synchronize_sched();
1930 2038
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 2039 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 2040 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2041 rb_iter_reset(iter);
2042 __raw_spin_unlock(&cpu_buffer->lock);
2043 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 2044
1935 return iter; 2045 return iter;
1936} 2046}
@@ -1962,12 +2072,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2072ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2073{
1964 struct ring_buffer_event *event; 2074 struct ring_buffer_event *event;
2075 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2076 unsigned long flags;
1965 2077
1966 event = ring_buffer_iter_peek(iter, ts); 2078 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2079 event = rb_iter_peek(iter, ts);
1967 if (!event) 2080 if (!event)
1968 return NULL; 2081 goto out;
1969 2082
1970 rb_advance_iter(iter); 2083 rb_advance_iter(iter);
2084 out:
2085 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2086
1972 return event; 2087 return event;
1973} 2088}
@@ -1987,7 +2102,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1987 cpu_buffer->head_page 2102 cpu_buffer->head_page
1988 = list_entry(cpu_buffer->pages.next, struct buffer_page, list); 2103 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1989 local_set(&cpu_buffer->head_page->write, 0); 2104 local_set(&cpu_buffer->head_page->write, 0);
1990 local_set(&cpu_buffer->head_page->commit, 0); 2105 local_set(&cpu_buffer->head_page->page->commit, 0);
1991 2106
1992 cpu_buffer->head_page->read = 0; 2107 cpu_buffer->head_page->read = 0;
1993 2108
@@ -1996,7 +2111,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1996 2111
1997 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 2112 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1998 local_set(&cpu_buffer->reader_page->write, 0); 2113 local_set(&cpu_buffer->reader_page->write, 0);
1999 local_set(&cpu_buffer->reader_page->commit, 0); 2114 local_set(&cpu_buffer->reader_page->page->commit, 0);
2000 cpu_buffer->reader_page->read = 0; 2115 cpu_buffer->reader_page->read = 0;
2001 2116
2002 cpu_buffer->overrun = 0; 2117 cpu_buffer->overrun = 0;
@@ -2016,11 +2131,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2131 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2132 return;
2018 2133
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2134 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2135
2136 __raw_spin_lock(&cpu_buffer->lock);
2020 2137
2021 rb_reset_cpu(cpu_buffer); 2138 rb_reset_cpu(cpu_buffer);
2022 2139
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2140 __raw_spin_unlock(&cpu_buffer->lock);
2141
2142 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2143}
2025 2144
2026/** 2145/**
@@ -2090,8 +2209,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2090 return -EINVAL; 2209 return -EINVAL;
2091 2210
2092 /* At least make sure the two buffers are somewhat the same */ 2211 /* At least make sure the two buffers are somewhat the same */
2093 if (buffer_a->size != buffer_b->size || 2212 if (buffer_a->pages != buffer_b->pages)
2094 buffer_a->pages != buffer_b->pages)
2095 return -EINVAL; 2213 return -EINVAL;
2096 2214
2097 cpu_buffer_a = buffer_a->buffers[cpu]; 2215 cpu_buffer_a = buffer_a->buffers[cpu];
@@ -2118,16 +2236,178 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2118 return 0; 2236 return 0;
2119} 2237}
2120 2238
2239static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
2240 struct buffer_data_page *bpage)
2241{
2242 struct ring_buffer_event *event;
2243 unsigned long head;
2244
2245 __raw_spin_lock(&cpu_buffer->lock);
2246 for (head = 0; head < local_read(&bpage->commit);
2247 head += rb_event_length(event)) {
2248
2249 event = __rb_data_page_index(bpage, head);
2250 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
2251 return;
2252 /* Only count data entries */
2253 if (event->type != RINGBUF_TYPE_DATA)
2254 continue;
2255 cpu_buffer->entries--;
2256 }
2257 __raw_spin_unlock(&cpu_buffer->lock);
2258}
2259
2260/**
2261 * ring_buffer_alloc_read_page - allocate a page to read from buffer
2262 * @buffer: the buffer to allocate for.
2263 *
2264 * This function is used in conjunction with ring_buffer_read_page.
2265 * When reading a full page from the ring buffer, these functions
2266 * can be used to speed up the process. The calling function should
2267 * allocate a few pages first with this function. Then when it
2268 * needs to get pages from the ring buffer, it passes the result
2269 * of this function into ring_buffer_read_page, which will swap
 2270 * the page that was allocated with the read page of the buffer.
2271 *
2272 * Returns:
2273 * The page allocated, or NULL on error.
2274 */
2275void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
2276{
2277 unsigned long addr;
2278 struct buffer_data_page *bpage;
2279
2280 addr = __get_free_page(GFP_KERNEL);
2281 if (!addr)
2282 return NULL;
2283
2284 bpage = (void *)addr;
2285
2286 return bpage;
2287}
2288
2289/**
2290 * ring_buffer_free_read_page - free an allocated read page
 2291 * @buffer: the buffer the page was allocated for
2292 * @data: the page to free
2293 *
2294 * Free a page allocated from ring_buffer_alloc_read_page.
2295 */
2296void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
2297{
2298 free_page((unsigned long)data);
2299}
2300
2301/**
2302 * ring_buffer_read_page - extract a page from the ring buffer
2303 * @buffer: buffer to extract from
2304 * @data_page: the page to use allocated from ring_buffer_alloc_read_page
2305 * @cpu: the cpu of the buffer to extract
2306 * @full: should the extraction only happen when the page is full.
2307 *
2308 * This function will pull out a page from the ring buffer and consume it.
2309 * @data_page must be the address of the variable that was returned
2310 * from ring_buffer_alloc_read_page. This is because the page might be used
2311 * to swap with a page in the ring buffer.
2312 *
2313 * for example:
 2314 *	rpage = ring_buffer_alloc_read_page(buffer);
2315 * if (!rpage)
2316 * return error;
2317 * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0);
2318 * if (ret)
2319 * process_page(rpage);
2320 *
2321 * When @full is set, the function will not return true unless
2322 * the writer is off the reader page.
2323 *
2324 * Note: it is up to the calling functions to handle sleeps and wakeups.
2325 * The ring buffer can be used anywhere in the kernel and can not
2326 * blindly call wake_up. The layer that uses the ring buffer must be
2327 * responsible for that.
2328 *
2329 * Returns:
2330 * 1 if data has been transferred
2331 * 0 if no data has been transferred.
2332 */
2333int ring_buffer_read_page(struct ring_buffer *buffer,
2334 void **data_page, int cpu, int full)
2335{
2336 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2337 struct ring_buffer_event *event;
2338 struct buffer_data_page *bpage;
2339 unsigned long flags;
2340 int ret = 0;
2341
2342 if (!data_page)
2343 return 0;
2344
2345 bpage = *data_page;
2346 if (!bpage)
2347 return 0;
2348
2349 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2350
2351 /*
2352 * rb_buffer_peek will get the next ring buffer if
2353 * the current reader page is empty.
2354 */
2355 event = rb_buffer_peek(buffer, cpu, NULL);
2356 if (!event)
2357 goto out;
2358
2359 /* check for data */
2360 if (!local_read(&cpu_buffer->reader_page->page->commit))
2361 goto out;
2362 /*
2363 * If the writer is already off of the read page, then simply
2364 * switch the read page with the given page. Otherwise
2365 * we need to copy the data from the reader to the writer.
2366 */
2367 if (cpu_buffer->reader_page == cpu_buffer->commit_page) {
2368 unsigned int read = cpu_buffer->reader_page->read;
2369
2370 if (full)
2371 goto out;
2372 /* The writer is still on the reader page, we must copy */
2373 bpage = cpu_buffer->reader_page->page;
2374 memcpy(bpage->data,
2375 cpu_buffer->reader_page->page->data + read,
2376 local_read(&bpage->commit) - read);
2377
2378 /* consume what was read */
2379 cpu_buffer->reader_page += read;
2380
2381 } else {
2382 /* swap the pages */
2383 rb_init_page(bpage);
2384 bpage = cpu_buffer->reader_page->page;
2385 cpu_buffer->reader_page->page = *data_page;
2386 cpu_buffer->reader_page->read = 0;
2387 *data_page = bpage;
2388 }
2389 ret = 1;
2390
2391 /* update the entry counter */
2392 rb_remove_entries(cpu_buffer, bpage);
2393 out:
2394 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2395
2396 return ret;
2397}
2398
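Taken together, the three new exports give a reader a page-at-a-time way to drain one CPU's buffer. The sketch below only restates the kernel-doc example above in context; drain_cpu() is a hypothetical caller, and error handling plus the actual event walk are left out:

static void drain_cpu(struct ring_buffer *buffer, int cpu)
{
	void *rpage = ring_buffer_alloc_read_page(buffer);

	if (!rpage)
		return;

	/* full == 0: accept a partially filled reader page as well */
	while (ring_buffer_read_page(buffer, &rpage, cpu, 0)) {
		/* rpage now holds a consumed buffer_data_page;
		 * walk its events here before the next iteration. */
	}

	ring_buffer_free_read_page(buffer, rpage);
}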
2121static ssize_t 2399static ssize_t
2122rb_simple_read(struct file *filp, char __user *ubuf, 2400rb_simple_read(struct file *filp, char __user *ubuf,
2123 size_t cnt, loff_t *ppos) 2401 size_t cnt, loff_t *ppos)
2124{ 2402{
2125 int *p = filp->private_data; 2403 long *p = filp->private_data;
2126 char buf[64]; 2404 char buf[64];
2127 int r; 2405 int r;
2128 2406
2129 /* !ring_buffers_off == tracing_on */ 2407 if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
2130 r = sprintf(buf, "%d\n", !*p); 2408 r = sprintf(buf, "permanently disabled\n");
2409 else
2410 r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
2131 2411
2132 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2412 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2133} 2413}
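The debugfs handlers now treat their private data as a bitmask held in a long (ring_buffer_flags) rather than the old ring_buffers_off integer, which is what lets a permanent-disable bit coexist with the normal on/off toggle. The actual definitions sit near the top of ring_buffer.c, outside this diff; based on the test_bit()/set_bit() calls here they are presumably along these lines:

/* Assumed definitions; not shown in this diff. */
enum {
	RB_BUFFERS_ON_BIT	= 0,	/* ordinary tracing_on toggle */
	RB_BUFFERS_DISABLED_BIT	= 1,	/* set once by tracing_off_permanent() */
};

enum {
	RB_BUFFERS_ON		= 1 << RB_BUFFERS_ON_BIT,
	RB_BUFFERS_DISABLED	= 1 << RB_BUFFERS_DISABLED_BIT,
};

static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;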
@@ -2136,7 +2416,7 @@ static ssize_t
2136rb_simple_write(struct file *filp, const char __user *ubuf, 2416rb_simple_write(struct file *filp, const char __user *ubuf,
2137 size_t cnt, loff_t *ppos) 2417 size_t cnt, loff_t *ppos)
2138{ 2418{
2139 int *p = filp->private_data; 2419 long *p = filp->private_data;
2140 char buf[64]; 2420 char buf[64];
2141 long val; 2421 long val;
2142 int ret; 2422 int ret;
@@ -2153,8 +2433,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
2153 if (ret < 0) 2433 if (ret < 0)
2154 return ret; 2434 return ret;
2155 2435
2156 /* !ring_buffers_off == tracing_on */ 2436 if (val)
2157 *p = !val; 2437 set_bit(RB_BUFFERS_ON_BIT, p);
2438 else
2439 clear_bit(RB_BUFFERS_ON_BIT, p);
2158 2440
2159 (*ppos)++; 2441 (*ppos)++;
2160 2442
@@ -2176,7 +2458,7 @@ static __init int rb_init_debugfs(void)
2176 d_tracer = tracing_init_dentry(); 2458 d_tracer = tracing_init_dentry();
2177 2459
2178 entry = debugfs_create_file("tracing_on", 0644, d_tracer, 2460 entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2179 &ring_buffers_off, &rb_simple_fops); 2461 &ring_buffer_flags, &rb_simple_fops);
2180 if (!entry) 2462 if (!entry)
2181 pr_warning("Could not create debugfs 'tracing_on' entry\n"); 2463 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2182 2464
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 803100518f11..0eb6d48347f7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/gfp.h> 30#include <linux/gfp.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/kprobes.h> 32#include <linux/kprobes.h>
33#include <linux/seq_file.h>
33#include <linux/writeback.h> 34#include <linux/writeback.h>
34 35
35#include <linux/stacktrace.h> 36#include <linux/stacktrace.h>
@@ -43,6 +44,38 @@
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 44unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
44unsigned long __read_mostly tracing_thresh; 45unsigned long __read_mostly tracing_thresh;
45 46
47/*
48 * We need to change this state when a selftest is running.
 49 * A selftest will look into the ring-buffer to count the
 50 * entries inserted during the selftest, although some concurrent
 51 * insertions into the ring-buffer, such as ftrace_printk, could occur
52 * at the same time, giving false positive or negative results.
53 */
54static bool __read_mostly tracing_selftest_running;
55
56/* For tracers that don't implement custom flags */
57static struct tracer_opt dummy_tracer_opt[] = {
58 { }
59};
60
61static struct tracer_flags dummy_tracer_flags = {
62 .val = 0,
63 .opts = dummy_tracer_opt
64};
65
66static int dummy_set_flag(u32 old_flags, u32 bit, int set)
67{
68 return 0;
69}
70
71/*
72 * Kill all tracing for good (never come back).
73 * It is initialized to 1 but will turn to zero if the initialization
74 * of the tracer is successful. But that is the only place that sets
75 * this back to zero.
76 */
77int tracing_disabled = 1;
78
46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 79static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47 80
48static inline void ftrace_disable_cpu(void) 81static inline void ftrace_disable_cpu(void)
@@ -62,7 +95,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
62#define for_each_tracing_cpu(cpu) \ 95#define for_each_tracing_cpu(cpu) \
63 for_each_cpu_mask(cpu, tracing_buffer_mask) 96 for_each_cpu_mask(cpu, tracing_buffer_mask)
64 97
65static int tracing_disabled = 1; 98/*
99 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
100 *
101 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
102 * is set, then ftrace_dump is called. This will output the contents
103 * of the ftrace buffers to the console. This is very useful for
 104 * capturing traces that lead to crashes and outputting them to a
105 * serial console.
106 *
 107 * It is off by default, but you can enable it either by specifying
 108 * "ftrace_dump_on_oops" on the kernel command line, or by setting
109 * /proc/sys/kernel/ftrace_dump_on_oops to true.
110 */
111int ftrace_dump_on_oops;
112
113static int tracing_set_tracer(char *buf);
114
115static int __init set_ftrace(char *str)
116{
117 tracing_set_tracer(str);
118 return 1;
119}
120__setup("ftrace", set_ftrace);
121
122static int __init set_ftrace_dump_on_oops(char *str)
123{
124 ftrace_dump_on_oops = 1;
125 return 1;
126}
127__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
66 128
67long 129long
68ns2usecs(cycle_t nsec) 130ns2usecs(cycle_t nsec)
@@ -112,6 +174,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
112/* tracer_enabled is used to toggle activation of a tracer */ 174/* tracer_enabled is used to toggle activation of a tracer */
113static int tracer_enabled = 1; 175static int tracer_enabled = 1;
114 176
177/**
178 * tracing_is_enabled - return tracer_enabled status
179 *
180 * This function is used by other tracers to know the status
181 * of the tracer_enabled flag. Tracers may use this function
 182 * to know whether they should enable their features when starting
183 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
184 */
185int tracing_is_enabled(void)
186{
187 return tracer_enabled;
188}
189
115/* function tracing enabled */ 190/* function tracing enabled */
116int ftrace_function_enabled; 191int ftrace_function_enabled;
117 192
@@ -153,8 +228,9 @@ static DEFINE_MUTEX(trace_types_lock);
153/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 228/* trace_wait is a waitqueue for tasks blocked on trace_poll */
154static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 229static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
155 230
156/* trace_flags holds iter_ctrl options */ 231/* trace_flags holds trace_options default values */
157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 232unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
233 TRACE_ITER_ANNOTATE;
158 234
159/** 235/**
160 * trace_wake_up - wake up tasks waiting for trace input 236 * trace_wake_up - wake up tasks waiting for trace input
@@ -193,13 +269,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
193 return nsecs / 1000; 269 return nsecs / 1000;
194} 270}
195 271
196/*
197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
198 * control the output of kernel symbols.
199 */
200#define TRACE_ITER_SYM_MASK \
201 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
202
203/* These must match the bit postions in trace_iterator_flags */ 272/* These must match the bit postions in trace_iterator_flags */
204static const char *trace_options[] = { 273static const char *trace_options[] = {
205 "print-parent", 274 "print-parent",
@@ -213,6 +282,12 @@ static const char *trace_options[] = {
213 "stacktrace", 282 "stacktrace",
214 "sched-tree", 283 "sched-tree",
215 "ftrace_printk", 284 "ftrace_printk",
285 "ftrace_preempt",
286 "branch",
287 "annotate",
288 "userstacktrace",
289 "sym-userobj",
290 "printk-msg-only",
216 NULL 291 NULL
217}; 292};
218 293
@@ -359,6 +434,28 @@ trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
359 return trace_seq_putmem(s, hex, j); 434 return trace_seq_putmem(s, hex, j);
360} 435}
361 436
437static int
438trace_seq_path(struct trace_seq *s, struct path *path)
439{
440 unsigned char *p;
441
442 if (s->len >= (PAGE_SIZE - 1))
443 return 0;
444 p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len);
445 if (!IS_ERR(p)) {
446 p = mangle_path(s->buffer + s->len, p, "\n");
447 if (p) {
448 s->len = p - s->buffer;
449 return 1;
450 }
451 } else {
452 s->buffer[s->len++] = '?';
453 return 1;
454 }
455
456 return 0;
457}
458
362static void 459static void
363trace_seq_reset(struct trace_seq *s) 460trace_seq_reset(struct trace_seq *s)
364{ 461{
@@ -470,7 +567,17 @@ int register_tracer(struct tracer *type)
470 return -1; 567 return -1;
471 } 568 }
472 569
570 /*
571 * When this gets called we hold the BKL which means that
572 * preemption is disabled. Various trace selftests however
573 * need to disable and enable preemption for successful tests.
574 * So we drop the BKL here and grab it after the tests again.
575 */
576 unlock_kernel();
473 mutex_lock(&trace_types_lock); 577 mutex_lock(&trace_types_lock);
578
579 tracing_selftest_running = true;
580
474 for (t = trace_types; t; t = t->next) { 581 for (t = trace_types; t; t = t->next) {
475 if (strcmp(type->name, t->name) == 0) { 582 if (strcmp(type->name, t->name) == 0) {
476 /* already found */ 583 /* already found */
@@ -481,12 +588,20 @@ int register_tracer(struct tracer *type)
481 } 588 }
482 } 589 }
483 590
591 if (!type->set_flag)
592 type->set_flag = &dummy_set_flag;
593 if (!type->flags)
594 type->flags = &dummy_tracer_flags;
595 else
596 if (!type->flags->opts)
597 type->flags->opts = dummy_tracer_opt;
598
484#ifdef CONFIG_FTRACE_STARTUP_TEST 599#ifdef CONFIG_FTRACE_STARTUP_TEST
485 if (type->selftest) { 600 if (type->selftest) {
486 struct tracer *saved_tracer = current_trace; 601 struct tracer *saved_tracer = current_trace;
487 struct trace_array *tr = &global_trace; 602 struct trace_array *tr = &global_trace;
488 int saved_ctrl = tr->ctrl;
489 int i; 603 int i;
604
490 /* 605 /*
491 * Run a selftest on this tracer. 606 * Run a selftest on this tracer.
492 * Here we reset the trace buffer, and set the current 607 * Here we reset the trace buffer, and set the current
@@ -494,25 +609,23 @@ int register_tracer(struct tracer *type)
494 * internal tracing to verify that everything is in order. 609 * internal tracing to verify that everything is in order.
495 * If we fail, we do not register this tracer. 610 * If we fail, we do not register this tracer.
496 */ 611 */
497 for_each_tracing_cpu(i) { 612 for_each_tracing_cpu(i)
498 tracing_reset(tr, i); 613 tracing_reset(tr, i);
499 } 614
500 current_trace = type; 615 current_trace = type;
501 tr->ctrl = 0;
502 /* the test is responsible for initializing and enabling */ 616 /* the test is responsible for initializing and enabling */
503 pr_info("Testing tracer %s: ", type->name); 617 pr_info("Testing tracer %s: ", type->name);
504 ret = type->selftest(type, tr); 618 ret = type->selftest(type, tr);
505 /* the test is responsible for resetting too */ 619 /* the test is responsible for resetting too */
506 current_trace = saved_tracer; 620 current_trace = saved_tracer;
507 tr->ctrl = saved_ctrl;
508 if (ret) { 621 if (ret) {
509 printk(KERN_CONT "FAILED!\n"); 622 printk(KERN_CONT "FAILED!\n");
510 goto out; 623 goto out;
511 } 624 }
512 /* Only reset on passing, to avoid touching corrupted buffers */ 625 /* Only reset on passing, to avoid touching corrupted buffers */
513 for_each_tracing_cpu(i) { 626 for_each_tracing_cpu(i)
514 tracing_reset(tr, i); 627 tracing_reset(tr, i);
515 } 628
516 printk(KERN_CONT "PASSED\n"); 629 printk(KERN_CONT "PASSED\n");
517 } 630 }
518#endif 631#endif
@@ -524,7 +637,9 @@ int register_tracer(struct tracer *type)
524 max_tracer_type_len = len; 637 max_tracer_type_len = len;
525 638
526 out: 639 out:
640 tracing_selftest_running = false;
527 mutex_unlock(&trace_types_lock); 641 mutex_unlock(&trace_types_lock);
642 lock_kernel();
528 643
529 return ret; 644 return ret;
530} 645}
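register_tracer() now falls back to dummy_set_flag() and dummy_tracer_flags when a tracer supplies nothing, so every registered tracer can be assumed to have a flags block. For a tracer that does want options, registration would look roughly like the sketch below; everything beyond the .val/.opts/.set_flag usage visible in this diff (in particular the name/bit fields and the option itself) is an assumption drawn from trace.h, not from the patch:

static struct tracer_opt mytracer_opts[] = {
	/* assumed layout: an option name plus the bit it controls */
	{ .name = "verbose", .bit = 1 << 0 },
	{ }	/* terminator, as in dummy_tracer_opt above */
};

static struct tracer_flags mytracer_flags = {
	.val	= 0,			/* all options off by default */
	.opts	= mytracer_opts,
};

static int mytracer_set_flag(u32 old_flags, u32 bit, int set)
{
	return 0;			/* accept every flag change */
}

static struct tracer mytracer __read_mostly = {
	.name		= "mytracer",
	.flags		= &mytracer_flags,
	.set_flag	= mytracer_set_flag,
};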
@@ -581,6 +696,91 @@ static void trace_init_cmdlines(void)
581 cmdline_idx = 0; 696 cmdline_idx = 0;
582} 697}
583 698
699static int trace_stop_count;
700static DEFINE_SPINLOCK(tracing_start_lock);
701
702/**
703 * ftrace_off_permanent - disable all ftrace code permanently
704 *
 705 * This should only be called when a serious anomaly has
 706 * been detected. This will turn off the function tracing,
 707 * ring buffers, and other tracing utilities. It takes no
708 * locks and can be called from any context.
709 */
710void ftrace_off_permanent(void)
711{
712 tracing_disabled = 1;
713 ftrace_stop();
714 tracing_off_permanent();
715}
716
717/**
718 * tracing_start - quick start of the tracer
719 *
720 * If tracing is enabled but was stopped by tracing_stop,
721 * this will start the tracer back up.
722 */
723void tracing_start(void)
724{
725 struct ring_buffer *buffer;
726 unsigned long flags;
727
728 if (tracing_disabled)
729 return;
730
731 spin_lock_irqsave(&tracing_start_lock, flags);
732 if (--trace_stop_count)
733 goto out;
734
735 if (trace_stop_count < 0) {
736 /* Someone screwed up their debugging */
737 WARN_ON_ONCE(1);
738 trace_stop_count = 0;
739 goto out;
740 }
741
742
743 buffer = global_trace.buffer;
744 if (buffer)
745 ring_buffer_record_enable(buffer);
746
747 buffer = max_tr.buffer;
748 if (buffer)
749 ring_buffer_record_enable(buffer);
750
751 ftrace_start();
752 out:
753 spin_unlock_irqrestore(&tracing_start_lock, flags);
754}
755
756/**
757 * tracing_stop - quick stop of the tracer
758 *
 759 * Lightweight way to stop tracing. Use in conjunction with
760 * tracing_start.
761 */
762void tracing_stop(void)
763{
764 struct ring_buffer *buffer;
765 unsigned long flags;
766
767 ftrace_stop();
768 spin_lock_irqsave(&tracing_start_lock, flags);
769 if (trace_stop_count++)
770 goto out;
771
772 buffer = global_trace.buffer;
773 if (buffer)
774 ring_buffer_record_disable(buffer);
775
776 buffer = max_tr.buffer;
777 if (buffer)
778 ring_buffer_record_disable(buffer);
779
780 out:
781 spin_unlock_irqrestore(&tracing_start_lock, flags);
782}
783
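tracing_stop() and tracing_start() nest through trace_stop_count, so a caller that needs the buffers quiet can simply bracket its critical section with the pair and not care whether someone else has already stopped tracing. A hypothetical user (not part of this patch):

static void inspect_buffers_quiesced(void)
{
	tracing_stop();		/* recording pauses; nested calls just count up */

	/* ... read or copy the ring buffers while nothing is written ... */

	tracing_start();	/* recording resumes once the count drops to zero */
}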
584void trace_stop_cmdline_recording(void); 784void trace_stop_cmdline_recording(void);
585 785
586static void trace_save_cmdline(struct task_struct *tsk) 786static void trace_save_cmdline(struct task_struct *tsk)
@@ -618,7 +818,7 @@ static void trace_save_cmdline(struct task_struct *tsk)
618 spin_unlock(&trace_cmdline_lock); 818 spin_unlock(&trace_cmdline_lock);
619} 819}
620 820
621static char *trace_find_cmdline(int pid) 821char *trace_find_cmdline(int pid)
622{ 822{
623 char *cmdline = "<...>"; 823 char *cmdline = "<...>";
624 unsigned map; 824 unsigned map;
@@ -655,6 +855,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
655 855
656 entry->preempt_count = pc & 0xff; 856 entry->preempt_count = pc & 0xff;
657 entry->pid = (tsk) ? tsk->pid : 0; 857 entry->pid = (tsk) ? tsk->pid : 0;
858 entry->tgid = (tsk) ? tsk->tgid : 0;
658 entry->flags = 859 entry->flags =
659#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT 860#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
660 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 861 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -691,6 +892,56 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 892 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
692} 893}
693 894
895#ifdef CONFIG_FUNCTION_GRAPH_TRACER
896static void __trace_graph_entry(struct trace_array *tr,
897 struct trace_array_cpu *data,
898 struct ftrace_graph_ent *trace,
899 unsigned long flags,
900 int pc)
901{
902 struct ring_buffer_event *event;
903 struct ftrace_graph_ent_entry *entry;
904 unsigned long irq_flags;
905
906 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
907 return;
908
909 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
910 &irq_flags);
911 if (!event)
912 return;
913 entry = ring_buffer_event_data(event);
914 tracing_generic_entry_update(&entry->ent, flags, pc);
915 entry->ent.type = TRACE_GRAPH_ENT;
916 entry->graph_ent = *trace;
917 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
918}
919
920static void __trace_graph_return(struct trace_array *tr,
921 struct trace_array_cpu *data,
922 struct ftrace_graph_ret *trace,
923 unsigned long flags,
924 int pc)
925{
926 struct ring_buffer_event *event;
927 struct ftrace_graph_ret_entry *entry;
928 unsigned long irq_flags;
929
930 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
931 return;
932
933 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
934 &irq_flags);
935 if (!event)
936 return;
937 entry = ring_buffer_event_data(event);
938 tracing_generic_entry_update(&entry->ent, flags, pc);
939 entry->ent.type = TRACE_GRAPH_RET;
940 entry->ret = *trace;
941 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
942}
943#endif
944
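Both graph helpers follow the same ring-buffer write sequence used throughout this file. Condensed, with the names taken from the code above (this is a restatement of the two functions, not standalone code):

	event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
					 &irq_flags);
	if (!event)
		return;				/* buffer disabled or out of space */
	entry = ring_buffer_event_data(event);	/* payload lives inside the event */
	tracing_generic_entry_update(&entry->ent, flags, pc);
	entry->ent.type = TRACE_GRAPH_ENT;	/* or TRACE_GRAPH_RET */
	/* copy the ftrace_graph_ent / ftrace_graph_ret payload */
	ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);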
694void 945void
695ftrace(struct trace_array *tr, struct trace_array_cpu *data, 946ftrace(struct trace_array *tr, struct trace_array_cpu *data,
696 unsigned long ip, unsigned long parent_ip, unsigned long flags, 947 unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -742,6 +993,46 @@ void __trace_stack(struct trace_array *tr,
742 ftrace_trace_stack(tr, data, flags, skip, preempt_count()); 993 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
743} 994}
744 995
996static void ftrace_trace_userstack(struct trace_array *tr,
997 struct trace_array_cpu *data,
998 unsigned long flags, int pc)
999{
1000#ifdef CONFIG_STACKTRACE
1001 struct ring_buffer_event *event;
1002 struct userstack_entry *entry;
1003 struct stack_trace trace;
1004 unsigned long irq_flags;
1005
1006 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1007 return;
1008
1009 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
1010 &irq_flags);
1011 if (!event)
1012 return;
1013 entry = ring_buffer_event_data(event);
1014 tracing_generic_entry_update(&entry->ent, flags, pc);
1015 entry->ent.type = TRACE_USER_STACK;
1016
1017 memset(&entry->caller, 0, sizeof(entry->caller));
1018
1019 trace.nr_entries = 0;
1020 trace.max_entries = FTRACE_STACK_ENTRIES;
1021 trace.skip = 0;
1022 trace.entries = entry->caller;
1023
1024 save_stack_trace_user(&trace);
1025 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
1026#endif
1027}
1028
1029void __trace_userstack(struct trace_array *tr,
1030 struct trace_array_cpu *data,
1031 unsigned long flags)
1032{
1033 ftrace_trace_userstack(tr, data, flags, preempt_count());
1034}
1035
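ftrace_trace_userstack() only emits an event when the TRACE_ITER_USERSTACKTRACE bit (added to trace.h below) is set in trace_flags. Callers pair it with the kernel-stack helper, exactly as the later hunks of this patch do:

	ftrace_trace_stack(tr, data, irq_flags, 4, pc);		/* kernel stack */
	ftrace_trace_userstack(tr, data, irq_flags, pc);	/* user stack, if enabled */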
745static void 1036static void
746ftrace_trace_special(void *__tr, void *__data, 1037ftrace_trace_special(void *__tr, void *__data,
747 unsigned long arg1, unsigned long arg2, unsigned long arg3, 1038 unsigned long arg1, unsigned long arg2, unsigned long arg3,
@@ -765,6 +1056,7 @@ ftrace_trace_special(void *__tr, void *__data,
765 entry->arg3 = arg3; 1056 entry->arg3 = arg3;
766 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1057 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
767 ftrace_trace_stack(tr, data, irq_flags, 4, pc); 1058 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
1059 ftrace_trace_userstack(tr, data, irq_flags, pc);
768 1060
769 trace_wake_up(); 1061 trace_wake_up();
770} 1062}
@@ -803,6 +1095,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
803 entry->next_cpu = task_cpu(next); 1095 entry->next_cpu = task_cpu(next);
804 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1096 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
805 ftrace_trace_stack(tr, data, flags, 5, pc); 1097 ftrace_trace_stack(tr, data, flags, 5, pc);
1098 ftrace_trace_userstack(tr, data, flags, pc);
806} 1099}
807 1100
808void 1101void
@@ -832,6 +1125,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
832 entry->next_cpu = task_cpu(wakee); 1125 entry->next_cpu = task_cpu(wakee);
833 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 1126 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
834 ftrace_trace_stack(tr, data, flags, 6, pc); 1127 ftrace_trace_stack(tr, data, flags, 6, pc);
1128 ftrace_trace_userstack(tr, data, flags, pc);
835 1129
836 trace_wake_up(); 1130 trace_wake_up();
837} 1131}
@@ -841,26 +1135,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
841{ 1135{
842 struct trace_array *tr = &global_trace; 1136 struct trace_array *tr = &global_trace;
843 struct trace_array_cpu *data; 1137 struct trace_array_cpu *data;
1138 unsigned long flags;
844 int cpu; 1139 int cpu;
845 int pc; 1140 int pc;
846 1141
847 if (tracing_disabled || !tr->ctrl) 1142 if (tracing_disabled)
848 return; 1143 return;
849 1144
850 pc = preempt_count(); 1145 pc = preempt_count();
851 preempt_disable_notrace(); 1146 local_irq_save(flags);
852 cpu = raw_smp_processor_id(); 1147 cpu = raw_smp_processor_id();
853 data = tr->data[cpu]; 1148 data = tr->data[cpu];
854 1149
855 if (likely(!atomic_read(&data->disabled))) 1150 if (likely(atomic_inc_return(&data->disabled) == 1))
856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1151 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
857 1152
858 preempt_enable_notrace(); 1153 atomic_dec(&data->disabled);
1154 local_irq_restore(flags);
859} 1155}
860 1156
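The reworked ftrace_special() switches from a plain atomic_read() check to the per-CPU recursion guard used by the other tracing entry points: only the first nested caller on a CPU (atomic_inc_return() == 1) gets to write, and everyone decrements on the way out. The pattern, in isolation:

	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		/* sole writer on this CPU: safe to record the event */
		ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
	}
	atomic_dec(&data->disabled);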
861#ifdef CONFIG_FUNCTION_TRACER 1157#ifdef CONFIG_FUNCTION_TRACER
862static void 1158static void
863function_trace_call(unsigned long ip, unsigned long parent_ip) 1159function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
864{ 1160{
865 struct trace_array *tr = &global_trace; 1161 struct trace_array *tr = &global_trace;
866 struct trace_array_cpu *data; 1162 struct trace_array_cpu *data;
@@ -873,8 +1169,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
873 return; 1169 return;
874 1170
875 pc = preempt_count(); 1171 pc = preempt_count();
876 resched = need_resched(); 1172 resched = ftrace_preempt_disable();
877 preempt_disable_notrace();
878 local_save_flags(flags); 1173 local_save_flags(flags);
879 cpu = raw_smp_processor_id(); 1174 cpu = raw_smp_processor_id();
880 data = tr->data[cpu]; 1175 data = tr->data[cpu];
@@ -884,11 +1179,96 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
884 trace_function(tr, data, ip, parent_ip, flags, pc); 1179 trace_function(tr, data, ip, parent_ip, flags, pc);
885 1180
886 atomic_dec(&data->disabled); 1181 atomic_dec(&data->disabled);
887 if (resched) 1182 ftrace_preempt_enable(resched);
888 preempt_enable_no_resched_notrace(); 1183}
889 else 1184
890 preempt_enable_notrace(); 1185static void
1186function_trace_call(unsigned long ip, unsigned long parent_ip)
1187{
1188 struct trace_array *tr = &global_trace;
1189 struct trace_array_cpu *data;
1190 unsigned long flags;
1191 long disabled;
1192 int cpu;
1193 int pc;
1194
1195 if (unlikely(!ftrace_function_enabled))
1196 return;
1197
1198 /*
1199 * Need to use raw, since this must be called before the
1200 * recursive protection is performed.
1201 */
1202 local_irq_save(flags);
1203 cpu = raw_smp_processor_id();
1204 data = tr->data[cpu];
1205 disabled = atomic_inc_return(&data->disabled);
1206
1207 if (likely(disabled == 1)) {
1208 pc = preempt_count();
1209 trace_function(tr, data, ip, parent_ip, flags, pc);
1210 }
1211
1212 atomic_dec(&data->disabled);
1213 local_irq_restore(flags);
1214}
1215
1216#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1217int trace_graph_entry(struct ftrace_graph_ent *trace)
1218{
1219 struct trace_array *tr = &global_trace;
1220 struct trace_array_cpu *data;
1221 unsigned long flags;
1222 long disabled;
1223 int cpu;
1224 int pc;
1225
1226 if (!ftrace_trace_task(current))
1227 return 0;
1228
1229 if (!ftrace_graph_addr(trace->func))
1230 return 0;
1231
1232 local_irq_save(flags);
1233 cpu = raw_smp_processor_id();
1234 data = tr->data[cpu];
1235 disabled = atomic_inc_return(&data->disabled);
1236 if (likely(disabled == 1)) {
1237 pc = preempt_count();
1238 __trace_graph_entry(tr, data, trace, flags, pc);
1239 }
1240 /* Only do the atomic if it is not already set */
1241 if (!test_tsk_trace_graph(current))
1242 set_tsk_trace_graph(current);
1243 atomic_dec(&data->disabled);
1244 local_irq_restore(flags);
1245
1246 return 1;
1247}
1248
1249void trace_graph_return(struct ftrace_graph_ret *trace)
1250{
1251 struct trace_array *tr = &global_trace;
1252 struct trace_array_cpu *data;
1253 unsigned long flags;
1254 long disabled;
1255 int cpu;
1256 int pc;
1257
1258 local_irq_save(flags);
1259 cpu = raw_smp_processor_id();
1260 data = tr->data[cpu];
1261 disabled = atomic_inc_return(&data->disabled);
1262 if (likely(disabled == 1)) {
1263 pc = preempt_count();
1264 __trace_graph_return(tr, data, trace, flags, pc);
1265 }
1266 if (!trace->depth)
1267 clear_tsk_trace_graph(current);
1268 atomic_dec(&data->disabled);
1269 local_irq_restore(flags);
891} 1270}
1271#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
892 1272
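trace_graph_entry() and trace_graph_return() are the callbacks the function graph tracer hands to the ftrace core. A sketch of how a tracer init might wire them up, assuming the register_ftrace_graph() interface introduced elsewhere in this series:

	static int graph_trace_init(struct trace_array *tr)
	{
		int ret;

		ret = register_ftrace_graph(&trace_graph_return,
					    &trace_graph_entry);
		if (ret)
			return ret;
		tracing_start_cmdline_record();
		return 0;
	}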
893static struct ftrace_ops trace_ops __read_mostly = 1273static struct ftrace_ops trace_ops __read_mostly =
894{ 1274{
@@ -898,9 +1278,14 @@ static struct ftrace_ops trace_ops __read_mostly =
898void tracing_start_function_trace(void) 1278void tracing_start_function_trace(void)
899{ 1279{
900 ftrace_function_enabled = 0; 1280 ftrace_function_enabled = 0;
1281
1282 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1283 trace_ops.func = function_trace_call_preempt_only;
1284 else
1285 trace_ops.func = function_trace_call;
1286
901 register_ftrace_function(&trace_ops); 1287 register_ftrace_function(&trace_ops);
902 if (tracer_enabled) 1288 ftrace_function_enabled = 1;
903 ftrace_function_enabled = 1;
904} 1289}
905 1290
906void tracing_stop_function_trace(void) 1291void tracing_stop_function_trace(void)
@@ -912,6 +1297,7 @@ void tracing_stop_function_trace(void)
912 1297
913enum trace_file_type { 1298enum trace_file_type {
914 TRACE_FILE_LAT_FMT = 1, 1299 TRACE_FILE_LAT_FMT = 1,
1300 TRACE_FILE_ANNOTATE = 2,
915}; 1301};
916 1302
917static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 1303static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
@@ -1047,10 +1433,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1047 1433
1048 atomic_inc(&trace_record_cmdline_disabled); 1434 atomic_inc(&trace_record_cmdline_disabled);
1049 1435
1050 /* let the tracer grab locks here if needed */
1051 if (current_trace->start)
1052 current_trace->start(iter);
1053
1054 if (*pos != iter->pos) { 1436 if (*pos != iter->pos) {
1055 iter->ent = NULL; 1437 iter->ent = NULL;
1056 iter->cpu = 0; 1438 iter->cpu = 0;
@@ -1077,14 +1459,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1077 1459
1078static void s_stop(struct seq_file *m, void *p) 1460static void s_stop(struct seq_file *m, void *p)
1079{ 1461{
1080 struct trace_iterator *iter = m->private;
1081
1082 atomic_dec(&trace_record_cmdline_disabled); 1462 atomic_dec(&trace_record_cmdline_disabled);
1083
1084 /* let the tracer release locks here if needed */
1085 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1086 iter->trace->stop(iter);
1087
1088 mutex_unlock(&trace_types_lock); 1463 mutex_unlock(&trace_types_lock);
1089} 1464}
1090 1465
@@ -1143,7 +1518,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1143# define IP_FMT "%016lx" 1518# define IP_FMT "%016lx"
1144#endif 1519#endif
1145 1520
1146static int 1521int
1147seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) 1522seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1148{ 1523{
1149 int ret; 1524 int ret;
@@ -1164,6 +1539,78 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1164 return ret; 1539 return ret;
1165} 1540}
1166 1541
1542static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
1543 unsigned long ip, unsigned long sym_flags)
1544{
1545 struct file *file = NULL;
1546 unsigned long vmstart = 0;
1547 int ret = 1;
1548
1549 if (mm) {
1550 const struct vm_area_struct *vma;
1551
1552 down_read(&mm->mmap_sem);
1553 vma = find_vma(mm, ip);
1554 if (vma) {
1555 file = vma->vm_file;
1556 vmstart = vma->vm_start;
1557 }
1558 if (file) {
1559 ret = trace_seq_path(s, &file->f_path);
1560 if (ret)
1561 ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart);
1562 }
1563 up_read(&mm->mmap_sem);
1564 }
1565 if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file))
1566 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1567 return ret;
1568}
1569
1570static int
1571seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
1572 unsigned long sym_flags)
1573{
1574 struct mm_struct *mm = NULL;
1575 int ret = 1;
1576 unsigned int i;
1577
1578 if (trace_flags & TRACE_ITER_SYM_USEROBJ) {
1579 struct task_struct *task;
1580 /*
1581 * we do the lookup on the thread group leader,
1582 * since individual threads might have already quit!
1583 */
1584 rcu_read_lock();
1585 task = find_task_by_vpid(entry->ent.tgid);
1586 if (task)
1587 mm = get_task_mm(task);
1588 rcu_read_unlock();
1589 }
1590
1591 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1592 unsigned long ip = entry->caller[i];
1593
1594 if (ip == ULONG_MAX || !ret)
1595 break;
1596 if (i && ret)
1597 ret = trace_seq_puts(s, " <- ");
1598 if (!ip) {
1599 if (ret)
1600 ret = trace_seq_puts(s, "??");
1601 continue;
1602 }
1603 if (!ret)
1604 break;
1605 if (ret)
1606 ret = seq_print_user_ip(s, mm, ip, sym_flags);
1607 }
1608
1609 if (mm)
1610 mmput(mm);
1611 return ret;
1612}
1613
1167static void print_lat_help_header(struct seq_file *m) 1614static void print_lat_help_header(struct seq_file *m)
1168{ 1615{
1169 seq_puts(m, "# _------=> CPU# \n"); 1616 seq_puts(m, "# _------=> CPU# \n");
@@ -1345,6 +1792,23 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1345 trace_seq_putc(s, '\n'); 1792 trace_seq_putc(s, '\n');
1346} 1793}
1347 1794
1795static void test_cpu_buff_start(struct trace_iterator *iter)
1796{
1797 struct trace_seq *s = &iter->seq;
1798
1799 if (!(trace_flags & TRACE_ITER_ANNOTATE))
1800 return;
1801
1802 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1803 return;
1804
1805 if (cpu_isset(iter->cpu, iter->started))
1806 return;
1807
1808 cpu_set(iter->cpu, iter->started);
1809 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1810}
1811
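test_cpu_buff_start() fires at most once per CPU, and only when the iterator was opened with TRACE_FILE_ANNOTATE (set further down when ring_buffer_overruns() reports lost events), so a buffer that dropped events is introduced by a single marker line such as:

	##### CPU 1 buffer started ####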
1348static enum print_line_t 1812static enum print_line_t
1349print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1813print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1350{ 1814{
@@ -1359,11 +1823,12 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1359 char *comm; 1823 char *comm;
1360 int S, T; 1824 int S, T;
1361 int i; 1825 int i;
1362 unsigned state;
1363 1826
1364 if (entry->type == TRACE_CONT) 1827 if (entry->type == TRACE_CONT)
1365 return TRACE_TYPE_HANDLED; 1828 return TRACE_TYPE_HANDLED;
1366 1829
1830 test_cpu_buff_start(iter);
1831
1367 next_entry = find_next_entry(iter, NULL, &next_ts); 1832 next_entry = find_next_entry(iter, NULL, &next_ts);
1368 if (!next_entry) 1833 if (!next_entry)
1369 next_ts = iter->ts; 1834 next_ts = iter->ts;
@@ -1451,6 +1916,27 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1451 trace_seq_print_cont(s, iter); 1916 trace_seq_print_cont(s, iter);
1452 break; 1917 break;
1453 } 1918 }
1919 case TRACE_BRANCH: {
1920 struct trace_branch *field;
1921
1922 trace_assign_type(field, entry);
1923
1924 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1925 field->correct ? " ok " : " MISS ",
1926 field->func,
1927 field->file,
1928 field->line);
1929 break;
1930 }
1931 case TRACE_USER_STACK: {
1932 struct userstack_entry *field;
1933
1934 trace_assign_type(field, entry);
1935
1936 seq_print_userip_objs(field, s, sym_flags);
1937 trace_seq_putc(s, '\n');
1938 break;
1939 }
1454 default: 1940 default:
1455 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1941 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1456 } 1942 }
@@ -1475,6 +1961,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1475 if (entry->type == TRACE_CONT) 1961 if (entry->type == TRACE_CONT)
1476 return TRACE_TYPE_HANDLED; 1962 return TRACE_TYPE_HANDLED;
1477 1963
1964 test_cpu_buff_start(iter);
1965
1478 comm = trace_find_cmdline(iter->ent->pid); 1966 comm = trace_find_cmdline(iter->ent->pid);
1479 1967
1480 t = ns2usecs(iter->ts); 1968 t = ns2usecs(iter->ts);
@@ -1582,6 +2070,37 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1582 trace_seq_print_cont(s, iter); 2070 trace_seq_print_cont(s, iter);
1583 break; 2071 break;
1584 } 2072 }
2073 case TRACE_GRAPH_RET: {
2074 return print_graph_function(iter);
2075 }
2076 case TRACE_GRAPH_ENT: {
2077 return print_graph_function(iter);
2078 }
2079 case TRACE_BRANCH: {
2080 struct trace_branch *field;
2081
2082 trace_assign_type(field, entry);
2083
2084 trace_seq_printf(s, "[%s] %s:%s:%d\n",
2085 field->correct ? " ok " : " MISS ",
2086 field->func,
2087 field->file,
2088 field->line);
2089 break;
2090 }
2091 case TRACE_USER_STACK: {
2092 struct userstack_entry *field;
2093
2094 trace_assign_type(field, entry);
2095
2096 ret = seq_print_userip_objs(field, s, sym_flags);
2097 if (!ret)
2098 return TRACE_TYPE_PARTIAL_LINE;
2099 ret = trace_seq_putc(s, '\n');
2100 if (!ret)
2101 return TRACE_TYPE_PARTIAL_LINE;
2102 break;
2103 }
1585 } 2104 }
1586 return TRACE_TYPE_HANDLED; 2105 return TRACE_TYPE_HANDLED;
1587} 2106}
@@ -1638,6 +2157,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1638 break; 2157 break;
1639 } 2158 }
1640 case TRACE_SPECIAL: 2159 case TRACE_SPECIAL:
2160 case TRACE_USER_STACK:
1641 case TRACE_STACK: { 2161 case TRACE_STACK: {
1642 struct special_entry *field; 2162 struct special_entry *field;
1643 2163
@@ -1723,6 +2243,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1723 break; 2243 break;
1724 } 2244 }
1725 case TRACE_SPECIAL: 2245 case TRACE_SPECIAL:
2246 case TRACE_USER_STACK:
1726 case TRACE_STACK: { 2247 case TRACE_STACK: {
1727 struct special_entry *field; 2248 struct special_entry *field;
1728 2249
@@ -1739,6 +2260,25 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1739 return TRACE_TYPE_HANDLED; 2260 return TRACE_TYPE_HANDLED;
1740} 2261}
1741 2262
2263static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
2264{
2265 struct trace_seq *s = &iter->seq;
2266 struct trace_entry *entry = iter->ent;
2267 struct print_entry *field;
2268 int ret;
2269
2270 trace_assign_type(field, entry);
2271
2272 ret = trace_seq_printf(s, field->buf);
2273 if (!ret)
2274 return TRACE_TYPE_PARTIAL_LINE;
2275
2276 if (entry->flags & TRACE_FLAG_CONT)
2277 trace_seq_print_cont(s, iter);
2278
2279 return TRACE_TYPE_HANDLED;
2280}
2281
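print_printk_msg_only() backs the new printk-msg-only handling added to print_trace_line() below: when both TRACE_ITER_PRINTK and TRACE_ITER_PRINTK_MSGONLY are set, only the message produced by ftrace_printk() is emitted, without the usual comm/pid/timestamp columns. For example (hypothetical call site, assuming the ftrace_printk() wrapper around __ftrace_printk()):

	/* hypothetical call site; only the formatted string reaches the output */
	ftrace_printk("resetting queue %d, state=%d\n", qid, state);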
1742static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 2282static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1743{ 2283{
1744 struct trace_seq *s = &iter->seq; 2284 struct trace_seq *s = &iter->seq;
@@ -1777,6 +2317,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1777 break; 2317 break;
1778 } 2318 }
1779 case TRACE_SPECIAL: 2319 case TRACE_SPECIAL:
2320 case TRACE_USER_STACK:
1780 case TRACE_STACK: { 2321 case TRACE_STACK: {
1781 struct special_entry *field; 2322 struct special_entry *field;
1782 2323
@@ -1818,6 +2359,11 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
1818 return ret; 2359 return ret;
1819 } 2360 }
1820 2361
2362 if (iter->ent->type == TRACE_PRINT &&
2363 trace_flags & TRACE_ITER_PRINTK &&
2364 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2365 return print_printk_msg_only(iter);
2366
1821 if (trace_flags & TRACE_ITER_BIN) 2367 if (trace_flags & TRACE_ITER_BIN)
1822 return print_bin_fmt(iter); 2368 return print_bin_fmt(iter);
1823 2369
@@ -1842,7 +2388,9 @@ static int s_show(struct seq_file *m, void *v)
1842 seq_printf(m, "# tracer: %s\n", iter->trace->name); 2388 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1843 seq_puts(m, "#\n"); 2389 seq_puts(m, "#\n");
1844 } 2390 }
1845 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2391 if (iter->trace && iter->trace->print_header)
2392 iter->trace->print_header(m);
2393 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1846 /* print nothing if the buffers are empty */ 2394 /* print nothing if the buffers are empty */
1847 if (trace_empty(iter)) 2395 if (trace_empty(iter))
1848 return 0; 2396 return 0;
@@ -1894,6 +2442,15 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1894 iter->trace = current_trace; 2442 iter->trace = current_trace;
1895 iter->pos = -1; 2443 iter->pos = -1;
1896 2444
 2445 /* Notify the tracer early, before we stop tracing. */
2446 if (iter->trace && iter->trace->open)
2447 iter->trace->open(iter);
2448
2449 /* Annotate start of buffers if we had overruns */
2450 if (ring_buffer_overruns(iter->tr->buffer))
2451 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2452
2453
1897 for_each_tracing_cpu(cpu) { 2454 for_each_tracing_cpu(cpu) {
1898 2455
1899 iter->buffer_iter[cpu] = 2456 iter->buffer_iter[cpu] =
@@ -1912,13 +2469,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1912 m->private = iter; 2469 m->private = iter;
1913 2470
1914 /* stop the trace while dumping */ 2471 /* stop the trace while dumping */
1915 if (iter->tr->ctrl) { 2472 tracing_stop();
1916 tracer_enabled = 0;
1917 ftrace_function_enabled = 0;
1918 }
1919
1920 if (iter->trace && iter->trace->open)
1921 iter->trace->open(iter);
1922 2473
1923 mutex_unlock(&trace_types_lock); 2474 mutex_unlock(&trace_types_lock);
1924 2475
@@ -1961,14 +2512,7 @@ int tracing_release(struct inode *inode, struct file *file)
1961 iter->trace->close(iter); 2512 iter->trace->close(iter);
1962 2513
1963 /* reenable tracing if it was previously enabled */ 2514 /* reenable tracing if it was previously enabled */
1964 if (iter->tr->ctrl) { 2515 tracing_start();
1965 tracer_enabled = 1;
1966 /*
1967 * It is safe to enable function tracing even if it
1968 * isn't used
1969 */
1970 ftrace_function_enabled = 1;
1971 }
1972 mutex_unlock(&trace_types_lock); 2516 mutex_unlock(&trace_types_lock);
1973 2517
1974 seq_release(inode, file); 2518 seq_release(inode, file);
@@ -2146,7 +2690,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2146 if (err) 2690 if (err)
2147 goto err_unlock; 2691 goto err_unlock;
2148 2692
2149 raw_local_irq_disable(); 2693 local_irq_disable();
2150 __raw_spin_lock(&ftrace_max_lock); 2694 __raw_spin_lock(&ftrace_max_lock);
2151 for_each_tracing_cpu(cpu) { 2695 for_each_tracing_cpu(cpu) {
2152 /* 2696 /*
@@ -2163,7 +2707,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2163 } 2707 }
2164 } 2708 }
2165 __raw_spin_unlock(&ftrace_max_lock); 2709 __raw_spin_unlock(&ftrace_max_lock);
2166 raw_local_irq_enable(); 2710 local_irq_enable();
2167 2711
2168 tracing_cpumask = tracing_cpumask_new; 2712 tracing_cpumask = tracing_cpumask_new;
2169 2713
@@ -2184,13 +2728,16 @@ static struct file_operations tracing_cpumask_fops = {
2184}; 2728};
2185 2729
2186static ssize_t 2730static ssize_t
2187tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, 2731tracing_trace_options_read(struct file *filp, char __user *ubuf,
2188 size_t cnt, loff_t *ppos) 2732 size_t cnt, loff_t *ppos)
2189{ 2733{
2734 int i;
2190 char *buf; 2735 char *buf;
2191 int r = 0; 2736 int r = 0;
2192 int len = 0; 2737 int len = 0;
2193 int i; 2738 u32 tracer_flags = current_trace->flags->val;
2739 struct tracer_opt *trace_opts = current_trace->flags->opts;
2740
2194 2741
 2195 /* calculate max size */ 2742
2196 for (i = 0; trace_options[i]; i++) { 2743 for (i = 0; trace_options[i]; i++) {
@@ -2198,6 +2745,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2198 len += 3; /* "no" and space */ 2745 len += 3; /* "no" and space */
2199 } 2746 }
2200 2747
2748 /*
 2749 * Increase the size with the names of options specific
 2750 * to the current tracer.
2751 */
2752 for (i = 0; trace_opts[i].name; i++) {
2753 len += strlen(trace_opts[i].name);
2754 len += 3; /* "no" and space */
2755 }
2756
2201 /* +2 for \n and \0 */ 2757 /* +2 for \n and \0 */
2202 buf = kmalloc(len + 2, GFP_KERNEL); 2758 buf = kmalloc(len + 2, GFP_KERNEL);
2203 if (!buf) 2759 if (!buf)
@@ -2210,6 +2766,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2210 r += sprintf(buf + r, "no%s ", trace_options[i]); 2766 r += sprintf(buf + r, "no%s ", trace_options[i]);
2211 } 2767 }
2212 2768
2769 for (i = 0; trace_opts[i].name; i++) {
2770 if (tracer_flags & trace_opts[i].bit)
2771 r += sprintf(buf + r, "%s ",
2772 trace_opts[i].name);
2773 else
2774 r += sprintf(buf + r, "no%s ",
2775 trace_opts[i].name);
2776 }
2777
2213 r += sprintf(buf + r, "\n"); 2778 r += sprintf(buf + r, "\n");
2214 WARN_ON(r >= len + 2); 2779 WARN_ON(r >= len + 2);
2215 2780
@@ -2220,13 +2785,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2220 return r; 2785 return r;
2221} 2786}
2222 2787
2788/* Try to assign a tracer specific option */
2789static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2790{
2791 struct tracer_flags *trace_flags = trace->flags;
2792 struct tracer_opt *opts = NULL;
2793 int ret = 0, i = 0;
2794 int len;
2795
2796 for (i = 0; trace_flags->opts[i].name; i++) {
2797 opts = &trace_flags->opts[i];
2798 len = strlen(opts->name);
2799
2800 if (strncmp(cmp, opts->name, len) == 0) {
2801 ret = trace->set_flag(trace_flags->val,
2802 opts->bit, !neg);
2803 break;
2804 }
2805 }
2806 /* Not found */
2807 if (!trace_flags->opts[i].name)
2808 return -EINVAL;
2809
2810 /* Refused to handle */
2811 if (ret)
2812 return ret;
2813
2814 if (neg)
2815 trace_flags->val &= ~opts->bit;
2816 else
2817 trace_flags->val |= opts->bit;
2818
2819 return 0;
2820}
2821
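tracing_trace_options_write() below tries the global trace_options names first and falls back to set_tracer_option() only when none of them matched, so a tracer-private flag is toggled with the same "no" prefix convention as the core options, e.g. "echo noverbose > /debug/tracing/trace_options" would clear a hypothetical tracer option named verbose (see the sketch after the TRACER_OPT macro in trace.h below).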
2223static ssize_t 2822static ssize_t
2224tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, 2823tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2225 size_t cnt, loff_t *ppos) 2824 size_t cnt, loff_t *ppos)
2226{ 2825{
2227 char buf[64]; 2826 char buf[64];
2228 char *cmp = buf; 2827 char *cmp = buf;
2229 int neg = 0; 2828 int neg = 0;
2829 int ret;
2230 int i; 2830 int i;
2231 2831
2232 if (cnt >= sizeof(buf)) 2832 if (cnt >= sizeof(buf))
@@ -2253,11 +2853,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2253 break; 2853 break;
2254 } 2854 }
2255 } 2855 }
2256 /* 2856
2257 * If no option could be set, return an error: 2857 /* If no option could be set, test the specific tracer options */
2258 */ 2858 if (!trace_options[i]) {
2259 if (!trace_options[i]) 2859 ret = set_tracer_option(current_trace, cmp, neg);
2260 return -EINVAL; 2860 if (ret)
2861 return ret;
2862 }
2261 2863
2262 filp->f_pos += cnt; 2864 filp->f_pos += cnt;
2263 2865
@@ -2266,8 +2868,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2266 2868
2267static struct file_operations tracing_iter_fops = { 2869static struct file_operations tracing_iter_fops = {
2268 .open = tracing_open_generic, 2870 .open = tracing_open_generic,
2269 .read = tracing_iter_ctrl_read, 2871 .read = tracing_trace_options_read,
2270 .write = tracing_iter_ctrl_write, 2872 .write = tracing_trace_options_write,
2271}; 2873};
2272 2874
2273static const char readme_msg[] = 2875static const char readme_msg[] =
@@ -2281,9 +2883,9 @@ static const char readme_msg[] =
2281 "# echo sched_switch > /debug/tracing/current_tracer\n" 2883 "# echo sched_switch > /debug/tracing/current_tracer\n"
2282 "# cat /debug/tracing/current_tracer\n" 2884 "# cat /debug/tracing/current_tracer\n"
2283 "sched_switch\n" 2885 "sched_switch\n"
2284 "# cat /debug/tracing/iter_ctrl\n" 2886 "# cat /debug/tracing/trace_options\n"
2285 "noprint-parent nosym-offset nosym-addr noverbose\n" 2887 "noprint-parent nosym-offset nosym-addr noverbose\n"
2286 "# echo print-parent > /debug/tracing/iter_ctrl\n" 2888 "# echo print-parent > /debug/tracing/trace_options\n"
2287 "# echo 1 > /debug/tracing/tracing_enabled\n" 2889 "# echo 1 > /debug/tracing/tracing_enabled\n"
2288 "# cat /debug/tracing/trace > /tmp/trace.txt\n" 2890 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2289 "echo 0 > /debug/tracing/tracing_enabled\n" 2891 "echo 0 > /debug/tracing/tracing_enabled\n"
@@ -2306,11 +2908,10 @@ static ssize_t
2306tracing_ctrl_read(struct file *filp, char __user *ubuf, 2908tracing_ctrl_read(struct file *filp, char __user *ubuf,
2307 size_t cnt, loff_t *ppos) 2909 size_t cnt, loff_t *ppos)
2308{ 2910{
2309 struct trace_array *tr = filp->private_data;
2310 char buf[64]; 2911 char buf[64];
2311 int r; 2912 int r;
2312 2913
2313 r = sprintf(buf, "%ld\n", tr->ctrl); 2914 r = sprintf(buf, "%u\n", tracer_enabled);
2314 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2915 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2315} 2916}
2316 2917
@@ -2338,16 +2939,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2338 val = !!val; 2939 val = !!val;
2339 2940
2340 mutex_lock(&trace_types_lock); 2941 mutex_lock(&trace_types_lock);
2341 if (tr->ctrl ^ val) { 2942 if (tracer_enabled ^ val) {
2342 if (val) 2943 if (val) {
2343 tracer_enabled = 1; 2944 tracer_enabled = 1;
2344 else 2945 if (current_trace->start)
2946 current_trace->start(tr);
2947 tracing_start();
2948 } else {
2345 tracer_enabled = 0; 2949 tracer_enabled = 0;
2346 2950 tracing_stop();
2347 tr->ctrl = val; 2951 if (current_trace->stop)
2348 2952 current_trace->stop(tr);
2349 if (current_trace && current_trace->ctrl_update) 2953 }
2350 current_trace->ctrl_update(tr);
2351 } 2954 }
2352 mutex_unlock(&trace_types_lock); 2955 mutex_unlock(&trace_types_lock);
2353 2956
@@ -2373,29 +2976,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2373 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2976 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2374} 2977}
2375 2978
2376static ssize_t 2979static int tracing_set_tracer(char *buf)
2377tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2378 size_t cnt, loff_t *ppos)
2379{ 2980{
2380 struct trace_array *tr = &global_trace; 2981 struct trace_array *tr = &global_trace;
2381 struct tracer *t; 2982 struct tracer *t;
2382 char buf[max_tracer_type_len+1]; 2983 int ret = 0;
2383 int i;
2384 size_t ret;
2385
2386 ret = cnt;
2387
2388 if (cnt > max_tracer_type_len)
2389 cnt = max_tracer_type_len;
2390
2391 if (copy_from_user(&buf, ubuf, cnt))
2392 return -EFAULT;
2393
2394 buf[cnt] = 0;
2395
2396 /* strip ending whitespace. */
2397 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2398 buf[i] = 0;
2399 2984
2400 mutex_lock(&trace_types_lock); 2985 mutex_lock(&trace_types_lock);
2401 for (t = trace_types; t; t = t->next) { 2986 for (t = trace_types; t; t = t->next) {
@@ -2409,18 +2994,52 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2409 if (t == current_trace) 2994 if (t == current_trace)
2410 goto out; 2995 goto out;
2411 2996
2997 trace_branch_disable();
2412 if (current_trace && current_trace->reset) 2998 if (current_trace && current_trace->reset)
2413 current_trace->reset(tr); 2999 current_trace->reset(tr);
2414 3000
2415 current_trace = t; 3001 current_trace = t;
2416 if (t->init) 3002 if (t->init) {
2417 t->init(tr); 3003 ret = t->init(tr);
3004 if (ret)
3005 goto out;
3006 }
2418 3007
3008 trace_branch_enable(tr);
2419 out: 3009 out:
2420 mutex_unlock(&trace_types_lock); 3010 mutex_unlock(&trace_types_lock);
2421 3011
2422 if (ret > 0) 3012 return ret;
2423 filp->f_pos += ret; 3013}
3014
3015static ssize_t
3016tracing_set_trace_write(struct file *filp, const char __user *ubuf,
3017 size_t cnt, loff_t *ppos)
3018{
3019 char buf[max_tracer_type_len+1];
3020 int i;
3021 size_t ret;
3022 int err;
3023
3024 ret = cnt;
3025
3026 if (cnt > max_tracer_type_len)
3027 cnt = max_tracer_type_len;
3028
3029 if (copy_from_user(&buf, ubuf, cnt))
3030 return -EFAULT;
3031
3032 buf[cnt] = 0;
3033
3034 /* strip ending whitespace. */
3035 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
3036 buf[i] = 0;
3037
3038 err = tracing_set_tracer(buf);
3039 if (err)
3040 return err;
3041
3042 filp->f_pos += ret;
2424 3043
2425 return ret; 3044 return ret;
2426} 3045}
@@ -2487,6 +3106,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2487 return -ENOMEM; 3106 return -ENOMEM;
2488 3107
2489 mutex_lock(&trace_types_lock); 3108 mutex_lock(&trace_types_lock);
3109
3110 /* trace pipe does not show start of buffer */
3111 cpus_setall(iter->started);
3112
2490 iter->tr = &global_trace; 3113 iter->tr = &global_trace;
2491 iter->trace = current_trace; 3114 iter->trace = current_trace;
2492 filp->private_data = iter; 3115 filp->private_data = iter;
@@ -2662,7 +3285,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
2662 char buf[64]; 3285 char buf[64];
2663 int r; 3286 int r;
2664 3287
2665 r = sprintf(buf, "%lu\n", tr->entries); 3288 r = sprintf(buf, "%lu\n", tr->entries >> 10);
2666 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3289 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2667} 3290}
2668 3291
@@ -2673,7 +3296,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2673 unsigned long val; 3296 unsigned long val;
2674 char buf[64]; 3297 char buf[64];
2675 int ret, cpu; 3298 int ret, cpu;
2676 struct trace_array *tr = filp->private_data;
2677 3299
2678 if (cnt >= sizeof(buf)) 3300 if (cnt >= sizeof(buf))
2679 return -EINVAL; 3301 return -EINVAL;
@@ -2693,12 +3315,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2693 3315
2694 mutex_lock(&trace_types_lock); 3316 mutex_lock(&trace_types_lock);
2695 3317
2696 if (tr->ctrl) { 3318 tracing_stop();
2697 cnt = -EBUSY;
2698 pr_info("ftrace: please disable tracing"
2699 " before modifying buffer size\n");
2700 goto out;
2701 }
2702 3319
2703 /* disable all cpu buffers */ 3320 /* disable all cpu buffers */
2704 for_each_tracing_cpu(cpu) { 3321 for_each_tracing_cpu(cpu) {
@@ -2708,6 +3325,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2708 atomic_inc(&max_tr.data[cpu]->disabled); 3325 atomic_inc(&max_tr.data[cpu]->disabled);
2709 } 3326 }
2710 3327
3328 /* value is in KB */
3329 val <<= 10;
3330
2711 if (val != global_trace.entries) { 3331 if (val != global_trace.entries) {
2712 ret = ring_buffer_resize(global_trace.buffer, val); 3332 ret = ring_buffer_resize(global_trace.buffer, val);
2713 if (ret < 0) { 3333 if (ret < 0) {
@@ -2746,6 +3366,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2746 atomic_dec(&max_tr.data[cpu]->disabled); 3366 atomic_dec(&max_tr.data[cpu]->disabled);
2747 } 3367 }
2748 3368
3369 tracing_start();
2749 max_tr.entries = global_trace.entries; 3370 max_tr.entries = global_trace.entries;
2750 mutex_unlock(&trace_types_lock); 3371 mutex_unlock(&trace_types_lock);
2751 3372
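The entries file now works in kilobytes on both sides: reads report tr->entries >> 10 and writes shift the value back up, so echoing 1408 into the file (renamed to buffer_size_kb in tracer_init_debugfs() below) requests 1408 << 10 = 1441792 bytes per CPU buffer.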
@@ -2757,7 +3378,7 @@ static int mark_printk(const char *fmt, ...)
2757 int ret; 3378 int ret;
2758 va_list args; 3379 va_list args;
2759 va_start(args, fmt); 3380 va_start(args, fmt);
2760 ret = trace_vprintk(0, fmt, args); 3381 ret = trace_vprintk(0, -1, fmt, args);
2761 va_end(args); 3382 va_end(args);
2762 return ret; 3383 return ret;
2763} 3384}
@@ -2768,9 +3389,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
2768{ 3389{
2769 char *buf; 3390 char *buf;
2770 char *end; 3391 char *end;
2771 struct trace_array *tr = &global_trace;
2772 3392
2773 if (!tr->ctrl || tracing_disabled) 3393 if (tracing_disabled)
2774 return -EINVAL; 3394 return -EINVAL;
2775 3395
2776 if (cnt > TRACE_BUF_SIZE) 3396 if (cnt > TRACE_BUF_SIZE)
@@ -2836,22 +3456,38 @@ static struct file_operations tracing_mark_fops = {
2836 3456
2837#ifdef CONFIG_DYNAMIC_FTRACE 3457#ifdef CONFIG_DYNAMIC_FTRACE
2838 3458
3459int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3460{
3461 return 0;
3462}
3463
2839static ssize_t 3464static ssize_t
2840tracing_read_long(struct file *filp, char __user *ubuf, 3465tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2841 size_t cnt, loff_t *ppos) 3466 size_t cnt, loff_t *ppos)
2842{ 3467{
3468 static char ftrace_dyn_info_buffer[1024];
3469 static DEFINE_MUTEX(dyn_info_mutex);
2843 unsigned long *p = filp->private_data; 3470 unsigned long *p = filp->private_data;
2844 char buf[64]; 3471 char *buf = ftrace_dyn_info_buffer;
3472 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2845 int r; 3473 int r;
2846 3474
2847 r = sprintf(buf, "%ld\n", *p); 3475 mutex_lock(&dyn_info_mutex);
3476 r = sprintf(buf, "%ld ", *p);
2848 3477
2849 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3478 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3479 buf[r++] = '\n';
3480
3481 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3482
3483 mutex_unlock(&dyn_info_mutex);
3484
3485 return r;
2850} 3486}
2851 3487
2852static struct file_operations tracing_read_long_fops = { 3488static struct file_operations tracing_dyn_info_fops = {
2853 .open = tracing_open_generic, 3489 .open = tracing_open_generic,
2854 .read = tracing_read_long, 3490 .read = tracing_read_dyn_info,
2855}; 3491};
2856#endif 3492#endif
2857 3493
@@ -2892,10 +3528,10 @@ static __init int tracer_init_debugfs(void)
2892 if (!entry) 3528 if (!entry)
2893 pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); 3529 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2894 3530
2895 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, 3531 entry = debugfs_create_file("trace_options", 0644, d_tracer,
2896 NULL, &tracing_iter_fops); 3532 NULL, &tracing_iter_fops);
2897 if (!entry) 3533 if (!entry)
2898 pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); 3534 pr_warning("Could not create debugfs 'trace_options' entry\n");
2899 3535
2900 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3536 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2901 NULL, &tracing_cpumask_fops); 3537 NULL, &tracing_cpumask_fops);
@@ -2945,11 +3581,11 @@ static __init int tracer_init_debugfs(void)
2945 pr_warning("Could not create debugfs " 3581 pr_warning("Could not create debugfs "
2946 "'trace_pipe' entry\n"); 3582 "'trace_pipe' entry\n");
2947 3583
2948 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 3584 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2949 &global_trace, &tracing_entries_fops); 3585 &global_trace, &tracing_entries_fops);
2950 if (!entry) 3586 if (!entry)
2951 pr_warning("Could not create debugfs " 3587 pr_warning("Could not create debugfs "
2952 "'trace_entries' entry\n"); 3588 "'buffer_size_kb' entry\n");
2953 3589
2954 entry = debugfs_create_file("trace_marker", 0220, d_tracer, 3590 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2955 NULL, &tracing_mark_fops); 3591 NULL, &tracing_mark_fops);
@@ -2960,7 +3596,7 @@ static __init int tracer_init_debugfs(void)
2960#ifdef CONFIG_DYNAMIC_FTRACE 3596#ifdef CONFIG_DYNAMIC_FTRACE
2961 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3597 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2962 &ftrace_update_tot_cnt, 3598 &ftrace_update_tot_cnt,
2963 &tracing_read_long_fops); 3599 &tracing_dyn_info_fops);
2964 if (!entry) 3600 if (!entry)
2965 pr_warning("Could not create debugfs " 3601 pr_warning("Could not create debugfs "
2966 "'dyn_ftrace_total_info' entry\n"); 3602 "'dyn_ftrace_total_info' entry\n");
@@ -2971,7 +3607,7 @@ static __init int tracer_init_debugfs(void)
2971 return 0; 3607 return 0;
2972} 3608}
2973 3609
2974int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3610int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
2975{ 3611{
2976 static DEFINE_SPINLOCK(trace_buf_lock); 3612 static DEFINE_SPINLOCK(trace_buf_lock);
2977 static char trace_buf[TRACE_BUF_SIZE]; 3613 static char trace_buf[TRACE_BUF_SIZE];
@@ -2979,11 +3615,11 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2979 struct ring_buffer_event *event; 3615 struct ring_buffer_event *event;
2980 struct trace_array *tr = &global_trace; 3616 struct trace_array *tr = &global_trace;
2981 struct trace_array_cpu *data; 3617 struct trace_array_cpu *data;
2982 struct print_entry *entry;
2983 unsigned long flags, irq_flags;
2984 int cpu, len = 0, size, pc; 3618 int cpu, len = 0, size, pc;
3619 struct print_entry *entry;
3620 unsigned long irq_flags;
2985 3621
2986 if (!tr->ctrl || tracing_disabled) 3622 if (tracing_disabled || tracing_selftest_running)
2987 return 0; 3623 return 0;
2988 3624
2989 pc = preempt_count(); 3625 pc = preempt_count();
@@ -2994,7 +3630,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2994 if (unlikely(atomic_read(&data->disabled))) 3630 if (unlikely(atomic_read(&data->disabled)))
2995 goto out; 3631 goto out;
2996 3632
2997 spin_lock_irqsave(&trace_buf_lock, flags); 3633 pause_graph_tracing();
3634 spin_lock_irqsave(&trace_buf_lock, irq_flags);
2998 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); 3635 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2999 3636
3000 len = min(len, TRACE_BUF_SIZE-1); 3637 len = min(len, TRACE_BUF_SIZE-1);
@@ -3005,17 +3642,18 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3005 if (!event) 3642 if (!event)
3006 goto out_unlock; 3643 goto out_unlock;
3007 entry = ring_buffer_event_data(event); 3644 entry = ring_buffer_event_data(event);
3008 tracing_generic_entry_update(&entry->ent, flags, pc); 3645 tracing_generic_entry_update(&entry->ent, irq_flags, pc);
3009 entry->ent.type = TRACE_PRINT; 3646 entry->ent.type = TRACE_PRINT;
3010 entry->ip = ip; 3647 entry->ip = ip;
3648 entry->depth = depth;
3011 3649
3012 memcpy(&entry->buf, trace_buf, len); 3650 memcpy(&entry->buf, trace_buf, len);
3013 entry->buf[len] = 0; 3651 entry->buf[len] = 0;
3014 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 3652 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
3015 3653
3016 out_unlock: 3654 out_unlock:
3017 spin_unlock_irqrestore(&trace_buf_lock, flags); 3655 spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3018 3656 unpause_graph_tracing();
3019 out: 3657 out:
3020 preempt_enable_notrace(); 3658 preempt_enable_notrace();
3021 3659
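Two things change in trace_vprintk(): the formatting section is bracketed by pause_graph_tracing()/unpause_graph_tracing(), presumably so the function graph tracer cannot recurse into the print path while trace_buf is held, and the new depth argument records the caller's position on the return stack (__ftrace_printk() passes task_curr_ret_stack(current)) so graph output can indent the message at the right call depth. The bracketing, in isolation:

	pause_graph_tracing();
	spin_lock_irqsave(&trace_buf_lock, irq_flags);
	/* vsnprintf() into trace_buf and commit the TRACE_PRINT event */
	spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
	unpause_graph_tracing();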
@@ -3032,7 +3670,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3032 return 0; 3670 return 0;
3033 3671
3034 va_start(ap, fmt); 3672 va_start(ap, fmt);
3035 ret = trace_vprintk(ip, fmt, ap); 3673 ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3036 va_end(ap); 3674 va_end(ap);
3037 return ret; 3675 return ret;
3038} 3676}
@@ -3041,7 +3679,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
3041static int trace_panic_handler(struct notifier_block *this, 3679static int trace_panic_handler(struct notifier_block *this,
3042 unsigned long event, void *unused) 3680 unsigned long event, void *unused)
3043{ 3681{
3044 ftrace_dump(); 3682 if (ftrace_dump_on_oops)
3683 ftrace_dump();
3045 return NOTIFY_OK; 3684 return NOTIFY_OK;
3046} 3685}
3047 3686
@@ -3057,7 +3696,8 @@ static int trace_die_handler(struct notifier_block *self,
3057{ 3696{
3058 switch (val) { 3697 switch (val) {
3059 case DIE_OOPS: 3698 case DIE_OOPS:
3060 ftrace_dump(); 3699 if (ftrace_dump_on_oops)
3700 ftrace_dump();
3061 break; 3701 break;
3062 default: 3702 default:
3063 break; 3703 break;
@@ -3098,7 +3738,6 @@ trace_printk_seq(struct trace_seq *s)
3098 trace_seq_reset(s); 3738 trace_seq_reset(s);
3099} 3739}
3100 3740
3101
3102void ftrace_dump(void) 3741void ftrace_dump(void)
3103{ 3742{
3104 static DEFINE_SPINLOCK(ftrace_dump_lock); 3743 static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3123,6 +3762,9 @@ void ftrace_dump(void)
3123 atomic_inc(&global_trace.data[cpu]->disabled); 3762 atomic_inc(&global_trace.data[cpu]->disabled);
3124 } 3763 }
3125 3764
3765 /* don't look at user memory in panic mode */
3766 trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3767
3126 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 3768 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3127 3769
3128 iter.tr = &global_trace; 3770 iter.tr = &global_trace;
@@ -3216,7 +3858,6 @@ __init static int tracer_alloc_buffers(void)
3216#endif 3858#endif
3217 3859
3218 /* All seems OK, enable tracing */ 3860 /* All seems OK, enable tracing */
3219 global_trace.ctrl = tracer_enabled;
3220 tracing_disabled = 0; 3861 tracing_disabled = 0;
3221 3862
3222 atomic_notifier_chain_register(&panic_notifier_list, 3863 atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..fc75dce7a664 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h>
11 12
12enum trace_type { 13enum trace_type {
13 __TRACE_FIRST_TYPE = 0, 14 __TRACE_FIRST_TYPE = 0,
@@ -21,7 +22,14 @@ enum trace_type {
21 TRACE_SPECIAL, 22 TRACE_SPECIAL,
22 TRACE_MMIO_RW, 23 TRACE_MMIO_RW,
23 TRACE_MMIO_MAP, 24 TRACE_MMIO_MAP,
24 TRACE_BOOT, 25 TRACE_BRANCH,
26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET,
28 TRACE_GRAPH_RET,
29 TRACE_GRAPH_ENT,
30 TRACE_USER_STACK,
31 TRACE_HW_BRANCHES,
32 TRACE_POWER,
25 33
26 __TRACE_LAST_TYPE 34 __TRACE_LAST_TYPE
27}; 35};
@@ -38,6 +46,7 @@ struct trace_entry {
38 unsigned char flags; 46 unsigned char flags;
39 unsigned char preempt_count; 47 unsigned char preempt_count;
40 int pid; 48 int pid;
49 int tgid;
41}; 50};
42 51
43/* 52/*
@@ -48,6 +57,18 @@ struct ftrace_entry {
48 unsigned long ip; 57 unsigned long ip;
49 unsigned long parent_ip; 58 unsigned long parent_ip;
50}; 59};
60
61/* Function call entry */
62struct ftrace_graph_ent_entry {
63 struct trace_entry ent;
64 struct ftrace_graph_ent graph_ent;
65};
66
67/* Function return entry */
68struct ftrace_graph_ret_entry {
69 struct trace_entry ent;
70 struct ftrace_graph_ret ret;
71};
51extern struct tracer boot_tracer; 72extern struct tracer boot_tracer;
52 73
53/* 74/*
@@ -85,12 +106,18 @@ struct stack_entry {
85 unsigned long caller[FTRACE_STACK_ENTRIES]; 106 unsigned long caller[FTRACE_STACK_ENTRIES];
86}; 107};
87 108
109struct userstack_entry {
110 struct trace_entry ent;
111 unsigned long caller[FTRACE_STACK_ENTRIES];
112};
113
88/* 114/*
89 * ftrace_printk entry: 115 * ftrace_printk entry:
90 */ 116 */
91struct print_entry { 117struct print_entry {
92 struct trace_entry ent; 118 struct trace_entry ent;
93 unsigned long ip; 119 unsigned long ip;
120 int depth;
94 char buf[]; 121 char buf[];
95}; 122};
96 123
@@ -112,9 +139,35 @@ struct trace_mmiotrace_map {
112 struct mmiotrace_map map; 139 struct mmiotrace_map map;
113}; 140};
114 141
115struct trace_boot { 142struct trace_boot_call {
116 struct trace_entry ent; 143 struct trace_entry ent;
117 struct boot_trace initcall; 144 struct boot_trace_call boot_call;
145};
146
147struct trace_boot_ret {
148 struct trace_entry ent;
149 struct boot_trace_ret boot_ret;
150};
151
152#define TRACE_FUNC_SIZE 30
153#define TRACE_FILE_SIZE 20
154struct trace_branch {
155 struct trace_entry ent;
156 unsigned line;
157 char func[TRACE_FUNC_SIZE+1];
158 char file[TRACE_FILE_SIZE+1];
159 char correct;
160};
161
162struct hw_branch_entry {
163 struct trace_entry ent;
164 u64 from;
165 u64 to;
166};
167
168struct trace_power {
169 struct trace_entry ent;
170 struct power_trace state_data;
118}; 171};
119 172
120/* 173/*
@@ -172,7 +225,6 @@ struct trace_iterator;
172struct trace_array { 225struct trace_array {
173 struct ring_buffer *buffer; 226 struct ring_buffer *buffer;
174 unsigned long entries; 227 unsigned long entries;
175 long ctrl;
176 int cpu; 228 int cpu;
177 cycle_t time_start; 229 cycle_t time_start;
178 struct task_struct *waiter; 230 struct task_struct *waiter;
@@ -212,13 +264,22 @@ extern void __ftrace_bad_type(void);
212 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ 264 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
213 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ 265 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
214 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ 266 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
267 IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
215 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ 268 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
216 IF_ASSIGN(var, ent, struct special_entry, 0); \ 269 IF_ASSIGN(var, ent, struct special_entry, 0); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ 270 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
218 TRACE_MMIO_RW); \ 271 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 272 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \ 273 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ 274 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
275 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
276 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
277 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
278 TRACE_GRAPH_ENT); \
279 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
280 TRACE_GRAPH_RET); \
281 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
282 IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
222 __ftrace_bad_type(); \ 283 __ftrace_bad_type(); \
223 } while (0) 284 } while (0)
224 285
@@ -229,29 +290,56 @@ enum print_line_t {
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 290 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230}; 291};
231 292
293
294/*
295 * An option specific to a tracer. This is a boolean value.
 296 * The bit member is the mask that represents this option in the
 297 * val field of struct tracer_flags.
298 */
299struct tracer_opt {
300 const char *name; /* Will appear on the trace_options file */
301 u32 bit; /* Mask assigned in val field in tracer_flags */
302};
303
304/*
305 * The set of specific options for a tracer. Your tracer
 306 * has to set the initial value of the flags val.
307 */
308struct tracer_flags {
309 u32 val;
310 struct tracer_opt *opts;
311};
312
 313/* Makes it easier to define a tracer opt */
314#define TRACER_OPT(s, b) .name = #s, .bit = b
315
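A hypothetical tracer would describe its private options like this (the names are illustrative, not from the patch); the .flags and .set_flag members of struct tracer below are then pointed at my_tracer_flags and my_set_flag:

	#define MY_OPT_VERBOSE	0x1			/* illustrative bit mask */

	static struct tracer_opt my_tracer_opts[] = {
		{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
		{ }					/* NULL name terminates the scan */
	};

	static struct tracer_flags my_tracer_flags = {
		.val	= 0,				/* every option starts cleared */
		.opts	= my_tracer_opts,
	};

	/* called from set_tracer_option(); returning 0 accepts the new setting */
	static int my_set_flag(u32 old_flags, u32 bit, int set)
	{
		return 0;
	}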
232/* 316/*
233 * A specific tracer, represented by methods that operate on a trace array: 317 * A specific tracer, represented by methods that operate on a trace array:
234 */ 318 */
235struct tracer { 319struct tracer {
236 const char *name; 320 const char *name;
237 void (*init)(struct trace_array *tr); 321 /* Your tracer should raise a warning if init fails */
322 int (*init)(struct trace_array *tr);
238 void (*reset)(struct trace_array *tr); 323 void (*reset)(struct trace_array *tr);
324 void (*start)(struct trace_array *tr);
325 void (*stop)(struct trace_array *tr);
239 void (*open)(struct trace_iterator *iter); 326 void (*open)(struct trace_iterator *iter);
240 void (*pipe_open)(struct trace_iterator *iter); 327 void (*pipe_open)(struct trace_iterator *iter);
241 void (*close)(struct trace_iterator *iter); 328 void (*close)(struct trace_iterator *iter);
242 void (*start)(struct trace_iterator *iter);
243 void (*stop)(struct trace_iterator *iter);
244 ssize_t (*read)(struct trace_iterator *iter, 329 ssize_t (*read)(struct trace_iterator *iter,
245 struct file *filp, char __user *ubuf, 330 struct file *filp, char __user *ubuf,
246 size_t cnt, loff_t *ppos); 331 size_t cnt, loff_t *ppos);
247 void (*ctrl_update)(struct trace_array *tr);
248#ifdef CONFIG_FTRACE_STARTUP_TEST 332#ifdef CONFIG_FTRACE_STARTUP_TEST
249 int (*selftest)(struct tracer *trace, 333 int (*selftest)(struct tracer *trace,
250 struct trace_array *tr); 334 struct trace_array *tr);
251#endif 335#endif
336 void (*print_header)(struct seq_file *m);
252 enum print_line_t (*print_line)(struct trace_iterator *iter); 337 enum print_line_t (*print_line)(struct trace_iterator *iter);
338 /* If you handled the flag setting, return 0 */
339 int (*set_flag)(u32 old_flags, u32 bit, int set);
253 struct tracer *next; 340 struct tracer *next;
254 int print_max; 341 int print_max;
342 struct tracer_flags *flags;
255}; 343};
256 344
257struct trace_seq { 345struct trace_seq {
@@ -279,8 +367,11 @@ struct trace_iterator {
279 unsigned long iter_flags; 367 unsigned long iter_flags;
280 loff_t pos; 368 loff_t pos;
281 long idx; 369 long idx;
370
371 cpumask_t started;
282}; 372};
283 373
374int tracing_is_enabled(void);
284void trace_wake_up(void); 375void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu); 376void tracing_reset(struct trace_array *tr, int cpu);
286int tracing_open_generic(struct inode *inode, struct file *filp); 377int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -321,8 +412,15 @@ void trace_function(struct trace_array *tr,
321 unsigned long parent_ip, 412 unsigned long parent_ip,
322 unsigned long flags, int pc); 413 unsigned long flags, int pc);
323 414
415void trace_graph_return(struct ftrace_graph_ret *trace);
416int trace_graph_entry(struct ftrace_graph_ent *trace);
417void trace_hw_branch(struct trace_array *tr, u64 from, u64 to);
418
324void tracing_start_cmdline_record(void); 419void tracing_start_cmdline_record(void);
325void tracing_stop_cmdline_record(void); 420void tracing_stop_cmdline_record(void);
421void tracing_sched_switch_assign_trace(struct trace_array *tr);
422void tracing_stop_sched_switch_record(void);
423void tracing_start_sched_switch_record(void);
326int register_tracer(struct tracer *type); 424int register_tracer(struct tracer *type);
327void unregister_tracer(struct tracer *type); 425void unregister_tracer(struct tracer *type);
328 426
@@ -358,6 +456,7 @@ struct tracer_switch_ops {
358 struct tracer_switch_ops *next; 456 struct tracer_switch_ops *next;
359}; 457};
360 458
459char *trace_find_cmdline(int pid);
361#endif /* CONFIG_CONTEXT_SWITCH_TRACER */ 460#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
362 461
363#ifdef CONFIG_DYNAMIC_FTRACE 462#ifdef CONFIG_DYNAMIC_FTRACE
@@ -383,19 +482,79 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
383 struct trace_array *tr); 482 struct trace_array *tr);
384extern int trace_selftest_startup_sysprof(struct tracer *trace, 483extern int trace_selftest_startup_sysprof(struct tracer *trace,
385 struct trace_array *tr); 484 struct trace_array *tr);
485extern int trace_selftest_startup_branch(struct tracer *trace,
486 struct trace_array *tr);
386#endif /* CONFIG_FTRACE_STARTUP_TEST */ 487#endif /* CONFIG_FTRACE_STARTUP_TEST */
387 488
388extern void *head_page(struct trace_array_cpu *data); 489extern void *head_page(struct trace_array_cpu *data);
389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 490extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s, 491extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter); 492 struct trace_iterator *iter);
493
494extern int
495seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
496 unsigned long sym_flags);
392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 497extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
393 size_t cnt); 498 size_t cnt);
394extern long ns2usecs(cycle_t nsec); 499extern long ns2usecs(cycle_t nsec);
395extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); 500extern int
501trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args);
396 502
397extern unsigned long trace_flags; 503extern unsigned long trace_flags;
398 504
505/* Standard output formatting function used for function return traces */
506#ifdef CONFIG_FUNCTION_GRAPH_TRACER
507extern enum print_line_t print_graph_function(struct trace_iterator *iter);
508
509#ifdef CONFIG_DYNAMIC_FTRACE
510/* TODO: make this variable */
511#define FTRACE_GRAPH_MAX_FUNCS 32
512extern int ftrace_graph_count;
513extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
514
515static inline int ftrace_graph_addr(unsigned long addr)
516{
517 int i;
518
519 if (!ftrace_graph_count || test_tsk_trace_graph(current))
520 return 1;
521
522 for (i = 0; i < ftrace_graph_count; i++) {
523 if (addr == ftrace_graph_funcs[i])
524 return 1;
525 }
526
527 return 0;
528}
529#else
530static inline int ftrace_trace_addr(unsigned long addr)
531{
532 return 1;
533}
534static inline int ftrace_graph_addr(unsigned long addr)
535{
536 return 1;
537}
538#endif /* CONFIG_DYNAMIC_FTRACE */
539
540#else /* CONFIG_FUNCTION_GRAPH_TRACER */
541static inline enum print_line_t
542print_graph_function(struct trace_iterator *iter)
543{
544 return TRACE_TYPE_UNHANDLED;
545}
546#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
547
548extern struct pid *ftrace_pid_trace;
549
550static inline int ftrace_trace_task(struct task_struct *task)
551{
552 if (!ftrace_pid_trace)
553 return 1;
554
555 return test_tsk_trace_trace(task);
556}
557
399/* 558/*
400 * trace_iterator_flags is an enumeration that defines bit 559 * trace_iterator_flags is an enumeration that defines bit
401 * positions into trace_flags that controls the output. 560 * positions into trace_flags that controls the output.
@@ -415,8 +574,93 @@ enum trace_iterator_flags {
415 TRACE_ITER_STACKTRACE = 0x100, 574 TRACE_ITER_STACKTRACE = 0x100,
416 TRACE_ITER_SCHED_TREE = 0x200, 575 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400, 576 TRACE_ITER_PRINTK = 0x400,
577 TRACE_ITER_PREEMPTONLY = 0x800,
578 TRACE_ITER_BRANCH = 0x1000,
579 TRACE_ITER_ANNOTATE = 0x2000,
580 TRACE_ITER_USERSTACKTRACE = 0x4000,
581 TRACE_ITER_SYM_USEROBJ = 0x8000,
582 TRACE_ITER_PRINTK_MSGONLY = 0x10000
418}; 583};
419 584
585/*
586 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
587 * control the output of kernel symbols.
588 */
589#define TRACE_ITER_SYM_MASK \
590 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
591
420extern struct tracer nop_trace; 592extern struct tracer nop_trace;
421 593
594/**
595 * ftrace_preempt_disable - disable preemption scheduler safe
596 *
 597 * When tracing can happen inside the scheduler, there exist
 598 * cases where the tracing might happen before the need_resched
599 * flag is checked. If this happens and the tracer calls
600 * preempt_enable (after a disable), a schedule might take place
601 * causing an infinite recursion.
602 *
 603 * To prevent this, we read the need_resched flag before
604 * disabling preemption. When we want to enable preemption we
605 * check the flag, if it is set, then we call preempt_enable_no_resched.
606 * Otherwise, we call preempt_enable.
607 *
 608 * The rationale for doing the above is that if need_resched is set
609 * and we have yet to reschedule, we are either in an atomic location
610 * (where we do not need to check for scheduling) or we are inside
611 * the scheduler and do not want to resched.
612 */
613static inline int ftrace_preempt_disable(void)
614{
615 int resched;
616
617 resched = need_resched();
618 preempt_disable_notrace();
619
620 return resched;
621}
622
623/**
624 * ftrace_preempt_enable - enable preemption scheduler safe
625 * @resched: the return value from ftrace_preempt_disable
626 *
627 * This is a scheduler safe way to enable preemption and not miss
 628 * any preemption checks. The disable call saved the state of preemption.
629 * If resched is set, then we were either inside an atomic or
630 * are inside the scheduler (we would have already scheduled
631 * otherwise). In this case, we do not want to call normal
632 * preempt_enable, but preempt_enable_no_resched instead.
633 */
634static inline void ftrace_preempt_enable(int resched)
635{
636 if (resched)
637 preempt_enable_no_resched_notrace();
638 else
639 preempt_enable_notrace();
640}
641
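For illustration, a minimal sketch of how a tracing callback is meant to use the pair above; the callback name and body are hypothetical and not part of this patch:

static void example_trace_callback(unsigned long ip, unsigned long parent_ip)
{
	int resched;

	/* Save the need_resched state and disable preemption without tracing. */
	resched = ftrace_preempt_disable();

	/* ... record the event into the ring buffer here ... */

	/*
	 * Re-enable preemption. If need_resched was already set, this calls
	 * preempt_enable_no_resched_notrace() so the tracer itself cannot
	 * trigger a schedule.
	 */
	ftrace_preempt_enable(resched);
}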
642#ifdef CONFIG_BRANCH_TRACER
643extern int enable_branch_tracing(struct trace_array *tr);
644extern void disable_branch_tracing(void);
645static inline int trace_branch_enable(struct trace_array *tr)
646{
647 if (trace_flags & TRACE_ITER_BRANCH)
648 return enable_branch_tracing(tr);
649 return 0;
650}
651static inline void trace_branch_disable(void)
652{
653 /* due to races, always disable */
654 disable_branch_tracing();
655}
656#else
657static inline int trace_branch_enable(struct trace_array *tr)
658{
659 return 0;
660}
661static inline void trace_branch_disable(void)
662{
663}
664#endif /* CONFIG_BRANCH_TRACER */
665
422#endif /* _LINUX_KERNEL_TRACE_H */ 666#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..a4fa2c57e34e 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,73 +13,117 @@
13#include "trace.h" 13#include "trace.h"
14 14
15static struct trace_array *boot_trace; 15static struct trace_array *boot_trace;
16static int trace_boot_enabled; 16static bool pre_initcalls_finished;
17 17
18 18/* Tells the boot tracer that the pre_smp_initcalls are finished.
 19/* Should be started after do_pre_smp_initcalls() in init/main.c */ 19 * So we are ready.
 20 * It doesn't enable sched events tracing, however.
21 * You have to call enable_boot_trace to do so.
22 */
20void start_boot_trace(void) 23void start_boot_trace(void)
21{ 24{
22 trace_boot_enabled = 1; 25 pre_initcalls_finished = true;
23} 26}
24 27
25void stop_boot_trace(void) 28void enable_boot_trace(void)
26{ 29{
27 trace_boot_enabled = 0; 30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
28} 32}
29 33
30void reset_boot_trace(struct trace_array *tr) 34void disable_boot_trace(void)
31{ 35{
32 stop_boot_trace(); 36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
33} 38}
34 39
35static void boot_trace_init(struct trace_array *tr) 40static void reset_boot_trace(struct trace_array *tr)
36{ 41{
37 int cpu; 42 int cpu;
38 boot_trace = tr;
39 43
40 trace_boot_enabled = 0; 44 tr->time_start = ftrace_now(tr->cpu);
45
46 for_each_online_cpu(cpu)
47 tracing_reset(tr, cpu);
48}
49
50static int boot_trace_init(struct trace_array *tr)
51{
52 int cpu;
53 boot_trace = tr;
41 54
42 for_each_cpu_mask(cpu, cpu_possible_map) 55 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu); 56 tracing_reset(tr, cpu);
57
58 tracing_sched_switch_assign_trace(tr);
59 return 0;
44} 60}
45 61
46static void boot_trace_ctrl_update(struct trace_array *tr) 62static enum print_line_t
63initcall_call_print_line(struct trace_iterator *iter)
47{ 64{
48 if (tr->ctrl) 65 struct trace_entry *entry = iter->ent;
49 start_boot_trace(); 66 struct trace_seq *s = &iter->seq;
67 struct trace_boot_call *field;
68 struct boot_trace_call *call;
69 u64 ts;
70 unsigned long nsec_rem;
71 int ret;
72
73 trace_assign_type(field, entry);
74 call = &field->boot_call;
75 ts = iter->ts;
76 nsec_rem = do_div(ts, 1000000000);
77
78 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
79 (unsigned long)ts, nsec_rem, call->func, call->caller);
80
81 if (!ret)
82 return TRACE_TYPE_PARTIAL_LINE;
50 else 83 else
51 stop_boot_trace(); 84 return TRACE_TYPE_HANDLED;
52} 85}
53 86
54static enum print_line_t initcall_print_line(struct trace_iterator *iter) 87static enum print_line_t
88initcall_ret_print_line(struct trace_iterator *iter)
55{ 89{
56 int ret;
57 struct trace_entry *entry = iter->ent; 90 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq; 91 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime); 92 struct trace_boot_ret *field;
62 struct timespec rettime = ktime_to_timespec(it->rettime); 93 struct boot_trace_ret *init_ret;
63 94 u64 ts;
64 if (entry->type == TRACE_BOOT) { 95 unsigned long nsec_rem;
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 96 int ret;
66 calltime.tv_sec, 97
67 calltime.tv_nsec, 98 trace_assign_type(field, entry);
68 it->func, it->caller); 99 init_ret = &field->boot_ret;
69 if (!ret) 100 ts = iter->ts;
70 return TRACE_TYPE_PARTIAL_LINE; 101 nsec_rem = do_div(ts, 1000000000);
71 102
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 103 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n", 104 "returned %d after %llu msecs\n",
74 rettime.tv_sec, 105 (unsigned long) ts,
75 rettime.tv_nsec, 106 nsec_rem,
76 it->func, it->result, it->duration); 107 init_ret->func, init_ret->result, init_ret->duration);
77 108
78 if (!ret) 109 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE; 110 return TRACE_TYPE_PARTIAL_LINE;
111 else
80 return TRACE_TYPE_HANDLED; 112 return TRACE_TYPE_HANDLED;
113}
114
115static enum print_line_t initcall_print_line(struct trace_iterator *iter)
116{
117 struct trace_entry *entry = iter->ent;
118
119 switch (entry->type) {
120 case TRACE_BOOT_CALL:
121 return initcall_call_print_line(iter);
122 case TRACE_BOOT_RET:
123 return initcall_ret_print_line(iter);
124 default:
125 return TRACE_TYPE_UNHANDLED;
81 } 126 }
82 return TRACE_TYPE_UNHANDLED;
83} 127}
84 128
85struct tracer boot_tracer __read_mostly = 129struct tracer boot_tracer __read_mostly =
@@ -87,27 +131,53 @@ struct tracer boot_tracer __read_mostly =
87 .name = "initcall", 131 .name = "initcall",
88 .init = boot_trace_init, 132 .init = boot_trace_init,
89 .reset = reset_boot_trace, 133 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line, 134 .print_line = initcall_print_line,
92}; 135};
93 136
94void trace_boot(struct boot_trace *it, initcall_t fn) 137void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
95{ 138{
96 struct ring_buffer_event *event; 139 struct ring_buffer_event *event;
97 struct trace_boot *entry; 140 struct trace_boot_call *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags; 141 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace; 142 struct trace_array *tr = boot_trace;
101 143
102 if (!trace_boot_enabled) 144 if (!pre_initcalls_finished)
103 return; 145 return;
104 146
105 /* Get its name now since this function could 147 /* Get its name now since this function could
106 * disappear because it is in the .init section. 148 * disappear because it is in the .init section.
107 */ 149 */
108 sprint_symbol(it->func, (unsigned long)fn); 150 sprint_symbol(bt->func, (unsigned long)fn);
151 preempt_disable();
152
153 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
154 &irq_flags);
155 if (!event)
156 goto out;
157 entry = ring_buffer_event_data(event);
158 tracing_generic_entry_update(&entry->ent, 0, 0);
159 entry->ent.type = TRACE_BOOT_CALL;
160 entry->boot_call = *bt;
161 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
162
163 trace_wake_up();
164
165 out:
166 preempt_enable();
167}
168
169void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
170{
171 struct ring_buffer_event *event;
172 struct trace_boot_ret *entry;
173 unsigned long irq_flags;
174 struct trace_array *tr = boot_trace;
175
176 if (!pre_initcalls_finished)
177 return;
178
179 sprint_symbol(bt->func, (unsigned long)fn);
109 preempt_disable(); 180 preempt_disable();
110 data = tr->data[smp_processor_id()];
111 181
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 182 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags); 183 &irq_flags);
@@ -115,8 +185,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
115 goto out; 185 goto out;
116 entry = ring_buffer_event_data(event); 186 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0); 187 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT; 188 entry->ent.type = TRACE_BOOT_RET;
119 entry->initcall = *it; 189 entry->boot_ret = *bt;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 190 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121 191
122 trace_wake_up(); 192 trace_wake_up();
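For context, a hedged sketch of how an initcall runner is assumed to drive the two hooks above; the real call site is do_one_initcall() in init/main.c and may differ in detail (the wrapper name below is hypothetical, and the duration is assumed to be in msecs to match the print format):

static int example_run_initcall(initcall_t fn)
{
	struct boot_trace_call call;
	struct boot_trace_ret ret;
	ktime_t calltime, rettime;

	call.caller = task_pid_nr(current);
	trace_boot_call(&call, fn);	/* fills call.func via sprint_symbol() */

	calltime = ktime_get();
	ret.result = fn();
	rettime = ktime_get();

	ret.duration = ktime_to_ms(ktime_sub(rettime, calltime));
	trace_boot_ret(&ret, fn);	/* fills ret.func via sprint_symbol() */

	return ret.result;
}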
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..6c00feb3bac7
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,342 @@
1/*
2 * unlikely profiler
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/irqflags.h>
10#include <linux/debugfs.h>
11#include <linux/uaccess.h>
12#include <linux/module.h>
13#include <linux/ftrace.h>
14#include <linux/hash.h>
15#include <linux/fs.h>
16#include <asm/local.h>
17#include "trace.h"
18
19#ifdef CONFIG_BRANCH_TRACER
20
21static int branch_tracing_enabled __read_mostly;
22static DEFINE_MUTEX(branch_tracing_mutex);
23static struct trace_array *branch_tracer;
24
25static void
26probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
27{
28 struct trace_array *tr = branch_tracer;
29 struct ring_buffer_event *event;
30 struct trace_branch *entry;
31 unsigned long flags, irq_flags;
32 int cpu, pc;
33 const char *p;
34
35 /*
36 * I would love to save just the ftrace_likely_data pointer, but
37 * this code can also be used by modules. Ugly things can happen
38 * if the module is unloaded, and then we go and read the
39 * pointer. This is slower, but much safer.
40 */
41
42 if (unlikely(!tr))
43 return;
44
45 local_irq_save(flags);
46 cpu = raw_smp_processor_id();
47 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
48 goto out;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
51 &irq_flags);
52 if (!event)
53 goto out;
54
55 pc = preempt_count();
56 entry = ring_buffer_event_data(event);
57 tracing_generic_entry_update(&entry->ent, flags, pc);
58 entry->ent.type = TRACE_BRANCH;
59
60 /* Strip off the path, only save the file */
61 p = f->file + strlen(f->file);
62 while (p >= f->file && *p != '/')
63 p--;
64 p++;
65
66 strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
67 strncpy(entry->file, p, TRACE_FILE_SIZE);
68 entry->func[TRACE_FUNC_SIZE] = 0;
69 entry->file[TRACE_FILE_SIZE] = 0;
70 entry->line = f->line;
71 entry->correct = val == expect;
72
73 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
74
75 out:
76 atomic_dec(&tr->data[cpu]->disabled);
77 local_irq_restore(flags);
78}
79
80static inline
81void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
82{
83 if (!branch_tracing_enabled)
84 return;
85
86 probe_likely_condition(f, val, expect);
87}
88
89int enable_branch_tracing(struct trace_array *tr)
90{
91 int ret = 0;
92
93 mutex_lock(&branch_tracing_mutex);
94 branch_tracer = tr;
95 /*
96 * Must be seen before enabling. The reader is a condition
97 * where we do not need a matching rmb()
98 */
99 smp_wmb();
100 branch_tracing_enabled++;
101 mutex_unlock(&branch_tracing_mutex);
102
103 return ret;
104}
105
106void disable_branch_tracing(void)
107{
108 mutex_lock(&branch_tracing_mutex);
109
110 if (!branch_tracing_enabled)
111 goto out_unlock;
112
113 branch_tracing_enabled--;
114
115 out_unlock:
116 mutex_unlock(&branch_tracing_mutex);
117}
118
119static void start_branch_trace(struct trace_array *tr)
120{
121 enable_branch_tracing(tr);
122}
123
124static void stop_branch_trace(struct trace_array *tr)
125{
126 disable_branch_tracing();
127}
128
129static int branch_trace_init(struct trace_array *tr)
130{
131 int cpu;
132
133 for_each_online_cpu(cpu)
134 tracing_reset(tr, cpu);
135
136 start_branch_trace(tr);
137 return 0;
138}
139
140static void branch_trace_reset(struct trace_array *tr)
141{
142 stop_branch_trace(tr);
143}
144
145struct tracer branch_trace __read_mostly =
146{
147 .name = "branch",
148 .init = branch_trace_init,
149 .reset = branch_trace_reset,
150#ifdef CONFIG_FTRACE_SELFTEST
151 .selftest = trace_selftest_startup_branch,
152#endif
153};
154
155__init static int init_branch_trace(void)
156{
157 return register_tracer(&branch_trace);
158}
159
160device_initcall(init_branch_trace);
161#else
162static inline
163void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
164{
165}
166#endif /* CONFIG_BRANCH_TRACER */
167
168void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
169{
170 /*
171 * I would love to have a trace point here instead, but the
172 * trace point code is so inundated with unlikely and likely
173 * conditions that the recursive nightmare that exists is too
174 * much to try to get working. At least for now.
175 */
176 trace_likely_condition(f, val, expect);
177
178 /* FIXME: Make this atomic! */
179 if (val == expect)
180 f->correct++;
181 else
182 f->incorrect++;
183}
184EXPORT_SYMBOL(ftrace_likely_update);
185
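For reference, a hedged sketch of the kind of wrapper assumed to redirect likely()/unlikely() into ftrace_likely_update() when CONFIG_TRACE_BRANCH_PROFILING is enabled. The authoritative definitions live in include/linux/compiler.h; the section name and macro spelling below are approximations only:

#define likely_notrace(x)	__builtin_expect(!!(x), 1)

#define __branch_check__(x, expect) ({					\
	int ______r;							\
	static struct ftrace_branch_data				\
		__attribute__((section("_ftrace_annotated_branch")))	\
		______f = {						\
			.func = __func__,				\
			.file = __FILE__,				\
			.line = __LINE__,				\
		};							\
	______r = likely_notrace(x);					\
	/* Record whether the annotation matched the actual outcome. */\
	ftrace_likely_update(&______f, ______r, expect);		\
	______r;							\
})

#define likely(x)	__branch_check__(x, 1)
#define unlikely(x)	__branch_check__(x, 0)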
186struct ftrace_pointer {
187 void *start;
188 void *stop;
189 int hit;
190};
191
192static void *
193t_next(struct seq_file *m, void *v, loff_t *pos)
194{
195 const struct ftrace_pointer *f = m->private;
196 struct ftrace_branch_data *p = v;
197
198 (*pos)++;
199
200 if (v == (void *)1)
201 return f->start;
202
203 ++p;
204
205 if ((void *)p >= (void *)f->stop)
206 return NULL;
207
208 return p;
209}
210
211static void *t_start(struct seq_file *m, loff_t *pos)
212{
213 void *t = (void *)1;
214 loff_t l = 0;
215
216 for (; t && l < *pos; t = t_next(m, t, &l))
217 ;
218
219 return t;
220}
221
222static void t_stop(struct seq_file *m, void *p)
223{
224}
225
226static int t_show(struct seq_file *m, void *v)
227{
228 const struct ftrace_pointer *fp = m->private;
229 struct ftrace_branch_data *p = v;
230 const char *f;
231 long percent;
232
233 if (v == (void *)1) {
234 if (fp->hit)
235 seq_printf(m, " miss hit %% ");
236 else
237 seq_printf(m, " correct incorrect %% ");
238 seq_printf(m, " Function "
239 " File Line\n"
240 " ------- --------- - "
241 " -------- "
242 " ---- ----\n");
243 return 0;
244 }
245
246 /* Only print the file, not the path */
247 f = p->file + strlen(p->file);
248 while (f >= p->file && *f != '/')
249 f--;
250 f++;
251
252 /*
 253 * The miss is overlaid on correct, and hit on incorrect.
254 */
255 if (p->correct) {
256 percent = p->incorrect * 100;
257 percent /= p->correct + p->incorrect;
258 } else
259 percent = p->incorrect ? 100 : -1;
260
261 seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect);
262 if (percent < 0)
263 seq_printf(m, " X ");
264 else
265 seq_printf(m, "%3ld ", percent);
266 seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
267 return 0;
268}
269
270static struct seq_operations tracing_likely_seq_ops = {
271 .start = t_start,
272 .next = t_next,
273 .stop = t_stop,
274 .show = t_show,
275};
276
277static int tracing_branch_open(struct inode *inode, struct file *file)
278{
279 int ret;
280
281 ret = seq_open(file, &tracing_likely_seq_ops);
282 if (!ret) {
283 struct seq_file *m = file->private_data;
284 m->private = (void *)inode->i_private;
285 }
286
287 return ret;
288}
289
290static const struct file_operations tracing_branch_fops = {
291 .open = tracing_branch_open,
292 .read = seq_read,
293 .llseek = seq_lseek,
294};
295
296#ifdef CONFIG_PROFILE_ALL_BRANCHES
297extern unsigned long __start_branch_profile[];
298extern unsigned long __stop_branch_profile[];
299
300static const struct ftrace_pointer ftrace_branch_pos = {
301 .start = __start_branch_profile,
302 .stop = __stop_branch_profile,
303 .hit = 1,
304};
305
306#endif /* CONFIG_PROFILE_ALL_BRANCHES */
307
308extern unsigned long __start_annotated_branch_profile[];
309extern unsigned long __stop_annotated_branch_profile[];
310
311static const struct ftrace_pointer ftrace_annotated_branch_pos = {
312 .start = __start_annotated_branch_profile,
313 .stop = __stop_annotated_branch_profile,
314};
315
316static __init int ftrace_branch_init(void)
317{
318 struct dentry *d_tracer;
319 struct dentry *entry;
320
321 d_tracer = tracing_init_dentry();
322
323 entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer,
324 (void *)&ftrace_annotated_branch_pos,
325 &tracing_branch_fops);
326 if (!entry)
327 pr_warning("Could not create debugfs "
 328			"'profile_annotated_branch' entry\n");
329
330#ifdef CONFIG_PROFILE_ALL_BRANCHES
331 entry = debugfs_create_file("profile_branch", 0444, d_tracer,
332 (void *)&ftrace_branch_pos,
333 &tracing_branch_fops);
334 if (!entry)
335 pr_warning("Could not create debugfs"
336 " 'profile_branch' entry\n");
337#endif
338
339 return 0;
340}
341
342device_initcall(ftrace_branch_init);
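A hedged userspace sketch for dumping the statistics file created above; the debugfs mount point is an assumption (commonly /sys/kernel/debug) and must be adjusted to the local setup:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Assumed debugfs mount point. */
	const char *path = "/sys/kernel/debug/tracing/profile_annotated_branch";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* correct/incorrect counts per call site */
	fclose(f);
	return EXIT_SUCCESS;
}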
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..e74f6d0a3216 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,24 +42,20 @@ static void stop_function_trace(struct trace_array *tr)
42 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
43} 43}
44 44
45static void function_trace_init(struct trace_array *tr) 45static int function_trace_init(struct trace_array *tr)
46{ 46{
47 if (tr->ctrl) 47 start_function_trace(tr);
48 start_function_trace(tr); 48 return 0;
49} 49}
50 50
51static void function_trace_reset(struct trace_array *tr) 51static void function_trace_reset(struct trace_array *tr)
52{ 52{
53 if (tr->ctrl) 53 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 54}
56 55
57static void function_trace_ctrl_update(struct trace_array *tr) 56static void function_trace_start(struct trace_array *tr)
58{ 57{
59 if (tr->ctrl) 58 function_reset(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 59}
64 60
65static struct tracer function_trace __read_mostly = 61static struct tracer function_trace __read_mostly =
@@ -67,7 +63,7 @@ static struct tracer function_trace __read_mostly =
67 .name = "function", 63 .name = "function",
68 .init = function_trace_init, 64 .init = function_trace_init,
69 .reset = function_trace_reset, 65 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 66 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 67#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 68 .selftest = trace_selftest_startup_function,
73#endif 69#endif
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
new file mode 100644
index 000000000000..4bf39fcae97a
--- /dev/null
+++ b/kernel/trace/trace_functions_graph.c
@@ -0,0 +1,669 @@
1/*
2 *
3 * Function graph tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 */
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/fs.h>
13
14#include "trace.h"
15
16#define TRACE_GRAPH_INDENT 2
17
18/* Flag options */
19#define TRACE_GRAPH_PRINT_OVERRUN 0x1
20#define TRACE_GRAPH_PRINT_CPU 0x2
21#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
22#define TRACE_GRAPH_PRINT_PROC 0x8
23
24static struct tracer_opt trace_opts[] = {
25 /* Display overruns ? */
26 { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
27 /* Display CPU ? */
28 { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
29 /* Display Overhead ? */
30 { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
31 /* Display proc name/pid */
32 { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
33 { } /* Empty entry */
34};
35
36static struct tracer_flags tracer_flags = {
37 /* Don't display overruns and proc by default */
38 .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
39 .opts = trace_opts
40};
41
42/* pid on the last trace processed */
43static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
44
45static int graph_trace_init(struct trace_array *tr)
46{
47 int cpu, ret;
48
49 for_each_online_cpu(cpu)
50 tracing_reset(tr, cpu);
51
52 ret = register_ftrace_graph(&trace_graph_return,
53 &trace_graph_entry);
54 if (ret)
55 return ret;
56 tracing_start_cmdline_record();
57
58 return 0;
59}
60
61static void graph_trace_reset(struct trace_array *tr)
62{
63 tracing_stop_cmdline_record();
64 unregister_ftrace_graph();
65}
66
67static inline int log10_cpu(int nb)
68{
69 if (nb / 100)
70 return 3;
71 if (nb / 10)
72 return 2;
73 return 1;
74}
75
76static enum print_line_t
77print_graph_cpu(struct trace_seq *s, int cpu)
78{
79 int i;
80 int ret;
81 int log10_this = log10_cpu(cpu);
82 int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
83
84
85 /*
86 * Start with a space character - to make it stand out
87 * to the right a bit when trace output is pasted into
88 * email:
89 */
90 ret = trace_seq_printf(s, " ");
91
92 /*
93 * Tricky - we space the CPU field according to the max
94 * number of online CPUs. On a 2-cpu system it would take
95 * a maximum of 1 digit - on a 128 cpu system it would
96 * take up to 3 digits:
97 */
98 for (i = 0; i < log10_all - log10_this; i++) {
99 ret = trace_seq_printf(s, " ");
100 if (!ret)
101 return TRACE_TYPE_PARTIAL_LINE;
102 }
103 ret = trace_seq_printf(s, "%d) ", cpu);
104 if (!ret)
105 return TRACE_TYPE_PARTIAL_LINE;
106
107 return TRACE_TYPE_HANDLED;
108}
109
110#define TRACE_GRAPH_PROCINFO_LENGTH 14
111
112static enum print_line_t
113print_graph_proc(struct trace_seq *s, pid_t pid)
114{
115 int i;
116 int ret;
117 int len;
118 char comm[8];
119 int spaces = 0;
120 /* sign + log10(MAX_INT) + '\0' */
121 char pid_str[11];
122
123 strncpy(comm, trace_find_cmdline(pid), 7);
124 comm[7] = '\0';
125 sprintf(pid_str, "%d", pid);
126
127 /* 1 stands for the "-" character */
128 len = strlen(comm) + strlen(pid_str) + 1;
129
130 if (len < TRACE_GRAPH_PROCINFO_LENGTH)
131 spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;
132
133 /* First spaces to align center */
134 for (i = 0; i < spaces / 2; i++) {
135 ret = trace_seq_printf(s, " ");
136 if (!ret)
137 return TRACE_TYPE_PARTIAL_LINE;
138 }
139
140 ret = trace_seq_printf(s, "%s-%s", comm, pid_str);
141 if (!ret)
142 return TRACE_TYPE_PARTIAL_LINE;
143
144 /* Last spaces to align center */
145 for (i = 0; i < spaces - (spaces / 2); i++) {
146 ret = trace_seq_printf(s, " ");
147 if (!ret)
148 return TRACE_TYPE_PARTIAL_LINE;
149 }
150 return TRACE_TYPE_HANDLED;
151}
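To make the centering arithmetic above concrete, a small hedged userspace demo that mirrors the same computation (illustrative only, it shares no code with the kernel):

#include <stdio.h>
#include <string.h>

#define PROCINFO_LEN 14	/* mirrors TRACE_GRAPH_PROCINFO_LENGTH */

static void print_centered(const char *comm, int pid)
{
	char buf[32];
	int len, spaces, left, i;

	snprintf(buf, sizeof(buf), "%.7s-%d", comm, pid);	/* comm truncated to 7 chars */
	len = (int)strlen(buf);
	spaces = len < PROCINFO_LEN ? PROCINFO_LEN - len : 0;
	left = spaces / 2;

	for (i = 0; i < left; i++)
		putchar(' ');
	fputs(buf, stdout);
	for (i = 0; i < spaces - left; i++)
		putchar(' ');
	putchar('\n');
}

int main(void)
{
	/* "bash-2015" is 9 chars wide, so it gets 2 leading and 3 trailing spaces. */
	print_centered("bash", 2015);
	return 0;
}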
152
153
154/* If the pid changed since the last trace, output this event */
155static enum print_line_t
156verif_pid(struct trace_seq *s, pid_t pid, int cpu)
157{
158 pid_t prev_pid;
159 int ret;
160
161 if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
162 return TRACE_TYPE_HANDLED;
163
164 prev_pid = last_pid[cpu];
165 last_pid[cpu] = pid;
166
167/*
168 * Context-switch trace line:
169
170 ------------------------------------------
171 | 1) migration/0--1 => sshd-1755
172 ------------------------------------------
173
174 */
175 ret = trace_seq_printf(s,
176 " ------------------------------------------\n");
177 if (!ret)
 178		return TRACE_TYPE_PARTIAL_LINE;
179
180 ret = print_graph_cpu(s, cpu);
181 if (ret == TRACE_TYPE_PARTIAL_LINE)
 182		return TRACE_TYPE_PARTIAL_LINE;
183
184 ret = print_graph_proc(s, prev_pid);
185 if (ret == TRACE_TYPE_PARTIAL_LINE)
 186		return TRACE_TYPE_PARTIAL_LINE;
187
188 ret = trace_seq_printf(s, " => ");
189 if (!ret)
 190		return TRACE_TYPE_PARTIAL_LINE;
191
192 ret = print_graph_proc(s, pid);
193 if (ret == TRACE_TYPE_PARTIAL_LINE)
 194		return TRACE_TYPE_PARTIAL_LINE;
195
196 ret = trace_seq_printf(s,
197 "\n ------------------------------------------\n\n");
198 if (!ret)
 199		return TRACE_TYPE_PARTIAL_LINE;
200
 201	return TRACE_TYPE_HANDLED;
202}
203
204static bool
205trace_branch_is_leaf(struct trace_iterator *iter,
206 struct ftrace_graph_ent_entry *curr)
207{
208 struct ring_buffer_iter *ring_iter;
209 struct ring_buffer_event *event;
210 struct ftrace_graph_ret_entry *next;
211
212 ring_iter = iter->buffer_iter[iter->cpu];
213
214 if (!ring_iter)
215 return false;
216
217 event = ring_buffer_iter_peek(ring_iter, NULL);
218
219 if (!event)
220 return false;
221
222 next = ring_buffer_event_data(event);
223
224 if (next->ent.type != TRACE_GRAPH_RET)
225 return false;
226
227 if (curr->ent.pid != next->ent.pid ||
228 curr->graph_ent.func != next->ret.func)
229 return false;
230
231 return true;
232}
233
234static enum print_line_t
235print_graph_irq(struct trace_seq *s, unsigned long addr,
236 enum trace_type type, int cpu, pid_t pid)
237{
238 int ret;
239
240 if (addr < (unsigned long)__irqentry_text_start ||
241 addr >= (unsigned long)__irqentry_text_end)
242 return TRACE_TYPE_UNHANDLED;
243
244 if (type == TRACE_GRAPH_ENT) {
245 ret = trace_seq_printf(s, "==========> | ");
246 } else {
247 /* Cpu */
248 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
249 ret = print_graph_cpu(s, cpu);
250 if (ret == TRACE_TYPE_PARTIAL_LINE)
251 return TRACE_TYPE_PARTIAL_LINE;
252 }
253 /* Proc */
254 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
255 ret = print_graph_proc(s, pid);
256 if (ret == TRACE_TYPE_PARTIAL_LINE)
257 return TRACE_TYPE_PARTIAL_LINE;
258
259 ret = trace_seq_printf(s, " | ");
260 if (!ret)
261 return TRACE_TYPE_PARTIAL_LINE;
262 }
263
264 /* No overhead */
265 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
266 ret = trace_seq_printf(s, " ");
267 if (!ret)
268 return TRACE_TYPE_PARTIAL_LINE;
269 }
270
271 ret = trace_seq_printf(s, "<========== |\n");
272 }
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275 return TRACE_TYPE_HANDLED;
276}
277
278static enum print_line_t
279print_graph_duration(unsigned long long duration, struct trace_seq *s)
280{
281 unsigned long nsecs_rem = do_div(duration, 1000);
282 /* log10(ULONG_MAX) + '\0' */
283 char msecs_str[21];
284 char nsecs_str[5];
285 int ret, len;
286 int i;
287
288 sprintf(msecs_str, "%lu", (unsigned long) duration);
289
290 /* Print msecs */
291 ret = trace_seq_printf(s, msecs_str);
292 if (!ret)
293 return TRACE_TYPE_PARTIAL_LINE;
294
295 len = strlen(msecs_str);
296
 297	/* Print nsecs (we don't want to exceed 7 digits) */
298 if (len < 7) {
299 snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem);
300 ret = trace_seq_printf(s, ".%s", nsecs_str);
301 if (!ret)
302 return TRACE_TYPE_PARTIAL_LINE;
303 len += strlen(nsecs_str);
304 }
305
306 ret = trace_seq_printf(s, " us ");
307 if (!ret)
308 return TRACE_TYPE_PARTIAL_LINE;
309
310 /* Print remaining spaces to fit the row's width */
311 for (i = len; i < 7; i++) {
312 ret = trace_seq_printf(s, " ");
313 if (!ret)
314 return TRACE_TYPE_PARTIAL_LINE;
315 }
316
317 ret = trace_seq_printf(s, "| ");
318 if (!ret)
319 return TRACE_TYPE_PARTIAL_LINE;
320 return TRACE_TYPE_HANDLED;
321
322}
323
324/* Signal an overhead of execution time in the output */
325static int
326print_graph_overhead(unsigned long long duration, struct trace_seq *s)
327{
 328	/* Duration exceeded 100 usecs */
329 if (duration > 100000ULL)
330 return trace_seq_printf(s, "! ");
331
 332	/* Duration exceeded 10 usecs */
333 if (duration > 10000ULL)
334 return trace_seq_printf(s, "+ ");
335
336 return trace_seq_printf(s, " ");
337}
338
339/* Case of a leaf function on its call entry */
340static enum print_line_t
341print_graph_entry_leaf(struct trace_iterator *iter,
342 struct ftrace_graph_ent_entry *entry, struct trace_seq *s)
343{
344 struct ftrace_graph_ret_entry *ret_entry;
345 struct ftrace_graph_ret *graph_ret;
346 struct ring_buffer_event *event;
347 struct ftrace_graph_ent *call;
348 unsigned long long duration;
349 int ret;
350 int i;
351
352 event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
353 ret_entry = ring_buffer_event_data(event);
354 graph_ret = &ret_entry->ret;
355 call = &entry->graph_ent;
356 duration = graph_ret->rettime - graph_ret->calltime;
357
358 /* Overhead */
359 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
360 ret = print_graph_overhead(duration, s);
361 if (!ret)
362 return TRACE_TYPE_PARTIAL_LINE;
363 }
364
365 /* Duration */
366 ret = print_graph_duration(duration, s);
367 if (ret == TRACE_TYPE_PARTIAL_LINE)
368 return TRACE_TYPE_PARTIAL_LINE;
369
370 /* Function */
371 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
372 ret = trace_seq_printf(s, " ");
373 if (!ret)
374 return TRACE_TYPE_PARTIAL_LINE;
375 }
376
377 ret = seq_print_ip_sym(s, call->func, 0);
378 if (!ret)
379 return TRACE_TYPE_PARTIAL_LINE;
380
381 ret = trace_seq_printf(s, "();\n");
382 if (!ret)
383 return TRACE_TYPE_PARTIAL_LINE;
384
385 return TRACE_TYPE_HANDLED;
386}
387
388static enum print_line_t
389print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
390 struct trace_seq *s, pid_t pid, int cpu)
391{
392 int i;
393 int ret;
394 struct ftrace_graph_ent *call = &entry->graph_ent;
395
396 /* No overhead */
397 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
398 ret = trace_seq_printf(s, " ");
399 if (!ret)
400 return TRACE_TYPE_PARTIAL_LINE;
401 }
402
403 /* Interrupt */
404 ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
405 if (ret == TRACE_TYPE_UNHANDLED) {
406 /* No time */
407 ret = trace_seq_printf(s, " | ");
408 if (!ret)
409 return TRACE_TYPE_PARTIAL_LINE;
410 } else {
411 if (ret == TRACE_TYPE_PARTIAL_LINE)
412 return TRACE_TYPE_PARTIAL_LINE;
413 }
414
415
416 /* Function */
417 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
418 ret = trace_seq_printf(s, " ");
419 if (!ret)
420 return TRACE_TYPE_PARTIAL_LINE;
421 }
422
423 ret = seq_print_ip_sym(s, call->func, 0);
424 if (!ret)
425 return TRACE_TYPE_PARTIAL_LINE;
426
427 ret = trace_seq_printf(s, "() {\n");
428 if (!ret)
429 return TRACE_TYPE_PARTIAL_LINE;
430
431 return TRACE_TYPE_HANDLED;
432}
433
434static enum print_line_t
435print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
436 struct trace_iterator *iter, int cpu)
437{
438 int ret;
439 struct trace_entry *ent = iter->ent;
440
441 /* Pid */
442 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
443 return TRACE_TYPE_PARTIAL_LINE;
444
445 /* Cpu */
446 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
447 ret = print_graph_cpu(s, cpu);
448 if (ret == TRACE_TYPE_PARTIAL_LINE)
449 return TRACE_TYPE_PARTIAL_LINE;
450 }
451
452 /* Proc */
453 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
454 ret = print_graph_proc(s, ent->pid);
455 if (ret == TRACE_TYPE_PARTIAL_LINE)
456 return TRACE_TYPE_PARTIAL_LINE;
457
458 ret = trace_seq_printf(s, " | ");
459 if (!ret)
460 return TRACE_TYPE_PARTIAL_LINE;
461 }
462
463 if (trace_branch_is_leaf(iter, field))
464 return print_graph_entry_leaf(iter, field, s);
465 else
466 return print_graph_entry_nested(field, s, iter->ent->pid, cpu);
467
468}
469
470static enum print_line_t
471print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
472 struct trace_entry *ent, int cpu)
473{
474 int i;
475 int ret;
476 unsigned long long duration = trace->rettime - trace->calltime;
477
478 /* Pid */
479 if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
480 return TRACE_TYPE_PARTIAL_LINE;
481
482 /* Cpu */
483 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
484 ret = print_graph_cpu(s, cpu);
485 if (ret == TRACE_TYPE_PARTIAL_LINE)
486 return TRACE_TYPE_PARTIAL_LINE;
487 }
488
489 /* Proc */
490 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
491 ret = print_graph_proc(s, ent->pid);
492 if (ret == TRACE_TYPE_PARTIAL_LINE)
493 return TRACE_TYPE_PARTIAL_LINE;
494
495 ret = trace_seq_printf(s, " | ");
496 if (!ret)
497 return TRACE_TYPE_PARTIAL_LINE;
498 }
499
500 /* Overhead */
501 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
502 ret = print_graph_overhead(duration, s);
503 if (!ret)
504 return TRACE_TYPE_PARTIAL_LINE;
505 }
506
507 /* Duration */
508 ret = print_graph_duration(duration, s);
509 if (ret == TRACE_TYPE_PARTIAL_LINE)
510 return TRACE_TYPE_PARTIAL_LINE;
511
512 /* Closing brace */
513 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
514 ret = trace_seq_printf(s, " ");
515 if (!ret)
516 return TRACE_TYPE_PARTIAL_LINE;
517 }
518
519 ret = trace_seq_printf(s, "}\n");
520 if (!ret)
521 return TRACE_TYPE_PARTIAL_LINE;
522
523 /* Overrun */
524 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
525 ret = trace_seq_printf(s, " (Overruns: %lu)\n",
526 trace->overrun);
527 if (!ret)
528 return TRACE_TYPE_PARTIAL_LINE;
529 }
530
531 ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid);
532 if (ret == TRACE_TYPE_PARTIAL_LINE)
533 return TRACE_TYPE_PARTIAL_LINE;
534
535 return TRACE_TYPE_HANDLED;
536}
537
538static enum print_line_t
539print_graph_comment(struct print_entry *trace, struct trace_seq *s,
540 struct trace_entry *ent, struct trace_iterator *iter)
541{
542 int i;
543 int ret;
544
545 /* Pid */
546 if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
547 return TRACE_TYPE_PARTIAL_LINE;
548
549 /* Cpu */
550 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
551 ret = print_graph_cpu(s, iter->cpu);
552 if (ret == TRACE_TYPE_PARTIAL_LINE)
553 return TRACE_TYPE_PARTIAL_LINE;
554 }
555
556 /* Proc */
557 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
558 ret = print_graph_proc(s, ent->pid);
559 if (ret == TRACE_TYPE_PARTIAL_LINE)
560 return TRACE_TYPE_PARTIAL_LINE;
561
562 ret = trace_seq_printf(s, " | ");
563 if (!ret)
564 return TRACE_TYPE_PARTIAL_LINE;
565 }
566
567 /* No overhead */
568 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
569 ret = trace_seq_printf(s, " ");
570 if (!ret)
571 return TRACE_TYPE_PARTIAL_LINE;
572 }
573
574 /* No time */
575 ret = trace_seq_printf(s, " | ");
576 if (!ret)
577 return TRACE_TYPE_PARTIAL_LINE;
578
579 /* Indentation */
580 if (trace->depth > 0)
581 for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
582 ret = trace_seq_printf(s, " ");
583 if (!ret)
584 return TRACE_TYPE_PARTIAL_LINE;
585 }
586
587 /* The comment */
588 ret = trace_seq_printf(s, "/* %s", trace->buf);
589 if (!ret)
590 return TRACE_TYPE_PARTIAL_LINE;
591
592 if (ent->flags & TRACE_FLAG_CONT)
593 trace_seq_print_cont(s, iter);
594
595 ret = trace_seq_printf(s, " */\n");
596 if (!ret)
597 return TRACE_TYPE_PARTIAL_LINE;
598
599 return TRACE_TYPE_HANDLED;
600}
601
602
603enum print_line_t
604print_graph_function(struct trace_iterator *iter)
605{
606 struct trace_seq *s = &iter->seq;
607 struct trace_entry *entry = iter->ent;
608
609 switch (entry->type) {
610 case TRACE_GRAPH_ENT: {
611 struct ftrace_graph_ent_entry *field;
612 trace_assign_type(field, entry);
613 return print_graph_entry(field, s, iter,
614 iter->cpu);
615 }
616 case TRACE_GRAPH_RET: {
617 struct ftrace_graph_ret_entry *field;
618 trace_assign_type(field, entry);
619 return print_graph_return(&field->ret, s, entry, iter->cpu);
620 }
621 case TRACE_PRINT: {
622 struct print_entry *field;
623 trace_assign_type(field, entry);
624 return print_graph_comment(field, s, entry, iter);
625 }
626 default:
627 return TRACE_TYPE_UNHANDLED;
628 }
629}
630
631static void print_graph_headers(struct seq_file *s)
632{
633 /* 1st line */
634 seq_printf(s, "# ");
635 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
636 seq_printf(s, "CPU ");
637 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
638 seq_printf(s, "TASK/PID ");
639 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
640 seq_printf(s, "OVERHEAD/");
641 seq_printf(s, "DURATION FUNCTION CALLS\n");
642
643 /* 2nd line */
644 seq_printf(s, "# ");
645 if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
646 seq_printf(s, "| ");
647 if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
648 seq_printf(s, "| | ");
649 if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
650 seq_printf(s, "| ");
651 seq_printf(s, "| | | | |\n");
652 } else
653 seq_printf(s, " | | | | |\n");
654}
655static struct tracer graph_trace __read_mostly = {
656 .name = "function_graph",
657 .init = graph_trace_init,
658 .reset = graph_trace_reset,
659 .print_line = print_graph_function,
660 .print_header = print_graph_headers,
661 .flags = &tracer_flags,
662};
663
664static __init int init_graph_trace(void)
665{
666 return register_tracer(&graph_trace);
667}
668
669device_initcall(init_graph_trace);
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
new file mode 100644
index 000000000000..ee29e012aa97
--- /dev/null
+++ b/kernel/trace/trace_hw_branches.c
@@ -0,0 +1,205 @@
1/*
2 * h/w branch tracer for x86 based on bts
3 *
4 * Copyright (C) 2008 Markus Metzger <markus.t.metzger@gmail.com>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/kallsyms.h>
13
14#include <asm/ds.h>
15
16#include "trace.h"
17
18
19#define SIZEOF_BTS (1 << 13)
20
21static DEFINE_PER_CPU(struct bts_tracer *, tracer);
22static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
23
24#define this_tracer per_cpu(tracer, smp_processor_id())
25#define this_buffer per_cpu(buffer, smp_processor_id())
26
27
28static void bts_trace_reset(struct trace_array *tr)
29{
30 int cpu;
31
32 tr->time_start = ftrace_now(tr->cpu);
33
34 for_each_online_cpu(cpu)
35 tracing_reset(tr, cpu);
36}
37
38static void bts_trace_start_cpu(void *arg)
39{
40 if (this_tracer)
41 ds_release_bts(this_tracer);
42
43 this_tracer =
44 ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
45 /* ovfl = */ NULL, /* th = */ (size_t)-1,
46 BTS_KERNEL);
47 if (IS_ERR(this_tracer)) {
48 this_tracer = NULL;
49 return;
50 }
51}
52
53static void bts_trace_start(struct trace_array *tr)
54{
55 int cpu;
56
57 bts_trace_reset(tr);
58
59 for_each_cpu_mask(cpu, cpu_possible_map)
60 smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
61}
62
63static void bts_trace_stop_cpu(void *arg)
64{
65 if (this_tracer) {
66 ds_release_bts(this_tracer);
67 this_tracer = NULL;
68 }
69}
70
71static void bts_trace_stop(struct trace_array *tr)
72{
73 int cpu;
74
75 for_each_cpu_mask(cpu, cpu_possible_map)
76 smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
77}
78
79static int bts_trace_init(struct trace_array *tr)
80{
81 bts_trace_reset(tr);
82 bts_trace_start(tr);
83
84 return 0;
85}
86
87static void bts_trace_print_header(struct seq_file *m)
88{
89 seq_puts(m,
90 "# CPU# FROM TO FUNCTION\n");
91 seq_puts(m,
92 "# | | | |\n");
93}
94
95static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
96{
97 struct trace_entry *entry = iter->ent;
98 struct trace_seq *seq = &iter->seq;
99 struct hw_branch_entry *it;
100
101 trace_assign_type(it, entry);
102
103 if (entry->type == TRACE_HW_BRANCHES) {
104 if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
105 trace_seq_printf(seq, "0x%016llx -> 0x%016llx ",
106 it->from, it->to) &&
107 (!it->from ||
108 seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) &&
109 trace_seq_printf(seq, "\n"))
110 return TRACE_TYPE_HANDLED;
 111		return TRACE_TYPE_PARTIAL_LINE;
112 }
113 return TRACE_TYPE_UNHANDLED;
114}
115
116void trace_hw_branch(struct trace_array *tr, u64 from, u64 to)
117{
118 struct ring_buffer_event *event;
119 struct hw_branch_entry *entry;
120 unsigned long irq;
121
122 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq);
123 if (!event)
124 return;
125 entry = ring_buffer_event_data(event);
126 tracing_generic_entry_update(&entry->ent, 0, from);
127 entry->ent.type = TRACE_HW_BRANCHES;
128 entry->ent.cpu = smp_processor_id();
129 entry->from = from;
130 entry->to = to;
131 ring_buffer_unlock_commit(tr->buffer, event, irq);
132}
133
134static void trace_bts_at(struct trace_array *tr,
135 const struct bts_trace *trace, void *at)
136{
137 struct bts_struct bts;
138 int err = 0;
139
140 WARN_ON_ONCE(!trace->read);
141 if (!trace->read)
142 return;
143
144 err = trace->read(this_tracer, at, &bts);
145 if (err < 0)
146 return;
147
148 switch (bts.qualifier) {
149 case BTS_BRANCH:
150 trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to);
151 break;
152 }
153}
154
155static void trace_bts_cpu(void *arg)
156{
157 struct trace_array *tr = (struct trace_array *) arg;
158 const struct bts_trace *trace;
159 unsigned char *at;
160
161 if (!this_tracer)
162 return;
163
164 ds_suspend_bts(this_tracer);
165 trace = ds_read_bts(this_tracer);
166 if (!trace)
167 goto out;
168
169 for (at = trace->ds.top; (void *)at < trace->ds.end;
170 at += trace->ds.size)
171 trace_bts_at(tr, trace, at);
172
173 for (at = trace->ds.begin; (void *)at < trace->ds.top;
174 at += trace->ds.size)
175 trace_bts_at(tr, trace, at);
176
177out:
178 ds_resume_bts(this_tracer);
179}
180
181static void trace_bts_prepare(struct trace_iterator *iter)
182{
183 int cpu;
184
185 for_each_cpu_mask(cpu, cpu_possible_map)
186 smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
187}
188
189struct tracer bts_tracer __read_mostly =
190{
191 .name = "hw-branch-tracer",
192 .init = bts_trace_init,
193 .reset = bts_trace_stop,
194 .print_header = bts_trace_print_header,
195 .print_line = bts_trace_print_line,
196 .start = bts_trace_start,
197 .stop = bts_trace_stop,
198 .open = trace_bts_prepare
199};
200
201__init static int init_bts_trace(void)
202{
203 return register_tracer(&bts_tracer);
204}
205device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..7c2e326bbc8b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
 358 * variable when we disable it upon opening a trace output file.
359 */
360static int save_tracer_enabled;
361
356static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
357{ 363{
358 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
359 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
360} 372}
361 373
362static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
363{ 375{
364 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
365 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
366} 379}
367 380
@@ -370,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
370 irqsoff_trace = tr; 383 irqsoff_trace = tr;
371 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
372 smp_wmb(); 385 smp_wmb();
373 386 start_irqsoff_tracer(tr);
374 if (tr->ctrl)
375 start_irqsoff_tracer(tr);
376} 387}
377 388
378static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 390{
380 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
381 stop_irqsoff_tracer(tr);
382} 392}
383 393
384static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
385{ 395{
386 if (tr->ctrl) 396 tracer_enabled = 1;
387 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
388 else 398}
389 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
390} 404}
391 405
392static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
393{ 407{
394 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
395 if (iter->tr->ctrl) 409 tracer_enabled = 0;
396 stop_irqsoff_tracer(iter->tr);
397} 410}
398 411
399static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
400{ 413{
401 if (iter->tr->ctrl) 414 /* restart tracing */
402 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
403} 416}
404 417
405#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
406static void irqsoff_tracer_init(struct trace_array *tr) 419static int irqsoff_tracer_init(struct trace_array *tr)
407{ 420{
408 trace_type = TRACER_IRQS_OFF; 421 trace_type = TRACER_IRQS_OFF;
409 422
410 __irqsoff_tracer_init(tr); 423 __irqsoff_tracer_init(tr);
424 return 0;
411} 425}
412static struct tracer irqsoff_tracer __read_mostly = 426static struct tracer irqsoff_tracer __read_mostly =
413{ 427{
414 .name = "irqsoff", 428 .name = "irqsoff",
415 .init = irqsoff_tracer_init, 429 .init = irqsoff_tracer_init,
416 .reset = irqsoff_tracer_reset, 430 .reset = irqsoff_tracer_reset,
431 .start = irqsoff_tracer_start,
432 .stop = irqsoff_tracer_stop,
417 .open = irqsoff_tracer_open, 433 .open = irqsoff_tracer_open,
418 .close = irqsoff_tracer_close, 434 .close = irqsoff_tracer_close,
419 .ctrl_update = irqsoff_tracer_ctrl_update,
420 .print_max = 1, 435 .print_max = 1,
421#ifdef CONFIG_FTRACE_SELFTEST 436#ifdef CONFIG_FTRACE_SELFTEST
422 .selftest = trace_selftest_startup_irqsoff, 437 .selftest = trace_selftest_startup_irqsoff,
@@ -428,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
428#endif 443#endif
429 444
430#ifdef CONFIG_PREEMPT_TRACER 445#ifdef CONFIG_PREEMPT_TRACER
431static void preemptoff_tracer_init(struct trace_array *tr) 446static int preemptoff_tracer_init(struct trace_array *tr)
432{ 447{
433 trace_type = TRACER_PREEMPT_OFF; 448 trace_type = TRACER_PREEMPT_OFF;
434 449
435 __irqsoff_tracer_init(tr); 450 __irqsoff_tracer_init(tr);
451 return 0;
436} 452}
437 453
438static struct tracer preemptoff_tracer __read_mostly = 454static struct tracer preemptoff_tracer __read_mostly =
@@ -440,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
440 .name = "preemptoff", 456 .name = "preemptoff",
441 .init = preemptoff_tracer_init, 457 .init = preemptoff_tracer_init,
442 .reset = irqsoff_tracer_reset, 458 .reset = irqsoff_tracer_reset,
459 .start = irqsoff_tracer_start,
460 .stop = irqsoff_tracer_stop,
443 .open = irqsoff_tracer_open, 461 .open = irqsoff_tracer_open,
444 .close = irqsoff_tracer_close, 462 .close = irqsoff_tracer_close,
445 .ctrl_update = irqsoff_tracer_ctrl_update,
446 .print_max = 1, 463 .print_max = 1,
447#ifdef CONFIG_FTRACE_SELFTEST 464#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_preemptoff, 465 .selftest = trace_selftest_startup_preemptoff,
@@ -456,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
456#if defined(CONFIG_IRQSOFF_TRACER) && \ 473#if defined(CONFIG_IRQSOFF_TRACER) && \
457 defined(CONFIG_PREEMPT_TRACER) 474 defined(CONFIG_PREEMPT_TRACER)
458 475
459static void preemptirqsoff_tracer_init(struct trace_array *tr) 476static int preemptirqsoff_tracer_init(struct trace_array *tr)
460{ 477{
461 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 478 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
462 479
463 __irqsoff_tracer_init(tr); 480 __irqsoff_tracer_init(tr);
481 return 0;
464} 482}
465 483
466static struct tracer preemptirqsoff_tracer __read_mostly = 484static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -468,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
468 .name = "preemptirqsoff", 486 .name = "preemptirqsoff",
469 .init = preemptirqsoff_tracer_init, 487 .init = preemptirqsoff_tracer_init,
470 .reset = irqsoff_tracer_reset, 488 .reset = irqsoff_tracer_reset,
489 .start = irqsoff_tracer_start,
490 .stop = irqsoff_tracer_stop,
471 .open = irqsoff_tracer_open, 491 .open = irqsoff_tracer_open,
472 .close = irqsoff_tracer_close, 492 .close = irqsoff_tracer_close,
473 .ctrl_update = irqsoff_tracer_ctrl_update,
474 .print_max = 1, 493 .print_max = 1,
475#ifdef CONFIG_FTRACE_SELFTEST 494#ifdef CONFIG_FTRACE_SELFTEST
476 .selftest = trace_selftest_startup_preemptirqsoff, 495 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index e62cbf78eab6..2fb6da6523b3 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -32,34 +32,29 @@ static void mmio_reset_data(struct trace_array *tr)
32 tracing_reset(tr, cpu); 32 tracing_reset(tr, cpu);
33} 33}
34 34
35static void mmio_trace_init(struct trace_array *tr) 35static int mmio_trace_init(struct trace_array *tr)
36{ 36{
37 pr_debug("in %s\n", __func__); 37 pr_debug("in %s\n", __func__);
38 mmio_trace_array = tr; 38 mmio_trace_array = tr;
39 if (tr->ctrl) { 39
40 mmio_reset_data(tr); 40 mmio_reset_data(tr);
41 enable_mmiotrace(); 41 enable_mmiotrace();
42 } 42 return 0;
43} 43}
44 44
45static void mmio_trace_reset(struct trace_array *tr) 45static void mmio_trace_reset(struct trace_array *tr)
46{ 46{
47 pr_debug("in %s\n", __func__); 47 pr_debug("in %s\n", __func__);
48 if (tr->ctrl) 48
49 disable_mmiotrace(); 49 disable_mmiotrace();
50 mmio_reset_data(tr); 50 mmio_reset_data(tr);
51 mmio_trace_array = NULL; 51 mmio_trace_array = NULL;
52} 52}
53 53
54static void mmio_trace_ctrl_update(struct trace_array *tr) 54static void mmio_trace_start(struct trace_array *tr)
55{ 55{
56 pr_debug("in %s\n", __func__); 56 pr_debug("in %s\n", __func__);
57 if (tr->ctrl) { 57 mmio_reset_data(tr);
58 mmio_reset_data(tr);
59 enable_mmiotrace();
60 } else {
61 disable_mmiotrace();
62 }
63} 58}
64 59
65static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 60static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -296,10 +291,10 @@ static struct tracer mmio_tracer __read_mostly =
296 .name = "mmiotrace", 291 .name = "mmiotrace",
297 .init = mmio_trace_init, 292 .init = mmio_trace_init,
298 .reset = mmio_trace_reset, 293 .reset = mmio_trace_reset,
294 .start = mmio_trace_start,
299 .pipe_open = mmio_pipe_open, 295 .pipe_open = mmio_pipe_open,
300 .close = mmio_close, 296 .close = mmio_close,
301 .read = mmio_read, 297 .read = mmio_read,
302 .ctrl_update = mmio_trace_ctrl_update,
303 .print_line = mmio_print_line, 298 .print_line = mmio_print_line,
304}; 299};
305 300
@@ -371,5 +366,5 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
371 366
372int mmio_trace_printk(const char *fmt, va_list args) 367int mmio_trace_printk(const char *fmt, va_list args)
373{ 368{
374 return trace_vprintk(0, fmt, args); 369 return trace_vprintk(0, -1, fmt, args);
375} 370}
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..b9767acd30ac 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -12,6 +12,27 @@
12 12
13#include "trace.h" 13#include "trace.h"
14 14
15/* Our two options */
16enum {
17 TRACE_NOP_OPT_ACCEPT = 0x1,
18 TRACE_NOP_OPT_REFUSE = 0x2
19};
20
21/* Options for the tracer (see trace_options file) */
22static struct tracer_opt nop_opts[] = {
23 /* Option that will be accepted by set_flag callback */
24 { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
25 /* Option that will be refused by set_flag callback */
26 { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
27 { } /* Always set a last empty entry */
28};
29
30static struct tracer_flags nop_flags = {
31 /* You can check your flags value here when you want. */
32 .val = 0, /* By default: all flags disabled */
33 .opts = nop_opts
34};
35
15static struct trace_array *ctx_trace; 36static struct trace_array *ctx_trace;
16 37
17static void start_nop_trace(struct trace_array *tr) 38static void start_nop_trace(struct trace_array *tr)
@@ -24,7 +45,7 @@ static void stop_nop_trace(struct trace_array *tr)
24 /* Nothing to do! */ 45 /* Nothing to do! */
25} 46}
26 47
27static void nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
28{ 49{
29 int cpu; 50 int cpu;
30 ctx_trace = tr; 51 ctx_trace = tr;
@@ -32,33 +53,53 @@ static void nop_trace_init(struct trace_array *tr)
32 for_each_online_cpu(cpu) 53 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu); 54 tracing_reset(tr, cpu);
34 55
35 if (tr->ctrl) 56 start_nop_trace(tr);
36 start_nop_trace(tr); 57 return 0;
37} 58}
38 59
39static void nop_trace_reset(struct trace_array *tr) 60static void nop_trace_reset(struct trace_array *tr)
40{ 61{
41 if (tr->ctrl) 62 stop_nop_trace(tr);
42 stop_nop_trace(tr);
43} 63}
44 64
45static void nop_trace_ctrl_update(struct trace_array *tr) 65/* It only serves as a signal handler and a callback to
 66 * accept or refuse the setting of a flag.
67 * If you don't implement it, then the flag setting will be
68 * automatically accepted.
69 */
70static int nop_set_flag(u32 old_flags, u32 bit, int set)
46{ 71{
47 /* When starting a new trace, reset the buffers */ 72 /*
48 if (tr->ctrl) 73 * Note that you don't need to update nop_flags.val yourself.
 49 		start_nop_trace(tr); 74	 * The tracing API will do it automatically if you return 0
50 else 75 */
51 stop_nop_trace(tr); 76 if (bit == TRACE_NOP_OPT_ACCEPT) {
77 printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
78 " Now cat trace_options to see the result\n",
79 set);
80 return 0;
81 }
82
83 if (bit == TRACE_NOP_OPT_REFUSE) {
84 printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
 85			" Now cat trace_options to see the result\n",
86 set);
87 return -EINVAL;
88 }
89
90 return 0;
52} 91}
53 92
93
54struct tracer nop_trace __read_mostly = 94struct tracer nop_trace __read_mostly =
55{ 95{
56 .name = "nop", 96 .name = "nop",
57 .init = nop_trace_init, 97 .init = nop_trace_init,
58 .reset = nop_trace_reset, 98 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST 99#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop, 100 .selftest = trace_selftest_startup_nop,
62#endif 101#endif
102 .flags = &nop_flags,
103 .set_flag = nop_set_flag
63}; 104};
64 105
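A hedged userspace sketch for exercising the accept/refuse callback above through the trace_options file; the tracing directory path is an assumption, and the nop tracer has to be the current tracer for these options to be listed:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int set_trace_option(const char *opt)
{
	/* Assumed debugfs mount point; adjust to the local setup. */
	int fd = open("/sys/kernel/debug/tracing/trace_options", O_WRONLY);
	ssize_t n;
	int err = 0;

	if (fd < 0)
		return -errno;
	/* A refused flag makes this write fail with EINVAL (see nop_set_flag). */
	n = write(fd, opt, strlen(opt));
	if (n < 0)
		err = errno;
	close(fd);
	return -err;
}

int main(void)
{
	printf("test_nop_accept -> %d\n", set_trace_option("test_nop_accept"));
	printf("test_nop_refuse -> %d\n", set_trace_option("test_nop_refuse"));
	return 0;
}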
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
new file mode 100644
index 000000000000..a7172a352f62
--- /dev/null
+++ b/kernel/trace/trace_power.c
@@ -0,0 +1,179 @@
1/*
2 * ring buffer based C-state tracer
3 *
4 * Arjan van de Ven <arjan@linux.intel.com>
5 * Copyright (C) 2008 Intel Corporation
6 *
7 * Much is borrowed from trace_boot.c which is
8 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
9 *
10 */
11
12#include <linux/init.h>
13#include <linux/debugfs.h>
14#include <linux/ftrace.h>
15#include <linux/kallsyms.h>
16#include <linux/module.h>
17
18#include "trace.h"
19
20static struct trace_array *power_trace;
21static int __read_mostly trace_power_enabled;
22
23
24static void start_power_trace(struct trace_array *tr)
25{
26 trace_power_enabled = 1;
27}
28
29static void stop_power_trace(struct trace_array *tr)
30{
31 trace_power_enabled = 0;
32}
33
34
35static int power_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 power_trace = tr;
39
40 trace_power_enabled = 1;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44 return 0;
45}
46
47static enum print_line_t power_print_line(struct trace_iterator *iter)
48{
49 int ret = 0;
50 struct trace_entry *entry = iter->ent;
 51	struct trace_power *field;
52 struct power_trace *it;
53 struct trace_seq *s = &iter->seq;
54 struct timespec stamp;
55 struct timespec duration;
56
57 trace_assign_type(field, entry);
58 it = &field->state_data;
59 stamp = ktime_to_timespec(it->stamp);
60 duration = ktime_to_timespec(ktime_sub(it->end, it->stamp));
61
62 if (entry->type == TRACE_POWER) {
63 if (it->type == POWER_CSTATE)
64 ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n",
65 stamp.tv_sec,
66 stamp.tv_nsec,
67 it->state, iter->cpu,
68 duration.tv_sec,
69 duration.tv_nsec);
70 if (it->type == POWER_PSTATE)
71 ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n",
72 stamp.tv_sec,
73 stamp.tv_nsec,
74 it->state, iter->cpu);
75 if (!ret)
76 return TRACE_TYPE_PARTIAL_LINE;
77 return TRACE_TYPE_HANDLED;
78 }
79 return TRACE_TYPE_UNHANDLED;
80}
81
82static struct tracer power_tracer __read_mostly =
83{
84 .name = "power",
85 .init = power_trace_init,
86 .start = start_power_trace,
87 .stop = stop_power_trace,
88 .reset = stop_power_trace,
89 .print_line = power_print_line,
90};
91
92static int init_power_trace(void)
93{
94 return register_tracer(&power_tracer);
95}
96device_initcall(init_power_trace);
97
98void trace_power_start(struct power_trace *it, unsigned int type,
99 unsigned int level)
100{
101 if (!trace_power_enabled)
102 return;
103
104 memset(it, 0, sizeof(struct power_trace));
105 it->state = level;
106 it->type = type;
107 it->stamp = ktime_get();
108}
109EXPORT_SYMBOL_GPL(trace_power_start);
110
111
112void trace_power_end(struct power_trace *it)
113{
114 struct ring_buffer_event *event;
115 struct trace_power *entry;
116 struct trace_array_cpu *data;
117 unsigned long irq_flags;
118 struct trace_array *tr = power_trace;
119
120 if (!trace_power_enabled)
121 return;
122
123 preempt_disable();
124 it->end = ktime_get();
125 data = tr->data[smp_processor_id()];
126
127 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
128 &irq_flags);
129 if (!event)
130 goto out;
131 entry = ring_buffer_event_data(event);
132 tracing_generic_entry_update(&entry->ent, 0, 0);
133 entry->ent.type = TRACE_POWER;
134 entry->state_data = *it;
135 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
136
137 trace_wake_up();
138
139 out:
140 preempt_enable();
141}
142EXPORT_SYMBOL_GPL(trace_power_end);
143
144void trace_power_mark(struct power_trace *it, unsigned int type,
145 unsigned int level)
146{
147 struct ring_buffer_event *event;
148 struct trace_power *entry;
149 struct trace_array_cpu *data;
150 unsigned long irq_flags;
151 struct trace_array *tr = power_trace;
152
153 if (!trace_power_enabled)
154 return;
155
156 memset(it, 0, sizeof(struct power_trace));
157 it->state = level;
158 it->type = type;
159 it->stamp = ktime_get();
160 preempt_disable();
161 it->end = it->stamp;
162 data = tr->data[smp_processor_id()];
163
164 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
165 &irq_flags);
166 if (!event)
167 goto out;
168 entry = ring_buffer_event_data(event);
169 tracing_generic_entry_update(&entry->ent, 0, 0);
170 entry->ent.type = TRACE_POWER;
171 entry->state_data = *it;
172 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
173
174 trace_wake_up();
175
176 out:
177 preempt_enable();
178}
179EXPORT_SYMBOL_GPL(trace_power_mark);
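
trace_power_start()/trace_power_end() are meant to be called from the idle and frequency code paths; those call sites are not part of this file, so the following is only a sketch of a caller, assuming the declarations are exported through <linux/ftrace.h> as elsewhere in this series and that enter_idle_state() is a made-up function:

	#include <linux/ftrace.h>	/* struct power_trace, trace_power_*() (assumed location) */

	static void enter_idle_state(int cstate)
	{
		struct power_trace it;

		trace_power_start(&it, POWER_CSTATE, cstate);	/* records the start stamp */
		/* ... architecture-specific idle entry would go here ... */
		trace_power_end(&it);				/* logs type, state and duration */
	}

trace_power_mark() covers one-shot events such as P-state transitions, where there is no meaningful duration to measure.
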
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..781d72ef873c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22probe_sched_switch(struct rq *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
27 int cpu; 28 int cpu;
28 int pc; 29 int pc;
29 30
30 if (!atomic_read(&sched_ref)) 31 if (!sched_ref)
31 return; 32 return;
32 33
33 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
@@ -123,20 +124,18 @@ static void tracing_sched_unregister(void)
123 124
124static void tracing_start_sched_switch(void) 125static void tracing_start_sched_switch(void)
125{ 126{
126 long ref; 127 mutex_lock(&sched_register_mutex);
127 128 if (!(sched_ref++))
128 ref = atomic_inc_return(&sched_ref);
129 if (ref == 1)
130 tracing_sched_register(); 129 tracing_sched_register();
130 mutex_unlock(&sched_register_mutex);
131} 131}
132 132
133static void tracing_stop_sched_switch(void) 133static void tracing_stop_sched_switch(void)
134{ 134{
135 long ref; 135 mutex_lock(&sched_register_mutex);
136 136 if (!(--sched_ref))
137 ref = atomic_dec_and_test(&sched_ref);
138 if (ref)
139 tracing_sched_unregister(); 137 tracing_sched_unregister();
138 mutex_unlock(&sched_register_mutex);
140} 139}
141 140
142void tracing_start_cmdline_record(void) 141void tracing_start_cmdline_record(void)
@@ -149,40 +148,86 @@ void tracing_stop_cmdline_record(void)
149 tracing_stop_sched_switch(); 148 tracing_stop_sched_switch();
150} 149}
151 150
151/**
152 * tracing_start_sched_switch_record - start tracing context switches
153 *
154 * Turns on context switch tracing for a tracer.
155 */
156void tracing_start_sched_switch_record(void)
157{
158 if (unlikely(!ctx_trace)) {
159 WARN_ON(1);
160 return;
161 }
162
163 tracing_start_sched_switch();
164
165 mutex_lock(&sched_register_mutex);
166 tracer_enabled++;
167 mutex_unlock(&sched_register_mutex);
168}
169
170/**
171 * tracing_stop_sched_switch_record - stop tracing context switches
172 *
173 * Turns off context switch tracing for a tracer.
174 */
175void tracing_stop_sched_switch_record(void)
176{
177 mutex_lock(&sched_register_mutex);
178 tracer_enabled--;
179 WARN_ON(tracer_enabled < 0);
180 mutex_unlock(&sched_register_mutex);
181
182 tracing_stop_sched_switch();
183}
184
185/**
186 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
187 * @tr: trace array pointer to assign
188 *
189 * Some tracers might want to record the context switches in their
190 * trace. This function lets those tracers assign the trace array
191 * to use.
192 */
193void tracing_sched_switch_assign_trace(struct trace_array *tr)
194{
195 ctx_trace = tr;
196}
197
152static void start_sched_trace(struct trace_array *tr) 198static void start_sched_trace(struct trace_array *tr)
153{ 199{
154 sched_switch_reset(tr); 200 sched_switch_reset(tr);
155 tracing_start_cmdline_record(); 201 tracing_start_sched_switch_record();
156 tracer_enabled = 1;
157} 202}
158 203
159static void stop_sched_trace(struct trace_array *tr) 204static void stop_sched_trace(struct trace_array *tr)
160{ 205{
161 tracer_enabled = 0; 206 tracing_stop_sched_switch_record();
162 tracing_stop_cmdline_record();
163} 207}
164 208
165static void sched_switch_trace_init(struct trace_array *tr) 209static int sched_switch_trace_init(struct trace_array *tr)
166{ 210{
167 ctx_trace = tr; 211 ctx_trace = tr;
168 212 start_sched_trace(tr);
169 if (tr->ctrl) 213 return 0;
170 start_sched_trace(tr);
171} 214}
172 215
173static void sched_switch_trace_reset(struct trace_array *tr) 216static void sched_switch_trace_reset(struct trace_array *tr)
174{ 217{
175 if (tr->ctrl) 218 if (sched_ref)
176 stop_sched_trace(tr); 219 stop_sched_trace(tr);
177} 220}
178 221
179static void sched_switch_trace_ctrl_update(struct trace_array *tr) 222static void sched_switch_trace_start(struct trace_array *tr)
180{ 223{
181 /* When starting a new trace, reset the buffers */ 224 sched_switch_reset(tr);
182 if (tr->ctrl) 225 tracing_start_sched_switch();
183 start_sched_trace(tr); 226}
184 else 227
185 stop_sched_trace(tr); 228static void sched_switch_trace_stop(struct trace_array *tr)
229{
230 tracing_stop_sched_switch();
186} 231}
187 232
188static struct tracer sched_switch_trace __read_mostly = 233static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +235,8 @@ static struct tracer sched_switch_trace __read_mostly =
190 .name = "sched_switch", 235 .name = "sched_switch",
191 .init = sched_switch_trace_init, 236 .init = sched_switch_trace_init,
192 .reset = sched_switch_trace_reset, 237 .reset = sched_switch_trace_reset,
193 .ctrl_update = sched_switch_trace_ctrl_update, 238 .start = sched_switch_trace_start,
239 .stop = sched_switch_trace_stop,
194#ifdef CONFIG_FTRACE_SELFTEST 240#ifdef CONFIG_FTRACE_SELFTEST
195 .selftest = trace_selftest_startup_sched_switch, 241 .selftest = trace_selftest_startup_sched_switch,
196#endif 242#endif
@@ -198,14 +244,7 @@ static struct tracer sched_switch_trace __read_mostly =
198 244
199__init static int init_sched_switch_trace(void) 245__init static int init_sched_switch_trace(void)
200{ 246{
201 int ret = 0;
202
203 if (atomic_read(&sched_ref))
204 ret = tracing_sched_register();
205 if (ret) {
206 pr_info("error registering scheduler trace\n");
207 return ret;
208 }
209 return register_tracer(&sched_switch_trace); 247 return register_tracer(&sched_switch_trace);
210} 248}
211device_initcall(init_sched_switch_trace); 249device_initcall(init_sched_switch_trace);
250
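
The three new helpers give other tracers a clean way to pull context-switch events into their own buffer instead of poking tracer_enabled directly. A sketch of a hypothetical tracer ("bar", not part of this patch) using them:

	#include "trace.h"	/* tracing_*_sched_switch_record() declarations */

	static int bar_trace_init(struct trace_array *tr)
	{
		/* record context switches into our trace array ... */
		tracing_sched_switch_assign_trace(tr);
		/* ... and attach the sched tracepoints (refcounted via sched_ref) */
		tracing_start_sched_switch_record();
		return 0;
	}

	static void bar_trace_reset(struct trace_array *tr)
	{
		tracing_stop_sched_switch_record();
	}
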
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..0067b49746c1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
50 return; 50 return;
51 51
52 pc = preempt_count(); 52 pc = preempt_count();
53 resched = need_resched(); 53 resched = ftrace_preempt_disable();
54 preempt_disable_notrace();
55 54
56 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
57 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
81 out: 80 out:
82 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
83 82
84 /* 83 ftrace_preempt_enable(resched);
85 * To prevent recursion from the scheduler, if the
86 * resched flag was set before we entered, then
87 * don't reschedule.
88 */
89 if (resched)
90 preempt_enable_no_resched_notrace();
91 else
92 preempt_enable_notrace();
93} 84}
94 85
95static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
@@ -271,6 +262,12 @@ out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 263}
273 264
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it while opening a trace output file.
268 */
269static int save_tracer_enabled;
270
274static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
275{ 272{
276 int ret; 273 int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
309 306
310 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
311 308
312 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
313 316
314 return; 317 return;
315fail_deprobe_wake_new: 318fail_deprobe_wake_new:
@@ -321,49 +324,53 @@ fail_deprobe:
321static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
322{ 325{
323 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
324 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 330 unregister_trace_sched_wakeup_new(probe_wakeup);
327 unregister_trace_sched_wakeup(probe_wakeup); 331 unregister_trace_sched_wakeup(probe_wakeup);
328} 332}
329 333
330static void wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
331{ 335{
332 wakeup_trace = tr; 336 wakeup_trace = tr;
333 337 start_wakeup_tracer(tr);
334 if (tr->ctrl) 338 return 0;
335 start_wakeup_tracer(tr);
336} 339}
337 340
338static void wakeup_tracer_reset(struct trace_array *tr) 341static void wakeup_tracer_reset(struct trace_array *tr)
339{ 342{
340 if (tr->ctrl) { 343 stop_wakeup_tracer(tr);
341 stop_wakeup_tracer(tr); 344 /* make sure we put back any tasks we are tracing */
342 /* make sure we put back any tasks we are tracing */ 345 wakeup_reset(tr);
343 wakeup_reset(tr); 346}
344 } 347
348static void wakeup_tracer_start(struct trace_array *tr)
349{
350 wakeup_reset(tr);
351 tracer_enabled = 1;
352 save_tracer_enabled = 1;
345} 353}
346 354
347static void wakeup_tracer_ctrl_update(struct trace_array *tr) 355static void wakeup_tracer_stop(struct trace_array *tr)
348{ 356{
349 if (tr->ctrl) 357 tracer_enabled = 0;
350 start_wakeup_tracer(tr); 358 save_tracer_enabled = 0;
351 else
352 stop_wakeup_tracer(tr);
353} 359}
354 360
355static void wakeup_tracer_open(struct trace_iterator *iter) 361static void wakeup_tracer_open(struct trace_iterator *iter)
356{ 362{
357 /* stop the trace while dumping */ 363 /* stop the trace while dumping */
358 if (iter->tr->ctrl) 364 tracer_enabled = 0;
359 stop_wakeup_tracer(iter->tr);
360} 365}
361 366
362static void wakeup_tracer_close(struct trace_iterator *iter) 367static void wakeup_tracer_close(struct trace_iterator *iter)
363{ 368{
364 /* forget about any processes we were recording */ 369 /* forget about any processes we were recording */
365 if (iter->tr->ctrl) 370 if (save_tracer_enabled) {
366 start_wakeup_tracer(iter->tr); 371 wakeup_reset(iter->tr);
372 tracer_enabled = 1;
373 }
367} 374}
368 375
369static struct tracer wakeup_tracer __read_mostly = 376static struct tracer wakeup_tracer __read_mostly =
@@ -371,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
371 .name = "wakeup", 378 .name = "wakeup",
372 .init = wakeup_tracer_init, 379 .init = wakeup_tracer_init,
373 .reset = wakeup_tracer_reset, 380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
374 .open = wakeup_tracer_open, 383 .open = wakeup_tracer_open,
375 .close = wakeup_tracer_close, 384 .close = wakeup_tracer_close,
376 .ctrl_update = wakeup_tracer_ctrl_update,
377 .print_max = 1, 385 .print_max = 1,
378#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
379 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
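
ftrace_preempt_disable()/ftrace_preempt_enable() factor out the need_resched() dance that the removed lines used to open-code. Roughly what the helper pair added to kernel/trace/trace.h in this series does (a sketch, not the verbatim source):

	#include <linux/sched.h>	/* need_resched() */
	#include <linux/preempt.h>	/* preempt_*_notrace() */

	static inline int ftrace_preempt_disable(void)
	{
		int resched = need_resched();

		preempt_disable_notrace();
		return resched;
	}

	static inline void ftrace_preempt_enable(int resched)
	{
		/* If NEED_RESCHED was already set before we disabled preemption,
		 * don't let preempt_enable() call into schedule() and recurse
		 * back into the tracer. */
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}
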
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..88c8eb70f54a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH:
16 return 1; 17 return 1;
17 } 18 }
18 return 0; 19 return 0;
@@ -51,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
51 int cpu, ret = 0; 52 int cpu, ret = 0;
52 53
53 /* Don't allow flipping of max traces now */ 54 /* Don't allow flipping of max traces now */
54 raw_local_irq_save(flags); 55 local_irq_save(flags);
55 __raw_spin_lock(&ftrace_max_lock); 56 __raw_spin_lock(&ftrace_max_lock);
56 57
57 cnt = ring_buffer_entries(tr->buffer); 58 cnt = ring_buffer_entries(tr->buffer);
@@ -62,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
62 break; 63 break;
63 } 64 }
64 __raw_spin_unlock(&ftrace_max_lock); 65 __raw_spin_unlock(&ftrace_max_lock);
65 raw_local_irq_restore(flags); 66 local_irq_restore(flags);
66 67
67 if (count) 68 if (count)
68 *count = cnt; 69 *count = cnt;
@@ -70,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 71 return ret;
71} 72}
72 73
74static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
75{
76 printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
77 trace->name, init_ret);
78}
73#ifdef CONFIG_FUNCTION_TRACER 79#ifdef CONFIG_FUNCTION_TRACER
74 80
75#ifdef CONFIG_DYNAMIC_FTRACE 81#ifdef CONFIG_DYNAMIC_FTRACE
@@ -110,8 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
110 ftrace_set_filter(func_name, strlen(func_name), 1); 116 ftrace_set_filter(func_name, strlen(func_name), 1);
111 117
112 /* enable tracing */ 118 /* enable tracing */
113 tr->ctrl = 1; 119 ret = trace->init(tr);
114 trace->init(tr); 120 if (ret) {
121 warn_failed_init_tracer(trace, ret);
122 goto out;
123 }
115 124
116 /* Sleep for a 1/10 of a second */ 125 /* Sleep for a 1/10 of a second */
117 msleep(100); 126 msleep(100);
@@ -134,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
134 msleep(100); 143 msleep(100);
135 144
136 /* stop the tracing. */ 145 /* stop the tracing. */
137 tr->ctrl = 0; 146 tracing_stop();
138 trace->ctrl_update(tr);
139 ftrace_enabled = 0; 147 ftrace_enabled = 0;
140 148
141 /* check the trace buffer */ 149 /* check the trace buffer */
142 ret = trace_test_buffer(tr, &count); 150 ret = trace_test_buffer(tr, &count);
143 trace->reset(tr); 151 trace->reset(tr);
152 tracing_start();
144 153
145 /* we should only have one item */ 154 /* we should only have one item */
146 if (!ret && count != 1) { 155 if (!ret && count != 1) {
@@ -148,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
148 ret = -1; 157 ret = -1;
149 goto out; 158 goto out;
150 } 159 }
160
151 out: 161 out:
152 ftrace_enabled = save_ftrace_enabled; 162 ftrace_enabled = save_ftrace_enabled;
153 tracer_enabled = save_tracer_enabled; 163 tracer_enabled = save_tracer_enabled;
@@ -180,18 +190,22 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
180 ftrace_enabled = 1; 190 ftrace_enabled = 1;
181 tracer_enabled = 1; 191 tracer_enabled = 1;
182 192
183 tr->ctrl = 1; 193 ret = trace->init(tr);
184 trace->init(tr); 194 if (ret) {
195 warn_failed_init_tracer(trace, ret);
196 goto out;
197 }
198
185 /* Sleep for a 1/10 of a second */ 199 /* Sleep for a 1/10 of a second */
186 msleep(100); 200 msleep(100);
187 /* stop the tracing. */ 201 /* stop the tracing. */
188 tr->ctrl = 0; 202 tracing_stop();
189 trace->ctrl_update(tr);
190 ftrace_enabled = 0; 203 ftrace_enabled = 0;
191 204
192 /* check the trace buffer */ 205 /* check the trace buffer */
193 ret = trace_test_buffer(tr, &count); 206 ret = trace_test_buffer(tr, &count);
194 trace->reset(tr); 207 trace->reset(tr);
208 tracing_start();
195 209
196 if (!ret && !count) { 210 if (!ret && !count) {
197 printk(KERN_CONT ".. no entries found .."); 211 printk(KERN_CONT ".. no entries found ..");
@@ -223,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
223 int ret; 237 int ret;
224 238
225 /* start the tracing */ 239 /* start the tracing */
226 tr->ctrl = 1; 240 ret = trace->init(tr);
227 trace->init(tr); 241 if (ret) {
242 warn_failed_init_tracer(trace, ret);
243 return ret;
244 }
245
228 /* reset the max latency */ 246 /* reset the max latency */
229 tracing_max_latency = 0; 247 tracing_max_latency = 0;
230 /* disable interrupts for a bit */ 248 /* disable interrupts for a bit */
@@ -232,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
232 udelay(100); 250 udelay(100);
233 local_irq_enable(); 251 local_irq_enable();
234 /* stop the tracing. */ 252 /* stop the tracing. */
235 tr->ctrl = 0; 253 tracing_stop();
236 trace->ctrl_update(tr);
237 /* check both trace buffers */ 254 /* check both trace buffers */
238 ret = trace_test_buffer(tr, NULL); 255 ret = trace_test_buffer(tr, NULL);
239 if (!ret) 256 if (!ret)
240 ret = trace_test_buffer(&max_tr, &count); 257 ret = trace_test_buffer(&max_tr, &count);
241 trace->reset(tr); 258 trace->reset(tr);
259 tracing_start();
242 260
243 if (!ret && !count) { 261 if (!ret && !count) {
244 printk(KERN_CONT ".. no entries found .."); 262 printk(KERN_CONT ".. no entries found ..");
@@ -259,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
259 unsigned long count; 277 unsigned long count;
260 int ret; 278 int ret;
261 279
280 /*
281	 * Now that the big kernel lock is no longer preemptible,
282 * and this is called with the BKL held, it will always
283 * fail. If preemption is already disabled, simply
284 * pass the test. When the BKL is removed, or becomes
285 * preemptible again, we will once again test this,
286 * so keep it in.
287 */
288 if (preempt_count()) {
289 printk(KERN_CONT "can not test ... force ");
290 return 0;
291 }
292
262 /* start the tracing */ 293 /* start the tracing */
263 tr->ctrl = 1; 294 ret = trace->init(tr);
264 trace->init(tr); 295 if (ret) {
296 warn_failed_init_tracer(trace, ret);
297 return ret;
298 }
299
265 /* reset the max latency */ 300 /* reset the max latency */
266 tracing_max_latency = 0; 301 tracing_max_latency = 0;
267 /* disable preemption for a bit */ 302 /* disable preemption for a bit */
@@ -269,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
269 udelay(100); 304 udelay(100);
270 preempt_enable(); 305 preempt_enable();
271 /* stop the tracing. */ 306 /* stop the tracing. */
272 tr->ctrl = 0; 307 tracing_stop();
273 trace->ctrl_update(tr);
274 /* check both trace buffers */ 308 /* check both trace buffers */
275 ret = trace_test_buffer(tr, NULL); 309 ret = trace_test_buffer(tr, NULL);
276 if (!ret) 310 if (!ret)
277 ret = trace_test_buffer(&max_tr, &count); 311 ret = trace_test_buffer(&max_tr, &count);
278 trace->reset(tr); 312 trace->reset(tr);
313 tracing_start();
279 314
280 if (!ret && !count) { 315 if (!ret && !count) {
281 printk(KERN_CONT ".. no entries found .."); 316 printk(KERN_CONT ".. no entries found ..");
@@ -296,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
296 unsigned long count; 331 unsigned long count;
297 int ret; 332 int ret;
298 333
334 /*
335 * Now that the big kernel lock is no longer preemptable,
336 * and this is called with the BKL held, it will always
337 * fail. If preemption is already disabled, simply
338 * pass the test. When the BKL is removed, or becomes
339 * preemptible again, we will once again test this,
340 * so keep it in.
341 */
342 if (preempt_count()) {
343 printk(KERN_CONT "can not test ... force ");
344 return 0;
345 }
346
299 /* start the tracing */ 347 /* start the tracing */
300 tr->ctrl = 1; 348 ret = trace->init(tr);
301 trace->init(tr); 349 if (ret) {
350 warn_failed_init_tracer(trace, ret);
351 goto out;
352 }
302 353
303 /* reset the max latency */ 354 /* reset the max latency */
304 tracing_max_latency = 0; 355 tracing_max_latency = 0;
@@ -312,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
312 local_irq_enable(); 363 local_irq_enable();
313 364
314 /* stop the tracing. */ 365 /* stop the tracing. */
315 tr->ctrl = 0; 366 tracing_stop();
316 trace->ctrl_update(tr);
317 /* check both trace buffers */ 367 /* check both trace buffers */
318 ret = trace_test_buffer(tr, NULL); 368 ret = trace_test_buffer(tr, NULL);
319 if (ret) 369 if (ret) {
370 tracing_start();
320 goto out; 371 goto out;
372 }
321 373
322 ret = trace_test_buffer(&max_tr, &count); 374 ret = trace_test_buffer(&max_tr, &count);
323 if (ret) 375 if (ret) {
376 tracing_start();
324 goto out; 377 goto out;
378 }
325 379
326 if (!ret && !count) { 380 if (!ret && !count) {
327 printk(KERN_CONT ".. no entries found .."); 381 printk(KERN_CONT ".. no entries found ..");
328 ret = -1; 382 ret = -1;
383 tracing_start();
329 goto out; 384 goto out;
330 } 385 }
331 386
332 /* do the test by disabling interrupts first this time */ 387 /* do the test by disabling interrupts first this time */
333 tracing_max_latency = 0; 388 tracing_max_latency = 0;
334 tr->ctrl = 1; 389 tracing_start();
335 trace->ctrl_update(tr);
336 preempt_disable(); 390 preempt_disable();
337 local_irq_disable(); 391 local_irq_disable();
338 udelay(100); 392 udelay(100);
@@ -341,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
341 local_irq_enable(); 395 local_irq_enable();
342 396
343 /* stop the tracing. */ 397 /* stop the tracing. */
344 tr->ctrl = 0; 398 tracing_stop();
345 trace->ctrl_update(tr);
346 /* check both trace buffers */ 399 /* check both trace buffers */
347 ret = trace_test_buffer(tr, NULL); 400 ret = trace_test_buffer(tr, NULL);
348 if (ret) 401 if (ret)
@@ -358,6 +411,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
358 411
359 out: 412 out:
360 trace->reset(tr); 413 trace->reset(tr);
414 tracing_start();
361 tracing_max_latency = save_max; 415 tracing_max_latency = save_max;
362 416
363 return ret; 417 return ret;
@@ -423,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
423 wait_for_completion(&isrt); 477 wait_for_completion(&isrt);
424 478
425 /* start the tracing */ 479 /* start the tracing */
426 tr->ctrl = 1; 480 ret = trace->init(tr);
427 trace->init(tr); 481 if (ret) {
482 warn_failed_init_tracer(trace, ret);
483 return ret;
484 }
485
428 /* reset the max latency */ 486 /* reset the max latency */
429 tracing_max_latency = 0; 487 tracing_max_latency = 0;
430 488
@@ -448,8 +506,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448 msleep(100); 506 msleep(100);
449 507
450 /* stop the tracing. */ 508 /* stop the tracing. */
451 tr->ctrl = 0; 509 tracing_stop();
452 trace->ctrl_update(tr);
453 /* check both trace buffers */ 510 /* check both trace buffers */
454 ret = trace_test_buffer(tr, NULL); 511 ret = trace_test_buffer(tr, NULL);
455 if (!ret) 512 if (!ret)
@@ -457,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
457 514
458 515
459 trace->reset(tr); 516 trace->reset(tr);
517 tracing_start();
460 518
461 tracing_max_latency = save_max; 519 tracing_max_latency = save_max;
462 520
@@ -480,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
480 int ret; 538 int ret;
481 539
482 /* start the tracing */ 540 /* start the tracing */
483 tr->ctrl = 1; 541 ret = trace->init(tr);
484 trace->init(tr); 542 if (ret) {
543 warn_failed_init_tracer(trace, ret);
544 return ret;
545 }
546
485 /* Sleep for a 1/10 of a second */ 547 /* Sleep for a 1/10 of a second */
486 msleep(100); 548 msleep(100);
487 /* stop the tracing. */ 549 /* stop the tracing. */
488 tr->ctrl = 0; 550 tracing_stop();
489 trace->ctrl_update(tr);
490 /* check the trace buffer */ 551 /* check the trace buffer */
491 ret = trace_test_buffer(tr, &count); 552 ret = trace_test_buffer(tr, &count);
492 trace->reset(tr); 553 trace->reset(tr);
554 tracing_start();
493 555
494 if (!ret && !count) { 556 if (!ret && !count) {
495 printk(KERN_CONT ".. no entries found .."); 557 printk(KERN_CONT ".. no entries found ..");
@@ -508,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
508 int ret; 570 int ret;
509 571
510 /* start the tracing */ 572 /* start the tracing */
511 tr->ctrl = 1; 573 ret = trace->init(tr);
512 trace->init(tr); 574 if (ret) {
575 warn_failed_init_tracer(trace, ret);
576 return 0;
577 }
578
513 /* Sleep for a 1/10 of a second */ 579 /* Sleep for a 1/10 of a second */
514 msleep(100); 580 msleep(100);
515 /* stop the tracing. */ 581 /* stop the tracing. */
516 tr->ctrl = 0; 582 tracing_stop();
517 trace->ctrl_update(tr);
518 /* check the trace buffer */ 583 /* check the trace buffer */
519 ret = trace_test_buffer(tr, &count); 584 ret = trace_test_buffer(tr, &count);
520 trace->reset(tr); 585 trace->reset(tr);
586 tracing_start();
521 587
522 return ret; 588 return ret;
523} 589}
524#endif /* CONFIG_SYSPROF_TRACER */ 590#endif /* CONFIG_SYSPROF_TRACER */
591
592#ifdef CONFIG_BRANCH_TRACER
593int
594trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
595{
596 unsigned long count;
597 int ret;
598
599 /* start the tracing */
600 ret = trace->init(tr);
601 if (ret) {
602 warn_failed_init_tracer(trace, ret);
603 return ret;
604 }
605
606 /* Sleep for a 1/10 of a second */
607 msleep(100);
608 /* stop the tracing. */
609 tracing_stop();
610 /* check the trace buffer */
611 ret = trace_test_buffer(tr, &count);
612 trace->reset(tr);
613 tracing_start();
614
615 return ret;
616}
617#endif /* CONFIG_BRANCH_TRACER */
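
With tr->ctrl and ctrl_update gone, every startup selftest now follows the same shape: init() may fail, and the buffer check is bracketed by tracing_stop()/tracing_start(). A condensed skeleton of that pattern, assuming it sits in trace_selftest.c next to the tests above ("my_selftest" is a placeholder, not a function from this patch):

	static int my_selftest(struct tracer *trace, struct trace_array *tr)
	{
		unsigned long count;
		int ret;

		ret = trace->init(tr);			/* init can now return an error */
		if (ret) {
			warn_failed_init_tracer(trace, ret);
			return ret;
		}

		msleep(100);				/* give the tracer time to log something */

		tracing_stop();				/* global stop replaces tr->ctrl = 0 */
		ret = trace_test_buffer(tr, &count);	/* inspect the ring buffer */
		trace->reset(tr);
		tracing_start();			/* re-enable global tracing */

		if (!ret && !count) {
			printk(KERN_CONT ".. no entries found ..");
			ret = -1;
		}
		return ret;
	}
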
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3bdb44bde4b7..d0871bc0aca5 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -10,6 +10,7 @@
10#include <linux/debugfs.h> 10#include <linux/debugfs.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/sysctl.h>
13#include <linux/init.h> 14#include <linux/init.h>
14#include <linux/fs.h> 15#include <linux/fs.h>
15#include "trace.h" 16#include "trace.h"
@@ -31,6 +32,10 @@ static raw_spinlock_t max_stack_lock =
31 32
32static int stack_trace_disabled __read_mostly; 33static int stack_trace_disabled __read_mostly;
33static DEFINE_PER_CPU(int, trace_active); 34static DEFINE_PER_CPU(int, trace_active);
35static DEFINE_MUTEX(stack_sysctl_mutex);
36
37int stack_tracer_enabled;
38static int last_stack_tracer_enabled;
34 39
35static inline void check_stack(void) 40static inline void check_stack(void)
36{ 41{
@@ -48,7 +53,7 @@ static inline void check_stack(void)
48 if (!object_is_on_stack(&this_size)) 53 if (!object_is_on_stack(&this_size))
49 return; 54 return;
50 55
51 raw_local_irq_save(flags); 56 local_irq_save(flags);
52 __raw_spin_lock(&max_stack_lock); 57 __raw_spin_lock(&max_stack_lock);
53 58
54 /* a race could have already updated it */ 59 /* a race could have already updated it */
@@ -78,6 +83,7 @@ static inline void check_stack(void)
78 * on a new max, so it is far from a fast path. 83 * on a new max, so it is far from a fast path.
79 */ 84 */
80 while (i < max_stack_trace.nr_entries) { 85 while (i < max_stack_trace.nr_entries) {
86 int found = 0;
81 87
82 stack_dump_index[i] = this_size; 88 stack_dump_index[i] = this_size;
83 p = start; 89 p = start;
@@ -86,17 +92,19 @@ static inline void check_stack(void)
86 if (*p == stack_dump_trace[i]) { 92 if (*p == stack_dump_trace[i]) {
87 this_size = stack_dump_index[i++] = 93 this_size = stack_dump_index[i++] =
88 (top - p) * sizeof(unsigned long); 94 (top - p) * sizeof(unsigned long);
95 found = 1;
89 /* Start the search from here */ 96 /* Start the search from here */
90 start = p + 1; 97 start = p + 1;
91 } 98 }
92 } 99 }
93 100
94 i++; 101 if (!found)
102 i++;
95 } 103 }
96 104
97 out: 105 out:
98 __raw_spin_unlock(&max_stack_lock); 106 __raw_spin_unlock(&max_stack_lock);
99 raw_local_irq_restore(flags); 107 local_irq_restore(flags);
100} 108}
101 109
102static void 110static void
@@ -107,8 +115,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
107 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return; 116 return;
109 117
110 resched = need_resched(); 118 resched = ftrace_preempt_disable();
111 preempt_disable_notrace();
112 119
113 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 out: 127 out:
121 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
123 if (resched) 130 ftrace_preempt_enable(resched);
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127} 131}
128 132
129static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
@@ -166,16 +170,16 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
166 if (ret < 0) 170 if (ret < 0)
167 return ret; 171 return ret;
168 172
169 raw_local_irq_save(flags); 173 local_irq_save(flags);
170 __raw_spin_lock(&max_stack_lock); 174 __raw_spin_lock(&max_stack_lock);
171 *ptr = val; 175 *ptr = val;
172 __raw_spin_unlock(&max_stack_lock); 176 __raw_spin_unlock(&max_stack_lock);
173 raw_local_irq_restore(flags); 177 local_irq_restore(flags);
174 178
175 return count; 179 return count;
176} 180}
177 181
178static struct file_operations stack_max_size_fops = { 182static const struct file_operations stack_max_size_fops = {
179 .open = tracing_open_generic, 183 .open = tracing_open_generic,
180 .read = stack_max_size_read, 184 .read = stack_max_size_read,
181 .write = stack_max_size_write, 185 .write = stack_max_size_write,
@@ -273,7 +277,7 @@ static int t_show(struct seq_file *m, void *v)
273 return 0; 277 return 0;
274} 278}
275 279
276static struct seq_operations stack_trace_seq_ops = { 280static const struct seq_operations stack_trace_seq_ops = {
277 .start = t_start, 281 .start = t_start,
278 .next = t_next, 282 .next = t_next,
279 .stop = t_stop, 283 .stop = t_stop,
@@ -289,12 +293,47 @@ static int stack_trace_open(struct inode *inode, struct file *file)
289 return ret; 293 return ret;
290} 294}
291 295
292static struct file_operations stack_trace_fops = { 296static const struct file_operations stack_trace_fops = {
293 .open = stack_trace_open, 297 .open = stack_trace_open,
294 .read = seq_read, 298 .read = seq_read,
295 .llseek = seq_lseek, 299 .llseek = seq_lseek,
296}; 300};
297 301
302int
303stack_trace_sysctl(struct ctl_table *table, int write,
304 struct file *file, void __user *buffer, size_t *lenp,
305 loff_t *ppos)
306{
307 int ret;
308
309 mutex_lock(&stack_sysctl_mutex);
310
311 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
312
313 if (ret || !write ||
314 (last_stack_tracer_enabled == stack_tracer_enabled))
315 goto out;
316
317 last_stack_tracer_enabled = stack_tracer_enabled;
318
319 if (stack_tracer_enabled)
320 register_ftrace_function(&trace_ops);
321 else
322 unregister_ftrace_function(&trace_ops);
323
324 out:
325 mutex_unlock(&stack_sysctl_mutex);
326 return ret;
327}
328
329static __init int enable_stacktrace(char *str)
330{
331 stack_tracer_enabled = 1;
332 last_stack_tracer_enabled = 1;
333 return 1;
334}
335__setup("stacktrace", enable_stacktrace);
336
298static __init int stack_trace_init(void) 337static __init int stack_trace_init(void)
299{ 338{
300 struct dentry *d_tracer; 339 struct dentry *d_tracer;
@@ -312,7 +351,8 @@ static __init int stack_trace_init(void)
312 if (!entry) 351 if (!entry)
313 pr_warning("Could not create debugfs 'stack_trace' entry\n"); 352 pr_warning("Could not create debugfs 'stack_trace' entry\n");
314 353
315 register_ftrace_function(&trace_ops); 354 if (stack_tracer_enabled)
355 register_ftrace_function(&trace_ops);
316 356
317 return 0; 357 return 0;
318} 358}
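
stack_tracer_enabled is meant to be flipped at run time through a sysctl handled by stack_trace_sysctl(); the ctl_table entry itself lives in kernel/sysctl.c and is not part of this hunk, so the following is only a sketch with assumed field values and placement:

	#include <linux/sysctl.h>
	#include <linux/ftrace.h>	/* stack_tracer_enabled, stack_trace_sysctl() (assumed) */

	static struct ctl_table assumed_kern_table[] = {
	#ifdef CONFIG_STACK_TRACER
		{
			.ctl_name	= CTL_UNNUMBERED,
			.procname	= "stack_tracer_enabled",
			.data		= &stack_tracer_enabled,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &stack_trace_sysctl,
		},
	#endif
		{ }
	};

Booting with the "stacktrace" parameter (the __setup() hook above) enables the same behaviour before the sysctl interface is available.
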
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..54960edb96d0 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -261,27 +261,17 @@ static void stop_stack_trace(struct trace_array *tr)
261 mutex_unlock(&sample_timer_lock); 261 mutex_unlock(&sample_timer_lock);
262} 262}
263 263
264static void stack_trace_init(struct trace_array *tr) 264static int stack_trace_init(struct trace_array *tr)
265{ 265{
266 sysprof_trace = tr; 266 sysprof_trace = tr;
267 267
268 if (tr->ctrl) 268 start_stack_trace(tr);
269 start_stack_trace(tr); 269 return 0;
270} 270}
271 271
272static void stack_trace_reset(struct trace_array *tr) 272static void stack_trace_reset(struct trace_array *tr)
273{ 273{
274 if (tr->ctrl) 274 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 275}
286 276
287static struct tracer stack_trace __read_mostly = 277static struct tracer stack_trace __read_mostly =
@@ -289,7 +279,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 279 .name = "sysprof",
290 .init = stack_trace_init, 280 .init = stack_trace_init,
291 .reset = stack_trace_reset, 281 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 282#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 283 .selftest = trace_selftest_startup_sysprof,
295#endif 284#endif