Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                        |    4
-rw-r--r--  kernel/exit.c                          |    4
-rw-r--r--  kernel/fork.c                          |    2
-rw-r--r--  kernel/kthread.c                       |    3
-rw-r--r--  kernel/marker.c                        |  192
-rw-r--r--  kernel/module.c                        |   11
-rw-r--r--  kernel/profile.c                       |    2
-rw-r--r--  kernel/sched.c                         |    6
-rw-r--r--  kernel/signal.c                        |    2
-rw-r--r--  kernel/sysctl.c                        |   10
-rw-r--r--  kernel/trace/Kconfig                   |   58
-rw-r--r--  kernel/trace/Makefile                  |    7
-rw-r--r--  kernel/trace/ftrace.c                  |  260
-rw-r--r--  kernel/trace/ring_buffer.c             |  298
-rw-r--r--  kernel/trace/trace.c                   |  611
-rw-r--r--  kernel/trace/trace.h                   |  182
-rw-r--r--  kernel/trace/trace_boot.c              |  166
-rw-r--r--  kernel/trace/trace_branch.c            |  321
-rw-r--r--  kernel/trace/trace_functions.c         |   18
-rw-r--r--  kernel/trace/trace_functions_return.c  |   98
-rw-r--r--  kernel/trace/trace_irqsoff.c           |   61
-rw-r--r--  kernel/trace/trace_mmiotrace.c         |   25
-rw-r--r--  kernel/trace/trace_nop.c               |   65
-rw-r--r--  kernel/trace/trace_sched_switch.c      |  106
-rw-r--r--  kernel/trace/trace_sched_wakeup.c      |   70
-rw-r--r--  kernel/trace/trace_selftest.c          |  173
-rw-r--r--  kernel/trace/trace_stack.c             |    8
-rw-r--r--  kernel/trace/trace_sysprof.c           |   19
-rw-r--r--  kernel/tracepoint.c                    |  295
29 files changed, 2326 insertions, 751 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..03a45e7e87b7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,6 +21,10 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
 CFLAGS_REMOVE_sched.o = -pg
 endif
+ifdef CONFIG_FUNCTION_RET_TRACER
+CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address()
+CFLAGS_REMOVE_module.o = -pg # For __module_text_address()
+endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d8be7ebb0f7..35c8ec2ba03a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -53,6 +53,10 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+DEFINE_TRACE(sched_process_free);
+DEFINE_TRACE(sched_process_exit);
+DEFINE_TRACE(sched_process_wait);
+
 static void exit_mm(struct task_struct * tsk);
 
 static inline int task_detached(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index 2a372a0e206f..ac62f43ee430 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -80,6 +80,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
 
+DEFINE_TRACE(sched_process_fork);
+
 int nr_processes(void)
 {
 	int cpu;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e7a7ce3ed0a..4fbc456f393d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
+DEFINE_TRACE(sched_kthread_stop);
+DEFINE_TRACE(sched_kthread_stop_ret);
+
 struct kthread_create_info
 {
 	/* Information passed to kthread() from kthreadd. */
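
The DEFINE_TRACE() lines added above (and in exit.c, fork.c, sched.c and signal.c) pair with DECLARE_TRACE() declarations in the trace headers: the declaration generates the inline trace_*() call site plus the register/unregister helpers, while DEFINE_TRACE() instantiates the tracepoint in exactly one translation unit. A minimal sketch of that pattern follows; the macro spellings (TPPROTO/TPARGS), the probe body and the init/exit wrappers are illustrative and not taken from this patch.

/* in a shared header, e.g. include/trace/sched.h */
DECLARE_TRACE(sched_kthread_stop,
	TPPROTO(struct task_struct *t),
	TPARGS(t));

/* in the one .c file that owns the event (what the hunks above add) */
DEFINE_TRACE(sched_kthread_stop);

/* at the instrumentation point, e.g. in kthread_stop() */
trace_sched_kthread_stop(k);

/* a tracer attaches a probe through the generated helpers */
static void probe_kthread_stop(struct task_struct *t)
{
	/* record or count the event here */
}

static int __init my_tracer_init(void)
{
	return register_trace_sched_kthread_stop(probe_kthread_stop);
}

static void __exit my_tracer_exit(void)
{
	unregister_trace_sched_kthread_stop(probe_kthread_stop);
}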
diff --git a/kernel/marker.c b/kernel/marker.c
index e9c6b2bc9400..ea54f2647868 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(markers_mutex);
43 */ 43 */
44#define MARKER_HASH_BITS 6 44#define MARKER_HASH_BITS 6
45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) 45#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
46static struct hlist_head marker_table[MARKER_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -64,11 +65,10 @@ struct marker_entry {
64 void *oldptr; 65 void *oldptr;
65 int rcu_pending; 66 int rcu_pending;
66 unsigned char ptype:1; 67 unsigned char ptype:1;
68 unsigned char format_allocated:1;
67 char name[0]; /* Contains name'\0'format'\0' */ 69 char name[0]; /* Contains name'\0'format'\0' */
68}; 70};
69 71
70static struct hlist_head marker_table[MARKER_TABLE_SIZE];
71
72/** 72/**
73 * __mark_empty_function - Empty probe callback 73 * __mark_empty_function - Empty probe callback
74 * @probe_private: probe private data 74 * @probe_private: probe private data
@@ -81,7 +81,7 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
81 * though the function pointer change and the marker enabling are two distinct 81 * though the function pointer change and the marker enabling are two distinct
82 * operations that modifies the execution flow of preemptible code. 82 * operations that modifies the execution flow of preemptible code.
83 */ 83 */
84void __mark_empty_function(void *probe_private, void *call_private, 84notrace void __mark_empty_function(void *probe_private, void *call_private,
85 const char *fmt, va_list *args) 85 const char *fmt, va_list *args)
86{ 86{
87} 87}
@@ -97,7 +97,8 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
97 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
98 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
99 */ 99 */
100void marker_probe_cb(const struct marker *mdata, void *call_private, ...) 100notrace void marker_probe_cb(const struct marker *mdata,
101 void *call_private, ...)
101{ 102{
102 va_list args; 103 va_list args;
103 char ptype; 104 char ptype;
@@ -107,7 +108,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
107 * sure the teardown of the callbacks can be done correctly when they 108 * sure the teardown of the callbacks can be done correctly when they
108 * are in modules and they insure RCU read coherency. 109 * are in modules and they insure RCU read coherency.
109 */ 110 */
110 rcu_read_lock_sched(); 111 rcu_read_lock_sched_notrace();
111 ptype = mdata->ptype; 112 ptype = mdata->ptype;
112 if (likely(!ptype)) { 113 if (likely(!ptype)) {
113 marker_probe_func *func; 114 marker_probe_func *func;
@@ -145,7 +146,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
145 va_end(args); 146 va_end(args);
146 } 147 }
147 } 148 }
148 rcu_read_unlock_sched(); 149 rcu_read_unlock_sched_notrace();
149} 150}
150EXPORT_SYMBOL_GPL(marker_probe_cb); 151EXPORT_SYMBOL_GPL(marker_probe_cb);
151 152
@@ -157,12 +158,13 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
157 * 158 *
158 * Should be connected to markers "MARK_NOARGS". 159 * Should be connected to markers "MARK_NOARGS".
159 */ 160 */
160void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) 161static notrace void marker_probe_cb_noarg(const struct marker *mdata,
162 void *call_private, ...)
161{ 163{
162 va_list args; /* not initialized */ 164 va_list args; /* not initialized */
163 char ptype; 165 char ptype;
164 166
165 rcu_read_lock_sched(); 167 rcu_read_lock_sched_notrace();
166 ptype = mdata->ptype; 168 ptype = mdata->ptype;
167 if (likely(!ptype)) { 169 if (likely(!ptype)) {
168 marker_probe_func *func; 170 marker_probe_func *func;
@@ -195,9 +197,8 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
195 multi[i].func(multi[i].probe_private, call_private, 197 multi[i].func(multi[i].probe_private, call_private,
196 mdata->format, &args); 198 mdata->format, &args);
197 } 199 }
198 rcu_read_unlock_sched(); 200 rcu_read_unlock_sched_notrace();
199} 201}
200EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
201 202
202static void free_old_closure(struct rcu_head *head) 203static void free_old_closure(struct rcu_head *head)
203{ 204{
@@ -416,6 +417,7 @@ static struct marker_entry *add_marker(const char *name, const char *format)
416 e->single.probe_private = NULL; 417 e->single.probe_private = NULL;
417 e->multi = NULL; 418 e->multi = NULL;
418 e->ptype = 0; 419 e->ptype = 0;
420 e->format_allocated = 0;
419 e->refcount = 0; 421 e->refcount = 0;
420 e->rcu_pending = 0; 422 e->rcu_pending = 0;
421 hlist_add_head(&e->hlist, head); 423 hlist_add_head(&e->hlist, head);
@@ -447,6 +449,8 @@ static int remove_marker(const char *name)
447 if (e->single.func != __mark_empty_function) 449 if (e->single.func != __mark_empty_function)
448 return -EBUSY; 450 return -EBUSY;
449 hlist_del(&e->hlist); 451 hlist_del(&e->hlist);
452 if (e->format_allocated)
453 kfree(e->format);
450 /* Make sure the call_rcu has been executed */ 454 /* Make sure the call_rcu has been executed */
451 if (e->rcu_pending) 455 if (e->rcu_pending)
452 rcu_barrier_sched(); 456 rcu_barrier_sched();
@@ -457,57 +461,34 @@ static int remove_marker(const char *name)
457/* 461/*
458 * Set the mark_entry format to the format found in the element. 462 * Set the mark_entry format to the format found in the element.
459 */ 463 */
460static int marker_set_format(struct marker_entry **entry, const char *format) 464static int marker_set_format(struct marker_entry *entry, const char *format)
461{ 465{
462 struct marker_entry *e; 466 entry->format = kstrdup(format, GFP_KERNEL);
463 size_t name_len = strlen((*entry)->name) + 1; 467 if (!entry->format)
464 size_t format_len = strlen(format) + 1;
465
466
467 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
468 GFP_KERNEL);
469 if (!e)
470 return -ENOMEM; 468 return -ENOMEM;
471 memcpy(&e->name[0], (*entry)->name, name_len); 469 entry->format_allocated = 1;
472 e->format = &e->name[name_len]; 470
473 memcpy(e->format, format, format_len);
474 if (strcmp(e->format, MARK_NOARGS) == 0)
475 e->call = marker_probe_cb_noarg;
476 else
477 e->call = marker_probe_cb;
478 e->single = (*entry)->single;
479 e->multi = (*entry)->multi;
480 e->ptype = (*entry)->ptype;
481 e->refcount = (*entry)->refcount;
482 e->rcu_pending = 0;
483 hlist_add_before(&e->hlist, &(*entry)->hlist);
484 hlist_del(&(*entry)->hlist);
485 /* Make sure the call_rcu has been executed */
486 if ((*entry)->rcu_pending)
487 rcu_barrier_sched();
488 kfree(*entry);
489 *entry = e;
490 trace_mark(core_marker_format, "name %s format %s", 471 trace_mark(core_marker_format, "name %s format %s",
491 e->name, e->format); 472 entry->name, entry->format);
492 return 0; 473 return 0;
493} 474}
494 475
495/* 476/*
496 * Sets the probe callback corresponding to one marker. 477 * Sets the probe callback corresponding to one marker.
497 */ 478 */
498static int set_marker(struct marker_entry **entry, struct marker *elem, 479static int set_marker(struct marker_entry *entry, struct marker *elem,
499 int active) 480 int active)
500{ 481{
501 int ret; 482 int ret = 0;
502 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 483 WARN_ON(strcmp(entry->name, elem->name) != 0);
503 484
504 if ((*entry)->format) { 485 if (entry->format) {
505 if (strcmp((*entry)->format, elem->format) != 0) { 486 if (strcmp(entry->format, elem->format) != 0) {
506 printk(KERN_NOTICE 487 printk(KERN_NOTICE
507 "Format mismatch for probe %s " 488 "Format mismatch for probe %s "
508 "(%s), marker (%s)\n", 489 "(%s), marker (%s)\n",
509 (*entry)->name, 490 entry->name,
510 (*entry)->format, 491 entry->format,
511 elem->format); 492 elem->format);
512 return -EPERM; 493 return -EPERM;
513 } 494 }
@@ -523,37 +504,67 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
523 * pass from a "safe" callback (with argument) to an "unsafe" 504 * pass from a "safe" callback (with argument) to an "unsafe"
524 * callback (does not set arguments). 505 * callback (does not set arguments).
525 */ 506 */
526 elem->call = (*entry)->call; 507 elem->call = entry->call;
527 /* 508 /*
528 * Sanity check : 509 * Sanity check :
529 * We only update the single probe private data when the ptr is 510 * We only update the single probe private data when the ptr is
530 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) 511 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
531 */ 512 */
532 WARN_ON(elem->single.func != __mark_empty_function 513 WARN_ON(elem->single.func != __mark_empty_function
533 && elem->single.probe_private 514 && elem->single.probe_private != entry->single.probe_private
534 != (*entry)->single.probe_private && 515 && !elem->ptype);
535 !elem->ptype); 516 elem->single.probe_private = entry->single.probe_private;
536 elem->single.probe_private = (*entry)->single.probe_private;
537 /* 517 /*
538 * Make sure the private data is valid when we update the 518 * Make sure the private data is valid when we update the
539 * single probe ptr. 519 * single probe ptr.
540 */ 520 */
541 smp_wmb(); 521 smp_wmb();
542 elem->single.func = (*entry)->single.func; 522 elem->single.func = entry->single.func;
543 /* 523 /*
544 * We also make sure that the new probe callbacks array is consistent 524 * We also make sure that the new probe callbacks array is consistent
545 * before setting a pointer to it. 525 * before setting a pointer to it.
546 */ 526 */
547 rcu_assign_pointer(elem->multi, (*entry)->multi); 527 rcu_assign_pointer(elem->multi, entry->multi);
548 /* 528 /*
549 * Update the function or multi probe array pointer before setting the 529 * Update the function or multi probe array pointer before setting the
550 * ptype. 530 * ptype.
551 */ 531 */
552 smp_wmb(); 532 smp_wmb();
553 elem->ptype = (*entry)->ptype; 533 elem->ptype = entry->ptype;
534
535 if (elem->tp_name && (active ^ elem->state)) {
536 WARN_ON(!elem->tp_cb);
537 /*
538 * It is ok to directly call the probe registration because type
539 * checking has been done in the __trace_mark_tp() macro.
540 */
541
542 if (active) {
543 /*
544 * try_module_get should always succeed because we hold
545 * lock_module() to get the tp_cb address.
546 */
547 ret = try_module_get(__module_text_address(
548 (unsigned long)elem->tp_cb));
549 BUG_ON(!ret);
550 ret = tracepoint_probe_register_noupdate(
551 elem->tp_name,
552 elem->tp_cb);
553 } else {
554 ret = tracepoint_probe_unregister_noupdate(
555 elem->tp_name,
556 elem->tp_cb);
557 /*
558 * tracepoint_probe_update_all() must be called
559 * before the module containing tp_cb is unloaded.
560 */
561 module_put(__module_text_address(
562 (unsigned long)elem->tp_cb));
563 }
564 }
554 elem->state = active; 565 elem->state = active;
555 566
556 return 0; 567 return ret;
557} 568}
558 569
559/* 570/*
@@ -564,7 +575,24 @@ static int set_marker(struct marker_entry **entry, struct marker *elem,
564 */ 575 */
565static void disable_marker(struct marker *elem) 576static void disable_marker(struct marker *elem)
566{ 577{
578 int ret;
579
567 /* leave "call" as is. It is known statically. */ 580 /* leave "call" as is. It is known statically. */
581 if (elem->tp_name && elem->state) {
582 WARN_ON(!elem->tp_cb);
583 /*
584 * It is ok to directly call the probe registration because type
585 * checking has been done in the __trace_mark_tp() macro.
586 */
587 ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
588 elem->tp_cb);
589 WARN_ON(ret);
590 /*
591 * tracepoint_probe_update_all() must be called
592 * before the module containing tp_cb is unloaded.
593 */
594 module_put(__module_text_address((unsigned long)elem->tp_cb));
595 }
568 elem->state = 0; 596 elem->state = 0;
569 elem->single.func = __mark_empty_function; 597 elem->single.func = __mark_empty_function;
570 /* Update the function before setting the ptype */ 598 /* Update the function before setting the ptype */
@@ -594,8 +622,7 @@ void marker_update_probe_range(struct marker *begin,
594 for (iter = begin; iter < end; iter++) { 622 for (iter = begin; iter < end; iter++) {
595 mark_entry = get_marker(iter->name); 623 mark_entry = get_marker(iter->name);
596 if (mark_entry) { 624 if (mark_entry) {
597 set_marker(&mark_entry, iter, 625 set_marker(mark_entry, iter, !!mark_entry->refcount);
598 !!mark_entry->refcount);
599 /* 626 /*
600 * ignore error, continue 627 * ignore error, continue
601 */ 628 */
@@ -629,6 +656,7 @@ static void marker_update_probes(void)
629 marker_update_probe_range(__start___markers, __stop___markers); 656 marker_update_probe_range(__start___markers, __stop___markers);
630 /* Markers in modules. */ 657 /* Markers in modules. */
631 module_update_markers(); 658 module_update_markers();
659 tracepoint_probe_update_all();
632} 660}
633 661
634/** 662/**
@@ -657,7 +685,7 @@ int marker_probe_register(const char *name, const char *format,
657 ret = PTR_ERR(entry); 685 ret = PTR_ERR(entry);
658 } else if (format) { 686 } else if (format) {
659 if (!entry->format) 687 if (!entry->format)
660 ret = marker_set_format(&entry, format); 688 ret = marker_set_format(entry, format);
661 else if (strcmp(entry->format, format)) 689 else if (strcmp(entry->format, format))
662 ret = -EPERM; 690 ret = -EPERM;
663 } 691 }
@@ -676,10 +704,11 @@ int marker_probe_register(const char *name, const char *format,
676 goto end; 704 goto end;
677 } 705 }
678 mutex_unlock(&markers_mutex); 706 mutex_unlock(&markers_mutex);
679 marker_update_probes(); /* may update entry */ 707 marker_update_probes();
680 mutex_lock(&markers_mutex); 708 mutex_lock(&markers_mutex);
681 entry = get_marker(name); 709 entry = get_marker(name);
682 WARN_ON(!entry); 710 if (!entry)
711 goto end;
683 if (entry->rcu_pending) 712 if (entry->rcu_pending)
684 rcu_barrier_sched(); 713 rcu_barrier_sched();
685 entry->oldptr = old; 714 entry->oldptr = old;
@@ -720,7 +749,7 @@ int marker_probe_unregister(const char *name,
720 rcu_barrier_sched(); 749 rcu_barrier_sched();
721 old = marker_entry_remove_probe(entry, probe, probe_private); 750 old = marker_entry_remove_probe(entry, probe, probe_private);
722 mutex_unlock(&markers_mutex); 751 mutex_unlock(&markers_mutex);
723 marker_update_probes(); /* may update entry */ 752 marker_update_probes();
724 mutex_lock(&markers_mutex); 753 mutex_lock(&markers_mutex);
725 entry = get_marker(name); 754 entry = get_marker(name);
726 if (!entry) 755 if (!entry)
@@ -801,10 +830,11 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
801 rcu_barrier_sched(); 830 rcu_barrier_sched();
802 old = marker_entry_remove_probe(entry, NULL, probe_private); 831 old = marker_entry_remove_probe(entry, NULL, probe_private);
803 mutex_unlock(&markers_mutex); 832 mutex_unlock(&markers_mutex);
804 marker_update_probes(); /* may update entry */ 833 marker_update_probes();
805 mutex_lock(&markers_mutex); 834 mutex_lock(&markers_mutex);
806 entry = get_marker_from_private_data(probe, probe_private); 835 entry = get_marker_from_private_data(probe, probe_private);
807 WARN_ON(!entry); 836 if (!entry)
837 goto end;
808 if (entry->rcu_pending) 838 if (entry->rcu_pending)
809 rcu_barrier_sched(); 839 rcu_barrier_sched();
810 entry->oldptr = old; 840 entry->oldptr = old;
@@ -848,8 +878,6 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
848 if (!e->ptype) { 878 if (!e->ptype) {
849 if (num == 0 && e->single.func == probe) 879 if (num == 0 && e->single.func == probe)
850 return e->single.probe_private; 880 return e->single.probe_private;
851 else
852 break;
853 } else { 881 } else {
854 struct marker_probe_closure *closure; 882 struct marker_probe_closure *closure;
855 int match = 0; 883 int match = 0;
@@ -861,8 +889,42 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
861 return closure[i].probe_private; 889 return closure[i].probe_private;
862 } 890 }
863 } 891 }
892 break;
864 } 893 }
865 } 894 }
866 return ERR_PTR(-ENOENT); 895 return ERR_PTR(-ENOENT);
867} 896}
868EXPORT_SYMBOL_GPL(marker_get_private_data); 897EXPORT_SYMBOL_GPL(marker_get_private_data);
898
899#ifdef CONFIG_MODULES
900
901int marker_module_notify(struct notifier_block *self,
902 unsigned long val, void *data)
903{
904 struct module *mod = data;
905
906 switch (val) {
907 case MODULE_STATE_COMING:
908 marker_update_probe_range(mod->markers,
909 mod->markers + mod->num_markers);
910 break;
911 case MODULE_STATE_GOING:
912 marker_update_probe_range(mod->markers,
913 mod->markers + mod->num_markers);
914 break;
915 }
916 return 0;
917}
918
919struct notifier_block marker_module_nb = {
920 .notifier_call = marker_module_notify,
921 .priority = 0,
922};
923
924static int init_markers(void)
925{
926 return register_module_notifier(&marker_module_nb);
927}
928__initcall(init_markers);
929
930#endif /* CONFIG_MODULES */
diff --git a/kernel/module.c b/kernel/module.c
index 1f4cc00e0c20..89bcf7c1327d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2184,24 +2184,15 @@ static noinline struct module *load_module(void __user *umod,
 		struct mod_debug *debug;
 		unsigned int num_debug;
 
-#ifdef CONFIG_MARKERS
-		marker_update_probe_range(mod->markers,
-			mod->markers + mod->num_markers);
-#endif
 		debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
 				     sizeof(*debug), &num_debug);
 		dynamic_printk_setup(debug, num_debug);
-
-#ifdef CONFIG_TRACEPOINTS
-		tracepoint_update_probe_range(mod->tracepoints,
-			mod->tracepoints + mod->num_tracepoints);
-#endif
 	}
 
 	/* sechdrs[0].sh_size is always zero */
 	mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
 			    sizeof(*mseg), &num_mcount);
-	ftrace_init_module(mseg, mseg + num_mcount);
+	ftrace_init_module(mod, mseg, mseg + num_mcount);
 
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
diff --git a/kernel/profile.c b/kernel/profile.c
index 5b7d1ac7124c..7f93a5042d3b 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -544,7 +544,7 @@ static const struct file_operations proc_profile_operations = {
 };
 
 #ifdef CONFIG_SMP
-static inline void profile_nop(void *unused)
+static void profile_nop(void *unused)
 {
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 9b1e79371c20..4de56108c86f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
+DEFINE_TRACE(sched_wait_task);
+DEFINE_TRACE(sched_wakeup);
+DEFINE_TRACE(sched_wakeup_new);
+DEFINE_TRACE(sched_switch);
+DEFINE_TRACE(sched_migrate_task);
+
 #ifdef CONFIG_SMP
 /*
  * Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc654455..e9afe63da24b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,6 +41,8 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
+DEFINE_TRACE(sched_signal_send);
+
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
 	return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9d048fa2d902..65d4a9ba79e4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= &ftrace_enable_sysctl,
 	},
 #endif
+#ifdef CONFIG_TRACING
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "ftrace_dump_on_oops",
+		.data		= &ftrace_dump_on_oops,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 #ifdef CONFIG_MODULES
 	{
 		.ctl_name	= KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 33dbefd471e8..b8378fad29a3 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,16 @@ config NOP_TRACER
 config HAVE_FUNCTION_TRACER
 	bool
 
+config HAVE_FUNCTION_RET_TRACER
+	bool
+
+config HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	bool
+	help
+	  This gets selected when the arch tests the function_trace_stop
+	  variable at the mcount call site. Otherwise, this variable
+	  is tested by the called function.
+
 config HAVE_DYNAMIC_FTRACE
 	bool
 
@@ -47,6 +57,16 @@ config FUNCTION_TRACER
 	  (the bootup default), then the overhead of the instructions is very
 	  small and not measurable even in micro-benchmarks.
 
+config FUNCTION_RET_TRACER
+	bool "Kernel Function return Tracer"
+	depends on HAVE_FUNCTION_RET_TRACER
+	depends on FUNCTION_TRACER
+	help
+	  Enable the kernel to trace a function at its return.
+	  Its first purpose is to trace the duration of functions.
+	  This is done by setting the current return address on the thread
+	  info structure of the current task.
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
@@ -138,6 +158,44 @@ config BOOT_TRACER
 	  selected, because the self-tests are an initcall as well and that
 	  would invalidate the boot trace. )
 
+config TRACE_BRANCH_PROFILING
+	bool "Trace likely/unlikely profiler"
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer profiles all the likely and unlikely macros
+	  in the kernel. It will display the results in:
+
+	  /debugfs/tracing/profile_likely
+	  /debugfs/tracing/profile_unlikely
+
+	  Note: this adds a significant overhead; only turn this
+	  on if you need to profile the system's use of these macros.
+
+	  Say N if unsure.
+
+config TRACING_BRANCHES
+	bool
+	help
+	  Selected by tracers that will trace the likely and unlikely
+	  conditions. This prevents the tracers themselves from being
+	  profiled. Profiling the tracing infrastructure can only happen
+	  when the likely and unlikely conditions are not being traced.
+
+config BRANCH_TRACER
+	bool "Trace likely/unlikely instances"
+	depends on TRACE_BRANCH_PROFILING
+	select TRACING_BRANCHES
+	help
+	  This traces the events of likely and unlikely condition
+	  calls in the kernel. The difference between this and the
+	  "Trace likely/unlikely profiler" is that this is not a
+	  histogram of the callers, but actually places the calling
+	  events into a running trace buffer to see when and where the
+	  events happened, as well as their results.
+
+	  Say N if unsure.
+
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FUNCTION_TRACER
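
For context on what the TRACE_BRANCH_PROFILING option above instruments: it works by redefining likely()/unlikely() so that each annotated branch also updates a per-call-site hit/miss record, which the profile_likely/profile_unlikely files later dump. The fragment below is only a simplified illustration of that idea; the struct and macro names are made up for the sketch and are not the kernel's.

/*
 * Illustrative sketch only -- not the kernel's implementation.
 * Each annotated branch gets its own static record placed in a
 * dedicated section; the wrapper counts how often the hint matched
 * the actual outcome before handing the value to __builtin_expect().
 */
struct branch_profile_site {
	const char	*func;
	const char	*file;
	unsigned int	line;
	unsigned long	correct;
	unsigned long	incorrect;
};

#define profiled_branch(cond, expect) ({				\
	static struct branch_profile_site ___site			\
		__attribute__((section("_branch_profile"))) = {		\
		.func = __func__, .file = __FILE__, .line = __LINE__,	\
	};								\
	int ___r = !!(cond);						\
	if (___r == (expect))						\
		___site.correct++;					\
	else								\
		___site.incorrect++;					\
	__builtin_expect(___r, expect);					\
})

#define likely(x)	profiled_branch(x, 1)
#define unlikely(x)	profiled_branch(x, 0)

The overhead the help text warns about comes from exactly this: every likely()/unlikely() evaluation now performs extra memory writes on top of the branch itself.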
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c8228b1a49e9..1a8c9259dc69 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -10,6 +10,11 @@ CFLAGS_trace_selftest_dynamic.o = -pg
 obj-y += trace_selftest_dynamic.o
 endif
 
+# If unlikely tracing is enabled, do not trace these files
+ifdef CONFIG_TRACING_BRANCHES
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+endif
+
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
@@ -24,5 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
+obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 78db083390f0..f212da486689 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -47,6 +47,12 @@
47int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
48static int last_ftrace_enabled; 48static int last_ftrace_enabled;
49 49
50/* Quick disabling of function tracer. */
51int function_trace_stop;
52
53/* By default, current tracing type is normal tracing. */
54enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER;
55
50/* 56/*
51 * ftrace_disabled is set when an anomaly is discovered. 57 * ftrace_disabled is set when an anomaly is discovered.
52 * ftrace_disabled is much stronger than ftrace_enabled. 58 * ftrace_disabled is much stronger than ftrace_enabled.
@@ -63,6 +69,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly =
63 69
64static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; 70static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
65ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 71ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
72ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
66 73
67static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 74static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
68{ 75{
@@ -88,8 +95,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
88void clear_ftrace_function(void) 95void clear_ftrace_function(void)
89{ 96{
90 ftrace_trace_function = ftrace_stub; 97 ftrace_trace_function = ftrace_stub;
98 __ftrace_trace_function = ftrace_stub;
91} 99}
92 100
101#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
102/*
103 * For those archs that do not test function_trace_stop in their
104 * mcount call site, we need to do it from C.
105 */
106static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
107{
108 if (function_trace_stop)
109 return;
110
111 __ftrace_trace_function(ip, parent_ip);
112}
113#endif
114
93static int __register_ftrace_function(struct ftrace_ops *ops) 115static int __register_ftrace_function(struct ftrace_ops *ops)
94{ 116{
95 /* should not be called from interrupt context */ 117 /* should not be called from interrupt context */
@@ -110,10 +132,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
110 * For one func, simply call it directly. 132 * For one func, simply call it directly.
111 * For more than one func, call the chain. 133 * For more than one func, call the chain.
112 */ 134 */
135#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
113 if (ops->next == &ftrace_list_end) 136 if (ops->next == &ftrace_list_end)
114 ftrace_trace_function = ops->func; 137 ftrace_trace_function = ops->func;
115 else 138 else
116 ftrace_trace_function = ftrace_list_func; 139 ftrace_trace_function = ftrace_list_func;
140#else
141 if (ops->next == &ftrace_list_end)
142 __ftrace_trace_function = ops->func;
143 else
144 __ftrace_trace_function = ftrace_list_func;
145 ftrace_trace_function = ftrace_test_stop_func;
146#endif
117 } 147 }
118 148
119 spin_unlock(&ftrace_lock); 149 spin_unlock(&ftrace_lock);
@@ -152,8 +182,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
152 182
153 if (ftrace_enabled) { 183 if (ftrace_enabled) {
154 /* If we only have one func left, then call that directly */ 184 /* If we only have one func left, then call that directly */
155 if (ftrace_list == &ftrace_list_end || 185 if (ftrace_list->next == &ftrace_list_end)
156 ftrace_list->next == &ftrace_list_end)
157 ftrace_trace_function = ftrace_list->func; 186 ftrace_trace_function = ftrace_list->func;
158 } 187 }
159 188
@@ -308,7 +337,7 @@ ftrace_record_ip(unsigned long ip)
308{ 337{
309 struct dyn_ftrace *rec; 338 struct dyn_ftrace *rec;
310 339
311 if (!ftrace_enabled || ftrace_disabled) 340 if (ftrace_disabled)
312 return NULL; 341 return NULL;
313 342
314 rec = ftrace_alloc_dyn_node(ip); 343 rec = ftrace_alloc_dyn_node(ip);
@@ -322,14 +351,58 @@ ftrace_record_ip(unsigned long ip)
322 return rec; 351 return rec;
323} 352}
324 353
325#define FTRACE_ADDR ((long)(ftrace_caller)) 354static void print_ip_ins(const char *fmt, unsigned char *p)
355{
356 int i;
357
358 printk(KERN_CONT "%s", fmt);
359
360 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
361 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
362}
363
364static void ftrace_bug(int failed, unsigned long ip)
365{
366 switch (failed) {
367 case -EFAULT:
368 FTRACE_WARN_ON_ONCE(1);
369 pr_info("ftrace faulted on modifying ");
370 print_ip_sym(ip);
371 break;
372 case -EINVAL:
373 FTRACE_WARN_ON_ONCE(1);
374 pr_info("ftrace failed to modify ");
375 print_ip_sym(ip);
376 print_ip_ins(" actual: ", (unsigned char *)ip);
377 printk(KERN_CONT "\n");
378 break;
379 case -EPERM:
380 FTRACE_WARN_ON_ONCE(1);
381 pr_info("ftrace faulted on writing ");
382 print_ip_sym(ip);
383 break;
384 default:
385 FTRACE_WARN_ON_ONCE(1);
386 pr_info("ftrace faulted on unknown error ");
387 print_ip_sym(ip);
388 }
389}
390
326 391
327static int 392static int
328__ftrace_replace_code(struct dyn_ftrace *rec, 393__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
329 unsigned char *nop, int enable)
330{ 394{
331 unsigned long ip, fl; 395 unsigned long ip, fl;
332 unsigned char *call, *old, *new; 396 unsigned long ftrace_addr;
397
398#ifdef CONFIG_FUNCTION_RET_TRACER
399 if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
400 ftrace_addr = (unsigned long)ftrace_caller;
401 else
402 ftrace_addr = (unsigned long)ftrace_return_caller;
403#else
404 ftrace_addr = (unsigned long)ftrace_caller;
405#endif
333 406
334 ip = rec->ip; 407 ip = rec->ip;
335 408
@@ -388,34 +461,28 @@ __ftrace_replace_code(struct dyn_ftrace *rec,
388 } 461 }
389 } 462 }
390 463
391 call = ftrace_call_replace(ip, FTRACE_ADDR); 464 if (rec->flags & FTRACE_FL_ENABLED)
392 465 return ftrace_make_call(rec, ftrace_addr);
393 if (rec->flags & FTRACE_FL_ENABLED) { 466 else
394 old = nop; 467 return ftrace_make_nop(NULL, rec, ftrace_addr);
395 new = call;
396 } else {
397 old = call;
398 new = nop;
399 }
400
401 return ftrace_modify_code(ip, old, new);
402} 468}
403 469
404static void ftrace_replace_code(int enable) 470static void ftrace_replace_code(int enable)
405{ 471{
406 int i, failed; 472 int i, failed;
407 unsigned char *nop = NULL;
408 struct dyn_ftrace *rec; 473 struct dyn_ftrace *rec;
409 struct ftrace_page *pg; 474 struct ftrace_page *pg;
410 475
411 nop = ftrace_nop_replace();
412
413 for (pg = ftrace_pages_start; pg; pg = pg->next) { 476 for (pg = ftrace_pages_start; pg; pg = pg->next) {
414 for (i = 0; i < pg->index; i++) { 477 for (i = 0; i < pg->index; i++) {
415 rec = &pg->records[i]; 478 rec = &pg->records[i];
416 479
417 /* don't modify code that has already faulted */ 480 /*
418 if (rec->flags & FTRACE_FL_FAILED) 481 * Skip over free records and records that have
482 * failed.
483 */
484 if (rec->flags & FTRACE_FL_FREE ||
485 rec->flags & FTRACE_FL_FAILED)
419 continue; 486 continue;
420 487
421 /* ignore updates to this record's mcount site */ 488 /* ignore updates to this record's mcount site */
@@ -426,68 +493,30 @@ static void ftrace_replace_code(int enable)
426 unfreeze_record(rec); 493 unfreeze_record(rec);
427 } 494 }
428 495
429 failed = __ftrace_replace_code(rec, nop, enable); 496 failed = __ftrace_replace_code(rec, enable);
430 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { 497 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
431 rec->flags |= FTRACE_FL_FAILED; 498 rec->flags |= FTRACE_FL_FAILED;
432 if ((system_state == SYSTEM_BOOTING) || 499 if ((system_state == SYSTEM_BOOTING) ||
433 !core_kernel_text(rec->ip)) { 500 !core_kernel_text(rec->ip)) {
434 ftrace_free_rec(rec); 501 ftrace_free_rec(rec);
435 } 502 } else
503 ftrace_bug(failed, rec->ip);
436 } 504 }
437 } 505 }
438 } 506 }
439} 507}
440 508
441static void print_ip_ins(const char *fmt, unsigned char *p)
442{
443 int i;
444
445 printk(KERN_CONT "%s", fmt);
446
447 for (i = 0; i < MCOUNT_INSN_SIZE; i++)
448 printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
449}
450
451static int 509static int
452ftrace_code_disable(struct dyn_ftrace *rec) 510ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
453{ 511{
454 unsigned long ip; 512 unsigned long ip;
455 unsigned char *nop, *call;
456 int ret; 513 int ret;
457 514
458 ip = rec->ip; 515 ip = rec->ip;
459 516
460 nop = ftrace_nop_replace(); 517 ret = ftrace_make_nop(mod, rec, mcount_addr);
461 call = ftrace_call_replace(ip, mcount_addr);
462
463 ret = ftrace_modify_code(ip, call, nop);
464 if (ret) { 518 if (ret) {
465 switch (ret) { 519 ftrace_bug(ret, ip);
466 case -EFAULT:
467 FTRACE_WARN_ON_ONCE(1);
468 pr_info("ftrace faulted on modifying ");
469 print_ip_sym(ip);
470 break;
471 case -EINVAL:
472 FTRACE_WARN_ON_ONCE(1);
473 pr_info("ftrace failed to modify ");
474 print_ip_sym(ip);
475 print_ip_ins(" expected: ", call);
476 print_ip_ins(" actual: ", (unsigned char *)ip);
477 print_ip_ins(" replace: ", nop);
478 printk(KERN_CONT "\n");
479 break;
480 case -EPERM:
481 FTRACE_WARN_ON_ONCE(1);
482 pr_info("ftrace faulted on writing ");
483 print_ip_sym(ip);
484 break;
485 default:
486 FTRACE_WARN_ON_ONCE(1);
487 pr_info("ftrace faulted on unknown error ");
488 print_ip_sym(ip);
489 }
490
491 rec->flags |= FTRACE_FL_FAILED; 520 rec->flags |= FTRACE_FL_FAILED;
492 return 0; 521 return 0;
493 } 522 }
@@ -515,7 +544,7 @@ static void ftrace_run_update_code(int command)
515} 544}
516 545
517static ftrace_func_t saved_ftrace_func; 546static ftrace_func_t saved_ftrace_func;
518static int ftrace_start; 547static int ftrace_start_up;
519static DEFINE_MUTEX(ftrace_start_lock); 548static DEFINE_MUTEX(ftrace_start_lock);
520 549
521static void ftrace_startup(void) 550static void ftrace_startup(void)
@@ -526,7 +555,7 @@ static void ftrace_startup(void)
526 return; 555 return;
527 556
528 mutex_lock(&ftrace_start_lock); 557 mutex_lock(&ftrace_start_lock);
529 ftrace_start++; 558 ftrace_start_up++;
530 command |= FTRACE_ENABLE_CALLS; 559 command |= FTRACE_ENABLE_CALLS;
531 560
532 if (saved_ftrace_func != ftrace_trace_function) { 561 if (saved_ftrace_func != ftrace_trace_function) {
@@ -550,8 +579,8 @@ static void ftrace_shutdown(void)
550 return; 579 return;
551 580
552 mutex_lock(&ftrace_start_lock); 581 mutex_lock(&ftrace_start_lock);
553 ftrace_start--; 582 ftrace_start_up--;
554 if (!ftrace_start) 583 if (!ftrace_start_up)
555 command |= FTRACE_DISABLE_CALLS; 584 command |= FTRACE_DISABLE_CALLS;
556 585
557 if (saved_ftrace_func != ftrace_trace_function) { 586 if (saved_ftrace_func != ftrace_trace_function) {
@@ -577,8 +606,8 @@ static void ftrace_startup_sysctl(void)
577 mutex_lock(&ftrace_start_lock); 606 mutex_lock(&ftrace_start_lock);
578 /* Force update next time */ 607 /* Force update next time */
579 saved_ftrace_func = NULL; 608 saved_ftrace_func = NULL;
580 /* ftrace_start is true if we want ftrace running */ 609 /* ftrace_start_up is true if we want ftrace running */
581 if (ftrace_start) 610 if (ftrace_start_up)
582 command |= FTRACE_ENABLE_CALLS; 611 command |= FTRACE_ENABLE_CALLS;
583 612
584 ftrace_run_update_code(command); 613 ftrace_run_update_code(command);
@@ -593,8 +622,8 @@ static void ftrace_shutdown_sysctl(void)
593 return; 622 return;
594 623
595 mutex_lock(&ftrace_start_lock); 624 mutex_lock(&ftrace_start_lock);
596 /* ftrace_start is true if ftrace is running */ 625 /* ftrace_start_up is true if ftrace is running */
597 if (ftrace_start) 626 if (ftrace_start_up)
598 command |= FTRACE_DISABLE_CALLS; 627 command |= FTRACE_DISABLE_CALLS;
599 628
600 ftrace_run_update_code(command); 629 ftrace_run_update_code(command);
@@ -605,7 +634,7 @@ static cycle_t ftrace_update_time;
605static unsigned long ftrace_update_cnt; 634static unsigned long ftrace_update_cnt;
606unsigned long ftrace_update_tot_cnt; 635unsigned long ftrace_update_tot_cnt;
607 636
608static int ftrace_update_code(void) 637static int ftrace_update_code(struct module *mod)
609{ 638{
610 struct dyn_ftrace *p, *t; 639 struct dyn_ftrace *p, *t;
611 cycle_t start, stop; 640 cycle_t start, stop;
@@ -622,7 +651,7 @@ static int ftrace_update_code(void)
622 list_del_init(&p->list); 651 list_del_init(&p->list);
623 652
624 /* convert record (i.e, patch mcount-call with NOP) */ 653 /* convert record (i.e, patch mcount-call with NOP) */
625 if (ftrace_code_disable(p)) { 654 if (ftrace_code_disable(mod, p)) {
626 p->flags |= FTRACE_FL_CONVERTED; 655 p->flags |= FTRACE_FL_CONVERTED;
627 ftrace_update_cnt++; 656 ftrace_update_cnt++;
628 } else 657 } else
@@ -1181,7 +1210,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1181 1210
1182 mutex_lock(&ftrace_sysctl_lock); 1211 mutex_lock(&ftrace_sysctl_lock);
1183 mutex_lock(&ftrace_start_lock); 1212 mutex_lock(&ftrace_start_lock);
1184 if (ftrace_start && ftrace_enabled) 1213 if (ftrace_start_up && ftrace_enabled)
1185 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1214 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1186 mutex_unlock(&ftrace_start_lock); 1215 mutex_unlock(&ftrace_start_lock);
1187 mutex_unlock(&ftrace_sysctl_lock); 1216 mutex_unlock(&ftrace_sysctl_lock);
@@ -1268,7 +1297,8 @@ static __init int ftrace_init_debugfs(void)
1268 1297
1269fs_initcall(ftrace_init_debugfs); 1298fs_initcall(ftrace_init_debugfs);
1270 1299
1271static int ftrace_convert_nops(unsigned long *start, 1300static int ftrace_convert_nops(struct module *mod,
1301 unsigned long *start,
1272 unsigned long *end) 1302 unsigned long *end)
1273{ 1303{
1274 unsigned long *p; 1304 unsigned long *p;
@@ -1279,23 +1309,32 @@ static int ftrace_convert_nops(unsigned long *start,
1279 p = start; 1309 p = start;
1280 while (p < end) { 1310 while (p < end) {
1281 addr = ftrace_call_adjust(*p++); 1311 addr = ftrace_call_adjust(*p++);
1312 /*
1313 * Some architecture linkers will pad between
1314 * the different mcount_loc sections of different
1315 * object files to satisfy alignments.
1316 * Skip any NULL pointers.
1317 */
1318 if (!addr)
1319 continue;
1282 ftrace_record_ip(addr); 1320 ftrace_record_ip(addr);
1283 } 1321 }
1284 1322
1285 /* disable interrupts to prevent kstop machine */ 1323 /* disable interrupts to prevent kstop machine */
1286 local_irq_save(flags); 1324 local_irq_save(flags);
1287 ftrace_update_code(); 1325 ftrace_update_code(mod);
1288 local_irq_restore(flags); 1326 local_irq_restore(flags);
1289 mutex_unlock(&ftrace_start_lock); 1327 mutex_unlock(&ftrace_start_lock);
1290 1328
1291 return 0; 1329 return 0;
1292} 1330}
1293 1331
1294void ftrace_init_module(unsigned long *start, unsigned long *end) 1332void ftrace_init_module(struct module *mod,
1333 unsigned long *start, unsigned long *end)
1295{ 1334{
1296 if (ftrace_disabled || start == end) 1335 if (ftrace_disabled || start == end)
1297 return; 1336 return;
1298 ftrace_convert_nops(start, end); 1337 ftrace_convert_nops(mod, start, end);
1299} 1338}
1300 1339
1301extern unsigned long __start_mcount_loc[]; 1340extern unsigned long __start_mcount_loc[];
@@ -1325,7 +1364,8 @@ void __init ftrace_init(void)
1325 1364
1326 last_ftrace_enabled = ftrace_enabled = 1; 1365 last_ftrace_enabled = ftrace_enabled = 1;
1327 1366
1328 ret = ftrace_convert_nops(__start_mcount_loc, 1367 ret = ftrace_convert_nops(NULL,
1368 __start_mcount_loc,
1329 __stop_mcount_loc); 1369 __stop_mcount_loc);
1330 1370
1331 return; 1371 return;
@@ -1381,10 +1421,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
1381 return -1; 1421 return -1;
1382 1422
1383 mutex_lock(&ftrace_sysctl_lock); 1423 mutex_lock(&ftrace_sysctl_lock);
1424
1425 if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
1426 ret = -EBUSY;
1427 goto out;
1428 }
1429
1384 ret = __register_ftrace_function(ops); 1430 ret = __register_ftrace_function(ops);
1385 ftrace_startup(); 1431 ftrace_startup();
1386 mutex_unlock(&ftrace_sysctl_lock);
1387 1432
1433out:
1434 mutex_unlock(&ftrace_sysctl_lock);
1388 return ret; 1435 return ret;
1389} 1436}
1390 1437
@@ -1449,3 +1496,48 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1449 return ret; 1496 return ret;
1450} 1497}
1451 1498
1499#ifdef CONFIG_FUNCTION_RET_TRACER
1500
1501/* The callback that hooks the return of a function */
1502trace_function_return_t ftrace_function_return =
1503 (trace_function_return_t)ftrace_stub;
1504
1505int register_ftrace_return(trace_function_return_t func)
1506{
1507 int ret = 0;
1508
1509 mutex_lock(&ftrace_sysctl_lock);
1510
1511 /*
1512 * Don't launch return tracing if normal function
1513 * tracing is already running.
1514 */
1515 if (ftrace_trace_function != ftrace_stub) {
1516 ret = -EBUSY;
1517 goto out;
1518 }
1519
1520 ftrace_tracing_type = FTRACE_TYPE_RETURN;
1521 ftrace_function_return = func;
1522 ftrace_startup();
1523
1524out:
1525 mutex_unlock(&ftrace_sysctl_lock);
1526 return ret;
1527}
1528
1529void unregister_ftrace_return(void)
1530{
1531 mutex_lock(&ftrace_sysctl_lock);
1532
1533 ftrace_function_return = (trace_function_return_t)ftrace_stub;
1534 ftrace_shutdown();
1535 /* Restore normal tracing type */
1536 ftrace_tracing_type = FTRACE_TYPE_ENTER;
1537
1538 mutex_unlock(&ftrace_sysctl_lock);
1539}
1540#endif
1541
1542
1543
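
The ring_buffer.c hunks that follow replace the open-coded `resched = need_resched(); preempt_disable_notrace();` sequences (and the matching conditional preempt_enable_no_resched_notrace()/preempt_enable_notrace() on the way out) with ftrace_preempt_disable()/ftrace_preempt_enable() helpers from the newly included trace.h, which is not part of this diff. A sketch of what those helpers amount to, reconstructed from the code they replace:

/*
 * Sketch reconstructed from the call sites below; the real helpers are
 * defined in kernel/trace/trace.h (not shown in this diff).
 *
 * If NEED_RESCHED is already set when the tracer disables preemption,
 * the matching enable must not reschedule from inside the tracer, so
 * remember that state and pick the right enable flavor.
 */
static inline int ftrace_preempt_disable(void)
{
	int resched;

	resched = need_resched();
	preempt_disable_notrace();

	return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}

Remembering need_resched() matters because the ring buffer can be used while tracing the scheduler itself; re-entering schedule() from the tracer's preempt-enable path would recurse.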
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f780e9552f91..85ced143c2c4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -45,6 +45,8 @@ void tracing_off(void)
45 ring_buffers_off = 1; 45 ring_buffers_off = 1;
46} 46}
47 47
48#include "trace.h"
49
48/* Up this if you want to test the TIME_EXTENTS and normalization */ 50/* Up this if you want to test the TIME_EXTENTS and normalization */
49#define DEBUG_SHIFT 0 51#define DEBUG_SHIFT 0
50 52
@@ -187,7 +189,8 @@ static inline int test_time_stamp(u64 delta)
187struct ring_buffer_per_cpu { 189struct ring_buffer_per_cpu {
188 int cpu; 190 int cpu;
189 struct ring_buffer *buffer; 191 struct ring_buffer *buffer;
190 spinlock_t lock; 192 spinlock_t reader_lock; /* serialize readers */
193 raw_spinlock_t lock;
191 struct lock_class_key lock_key; 194 struct lock_class_key lock_key;
192 struct list_head pages; 195 struct list_head pages;
193 struct buffer_page *head_page; /* read from head */ 196 struct buffer_page *head_page; /* read from head */
@@ -221,32 +224,16 @@ struct ring_buffer_iter {
221 u64 read_stamp; 224 u64 read_stamp;
222}; 225};
223 226
227/* buffer may be either ring_buffer or ring_buffer_per_cpu */
224#define RB_WARN_ON(buffer, cond) \ 228#define RB_WARN_ON(buffer, cond) \
225 do { \ 229 ({ \
226 if (unlikely(cond)) { \ 230 int _____ret = unlikely(cond); \
227 atomic_inc(&buffer->record_disabled); \ 231 if (_____ret) { \
228 WARN_ON(1); \
229 } \
230 } while (0)
231
232#define RB_WARN_ON_RET(buffer, cond) \
233 do { \
234 if (unlikely(cond)) { \
235 atomic_inc(&buffer->record_disabled); \
236 WARN_ON(1); \
237 return -1; \
238 } \
239 } while (0)
240
241#define RB_WARN_ON_ONCE(buffer, cond) \
242 do { \
243 static int once; \
244 if (unlikely(cond) && !once) { \
245 once++; \
246 atomic_inc(&buffer->record_disabled); \ 232 atomic_inc(&buffer->record_disabled); \
247 WARN_ON(1); \ 233 WARN_ON(1); \
248 } \ 234 } \
249 } while (0) 235 _____ret; \
236 })
250 237
251/** 238/**
252 * check_pages - integrity check of buffer pages 239 * check_pages - integrity check of buffer pages
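
The rewritten RB_WARN_ON() above is a GCC statement expression: it still disables recording and fires WARN_ON(1), but now also evaluates to the (unlikely) condition, which is what lets the later hunks collapse RB_WARN_ON_RET and RB_WARN_ON_ONCE into plain `if (RB_WARN_ON(...))` checks with a caller-chosen bail-out. A small standalone illustration of the same pattern (userspace, hypothetical names, not kernel code):

#include <stdio.h>

#define WARN_ON_COND(cond)						\
	({								\
		int _____ret = !!(cond);				\
		if (_____ret)						\
			fprintf(stderr, "warning: %s failed\n", #cond);	\
		_____ret;						\
	})

static int push(int *len, int cap)
{
	/* the caller decides how to bail out when the check fires */
	if (WARN_ON_COND(*len >= cap))
		return -1;
	(*len)++;
	return 0;
}

int main(void)
{
	int len = 0;

	while (!push(&len, 3))
		;
	printf("stopped at len=%d\n", len);
	return 0;
}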
@@ -260,14 +247,18 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
260 struct list_head *head = &cpu_buffer->pages; 247 struct list_head *head = &cpu_buffer->pages;
261 struct buffer_page *page, *tmp; 248 struct buffer_page *page, *tmp;
262 249
263 RB_WARN_ON_RET(cpu_buffer, head->next->prev != head); 250 if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
264 RB_WARN_ON_RET(cpu_buffer, head->prev->next != head); 251 return -1;
252 if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
253 return -1;
265 254
266 list_for_each_entry_safe(page, tmp, head, list) { 255 list_for_each_entry_safe(page, tmp, head, list) {
267 RB_WARN_ON_RET(cpu_buffer, 256 if (RB_WARN_ON(cpu_buffer,
268 page->list.next->prev != &page->list); 257 page->list.next->prev != &page->list))
269 RB_WARN_ON_RET(cpu_buffer, 258 return -1;
270 page->list.prev->next != &page->list); 259 if (RB_WARN_ON(cpu_buffer,
260 page->list.prev->next != &page->list))
261 return -1;
271 } 262 }
272 263
273 return 0; 264 return 0;
@@ -324,7 +315,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
324 315
325 cpu_buffer->cpu = cpu; 316 cpu_buffer->cpu = cpu;
326 cpu_buffer->buffer = buffer; 317 cpu_buffer->buffer = buffer;
327 spin_lock_init(&cpu_buffer->lock); 318 spin_lock_init(&cpu_buffer->reader_lock);
319 cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
328 INIT_LIST_HEAD(&cpu_buffer->pages); 320 INIT_LIST_HEAD(&cpu_buffer->pages);
329 321
330 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()), 322 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
@@ -473,13 +465,15 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
473 synchronize_sched(); 465 synchronize_sched();
474 466
475 for (i = 0; i < nr_pages; i++) { 467 for (i = 0; i < nr_pages; i++) {
476 BUG_ON(list_empty(&cpu_buffer->pages)); 468 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
469 return;
477 p = cpu_buffer->pages.next; 470 p = cpu_buffer->pages.next;
478 page = list_entry(p, struct buffer_page, list); 471 page = list_entry(p, struct buffer_page, list);
479 list_del_init(&page->list); 472 list_del_init(&page->list);
480 free_buffer_page(page); 473 free_buffer_page(page);
481 } 474 }
482 BUG_ON(list_empty(&cpu_buffer->pages)); 475 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
476 return;
483 477
484 rb_reset_cpu(cpu_buffer); 478 rb_reset_cpu(cpu_buffer);
485 479
@@ -501,7 +495,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
501 synchronize_sched(); 495 synchronize_sched();
502 496
503 for (i = 0; i < nr_pages; i++) { 497 for (i = 0; i < nr_pages; i++) {
504 BUG_ON(list_empty(pages)); 498 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
499 return;
505 p = pages->next; 500 p = pages->next;
506 page = list_entry(p, struct buffer_page, list); 501 page = list_entry(p, struct buffer_page, list);
507 list_del_init(&page->list); 502 list_del_init(&page->list);
@@ -562,7 +557,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
562 if (size < buffer_size) { 557 if (size < buffer_size) {
563 558
564 /* easy case, just free pages */ 559 /* easy case, just free pages */
565 BUG_ON(nr_pages >= buffer->pages); 560 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
561 mutex_unlock(&buffer->mutex);
562 return -1;
563 }
566 564
567 rm_pages = buffer->pages - nr_pages; 565 rm_pages = buffer->pages - nr_pages;
568 566
@@ -581,7 +579,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
581 * add these pages to the cpu_buffers. Otherwise we just free 579 * add these pages to the cpu_buffers. Otherwise we just free
582 * them all and return -ENOMEM; 580 * them all and return -ENOMEM;
583 */ 581 */
584 BUG_ON(nr_pages <= buffer->pages); 582 if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
583 mutex_unlock(&buffer->mutex);
584 return -1;
585 }
586
585 new_pages = nr_pages - buffer->pages; 587 new_pages = nr_pages - buffer->pages;
586 588
587 for_each_buffer_cpu(buffer, cpu) { 589 for_each_buffer_cpu(buffer, cpu) {
@@ -604,7 +606,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
604 rb_insert_pages(cpu_buffer, &pages, new_pages); 606 rb_insert_pages(cpu_buffer, &pages, new_pages);
605 } 607 }
606 608
607 BUG_ON(!list_empty(&pages)); 609 if (RB_WARN_ON(buffer, !list_empty(&pages))) {
610 mutex_unlock(&buffer->mutex);
611 return -1;
612 }
608 613
609 out: 614 out:
610 buffer->pages = nr_pages; 615 buffer->pages = nr_pages;
@@ -693,7 +698,8 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 head += rb_event_length(event)) { 698 head += rb_event_length(event)) {
694 699
695 event = __rb_page_index(cpu_buffer->head_page, head); 700 event = __rb_page_index(cpu_buffer->head_page, head);
696 BUG_ON(rb_null_event(event)); 701 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
702 return;
697 /* Only count data entries */ 703 /* Only count data entries */
698 if (event->type != RINGBUF_TYPE_DATA) 704 if (event->type != RINGBUF_TYPE_DATA)
699 continue; 705 continue;
@@ -746,8 +752,9 @@ rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746 addr &= PAGE_MASK; 752 addr &= PAGE_MASK;
747 753
748 while (cpu_buffer->commit_page->page != (void *)addr) { 754 while (cpu_buffer->commit_page->page != (void *)addr) {
749 RB_WARN_ON(cpu_buffer, 755 if (RB_WARN_ON(cpu_buffer,
750 cpu_buffer->commit_page == cpu_buffer->tail_page); 756 cpu_buffer->commit_page == cpu_buffer->tail_page))
757 return;
751 cpu_buffer->commit_page->commit = 758 cpu_buffer->commit_page->commit =
752 cpu_buffer->commit_page->write; 759 cpu_buffer->commit_page->write;
753 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); 760 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
@@ -894,7 +901,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
894 if (write > BUF_PAGE_SIZE) { 901 if (write > BUF_PAGE_SIZE) {
895 struct buffer_page *next_page = tail_page; 902 struct buffer_page *next_page = tail_page;
896 903
897 spin_lock_irqsave(&cpu_buffer->lock, flags); 904 local_irq_save(flags);
905 __raw_spin_lock(&cpu_buffer->lock);
898 906
899 rb_inc_page(cpu_buffer, &next_page); 907 rb_inc_page(cpu_buffer, &next_page);
900 908
@@ -902,7 +910,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
902 reader_page = cpu_buffer->reader_page; 910 reader_page = cpu_buffer->reader_page;
903 911
904 /* we grabbed the lock before incrementing */ 912 /* we grabbed the lock before incrementing */
905 RB_WARN_ON(cpu_buffer, next_page == reader_page); 913 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
914 goto out_unlock;
906 915
907 /* 916 /*
908 * If for some reason, we had an interrupt storm that made 917 * If for some reason, we had an interrupt storm that made
@@ -970,7 +979,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
970 rb_set_commit_to_write(cpu_buffer); 979 rb_set_commit_to_write(cpu_buffer);
971 } 980 }
972 981
973 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 982 __raw_spin_unlock(&cpu_buffer->lock);
983 local_irq_restore(flags);
974 984
975 /* fail and let the caller try again */ 985 /* fail and let the caller try again */
976 return ERR_PTR(-EAGAIN); 986 return ERR_PTR(-EAGAIN);
@@ -978,7 +988,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
978 988
979 /* We reserved something on the buffer */ 989 /* We reserved something on the buffer */
980 990
981 BUG_ON(write > BUF_PAGE_SIZE); 991 if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
992 return NULL;
982 993
983 event = __rb_page_index(tail_page, tail); 994 event = __rb_page_index(tail_page, tail);
984 rb_update_event(event, type, length); 995 rb_update_event(event, type, length);
@@ -993,7 +1004,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
993 return event; 1004 return event;
994 1005
995 out_unlock: 1006 out_unlock:
996 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1007 __raw_spin_unlock(&cpu_buffer->lock);
1008 local_irq_restore(flags);
997 return NULL; 1009 return NULL;
998} 1010}
999 1011
@@ -1076,10 +1088,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1076 * storm or we have something buggy. 1088 * storm or we have something buggy.
1077 * Bail! 1089 * Bail!
1078 */ 1090 */
1079 if (unlikely(++nr_loops > 1000)) { 1091 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1080 RB_WARN_ON(cpu_buffer, 1);
1081 return NULL; 1092 return NULL;
1082 }
1083 1093
1084 ts = ring_buffer_time_stamp(cpu_buffer->cpu); 1094 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1085 1095
@@ -1182,8 +1192,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1182 return NULL; 1192 return NULL;
1183 1193
1184 /* If we are tracing schedule, we don't want to recurse */ 1194 /* If we are tracing schedule, we don't want to recurse */
1185 resched = need_resched(); 1195 resched = ftrace_preempt_disable();
1186 preempt_disable_notrace();
1187 1196
1188 cpu = raw_smp_processor_id(); 1197 cpu = raw_smp_processor_id();
1189 1198
@@ -1214,10 +1223,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
1214 return event; 1223 return event;
1215 1224
1216 out: 1225 out:
1217 if (resched) 1226 ftrace_preempt_enable(resched);
1218 preempt_enable_notrace();
1219 else
1220 preempt_enable_notrace();
1221 return NULL; 1227 return NULL;
1222} 1228}
1223 1229
@@ -1259,12 +1265,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1259 /* 1265 /*
1260 * Only the last preempt count needs to restore preemption. 1266 * Only the last preempt count needs to restore preemption.
1261 */ 1267 */
1262 if (preempt_count() == 1) { 1268 if (preempt_count() == 1)
1263 if (per_cpu(rb_need_resched, cpu)) 1269 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1264 preempt_enable_no_resched_notrace(); 1270 else
1265 else
1266 preempt_enable_notrace();
1267 } else
1268 preempt_enable_no_resched_notrace(); 1271 preempt_enable_no_resched_notrace();
1269 1272
1270 return 0; 1273 return 0;
@@ -1300,8 +1303,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1300 if (atomic_read(&buffer->record_disabled)) 1303 if (atomic_read(&buffer->record_disabled))
1301 return -EBUSY; 1304 return -EBUSY;
1302 1305
1303 resched = need_resched(); 1306 resched = ftrace_preempt_disable();
1304 preempt_disable_notrace();
1305 1307
1306 cpu = raw_smp_processor_id(); 1308 cpu = raw_smp_processor_id();
1307 1309
@@ -1327,10 +1329,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
1327 1329
1328 ret = 0; 1330 ret = 0;
1329 out: 1331 out:
1330 if (resched) 1332 ftrace_preempt_enable(resched);
1331 preempt_enable_no_resched_notrace();
1332 else
1333 preempt_enable_notrace();
1334 1333
1335 return ret; 1334 return ret;
1336} 1335}
@@ -1489,14 +1488,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1489 return overruns; 1488 return overruns;
1490} 1489}
1491 1490
1492/** 1491static void rb_iter_reset(struct ring_buffer_iter *iter)
1493 * ring_buffer_iter_reset - reset an iterator
1494 * @iter: The iterator to reset
1495 *
1496 * Resets the iterator, so that it will start from the beginning
1497 * again.
1498 */
1499void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1500{ 1492{
1501 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 1493 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1502 1494
@@ -1515,6 +1507,23 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1515} 1507}
1516 1508
1517/** 1509/**
1510 * ring_buffer_iter_reset - reset an iterator
1511 * @iter: The iterator to reset
1512 *
1513 * Resets the iterator, so that it will start from the beginning
1514 * again.
1515 */
1516void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1517{
1518 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1519 unsigned long flags;
1520
1521 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1522 rb_iter_reset(iter);
1523 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1524}
1525
1526/**
1518 * ring_buffer_iter_empty - check if an iterator has no more to read 1527 * ring_buffer_iter_empty - check if an iterator has no more to read
1519 * @iter: The iterator to check 1528 * @iter: The iterator to check
1520 */ 1529 */
@@ -1597,7 +1606,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1597 unsigned long flags; 1606 unsigned long flags;
1598 int nr_loops = 0; 1607 int nr_loops = 0;
1599 1608
1600 spin_lock_irqsave(&cpu_buffer->lock, flags); 1609 local_irq_save(flags);
1610 __raw_spin_lock(&cpu_buffer->lock);
1601 1611
1602 again: 1612 again:
1603 /* 1613 /*
@@ -1606,8 +1616,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1606 * a case where we will loop three times. There should be no 1616 * a case where we will loop three times. There should be no
1607 * reason to loop four times (that I know of). 1617 * reason to loop four times (that I know of).
1608 */ 1618 */
1609 if (unlikely(++nr_loops > 3)) { 1619 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1610 RB_WARN_ON(cpu_buffer, 1);
1611 reader = NULL; 1620 reader = NULL;
1612 goto out; 1621 goto out;
1613 } 1622 }
@@ -1619,8 +1628,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1619 goto out; 1628 goto out;
1620 1629
1621 /* Never should we have an index greater than the size */ 1630 /* Never should we have an index greater than the size */
1622 RB_WARN_ON(cpu_buffer, 1631 if (RB_WARN_ON(cpu_buffer,
1623 cpu_buffer->reader_page->read > rb_page_size(reader)); 1632 cpu_buffer->reader_page->read > rb_page_size(reader)))
1633 goto out;
1624 1634
1625 /* check if we caught up to the tail */ 1635 /* check if we caught up to the tail */
1626 reader = NULL; 1636 reader = NULL;
@@ -1659,7 +1669,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1659 goto again; 1669 goto again;
1660 1670
1661 out: 1671 out:
1662 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1672 __raw_spin_unlock(&cpu_buffer->lock);
1673 local_irq_restore(flags);
1663 1674
1664 return reader; 1675 return reader;
1665} 1676}
@@ -1673,7 +1684,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1673 reader = rb_get_reader_page(cpu_buffer); 1684 reader = rb_get_reader_page(cpu_buffer);
1674 1685
1675 /* This function should not be called when buffer is empty */ 1686 /* This function should not be called when buffer is empty */
1676 BUG_ON(!reader); 1687 if (RB_WARN_ON(cpu_buffer, !reader))
1688 return;
1677 1689
1678 event = rb_reader_event(cpu_buffer); 1690 event = rb_reader_event(cpu_buffer);
1679 1691
@@ -1700,7 +1712,9 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1700 * Check if we are at the end of the buffer. 1712 * Check if we are at the end of the buffer.
1701 */ 1713 */
1702 if (iter->head >= rb_page_size(iter->head_page)) { 1714 if (iter->head >= rb_page_size(iter->head_page)) {
1703 BUG_ON(iter->head_page == cpu_buffer->commit_page); 1715 if (RB_WARN_ON(buffer,
1716 iter->head_page == cpu_buffer->commit_page))
1717 return;
1704 rb_inc_iter(iter); 1718 rb_inc_iter(iter);
1705 return; 1719 return;
1706 } 1720 }
@@ -1713,8 +1727,10 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1713 * This should not be called to advance the header if we are 1727 * This should not be called to advance the header if we are
1714 * at the tail of the buffer. 1728 * at the tail of the buffer.
1715 */ 1729 */
1716 BUG_ON((iter->head_page == cpu_buffer->commit_page) && 1730 if (RB_WARN_ON(cpu_buffer,
1717 (iter->head + length > rb_commit_index(cpu_buffer))); 1731 (iter->head_page == cpu_buffer->commit_page) &&
1732 (iter->head + length > rb_commit_index(cpu_buffer))))
1733 return;
1718 1734
1719 rb_update_iter_read_stamp(iter, event); 1735 rb_update_iter_read_stamp(iter, event);
1720 1736
@@ -1726,17 +1742,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
1726 rb_advance_iter(iter); 1742 rb_advance_iter(iter);
1727} 1743}
1728 1744
1729/** 1745static struct ring_buffer_event *
1730 * ring_buffer_peek - peek at the next event to be read 1746rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1731 * @buffer: The ring buffer to read
 1732 * @cpu: The cpu to peek at
1733 * @ts: The timestamp counter of this event.
1734 *
1735 * This will return the event that will be read next, but does
1736 * not consume the data.
1737 */
1738struct ring_buffer_event *
1739ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1740{ 1747{
1741 struct ring_buffer_per_cpu *cpu_buffer; 1748 struct ring_buffer_per_cpu *cpu_buffer;
1742 struct ring_buffer_event *event; 1749 struct ring_buffer_event *event;
@@ -1757,10 +1764,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1757 * can have. Nesting 10 deep of interrupts is clearly 1764 * can have. Nesting 10 deep of interrupts is clearly
1758 * an anomaly. 1765 * an anomaly.
1759 */ 1766 */
1760 if (unlikely(++nr_loops > 10)) { 1767 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1761 RB_WARN_ON(cpu_buffer, 1);
1762 return NULL; 1768 return NULL;
1763 }
1764 1769
1765 reader = rb_get_reader_page(cpu_buffer); 1770 reader = rb_get_reader_page(cpu_buffer);
1766 if (!reader) 1771 if (!reader)
@@ -1798,16 +1803,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1798 return NULL; 1803 return NULL;
1799} 1804}
1800 1805
1801/** 1806static struct ring_buffer_event *
1802 * ring_buffer_iter_peek - peek at the next event to be read 1807rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1803 * @iter: The ring buffer iterator
1804 * @ts: The timestamp counter of this event.
1805 *
1806 * This will return the event that will be read next, but does
1807 * not increment the iterator.
1808 */
1809struct ring_buffer_event *
1810ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1811{ 1808{
1812 struct ring_buffer *buffer; 1809 struct ring_buffer *buffer;
1813 struct ring_buffer_per_cpu *cpu_buffer; 1810 struct ring_buffer_per_cpu *cpu_buffer;
@@ -1829,10 +1826,8 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1829 * can have. Nesting 10 deep of interrupts is clearly 1826 * can have. Nesting 10 deep of interrupts is clearly
1830 * an anomaly. 1827 * an anomaly.
1831 */ 1828 */
1832 if (unlikely(++nr_loops > 10)) { 1829 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1833 RB_WARN_ON(cpu_buffer, 1);
1834 return NULL; 1830 return NULL;
1835 }
1836 1831
1837 if (rb_per_cpu_empty(cpu_buffer)) 1832 if (rb_per_cpu_empty(cpu_buffer))
1838 return NULL; 1833 return NULL;
@@ -1869,6 +1864,51 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1869} 1864}
1870 1865
1871/** 1866/**
1867 * ring_buffer_peek - peek at the next event to be read
1868 * @buffer: The ring buffer to read
 1869 * @cpu: The cpu to peek at
1870 * @ts: The timestamp counter of this event.
1871 *
1872 * This will return the event that will be read next, but does
1873 * not consume the data.
1874 */
1875struct ring_buffer_event *
1876ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1877{
1878 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1879 struct ring_buffer_event *event;
1880 unsigned long flags;
1881
1882 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1883 event = rb_buffer_peek(buffer, cpu, ts);
1884 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1885
1886 return event;
1887}
1888
1889/**
1890 * ring_buffer_iter_peek - peek at the next event to be read
1891 * @iter: The ring buffer iterator
1892 * @ts: The timestamp counter of this event.
1893 *
1894 * This will return the event that will be read next, but does
1895 * not increment the iterator.
1896 */
1897struct ring_buffer_event *
1898ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1899{
1900 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1901 struct ring_buffer_event *event;
1902 unsigned long flags;
1903
1904 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1905 event = rb_iter_peek(iter, ts);
1906 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1907
1908 return event;
1909}
1910
1911/**
1872 * ring_buffer_consume - return an event and consume it 1912 * ring_buffer_consume - return an event and consume it
1873 * @buffer: The ring buffer to get the next event from 1913 * @buffer: The ring buffer to get the next event from
1874 * 1914 *
@@ -1879,19 +1919,24 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1879struct ring_buffer_event * 1919struct ring_buffer_event *
1880ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) 1920ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1881{ 1921{
1882 struct ring_buffer_per_cpu *cpu_buffer; 1922 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1883 struct ring_buffer_event *event; 1923 struct ring_buffer_event *event;
1924 unsigned long flags;
1884 1925
1885 if (!cpu_isset(cpu, buffer->cpumask)) 1926 if (!cpu_isset(cpu, buffer->cpumask))
1886 return NULL; 1927 return NULL;
1887 1928
1888 event = ring_buffer_peek(buffer, cpu, ts); 1929 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1930
1931 event = rb_buffer_peek(buffer, cpu, ts);
1889 if (!event) 1932 if (!event)
1890 return NULL; 1933 goto out;
1891 1934
1892 cpu_buffer = buffer->buffers[cpu];
1893 rb_advance_reader(cpu_buffer); 1935 rb_advance_reader(cpu_buffer);
1894 1936
1937 out:
1938 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1939
1895 return event; 1940 return event;
1896} 1941}
1897 1942
@@ -1928,9 +1973,11 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1928 atomic_inc(&cpu_buffer->record_disabled); 1973 atomic_inc(&cpu_buffer->record_disabled);
1929 synchronize_sched(); 1974 synchronize_sched();
1930 1975
1931 spin_lock_irqsave(&cpu_buffer->lock, flags); 1976 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1932 ring_buffer_iter_reset(iter); 1977 __raw_spin_lock(&cpu_buffer->lock);
1933 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 1978 rb_iter_reset(iter);
1979 __raw_spin_unlock(&cpu_buffer->lock);
1980 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1934 1981
1935 return iter; 1982 return iter;
1936} 1983}
@@ -1962,12 +2009,17 @@ struct ring_buffer_event *
1962ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) 2009ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1963{ 2010{
1964 struct ring_buffer_event *event; 2011 struct ring_buffer_event *event;
2012 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2013 unsigned long flags;
1965 2014
1966 event = ring_buffer_iter_peek(iter, ts); 2015 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2016 event = rb_iter_peek(iter, ts);
1967 if (!event) 2017 if (!event)
1968 return NULL; 2018 goto out;
1969 2019
1970 rb_advance_iter(iter); 2020 rb_advance_iter(iter);
2021 out:
2022 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1971 2023
1972 return event; 2024 return event;
1973} 2025}
@@ -2016,11 +2068,15 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2016 if (!cpu_isset(cpu, buffer->cpumask)) 2068 if (!cpu_isset(cpu, buffer->cpumask))
2017 return; 2069 return;
2018 2070
2019 spin_lock_irqsave(&cpu_buffer->lock, flags); 2071 spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2072
2073 __raw_spin_lock(&cpu_buffer->lock);
2020 2074
2021 rb_reset_cpu(cpu_buffer); 2075 rb_reset_cpu(cpu_buffer);
2022 2076
2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2077 __raw_spin_unlock(&cpu_buffer->lock);
2078
2079 spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2024} 2080}
2025 2081
2026/** 2082/**
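Editor's note: several hunks above (ring_buffer_lock_reserve, ring_buffer_unlock_commit and ring_buffer_write) replace the open-coded "resched = need_resched(); preempt_disable_notrace();" and "if (resched) preempt_enable_no_resched_notrace(); else preempt_enable_notrace();" sequences with an ftrace_preempt_disable()/ftrace_preempt_enable() pair; the same conversion recurs in trace.c below. A sketch of what such helpers amount to, assuming they simply package the removed open-coded sequence (the real inline definitions are not part of this diff and may differ):

	/* Sketch: remember whether NEED_RESCHED was already set before
	 * tracing code disables preemption. */
	static inline int ftrace_preempt_disable(void)
	{
		int resched;

		resched = need_resched();
		preempt_disable_notrace();

		return resched;
	}

	/* Sketch: if a reschedule was pending, re-enable preemption
	 * without letting the tracer itself trigger the schedule. */
	static inline void ftrace_preempt_enable(int resched)
	{
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}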
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d86e3252f300..4ee6f0375222 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -43,6 +43,29 @@
43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 43unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
44unsigned long __read_mostly tracing_thresh; 44unsigned long __read_mostly tracing_thresh;
45 45
46/* For tracers that don't implement custom flags */
47static struct tracer_opt dummy_tracer_opt[] = {
48 { }
49};
50
51static struct tracer_flags dummy_tracer_flags = {
52 .val = 0,
53 .opts = dummy_tracer_opt
54};
55
56static int dummy_set_flag(u32 old_flags, u32 bit, int set)
57{
58 return 0;
59}
60
61/*
62 * Kill all tracing for good (never come back).
63 * It is initialized to 1 but will turn to zero if the initialization
64 * of the tracer is successful. But that is the only place that sets
65 * this back to zero.
66 */
67int tracing_disabled = 1;
68
46static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); 69static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
47 70
48static inline void ftrace_disable_cpu(void) 71static inline void ftrace_disable_cpu(void)
@@ -62,7 +85,36 @@ static cpumask_t __read_mostly tracing_buffer_mask;
62#define for_each_tracing_cpu(cpu) \ 85#define for_each_tracing_cpu(cpu) \
63 for_each_cpu_mask(cpu, tracing_buffer_mask) 86 for_each_cpu_mask(cpu, tracing_buffer_mask)
64 87
65static int tracing_disabled = 1; 88/*
89 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
90 *
91 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
92 * is set, then ftrace_dump is called. This will output the contents
93 * of the ftrace buffers to the console. This is very useful for
 94 * capturing traces that lead to crashes and outputting them to a
95 * serial console.
96 *
 97 * It is off by default, but you can enable it either by specifying
 98 * "ftrace_dump_on_oops" on the kernel command line, or by setting
99 * /proc/sys/kernel/ftrace_dump_on_oops to true.
100 */
101int ftrace_dump_on_oops;
102
103static int tracing_set_tracer(char *buf);
104
105static int __init set_ftrace(char *str)
106{
107 tracing_set_tracer(str);
108 return 1;
109}
110__setup("ftrace", set_ftrace);
111
112static int __init set_ftrace_dump_on_oops(char *str)
113{
114 ftrace_dump_on_oops = 1;
115 return 1;
116}
117__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
66 118
67long 119long
68ns2usecs(cycle_t nsec) 120ns2usecs(cycle_t nsec)
@@ -112,6 +164,19 @@ static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
112/* tracer_enabled is used to toggle activation of a tracer */ 164/* tracer_enabled is used to toggle activation of a tracer */
113static int tracer_enabled = 1; 165static int tracer_enabled = 1;
114 166
167/**
168 * tracing_is_enabled - return tracer_enabled status
169 *
170 * This function is used by other tracers to know the status
171 * of the tracer_enabled flag. Tracers may use this function
 172 * to know if they should enable their features when starting
173 * up. See irqsoff tracer for an example (start_irqsoff_tracer).
174 */
175int tracing_is_enabled(void)
176{
177 return tracer_enabled;
178}
179
115/* function tracing enabled */ 180/* function tracing enabled */
116int ftrace_function_enabled; 181int ftrace_function_enabled;
117 182
@@ -153,8 +218,9 @@ static DEFINE_MUTEX(trace_types_lock);
153/* trace_wait is a waitqueue for tasks blocked on trace_poll */ 218/* trace_wait is a waitqueue for tasks blocked on trace_poll */
154static DECLARE_WAIT_QUEUE_HEAD(trace_wait); 219static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
155 220
156/* trace_flags holds iter_ctrl options */ 221/* trace_flags holds trace_options default values */
157unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 222unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
223 TRACE_ITER_ANNOTATE;
158 224
159/** 225/**
160 * trace_wake_up - wake up tasks waiting for trace input 226 * trace_wake_up - wake up tasks waiting for trace input
@@ -193,13 +259,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
193 return nsecs / 1000; 259 return nsecs / 1000;
194} 260}
195 261
196/*
197 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
198 * control the output of kernel symbols.
199 */
200#define TRACE_ITER_SYM_MASK \
201 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
202
 203/* These must match the bit positions in trace_iterator_flags */ 262
204static const char *trace_options[] = { 263static const char *trace_options[] = {
205 "print-parent", 264 "print-parent",
@@ -213,6 +272,9 @@ static const char *trace_options[] = {
213 "stacktrace", 272 "stacktrace",
214 "sched-tree", 273 "sched-tree",
215 "ftrace_printk", 274 "ftrace_printk",
275 "ftrace_preempt",
276 "branch",
277 "annotate",
216 NULL 278 NULL
217}; 279};
218 280
@@ -470,7 +532,15 @@ int register_tracer(struct tracer *type)
470 return -1; 532 return -1;
471 } 533 }
472 534
535 /*
536 * When this gets called we hold the BKL which means that
537 * preemption is disabled. Various trace selftests however
538 * need to disable and enable preemption for successful tests.
 539 * So we drop the BKL here and grab it again after the tests.
540 */
541 unlock_kernel();
473 mutex_lock(&trace_types_lock); 542 mutex_lock(&trace_types_lock);
543
474 for (t = trace_types; t; t = t->next) { 544 for (t = trace_types; t; t = t->next) {
475 if (strcmp(type->name, t->name) == 0) { 545 if (strcmp(type->name, t->name) == 0) {
476 /* already found */ 546 /* already found */
@@ -481,11 +551,18 @@ int register_tracer(struct tracer *type)
481 } 551 }
482 } 552 }
483 553
554 if (!type->set_flag)
555 type->set_flag = &dummy_set_flag;
556 if (!type->flags)
557 type->flags = &dummy_tracer_flags;
558 else
559 if (!type->flags->opts)
560 type->flags->opts = dummy_tracer_opt;
561
484#ifdef CONFIG_FTRACE_STARTUP_TEST 562#ifdef CONFIG_FTRACE_STARTUP_TEST
485 if (type->selftest) { 563 if (type->selftest) {
486 struct tracer *saved_tracer = current_trace; 564 struct tracer *saved_tracer = current_trace;
487 struct trace_array *tr = &global_trace; 565 struct trace_array *tr = &global_trace;
488 int saved_ctrl = tr->ctrl;
489 int i; 566 int i;
490 /* 567 /*
491 * Run a selftest on this tracer. 568 * Run a selftest on this tracer.
@@ -494,25 +571,23 @@ int register_tracer(struct tracer *type)
494 * internal tracing to verify that everything is in order. 571 * internal tracing to verify that everything is in order.
495 * If we fail, we do not register this tracer. 572 * If we fail, we do not register this tracer.
496 */ 573 */
497 for_each_tracing_cpu(i) { 574 for_each_tracing_cpu(i)
498 tracing_reset(tr, i); 575 tracing_reset(tr, i);
499 } 576
500 current_trace = type; 577 current_trace = type;
501 tr->ctrl = 0;
502 /* the test is responsible for initializing and enabling */ 578 /* the test is responsible for initializing and enabling */
503 pr_info("Testing tracer %s: ", type->name); 579 pr_info("Testing tracer %s: ", type->name);
504 ret = type->selftest(type, tr); 580 ret = type->selftest(type, tr);
505 /* the test is responsible for resetting too */ 581 /* the test is responsible for resetting too */
506 current_trace = saved_tracer; 582 current_trace = saved_tracer;
507 tr->ctrl = saved_ctrl;
508 if (ret) { 583 if (ret) {
509 printk(KERN_CONT "FAILED!\n"); 584 printk(KERN_CONT "FAILED!\n");
510 goto out; 585 goto out;
511 } 586 }
512 /* Only reset on passing, to avoid touching corrupted buffers */ 587 /* Only reset on passing, to avoid touching corrupted buffers */
513 for_each_tracing_cpu(i) { 588 for_each_tracing_cpu(i)
514 tracing_reset(tr, i); 589 tracing_reset(tr, i);
515 } 590
516 printk(KERN_CONT "PASSED\n"); 591 printk(KERN_CONT "PASSED\n");
517 } 592 }
518#endif 593#endif
@@ -525,6 +600,7 @@ int register_tracer(struct tracer *type)
525 600
526 out: 601 out:
527 mutex_unlock(&trace_types_lock); 602 mutex_unlock(&trace_types_lock);
603 lock_kernel();
528 604
529 return ret; 605 return ret;
530} 606}
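Editor's note: with the fallback to dummy_tracer_flags and dummy_set_flag above, existing tracers register unchanged, while a tracer that wants its own entries in trace_options supplies a tracer_opt array, a tracer_flags struct and a set_flag callback. A hedged sketch of what that could look like; the names my_tracer, MY_OPT_VERBOSE and my_tracer_opts are made up for illustration:

	#define MY_OPT_VERBOSE	0x1	/* hypothetical option mask */

	static struct tracer_opt my_tracer_opts[] = {
		/* shows up as "verbose"/"noverbose" in trace_options */
		{ TRACER_OPT(verbose, MY_OPT_VERBOSE) },
		{ }	/* terminator: the option walk stops at a NULL name */
	};

	static struct tracer_flags my_tracer_flags = {
		.val	= 0,			/* all options start off */
		.opts	= my_tracer_opts,
	};

	/* Return 0 to accept the change; trace.c then updates flags->val. */
	static int my_tracer_set_flag(u32 old_flags, u32 bit, int set)
	{
		return 0;
	}

	static struct tracer my_tracer __read_mostly = {
		.name		= "my_tracer",
		/* a real tracer would also fill in .init, .reset, ... */
		.flags		= &my_tracer_flags,
		.set_flag	= my_tracer_set_flag,
	};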
@@ -581,6 +657,76 @@ static void trace_init_cmdlines(void)
581 cmdline_idx = 0; 657 cmdline_idx = 0;
582} 658}
583 659
660static int trace_stop_count;
661static DEFINE_SPINLOCK(tracing_start_lock);
662
663/**
664 * tracing_start - quick start of the tracer
665 *
666 * If tracing is enabled but was stopped by tracing_stop,
667 * this will start the tracer back up.
668 */
669void tracing_start(void)
670{
671 struct ring_buffer *buffer;
672 unsigned long flags;
673
674 if (tracing_disabled)
675 return;
676
677 spin_lock_irqsave(&tracing_start_lock, flags);
678 if (--trace_stop_count)
679 goto out;
680
681 if (trace_stop_count < 0) {
682 /* Someone screwed up their debugging */
683 WARN_ON_ONCE(1);
684 trace_stop_count = 0;
685 goto out;
686 }
687
688
689 buffer = global_trace.buffer;
690 if (buffer)
691 ring_buffer_record_enable(buffer);
692
693 buffer = max_tr.buffer;
694 if (buffer)
695 ring_buffer_record_enable(buffer);
696
697 ftrace_start();
698 out:
699 spin_unlock_irqrestore(&tracing_start_lock, flags);
700}
701
702/**
703 * tracing_stop - quick stop of the tracer
704 *
 705 * Lightweight way to stop tracing. Use in conjunction with
706 * tracing_start.
707 */
708void tracing_stop(void)
709{
710 struct ring_buffer *buffer;
711 unsigned long flags;
712
713 ftrace_stop();
714 spin_lock_irqsave(&tracing_start_lock, flags);
715 if (trace_stop_count++)
716 goto out;
717
718 buffer = global_trace.buffer;
719 if (buffer)
720 ring_buffer_record_disable(buffer);
721
722 buffer = max_tr.buffer;
723 if (buffer)
724 ring_buffer_record_disable(buffer);
725
726 out:
727 spin_unlock_irqrestore(&tracing_start_lock, flags);
728}
729
584void trace_stop_cmdline_recording(void); 730void trace_stop_cmdline_recording(void);
585 731
586static void trace_save_cmdline(struct task_struct *tsk) 732static void trace_save_cmdline(struct task_struct *tsk)
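Editor's note: tracing_stop() and tracing_start() are counted via trace_stop_count, so nested stop/start pairs balance and only the outermost start re-enables the ring buffers. The buffer_size_kb write path later in this patch uses exactly this pairing; a tiny sketch of the intended usage (my_reconfigure_buffers is a hypothetical caller):

	/* Sketch: quiesce recording around a reconfiguration. The calls
	 * nest, so the caller need not know whether tracing is already
	 * stopped elsewhere. */
	static void my_reconfigure_buffers(void)
	{
		tracing_stop();
		/* ... resize buffers, swap tracers, reset state, ... */
		tracing_start();
	}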
@@ -691,6 +837,36 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data,
691 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 837 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
692} 838}
693 839
840#ifdef CONFIG_FUNCTION_RET_TRACER
841static void __trace_function_return(struct trace_array *tr,
842 struct trace_array_cpu *data,
843 struct ftrace_retfunc *trace,
844 unsigned long flags,
845 int pc)
846{
847 struct ring_buffer_event *event;
848 struct ftrace_ret_entry *entry;
849 unsigned long irq_flags;
850
851 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
852 return;
853
854 event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry),
855 &irq_flags);
856 if (!event)
857 return;
858 entry = ring_buffer_event_data(event);
859 tracing_generic_entry_update(&entry->ent, flags, pc);
860 entry->ent.type = TRACE_FN_RET;
861 entry->ip = trace->func;
862 entry->parent_ip = trace->ret;
863 entry->rettime = trace->rettime;
864 entry->calltime = trace->calltime;
865 entry->overrun = trace->overrun;
866 ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
867}
868#endif
869
694void 870void
695ftrace(struct trace_array *tr, struct trace_array_cpu *data, 871ftrace(struct trace_array *tr, struct trace_array_cpu *data,
696 unsigned long ip, unsigned long parent_ip, unsigned long flags, 872 unsigned long ip, unsigned long parent_ip, unsigned long flags,
@@ -841,26 +1017,28 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
841{ 1017{
842 struct trace_array *tr = &global_trace; 1018 struct trace_array *tr = &global_trace;
843 struct trace_array_cpu *data; 1019 struct trace_array_cpu *data;
1020 unsigned long flags;
844 int cpu; 1021 int cpu;
845 int pc; 1022 int pc;
846 1023
847 if (tracing_disabled || !tr->ctrl) 1024 if (tracing_disabled)
848 return; 1025 return;
849 1026
850 pc = preempt_count(); 1027 pc = preempt_count();
851 preempt_disable_notrace(); 1028 local_irq_save(flags);
852 cpu = raw_smp_processor_id(); 1029 cpu = raw_smp_processor_id();
853 data = tr->data[cpu]; 1030 data = tr->data[cpu];
854 1031
855 if (likely(!atomic_read(&data->disabled))) 1032 if (likely(atomic_inc_return(&data->disabled) == 1))
856 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); 1033 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
857 1034
858 preempt_enable_notrace(); 1035 atomic_dec(&data->disabled);
1036 local_irq_restore(flags);
859} 1037}
860 1038
861#ifdef CONFIG_FUNCTION_TRACER 1039#ifdef CONFIG_FUNCTION_TRACER
862static void 1040static void
863function_trace_call(unsigned long ip, unsigned long parent_ip) 1041function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
864{ 1042{
865 struct trace_array *tr = &global_trace; 1043 struct trace_array *tr = &global_trace;
866 struct trace_array_cpu *data; 1044 struct trace_array_cpu *data;
@@ -873,8 +1051,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
873 return; 1051 return;
874 1052
875 pc = preempt_count(); 1053 pc = preempt_count();
876 resched = need_resched(); 1054 resched = ftrace_preempt_disable();
877 preempt_disable_notrace();
878 local_save_flags(flags); 1055 local_save_flags(flags);
879 cpu = raw_smp_processor_id(); 1056 cpu = raw_smp_processor_id();
880 data = tr->data[cpu]; 1057 data = tr->data[cpu];
@@ -884,12 +1061,63 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
884 trace_function(tr, data, ip, parent_ip, flags, pc); 1061 trace_function(tr, data, ip, parent_ip, flags, pc);
885 1062
886 atomic_dec(&data->disabled); 1063 atomic_dec(&data->disabled);
887 if (resched) 1064 ftrace_preempt_enable(resched);
888 preempt_enable_no_resched_notrace(); 1065}
889 else 1066
890 preempt_enable_notrace(); 1067static void
1068function_trace_call(unsigned long ip, unsigned long parent_ip)
1069{
1070 struct trace_array *tr = &global_trace;
1071 struct trace_array_cpu *data;
1072 unsigned long flags;
1073 long disabled;
1074 int cpu;
1075 int pc;
1076
1077 if (unlikely(!ftrace_function_enabled))
1078 return;
1079
1080 /*
1081 * Need to use raw, since this must be called before the
1082 * recursive protection is performed.
1083 */
1084 local_irq_save(flags);
1085 cpu = raw_smp_processor_id();
1086 data = tr->data[cpu];
1087 disabled = atomic_inc_return(&data->disabled);
1088
1089 if (likely(disabled == 1)) {
1090 pc = preempt_count();
1091 trace_function(tr, data, ip, parent_ip, flags, pc);
1092 }
1093
1094 atomic_dec(&data->disabled);
1095 local_irq_restore(flags);
891} 1096}
892 1097
1098#ifdef CONFIG_FUNCTION_RET_TRACER
1099void trace_function_return(struct ftrace_retfunc *trace)
1100{
1101 struct trace_array *tr = &global_trace;
1102 struct trace_array_cpu *data;
1103 unsigned long flags;
1104 long disabled;
1105 int cpu;
1106 int pc;
1107
1108 raw_local_irq_save(flags);
1109 cpu = raw_smp_processor_id();
1110 data = tr->data[cpu];
1111 disabled = atomic_inc_return(&data->disabled);
1112 if (likely(disabled == 1)) {
1113 pc = preempt_count();
1114 __trace_function_return(tr, data, trace, flags, pc);
1115 }
1116 atomic_dec(&data->disabled);
1117 raw_local_irq_restore(flags);
1118}
1119#endif /* CONFIG_FUNCTION_RET_TRACER */
1120
893static struct ftrace_ops trace_ops __read_mostly = 1121static struct ftrace_ops trace_ops __read_mostly =
894{ 1122{
895 .func = function_trace_call, 1123 .func = function_trace_call,
@@ -898,9 +1126,14 @@ static struct ftrace_ops trace_ops __read_mostly =
898void tracing_start_function_trace(void) 1126void tracing_start_function_trace(void)
899{ 1127{
900 ftrace_function_enabled = 0; 1128 ftrace_function_enabled = 0;
1129
1130 if (trace_flags & TRACE_ITER_PREEMPTONLY)
1131 trace_ops.func = function_trace_call_preempt_only;
1132 else
1133 trace_ops.func = function_trace_call;
1134
901 register_ftrace_function(&trace_ops); 1135 register_ftrace_function(&trace_ops);
902 if (tracer_enabled) 1136 ftrace_function_enabled = 1;
903 ftrace_function_enabled = 1;
904} 1137}
905 1138
906void tracing_stop_function_trace(void) 1139void tracing_stop_function_trace(void)
@@ -912,6 +1145,7 @@ void tracing_stop_function_trace(void)
912 1145
913enum trace_file_type { 1146enum trace_file_type {
914 TRACE_FILE_LAT_FMT = 1, 1147 TRACE_FILE_LAT_FMT = 1,
1148 TRACE_FILE_ANNOTATE = 2,
915}; 1149};
916 1150
917static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 1151static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
@@ -1047,10 +1281,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1047 1281
1048 atomic_inc(&trace_record_cmdline_disabled); 1282 atomic_inc(&trace_record_cmdline_disabled);
1049 1283
1050 /* let the tracer grab locks here if needed */
1051 if (current_trace->start)
1052 current_trace->start(iter);
1053
1054 if (*pos != iter->pos) { 1284 if (*pos != iter->pos) {
1055 iter->ent = NULL; 1285 iter->ent = NULL;
1056 iter->cpu = 0; 1286 iter->cpu = 0;
@@ -1077,14 +1307,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1077 1307
1078static void s_stop(struct seq_file *m, void *p) 1308static void s_stop(struct seq_file *m, void *p)
1079{ 1309{
1080 struct trace_iterator *iter = m->private;
1081
1082 atomic_dec(&trace_record_cmdline_disabled); 1310 atomic_dec(&trace_record_cmdline_disabled);
1083
1084 /* let the tracer release locks here if needed */
1085 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1086 iter->trace->stop(iter);
1087
1088 mutex_unlock(&trace_types_lock); 1311 mutex_unlock(&trace_types_lock);
1089} 1312}
1090 1313
@@ -1143,7 +1366,7 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1143# define IP_FMT "%016lx" 1366# define IP_FMT "%016lx"
1144#endif 1367#endif
1145 1368
1146static int 1369int
1147seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) 1370seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1148{ 1371{
1149 int ret; 1372 int ret;
@@ -1338,6 +1561,23 @@ void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1338 trace_seq_putc(s, '\n'); 1561 trace_seq_putc(s, '\n');
1339} 1562}
1340 1563
1564static void test_cpu_buff_start(struct trace_iterator *iter)
1565{
1566 struct trace_seq *s = &iter->seq;
1567
1568 if (!(trace_flags & TRACE_ITER_ANNOTATE))
1569 return;
1570
1571 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1572 return;
1573
1574 if (cpu_isset(iter->cpu, iter->started))
1575 return;
1576
1577 cpu_set(iter->cpu, iter->started);
1578 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1579}
1580
1341static enum print_line_t 1581static enum print_line_t
1342print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1582print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1343{ 1583{
@@ -1357,6 +1597,8 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1357 if (entry->type == TRACE_CONT) 1597 if (entry->type == TRACE_CONT)
1358 return TRACE_TYPE_HANDLED; 1598 return TRACE_TYPE_HANDLED;
1359 1599
1600 test_cpu_buff_start(iter);
1601
1360 next_entry = find_next_entry(iter, NULL, &next_ts); 1602 next_entry = find_next_entry(iter, NULL, &next_ts);
1361 if (!next_entry) 1603 if (!next_entry)
1362 next_ts = iter->ts; 1604 next_ts = iter->ts;
@@ -1448,6 +1690,18 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1448 trace_seq_print_cont(s, iter); 1690 trace_seq_print_cont(s, iter);
1449 break; 1691 break;
1450 } 1692 }
1693 case TRACE_BRANCH: {
1694 struct trace_branch *field;
1695
1696 trace_assign_type(field, entry);
1697
1698 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1699 field->correct ? " ok " : " MISS ",
1700 field->func,
1701 field->file,
1702 field->line);
1703 break;
1704 }
1451 default: 1705 default:
1452 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1706 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1453 } 1707 }
@@ -1472,6 +1726,8 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1472 if (entry->type == TRACE_CONT) 1726 if (entry->type == TRACE_CONT)
1473 return TRACE_TYPE_HANDLED; 1727 return TRACE_TYPE_HANDLED;
1474 1728
1729 test_cpu_buff_start(iter);
1730
1475 comm = trace_find_cmdline(iter->ent->pid); 1731 comm = trace_find_cmdline(iter->ent->pid);
1476 1732
1477 t = ns2usecs(iter->ts); 1733 t = ns2usecs(iter->ts);
@@ -1581,6 +1837,22 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1581 trace_seq_print_cont(s, iter); 1837 trace_seq_print_cont(s, iter);
1582 break; 1838 break;
1583 } 1839 }
1840 case TRACE_FN_RET: {
1841 return print_return_function(iter);
1842 break;
1843 }
1844 case TRACE_BRANCH: {
1845 struct trace_branch *field;
1846
1847 trace_assign_type(field, entry);
1848
1849 trace_seq_printf(s, "[%s] %s:%s:%d\n",
1850 field->correct ? " ok " : " MISS ",
1851 field->func,
1852 field->file,
1853 field->line);
1854 break;
1855 }
1584 } 1856 }
1585 return TRACE_TYPE_HANDLED; 1857 return TRACE_TYPE_HANDLED;
1586} 1858}
@@ -1899,6 +2171,11 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1899 iter->trace = current_trace; 2171 iter->trace = current_trace;
1900 iter->pos = -1; 2172 iter->pos = -1;
1901 2173
2174 /* Annotate start of buffers if we had overruns */
2175 if (ring_buffer_overruns(iter->tr->buffer))
2176 iter->iter_flags |= TRACE_FILE_ANNOTATE;
2177
2178
1902 for_each_tracing_cpu(cpu) { 2179 for_each_tracing_cpu(cpu) {
1903 2180
1904 iter->buffer_iter[cpu] = 2181 iter->buffer_iter[cpu] =
@@ -1917,10 +2194,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1917 m->private = iter; 2194 m->private = iter;
1918 2195
1919 /* stop the trace while dumping */ 2196 /* stop the trace while dumping */
1920 if (iter->tr->ctrl) { 2197 tracing_stop();
1921 tracer_enabled = 0;
1922 ftrace_function_enabled = 0;
1923 }
1924 2198
1925 if (iter->trace && iter->trace->open) 2199 if (iter->trace && iter->trace->open)
1926 iter->trace->open(iter); 2200 iter->trace->open(iter);
@@ -1966,14 +2240,7 @@ int tracing_release(struct inode *inode, struct file *file)
1966 iter->trace->close(iter); 2240 iter->trace->close(iter);
1967 2241
1968 /* reenable tracing if it was previously enabled */ 2242 /* reenable tracing if it was previously enabled */
1969 if (iter->tr->ctrl) { 2243 tracing_start();
1970 tracer_enabled = 1;
1971 /*
1972 * It is safe to enable function tracing even if it
1973 * isn't used
1974 */
1975 ftrace_function_enabled = 1;
1976 }
1977 mutex_unlock(&trace_types_lock); 2244 mutex_unlock(&trace_types_lock);
1978 2245
1979 seq_release(inode, file); 2246 seq_release(inode, file);
@@ -2189,13 +2456,16 @@ static struct file_operations tracing_cpumask_fops = {
2189}; 2456};
2190 2457
2191static ssize_t 2458static ssize_t
2192tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, 2459tracing_trace_options_read(struct file *filp, char __user *ubuf,
2193 size_t cnt, loff_t *ppos) 2460 size_t cnt, loff_t *ppos)
2194{ 2461{
2462 int i;
2195 char *buf; 2463 char *buf;
2196 int r = 0; 2464 int r = 0;
2197 int len = 0; 2465 int len = 0;
2198 int i; 2466 u32 tracer_flags = current_trace->flags->val;
2467 struct tracer_opt *trace_opts = current_trace->flags->opts;
2468
2199 2469
 2200 /* calculate max size */ 2470
2201 for (i = 0; trace_options[i]; i++) { 2471 for (i = 0; trace_options[i]; i++) {
@@ -2203,6 +2473,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2203 len += 3; /* "no" and space */ 2473 len += 3; /* "no" and space */
2204 } 2474 }
2205 2475
2476 /*
2477 * Increase the size with names of options specific
 2478 * to the current tracer.
2479 */
2480 for (i = 0; trace_opts[i].name; i++) {
2481 len += strlen(trace_opts[i].name);
2482 len += 3; /* "no" and space */
2483 }
2484
2206 /* +2 for \n and \0 */ 2485 /* +2 for \n and \0 */
2207 buf = kmalloc(len + 2, GFP_KERNEL); 2486 buf = kmalloc(len + 2, GFP_KERNEL);
2208 if (!buf) 2487 if (!buf)
@@ -2215,6 +2494,15 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2215 r += sprintf(buf + r, "no%s ", trace_options[i]); 2494 r += sprintf(buf + r, "no%s ", trace_options[i]);
2216 } 2495 }
2217 2496
2497 for (i = 0; trace_opts[i].name; i++) {
2498 if (tracer_flags & trace_opts[i].bit)
2499 r += sprintf(buf + r, "%s ",
2500 trace_opts[i].name);
2501 else
2502 r += sprintf(buf + r, "no%s ",
2503 trace_opts[i].name);
2504 }
2505
2218 r += sprintf(buf + r, "\n"); 2506 r += sprintf(buf + r, "\n");
2219 WARN_ON(r >= len + 2); 2507 WARN_ON(r >= len + 2);
2220 2508
@@ -2225,13 +2513,48 @@ tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2225 return r; 2513 return r;
2226} 2514}
2227 2515
2516/* Try to assign a tracer specific option */
2517static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2518{
2519 struct tracer_flags *trace_flags = trace->flags;
2520 struct tracer_opt *opts = NULL;
2521 int ret = 0, i = 0;
2522 int len;
2523
2524 for (i = 0; trace_flags->opts[i].name; i++) {
2525 opts = &trace_flags->opts[i];
2526 len = strlen(opts->name);
2527
2528 if (strncmp(cmp, opts->name, len) == 0) {
2529 ret = trace->set_flag(trace_flags->val,
2530 opts->bit, !neg);
2531 break;
2532 }
2533 }
2534 /* Not found */
2535 if (!trace_flags->opts[i].name)
2536 return -EINVAL;
2537
2538 /* Refused to handle */
2539 if (ret)
2540 return ret;
2541
2542 if (neg)
2543 trace_flags->val &= ~opts->bit;
2544 else
2545 trace_flags->val |= opts->bit;
2546
2547 return 0;
2548}
2549
2228static ssize_t 2550static ssize_t
2229tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf, 2551tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2230 size_t cnt, loff_t *ppos) 2552 size_t cnt, loff_t *ppos)
2231{ 2553{
2232 char buf[64]; 2554 char buf[64];
2233 char *cmp = buf; 2555 char *cmp = buf;
2234 int neg = 0; 2556 int neg = 0;
2557 int ret;
2235 int i; 2558 int i;
2236 2559
2237 if (cnt >= sizeof(buf)) 2560 if (cnt >= sizeof(buf))
@@ -2258,11 +2581,13 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2258 break; 2581 break;
2259 } 2582 }
2260 } 2583 }
2261 /* 2584
2262 * If no option could be set, return an error: 2585 /* If no option could be set, test the specific tracer options */
2263 */ 2586 if (!trace_options[i]) {
2264 if (!trace_options[i]) 2587 ret = set_tracer_option(current_trace, cmp, neg);
2265 return -EINVAL; 2588 if (ret)
2589 return ret;
2590 }
2266 2591
2267 filp->f_pos += cnt; 2592 filp->f_pos += cnt;
2268 2593
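Editor's note: a write to trace_options is first matched against the generic trace_options[] names (with an optional "no" prefix); anything unmatched is handed to set_tracer_option(), so tracer-specific flags live in the same file. A small user-space sketch of flipping an option through that file, using the /debug/tracing mount point from the readme text above (error handling kept minimal):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* Write e.g. "print-parent" or "noprint-parent"; names unknown to
	 * the generic list fall through to the current tracer's options. */
	static int set_trace_option(const char *opt)
	{
		int fd = open("/debug/tracing/trace_options", O_WRONLY);
		ssize_t ret;

		if (fd < 0)
			return -1;
		ret = write(fd, opt, strlen(opt));
		close(fd);
		return ret < 0 ? -1 : 0;
	}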
@@ -2271,8 +2596,8 @@ tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2271 2596
2272static struct file_operations tracing_iter_fops = { 2597static struct file_operations tracing_iter_fops = {
2273 .open = tracing_open_generic, 2598 .open = tracing_open_generic,
2274 .read = tracing_iter_ctrl_read, 2599 .read = tracing_trace_options_read,
2275 .write = tracing_iter_ctrl_write, 2600 .write = tracing_trace_options_write,
2276}; 2601};
2277 2602
2278static const char readme_msg[] = 2603static const char readme_msg[] =
@@ -2286,9 +2611,9 @@ static const char readme_msg[] =
2286 "# echo sched_switch > /debug/tracing/current_tracer\n" 2611 "# echo sched_switch > /debug/tracing/current_tracer\n"
2287 "# cat /debug/tracing/current_tracer\n" 2612 "# cat /debug/tracing/current_tracer\n"
2288 "sched_switch\n" 2613 "sched_switch\n"
2289 "# cat /debug/tracing/iter_ctrl\n" 2614 "# cat /debug/tracing/trace_options\n"
2290 "noprint-parent nosym-offset nosym-addr noverbose\n" 2615 "noprint-parent nosym-offset nosym-addr noverbose\n"
2291 "# echo print-parent > /debug/tracing/iter_ctrl\n" 2616 "# echo print-parent > /debug/tracing/trace_options\n"
2292 "# echo 1 > /debug/tracing/tracing_enabled\n" 2617 "# echo 1 > /debug/tracing/tracing_enabled\n"
2293 "# cat /debug/tracing/trace > /tmp/trace.txt\n" 2618 "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2294 "echo 0 > /debug/tracing/tracing_enabled\n" 2619 "echo 0 > /debug/tracing/tracing_enabled\n"
@@ -2311,11 +2636,10 @@ static ssize_t
2311tracing_ctrl_read(struct file *filp, char __user *ubuf, 2636tracing_ctrl_read(struct file *filp, char __user *ubuf,
2312 size_t cnt, loff_t *ppos) 2637 size_t cnt, loff_t *ppos)
2313{ 2638{
2314 struct trace_array *tr = filp->private_data;
2315 char buf[64]; 2639 char buf[64];
2316 int r; 2640 int r;
2317 2641
2318 r = sprintf(buf, "%ld\n", tr->ctrl); 2642 r = sprintf(buf, "%u\n", tracer_enabled);
2319 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2643 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2320} 2644}
2321 2645
@@ -2343,16 +2667,18 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2343 val = !!val; 2667 val = !!val;
2344 2668
2345 mutex_lock(&trace_types_lock); 2669 mutex_lock(&trace_types_lock);
2346 if (tr->ctrl ^ val) { 2670 if (tracer_enabled ^ val) {
2347 if (val) 2671 if (val) {
2348 tracer_enabled = 1; 2672 tracer_enabled = 1;
2349 else 2673 if (current_trace->start)
2674 current_trace->start(tr);
2675 tracing_start();
2676 } else {
2350 tracer_enabled = 0; 2677 tracer_enabled = 0;
2351 2678 tracing_stop();
2352 tr->ctrl = val; 2679 if (current_trace->stop)
2353 2680 current_trace->stop(tr);
2354 if (current_trace && current_trace->ctrl_update) 2681 }
2355 current_trace->ctrl_update(tr);
2356 } 2682 }
2357 mutex_unlock(&trace_types_lock); 2683 mutex_unlock(&trace_types_lock);
2358 2684
@@ -2378,29 +2704,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
2378 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 2704 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2379} 2705}
2380 2706
2381static ssize_t 2707static int tracing_set_tracer(char *buf)
2382tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2383 size_t cnt, loff_t *ppos)
2384{ 2708{
2385 struct trace_array *tr = &global_trace; 2709 struct trace_array *tr = &global_trace;
2386 struct tracer *t; 2710 struct tracer *t;
2387 char buf[max_tracer_type_len+1]; 2711 int ret = 0;
2388 int i;
2389 size_t ret;
2390
2391 ret = cnt;
2392
2393 if (cnt > max_tracer_type_len)
2394 cnt = max_tracer_type_len;
2395
2396 if (copy_from_user(&buf, ubuf, cnt))
2397 return -EFAULT;
2398
2399 buf[cnt] = 0;
2400
2401 /* strip ending whitespace. */
2402 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2403 buf[i] = 0;
2404 2712
2405 mutex_lock(&trace_types_lock); 2713 mutex_lock(&trace_types_lock);
2406 for (t = trace_types; t; t = t->next) { 2714 for (t = trace_types; t; t = t->next) {
@@ -2414,18 +2722,52 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2414 if (t == current_trace) 2722 if (t == current_trace)
2415 goto out; 2723 goto out;
2416 2724
2725 trace_branch_disable();
2417 if (current_trace && current_trace->reset) 2726 if (current_trace && current_trace->reset)
2418 current_trace->reset(tr); 2727 current_trace->reset(tr);
2419 2728
2420 current_trace = t; 2729 current_trace = t;
2421 if (t->init) 2730 if (t->init) {
2422 t->init(tr); 2731 ret = t->init(tr);
2732 if (ret)
2733 goto out;
2734 }
2423 2735
2736 trace_branch_enable(tr);
2424 out: 2737 out:
2425 mutex_unlock(&trace_types_lock); 2738 mutex_unlock(&trace_types_lock);
2426 2739
2427 if (ret > 0) 2740 return ret;
2428 filp->f_pos += ret; 2741}
2742
2743static ssize_t
2744tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2745 size_t cnt, loff_t *ppos)
2746{
2747 char buf[max_tracer_type_len+1];
2748 int i;
2749 size_t ret;
2750 int err;
2751
2752 ret = cnt;
2753
2754 if (cnt > max_tracer_type_len)
2755 cnt = max_tracer_type_len;
2756
2757 if (copy_from_user(&buf, ubuf, cnt))
2758 return -EFAULT;
2759
2760 buf[cnt] = 0;
2761
2762 /* strip ending whitespace. */
2763 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2764 buf[i] = 0;
2765
2766 err = tracing_set_tracer(buf);
2767 if (err)
2768 return err;
2769
2770 filp->f_pos += ret;
2429 2771
2430 return ret; 2772 return ret;
2431} 2773}
@@ -2492,6 +2834,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
2492 return -ENOMEM; 2834 return -ENOMEM;
2493 2835
2494 mutex_lock(&trace_types_lock); 2836 mutex_lock(&trace_types_lock);
2837
2838 /* trace pipe does not show start of buffer */
2839 cpus_setall(iter->started);
2840
2495 iter->tr = &global_trace; 2841 iter->tr = &global_trace;
2496 iter->trace = current_trace; 2842 iter->trace = current_trace;
2497 filp->private_data = iter; 2843 filp->private_data = iter;
@@ -2667,7 +3013,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
2667 char buf[64]; 3013 char buf[64];
2668 int r; 3014 int r;
2669 3015
2670 r = sprintf(buf, "%lu\n", tr->entries); 3016 r = sprintf(buf, "%lu\n", tr->entries >> 10);
2671 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3017 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2672} 3018}
2673 3019
@@ -2678,7 +3024,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2678 unsigned long val; 3024 unsigned long val;
2679 char buf[64]; 3025 char buf[64];
2680 int ret, cpu; 3026 int ret, cpu;
2681 struct trace_array *tr = filp->private_data;
2682 3027
2683 if (cnt >= sizeof(buf)) 3028 if (cnt >= sizeof(buf))
2684 return -EINVAL; 3029 return -EINVAL;
@@ -2698,12 +3043,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2698 3043
2699 mutex_lock(&trace_types_lock); 3044 mutex_lock(&trace_types_lock);
2700 3045
2701 if (tr->ctrl) { 3046 tracing_stop();
2702 cnt = -EBUSY;
2703 pr_info("ftrace: please disable tracing"
2704 " before modifying buffer size\n");
2705 goto out;
2706 }
2707 3047
2708 /* disable all cpu buffers */ 3048 /* disable all cpu buffers */
2709 for_each_tracing_cpu(cpu) { 3049 for_each_tracing_cpu(cpu) {
@@ -2713,6 +3053,9 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2713 atomic_inc(&max_tr.data[cpu]->disabled); 3053 atomic_inc(&max_tr.data[cpu]->disabled);
2714 } 3054 }
2715 3055
3056 /* value is in KB */
3057 val <<= 10;
3058
2716 if (val != global_trace.entries) { 3059 if (val != global_trace.entries) {
2717 ret = ring_buffer_resize(global_trace.buffer, val); 3060 ret = ring_buffer_resize(global_trace.buffer, val);
2718 if (ret < 0) { 3061 if (ret < 0) {
@@ -2751,6 +3094,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2751 atomic_dec(&max_tr.data[cpu]->disabled); 3094 atomic_dec(&max_tr.data[cpu]->disabled);
2752 } 3095 }
2753 3096
3097 tracing_start();
2754 max_tr.entries = global_trace.entries; 3098 max_tr.entries = global_trace.entries;
2755 mutex_unlock(&trace_types_lock); 3099 mutex_unlock(&trace_types_lock);
2756 3100
@@ -2773,9 +3117,8 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
2773{ 3117{
2774 char *buf; 3118 char *buf;
2775 char *end; 3119 char *end;
2776 struct trace_array *tr = &global_trace;
2777 3120
2778 if (!tr->ctrl || tracing_disabled) 3121 if (tracing_disabled)
2779 return -EINVAL; 3122 return -EINVAL;
2780 3123
2781 if (cnt > TRACE_BUF_SIZE) 3124 if (cnt > TRACE_BUF_SIZE)
@@ -2841,22 +3184,38 @@ static struct file_operations tracing_mark_fops = {
2841 3184
2842#ifdef CONFIG_DYNAMIC_FTRACE 3185#ifdef CONFIG_DYNAMIC_FTRACE
2843 3186
3187int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3188{
3189 return 0;
3190}
3191
2844static ssize_t 3192static ssize_t
2845tracing_read_long(struct file *filp, char __user *ubuf, 3193tracing_read_dyn_info(struct file *filp, char __user *ubuf,
2846 size_t cnt, loff_t *ppos) 3194 size_t cnt, loff_t *ppos)
2847{ 3195{
3196 static char ftrace_dyn_info_buffer[1024];
3197 static DEFINE_MUTEX(dyn_info_mutex);
2848 unsigned long *p = filp->private_data; 3198 unsigned long *p = filp->private_data;
2849 char buf[64]; 3199 char *buf = ftrace_dyn_info_buffer;
3200 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
2850 int r; 3201 int r;
2851 3202
2852 r = sprintf(buf, "%ld\n", *p); 3203 mutex_lock(&dyn_info_mutex);
3204 r = sprintf(buf, "%ld ", *p);
2853 3205
2854 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 3206 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3207 buf[r++] = '\n';
3208
3209 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3210
3211 mutex_unlock(&dyn_info_mutex);
3212
3213 return r;
2855} 3214}
2856 3215
2857static struct file_operations tracing_read_long_fops = { 3216static struct file_operations tracing_dyn_info_fops = {
2858 .open = tracing_open_generic, 3217 .open = tracing_open_generic,
2859 .read = tracing_read_long, 3218 .read = tracing_read_dyn_info,
2860}; 3219};
2861#endif 3220#endif
2862 3221
@@ -2897,10 +3256,10 @@ static __init int tracer_init_debugfs(void)
2897 if (!entry) 3256 if (!entry)
2898 pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); 3257 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2899 3258
2900 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, 3259 entry = debugfs_create_file("trace_options", 0644, d_tracer,
2901 NULL, &tracing_iter_fops); 3260 NULL, &tracing_iter_fops);
2902 if (!entry) 3261 if (!entry)
2903 pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); 3262 pr_warning("Could not create debugfs 'trace_options' entry\n");
2904 3263
2905 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, 3264 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2906 NULL, &tracing_cpumask_fops); 3265 NULL, &tracing_cpumask_fops);
@@ -2950,11 +3309,11 @@ static __init int tracer_init_debugfs(void)
2950 pr_warning("Could not create debugfs " 3309 pr_warning("Could not create debugfs "
2951 "'trace_pipe' entry\n"); 3310 "'trace_pipe' entry\n");
2952 3311
2953 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 3312 entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
2954 &global_trace, &tracing_entries_fops); 3313 &global_trace, &tracing_entries_fops);
2955 if (!entry) 3314 if (!entry)
2956 pr_warning("Could not create debugfs " 3315 pr_warning("Could not create debugfs "
2957 "'trace_entries' entry\n"); 3316 "'buffer_size_kb' entry\n");
2958 3317
2959 entry = debugfs_create_file("trace_marker", 0220, d_tracer, 3318 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2960 NULL, &tracing_mark_fops); 3319 NULL, &tracing_mark_fops);
@@ -2965,7 +3324,7 @@ static __init int tracer_init_debugfs(void)
2965#ifdef CONFIG_DYNAMIC_FTRACE 3324#ifdef CONFIG_DYNAMIC_FTRACE
2966 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 3325 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2967 &ftrace_update_tot_cnt, 3326 &ftrace_update_tot_cnt,
2968 &tracing_read_long_fops); 3327 &tracing_dyn_info_fops);
2969 if (!entry) 3328 if (!entry)
2970 pr_warning("Could not create debugfs " 3329 pr_warning("Could not create debugfs "
2971 "'dyn_ftrace_total_info' entry\n"); 3330 "'dyn_ftrace_total_info' entry\n");
@@ -2988,7 +3347,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2988 unsigned long flags, irq_flags; 3347 unsigned long flags, irq_flags;
2989 int cpu, len = 0, size, pc; 3348 int cpu, len = 0, size, pc;
2990 3349
2991 if (!tr->ctrl || tracing_disabled) 3350 if (tracing_disabled)
2992 return 0; 3351 return 0;
2993 3352
2994 pc = preempt_count(); 3353 pc = preempt_count();
@@ -3046,7 +3405,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
3046static int trace_panic_handler(struct notifier_block *this, 3405static int trace_panic_handler(struct notifier_block *this,
3047 unsigned long event, void *unused) 3406 unsigned long event, void *unused)
3048{ 3407{
3049 ftrace_dump(); 3408 if (ftrace_dump_on_oops)
3409 ftrace_dump();
3050 return NOTIFY_OK; 3410 return NOTIFY_OK;
3051} 3411}
3052 3412
@@ -3062,7 +3422,8 @@ static int trace_die_handler(struct notifier_block *self,
3062{ 3422{
3063 switch (val) { 3423 switch (val) {
3064 case DIE_OOPS: 3424 case DIE_OOPS:
3065 ftrace_dump(); 3425 if (ftrace_dump_on_oops)
3426 ftrace_dump();
3066 break; 3427 break;
3067 default: 3428 default:
3068 break; 3429 break;
@@ -3103,7 +3464,6 @@ trace_printk_seq(struct trace_seq *s)
3103 trace_seq_reset(s); 3464 trace_seq_reset(s);
3104} 3465}
3105 3466
3106
3107void ftrace_dump(void) 3467void ftrace_dump(void)
3108{ 3468{
3109 static DEFINE_SPINLOCK(ftrace_dump_lock); 3469 static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3221,7 +3581,6 @@ __init static int tracer_alloc_buffers(void)
3221#endif 3581#endif
3222 3582
3223 /* All seems OK, enable tracing */ 3583 /* All seems OK, enable tracing */
3224 global_trace.ctrl = tracer_enabled;
3225 tracing_disabled = 0; 3584 tracing_disabled = 0;
3226 3585
3227 atomic_notifier_chain_register(&panic_notifier_list, 3586 atomic_notifier_chain_register(&panic_notifier_list,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8465ad052707..2cb12fd98f6b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
8#include <linux/ring_buffer.h> 8#include <linux/ring_buffer.h>
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <trace/boot.h>
11 12
12enum trace_type { 13enum trace_type {
13 __TRACE_FIRST_TYPE = 0, 14 __TRACE_FIRST_TYPE = 0,
@@ -21,7 +22,10 @@ enum trace_type {
21 TRACE_SPECIAL, 22 TRACE_SPECIAL,
22 TRACE_MMIO_RW, 23 TRACE_MMIO_RW,
23 TRACE_MMIO_MAP, 24 TRACE_MMIO_MAP,
24 TRACE_BOOT, 25 TRACE_BRANCH,
26 TRACE_BOOT_CALL,
27 TRACE_BOOT_RET,
28 TRACE_FN_RET,
25 29
26 __TRACE_LAST_TYPE 30 __TRACE_LAST_TYPE
27}; 31};
@@ -48,6 +52,16 @@ struct ftrace_entry {
48 unsigned long ip; 52 unsigned long ip;
49 unsigned long parent_ip; 53 unsigned long parent_ip;
50}; 54};
55
56/* Function return entry */
57struct ftrace_ret_entry {
58 struct trace_entry ent;
59 unsigned long ip;
60 unsigned long parent_ip;
61 unsigned long long calltime;
62 unsigned long long rettime;
63 unsigned long overrun;
64};
51extern struct tracer boot_tracer; 65extern struct tracer boot_tracer;
52 66
53/* 67/*
@@ -112,9 +126,24 @@ struct trace_mmiotrace_map {
112 struct mmiotrace_map map; 126 struct mmiotrace_map map;
113}; 127};
114 128
115struct trace_boot { 129struct trace_boot_call {
116 struct trace_entry ent; 130 struct trace_entry ent;
117 struct boot_trace initcall; 131 struct boot_trace_call boot_call;
132};
133
134struct trace_boot_ret {
135 struct trace_entry ent;
136 struct boot_trace_ret boot_ret;
137};
138
139#define TRACE_FUNC_SIZE 30
140#define TRACE_FILE_SIZE 20
141struct trace_branch {
142 struct trace_entry ent;
143 unsigned line;
144 char func[TRACE_FUNC_SIZE+1];
145 char file[TRACE_FILE_SIZE+1];
146 char correct;
118}; 147};
119 148
120/* 149/*
@@ -172,7 +201,6 @@ struct trace_iterator;
172struct trace_array { 201struct trace_array {
173 struct ring_buffer *buffer; 202 struct ring_buffer *buffer;
174 unsigned long entries; 203 unsigned long entries;
175 long ctrl;
176 int cpu; 204 int cpu;
177 cycle_t time_start; 205 cycle_t time_start;
178 struct task_struct *waiter; 206 struct task_struct *waiter;
@@ -218,7 +246,10 @@ extern void __ftrace_bad_type(void);
218 TRACE_MMIO_RW); \ 246 TRACE_MMIO_RW); \
219 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 247 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
220 TRACE_MMIO_MAP); \ 248 TRACE_MMIO_MAP); \
221 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ 249 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
250 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
251 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
252 IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\
222 __ftrace_bad_type(); \ 253 __ftrace_bad_type(); \
223 } while (0) 254 } while (0)
224 255
@@ -229,29 +260,55 @@ enum print_line_t {
229 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ 260 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
230}; 261};
231 262
263
264/*
265 * An option specific to a tracer. This is a boolean value.
266 * The bit is the bit index that sets its value on the
267 * flags value in struct tracer_flags.
268 */
269struct tracer_opt {
270 const char *name; /* Will appear in the trace_options file */
271 u32 bit; /* Mask assigned in val field in tracer_flags */
272};
273
274/*
275 * The set of specific options for a tracer. Your tracer
276 * has to set the initial value of the flags val.
277 */
278struct tracer_flags {
279 u32 val;
280 struct tracer_opt *opts;
281};
282
283/* Makes it easier to define a tracer opt */
284#define TRACER_OPT(s, b) .name = #s, .bit = b
285
232/* 286/*
233 * A specific tracer, represented by methods that operate on a trace array: 287 * A specific tracer, represented by methods that operate on a trace array:
234 */ 288 */
235struct tracer { 289struct tracer {
236 const char *name; 290 const char *name;
237 void (*init)(struct trace_array *tr); 291 /* Your tracer should raise a warning if init fails */
292 int (*init)(struct trace_array *tr);
238 void (*reset)(struct trace_array *tr); 293 void (*reset)(struct trace_array *tr);
294 void (*start)(struct trace_array *tr);
295 void (*stop)(struct trace_array *tr);
239 void (*open)(struct trace_iterator *iter); 296 void (*open)(struct trace_iterator *iter);
240 void (*pipe_open)(struct trace_iterator *iter); 297 void (*pipe_open)(struct trace_iterator *iter);
241 void (*close)(struct trace_iterator *iter); 298 void (*close)(struct trace_iterator *iter);
242 void (*start)(struct trace_iterator *iter);
243 void (*stop)(struct trace_iterator *iter);
244 ssize_t (*read)(struct trace_iterator *iter, 299 ssize_t (*read)(struct trace_iterator *iter,
245 struct file *filp, char __user *ubuf, 300 struct file *filp, char __user *ubuf,
246 size_t cnt, loff_t *ppos); 301 size_t cnt, loff_t *ppos);
247 void (*ctrl_update)(struct trace_array *tr);
248#ifdef CONFIG_FTRACE_STARTUP_TEST 302#ifdef CONFIG_FTRACE_STARTUP_TEST
249 int (*selftest)(struct tracer *trace, 303 int (*selftest)(struct tracer *trace,
250 struct trace_array *tr); 304 struct trace_array *tr);
251#endif 305#endif
252 enum print_line_t (*print_line)(struct trace_iterator *iter); 306 enum print_line_t (*print_line)(struct trace_iterator *iter);
307 /* If you handled the flag setting, return 0 */
308 int (*set_flag)(u32 old_flags, u32 bit, int set);
253 struct tracer *next; 309 struct tracer *next;
254 int print_max; 310 int print_max;
311 struct tracer_flags *flags;
255}; 312};
256 313
257struct trace_seq { 314struct trace_seq {
@@ -279,8 +336,11 @@ struct trace_iterator {
279 unsigned long iter_flags; 336 unsigned long iter_flags;
280 loff_t pos; 337 loff_t pos;
281 long idx; 338 long idx;
339
340 cpumask_t started;
282}; 341};
283 342
343int tracing_is_enabled(void);
284void trace_wake_up(void); 344void trace_wake_up(void);
285void tracing_reset(struct trace_array *tr, int cpu); 345void tracing_reset(struct trace_array *tr, int cpu);
286int tracing_open_generic(struct inode *inode, struct file *filp); 346int tracing_open_generic(struct inode *inode, struct file *filp);
@@ -320,9 +380,14 @@ void trace_function(struct trace_array *tr,
320 unsigned long ip, 380 unsigned long ip,
321 unsigned long parent_ip, 381 unsigned long parent_ip,
322 unsigned long flags, int pc); 382 unsigned long flags, int pc);
383void
384trace_function_return(struct ftrace_retfunc *trace);
323 385
324void tracing_start_cmdline_record(void); 386void tracing_start_cmdline_record(void);
325void tracing_stop_cmdline_record(void); 387void tracing_stop_cmdline_record(void);
388void tracing_sched_switch_assign_trace(struct trace_array *tr);
389void tracing_stop_sched_switch_record(void);
390void tracing_start_sched_switch_record(void);
326int register_tracer(struct tracer *type); 391int register_tracer(struct tracer *type);
327void unregister_tracer(struct tracer *type); 392void unregister_tracer(struct tracer *type);
328 393
@@ -383,12 +448,18 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
383 struct trace_array *tr); 448 struct trace_array *tr);
384extern int trace_selftest_startup_sysprof(struct tracer *trace, 449extern int trace_selftest_startup_sysprof(struct tracer *trace,
385 struct trace_array *tr); 450 struct trace_array *tr);
451extern int trace_selftest_startup_branch(struct tracer *trace,
452 struct trace_array *tr);
386#endif /* CONFIG_FTRACE_STARTUP_TEST */ 453#endif /* CONFIG_FTRACE_STARTUP_TEST */
387 454
388extern void *head_page(struct trace_array_cpu *data); 455extern void *head_page(struct trace_array_cpu *data);
389extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 456extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
390extern void trace_seq_print_cont(struct trace_seq *s, 457extern void trace_seq_print_cont(struct trace_seq *s,
391 struct trace_iterator *iter); 458 struct trace_iterator *iter);
459
460extern int
461seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
462 unsigned long sym_flags);
392extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 463extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
393 size_t cnt); 464 size_t cnt);
394extern long ns2usecs(cycle_t nsec); 465extern long ns2usecs(cycle_t nsec);
@@ -396,6 +467,17 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
396 467
397extern unsigned long trace_flags; 468extern unsigned long trace_flags;
398 469
470/* Standard output formatting function used for function return traces */
471#ifdef CONFIG_FUNCTION_RET_TRACER
472extern enum print_line_t print_return_function(struct trace_iterator *iter);
473#else
474static inline enum print_line_t
475print_return_function(struct trace_iterator *iter)
476{
477 return TRACE_TYPE_UNHANDLED;
478}
479#endif
480
399/* 481/*
400 * trace_iterator_flags is an enumeration that defines bit 482 * trace_iterator_flags is an enumeration that defines bit
401 * positions into trace_flags that controls the output. 483 * positions into trace_flags that controls the output.
@@ -415,8 +497,90 @@ enum trace_iterator_flags {
415 TRACE_ITER_STACKTRACE = 0x100, 497 TRACE_ITER_STACKTRACE = 0x100,
416 TRACE_ITER_SCHED_TREE = 0x200, 498 TRACE_ITER_SCHED_TREE = 0x200,
417 TRACE_ITER_PRINTK = 0x400, 499 TRACE_ITER_PRINTK = 0x400,
500 TRACE_ITER_PREEMPTONLY = 0x800,
501 TRACE_ITER_BRANCH = 0x1000,
502 TRACE_ITER_ANNOTATE = 0x2000,
418}; 503};
419 504
505/*
506 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
507 * control the output of kernel symbols.
508 */
509#define TRACE_ITER_SYM_MASK \
510 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
511
420extern struct tracer nop_trace; 512extern struct tracer nop_trace;
421 513
514/**
515 * ftrace_preempt_disable - disable preemption scheduler safe
516 *
517 * When tracing can happen inside the scheduler, there exists
518 * cases that the tracing might happen before the need_resched
519 * flag is checked. If this happens and the tracer calls
520 * preempt_enable (after a disable), a schedule might take place
521 * causing an infinite recursion.
522 *
523 * To prevent this, we read the need_resched flag before
524 * disabling preemption. When we want to enable preemption we
525 * check the flag, if it is set, then we call preempt_enable_no_resched.
526 * Otherwise, we call preempt_enable.
527 *
528 * The rationale for doing the above is that if need_resched is set
529 * and we have yet to reschedule, we are either in an atomic location
530 * (where we do not need to check for scheduling) or we are inside
531 * the scheduler and do not want to resched.
532 */
533static inline int ftrace_preempt_disable(void)
534{
535 int resched;
536
537 resched = need_resched();
538 preempt_disable_notrace();
539
540 return resched;
541}
542
543/**
544 * ftrace_preempt_enable - enable preemption scheduler safe
545 * @resched: the return value from ftrace_preempt_disable
546 *
547 * This is a scheduler safe way to enable preemption and not miss
548 * any preemption checks. The preceding disable saved the state of preemption.
549 * If resched is set, then we were either inside an atomic or
550 * are inside the scheduler (we would have already scheduled
551 * otherwise). In this case, we do not want to call normal
552 * preempt_enable, but preempt_enable_no_resched instead.
553 */
554static inline void ftrace_preempt_enable(int resched)
555{
556 if (resched)
557 preempt_enable_no_resched_notrace();
558 else
559 preempt_enable_notrace();
560}
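
A minimal sketch of how a tracer callback is expected to pair these two helpers; the callback name and body here are hypothetical, but the converted wakeup tracer callback later in this patch follows the same pattern:

static void my_trace_call(unsigned long ip, unsigned long parent_ip)
{
	int resched;

	/* Samples need_resched() and then disables preemption */
	resched = ftrace_preempt_disable();

	/* ... record the trace entry for ip/parent_ip here ... */

	/* Re-enables preemption without forcing a reschedule if one was pending */
	ftrace_preempt_enable(resched);
}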
561
562#ifdef CONFIG_BRANCH_TRACER
563extern int enable_branch_tracing(struct trace_array *tr);
564extern void disable_branch_tracing(void);
565static inline int trace_branch_enable(struct trace_array *tr)
566{
567 if (trace_flags & TRACE_ITER_BRANCH)
568 return enable_branch_tracing(tr);
569 return 0;
570}
571static inline void trace_branch_disable(void)
572{
573 /* due to races, always disable */
574 disable_branch_tracing();
575}
576#else
577static inline int trace_branch_enable(struct trace_array *tr)
578{
579 return 0;
580}
581static inline void trace_branch_disable(void)
582{
583}
584#endif /* CONFIG_BRANCH_TRACER */
585
422#endif /* _LINUX_KERNEL_TRACE_H */ 586#endif /* _LINUX_KERNEL_TRACE_H */
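
Taken together, the trace.h changes above drop the per-trace_array ctrl flag and the ctrl_update() callback in favour of an init() that returns an error code, optional start()/stop() methods, and per-tracer options. Below is a minimal sketch of a tracer written against this reworked interface; the tracer name, its option, and the callbacks are hypothetical and only illustrate where the new hooks go:

#define EXAMPLE_OPT_VERBOSE	0x1

static struct tracer_opt example_opts[] = {
	/* Shows up in the trace_options file as "verbose" */
	{ TRACER_OPT(verbose, EXAMPLE_OPT_VERBOSE) },
	{ }	/* terminating empty entry */
};

static struct tracer_flags example_flags = {
	.val	= 0,		/* all options disabled by default */
	.opts	= example_opts,
};

static int example_tracer_init(struct trace_array *tr)
{
	int cpu;

	for_each_online_cpu(cpu)
		tracing_reset(tr, cpu);

	return 0;	/* non-zero signals init failure (see warn_failed_init_tracer in the selftests) */
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static int example_set_flag(u32 old_flags, u32 bit, int set)
{
	return 0;	/* accept every option change */
}

static struct tracer example_tracer __read_mostly = {
	.name		= "example",
	.init		= example_tracer_init,
	.reset		= example_tracer_reset,
	.flags		= &example_flags,
	.set_flag	= example_set_flag,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
device_initcall(init_example_tracer);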
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index d0a5e50eeff2..a4fa2c57e34e 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -13,73 +13,117 @@
13#include "trace.h" 13#include "trace.h"
14 14
15static struct trace_array *boot_trace; 15static struct trace_array *boot_trace;
16static int trace_boot_enabled; 16static bool pre_initcalls_finished;
17 17
18 18/* Tells the boot tracer that the pre_smp_initcalls are finished.
19/* Should be started after do_pre_smp_initcalls() in init/main.c */ 19 * So we are ready to trace.
20 * It doesn't enable sched events tracing, however.
21 * You have to call enable_boot_trace to do so.
22 */
20void start_boot_trace(void) 23void start_boot_trace(void)
21{ 24{
22 trace_boot_enabled = 1; 25 pre_initcalls_finished = true;
23} 26}
24 27
25void stop_boot_trace(void) 28void enable_boot_trace(void)
26{ 29{
27 trace_boot_enabled = 0; 30 if (pre_initcalls_finished)
31 tracing_start_sched_switch_record();
28} 32}
29 33
30void reset_boot_trace(struct trace_array *tr) 34void disable_boot_trace(void)
31{ 35{
32 stop_boot_trace(); 36 if (pre_initcalls_finished)
37 tracing_stop_sched_switch_record();
33} 38}
34 39
35static void boot_trace_init(struct trace_array *tr) 40static void reset_boot_trace(struct trace_array *tr)
36{ 41{
37 int cpu; 42 int cpu;
38 boot_trace = tr;
39 43
40 trace_boot_enabled = 0; 44 tr->time_start = ftrace_now(tr->cpu);
45
46 for_each_online_cpu(cpu)
47 tracing_reset(tr, cpu);
48}
49
50static int boot_trace_init(struct trace_array *tr)
51{
52 int cpu;
53 boot_trace = tr;
41 54
42 for_each_cpu_mask(cpu, cpu_possible_map) 55 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu); 56 tracing_reset(tr, cpu);
57
58 tracing_sched_switch_assign_trace(tr);
59 return 0;
44} 60}
45 61
46static void boot_trace_ctrl_update(struct trace_array *tr) 62static enum print_line_t
63initcall_call_print_line(struct trace_iterator *iter)
47{ 64{
48 if (tr->ctrl) 65 struct trace_entry *entry = iter->ent;
49 start_boot_trace(); 66 struct trace_seq *s = &iter->seq;
67 struct trace_boot_call *field;
68 struct boot_trace_call *call;
69 u64 ts;
70 unsigned long nsec_rem;
71 int ret;
72
73 trace_assign_type(field, entry);
74 call = &field->boot_call;
75 ts = iter->ts;
76 nsec_rem = do_div(ts, 1000000000);
77
78 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
79 (unsigned long)ts, nsec_rem, call->func, call->caller);
80
81 if (!ret)
82 return TRACE_TYPE_PARTIAL_LINE;
50 else 83 else
51 stop_boot_trace(); 84 return TRACE_TYPE_HANDLED;
52} 85}
53 86
54static enum print_line_t initcall_print_line(struct trace_iterator *iter) 87static enum print_line_t
88initcall_ret_print_line(struct trace_iterator *iter)
55{ 89{
56 int ret;
57 struct trace_entry *entry = iter->ent; 90 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq; 91 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime); 92 struct trace_boot_ret *field;
62 struct timespec rettime = ktime_to_timespec(it->rettime); 93 struct boot_trace_ret *init_ret;
63 94 u64 ts;
64 if (entry->type == TRACE_BOOT) { 95 unsigned long nsec_rem;
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", 96 int ret;
66 calltime.tv_sec, 97
67 calltime.tv_nsec, 98 trace_assign_type(field, entry);
68 it->func, it->caller); 99 init_ret = &field->boot_ret;
69 if (!ret) 100 ts = iter->ts;
70 return TRACE_TYPE_PARTIAL_LINE; 101 nsec_rem = do_div(ts, 1000000000);
71 102
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " 103 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n", 104 "returned %d after %llu msecs\n",
74 rettime.tv_sec, 105 (unsigned long) ts,
75 rettime.tv_nsec, 106 nsec_rem,
76 it->func, it->result, it->duration); 107 init_ret->func, init_ret->result, init_ret->duration);
77 108
78 if (!ret) 109 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE; 110 return TRACE_TYPE_PARTIAL_LINE;
111 else
80 return TRACE_TYPE_HANDLED; 112 return TRACE_TYPE_HANDLED;
113}
114
115static enum print_line_t initcall_print_line(struct trace_iterator *iter)
116{
117 struct trace_entry *entry = iter->ent;
118
119 switch (entry->type) {
120 case TRACE_BOOT_CALL:
121 return initcall_call_print_line(iter);
122 case TRACE_BOOT_RET:
123 return initcall_ret_print_line(iter);
124 default:
125 return TRACE_TYPE_UNHANDLED;
81 } 126 }
82 return TRACE_TYPE_UNHANDLED;
83} 127}
84 128
85struct tracer boot_tracer __read_mostly = 129struct tracer boot_tracer __read_mostly =
@@ -87,27 +131,53 @@ struct tracer boot_tracer __read_mostly =
87 .name = "initcall", 131 .name = "initcall",
88 .init = boot_trace_init, 132 .init = boot_trace_init,
89 .reset = reset_boot_trace, 133 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line, 134 .print_line = initcall_print_line,
92}; 135};
93 136
94void trace_boot(struct boot_trace *it, initcall_t fn) 137void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
95{ 138{
96 struct ring_buffer_event *event; 139 struct ring_buffer_event *event;
97 struct trace_boot *entry; 140 struct trace_boot_call *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags; 141 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace; 142 struct trace_array *tr = boot_trace;
101 143
102 if (!trace_boot_enabled) 144 if (!pre_initcalls_finished)
103 return; 145 return;
104 146
105 /* Get its name now since this function could 147 /* Get its name now since this function could
106 * disappear because it is in the .init section. 148 * disappear because it is in the .init section.
107 */ 149 */
108 sprint_symbol(it->func, (unsigned long)fn); 150 sprint_symbol(bt->func, (unsigned long)fn);
151 preempt_disable();
152
153 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
154 &irq_flags);
155 if (!event)
156 goto out;
157 entry = ring_buffer_event_data(event);
158 tracing_generic_entry_update(&entry->ent, 0, 0);
159 entry->ent.type = TRACE_BOOT_CALL;
160 entry->boot_call = *bt;
161 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
162
163 trace_wake_up();
164
165 out:
166 preempt_enable();
167}
168
169void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
170{
171 struct ring_buffer_event *event;
172 struct trace_boot_ret *entry;
173 unsigned long irq_flags;
174 struct trace_array *tr = boot_trace;
175
176 if (!pre_initcalls_finished)
177 return;
178
179 sprint_symbol(bt->func, (unsigned long)fn);
109 preempt_disable(); 180 preempt_disable();
110 data = tr->data[smp_processor_id()];
111 181
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), 182 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags); 183 &irq_flags);
@@ -115,8 +185,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn)
115 goto out; 185 goto out;
116 entry = ring_buffer_event_data(event); 186 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0); 187 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT; 188 entry->ent.type = TRACE_BOOT_RET;
119 entry->initcall = *it; 189 entry->boot_ret = *bt;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags); 190 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121 191
122 trace_wake_up(); 192 trace_wake_up();
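
Both print helpers above split the nanosecond timestamp with do_div(), which divides its first argument in place and returns the remainder. A rough illustration of the arithmetic only (the sample value is made up):

	u64 ts = 5123456789ULL;				/* 5.123456789 seconds, in ns */
	unsigned long nsec_rem = do_div(ts, 1000000000);
	/* ts is now 5 and nsec_rem is 123456789, rendered as "[    5.123456789]" */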
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
new file mode 100644
index 000000000000..23f9b02ce967
--- /dev/null
+++ b/kernel/trace/trace_branch.c
@@ -0,0 +1,321 @@
1/*
2 * unlikely profiler
3 *
4 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5 */
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/module.h>
12#include <linux/ftrace.h>
13#include <linux/hash.h>
14#include <linux/fs.h>
15#include <asm/local.h>
16#include "trace.h"
17
18#ifdef CONFIG_BRANCH_TRACER
19
20static int branch_tracing_enabled __read_mostly;
21static DEFINE_MUTEX(branch_tracing_mutex);
22static struct trace_array *branch_tracer;
23
24static void
25probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
26{
27 struct trace_array *tr = branch_tracer;
28 struct ring_buffer_event *event;
29 struct trace_branch *entry;
30 unsigned long flags, irq_flags;
31 int cpu, pc;
32 const char *p;
33
34 /*
35 * I would love to save just the ftrace_likely_data pointer, but
36 * this code can also be used by modules. Ugly things can happen
37 * if the module is unloaded, and then we go and read the
38 * pointer. This is slower, but much safer.
39 */
40
41 if (unlikely(!tr))
42 return;
43
44 raw_local_irq_save(flags);
45 cpu = raw_smp_processor_id();
46 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
47 goto out;
48
49 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
50 &irq_flags);
51 if (!event)
52 goto out;
53
54 pc = preempt_count();
55 entry = ring_buffer_event_data(event);
56 tracing_generic_entry_update(&entry->ent, flags, pc);
57 entry->ent.type = TRACE_BRANCH;
58
59 /* Strip off the path, only save the file */
60 p = f->file + strlen(f->file);
61 while (p >= f->file && *p != '/')
62 p--;
63 p++;
64
65 strncpy(entry->func, f->func, TRACE_FUNC_SIZE);
66 strncpy(entry->file, p, TRACE_FILE_SIZE);
67 entry->func[TRACE_FUNC_SIZE] = 0;
68 entry->file[TRACE_FILE_SIZE] = 0;
69 entry->line = f->line;
70 entry->correct = val == expect;
71
72 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
73
74 out:
75 atomic_dec(&tr->data[cpu]->disabled);
76 raw_local_irq_restore(flags);
77}
78
79static inline
80void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
81{
82 if (!branch_tracing_enabled)
83 return;
84
85 probe_likely_condition(f, val, expect);
86}
87
88int enable_branch_tracing(struct trace_array *tr)
89{
90 int ret = 0;
91
92 mutex_lock(&branch_tracing_mutex);
93 branch_tracer = tr;
94 /*
95 * Must be seen before enabling. The reader is a condition
96 * where we do not need a matching rmb()
97 */
98 smp_wmb();
99 branch_tracing_enabled++;
100 mutex_unlock(&branch_tracing_mutex);
101
102 return ret;
103}
104
105void disable_branch_tracing(void)
106{
107 mutex_lock(&branch_tracing_mutex);
108
109 if (!branch_tracing_enabled)
110 goto out_unlock;
111
112 branch_tracing_enabled--;
113
114 out_unlock:
115 mutex_unlock(&branch_tracing_mutex);
116}
117
118static void start_branch_trace(struct trace_array *tr)
119{
120 enable_branch_tracing(tr);
121}
122
123static void stop_branch_trace(struct trace_array *tr)
124{
125 disable_branch_tracing();
126}
127
128static int branch_trace_init(struct trace_array *tr)
129{
130 int cpu;
131
132 for_each_online_cpu(cpu)
133 tracing_reset(tr, cpu);
134
135 start_branch_trace(tr);
136 return 0;
137}
138
139static void branch_trace_reset(struct trace_array *tr)
140{
141 stop_branch_trace(tr);
142}
143
144struct tracer branch_trace __read_mostly =
145{
146 .name = "branch",
147 .init = branch_trace_init,
148 .reset = branch_trace_reset,
149#ifdef CONFIG_FTRACE_SELFTEST
150 .selftest = trace_selftest_startup_branch,
151#endif
152};
153
154__init static int init_branch_trace(void)
155{
156 return register_tracer(&branch_trace);
157}
158
159device_initcall(init_branch_trace);
160#else
161static inline
162void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect)
163{
164}
165#endif /* CONFIG_BRANCH_TRACER */
166
167void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect)
168{
169 /*
170 * I would love to have a trace point here instead, but the
171 * trace point code is so inundated with unlikely and likely
172 * conditions that the recursive nightmare that exists is too
173 * much to try to get working. At least for now.
174 */
175 trace_likely_condition(f, val, expect);
176
177 /* FIXME: Make this atomic! */
178 if (val == expect)
179 f->correct++;
180 else
181 f->incorrect++;
182}
183EXPORT_SYMBOL(ftrace_likely_update);
184
185struct ftrace_pointer {
186 void *start;
187 void *stop;
188};
189
190static void *
191t_next(struct seq_file *m, void *v, loff_t *pos)
192{
193 struct ftrace_pointer *f = m->private;
194 struct ftrace_branch_data *p = v;
195
196 (*pos)++;
197
198 if (v == (void *)1)
199 return f->start;
200
201 ++p;
202
203 if ((void *)p >= (void *)f->stop)
204 return NULL;
205
206 return p;
207}
208
209static void *t_start(struct seq_file *m, loff_t *pos)
210{
211 void *t = (void *)1;
212 loff_t l = 0;
213
214 for (; t && l < *pos; t = t_next(m, t, &l))
215 ;
216
217 return t;
218}
219
220static void t_stop(struct seq_file *m, void *p)
221{
222}
223
224static int t_show(struct seq_file *m, void *v)
225{
226 struct ftrace_branch_data *p = v;
227 const char *f;
228 unsigned long percent;
229
230 if (v == (void *)1) {
231 seq_printf(m, " correct incorrect %% "
232 " Function "
233 " File Line\n"
234 " ------- --------- - "
235 " -------- "
236 " ---- ----\n");
237 return 0;
238 }
239
240 /* Only print the file, not the path */
241 f = p->file + strlen(p->file);
242 while (f >= p->file && *f != '/')
243 f--;
244 f++;
245
246 if (p->correct) {
247 percent = p->incorrect * 100;
248 percent /= p->correct + p->incorrect;
249 } else
250 percent = p->incorrect ? 100 : 0;
251
252 seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent);
253 seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
254 return 0;
255}
256
257static struct seq_operations tracing_likely_seq_ops = {
258 .start = t_start,
259 .next = t_next,
260 .stop = t_stop,
261 .show = t_show,
262};
263
264static int tracing_likely_open(struct inode *inode, struct file *file)
265{
266 int ret;
267
268 ret = seq_open(file, &tracing_likely_seq_ops);
269 if (!ret) {
270 struct seq_file *m = file->private_data;
271 m->private = (void *)inode->i_private;
272 }
273
274 return ret;
275}
276
277static struct file_operations tracing_likely_fops = {
278 .open = tracing_likely_open,
279 .read = seq_read,
280 .llseek = seq_lseek,
281};
282
283extern unsigned long __start_likely_profile[];
284extern unsigned long __stop_likely_profile[];
285extern unsigned long __start_unlikely_profile[];
286extern unsigned long __stop_unlikely_profile[];
287
288static struct ftrace_pointer ftrace_likely_pos = {
289 .start = __start_likely_profile,
290 .stop = __stop_likely_profile,
291};
292
293static struct ftrace_pointer ftrace_unlikely_pos = {
294 .start = __start_unlikely_profile,
295 .stop = __stop_unlikely_profile,
296};
297
298static __init int ftrace_branch_init(void)
299{
300 struct dentry *d_tracer;
301 struct dentry *entry;
302
303 d_tracer = tracing_init_dentry();
304
305 entry = debugfs_create_file("profile_likely", 0444, d_tracer,
306 &ftrace_likely_pos,
307 &tracing_likely_fops);
308 if (!entry)
309 pr_warning("Could not create debugfs 'profile_likely' entry\n");
310
311 entry = debugfs_create_file("profile_unlikely", 0444, d_tracer,
312 &ftrace_unlikely_pos,
313 &tracing_likely_fops);
314 if (!entry)
315 pr_warning("Could not create debugfs"
316 " 'profile_unlikely' entry\n");
317
318 return 0;
319}
320
321device_initcall(ftrace_branch_init);
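
The t_show() routine above reports the misprediction ratio with integer arithmetic; with made-up counters the calculation works out as follows:

	unsigned long correct = 3000, incorrect = 1000, percent;

	if (correct) {
		percent = incorrect * 100;		/* 100000 */
		percent /= correct + incorrect;		/* 100000 / 4000 == 25 */
	} else
		percent = incorrect ? 100 : 0;		/* never right, or never hit */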
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0f85a64003d3..e74f6d0a3216 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,24 +42,20 @@ static void stop_function_trace(struct trace_array *tr)
42 tracing_stop_cmdline_record(); 42 tracing_stop_cmdline_record();
43} 43}
44 44
45static void function_trace_init(struct trace_array *tr) 45static int function_trace_init(struct trace_array *tr)
46{ 46{
47 if (tr->ctrl) 47 start_function_trace(tr);
48 start_function_trace(tr); 48 return 0;
49} 49}
50 50
51static void function_trace_reset(struct trace_array *tr) 51static void function_trace_reset(struct trace_array *tr)
52{ 52{
53 if (tr->ctrl) 53 stop_function_trace(tr);
54 stop_function_trace(tr);
55} 54}
56 55
57static void function_trace_ctrl_update(struct trace_array *tr) 56static void function_trace_start(struct trace_array *tr)
58{ 57{
59 if (tr->ctrl) 58 function_reset(tr);
60 start_function_trace(tr);
61 else
62 stop_function_trace(tr);
63} 59}
64 60
65static struct tracer function_trace __read_mostly = 61static struct tracer function_trace __read_mostly =
@@ -67,7 +63,7 @@ static struct tracer function_trace __read_mostly =
67 .name = "function", 63 .name = "function",
68 .init = function_trace_init, 64 .init = function_trace_init,
69 .reset = function_trace_reset, 65 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 66 .start = function_trace_start,
71#ifdef CONFIG_FTRACE_SELFTEST 67#ifdef CONFIG_FTRACE_SELFTEST
72 .selftest = trace_selftest_startup_function, 68 .selftest = trace_selftest_startup_function,
73#endif 69#endif
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
new file mode 100644
index 000000000000..e00d64509c9c
--- /dev/null
+++ b/kernel/trace/trace_functions_return.c
@@ -0,0 +1,98 @@
1/*
2 *
3 * Function return tracer.
4 * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 */
9#include <linux/debugfs.h>
10#include <linux/uaccess.h>
11#include <linux/ftrace.h>
12#include <linux/fs.h>
13
14#include "trace.h"
15
16
17#define TRACE_RETURN_PRINT_OVERRUN 0x1
18static struct tracer_opt trace_opts[] = {
19 /* Display overruns or not */
20 { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) },
21 { } /* Empty entry */
22};
23
24static struct tracer_flags tracer_flags = {
25 .val = 0, /* Don't display overruns by default */
26 .opts = trace_opts
27};
28
29
30static int return_trace_init(struct trace_array *tr)
31{
32 int cpu;
33 for_each_online_cpu(cpu)
34 tracing_reset(tr, cpu);
35
36 return register_ftrace_return(&trace_function_return);
37}
38
39static void return_trace_reset(struct trace_array *tr)
40{
41 unregister_ftrace_return();
42}
43
44
45enum print_line_t
46print_return_function(struct trace_iterator *iter)
47{
48 struct trace_seq *s = &iter->seq;
49 struct trace_entry *entry = iter->ent;
50 struct ftrace_ret_entry *field;
51 int ret;
52
53 if (entry->type == TRACE_FN_RET) {
54 trace_assign_type(field, entry);
55 ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
56 if (!ret)
57 return TRACE_TYPE_PARTIAL_LINE;
58
59 ret = seq_print_ip_sym(s, field->ip,
60 trace_flags & TRACE_ITER_SYM_MASK);
61 if (!ret)
62 return TRACE_TYPE_PARTIAL_LINE;
63
64 ret = trace_seq_printf(s, " (%llu ns)",
65 field->rettime - field->calltime);
66 if (!ret)
67 return TRACE_TYPE_PARTIAL_LINE;
68
69 if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) {
70 ret = trace_seq_printf(s, " (Overruns: %lu)",
71 field->overrun);
72 if (!ret)
73 return TRACE_TYPE_PARTIAL_LINE;
74 }
75
76 ret = trace_seq_printf(s, "\n");
77 if (!ret)
78 return TRACE_TYPE_PARTIAL_LINE;
79
80 return TRACE_TYPE_HANDLED;
81 }
82 return TRACE_TYPE_UNHANDLED;
83}
84
85static struct tracer return_trace __read_mostly = {
86 .name = "return",
87 .init = return_trace_init,
88 .reset = return_trace_reset,
89 .print_line = print_return_function,
90 .flags = &tracer_flags,
91};
92
93static __init int init_return_trace(void)
94{
95 return register_tracer(&return_trace);
96}
97
98device_initcall(init_return_trace);
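
Given the format strings in print_return_function(), a line from the return tracer would look roughly like the sample below; the symbols, duration, and overrun count are illustrative only, and the overrun field appears only when the tracer's overrun option is set:

	sys_open+0x1b/0x20 -> do_sys_open (2349 ns) (Overruns: 0)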
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 9c74071c10e0..7c2e326bbc8b 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -353,15 +353,28 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
353} 353}
354#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
355 355
356/*
357 * save_tracer_enabled is used to save the state of the tracer_enabled
358 * variable when we disable it when we open a trace output file.
359 */
360static int save_tracer_enabled;
361
356static void start_irqsoff_tracer(struct trace_array *tr) 362static void start_irqsoff_tracer(struct trace_array *tr)
357{ 363{
358 register_ftrace_function(&trace_ops); 364 register_ftrace_function(&trace_ops);
359 tracer_enabled = 1; 365 if (tracing_is_enabled()) {
366 tracer_enabled = 1;
367 save_tracer_enabled = 1;
368 } else {
369 tracer_enabled = 0;
370 save_tracer_enabled = 0;
371 }
360} 372}
361 373
362static void stop_irqsoff_tracer(struct trace_array *tr) 374static void stop_irqsoff_tracer(struct trace_array *tr)
363{ 375{
364 tracer_enabled = 0; 376 tracer_enabled = 0;
377 save_tracer_enabled = 0;
365 unregister_ftrace_function(&trace_ops); 378 unregister_ftrace_function(&trace_ops);
366} 379}
367 380
@@ -370,53 +383,55 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
370 irqsoff_trace = tr; 383 irqsoff_trace = tr;
371 /* make sure that the tracer is visible */ 384 /* make sure that the tracer is visible */
372 smp_wmb(); 385 smp_wmb();
373 386 start_irqsoff_tracer(tr);
374 if (tr->ctrl)
375 start_irqsoff_tracer(tr);
376} 387}
377 388
378static void irqsoff_tracer_reset(struct trace_array *tr) 389static void irqsoff_tracer_reset(struct trace_array *tr)
379{ 390{
380 if (tr->ctrl) 391 stop_irqsoff_tracer(tr);
381 stop_irqsoff_tracer(tr);
382} 392}
383 393
384static void irqsoff_tracer_ctrl_update(struct trace_array *tr) 394static void irqsoff_tracer_start(struct trace_array *tr)
385{ 395{
386 if (tr->ctrl) 396 tracer_enabled = 1;
387 start_irqsoff_tracer(tr); 397 save_tracer_enabled = 1;
388 else 398}
389 stop_irqsoff_tracer(tr); 399
400static void irqsoff_tracer_stop(struct trace_array *tr)
401{
402 tracer_enabled = 0;
403 save_tracer_enabled = 0;
390} 404}
391 405
392static void irqsoff_tracer_open(struct trace_iterator *iter) 406static void irqsoff_tracer_open(struct trace_iterator *iter)
393{ 407{
394 /* stop the trace while dumping */ 408 /* stop the trace while dumping */
395 if (iter->tr->ctrl) 409 tracer_enabled = 0;
396 stop_irqsoff_tracer(iter->tr);
397} 410}
398 411
399static void irqsoff_tracer_close(struct trace_iterator *iter) 412static void irqsoff_tracer_close(struct trace_iterator *iter)
400{ 413{
401 if (iter->tr->ctrl) 414 /* restart tracing */
402 start_irqsoff_tracer(iter->tr); 415 tracer_enabled = save_tracer_enabled;
403} 416}
404 417
405#ifdef CONFIG_IRQSOFF_TRACER 418#ifdef CONFIG_IRQSOFF_TRACER
406static void irqsoff_tracer_init(struct trace_array *tr) 419static int irqsoff_tracer_init(struct trace_array *tr)
407{ 420{
408 trace_type = TRACER_IRQS_OFF; 421 trace_type = TRACER_IRQS_OFF;
409 422
410 __irqsoff_tracer_init(tr); 423 __irqsoff_tracer_init(tr);
424 return 0;
411} 425}
412static struct tracer irqsoff_tracer __read_mostly = 426static struct tracer irqsoff_tracer __read_mostly =
413{ 427{
414 .name = "irqsoff", 428 .name = "irqsoff",
415 .init = irqsoff_tracer_init, 429 .init = irqsoff_tracer_init,
416 .reset = irqsoff_tracer_reset, 430 .reset = irqsoff_tracer_reset,
431 .start = irqsoff_tracer_start,
432 .stop = irqsoff_tracer_stop,
417 .open = irqsoff_tracer_open, 433 .open = irqsoff_tracer_open,
418 .close = irqsoff_tracer_close, 434 .close = irqsoff_tracer_close,
419 .ctrl_update = irqsoff_tracer_ctrl_update,
420 .print_max = 1, 435 .print_max = 1,
421#ifdef CONFIG_FTRACE_SELFTEST 436#ifdef CONFIG_FTRACE_SELFTEST
422 .selftest = trace_selftest_startup_irqsoff, 437 .selftest = trace_selftest_startup_irqsoff,
@@ -428,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
428#endif 443#endif
429 444
430#ifdef CONFIG_PREEMPT_TRACER 445#ifdef CONFIG_PREEMPT_TRACER
431static void preemptoff_tracer_init(struct trace_array *tr) 446static int preemptoff_tracer_init(struct trace_array *tr)
432{ 447{
433 trace_type = TRACER_PREEMPT_OFF; 448 trace_type = TRACER_PREEMPT_OFF;
434 449
435 __irqsoff_tracer_init(tr); 450 __irqsoff_tracer_init(tr);
451 return 0;
436} 452}
437 453
438static struct tracer preemptoff_tracer __read_mostly = 454static struct tracer preemptoff_tracer __read_mostly =
@@ -440,9 +456,10 @@ static struct tracer preemptoff_tracer __read_mostly =
440 .name = "preemptoff", 456 .name = "preemptoff",
441 .init = preemptoff_tracer_init, 457 .init = preemptoff_tracer_init,
442 .reset = irqsoff_tracer_reset, 458 .reset = irqsoff_tracer_reset,
459 .start = irqsoff_tracer_start,
460 .stop = irqsoff_tracer_stop,
443 .open = irqsoff_tracer_open, 461 .open = irqsoff_tracer_open,
444 .close = irqsoff_tracer_close, 462 .close = irqsoff_tracer_close,
445 .ctrl_update = irqsoff_tracer_ctrl_update,
446 .print_max = 1, 463 .print_max = 1,
447#ifdef CONFIG_FTRACE_SELFTEST 464#ifdef CONFIG_FTRACE_SELFTEST
448 .selftest = trace_selftest_startup_preemptoff, 465 .selftest = trace_selftest_startup_preemptoff,
@@ -456,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
456#if defined(CONFIG_IRQSOFF_TRACER) && \ 473#if defined(CONFIG_IRQSOFF_TRACER) && \
457 defined(CONFIG_PREEMPT_TRACER) 474 defined(CONFIG_PREEMPT_TRACER)
458 475
459static void preemptirqsoff_tracer_init(struct trace_array *tr) 476static int preemptirqsoff_tracer_init(struct trace_array *tr)
460{ 477{
461 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; 478 trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
462 479
463 __irqsoff_tracer_init(tr); 480 __irqsoff_tracer_init(tr);
481 return 0;
464} 482}
465 483
466static struct tracer preemptirqsoff_tracer __read_mostly = 484static struct tracer preemptirqsoff_tracer __read_mostly =
@@ -468,9 +486,10 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
468 .name = "preemptirqsoff", 486 .name = "preemptirqsoff",
469 .init = preemptirqsoff_tracer_init, 487 .init = preemptirqsoff_tracer_init,
470 .reset = irqsoff_tracer_reset, 488 .reset = irqsoff_tracer_reset,
489 .start = irqsoff_tracer_start,
490 .stop = irqsoff_tracer_stop,
471 .open = irqsoff_tracer_open, 491 .open = irqsoff_tracer_open,
472 .close = irqsoff_tracer_close, 492 .close = irqsoff_tracer_close,
473 .ctrl_update = irqsoff_tracer_ctrl_update,
474 .print_max = 1, 493 .print_max = 1,
475#ifdef CONFIG_FTRACE_SELFTEST 494#ifdef CONFIG_FTRACE_SELFTEST
476 .selftest = trace_selftest_startup_preemptirqsoff, 495 .selftest = trace_selftest_startup_preemptirqsoff,
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index f28484618ff0..433d650eda9f 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -30,34 +30,29 @@ static void mmio_reset_data(struct trace_array *tr)
30 tracing_reset(tr, cpu); 30 tracing_reset(tr, cpu);
31} 31}
32 32
33static void mmio_trace_init(struct trace_array *tr) 33static int mmio_trace_init(struct trace_array *tr)
34{ 34{
35 pr_debug("in %s\n", __func__); 35 pr_debug("in %s\n", __func__);
36 mmio_trace_array = tr; 36 mmio_trace_array = tr;
37 if (tr->ctrl) { 37
38 mmio_reset_data(tr); 38 mmio_reset_data(tr);
39 enable_mmiotrace(); 39 enable_mmiotrace();
40 } 40 return 0;
41} 41}
42 42
43static void mmio_trace_reset(struct trace_array *tr) 43static void mmio_trace_reset(struct trace_array *tr)
44{ 44{
45 pr_debug("in %s\n", __func__); 45 pr_debug("in %s\n", __func__);
46 if (tr->ctrl) 46
47 disable_mmiotrace(); 47 disable_mmiotrace();
48 mmio_reset_data(tr); 48 mmio_reset_data(tr);
49 mmio_trace_array = NULL; 49 mmio_trace_array = NULL;
50} 50}
51 51
52static void mmio_trace_ctrl_update(struct trace_array *tr) 52static void mmio_trace_start(struct trace_array *tr)
53{ 53{
54 pr_debug("in %s\n", __func__); 54 pr_debug("in %s\n", __func__);
55 if (tr->ctrl) { 55 mmio_reset_data(tr);
56 mmio_reset_data(tr);
57 enable_mmiotrace();
58 } else {
59 disable_mmiotrace();
60 }
61} 56}
62 57
63static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) 58static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
@@ -298,10 +293,10 @@ static struct tracer mmio_tracer __read_mostly =
298 .name = "mmiotrace", 293 .name = "mmiotrace",
299 .init = mmio_trace_init, 294 .init = mmio_trace_init,
300 .reset = mmio_trace_reset, 295 .reset = mmio_trace_reset,
296 .start = mmio_trace_start,
301 .pipe_open = mmio_pipe_open, 297 .pipe_open = mmio_pipe_open,
302 .close = mmio_close, 298 .close = mmio_close,
303 .read = mmio_read, 299 .read = mmio_read,
304 .ctrl_update = mmio_trace_ctrl_update,
305 .print_line = mmio_print_line, 300 .print_line = mmio_print_line,
306}; 301};
307 302
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 4592b4862515..b9767acd30ac 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -12,6 +12,27 @@
12 12
13#include "trace.h" 13#include "trace.h"
14 14
15/* Our two options */
16enum {
17 TRACE_NOP_OPT_ACCEPT = 0x1,
18 TRACE_NOP_OPT_REFUSE = 0x2
19};
20
21/* Options for the tracer (see trace_options file) */
22static struct tracer_opt nop_opts[] = {
23 /* Option that will be accepted by set_flag callback */
24 { TRACER_OPT(test_nop_accept, TRACE_NOP_OPT_ACCEPT) },
25 /* Option that will be refused by set_flag callback */
26 { TRACER_OPT(test_nop_refuse, TRACE_NOP_OPT_REFUSE) },
27 { } /* Always set a last empty entry */
28};
29
30static struct tracer_flags nop_flags = {
31 /* You can check your flags value here when you want. */
32 .val = 0, /* By default: all flags disabled */
33 .opts = nop_opts
34};
35
15static struct trace_array *ctx_trace; 36static struct trace_array *ctx_trace;
16 37
17static void start_nop_trace(struct trace_array *tr) 38static void start_nop_trace(struct trace_array *tr)
@@ -24,7 +45,7 @@ static void stop_nop_trace(struct trace_array *tr)
24 /* Nothing to do! */ 45 /* Nothing to do! */
25} 46}
26 47
27static void nop_trace_init(struct trace_array *tr) 48static int nop_trace_init(struct trace_array *tr)
28{ 49{
29 int cpu; 50 int cpu;
30 ctx_trace = tr; 51 ctx_trace = tr;
@@ -32,33 +53,53 @@ static void nop_trace_init(struct trace_array *tr)
32 for_each_online_cpu(cpu) 53 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu); 54 tracing_reset(tr, cpu);
34 55
35 if (tr->ctrl) 56 start_nop_trace(tr);
36 start_nop_trace(tr); 57 return 0;
37} 58}
38 59
39static void nop_trace_reset(struct trace_array *tr) 60static void nop_trace_reset(struct trace_array *tr)
40{ 61{
41 if (tr->ctrl) 62 stop_nop_trace(tr);
42 stop_nop_trace(tr);
43} 63}
44 64
45static void nop_trace_ctrl_update(struct trace_array *tr) 65/* It only serves as a signal handler and a callback to
66 * accept or refuse the setting of a flag.
67 * If you don't implement it, then the flag setting will be
68 * automatically accepted.
69 */
70static int nop_set_flag(u32 old_flags, u32 bit, int set)
46{ 71{
47 /* When starting a new trace, reset the buffers */ 72 /*
48 if (tr->ctrl) 73 * Note that you don't need to update nop_flags.val yourself.
49 start_nop_trace(tr); 74 * The tracing API will do it automatically if you return 0
50 else 75 */
51 stop_nop_trace(tr); 76 if (bit == TRACE_NOP_OPT_ACCEPT) {
77 printk(KERN_DEBUG "nop_test_accept flag set to %d: we accept."
78 " Now cat trace_options to see the result\n",
79 set);
80 return 0;
81 }
82
83 if (bit == TRACE_NOP_OPT_REFUSE) {
84 printk(KERN_DEBUG "nop_test_refuse flag set to %d: we refuse."
85 "Now cat trace_options to see the result\n",
86 set);
87 return -EINVAL;
88 }
89
90 return 0;
52} 91}
53 92
93
54struct tracer nop_trace __read_mostly = 94struct tracer nop_trace __read_mostly =
55{ 95{
56 .name = "nop", 96 .name = "nop",
57 .init = nop_trace_init, 97 .init = nop_trace_init,
58 .reset = nop_trace_reset, 98 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST 99#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop, 100 .selftest = trace_selftest_startup_nop,
62#endif 101#endif
102 .flags = &nop_flags,
103 .set_flag = nop_set_flag
63}; 104};
64 105
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index b8f56beb1a62..863390557b44 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -16,7 +16,8 @@
16 16
17static struct trace_array *ctx_trace; 17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled; 18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static int sched_ref;
20static DEFINE_MUTEX(sched_register_mutex);
20 21
21static void 22static void
22probe_sched_switch(struct rq *__rq, struct task_struct *prev, 23probe_sched_switch(struct rq *__rq, struct task_struct *prev,
@@ -27,7 +28,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
27 int cpu; 28 int cpu;
28 int pc; 29 int pc;
29 30
30 if (!atomic_read(&sched_ref)) 31 if (!sched_ref)
31 return; 32 return;
32 33
33 tracing_record_cmdline(prev); 34 tracing_record_cmdline(prev);
@@ -123,20 +124,18 @@ static void tracing_sched_unregister(void)
123 124
124static void tracing_start_sched_switch(void) 125static void tracing_start_sched_switch(void)
125{ 126{
126 long ref; 127 mutex_lock(&sched_register_mutex);
127 128 if (!(sched_ref++))
128 ref = atomic_inc_return(&sched_ref);
129 if (ref == 1)
130 tracing_sched_register(); 129 tracing_sched_register();
130 mutex_unlock(&sched_register_mutex);
131} 131}
132 132
133static void tracing_stop_sched_switch(void) 133static void tracing_stop_sched_switch(void)
134{ 134{
135 long ref; 135 mutex_lock(&sched_register_mutex);
136 136 if (!(--sched_ref))
137 ref = atomic_dec_and_test(&sched_ref);
138 if (ref)
139 tracing_sched_unregister(); 137 tracing_sched_unregister();
138 mutex_unlock(&sched_register_mutex);
140} 139}
141 140
142void tracing_start_cmdline_record(void) 141void tracing_start_cmdline_record(void)
@@ -149,40 +148,86 @@ void tracing_stop_cmdline_record(void)
149 tracing_stop_sched_switch(); 148 tracing_stop_sched_switch();
150} 149}
151 150
151/**
152 * tracing_start_sched_switch_record - start tracing context switches
153 *
154 * Turns on context switch tracing for a tracer.
155 */
156void tracing_start_sched_switch_record(void)
157{
158 if (unlikely(!ctx_trace)) {
159 WARN_ON(1);
160 return;
161 }
162
163 tracing_start_sched_switch();
164
165 mutex_lock(&sched_register_mutex);
166 tracer_enabled++;
167 mutex_unlock(&sched_register_mutex);
168}
169
170/**
171 * tracing_stop_sched_switch_record - stop tracing context switches
172 *
173 * Turns off context switch tracing for a tracer.
174 */
175void tracing_stop_sched_switch_record(void)
176{
177 mutex_lock(&sched_register_mutex);
178 tracer_enabled--;
179 WARN_ON(tracer_enabled < 0);
180 mutex_unlock(&sched_register_mutex);
181
182 tracing_stop_sched_switch();
183}
184
185/**
186 * tracing_sched_switch_assign_trace - assign a trace array for ctx switch
187 * @tr: trace array pointer to assign
188 *
189 * Some tracers might want to record the context switches in their
190 * trace. This function lets those tracers assign the trace array
191 * to use.
192 */
193void tracing_sched_switch_assign_trace(struct trace_array *tr)
194{
195 ctx_trace = tr;
196}
197
152static void start_sched_trace(struct trace_array *tr) 198static void start_sched_trace(struct trace_array *tr)
153{ 199{
154 sched_switch_reset(tr); 200 sched_switch_reset(tr);
155 tracing_start_cmdline_record(); 201 tracing_start_sched_switch_record();
156 tracer_enabled = 1;
157} 202}
158 203
159static void stop_sched_trace(struct trace_array *tr) 204static void stop_sched_trace(struct trace_array *tr)
160{ 205{
161 tracer_enabled = 0; 206 tracing_stop_sched_switch_record();
162 tracing_stop_cmdline_record();
163} 207}
164 208
165static void sched_switch_trace_init(struct trace_array *tr) 209static int sched_switch_trace_init(struct trace_array *tr)
166{ 210{
167 ctx_trace = tr; 211 ctx_trace = tr;
168 212 start_sched_trace(tr);
169 if (tr->ctrl) 213 return 0;
170 start_sched_trace(tr);
171} 214}
172 215
173static void sched_switch_trace_reset(struct trace_array *tr) 216static void sched_switch_trace_reset(struct trace_array *tr)
174{ 217{
175 if (tr->ctrl) 218 if (sched_ref)
176 stop_sched_trace(tr); 219 stop_sched_trace(tr);
177} 220}
178 221
179static void sched_switch_trace_ctrl_update(struct trace_array *tr) 222static void sched_switch_trace_start(struct trace_array *tr)
180{ 223{
181 /* When starting a new trace, reset the buffers */ 224 sched_switch_reset(tr);
182 if (tr->ctrl) 225 tracing_start_sched_switch();
183 start_sched_trace(tr); 226}
184 else 227
185 stop_sched_trace(tr); 228static void sched_switch_trace_stop(struct trace_array *tr)
229{
230 tracing_stop_sched_switch();
186} 231}
187 232
188static struct tracer sched_switch_trace __read_mostly = 233static struct tracer sched_switch_trace __read_mostly =
@@ -190,7 +235,8 @@ static struct tracer sched_switch_trace __read_mostly =
190 .name = "sched_switch", 235 .name = "sched_switch",
191 .init = sched_switch_trace_init, 236 .init = sched_switch_trace_init,
192 .reset = sched_switch_trace_reset, 237 .reset = sched_switch_trace_reset,
193 .ctrl_update = sched_switch_trace_ctrl_update, 238 .start = sched_switch_trace_start,
239 .stop = sched_switch_trace_stop,
194#ifdef CONFIG_FTRACE_SELFTEST 240#ifdef CONFIG_FTRACE_SELFTEST
195 .selftest = trace_selftest_startup_sched_switch, 241 .selftest = trace_selftest_startup_sched_switch,
196#endif 242#endif
@@ -198,14 +244,6 @@ static struct tracer sched_switch_trace __read_mostly =
198 244
199__init static int init_sched_switch_trace(void) 245__init static int init_sched_switch_trace(void)
200{ 246{
201 int ret = 0;
202
203 if (atomic_read(&sched_ref))
204 ret = tracing_sched_register();
205 if (ret) {
206 pr_info("error registering scheduler trace\n");
207 return ret;
208 }
209 return register_tracer(&sched_switch_trace); 247 return register_tracer(&sched_switch_trace);
210} 248}
211device_initcall(init_sched_switch_trace); 249device_initcall(init_sched_switch_trace);
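
The new tracing_sched_switch_assign_trace()/tracing_start_sched_switch_record() pair above is what other tracers are expected to use instead of poking tracer_enabled directly; the boot tracer earlier in this patch already does so. A minimal hypothetical consumer might look like:

static int ctxrec_tracer_init(struct trace_array *tr)
{
	int cpu;

	for_each_online_cpu(cpu)
		tracing_reset(tr, cpu);

	/* Route context-switch events into this tracer's buffer */
	tracing_sched_switch_assign_trace(tr);
	tracing_start_sched_switch_record();
	return 0;
}

static void ctxrec_tracer_reset(struct trace_array *tr)
{
	tracing_stop_sched_switch_record();
}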
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3ae93f16b565..0067b49746c1 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -50,8 +50,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
50 return; 50 return;
51 51
52 pc = preempt_count(); 52 pc = preempt_count();
53 resched = need_resched(); 53 resched = ftrace_preempt_disable();
54 preempt_disable_notrace();
55 54
56 cpu = raw_smp_processor_id(); 55 cpu = raw_smp_processor_id();
57 data = tr->data[cpu]; 56 data = tr->data[cpu];
@@ -81,15 +80,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
81 out: 80 out:
82 atomic_dec(&data->disabled); 81 atomic_dec(&data->disabled);
83 82
84 /* 83 ftrace_preempt_enable(resched);
85 * To prevent recursion from the scheduler, if the
86 * resched flag was set before we entered, then
87 * don't reschedule.
88 */
89 if (resched)
90 preempt_enable_no_resched_notrace();
91 else
92 preempt_enable_notrace();
93} 84}
94 85
95static struct ftrace_ops trace_ops __read_mostly = 86static struct ftrace_ops trace_ops __read_mostly =
@@ -271,6 +262,12 @@ out:
271 atomic_dec(&wakeup_trace->data[cpu]->disabled); 262 atomic_dec(&wakeup_trace->data[cpu]->disabled);
272} 263}
273 264
265/*
266 * save_tracer_enabled is used to save the state of the tracer_enabled
267 * variable when we disable it when we open a trace output file.
268 */
269static int save_tracer_enabled;
270
274static void start_wakeup_tracer(struct trace_array *tr) 271static void start_wakeup_tracer(struct trace_array *tr)
275{ 272{
276 int ret; 273 int ret;
@@ -309,7 +306,13 @@ static void start_wakeup_tracer(struct trace_array *tr)
309 306
310 register_ftrace_function(&trace_ops); 307 register_ftrace_function(&trace_ops);
311 308
312 tracer_enabled = 1; 309 if (tracing_is_enabled()) {
310 tracer_enabled = 1;
311 save_tracer_enabled = 1;
312 } else {
313 tracer_enabled = 0;
314 save_tracer_enabled = 0;
315 }
313 316
314 return; 317 return;
315fail_deprobe_wake_new: 318fail_deprobe_wake_new:
@@ -321,49 +324,53 @@ fail_deprobe:
321static void stop_wakeup_tracer(struct trace_array *tr) 324static void stop_wakeup_tracer(struct trace_array *tr)
322{ 325{
323 tracer_enabled = 0; 326 tracer_enabled = 0;
327 save_tracer_enabled = 0;
324 unregister_ftrace_function(&trace_ops); 328 unregister_ftrace_function(&trace_ops);
325 unregister_trace_sched_switch(probe_wakeup_sched_switch); 329 unregister_trace_sched_switch(probe_wakeup_sched_switch);
326 unregister_trace_sched_wakeup_new(probe_wakeup); 330 unregister_trace_sched_wakeup_new(probe_wakeup);
327 unregister_trace_sched_wakeup(probe_wakeup); 331 unregister_trace_sched_wakeup(probe_wakeup);
328} 332}
329 333
330static void wakeup_tracer_init(struct trace_array *tr) 334static int wakeup_tracer_init(struct trace_array *tr)
331{ 335{
332 wakeup_trace = tr; 336 wakeup_trace = tr;
333 337 start_wakeup_tracer(tr);
334 if (tr->ctrl) 338 return 0;
335 start_wakeup_tracer(tr);
336} 339}
337 340
338static void wakeup_tracer_reset(struct trace_array *tr) 341static void wakeup_tracer_reset(struct trace_array *tr)
339{ 342{
340 if (tr->ctrl) { 343 stop_wakeup_tracer(tr);
341 stop_wakeup_tracer(tr); 344 /* make sure we put back any tasks we are tracing */
342 /* make sure we put back any tasks we are tracing */ 345 wakeup_reset(tr);
343 wakeup_reset(tr); 346}
344 } 347
348static void wakeup_tracer_start(struct trace_array *tr)
349{
350 wakeup_reset(tr);
351 tracer_enabled = 1;
352 save_tracer_enabled = 1;
345} 353}
346 354
347static void wakeup_tracer_ctrl_update(struct trace_array *tr) 355static void wakeup_tracer_stop(struct trace_array *tr)
348{ 356{
349 if (tr->ctrl) 357 tracer_enabled = 0;
350 start_wakeup_tracer(tr); 358 save_tracer_enabled = 0;
351 else
352 stop_wakeup_tracer(tr);
353} 359}
354 360
355static void wakeup_tracer_open(struct trace_iterator *iter) 361static void wakeup_tracer_open(struct trace_iterator *iter)
356{ 362{
357 /* stop the trace while dumping */ 363 /* stop the trace while dumping */
358 if (iter->tr->ctrl) 364 tracer_enabled = 0;
359 stop_wakeup_tracer(iter->tr);
360} 365}
361 366
362static void wakeup_tracer_close(struct trace_iterator *iter) 367static void wakeup_tracer_close(struct trace_iterator *iter)
363{ 368{
364 /* forget about any processes we were recording */ 369 /* forget about any processes we were recording */
365 if (iter->tr->ctrl) 370 if (save_tracer_enabled) {
366 start_wakeup_tracer(iter->tr); 371 wakeup_reset(iter->tr);
372 tracer_enabled = 1;
373 }
367} 374}
368 375
369static struct tracer wakeup_tracer __read_mostly = 376static struct tracer wakeup_tracer __read_mostly =
@@ -371,9 +378,10 @@ static struct tracer wakeup_tracer __read_mostly =
371 .name = "wakeup", 378 .name = "wakeup",
372 .init = wakeup_tracer_init, 379 .init = wakeup_tracer_init,
373 .reset = wakeup_tracer_reset, 380 .reset = wakeup_tracer_reset,
381 .start = wakeup_tracer_start,
382 .stop = wakeup_tracer_stop,
374 .open = wakeup_tracer_open, 383 .open = wakeup_tracer_open,
375 .close = wakeup_tracer_close, 384 .close = wakeup_tracer_close,
376 .ctrl_update = wakeup_tracer_ctrl_update,
377 .print_max = 1, 385 .print_max = 1,
378#ifdef CONFIG_FTRACE_SELFTEST 386#ifdef CONFIG_FTRACE_SELFTEST
379 .selftest = trace_selftest_startup_wakeup, 387 .selftest = trace_selftest_startup_wakeup,
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 90bc752a7580..88c8eb70f54a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -13,6 +13,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
13 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT: 14 case TRACE_PRINT:
15 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
16 case TRACE_BRANCH:
16 return 1; 17 return 1;
17 } 18 }
18 return 0; 19 return 0;
@@ -51,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
51 int cpu, ret = 0; 52 int cpu, ret = 0;
52 53
53 /* Don't allow flipping of max traces now */ 54 /* Don't allow flipping of max traces now */
54 raw_local_irq_save(flags); 55 local_irq_save(flags);
55 __raw_spin_lock(&ftrace_max_lock); 56 __raw_spin_lock(&ftrace_max_lock);
56 57
57 cnt = ring_buffer_entries(tr->buffer); 58 cnt = ring_buffer_entries(tr->buffer);
@@ -62,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
62 break; 63 break;
63 } 64 }
64 __raw_spin_unlock(&ftrace_max_lock); 65 __raw_spin_unlock(&ftrace_max_lock);
65 raw_local_irq_restore(flags); 66 local_irq_restore(flags);
66 67
67 if (count) 68 if (count)
68 *count = cnt; 69 *count = cnt;
@@ -70,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 71 return ret;
71} 72}
72 73
74static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
75{
76 printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
77 trace->name, init_ret);
78}
73#ifdef CONFIG_FUNCTION_TRACER 79#ifdef CONFIG_FUNCTION_TRACER
74 80
75#ifdef CONFIG_DYNAMIC_FTRACE 81#ifdef CONFIG_DYNAMIC_FTRACE
@@ -110,8 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
110 ftrace_set_filter(func_name, strlen(func_name), 1); 116 ftrace_set_filter(func_name, strlen(func_name), 1);
111 117
112 /* enable tracing */ 118 /* enable tracing */
113 tr->ctrl = 1; 119 ret = trace->init(tr);
114 trace->init(tr); 120 if (ret) {
121 warn_failed_init_tracer(trace, ret);
122 goto out;
123 }
115 124
116 /* Sleep for a 1/10 of a second */ 125 /* Sleep for a 1/10 of a second */
117 msleep(100); 126 msleep(100);
@@ -134,13 +143,13 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
134 msleep(100); 143 msleep(100);
135 144
136 /* stop the tracing. */ 145 /* stop the tracing. */
137 tr->ctrl = 0; 146 tracing_stop();
138 trace->ctrl_update(tr);
139 ftrace_enabled = 0; 147 ftrace_enabled = 0;
140 148
141 /* check the trace buffer */ 149 /* check the trace buffer */
142 ret = trace_test_buffer(tr, &count); 150 ret = trace_test_buffer(tr, &count);
143 trace->reset(tr); 151 trace->reset(tr);
152 tracing_start();
144 153
145 /* we should only have one item */ 154 /* we should only have one item */
146 if (!ret && count != 1) { 155 if (!ret && count != 1) {
@@ -148,6 +157,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
148 ret = -1; 157 ret = -1;
149 goto out; 158 goto out;
150 } 159 }
160
151 out: 161 out:
152 ftrace_enabled = save_ftrace_enabled; 162 ftrace_enabled = save_ftrace_enabled;
153 tracer_enabled = save_tracer_enabled; 163 tracer_enabled = save_tracer_enabled;
@@ -180,18 +190,22 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
180 ftrace_enabled = 1; 190 ftrace_enabled = 1;
181 tracer_enabled = 1; 191 tracer_enabled = 1;
182 192
183 tr->ctrl = 1; 193 ret = trace->init(tr);
184 trace->init(tr); 194 if (ret) {
195 warn_failed_init_tracer(trace, ret);
196 goto out;
197 }
198
185 /* Sleep for a 1/10 of a second */ 199 /* Sleep for a 1/10 of a second */
186 msleep(100); 200 msleep(100);
187 /* stop the tracing. */ 201 /* stop the tracing. */
188 tr->ctrl = 0; 202 tracing_stop();
189 trace->ctrl_update(tr);
190 ftrace_enabled = 0; 203 ftrace_enabled = 0;
191 204
192 /* check the trace buffer */ 205 /* check the trace buffer */
193 ret = trace_test_buffer(tr, &count); 206 ret = trace_test_buffer(tr, &count);
194 trace->reset(tr); 207 trace->reset(tr);
208 tracing_start();
195 209
196 if (!ret && !count) { 210 if (!ret && !count) {
197 printk(KERN_CONT ".. no entries found .."); 211 printk(KERN_CONT ".. no entries found ..");
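The tr->ctrl / trace->ctrl_update() pair is gone; the selftests now bracket the buffer check with the global tracing_stop()/tracing_start() calls so the ring buffer holds still while it is counted. The shape every converted test follows is roughly:

        tracing_stop();                         /* quiesce the ring buffer */
        ret = trace_test_buffer(tr, &count);    /* count and sanity-check entries */
        trace->reset(tr);                       /* tracer tears down its own state */
        tracing_start();                        /* resume normal tracing */
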
@@ -223,8 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
223 int ret; 237 int ret;
224 238
225 /* start the tracing */ 239 /* start the tracing */
226 tr->ctrl = 1; 240 ret = trace->init(tr);
227 trace->init(tr); 241 if (ret) {
242 warn_failed_init_tracer(trace, ret);
243 return ret;
244 }
245
228 /* reset the max latency */ 246 /* reset the max latency */
229 tracing_max_latency = 0; 247 tracing_max_latency = 0;
230 /* disable interrupts for a bit */ 248 /* disable interrupts for a bit */
@@ -232,13 +250,13 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
232 udelay(100); 250 udelay(100);
233 local_irq_enable(); 251 local_irq_enable();
234 /* stop the tracing. */ 252 /* stop the tracing. */
235 tr->ctrl = 0; 253 tracing_stop();
236 trace->ctrl_update(tr);
237 /* check both trace buffers */ 254 /* check both trace buffers */
238 ret = trace_test_buffer(tr, NULL); 255 ret = trace_test_buffer(tr, NULL);
239 if (!ret) 256 if (!ret)
240 ret = trace_test_buffer(&max_tr, &count); 257 ret = trace_test_buffer(&max_tr, &count);
241 trace->reset(tr); 258 trace->reset(tr);
259 tracing_start();
242 260
243 if (!ret && !count) { 261 if (!ret && !count) {
244 printk(KERN_CONT ".. no entries found .."); 262 printk(KERN_CONT ".. no entries found ..");
@@ -259,9 +277,26 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
259 unsigned long count; 277 unsigned long count;
260 int ret; 278 int ret;
261 279
280 /*
281 * Now that the big kernel lock is no longer preemptable,
282 * and this is called with the BKL held, it will always
283 * fail. If preemption is already disabled, simply
284 * pass the test. When the BKL is removed, or becomes
285 * preemptible again, we will once again test this,
286 * so keep it in.
287 */
288 if (preempt_count()) {
289 printk(KERN_CONT "can not test ... force ");
290 return 0;
291 }
292
262 /* start the tracing */ 293 /* start the tracing */
263 tr->ctrl = 1; 294 ret = trace->init(tr);
264 trace->init(tr); 295 if (ret) {
296 warn_failed_init_tracer(trace, ret);
297 return ret;
298 }
299
265 /* reset the max latency */ 300 /* reset the max latency */
266 tracing_max_latency = 0; 301 tracing_max_latency = 0;
267 /* disable preemption for a bit */ 302 /* disable preemption for a bit */
@@ -269,13 +304,13 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
269 udelay(100); 304 udelay(100);
270 preempt_enable(); 305 preempt_enable();
271 /* stop the tracing. */ 306 /* stop the tracing. */
272 tr->ctrl = 0; 307 tracing_stop();
273 trace->ctrl_update(tr);
274 /* check both trace buffers */ 308 /* check both trace buffers */
275 ret = trace_test_buffer(tr, NULL); 309 ret = trace_test_buffer(tr, NULL);
276 if (!ret) 310 if (!ret)
277 ret = trace_test_buffer(&max_tr, &count); 311 ret = trace_test_buffer(&max_tr, &count);
278 trace->reset(tr); 312 trace->reset(tr);
313 tracing_start();
279 314
280 if (!ret && !count) { 315 if (!ret && !count) {
281 printk(KERN_CONT ".. no entries found .."); 316 printk(KERN_CONT ".. no entries found ..");
@@ -296,9 +331,25 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
296 unsigned long count; 331 unsigned long count;
297 int ret; 332 int ret;
298 333
334 /*
335 * Now that the big kernel lock is no longer preemptable,
336 * and this is called with the BKL held, it will always
337 * fail. If preemption is already disabled, simply
338 * pass the test. When the BKL is removed, or becomes
339 * preemptible again, we will once again test this,
340 * so keep it in.
341 */
342 if (preempt_count()) {
343 printk(KERN_CONT "can not test ... force ");
344 return 0;
345 }
346
299 /* start the tracing */ 347 /* start the tracing */
300 tr->ctrl = 1; 348 ret = trace->init(tr);
301 trace->init(tr); 349 if (ret) {
350 warn_failed_init_tracer(trace, ret);
351 goto out;
352 }
302 353
303 /* reset the max latency */ 354 /* reset the max latency */
304 tracing_max_latency = 0; 355 tracing_max_latency = 0;
@@ -312,27 +363,30 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
312 local_irq_enable(); 363 local_irq_enable();
313 364
314 /* stop the tracing. */ 365 /* stop the tracing. */
315 tr->ctrl = 0; 366 tracing_stop();
316 trace->ctrl_update(tr);
317 /* check both trace buffers */ 367 /* check both trace buffers */
318 ret = trace_test_buffer(tr, NULL); 368 ret = trace_test_buffer(tr, NULL);
319 if (ret) 369 if (ret) {
370 tracing_start();
320 goto out; 371 goto out;
372 }
321 373
322 ret = trace_test_buffer(&max_tr, &count); 374 ret = trace_test_buffer(&max_tr, &count);
323 if (ret) 375 if (ret) {
376 tracing_start();
324 goto out; 377 goto out;
378 }
325 379
326 if (!ret && !count) { 380 if (!ret && !count) {
327 printk(KERN_CONT ".. no entries found .."); 381 printk(KERN_CONT ".. no entries found ..");
328 ret = -1; 382 ret = -1;
383 tracing_start();
329 goto out; 384 goto out;
330 } 385 }
331 386
332 /* do the test by disabling interrupts first this time */ 387 /* do the test by disabling interrupts first this time */
333 tracing_max_latency = 0; 388 tracing_max_latency = 0;
334 tr->ctrl = 1; 389 tracing_start();
335 trace->ctrl_update(tr);
336 preempt_disable(); 390 preempt_disable();
337 local_irq_disable(); 391 local_irq_disable();
338 udelay(100); 392 udelay(100);
@@ -341,8 +395,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
341 local_irq_enable(); 395 local_irq_enable();
342 396
343 /* stop the tracing. */ 397 /* stop the tracing. */
344 tr->ctrl = 0; 398 tracing_stop();
345 trace->ctrl_update(tr);
346 /* check both trace buffers */ 399 /* check both trace buffers */
347 ret = trace_test_buffer(tr, NULL); 400 ret = trace_test_buffer(tr, NULL);
348 if (ret) 401 if (ret)
@@ -358,6 +411,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
358 411
359 out: 412 out:
360 trace->reset(tr); 413 trace->reset(tr);
414 tracing_start();
361 tracing_max_latency = save_max; 415 tracing_max_latency = save_max;
362 416
363 return ret; 417 return ret;
@@ -423,8 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
423 wait_for_completion(&isrt); 477 wait_for_completion(&isrt);
424 478
425 /* start the tracing */ 479 /* start the tracing */
426 tr->ctrl = 1; 480 ret = trace->init(tr);
427 trace->init(tr); 481 if (ret) {
482 warn_failed_init_tracer(trace, ret);
483 return ret;
484 }
485
428 /* reset the max latency */ 486 /* reset the max latency */
429 tracing_max_latency = 0; 487 tracing_max_latency = 0;
430 488
@@ -448,8 +506,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448 msleep(100); 506 msleep(100);
449 507
450 /* stop the tracing. */ 508 /* stop the tracing. */
451 tr->ctrl = 0; 509 tracing_stop();
452 trace->ctrl_update(tr);
453 /* check both trace buffers */ 510 /* check both trace buffers */
454 ret = trace_test_buffer(tr, NULL); 511 ret = trace_test_buffer(tr, NULL);
455 if (!ret) 512 if (!ret)
@@ -457,6 +514,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
457 514
458 515
459 trace->reset(tr); 516 trace->reset(tr);
517 tracing_start();
460 518
461 tracing_max_latency = save_max; 519 tracing_max_latency = save_max;
462 520
@@ -480,16 +538,20 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
480 int ret; 538 int ret;
481 539
482 /* start the tracing */ 540 /* start the tracing */
483 tr->ctrl = 1; 541 ret = trace->init(tr);
484 trace->init(tr); 542 if (ret) {
543 warn_failed_init_tracer(trace, ret);
544 return ret;
545 }
546
485 /* Sleep for a 1/10 of a second */ 547 /* Sleep for a 1/10 of a second */
486 msleep(100); 548 msleep(100);
487 /* stop the tracing. */ 549 /* stop the tracing. */
488 tr->ctrl = 0; 550 tracing_stop();
489 trace->ctrl_update(tr);
490 /* check the trace buffer */ 551 /* check the trace buffer */
491 ret = trace_test_buffer(tr, &count); 552 ret = trace_test_buffer(tr, &count);
492 trace->reset(tr); 553 trace->reset(tr);
554 tracing_start();
493 555
494 if (!ret && !count) { 556 if (!ret && !count) {
495 printk(KERN_CONT ".. no entries found .."); 557 printk(KERN_CONT ".. no entries found ..");
@@ -508,17 +570,48 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
508 int ret; 570 int ret;
509 571
510 /* start the tracing */ 572 /* start the tracing */
511 tr->ctrl = 1; 573 ret = trace->init(tr);
512 trace->init(tr); 574 if (ret) {
575 warn_failed_init_tracer(trace, ret);
576 return 0;
577 }
578
513 /* Sleep for a 1/10 of a second */ 579 /* Sleep for a 1/10 of a second */
514 msleep(100); 580 msleep(100);
515 /* stop the tracing. */ 581 /* stop the tracing. */
516 tr->ctrl = 0; 582 tracing_stop();
517 trace->ctrl_update(tr);
518 /* check the trace buffer */ 583 /* check the trace buffer */
519 ret = trace_test_buffer(tr, &count); 584 ret = trace_test_buffer(tr, &count);
520 trace->reset(tr); 585 trace->reset(tr);
586 tracing_start();
521 587
522 return ret; 588 return ret;
523} 589}
524#endif /* CONFIG_SYSPROF_TRACER */ 590#endif /* CONFIG_SYSPROF_TRACER */
591
592#ifdef CONFIG_BRANCH_TRACER
593int
594trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
595{
596 unsigned long count;
597 int ret;
598
599 /* start the tracing */
600 ret = trace->init(tr);
601 if (ret) {
602 warn_failed_init_tracer(trace, ret);
603 return ret;
604 }
605
606 /* Sleep for a 1/10 of a second */
607 msleep(100);
608 /* stop the tracing. */
609 tracing_stop();
610 /* check the trace buffer */
611 ret = trace_test_buffer(tr, &count);
612 trace->reset(tr);
613 tracing_start();
614
615 return ret;
616}
617#endif /* CONFIG_BRANCH_TRACER */
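trace_selftest_startup_branch() is only compiled under CONFIG_BRANCH_TRACER and is meant to be wired up through the usual .selftest member of the branch tracer (kernel/trace/trace_branch.c, which is not shown in this hunk). A hedged sketch of that wiring, mirroring the wakeup tracer fields at the top of this section; the init/reset names are assumptions:

static struct tracer branch_trace __read_mostly = {
        .name           = "branch",
        .init           = branch_trace_init,    /* assumed names */
        .reset          = branch_trace_reset,
#ifdef CONFIG_FTRACE_SELFTEST
        .selftest       = trace_selftest_startup_branch,
#endif
};
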
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3bdb44bde4b7..fde3be15c642 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -107,8 +107,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
107 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 107 if (unlikely(!ftrace_enabled || stack_trace_disabled))
108 return; 108 return;
109 109
110 resched = need_resched(); 110 resched = ftrace_preempt_disable();
111 preempt_disable_notrace();
112 111
113 cpu = raw_smp_processor_id(); 112 cpu = raw_smp_processor_id();
114 /* no atomic needed, we only modify this variable by this cpu */ 113 /* no atomic needed, we only modify this variable by this cpu */
@@ -120,10 +119,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
120 out: 119 out:
121 per_cpu(trace_active, cpu)--; 120 per_cpu(trace_active, cpu)--;
122 /* prevent recursion in schedule */ 121 /* prevent recursion in schedule */
123 if (resched) 122 ftrace_preempt_enable(resched);
124 preempt_enable_no_resched_notrace();
125 else
126 preempt_enable_notrace();
127} 123}
128 124
129static struct ftrace_ops trace_ops __read_mostly = 125static struct ftrace_ops trace_ops __read_mostly =
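stack_trace_call() now goes through ftrace_preempt_disable()/ftrace_preempt_enable() instead of open-coding the need_resched() check. The helpers live in kernel/trace/trace.h and are not part of this hunk; presumably they wrap exactly the logic removed above, keeping a pending reschedule from pulling preempt_schedule() back into the tracer:

static inline int ftrace_preempt_disable(void)
{
        int resched = need_resched();

        preempt_disable_notrace();
        return resched;
}

static inline void ftrace_preempt_enable(int resched)
{
        if (resched)
                preempt_enable_no_resched_notrace();
        else
                preempt_enable_notrace();
}
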
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 9587d3bcba55..54960edb96d0 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -261,27 +261,17 @@ static void stop_stack_trace(struct trace_array *tr)
261 mutex_unlock(&sample_timer_lock); 261 mutex_unlock(&sample_timer_lock);
262} 262}
263 263
264static void stack_trace_init(struct trace_array *tr) 264static int stack_trace_init(struct trace_array *tr)
265{ 265{
266 sysprof_trace = tr; 266 sysprof_trace = tr;
267 267
268 if (tr->ctrl) 268 start_stack_trace(tr);
269 start_stack_trace(tr); 269 return 0;
270} 270}
271 271
272static void stack_trace_reset(struct trace_array *tr) 272static void stack_trace_reset(struct trace_array *tr)
273{ 273{
274 if (tr->ctrl) 274 stop_stack_trace(tr);
275 stop_stack_trace(tr);
276}
277
278static void stack_trace_ctrl_update(struct trace_array *tr)
279{
280 /* When starting a new trace, reset the buffers */
281 if (tr->ctrl)
282 start_stack_trace(tr);
283 else
284 stop_stack_trace(tr);
285} 275}
286 276
287static struct tracer stack_trace __read_mostly = 277static struct tracer stack_trace __read_mostly =
@@ -289,7 +279,6 @@ static struct tracer stack_trace __read_mostly =
289 .name = "sysprof", 279 .name = "sysprof",
290 .init = stack_trace_init, 280 .init = stack_trace_init,
291 .reset = stack_trace_reset, 281 .reset = stack_trace_reset,
292 .ctrl_update = stack_trace_ctrl_update,
293#ifdef CONFIG_FTRACE_SELFTEST 282#ifdef CONFIG_FTRACE_SELFTEST
294 .selftest = trace_selftest_startup_sysprof, 283 .selftest = trace_selftest_startup_sysprof,
295#endif 284#endif
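With ctrl_update removed, a tracer's lifecycle is reduced to init (now returning an error code) and reset; starting and stopping output is handled globally by tracing_start()/tracing_stop(). Modeled on the sysprof conversion above, a minimal tracer now looks roughly like this (the my_* names are purely illustrative):

static int my_tracer_init(struct trace_array *tr)
{
        /* no tr->ctrl check any more: init means "start tracing" */
        start_my_tracer(tr);                    /* illustrative helper */
        return 0;                               /* or a -errno on failure */
}

static void my_tracer_reset(struct trace_array *tr)
{
        stop_my_tracer(tr);                     /* illustrative helper */
}

static struct tracer my_tracer __read_mostly = {
        .name   = "mytracer",
        .init   = my_tracer_init,
        .reset  = my_tracer_reset,
};
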
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index af8c85664882..79602740bbb5 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
43 */ 43 */
44#define TRACEPOINT_HASH_BITS 6 44#define TRACEPOINT_HASH_BITS 6
45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) 45#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
46 47
47/* 48/*
48 * Note about RCU : 49 * Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
54 struct hlist_node hlist; 55 struct hlist_node hlist;
55 void **funcs; 56 void **funcs;
56 int refcount; /* Number of times armed. 0 if disarmed. */ 57 int refcount; /* Number of times armed. 0 if disarmed. */
57 struct rcu_head rcu;
58 void *oldptr;
59 unsigned char rcu_pending:1;
60 char name[0]; 58 char name[0];
61}; 59};
62 60
63static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; 61struct tp_probes {
62 union {
63 struct rcu_head rcu;
64 struct list_head list;
65 } u;
66 void *probes[0];
67};
64 68
65static void free_old_closure(struct rcu_head *head) 69static inline void *allocate_probes(int count)
66{ 70{
67 struct tracepoint_entry *entry = container_of(head, 71 struct tp_probes *p = kmalloc(count * sizeof(void *)
68 struct tracepoint_entry, rcu); 72 + sizeof(struct tp_probes), GFP_KERNEL);
69 kfree(entry->oldptr); 73 return p == NULL ? NULL : p->probes;
70 /* Make sure we free the data before setting the pending flag to 0 */
71 smp_wmb();
72 entry->rcu_pending = 0;
73} 74}
74 75
75static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) 76static void rcu_free_old_probes(struct rcu_head *head)
76{ 77{
77 if (!old) 78 kfree(container_of(head, struct tp_probes, u.rcu));
78 return; 79}
79 entry->oldptr = old; 80
80 entry->rcu_pending = 1; 81static inline void release_probes(void *old)
81 /* write rcu_pending before calling the RCU callback */ 82{
82 smp_wmb(); 83 if (old) {
83 call_rcu_sched(&entry->rcu, free_old_closure); 84 struct tp_probes *tp_probes = container_of(old,
85 struct tp_probes, probes[0]);
86 call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
87 }
84} 88}
85 89
86static void debug_print_probes(struct tracepoint_entry *entry) 90static void debug_print_probes(struct tracepoint_entry *entry)
87{ 91{
88 int i; 92 int i;
89 93
90 if (!tracepoint_debug) 94 if (!tracepoint_debug || !entry->funcs)
91 return; 95 return;
92 96
93 for (i = 0; entry->funcs[i]; i++) 97 for (i = 0; entry->funcs[i]; i++)
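The per-entry rcu_head/oldptr/rcu_pending bookkeeping is replaced by the tp_probes wrapper: header and probe array come from one allocation, callers only ever see the probes[] portion, and release_probes() walks back to the header to defer the kfree() to sched-RCU. The union is safe because a retired array is either parked on the old_probes list (the _noupdate path further down) or already queued to RCU, never both. The recovery step, in isolation:

        /*
         * 'old' is a pointer handed out earlier by allocate_probes(),
         * i.e. &tp_probes->probes[0]; recover the header and free it
         * once a sched-RCU grace period guarantees no caller still
         * walks the old array.
         */
        struct tp_probes *tp_probes =
                container_of(old, struct tp_probes, probes[0]);

        call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
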
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
111 return ERR_PTR(-EEXIST); 115 return ERR_PTR(-EEXIST);
112 } 116 }
113 /* + 2 : one for new probe, one for NULL func */ 117 /* + 2 : one for new probe, one for NULL func */
114 new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); 118 new = allocate_probes(nr_probes + 2);
115 if (new == NULL) 119 if (new == NULL)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
117 if (old) 121 if (old)
118 memcpy(new, old, nr_probes * sizeof(void *)); 122 memcpy(new, old, nr_probes * sizeof(void *));
119 new[nr_probes] = probe; 123 new[nr_probes] = probe;
124 new[nr_probes + 1] = NULL;
120 entry->refcount = nr_probes + 1; 125 entry->refcount = nr_probes + 1;
121 entry->funcs = new; 126 entry->funcs = new;
122 debug_print_probes(entry); 127 debug_print_probes(entry);
@@ -132,7 +137,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
132 old = entry->funcs; 137 old = entry->funcs;
133 138
134 if (!old) 139 if (!old)
135 return NULL; 140 return ERR_PTR(-ENOENT);
136 141
137 debug_print_probes(entry); 142 debug_print_probes(entry);
138 /* (N -> M), (N > 1, M >= 0) probes */ 143 /* (N -> M), (N > 1, M >= 0) probes */
@@ -151,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
151 int j = 0; 156 int j = 0;
152 /* N -> M, (N > 1, M > 0) */ 157 /* N -> M, (N > 1, M > 0) */
153 /* + 1 for NULL */ 158 /* + 1 for NULL */
154 new = kzalloc((nr_probes - nr_del + 1) 159 new = allocate_probes(nr_probes - nr_del + 1);
155 * sizeof(void *), GFP_KERNEL);
156 if (new == NULL) 160 if (new == NULL)
157 return ERR_PTR(-ENOMEM); 161 return ERR_PTR(-ENOMEM);
158 for (i = 0; old[i]; i++) 162 for (i = 0; old[i]; i++)
159 if ((probe && old[i] != probe)) 163 if ((probe && old[i] != probe))
160 new[j++] = old[i]; 164 new[j++] = old[i];
165 new[nr_probes - nr_del] = NULL;
161 entry->refcount = nr_probes - nr_del; 166 entry->refcount = nr_probes - nr_del;
162 entry->funcs = new; 167 entry->funcs = new;
163 } 168 }
@@ -215,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
215 memcpy(&e->name[0], name, name_len); 220 memcpy(&e->name[0], name, name_len);
216 e->funcs = NULL; 221 e->funcs = NULL;
217 e->refcount = 0; 222 e->refcount = 0;
218 e->rcu_pending = 0;
219 hlist_add_head(&e->hlist, head); 223 hlist_add_head(&e->hlist, head);
220 return e; 224 return e;
221} 225}
@@ -224,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
224 * Remove the tracepoint from the tracepoint hash table. Must be called with 228 * Remove the tracepoint from the tracepoint hash table. Must be called with
225 * mutex_lock held. 229 * mutex_lock held.
226 */ 230 */
227static int remove_tracepoint(const char *name) 231static inline void remove_tracepoint(struct tracepoint_entry *e)
228{ 232{
229 struct hlist_head *head;
230 struct hlist_node *node;
231 struct tracepoint_entry *e;
232 int found = 0;
233 size_t len = strlen(name) + 1;
234 u32 hash = jhash(name, len-1, 0);
235
236 head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
237 hlist_for_each_entry(e, node, head, hlist) {
238 if (!strcmp(name, e->name)) {
239 found = 1;
240 break;
241 }
242 }
243 if (!found)
244 return -ENOENT;
245 if (e->refcount)
246 return -EBUSY;
247 hlist_del(&e->hlist); 233 hlist_del(&e->hlist);
248 /* Make sure the call_rcu_sched has been executed */
249 if (e->rcu_pending)
250 rcu_barrier_sched();
251 kfree(e); 234 kfree(e);
252 return 0;
253} 235}
254 236
255/* 237/*
@@ -280,6 +262,7 @@ static void set_tracepoint(struct tracepoint_entry **entry,
280static void disable_tracepoint(struct tracepoint *elem) 262static void disable_tracepoint(struct tracepoint *elem)
281{ 263{
282 elem->state = 0; 264 elem->state = 0;
265 rcu_assign_pointer(elem->funcs, NULL);
283} 266}
284 267
285/** 268/**
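disable_tracepoint() now clears elem->funcs with rcu_assign_pointer() as well as dropping state, which matters because the call site dereferences funcs under sched-RCU and must see either a NULL pointer or a fully NULL-terminated array. The fast path expanded by the tracepoint macros of this era looks roughly like the sketch below (not part of this patch; proto and args stand for the tracepoint's declared prototype and arguments):

        void **it_func;

        rcu_read_lock_sched_notrace();
        it_func = rcu_dereference(tp->funcs);   /* NULL once disable_tracepoint() ran */
        if (it_func) {
                do {
                        ((void (*)(proto))(*it_func))(args);
                } while (*(++it_func));
        }
        rcu_read_unlock_sched_notrace();
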
@@ -320,6 +303,23 @@ static void tracepoint_update_probes(void)
320 module_update_tracepoints(); 303 module_update_tracepoints();
321} 304}
322 305
306static void *tracepoint_add_probe(const char *name, void *probe)
307{
308 struct tracepoint_entry *entry;
309 void *old;
310
311 entry = get_tracepoint(name);
312 if (!entry) {
313 entry = add_tracepoint(name);
314 if (IS_ERR(entry))
315 return entry;
316 }
317 old = tracepoint_entry_add_probe(entry, probe);
318 if (IS_ERR(old) && !entry->refcount)
319 remove_tracepoint(entry);
320 return old;
321}
322
323/** 323/**
324 * tracepoint_probe_register - Connect a probe to a tracepoint 324 * tracepoint_probe_register - Connect a probe to a tracepoint
325 * @name: tracepoint name 325 * @name: tracepoint name
@@ -330,44 +330,36 @@ static void tracepoint_update_probes(void)
330 */ 330 */
331int tracepoint_probe_register(const char *name, void *probe) 331int tracepoint_probe_register(const char *name, void *probe)
332{ 332{
333 struct tracepoint_entry *entry;
334 int ret = 0;
335 void *old; 333 void *old;
336 334
337 mutex_lock(&tracepoints_mutex); 335 mutex_lock(&tracepoints_mutex);
338 entry = get_tracepoint(name); 336 old = tracepoint_add_probe(name, probe);
339 if (!entry) {
340 entry = add_tracepoint(name);
341 if (IS_ERR(entry)) {
342 ret = PTR_ERR(entry);
343 goto end;
344 }
345 }
346 /*
347 * If we detect that a call_rcu_sched is pending for this tracepoint,
348 * make sure it's executed now.
349 */
350 if (entry->rcu_pending)
351 rcu_barrier_sched();
352 old = tracepoint_entry_add_probe(entry, probe);
353 if (IS_ERR(old)) {
354 ret = PTR_ERR(old);
355 goto end;
356 }
357 mutex_unlock(&tracepoints_mutex); 337 mutex_unlock(&tracepoints_mutex);
338 if (IS_ERR(old))
339 return PTR_ERR(old);
340
358 tracepoint_update_probes(); /* may update entry */ 341 tracepoint_update_probes(); /* may update entry */
359 mutex_lock(&tracepoints_mutex); 342 release_probes(old);
360 entry = get_tracepoint(name); 343 return 0;
361 WARN_ON(!entry);
362 if (entry->rcu_pending)
363 rcu_barrier_sched();
364 tracepoint_entry_free_old(entry, old);
365end:
366 mutex_unlock(&tracepoints_mutex);
367 return ret;
368} 344}
369EXPORT_SYMBOL_GPL(tracepoint_probe_register); 345EXPORT_SYMBOL_GPL(tracepoint_probe_register);
370 346
347static void *tracepoint_remove_probe(const char *name, void *probe)
348{
349 struct tracepoint_entry *entry;
350 void *old;
351
352 entry = get_tracepoint(name);
353 if (!entry)
354 return ERR_PTR(-ENOENT);
355 old = tracepoint_entry_remove_probe(entry, probe);
356 if (IS_ERR(old))
357 return old;
358 if (!entry->refcount)
359 remove_tracepoint(entry);
360 return old;
361}
362
371/** 363/**
372 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint 364 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
373 * @name: tracepoint name 365 * @name: tracepoint name
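For callers the register/unregister API is unchanged: a probe whose signature matches the tracepoint's TPPROTO is attached and detached by name, and error handling is now a plain return value. A hedged usage sketch; the tracepoint name and probe below are hypothetical:

/* probe for a tracepoint declared roughly as
 * DECLARE_TRACE(my_event, TPPROTO(int value), TPARGS(value)) */
static void probe_my_event(int value)
{
        /* record or count the event */
}

static int attach_my_probe(void)
{
        /* -EEXIST if this probe is already on the list, -ENOMEM, ... */
        return tracepoint_probe_register("my_event", (void *)probe_my_event);
}

static void detach_my_probe(void)
{
        /* the retired probe array is freed after a sched-RCU grace period */
        tracepoint_probe_unregister("my_event", (void *)probe_my_event);
}
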
@@ -380,38 +372,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
380 */ 372 */
381int tracepoint_probe_unregister(const char *name, void *probe) 373int tracepoint_probe_unregister(const char *name, void *probe)
382{ 374{
383 struct tracepoint_entry *entry;
384 void *old; 375 void *old;
385 int ret = -ENOENT;
386 376
387 mutex_lock(&tracepoints_mutex); 377 mutex_lock(&tracepoints_mutex);
388 entry = get_tracepoint(name); 378 old = tracepoint_remove_probe(name, probe);
389 if (!entry)
390 goto end;
391 if (entry->rcu_pending)
392 rcu_barrier_sched();
393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 "that doesn't exist\n");
397 goto end;
398 }
399 mutex_unlock(&tracepoints_mutex); 379 mutex_unlock(&tracepoints_mutex);
380 if (IS_ERR(old))
381 return PTR_ERR(old);
382
400 tracepoint_update_probes(); /* may update entry */ 383 tracepoint_update_probes(); /* may update entry */
384 release_probes(old);
385 return 0;
386}
387EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
388
389static LIST_HEAD(old_probes);
390static int need_update;
391
392static void tracepoint_add_old_probes(void *old)
393{
394 need_update = 1;
395 if (old) {
396 struct tp_probes *tp_probes = container_of(old,
397 struct tp_probes, probes[0]);
398 list_add(&tp_probes->u.list, &old_probes);
399 }
400}
401
402/**
403 * tracepoint_probe_register_noupdate - register a probe but not connect
404 * @name: tracepoint name
405 * @probe: probe handler
406 *
407 * caller must call tracepoint_probe_update_all()
408 */
409int tracepoint_probe_register_noupdate(const char *name, void *probe)
410{
411 void *old;
412
401 mutex_lock(&tracepoints_mutex); 413 mutex_lock(&tracepoints_mutex);
402 entry = get_tracepoint(name); 414 old = tracepoint_add_probe(name, probe);
403 if (!entry) 415 if (IS_ERR(old)) {
404 goto end; 416 mutex_unlock(&tracepoints_mutex);
405 if (entry->rcu_pending) 417 return PTR_ERR(old);
406 rcu_barrier_sched(); 418 }
407 tracepoint_entry_free_old(entry, old); 419 tracepoint_add_old_probes(old);
408 remove_tracepoint(name); /* Ignore busy error message */
409 ret = 0;
410end:
411 mutex_unlock(&tracepoints_mutex); 420 mutex_unlock(&tracepoints_mutex);
412 return ret; 421 return 0;
413} 422}
414EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); 423EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
424
425/**
426 * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
427 * @name: tracepoint name
428 * @probe: probe function pointer
429 *
430 * caller must call tracepoint_probe_update_all()
431 */
432int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
433{
434 void *old;
435
436 mutex_lock(&tracepoints_mutex);
437 old = tracepoint_remove_probe(name, probe);
438 if (IS_ERR(old)) {
439 mutex_unlock(&tracepoints_mutex);
440 return PTR_ERR(old);
441 }
442 tracepoint_add_old_probes(old);
443 mutex_unlock(&tracepoints_mutex);
444 return 0;
445}
446EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
447
448/**
449 * tracepoint_probe_update_all - update tracepoints
450 */
451void tracepoint_probe_update_all(void)
452{
453 LIST_HEAD(release_probes);
454 struct tp_probes *pos, *next;
455
456 mutex_lock(&tracepoints_mutex);
457 if (!need_update) {
458 mutex_unlock(&tracepoints_mutex);
459 return;
460 }
461 if (!list_empty(&old_probes))
462 list_replace_init(&old_probes, &release_probes);
463 need_update = 0;
464 mutex_unlock(&tracepoints_mutex);
465
466 tracepoint_update_probes();
467 list_for_each_entry_safe(pos, next, &release_probes, u.list) {
468 list_del(&pos->u.list);
469 call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
470 }
471}
472EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
415 473
416/** 474/**
417 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. 475 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
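The _noupdate variants added above let a caller queue many probe changes and pay for the tracepoint walk plus the RCU deferral once, through tracepoint_probe_update_all(). A hedged sketch of the intended batching pattern (the ev[] table and its fields are invented for illustration):

        int i, ret;

        for (i = 0; i < nr_events; i++) {
                ret = tracepoint_probe_register_noupdate(ev[i].name, ev[i].probe);
                if (ret)
                        printk(KERN_WARNING "failed to queue %s: %d\n",
                               ev[i].name, ret);
        }

        /* one pass over all tracepoints, one batch of sched-RCU callbacks */
        tracepoint_probe_update_all();
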
@@ -483,3 +541,36 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)
483 iter->tracepoint = NULL; 541 iter->tracepoint = NULL;
484} 542}
485EXPORT_SYMBOL_GPL(tracepoint_iter_reset); 543EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
544
545#ifdef CONFIG_MODULES
546
547int tracepoint_module_notify(struct notifier_block *self,
548 unsigned long val, void *data)
549{
550 struct module *mod = data;
551
552 switch (val) {
553 case MODULE_STATE_COMING:
554 tracepoint_update_probe_range(mod->tracepoints,
555 mod->tracepoints + mod->num_tracepoints);
556 break;
557 case MODULE_STATE_GOING:
558 tracepoint_update_probe_range(mod->tracepoints,
559 mod->tracepoints + mod->num_tracepoints);
560 break;
561 }
562 return 0;
563}
564
565struct notifier_block tracepoint_module_nb = {
566 .notifier_call = tracepoint_module_notify,
567 .priority = 0,
568};
569
570static int init_tracepoints(void)
571{
572 return register_module_notifier(&tracepoint_module_nb);
573}
574__initcall(init_tracepoints);
575
576#endif /* CONFIG_MODULES */