Diffstat (limited to 'kernel/trace')
 -rw-r--r--  kernel/trace/Kconfig               |   64
 -rw-r--r--  kernel/trace/Makefile              |    4
 -rw-r--r--  kernel/trace/ftrace.c              |  281
 -rw-r--r--  kernel/trace/ring_buffer.c         | 2014
 -rw-r--r--  kernel/trace/trace.c               | 1849
 -rw-r--r--  kernel/trace/trace.h               |  211
 -rw-r--r--  kernel/trace/trace_boot.c          |  126
 -rw-r--r--  kernel/trace/trace_functions.c     |    2
 -rw-r--r--  kernel/trace/trace_irqsoff.c       |   27
 -rw-r--r--  kernel/trace/trace_mmiotrace.c     |  116
 -rw-r--r--  kernel/trace/trace_nop.c           |   64
 -rw-r--r--  kernel/trace/trace_sched_switch.c  |  137
 -rw-r--r--  kernel/trace/trace_sched_wakeup.c  |  175
 -rw-r--r--  kernel/trace/trace_selftest.c      |   83
 -rw-r--r--  kernel/trace/trace_stack.c         |  310
 -rw-r--r--  kernel/trace/trace_sysprof.c       |    6
 16 files changed, 4129 insertions, 1340 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd6..1cb3e1f616a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,23 +1,37 @@
 #
 # Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
 #
+
+config NOP_TRACER
+	bool
+
 config HAVE_FTRACE
 	bool
+	select NOP_TRACER
 
 config HAVE_DYNAMIC_FTRACE
 	bool
 
+config HAVE_FTRACE_MCOUNT_RECORD
+	bool
+
 config TRACER_MAX_TRACE
 	bool
 
+config RING_BUFFER
+	bool
+
 config TRACING
 	bool
 	select DEBUG_FS
+	select RING_BUFFER
 	select STACKTRACE
+	select TRACEPOINTS
 
 config FTRACE
 	bool "Kernel Function Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select FRAME_POINTER
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
@@ -36,6 +50,7 @@ config IRQSOFF_TRACER
 	depends on TRACE_IRQFLAGS_SUPPORT
 	depends on GENERIC_TIME
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACE_IRQFLAGS
 	select TRACING
 	select TRACER_MAX_TRACE
@@ -59,6 +74,7 @@ config PREEMPT_TRACER
 	depends on GENERIC_TIME
 	depends on PREEMPT
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select TRACER_MAX_TRACE
 	help
@@ -86,6 +102,7 @@ config SYSPROF_TRACER
 config SCHED_TRACER
 	bool "Scheduling Latency Tracer"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select CONTEXT_SWITCH_TRACER
 	select TRACER_MAX_TRACE
@@ -96,16 +113,56 @@ config SCHED_TRACER
 config CONTEXT_SWITCH_TRACER
 	bool "Trace process context switches"
 	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
 	select TRACING
 	select MARKERS
 	help
 	  This tracer gets called from the context switch and records
 	  all switching of tasks.
 
+config BOOT_TRACER
+	bool "Trace boot initcalls"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer helps developers to optimize boot times: it records
+	  the timings of the initcalls and traces key events and the identity
+	  of tasks that can cause boot delays, such as context-switches.
+
+	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  produce pretty graphics about boot inefficiencies, giving a visual
+	  representation of the delays during initcalls - but the raw
+	  /debug/tracing/trace text output is readable too.
+
+	  ( Note that tracing self tests can't be enabled if this tracer is
+	    selected, because the self-tests are an initcall as well and that
+	    would invalidate the boot trace. )
+
+config STACK_TRACER
+	bool "Trace max stack"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select FTRACE
+	select STACKTRACE
+	help
+	  This special tracer records the maximum stack footprint of the
+	  kernel and displays it in debugfs/tracing/stack_trace.
+
+	  This tracer works by hooking into every function call that the
+	  kernel executes, and keeping a maximum stack depth value and
+	  stack-trace saved. Because this logic has to execute in every
+	  kernel function, all the time, this option can slow down the
+	  kernel measurably and is generally intended for kernel
+	  developers only.
+
+	  Say N if unsure.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
 	depends on HAVE_DYNAMIC_FTRACE
+	depends on DEBUG_KERNEL
 	default y
 	help
 	  This option will modify all the calls to ftrace dynamically
@@ -121,12 +178,17 @@ config DYNAMIC_FTRACE
 	  were made. If so, it runs stop_machine (stops all CPUS)
 	  and modifies the code to jump over the call to ftrace.
 
+config FTRACE_MCOUNT_RECORD
+	def_bool y
+	depends on DYNAMIC_FTRACE
+	depends on HAVE_FTRACE_MCOUNT_RECORD
+
 config FTRACE_SELFTEST
 	bool
 
 config FTRACE_STARTUP_TEST
 	bool "Perform a startup test on ftrace"
-	depends on TRACING
+	depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER
 	select FTRACE_SELFTEST
 	help
 	  This option performs a series of startup tests on ftrace. On bootup
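Note on the DYNAMIC_FTRACE help text above: the call-site patching it describes runs under stop_machine(), which quiesces all CPUs while kernel text is rewritten. A minimal sketch of that pattern, using the stop_machine() signature this series migrates to in kernel/trace/ftrace.c (the callback names are illustrative, not from the patch):

    #include <linux/stop_machine.h>

    /* illustrative: runs with every other CPU stopped, so kernel text
     * (e.g. recorded mcount call sites) can be patched safely */
    static int __patch_call_sites(void *data)
    {
    	/* ... rewrite call instructions to nops here ... */
    	return 0;
    }

    static void patch_call_sites(void)
    {
    	/* NULL cpumask: the callback may run on any one CPU */
    	stop_machine(__patch_call_sites, NULL, NULL);
    }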
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 71d17de1728..a85dfba88ba 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -11,6 +11,7 @@ obj-y += trace_selftest_dynamic.o
 endif
 
 obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
@@ -19,6 +20,9 @@ obj-$(CONFIG_FTRACE) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
+obj-$(CONFIG_NOP_TRACER) += trace_nop.o
+obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
+obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4231a3dc224..4dda4f60a2a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -81,7 +81,7 @@ void clear_ftrace_function(void)
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
-	/* Should never be called by interrupts */
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	ops->next = ftrace_list;
@@ -115,6 +115,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	struct ftrace_ops **p;
 	int ret = 0;
 
+	/* should not be called from interrupt context */
 	spin_lock(&ftrace_lock);
 
 	/*
@@ -153,6 +154,30 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+#ifndef CONFIG_FTRACE_MCOUNT_RECORD
+/*
+ * The hash lock is only needed when the recording of the mcount
+ * callers is dynamic, that is, done by the callers themselves and
+ * not recorded at compile time.
+ */
+static DEFINE_SPINLOCK(ftrace_hash_lock);
+#define ftrace_hash_lock(flags)	  spin_lock_irqsave(&ftrace_hash_lock, flags)
+#define ftrace_hash_unlock(flags) \
+			spin_unlock_irqrestore(&ftrace_hash_lock, flags)
+#else
+/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
+#define ftrace_hash_lock(flags)	  do { (void)(flags); } while (0)
+#define ftrace_hash_unlock(flags) do { } while (0)
+#endif
+
+/*
+ * Since MCOUNT_ADDR may point to mcount itself, we do not want
+ * to get it confused by reading a reference in the code as we
+ * are parsing the objcopy output of text. Use a variable for
+ * it instead.
+ */
+static unsigned long mcount_addr = MCOUNT_ADDR;
+
 static struct task_struct *ftraced_task;
 
 enum {
@@ -171,7 +196,6 @@ static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
 
 static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
 
-static DEFINE_SPINLOCK(ftrace_shutdown_lock);
 static DEFINE_MUTEX(ftraced_lock);
 static DEFINE_MUTEX(ftrace_regex_lock);
 
@@ -294,13 +318,37 @@ static inline void ftrace_del_hash(struct dyn_ftrace *node)
 
 static void ftrace_free_rec(struct dyn_ftrace *rec)
 {
-	/* no locking, only called from kstop_machine */
-
 	rec->ip = (unsigned long)ftrace_free_records;
 	ftrace_free_records = rec;
 	rec->flags |= FTRACE_FL_FREE;
 }
 
+void ftrace_release(void *start, unsigned long size)
+{
+	struct dyn_ftrace *rec;
+	struct ftrace_page *pg;
+	unsigned long s = (unsigned long)start;
+	unsigned long e = s + size;
+	int i;
+
+	if (ftrace_disabled || !start)
+		return;
+
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
+
+	for (pg = ftrace_pages_start; pg; pg = pg->next) {
+		for (i = 0; i < pg->index; i++) {
+			rec = &pg->records[i];
+
+			if ((rec->ip >= s) && (rec->ip < e))
+				ftrace_free_rec(rec);
+		}
+	}
+	spin_unlock(&ftrace_lock);
+
+}
+
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
 	struct dyn_ftrace *rec;
@@ -338,7 +386,6 @@ ftrace_record_ip(unsigned long ip)
 	unsigned long flags;
 	unsigned long key;
 	int resched;
-	int atomic;
 	int cpu;
 
 	if (!ftrace_enabled || ftrace_disabled)
@@ -368,9 +415,7 @@ ftrace_record_ip(unsigned long ip)
 	if (ftrace_ip_in_hash(ip, key))
 		goto out;
 
-	atomic = irqs_disabled();
-
-	spin_lock_irqsave(&ftrace_shutdown_lock, flags);
+	ftrace_hash_lock(flags);
 
 	/* This ip may have hit the hash before the lock */
 	if (ftrace_ip_in_hash(ip, key))
@@ -387,7 +432,7 @@ ftrace_record_ip(unsigned long ip)
 		ftraced_trigger = 1;
 
  out_unlock:
-	spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
+	ftrace_hash_unlock(flags);
  out:
 	per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
 
@@ -531,6 +576,16 @@ static void ftrace_shutdown_replenish(void)
 	ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
 }
 
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+	int i;
+
+	printk(KERN_CONT "%s", fmt);
+
+	for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
 static int
 ftrace_code_disable(struct dyn_ftrace *rec)
 {
@@ -541,10 +596,27 @@ ftrace_code_disable(struct dyn_ftrace *rec)
 	ip = rec->ip;
 
 	nop = ftrace_nop_replace();
-	call = ftrace_call_replace(ip, MCOUNT_ADDR);
+	call = ftrace_call_replace(ip, mcount_addr);
 
 	failed = ftrace_modify_code(ip, call, nop);
 	if (failed) {
+		switch (failed) {
+		case 1:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace faulted on modifying ");
+			print_ip_sym(ip);
+			break;
+		case 2:
+			WARN_ON_ONCE(1);
+			pr_info("ftrace failed to modify ");
+			print_ip_sym(ip);
+			print_ip_ins(" expected: ", call);
+			print_ip_ins(" actual: ", (unsigned char *)ip);
+			print_ip_ins(" replace: ", nop);
+			printk(KERN_CONT "\n");
+			break;
+		}
+
 		rec->flags |= FTRACE_FL_FAILED;
 		return 0;
 	}
@@ -587,7 +659,7 @@ static int __ftrace_modify_code(void *data)
 
 static void ftrace_run_update_code(int command)
 {
-	stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
+	stop_machine(__ftrace_modify_code, &command, NULL);
 }
 
 void ftrace_disable_daemon(void)
@@ -787,52 +859,12 @@ static int ftrace_update_code(void)
 	    !ftrace_enabled || !ftraced_trigger)
 		return 0;
 
-	stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
+	stop_machine(__ftrace_update_code, NULL, NULL);
 
 	return 1;
 }
 
-static int ftraced(void *ignore)
-{
-	unsigned long usecs;
-
-	while (!kthread_should_stop()) {
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		/* check once a second */
-		schedule_timeout(HZ);
-
-		if (unlikely(ftrace_disabled))
-			continue;
-
-		mutex_lock(&ftrace_sysctl_lock);
-		mutex_lock(&ftraced_lock);
-		if (!ftraced_suspend && !ftraced_stop &&
-		    ftrace_update_code()) {
-			usecs = nsecs_to_usecs(ftrace_update_time);
-			if (ftrace_update_tot_cnt > 100000) {
-				ftrace_update_tot_cnt = 0;
-				pr_info("hm, dftrace overflow: %lu change%s"
-					" (%lu total) in %lu usec%s\n",
-					ftrace_update_cnt,
-					ftrace_update_cnt != 1 ? "s" : "",
-					ftrace_update_tot_cnt,
-					usecs, usecs != 1 ? "s" : "");
-				ftrace_disabled = 1;
-				WARN_ON_ONCE(1);
-			}
-		}
-		mutex_unlock(&ftraced_lock);
-		mutex_unlock(&ftrace_sysctl_lock);
-
-		ftrace_shutdown_replenish();
-	}
-	__set_current_state(TASK_RUNNING);
-	return 0;
-}
-
-static int __init ftrace_dyn_table_alloc(void)
+static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
 {
 	struct ftrace_page *pg;
 	int cnt;
@@ -859,7 +891,9 @@ static int __init ftrace_dyn_table_alloc(void)
 
 	pg = ftrace_pages = ftrace_pages_start;
 
-	cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
+	cnt = num_to_init / ENTRIES_PER_PAGE;
+	pr_info("ftrace: allocating %ld hash entries in %d pages\n",
+		num_to_init, cnt);
 
 	for (i = 0; i < cnt; i++) {
 		pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -901,6 +935,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 
 	(*pos)++;
 
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 retry:
 	if (iter->idx >= iter->pg->index) {
 		if (iter->pg->next) {
@@ -910,15 +946,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 		}
 	} else {
 		rec = &iter->pg->records[iter->idx++];
-		if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
+		if ((rec->flags & FTRACE_FL_FREE) ||
+
+		    (!(iter->flags & FTRACE_ITER_FAILURES) &&
 		     (rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_FAILURES) &&
-		     (!(rec->flags & FTRACE_FL_FAILED) ||
-		      (rec->flags & FTRACE_FL_FREE))) ||
-
-		    ((iter->flags & FTRACE_ITER_FILTER) &&
-		     !(rec->flags & FTRACE_FL_FILTER)) ||
+		     !(rec->flags & FTRACE_FL_FAILED)) ||
 
 		    ((iter->flags & FTRACE_ITER_NOTRACE) &&
 		     !(rec->flags & FTRACE_FL_NOTRACE))) {
@@ -926,6 +960,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 			goto retry;
 		}
 	}
+	spin_unlock(&ftrace_lock);
 
 	iter->pos = *pos;
 
@@ -1039,8 +1074,8 @@ static void ftrace_filter_reset(int enable)
 	unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
 	unsigned i;
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 0;
 	pg = ftrace_pages_start;
@@ -1053,7 +1088,7 @@ static void ftrace_filter_reset(int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static int
@@ -1165,8 +1200,8 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 	}
 
-	/* keep kstop machine from running */
-	preempt_disable();
+	/* should not be called from interrupt context */
+	spin_lock(&ftrace_lock);
 	if (enable)
 		ftrace_filtered = 1;
 	pg = ftrace_pages_start;
@@ -1203,7 +1238,7 @@ ftrace_match(unsigned char *buff, int len, int enable)
 		}
 		pg = pg->next;
 	}
-	preempt_enable();
+	spin_unlock(&ftrace_lock);
 }
 
 static ssize_t
@@ -1556,6 +1591,114 @@ static __init int ftrace_init_debugfs(void)
 
 fs_initcall(ftrace_init_debugfs);
 
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+static int ftrace_convert_nops(unsigned long *start,
+			       unsigned long *end)
+{
+	unsigned long *p;
+	unsigned long addr;
+	unsigned long flags;
+
+	p = start;
+	while (p < end) {
+		addr = ftrace_call_adjust(*p++);
+		/* should not be called from interrupt context */
+		spin_lock(&ftrace_lock);
+		ftrace_record_ip(addr);
+		spin_unlock(&ftrace_lock);
+		ftrace_shutdown_replenish();
+	}
+
+	/* p is ignored */
+	local_irq_save(flags);
+	__ftrace_update_code(p);
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+void ftrace_init_module(unsigned long *start, unsigned long *end)
+{
+	if (ftrace_disabled || start == end)
+		return;
+	ftrace_convert_nops(start, end);
+}
+
+extern unsigned long __start_mcount_loc[];
+extern unsigned long __stop_mcount_loc[];
+
+void __init ftrace_init(void)
+{
+	unsigned long count, addr, flags;
+	int ret;
+
+	/* Keep the ftrace pointer to the stub */
+	addr = (unsigned long)ftrace_stub;
+
+	local_irq_save(flags);
+	ftrace_dyn_arch_init(&addr);
+	local_irq_restore(flags);
+
+	/* ftrace_dyn_arch_init places the return code in addr */
+	if (addr)
+		goto failed;
+
+	count = __stop_mcount_loc - __start_mcount_loc;
+
+	ret = ftrace_dyn_table_alloc(count);
+	if (ret)
+		goto failed;
+
+	last_ftrace_enabled = ftrace_enabled = 1;
+
+	ret = ftrace_convert_nops(__start_mcount_loc,
+				  __stop_mcount_loc);
+
+	return;
+ failed:
+	ftrace_disabled = 1;
+}
+#else /* CONFIG_FTRACE_MCOUNT_RECORD */
+static int ftraced(void *ignore)
+{
+	unsigned long usecs;
+
+	while (!kthread_should_stop()) {
+
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* check once a second */
+		schedule_timeout(HZ);
+
+		if (unlikely(ftrace_disabled))
+			continue;
+
+		mutex_lock(&ftrace_sysctl_lock);
+		mutex_lock(&ftraced_lock);
+		if (!ftraced_suspend && !ftraced_stop &&
+		    ftrace_update_code()) {
+			usecs = nsecs_to_usecs(ftrace_update_time);
+			if (ftrace_update_tot_cnt > 100000) {
+				ftrace_update_tot_cnt = 0;
+				pr_info("hm, dftrace overflow: %lu change%s"
+					" (%lu total) in %lu usec%s\n",
+					ftrace_update_cnt,
+					ftrace_update_cnt != 1 ? "s" : "",
+					ftrace_update_tot_cnt,
+					usecs, usecs != 1 ? "s" : "");
+				ftrace_disabled = 1;
+				WARN_ON_ONCE(1);
+			}
+		}
+		mutex_unlock(&ftraced_lock);
+		mutex_unlock(&ftrace_sysctl_lock);
+
+		ftrace_shutdown_replenish();
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
 static int __init ftrace_dynamic_init(void)
 {
 	struct task_struct *p;
@@ -1564,7 +1707,7 @@ static int __init ftrace_dynamic_init(void)
 
 	addr = (unsigned long)ftrace_record_ip;
 
-	stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
+	stop_machine(ftrace_dyn_arch_init, &addr, NULL);
 
 	/* ftrace_dyn_arch_init places the return code in addr */
 	if (addr) {
@@ -1572,7 +1715,7 @@ static int __init ftrace_dynamic_init(void)
 		goto failed;
 	}
 
-	ret = ftrace_dyn_table_alloc();
+	ret = ftrace_dyn_table_alloc(NR_TO_INIT);
 	if (ret)
 		goto failed;
 
@@ -1593,6 +1736,8 @@ static int __init ftrace_dynamic_init(void)
 }
 
 core_initcall(ftrace_dynamic_init);
+#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
+
 #else
 # define ftrace_startup() do { } while (0)
 # define ftrace_shutdown() do { } while (0)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
new file mode 100644
index 00000000000..94af1fe56bb
--- /dev/null
+++ b/kernel/trace/ring_buffer.c
@@ -0,0 +1,2014 @@
+/*
+ * Generic ring buffer
+ *
+ * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>	/* used for sched_clock() (for now) */
+#include <linux/init.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+
+/* Up this if you want to test the TIME_EXTENTS and normalization */
+#define DEBUG_SHIFT 0
+
+/* FIXME!!! */
+u64 ring_buffer_time_stamp(int cpu)
+{
+	/* shift to debug/test normalization and TIME_EXTENTS */
+	return sched_clock() << DEBUG_SHIFT;
+}
+
+void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
+{
+	/* Just stupid testing the normalize function and deltas */
+	*ts >>= DEBUG_SHIFT;
+}
+
+#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
+#define RB_ALIGNMENT_SHIFT	2
+#define RB_ALIGNMENT		(1 << RB_ALIGNMENT_SHIFT)
+#define RB_MAX_SMALL_DATA	28
+
+enum {
+	RB_LEN_TIME_EXTEND = 8,
+	RB_LEN_TIME_STAMP = 16,
+};
+
+/* inline for ring buffer fast paths */
+static inline unsigned
+rb_event_length(struct ring_buffer_event *event)
+{
+	unsigned length;
+
+	switch (event->type) {
+	case RINGBUF_TYPE_PADDING:
+		/* undefined */
+		return -1;
+
+	case RINGBUF_TYPE_TIME_EXTEND:
+		return RB_LEN_TIME_EXTEND;
+
+	case RINGBUF_TYPE_TIME_STAMP:
+		return RB_LEN_TIME_STAMP;
+
+	case RINGBUF_TYPE_DATA:
+		if (event->len)
+			length = event->len << RB_ALIGNMENT_SHIFT;
+		else
+			length = event->array[0];
+		return length + RB_EVNT_HDR_SIZE;
+	default:
+		BUG();
+	}
+	/* not hit */
+	return 0;
+}
+
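Note: the length encoding decoded above pairs with rb_calculate_event_length() further down in this file. A worked example (a standalone sketch, not patch code; the 4-byte header size assumes struct ring_buffer_event is a single 32-bit bitfield word plus a flexible array, per the header this series introduces):

    #include <stdio.h>

    #define RB_EVNT_HDR_SIZE   4u	/* assumed: one 32-bit header word */
    #define RB_ALIGNMENT       4u	/* 1 << RB_ALIGNMENT_SHIFT */
    #define RB_MAX_SMALL_DATA  28u

    /* mirrors rb_calculate_event_length() from this patch */
    static unsigned event_length(unsigned length)
    {
    	if (!length)
    		length = 1;	/* zero length can cause confusion */
    	if (length > RB_MAX_SMALL_DATA)
    		length += 4;	/* byte count spills into array[0] */
    	length += RB_EVNT_HDR_SIZE;
    	return (length + RB_ALIGNMENT - 1) & ~(RB_ALIGNMENT - 1);
    }

    int main(void)
    {
    	printf("%u\n", event_length(12));	/* 16: fits len field, 12 >> 2 */
    	printf("%u\n", event_length(100));	/* 108: len = 0, array[0] = 100 */
    	return 0;
    }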
+/**
+ * ring_buffer_event_length - return the length of the event
+ * @event: the event to get the length of
+ */
+unsigned ring_buffer_event_length(struct ring_buffer_event *event)
+{
+	return rb_event_length(event);
+}
+
+/* inline for ring buffer fast paths */
+static inline void *
+rb_event_data(struct ring_buffer_event *event)
+{
+	BUG_ON(event->type != RINGBUF_TYPE_DATA);
+	/* If length is in len field, then array[0] has the data */
+	if (event->len)
+		return (void *)&event->array[0];
+	/* Otherwise length is in array[0] and array[1] has the data */
+	return (void *)&event->array[1];
+}
+
+/**
+ * ring_buffer_event_data - return the data of the event
+ * @event: the event to get the data from
+ */
+void *ring_buffer_event_data(struct ring_buffer_event *event)
+{
+	return rb_event_data(event);
+}
+
+#define for_each_buffer_cpu(buffer, cpu)		\
+	for_each_cpu_mask(cpu, buffer->cpumask)
+
+#define TS_SHIFT	27
+#define TS_MASK		((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST	(~TS_MASK)
+
+/*
+ * This hack stolen from mm/slob.c.
+ * We can store per page timing information in the page frame of the page.
+ * Thanks to Peter Zijlstra for suggesting this idea.
+ */
+struct buffer_page {
+	u64		time_stamp;	/* page time stamp */
+	local_t		write;		/* index for next write */
+	local_t		commit;		/* committed write index */
+	unsigned	read;		/* index for next read */
+	struct list_head list;		/* list of free pages */
+	void		*page;		/* Actual data page */
+};
+
+/*
+ * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
+ * this issue out.
+ */
+static inline void free_buffer_page(struct buffer_page *bpage)
+{
+	if (bpage->page)
+		__free_page(bpage->page);
+	kfree(bpage);
+}
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline int test_time_stamp(u64 delta)
+{
+	if (delta & TS_DELTA_TEST)
+		return 1;
+	return 0;
+}
+
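Note: with ring_buffer_time_stamp() returning nanoseconds from sched_clock(), the 27-bit delta checked here covers at most 2^27 - 1 ns, roughly 134 ms, before a TIME_EXTEND event must be inserted. A quick standalone check of that arithmetic:

    #include <stdio.h>

    int main(void)
    {
    	unsigned long long ts_mask = (1ULL << 27) - 1;	/* TS_MASK */

    	/* 134217727 ns ~= 134.2 ms between in-page timestamps */
    	printf("max delta: %llu ns (%.1f ms)\n",
    	       ts_mask, ts_mask / 1e6);
    	return 0;
    }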
+#define BUF_PAGE_SIZE PAGE_SIZE
+
+/*
+ * head_page == tail_page && head == tail then buffer is empty.
+ */
+struct ring_buffer_per_cpu {
+	int			cpu;
+	struct ring_buffer	*buffer;
+	spinlock_t		lock;
+	struct lock_class_key	lock_key;
+	struct list_head	pages;
+	struct buffer_page	*head_page;	/* read from head */
+	struct buffer_page	*tail_page;	/* write to tail */
+	struct buffer_page	*commit_page;	/* committed pages */
+	struct buffer_page	*reader_page;
+	unsigned long		overrun;
+	unsigned long		entries;
+	u64			write_stamp;
+	u64			read_stamp;
+	atomic_t		record_disabled;
+};
+
+struct ring_buffer {
+	unsigned long		size;
+	unsigned		pages;
+	unsigned		flags;
+	int			cpus;
+	cpumask_t		cpumask;
+	atomic_t		record_disabled;
+
+	struct mutex		mutex;
+
+	struct ring_buffer_per_cpu **buffers;
+};
+
+struct ring_buffer_iter {
+	struct ring_buffer_per_cpu	*cpu_buffer;
+	unsigned long			head;
+	struct buffer_page		*head_page;
+	u64				read_stamp;
+};
+
+#define RB_WARN_ON(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
+
+#define RB_WARN_ON_RET(buffer, cond)				\
+	do {							\
+		if (unlikely(cond)) {				\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+			return -1;				\
+		}						\
+	} while (0)
+
+#define RB_WARN_ON_ONCE(buffer, cond)				\
+	do {							\
+		static int once;				\
+		if (unlikely(cond) && !once) {			\
+			once++;					\
+			atomic_inc(&buffer->record_disabled);	\
+			WARN_ON(1);				\
+		}						\
+	} while (0)
+
+/**
+ * check_pages - integrity check of buffer pages
+ * @cpu_buffer: CPU buffer with pages to test
+ *
+ * As a safety measure we check to make sure the data pages have not
+ * been corrupted.
+ */
+static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+
+	RB_WARN_ON_RET(cpu_buffer, head->next->prev != head);
+	RB_WARN_ON_RET(cpu_buffer, head->prev->next != head);
+
+	list_for_each_entry_safe(page, tmp, head, list) {
+		RB_WARN_ON_RET(cpu_buffer,
+			       page->list.next->prev != &page->list);
+		RB_WARN_ON_RET(cpu_buffer,
+			       page->list.prev->next != &page->list);
+	}
+
+	return 0;
+}
+
+static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
+			     unsigned nr_pages)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+	unsigned long addr;
+	LIST_HEAD(pages);
+	unsigned i;
+
+	for (i = 0; i < nr_pages; i++) {
+		page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+				    GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
+		if (!page)
+			goto free_pages;
+		list_add(&page->list, &pages);
+
+		addr = __get_free_page(GFP_KERNEL);
+		if (!addr)
+			goto free_pages;
+		page->page = (void *)addr;
+	}
+
+	list_splice(&pages, head);
+
+	rb_check_pages(cpu_buffer);
+
+	return 0;
+
+ free_pages:
+	list_for_each_entry_safe(page, tmp, &pages, list) {
+		list_del_init(&page->list);
+		free_buffer_page(page);
+	}
+	return -ENOMEM;
+}
+
+static struct ring_buffer_per_cpu *
+rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *page;
+	unsigned long addr;
+	int ret;
+
+	cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
+				  GFP_KERNEL, cpu_to_node(cpu));
+	if (!cpu_buffer)
+		return NULL;
+
+	cpu_buffer->cpu = cpu;
+	cpu_buffer->buffer = buffer;
+	spin_lock_init(&cpu_buffer->lock);
+	INIT_LIST_HEAD(&cpu_buffer->pages);
+
+	page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
+			    GFP_KERNEL, cpu_to_node(cpu));
+	if (!page)
+		goto fail_free_buffer;
+
+	cpu_buffer->reader_page = page;
+	addr = __get_free_page(GFP_KERNEL);
+	if (!addr)
+		goto fail_free_reader;
+	page->page = (void *)addr;
+
+	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
+
+	ret = rb_allocate_pages(cpu_buffer, buffer->pages);
+	if (ret < 0)
+		goto fail_free_reader;
+
+	cpu_buffer->head_page
+		= list_entry(cpu_buffer->pages.next, struct buffer_page, list);
+	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
+
+	return cpu_buffer;
+
+ fail_free_reader:
+	free_buffer_page(cpu_buffer->reader_page);
+
+ fail_free_buffer:
+	kfree(cpu_buffer);
+	return NULL;
+}
+
+static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	struct list_head *head = &cpu_buffer->pages;
+	struct buffer_page *page, *tmp;
+
+	list_del_init(&cpu_buffer->reader_page->list);
+	free_buffer_page(cpu_buffer->reader_page);
+
+	list_for_each_entry_safe(page, tmp, head, list) {
+		list_del_init(&page->list);
+		free_buffer_page(page);
+	}
+	kfree(cpu_buffer);
+}
+
+/*
+ * Causes compile errors if the struct buffer_page gets bigger
+ * than the struct page.
+ */
+extern int ring_buffer_page_too_big(void);
+
+/**
+ * ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes that is needed.
+ * @flags: attributes to set for the ring buffer.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+{
+	struct ring_buffer *buffer;
+	int bsize;
+	int cpu;
+
+	/* Paranoid! Optimizes out when all is well */
+	if (sizeof(struct buffer_page) > sizeof(struct page))
+		ring_buffer_page_too_big();
+
+
+	/* keep it in its own cache line */
+	buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
+			 GFP_KERNEL);
+	if (!buffer)
+		return NULL;
+
+	buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	buffer->flags = flags;
+
+	/* need at least two pages */
+	if (buffer->pages == 1)
+		buffer->pages++;
+
+	buffer->cpumask = cpu_possible_map;
+	buffer->cpus = nr_cpu_ids;
+
+	bsize = sizeof(void *) * nr_cpu_ids;
+	buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
+				  GFP_KERNEL);
+	if (!buffer->buffers)
+		goto fail_free_buffer;
+
+	for_each_buffer_cpu(buffer, cpu) {
+		buffer->buffers[cpu] =
+			rb_allocate_cpu_buffer(buffer, cpu);
+		if (!buffer->buffers[cpu])
+			goto fail_free_buffers;
+	}
+
+	mutex_init(&buffer->mutex);
+
+	return buffer;
+
+ fail_free_buffers:
+	for_each_buffer_cpu(buffer, cpu) {
+		if (buffer->buffers[cpu])
+			rb_free_cpu_buffer(buffer->buffers[cpu]);
+	}
+	kfree(buffer->buffers);
+
+ fail_free_buffer:
+	kfree(buffer);
+	return NULL;
+}
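Note: a hedged usage sketch of the allocation API above (the function names and the one-megabyte size are illustrative, not from this patch):

    static struct ring_buffer *buffer;

    static int __init my_buffer_init(void)	/* illustrative */
    {
    	/* size is rounded up to whole pages, at least two per CPU;
    	 * RB_FL_OVERWRITE: overwrite old data when the buffer wraps */
    	buffer = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
    	if (!buffer)
    		return -ENOMEM;
    	return 0;
    }

    static void my_buffer_exit(void)	/* illustrative */
    {
    	ring_buffer_free(buffer);
    }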
412
413/**
414 * ring_buffer_free - free a ring buffer.
415 * @buffer: the buffer to free.
416 */
417void
418ring_buffer_free(struct ring_buffer *buffer)
419{
420 int cpu;
421
422 for_each_buffer_cpu(buffer, cpu)
423 rb_free_cpu_buffer(buffer->buffers[cpu]);
424
425 kfree(buffer);
426}
427
428static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
429
430static void
431rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
432{
433 struct buffer_page *page;
434 struct list_head *p;
435 unsigned i;
436
437 atomic_inc(&cpu_buffer->record_disabled);
438 synchronize_sched();
439
440 for (i = 0; i < nr_pages; i++) {
441 BUG_ON(list_empty(&cpu_buffer->pages));
442 p = cpu_buffer->pages.next;
443 page = list_entry(p, struct buffer_page, list);
444 list_del_init(&page->list);
445 free_buffer_page(page);
446 }
447 BUG_ON(list_empty(&cpu_buffer->pages));
448
449 rb_reset_cpu(cpu_buffer);
450
451 rb_check_pages(cpu_buffer);
452
453 atomic_dec(&cpu_buffer->record_disabled);
454
455}
456
457static void
458rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
459 struct list_head *pages, unsigned nr_pages)
460{
461 struct buffer_page *page;
462 struct list_head *p;
463 unsigned i;
464
465 atomic_inc(&cpu_buffer->record_disabled);
466 synchronize_sched();
467
468 for (i = 0; i < nr_pages; i++) {
469 BUG_ON(list_empty(pages));
470 p = pages->next;
471 page = list_entry(p, struct buffer_page, list);
472 list_del_init(&page->list);
473 list_add_tail(&page->list, &cpu_buffer->pages);
474 }
475 rb_reset_cpu(cpu_buffer);
476
477 rb_check_pages(cpu_buffer);
478
479 atomic_dec(&cpu_buffer->record_disabled);
480}
481
482/**
483 * ring_buffer_resize - resize the ring buffer
484 * @buffer: the buffer to resize.
485 * @size: the new size.
486 *
487 * The tracer is responsible for making sure that the buffer is
488 * not being used while changing the size.
489 * Note: We may be able to change the above requirement by using
490 * RCU synchronizations.
491 *
492 * Minimum size is 2 * BUF_PAGE_SIZE.
493 *
494 * Returns -1 on failure.
495 */
496int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
497{
498 struct ring_buffer_per_cpu *cpu_buffer;
499 unsigned nr_pages, rm_pages, new_pages;
500 struct buffer_page *page, *tmp;
501 unsigned long buffer_size;
502 unsigned long addr;
503 LIST_HEAD(pages);
504 int i, cpu;
505
506 size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
507 size *= BUF_PAGE_SIZE;
508 buffer_size = buffer->pages * BUF_PAGE_SIZE;
509
510 /* we need a minimum of two pages */
511 if (size < BUF_PAGE_SIZE * 2)
512 size = BUF_PAGE_SIZE * 2;
513
514 if (size == buffer_size)
515 return size;
516
517 mutex_lock(&buffer->mutex);
518
519 nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
520
521 if (size < buffer_size) {
522
523 /* easy case, just free pages */
524 BUG_ON(nr_pages >= buffer->pages);
525
526 rm_pages = buffer->pages - nr_pages;
527
528 for_each_buffer_cpu(buffer, cpu) {
529 cpu_buffer = buffer->buffers[cpu];
530 rb_remove_pages(cpu_buffer, rm_pages);
531 }
532 goto out;
533 }
534
535 /*
536 * This is a bit more difficult. We only want to add pages
537 * when we can allocate enough for all CPUs. We do this
538 * by allocating all the pages and storing them on a local
539 * link list. If we succeed in our allocation, then we
540 * add these pages to the cpu_buffers. Otherwise we just free
541 * them all and return -ENOMEM;
542 */
543 BUG_ON(nr_pages <= buffer->pages);
544 new_pages = nr_pages - buffer->pages;
545
546 for_each_buffer_cpu(buffer, cpu) {
547 for (i = 0; i < new_pages; i++) {
548 page = kzalloc_node(ALIGN(sizeof(*page),
549 cache_line_size()),
550 GFP_KERNEL, cpu_to_node(cpu));
551 if (!page)
552 goto free_pages;
553 list_add(&page->list, &pages);
554 addr = __get_free_page(GFP_KERNEL);
555 if (!addr)
556 goto free_pages;
557 page->page = (void *)addr;
558 }
559 }
560
561 for_each_buffer_cpu(buffer, cpu) {
562 cpu_buffer = buffer->buffers[cpu];
563 rb_insert_pages(cpu_buffer, &pages, new_pages);
564 }
565
566 BUG_ON(!list_empty(&pages));
567
568 out:
569 buffer->pages = nr_pages;
570 mutex_unlock(&buffer->mutex);
571
572 return size;
573
574 free_pages:
575 list_for_each_entry_safe(page, tmp, &pages, list) {
576 list_del_init(&page->list);
577 free_buffer_page(page);
578 }
579 return -ENOMEM;
580}
581
582static inline int rb_null_event(struct ring_buffer_event *event)
583{
584 return event->type == RINGBUF_TYPE_PADDING;
585}
586
587static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
588{
589 return page->page + index;
590}
591
592static inline struct ring_buffer_event *
593rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
594{
595 return __rb_page_index(cpu_buffer->reader_page,
596 cpu_buffer->reader_page->read);
597}
598
599static inline struct ring_buffer_event *
600rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
601{
602 return __rb_page_index(cpu_buffer->head_page,
603 cpu_buffer->head_page->read);
604}
605
606static inline struct ring_buffer_event *
607rb_iter_head_event(struct ring_buffer_iter *iter)
608{
609 return __rb_page_index(iter->head_page, iter->head);
610}
611
612static inline unsigned rb_page_write(struct buffer_page *bpage)
613{
614 return local_read(&bpage->write);
615}
616
617static inline unsigned rb_page_commit(struct buffer_page *bpage)
618{
619 return local_read(&bpage->commit);
620}
621
622/* Size is determined by what has been commited */
623static inline unsigned rb_page_size(struct buffer_page *bpage)
624{
625 return rb_page_commit(bpage);
626}
627
628static inline unsigned
629rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
630{
631 return rb_page_commit(cpu_buffer->commit_page);
632}
633
634static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
635{
636 return rb_page_commit(cpu_buffer->head_page);
637}
638
639/*
640 * When the tail hits the head and the buffer is in overwrite mode,
641 * the head jumps to the next page and all content on the previous
642 * page is discarded. But before doing so, we update the overrun
643 * variable of the buffer.
644 */
645static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
646{
647 struct ring_buffer_event *event;
648 unsigned long head;
649
650 for (head = 0; head < rb_head_size(cpu_buffer);
651 head += rb_event_length(event)) {
652
653 event = __rb_page_index(cpu_buffer->head_page, head);
654 BUG_ON(rb_null_event(event));
655 /* Only count data entries */
656 if (event->type != RINGBUF_TYPE_DATA)
657 continue;
658 cpu_buffer->overrun++;
659 cpu_buffer->entries--;
660 }
661}
662
663static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
664 struct buffer_page **page)
665{
666 struct list_head *p = (*page)->list.next;
667
668 if (p == &cpu_buffer->pages)
669 p = p->next;
670
671 *page = list_entry(p, struct buffer_page, list);
672}
673
674static inline unsigned
675rb_event_index(struct ring_buffer_event *event)
676{
677 unsigned long addr = (unsigned long)event;
678
679 return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
680}
681
682static inline int
683rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
684 struct ring_buffer_event *event)
685{
686 unsigned long addr = (unsigned long)event;
687 unsigned long index;
688
689 index = rb_event_index(event);
690 addr &= PAGE_MASK;
691
692 return cpu_buffer->commit_page->page == (void *)addr &&
693 rb_commit_index(cpu_buffer) == index;
694}
695
696static inline void
697rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
698 struct ring_buffer_event *event)
699{
700 unsigned long addr = (unsigned long)event;
701 unsigned long index;
702
703 index = rb_event_index(event);
704 addr &= PAGE_MASK;
705
706 while (cpu_buffer->commit_page->page != (void *)addr) {
707 RB_WARN_ON(cpu_buffer,
708 cpu_buffer->commit_page == cpu_buffer->tail_page);
709 cpu_buffer->commit_page->commit =
710 cpu_buffer->commit_page->write;
711 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
712 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
713 }
714
715 /* Now set the commit to the event's index */
716 local_set(&cpu_buffer->commit_page->commit, index);
717}
718
719static inline void
720rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
721{
722 /*
723 * We only race with interrupts and NMIs on this CPU.
724 * If we own the commit event, then we can commit
725 * all others that interrupted us, since the interruptions
726 * are in stack format (they finish before they come
727 * back to us). This allows us to do a simple loop to
728 * assign the commit to the tail.
729 */
730 while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
731 cpu_buffer->commit_page->commit =
732 cpu_buffer->commit_page->write;
733 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
734 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
735 /* add barrier to keep gcc from optimizing too much */
736 barrier();
737 }
738 while (rb_commit_index(cpu_buffer) !=
739 rb_page_write(cpu_buffer->commit_page)) {
740 cpu_buffer->commit_page->commit =
741 cpu_buffer->commit_page->write;
742 barrier();
743 }
744}
745
746static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
747{
748 cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
749 cpu_buffer->reader_page->read = 0;
750}
751
752static inline void rb_inc_iter(struct ring_buffer_iter *iter)
753{
754 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
755
756 /*
757 * The iterator could be on the reader page (it starts there).
758 * But the head could have moved, since the reader was
759 * found. Check for this case and assign the iterator
760 * to the head page instead of next.
761 */
762 if (iter->head_page == cpu_buffer->reader_page)
763 iter->head_page = cpu_buffer->head_page;
764 else
765 rb_inc_page(cpu_buffer, &iter->head_page);
766
767 iter->read_stamp = iter->head_page->time_stamp;
768 iter->head = 0;
769}
770
771/**
772 * ring_buffer_update_event - update event type and data
773 * @event: the even to update
774 * @type: the type of event
775 * @length: the size of the event field in the ring buffer
776 *
777 * Update the type and data fields of the event. The length
778 * is the actual size that is written to the ring buffer,
779 * and with this, we can determine what to place into the
780 * data field.
781 */
782static inline void
783rb_update_event(struct ring_buffer_event *event,
784 unsigned type, unsigned length)
785{
786 event->type = type;
787
788 switch (type) {
789
790 case RINGBUF_TYPE_PADDING:
791 break;
792
793 case RINGBUF_TYPE_TIME_EXTEND:
794 event->len =
795 (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
796 >> RB_ALIGNMENT_SHIFT;
797 break;
798
799 case RINGBUF_TYPE_TIME_STAMP:
800 event->len =
801 (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
802 >> RB_ALIGNMENT_SHIFT;
803 break;
804
805 case RINGBUF_TYPE_DATA:
806 length -= RB_EVNT_HDR_SIZE;
807 if (length > RB_MAX_SMALL_DATA) {
808 event->len = 0;
809 event->array[0] = length;
810 } else
811 event->len =
812 (length + (RB_ALIGNMENT-1))
813 >> RB_ALIGNMENT_SHIFT;
814 break;
815 default:
816 BUG();
817 }
818}
819
820static inline unsigned rb_calculate_event_length(unsigned length)
821{
822 struct ring_buffer_event event; /* Used only for sizeof array */
823
824 /* zero length can cause confusions */
825 if (!length)
826 length = 1;
827
828 if (length > RB_MAX_SMALL_DATA)
829 length += sizeof(event.array[0]);
830
831 length += RB_EVNT_HDR_SIZE;
832 length = ALIGN(length, RB_ALIGNMENT);
833
834 return length;
835}
836
837static struct ring_buffer_event *
838__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
839 unsigned type, unsigned long length, u64 *ts)
840{
841 struct buffer_page *tail_page, *head_page, *reader_page;
842 unsigned long tail, write;
843 struct ring_buffer *buffer = cpu_buffer->buffer;
844 struct ring_buffer_event *event;
845 unsigned long flags;
846
847 tail_page = cpu_buffer->tail_page;
848 write = local_add_return(length, &tail_page->write);
849 tail = write - length;
850
851 /* See if we shot pass the end of this buffer page */
852 if (write > BUF_PAGE_SIZE) {
853 struct buffer_page *next_page = tail_page;
854
855 spin_lock_irqsave(&cpu_buffer->lock, flags);
856
857 rb_inc_page(cpu_buffer, &next_page);
858
859 head_page = cpu_buffer->head_page;
860 reader_page = cpu_buffer->reader_page;
861
862 /* we grabbed the lock before incrementing */
863 RB_WARN_ON(cpu_buffer, next_page == reader_page);
864
865 /*
866 * If for some reason, we had an interrupt storm that made
867 * it all the way around the buffer, bail, and warn
868 * about it.
869 */
870 if (unlikely(next_page == cpu_buffer->commit_page)) {
871 WARN_ON_ONCE(1);
872 goto out_unlock;
873 }
874
875 if (next_page == head_page) {
876 if (!(buffer->flags & RB_FL_OVERWRITE)) {
877 /* reset write */
878 if (tail <= BUF_PAGE_SIZE)
879 local_set(&tail_page->write, tail);
880 goto out_unlock;
881 }
882
883 /* tail_page has not moved yet? */
884 if (tail_page == cpu_buffer->tail_page) {
885 /* count overflows */
886 rb_update_overflow(cpu_buffer);
887
888 rb_inc_page(cpu_buffer, &head_page);
889 cpu_buffer->head_page = head_page;
890 cpu_buffer->head_page->read = 0;
891 }
892 }
893
894 /*
895 * If the tail page is still the same as what we think
896 * it is, then it is up to us to update the tail
897 * pointer.
898 */
899 if (tail_page == cpu_buffer->tail_page) {
900 local_set(&next_page->write, 0);
901 local_set(&next_page->commit, 0);
902 cpu_buffer->tail_page = next_page;
903
904 /* reread the time stamp */
905 *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
906 cpu_buffer->tail_page->time_stamp = *ts;
907 }
908
909 /*
910 * The actual tail page has moved forward.
911 */
912 if (tail < BUF_PAGE_SIZE) {
913 /* Mark the rest of the page with padding */
914 event = __rb_page_index(tail_page, tail);
915 event->type = RINGBUF_TYPE_PADDING;
916 }
917
918 if (tail <= BUF_PAGE_SIZE)
919 /* Set the write back to the previous setting */
920 local_set(&tail_page->write, tail);
921
922 /*
923 * If this was a commit entry that failed,
924 * increment that too
925 */
926 if (tail_page == cpu_buffer->commit_page &&
927 tail == rb_commit_index(cpu_buffer)) {
928 rb_set_commit_to_write(cpu_buffer);
929 }
930
931 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
932
933 /* fail and let the caller try again */
934 return ERR_PTR(-EAGAIN);
935 }
936
937 /* We reserved something on the buffer */
938
939 BUG_ON(write > BUF_PAGE_SIZE);
940
941 event = __rb_page_index(tail_page, tail);
942 rb_update_event(event, type, length);
943
944 /*
945 * If this is a commit and the tail is zero, then update
946 * this page's time stamp.
947 */
948 if (!tail && rb_is_commit(cpu_buffer, event))
949 cpu_buffer->commit_page->time_stamp = *ts;
950
951 return event;
952
953 out_unlock:
954 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
955 return NULL;
956}
957
958static int
959rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
960 u64 *ts, u64 *delta)
961{
962 struct ring_buffer_event *event;
963 static int once;
964 int ret;
965
966 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n",
969 *delta, *ts, cpu_buffer->write_stamp);
970 WARN_ON(1);
971 }
972
973 /*
974 * The delta is too big, we to add a
975 * new timestamp.
976 */
977 event = __rb_reserve_next(cpu_buffer,
978 RINGBUF_TYPE_TIME_EXTEND,
979 RB_LEN_TIME_EXTEND,
980 ts);
981 if (!event)
982 return -EBUSY;
983
984 if (PTR_ERR(event) == -EAGAIN)
985 return -EAGAIN;
986
987 /* Only a commited time event can update the write stamp */
988 if (rb_is_commit(cpu_buffer, event)) {
989 /*
990 * If this is the first on the page, then we need to
991 * update the page itself, and just put in a zero.
992 */
993 if (rb_event_index(event)) {
994 event->time_delta = *delta & TS_MASK;
995 event->array[0] = *delta >> TS_SHIFT;
996 } else {
997 cpu_buffer->commit_page->time_stamp = *ts;
998 event->time_delta = 0;
999 event->array[0] = 0;
1000 }
1001 cpu_buffer->write_stamp = *ts;
1002 /* let the caller know this was the commit */
1003 ret = 1;
1004 } else {
1005 /* Darn, this is just wasted space */
1006 event->time_delta = 0;
1007 event->array[0] = 0;
1008 ret = 0;
1009 }
1010
1011 *delta = 0;
1012
1013 return ret;
1014}
1015
1016static struct ring_buffer_event *
1017rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1018 unsigned type, unsigned long length)
1019{
1020 struct ring_buffer_event *event;
1021 u64 ts, delta;
1022 int commit = 0;
1023
1024 again:
1025 ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1026
1027 /*
1028 * Only the first commit can update the timestamp.
1029 * Yes there is a race here. If an interrupt comes in
1030 * just after the conditional and it traces too, then it
1031 * will also check the deltas. More than one timestamp may
1032 * also be made. But only the entry that did the actual
1033 * commit will be something other than zero.
1034 */
1035 if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1036 rb_page_write(cpu_buffer->tail_page) ==
1037 rb_commit_index(cpu_buffer)) {
1038
1039 delta = ts - cpu_buffer->write_stamp;
1040
1041 /* make sure this delta is calculated here */
1042 barrier();
1043
1044 /* Did the write stamp get updated already? */
1045 if (unlikely(ts < cpu_buffer->write_stamp))
1046 goto again;
1047
1048 if (test_time_stamp(delta)) {
1049
1050 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1051
1052 if (commit == -EBUSY)
1053 return NULL;
1054
1055 if (commit == -EAGAIN)
1056 goto again;
1057
1058 RB_WARN_ON(cpu_buffer, commit < 0);
1059 }
1060 } else
1061 /* Non commits have zero deltas */
1062 delta = 0;
1063
1064 event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1065 if (PTR_ERR(event) == -EAGAIN)
1066 goto again;
1067
1068 if (!event) {
1069 if (unlikely(commit))
1070 /*
1071 * Ouch! We needed a timestamp and it was commited. But
1072 * we didn't get our event reserved.
1073 */
1074 rb_set_commit_to_write(cpu_buffer);
1075 return NULL;
1076 }
1077
1078 /*
1079 * If the timestamp was commited, make the commit our entry
1080 * now so that we will update it when needed.
1081 */
1082 if (commit)
1083 rb_set_commit_event(cpu_buffer, event);
1084 else if (!rb_is_commit(cpu_buffer, event))
1085 delta = 0;
1086
1087 event->time_delta = delta;
1088
1089 return event;
1090}
1091
1092static DEFINE_PER_CPU(int, rb_need_resched);
1093
1094/**
1095 * ring_buffer_lock_reserve - reserve a part of the buffer
1096 * @buffer: the ring buffer to reserve from
1097 * @length: the length of the data to reserve (excluding event header)
1098 * @flags: a pointer to save the interrupt flags
1099 *
1100 * Returns a reseverd event on the ring buffer to copy directly to.
1101 * The user of this interface will need to get the body to write into
1102 * and can use the ring_buffer_event_data() interface.
1103 *
1104 * The length is the length of the data needed, not the event length
1105 * which also includes the event header.
1106 *
1107 * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1108 * If NULL is returned, then nothing has been allocated or locked.
1109 */
1110struct ring_buffer_event *
1111ring_buffer_lock_reserve(struct ring_buffer *buffer,
1112 unsigned long length,
1113 unsigned long *flags)
1114{
1115 struct ring_buffer_per_cpu *cpu_buffer;
1116 struct ring_buffer_event *event;
1117 int cpu, resched;
1118
1119 if (atomic_read(&buffer->record_disabled))
1120 return NULL;
1121
1122 /* If we are tracing schedule, we don't want to recurse */
1123 resched = need_resched();
1124 preempt_disable_notrace();
1125
1126 cpu = raw_smp_processor_id();
1127
1128 if (!cpu_isset(cpu, buffer->cpumask))
1129 goto out;
1130
1131 cpu_buffer = buffer->buffers[cpu];
1132
1133 if (atomic_read(&cpu_buffer->record_disabled))
1134 goto out;
1135
1136 length = rb_calculate_event_length(length);
1137 if (length > BUF_PAGE_SIZE)
1138 goto out;
1139
1140 event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1141 if (!event)
1142 goto out;
1143
1144 /*
1145 * Need to store resched state on this cpu.
1146 * Only the first needs to.
1147 */
1148
1149 if (preempt_count() == 1)
1150 per_cpu(rb_need_resched, cpu) = resched;
1151
1152 return event;
1153
1154 out:
1155 if (resched)
1156 preempt_enable_notrace();
1157 else
1158 preempt_enable_notrace();
1159 return NULL;
1160}
1161
1162static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1163 struct ring_buffer_event *event)
1164{
1165 cpu_buffer->entries++;
1166
1167 /* Only process further if we own the commit */
1168 if (!rb_is_commit(cpu_buffer, event))
1169 return;
1170
1171 cpu_buffer->write_stamp += event->time_delta;
1172
1173 rb_set_commit_to_write(cpu_buffer);
1174}
1175
1176/**
1177 * ring_buffer_unlock_commit - commit a reserved
1178 * @buffer: The buffer to commit to
1179 * @event: The event pointer to commit.
1180 * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1181 *
1182 * This commits the data to the ring buffer, and releases any locks held.
1183 *
1184 * Must be paired with ring_buffer_lock_reserve.
1185 */
1186int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1187 struct ring_buffer_event *event,
1188 unsigned long flags)
1189{
1190 struct ring_buffer_per_cpu *cpu_buffer;
1191 int cpu = raw_smp_processor_id();
1192
1193 cpu_buffer = buffer->buffers[cpu];
1194
1195 rb_commit(cpu_buffer, event);
1196
1197 /*
1198 * Only the last preempt count needs to restore preemption.
1199 */
1200 if (preempt_count() == 1) {
1201 if (per_cpu(rb_need_resched, cpu))
1202 preempt_enable_no_resched_notrace();
1203 else
1204 preempt_enable_notrace();
1205 } else
1206 preempt_enable_no_resched_notrace();
1207
1208 return 0;
1209}
1210
1211/**
1212 * ring_buffer_write - write data to the buffer without reserving
1213 * @buffer: The ring buffer to write to.
1214 * @length: The length of the data being written (excluding the event header)
1215 * @data: The data to write to the buffer.
1216 *
1217 * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1218 * one function. If you already have the data to write to the buffer, it
1219 * may be easier to simply call this function.
1220 *
1221 * Note, like ring_buffer_lock_reserve, the length is the length of the data
1222 * and not the length of the event which would hold the header.
1223 */
1224int ring_buffer_write(struct ring_buffer *buffer,
1225 unsigned long length,
1226 void *data)
1227{
1228 struct ring_buffer_per_cpu *cpu_buffer;
1229 struct ring_buffer_event *event;
1230 unsigned long event_length;
1231 void *body;
1232 int ret = -EBUSY;
1233 int cpu, resched;
1234
1235 if (atomic_read(&buffer->record_disabled))
1236 return -EBUSY;
1237
1238 resched = need_resched();
1239 preempt_disable_notrace();
1240
1241 cpu = raw_smp_processor_id();
1242
1243 if (!cpu_isset(cpu, buffer->cpumask))
1244 goto out;
1245
1246 cpu_buffer = buffer->buffers[cpu];
1247
1248 if (atomic_read(&cpu_buffer->record_disabled))
1249 goto out;
1250
1251 event_length = rb_calculate_event_length(length);
1252 event = rb_reserve_next_event(cpu_buffer,
1253 RINGBUF_TYPE_DATA, event_length);
1254 if (!event)
1255 goto out;
1256
1257 body = rb_event_data(event);
1258
1259 memcpy(body, data, length);
1260
1261 rb_commit(cpu_buffer, event);
1262
1263 ret = 0;
1264 out:
1265 if (resched)
1266 preempt_enable_no_resched_notrace();
1267 else
1268 preempt_enable_notrace();
1269
1270 return ret;
1271}
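
Illustrative sketch (not from this patch): when the payload already exists as
a blob, ring_buffer_write() collapses the reserve/fill/commit sequence into
one call. buf and its contents are hypothetical.

	char buf[] = "sample payload";
	int ret;

	ret = ring_buffer_write(buffer, sizeof(buf), buf);
	if (ret)	/* -EBUSY: recording disabled or no event reserved */
		printk(KERN_WARNING "ring buffer write failed\n");
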
1272
1273static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1274{
1275 struct buffer_page *reader = cpu_buffer->reader_page;
1276 struct buffer_page *head = cpu_buffer->head_page;
1277 struct buffer_page *commit = cpu_buffer->commit_page;
1278
1279 return reader->read == rb_page_commit(reader) &&
1280 (commit == reader ||
1281 (commit == head &&
1282 head->read == rb_page_commit(commit)));
1283}
1284
1285/**
1286 * ring_buffer_record_disable - stop all writes into the buffer
1287 * @buffer: The ring buffer to stop writes to.
1288 *
1289 * This prevents all writes to the buffer. Any attempt to write
1290 * to the buffer after this will fail and return NULL.
1291 *
1292 * The caller should call synchronize_sched() after this.
1293 */
1294void ring_buffer_record_disable(struct ring_buffer *buffer)
1295{
1296 atomic_inc(&buffer->record_disabled);
1297}
1298
1299/**
1300 * ring_buffer_record_enable - enable writes to the buffer
1301 * @buffer: The ring buffer to enable writes
1302 *
1303 * Note, multiple disables will need the same number of enables
1304 * to truly enable the writing (much like preempt_disable).
1305 */
1306void ring_buffer_record_enable(struct ring_buffer *buffer)
1307{
1308 atomic_dec(&buffer->record_disabled);
1309}
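
The comments above imply a quiesce pattern; a hedged sketch of the caller
side (not code from this file):

	ring_buffer_record_disable(buffer);
	synchronize_sched();	/* wait out writers already inside the ring buffer */

	/* ... safely read or reset the buffer here ... */

	ring_buffer_record_enable(buffer);
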
1310
1311/**
1312 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1313 * @buffer: The ring buffer to stop writes to.
1314 * @cpu: The CPU buffer to stop
1315 *
1316 * This prevents all writes to the buffer. Any attempt to write
1317 * to the buffer after this will fail and return NULL.
1318 *
1319 * The caller should call synchronize_sched() after this.
1320 */
1321void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1322{
1323 struct ring_buffer_per_cpu *cpu_buffer;
1324
1325 if (!cpu_isset(cpu, buffer->cpumask))
1326 return;
1327
1328 cpu_buffer = buffer->buffers[cpu];
1329 atomic_inc(&cpu_buffer->record_disabled);
1330}
1331
1332/**
1333 * ring_buffer_record_enable_cpu - enable writes to the buffer
1334 * @buffer: The ring buffer to enable writes
1335 * @cpu: The CPU to enable.
1336 *
1337 * Note, multiple disables will need the same number of enables
1338 * to truly enable the writing (much like preempt_disable).
1339 */
1340void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1341{
1342 struct ring_buffer_per_cpu *cpu_buffer;
1343
1344 if (!cpu_isset(cpu, buffer->cpumask))
1345 return;
1346
1347 cpu_buffer = buffer->buffers[cpu];
1348 atomic_dec(&cpu_buffer->record_disabled);
1349}
1350
1351/**
1352 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1353 * @buffer: The ring buffer
1354 * @cpu: The per CPU buffer to get the entries from.
1355 */
1356unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1357{
1358 struct ring_buffer_per_cpu *cpu_buffer;
1359
1360 if (!cpu_isset(cpu, buffer->cpumask))
1361 return 0;
1362
1363 cpu_buffer = buffer->buffers[cpu];
1364 return cpu_buffer->entries;
1365}
1366
1367/**
1368 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1369 * @buffer: The ring buffer
1370 * @cpu: The per CPU buffer to get the number of overruns from
1371 */
1372unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1373{
1374 struct ring_buffer_per_cpu *cpu_buffer;
1375
1376 if (!cpu_isset(cpu, buffer->cpumask))
1377 return 0;
1378
1379 cpu_buffer = buffer->buffers[cpu];
1380 return cpu_buffer->overrun;
1381}
1382
1383/**
1384 * ring_buffer_entries - get the number of entries in a buffer
1385 * @buffer: The ring buffer
1386 *
1387 * Returns the total number of entries in the ring buffer
1388 * (all CPU entries)
1389 */
1390unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1391{
1392 struct ring_buffer_per_cpu *cpu_buffer;
1393 unsigned long entries = 0;
1394 int cpu;
1395
1396 /* if you care about this being correct, lock the buffer */
1397 for_each_buffer_cpu(buffer, cpu) {
1398 cpu_buffer = buffer->buffers[cpu];
1399 entries += cpu_buffer->entries;
1400 }
1401
1402 return entries;
1403}
1404
1405/**
1406 * ring_buffer_overruns - get the number of overruns in the buffer
1407 * @buffer: The ring buffer
1408 *
1409 * Returns the total number of overruns in the ring buffer
1410 * (all CPU entries)
1411 */
1412unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1413{
1414 struct ring_buffer_per_cpu *cpu_buffer;
1415 unsigned long overruns = 0;
1416 int cpu;
1417
1418 /* if you care about this being correct, lock the buffer */
1419 for_each_buffer_cpu(buffer, cpu) {
1420 cpu_buffer = buffer->buffers[cpu];
1421 overruns += cpu_buffer->overrun;
1422 }
1423
1424 return overruns;
1425}
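
For illustration, the two aggregate helpers combine into a one-line
diagnostic (the counts are racy, as the comments above note):

	printk(KERN_INFO "ring buffer: %lu entries, %lu overruns\n",
	       ring_buffer_entries(buffer), ring_buffer_overruns(buffer));
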
1426
1427/**
1428 * ring_buffer_iter_reset - reset an iterator
1429 * @iter: The iterator to reset
1430 *
1431 * Resets the iterator, so that it will start from the beginning
1432 * again.
1433 */
1434void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1435{
1436 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1437
1438 /* Iterator usage is expected to have record disabled */
1439 if (list_empty(&cpu_buffer->reader_page->list)) {
1440 iter->head_page = cpu_buffer->head_page;
1441 iter->head = cpu_buffer->head_page->read;
1442 } else {
1443 iter->head_page = cpu_buffer->reader_page;
1444 iter->head = cpu_buffer->reader_page->read;
1445 }
1446 if (iter->head)
1447 iter->read_stamp = cpu_buffer->read_stamp;
1448 else
1449 iter->read_stamp = iter->head_page->time_stamp;
1450}
1451
1452/**
1453 * ring_buffer_iter_empty - check if an iterator has no more to read
1454 * @iter: The iterator to check
1455 */
1456int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1457{
1458 struct ring_buffer_per_cpu *cpu_buffer;
1459
1460 cpu_buffer = iter->cpu_buffer;
1461
1462 return iter->head_page == cpu_buffer->commit_page &&
1463 iter->head == rb_commit_index(cpu_buffer);
1464}
1465
1466static void
1467rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1468 struct ring_buffer_event *event)
1469{
1470 u64 delta;
1471
1472 switch (event->type) {
1473 case RINGBUF_TYPE_PADDING:
1474 return;
1475
1476 case RINGBUF_TYPE_TIME_EXTEND:
1477 delta = event->array[0];
1478 delta <<= TS_SHIFT;
1479 delta += event->time_delta;
1480 cpu_buffer->read_stamp += delta;
1481 return;
1482
1483 case RINGBUF_TYPE_TIME_STAMP:
1484 /* FIXME: not implemented */
1485 return;
1486
1487 case RINGBUF_TYPE_DATA:
1488 cpu_buffer->read_stamp += event->time_delta;
1489 return;
1490
1491 default:
1492 BUG();
1493 }
1494 return;
1495}
1496
1497static void
1498rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1499 struct ring_buffer_event *event)
1500{
1501 u64 delta;
1502
1503 switch (event->type) {
1504 case RINGBUF_TYPE_PADDING:
1505 return;
1506
1507 case RINGBUF_TYPE_TIME_EXTEND:
1508 delta = event->array[0];
1509 delta <<= TS_SHIFT;
1510 delta += event->time_delta;
1511 iter->read_stamp += delta;
1512 return;
1513
1514 case RINGBUF_TYPE_TIME_STAMP:
1515 /* FIXME: not implemented */
1516 return;
1517
1518 case RINGBUF_TYPE_DATA:
1519 iter->read_stamp += event->time_delta;
1520 return;
1521
1522 default:
1523 BUG();
1524 }
1525 return;
1526}
1527
1528static struct buffer_page *
1529rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1530{
1531 struct buffer_page *reader = NULL;
1532 unsigned long flags;
1533
1534 spin_lock_irqsave(&cpu_buffer->lock, flags);
1535
1536 again:
1537 reader = cpu_buffer->reader_page;
1538
1539 /* If there's more to read, return this page */
1540 if (cpu_buffer->reader_page->read < rb_page_size(reader))
1541 goto out;
1542
1543 /* Never should we have an index greater than the size */
1544 RB_WARN_ON(cpu_buffer,
1545 cpu_buffer->reader_page->read > rb_page_size(reader));
1546
1547 /* check if we caught up to the tail */
1548 reader = NULL;
1549 if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1550 goto out;
1551
1552 /*
1553 * Splice the empty reader page into the list around the head.
1554 * Reset the reader page to size zero.
1555 */
1556
1557 reader = cpu_buffer->head_page;
1558 cpu_buffer->reader_page->list.next = reader->list.next;
1559 cpu_buffer->reader_page->list.prev = reader->list.prev;
1560
1561 local_set(&cpu_buffer->reader_page->write, 0);
1562 local_set(&cpu_buffer->reader_page->commit, 0);
1563
1564 /* Make the reader page now replace the head */
1565 reader->list.prev->next = &cpu_buffer->reader_page->list;
1566 reader->list.next->prev = &cpu_buffer->reader_page->list;
1567
1568 /*
1569 * If the tail is on the reader, then we must set the head
1570 * to the inserted page, otherwise we set it one before.
1571 */
1572 cpu_buffer->head_page = cpu_buffer->reader_page;
1573
1574 if (cpu_buffer->commit_page != reader)
1575 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1576
1577 /* Finally update the reader page to the new head */
1578 cpu_buffer->reader_page = reader;
1579 rb_reset_reader_page(cpu_buffer);
1580
1581 goto again;
1582
1583 out:
1584 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1585
1586 return reader;
1587}
1588
1589static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1590{
1591 struct ring_buffer_event *event;
1592 struct buffer_page *reader;
1593 unsigned length;
1594
1595 reader = rb_get_reader_page(cpu_buffer);
1596
1597	/* This function should not be called when the buffer is empty */
1598 BUG_ON(!reader);
1599
1600 event = rb_reader_event(cpu_buffer);
1601
1602 if (event->type == RINGBUF_TYPE_DATA)
1603 cpu_buffer->entries--;
1604
1605 rb_update_read_stamp(cpu_buffer, event);
1606
1607 length = rb_event_length(event);
1608 cpu_buffer->reader_page->read += length;
1609}
1610
1611static void rb_advance_iter(struct ring_buffer_iter *iter)
1612{
1613 struct ring_buffer *buffer;
1614 struct ring_buffer_per_cpu *cpu_buffer;
1615 struct ring_buffer_event *event;
1616 unsigned length;
1617
1618 cpu_buffer = iter->cpu_buffer;
1619 buffer = cpu_buffer->buffer;
1620
1621 /*
1622 * Check if we are at the end of the buffer.
1623 */
1624 if (iter->head >= rb_page_size(iter->head_page)) {
1625 BUG_ON(iter->head_page == cpu_buffer->commit_page);
1626 rb_inc_iter(iter);
1627 return;
1628 }
1629
1630 event = rb_iter_head_event(iter);
1631
1632 length = rb_event_length(event);
1633
1634 /*
1635	 * This should not be called to advance the iterator if we are
1636 * at the tail of the buffer.
1637 */
1638 BUG_ON((iter->head_page == cpu_buffer->commit_page) &&
1639 (iter->head + length > rb_commit_index(cpu_buffer)));
1640
1641 rb_update_iter_read_stamp(iter, event);
1642
1643 iter->head += length;
1644
1645 /* check for end of page padding */
1646 if ((iter->head >= rb_page_size(iter->head_page)) &&
1647 (iter->head_page != cpu_buffer->commit_page))
1648 rb_advance_iter(iter);
1649}
1650
1651/**
1652 * ring_buffer_peek - peek at the next event to be read
1653 * @buffer: The ring buffer to read
1654 * @cpu: The cpu to peek at
1655 * @ts: The timestamp counter of this event.
1656 *
1657 * This will return the event that will be read next, but does
1658 * not consume the data.
1659 */
1660struct ring_buffer_event *
1661ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1662{
1663 struct ring_buffer_per_cpu *cpu_buffer;
1664 struct ring_buffer_event *event;
1665 struct buffer_page *reader;
1666
1667 if (!cpu_isset(cpu, buffer->cpumask))
1668 return NULL;
1669
1670 cpu_buffer = buffer->buffers[cpu];
1671
1672 again:
1673 reader = rb_get_reader_page(cpu_buffer);
1674 if (!reader)
1675 return NULL;
1676
1677 event = rb_reader_event(cpu_buffer);
1678
1679 switch (event->type) {
1680 case RINGBUF_TYPE_PADDING:
1681 RB_WARN_ON(cpu_buffer, 1);
1682 rb_advance_reader(cpu_buffer);
1683 return NULL;
1684
1685 case RINGBUF_TYPE_TIME_EXTEND:
1686 /* Internal data, OK to advance */
1687 rb_advance_reader(cpu_buffer);
1688 goto again;
1689
1690 case RINGBUF_TYPE_TIME_STAMP:
1691 /* FIXME: not implemented */
1692 rb_advance_reader(cpu_buffer);
1693 goto again;
1694
1695 case RINGBUF_TYPE_DATA:
1696 if (ts) {
1697 *ts = cpu_buffer->read_stamp + event->time_delta;
1698 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1699 }
1700 return event;
1701
1702 default:
1703 BUG();
1704 }
1705
1706 return NULL;
1707}
1708
1709/**
1710 * ring_buffer_iter_peek - peek at the next event to be read
1711 * @iter: The ring buffer iterator
1712 * @ts: The timestamp counter of this event.
1713 *
1714 * This will return the event that will be read next, but does
1715 * not increment the iterator.
1716 */
1717struct ring_buffer_event *
1718ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1719{
1720 struct ring_buffer *buffer;
1721 struct ring_buffer_per_cpu *cpu_buffer;
1722 struct ring_buffer_event *event;
1723
1724 if (ring_buffer_iter_empty(iter))
1725 return NULL;
1726
1727 cpu_buffer = iter->cpu_buffer;
1728 buffer = cpu_buffer->buffer;
1729
1730 again:
1731 if (rb_per_cpu_empty(cpu_buffer))
1732 return NULL;
1733
1734 event = rb_iter_head_event(iter);
1735
1736 switch (event->type) {
1737 case RINGBUF_TYPE_PADDING:
1738 rb_inc_iter(iter);
1739 goto again;
1740
1741 case RINGBUF_TYPE_TIME_EXTEND:
1742 /* Internal data, OK to advance */
1743 rb_advance_iter(iter);
1744 goto again;
1745
1746 case RINGBUF_TYPE_TIME_STAMP:
1747 /* FIXME: not implemented */
1748 rb_advance_iter(iter);
1749 goto again;
1750
1751 case RINGBUF_TYPE_DATA:
1752 if (ts) {
1753 *ts = iter->read_stamp + event->time_delta;
1754 ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1755 }
1756 return event;
1757
1758 default:
1759 BUG();
1760 }
1761
1762 return NULL;
1763}
1764
1765/**
1766 * ring_buffer_consume - return an event and consume it
1767 * @buffer: The ring buffer to get the next event from
1768 * @cpu: The CPU buffer to read from; @ts: filled with the event's timestamp
1769 * Returns the next event in the ring buffer, and that event is consumed.
1770 * Meaning that sequential reads will keep returning a different event,
1771 * and eventually empty the ring buffer if the producer is slower.
1772 */
1773struct ring_buffer_event *
1774ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1775{
1776 struct ring_buffer_per_cpu *cpu_buffer;
1777 struct ring_buffer_event *event;
1778
1779 if (!cpu_isset(cpu, buffer->cpumask))
1780 return NULL;
1781
1782 event = ring_buffer_peek(buffer, cpu, ts);
1783 if (!event)
1784 return NULL;
1785
1786 cpu_buffer = buffer->buffers[cpu];
1787 rb_advance_reader(cpu_buffer);
1788
1789 return event;
1790}
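
A minimal consuming-read loop (illustrative; process_event() is a
hypothetical callback):

	struct ring_buffer_event *event;
	u64 ts;

	while ((event = ring_buffer_consume(buffer, cpu, &ts)))
		process_event(ring_buffer_event_data(event), ts);
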
1791
1792/**
1793 * ring_buffer_read_start - start a non consuming read of the buffer
1794 * @buffer: The ring buffer to read from
1795 * @cpu: The cpu buffer to iterate over
1796 *
1797 * This starts up an iteration through the buffer. It also disables
1798 * the recording to the buffer until the reading is finished.
1799 * This prevents the reading from being corrupted. This is not
1800 * a consuming read, so a producer is not expected.
1801 *
1802 * Must be paired with ring_buffer_read_finish.
1803 */
1804struct ring_buffer_iter *
1805ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1806{
1807 struct ring_buffer_per_cpu *cpu_buffer;
1808 struct ring_buffer_iter *iter;
1809 unsigned long flags;
1810
1811 if (!cpu_isset(cpu, buffer->cpumask))
1812 return NULL;
1813
1814 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1815 if (!iter)
1816 return NULL;
1817
1818 cpu_buffer = buffer->buffers[cpu];
1819
1820 iter->cpu_buffer = cpu_buffer;
1821
1822 atomic_inc(&cpu_buffer->record_disabled);
1823 synchronize_sched();
1824
1825 spin_lock_irqsave(&cpu_buffer->lock, flags);
1826 ring_buffer_iter_reset(iter);
1827 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1828
1829 return iter;
1830}
1831
1832/**
1833 * ring_buffer_finish - finish reading the iterator of the buffer
1834 * @iter: The iterator retrieved by ring_buffer_read_start
1835 *
1836 * This re-enables the recording to the buffer, and frees the
1837 * iterator.
1838 */
1839void
1840ring_buffer_read_finish(struct ring_buffer_iter *iter)
1841{
1842 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1843
1844 atomic_dec(&cpu_buffer->record_disabled);
1845 kfree(iter);
1846}
1847
1848/**
1849 * ring_buffer_read - read the next item in the ring buffer by the iterator
1850 * @iter: The ring buffer iterator
1851 * @ts: The time stamp of the event read.
1852 *
1853 * This reads the next event in the ring buffer and increments the iterator.
1854 */
1855struct ring_buffer_event *
1856ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
1857{
1858 struct ring_buffer_event *event;
1859
1860 event = ring_buffer_iter_peek(iter, ts);
1861 if (!event)
1862 return NULL;
1863
1864 rb_advance_iter(iter);
1865
1866 return event;
1867}
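
Putting the non-consuming API together (illustrative sketch; process_event()
is again hypothetical). The iterator walks the buffer without draining it,
with recording disabled for the duration:

	struct ring_buffer_iter *iter;
	struct ring_buffer_event *event;
	u64 ts;

	iter = ring_buffer_read_start(buffer, cpu);
	if (!iter)
		return;
	while ((event = ring_buffer_read(iter, &ts)))
		process_event(ring_buffer_event_data(event), ts);
	ring_buffer_read_finish(iter);
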
1868
1869/**
1870 * ring_buffer_size - return the size of the ring buffer (in bytes)
1871 * @buffer: The ring buffer.
1872 */
1873unsigned long ring_buffer_size(struct ring_buffer *buffer)
1874{
1875 return BUF_PAGE_SIZE * buffer->pages;
1876}
1877
1878static void
1879rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
1880{
1881 cpu_buffer->head_page
1882 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
1883 local_set(&cpu_buffer->head_page->write, 0);
1884 local_set(&cpu_buffer->head_page->commit, 0);
1885
1886 cpu_buffer->head_page->read = 0;
1887
1888 cpu_buffer->tail_page = cpu_buffer->head_page;
1889 cpu_buffer->commit_page = cpu_buffer->head_page;
1890
1891 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
1892 local_set(&cpu_buffer->reader_page->write, 0);
1893 local_set(&cpu_buffer->reader_page->commit, 0);
1894 cpu_buffer->reader_page->read = 0;
1895
1896 cpu_buffer->overrun = 0;
1897 cpu_buffer->entries = 0;
1898}
1899
1900/**
1901 * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
1902 * @buffer: The ring buffer to reset a per cpu buffer of
1903 * @cpu: The CPU buffer to be reset
1904 */
1905void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
1906{
1907 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1908 unsigned long flags;
1909
1910 if (!cpu_isset(cpu, buffer->cpumask))
1911 return;
1912
1913 spin_lock_irqsave(&cpu_buffer->lock, flags);
1914
1915 rb_reset_cpu(cpu_buffer);
1916
1917 spin_unlock_irqrestore(&cpu_buffer->lock, flags);
1918}
1919
1920/**
1921 * ring_buffer_reset - reset a ring buffer
1922 * @buffer: The ring buffer to reset all cpu buffers
1923 */
1924void ring_buffer_reset(struct ring_buffer *buffer)
1925{
1926 int cpu;
1927
1928 for_each_buffer_cpu(buffer, cpu)
1929 ring_buffer_reset_cpu(buffer, cpu);
1930}
1931
1932/**
1933 * ring_buffer_empty - is the ring buffer empty?
1934 * @buffer: The ring buffer to test
1935 */
1936int ring_buffer_empty(struct ring_buffer *buffer)
1937{
1938 struct ring_buffer_per_cpu *cpu_buffer;
1939 int cpu;
1940
1941 /* yes this is racy, but if you don't like the race, lock the buffer */
1942 for_each_buffer_cpu(buffer, cpu) {
1943 cpu_buffer = buffer->buffers[cpu];
1944 if (!rb_per_cpu_empty(cpu_buffer))
1945 return 0;
1946 }
1947 return 1;
1948}
1949
1950/**
1951 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
1952 * @buffer: The ring buffer
1953 * @cpu: The CPU buffer to test
1954 */
1955int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
1956{
1957 struct ring_buffer_per_cpu *cpu_buffer;
1958
1959 if (!cpu_isset(cpu, buffer->cpumask))
1960 return 1;
1961
1962 cpu_buffer = buffer->buffers[cpu];
1963 return rb_per_cpu_empty(cpu_buffer);
1964}
1965
1966/**
1967 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
1968 * @buffer_a: One buffer to swap with
1969 * @buffer_b: The other buffer to swap with
1970 *
1971 * This function is useful for tracers that want to take a "snapshot"
1972 * of a CPU buffer and have another backup buffer lying around.
1973 * It is expected that the tracer handles the cpu buffer not being
1974 * used at the moment.
1975 */
1976int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
1977 struct ring_buffer *buffer_b, int cpu)
1978{
1979 struct ring_buffer_per_cpu *cpu_buffer_a;
1980 struct ring_buffer_per_cpu *cpu_buffer_b;
1981
1982 if (!cpu_isset(cpu, buffer_a->cpumask) ||
1983 !cpu_isset(cpu, buffer_b->cpumask))
1984 return -EINVAL;
1985
1986 /* At least make sure the two buffers are somewhat the same */
1987 if (buffer_a->size != buffer_b->size ||
1988 buffer_a->pages != buffer_b->pages)
1989 return -EINVAL;
1990
1991 cpu_buffer_a = buffer_a->buffers[cpu];
1992 cpu_buffer_b = buffer_b->buffers[cpu];
1993
1994 /*
1995 * We can't do a synchronize_sched here because this
1996 * function can be called in atomic context.
1997 * Normally this will be called from the same CPU as cpu.
1998 * If not it's up to the caller to protect this.
1999 */
2000 atomic_inc(&cpu_buffer_a->record_disabled);
2001 atomic_inc(&cpu_buffer_b->record_disabled);
2002
2003 buffer_a->buffers[cpu] = cpu_buffer_b;
2004 buffer_b->buffers[cpu] = cpu_buffer_a;
2005
2006 cpu_buffer_b->buffer = buffer_a;
2007 cpu_buffer_a->buffer = buffer_b;
2008
2009 atomic_dec(&cpu_buffer_a->record_disabled);
2010 atomic_dec(&cpu_buffer_b->record_disabled);
2011
2012 return 0;
2013}
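
Snapshot sketch (illustrative; snap and live are hypothetical buffers):
swapping gives an O(1) per-cpu snapshot, which is exactly how
update_max_tr_single() in the trace.c diff below uses it:

	if (ring_buffer_swap_cpu(snap, live, cpu) == 0) {
		/* snap now holds cpu's old live pages; read it at leisure */
	}
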
2014
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 868e121c8e3..d345d649d07 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -14,6 +14,7 @@
14#include <linux/utsrelease.h> 14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/seq_file.h> 16#include <linux/seq_file.h>
17#include <linux/notifier.h>
17#include <linux/debugfs.h> 18#include <linux/debugfs.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/hardirq.h> 20#include <linux/hardirq.h>
@@ -22,6 +23,7 @@
22#include <linux/ftrace.h> 23#include <linux/ftrace.h>
23#include <linux/module.h> 24#include <linux/module.h>
24#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/kdebug.h>
25#include <linux/ctype.h> 27#include <linux/ctype.h>
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/poll.h> 29#include <linux/poll.h>
@@ -31,25 +33,36 @@
31#include <linux/writeback.h> 33#include <linux/writeback.h>
32 34
33#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h>
34 37
35#include "trace.h" 38#include "trace.h"
36 39
40#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
41
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; 42unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh; 43unsigned long __read_mostly tracing_thresh;
39 44
40static unsigned long __read_mostly tracing_nr_buffers; 45static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
46
47static inline void ftrace_disable_cpu(void)
48{
49 preempt_disable();
50 local_inc(&__get_cpu_var(ftrace_cpu_disabled));
51}
52
53static inline void ftrace_enable_cpu(void)
54{
55 local_dec(&__get_cpu_var(ftrace_cpu_disabled));
56 preempt_enable();
57}
58
41static cpumask_t __read_mostly tracing_buffer_mask; 59static cpumask_t __read_mostly tracing_buffer_mask;
42 60
43#define for_each_tracing_cpu(cpu) \ 61#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask) 62 for_each_cpu_mask(cpu, tracing_buffer_mask)
45 63
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1; 64static int tracing_disabled = 1;
50 65
51static unsigned long tracing_pages_allocated;
52
53long 66long
54ns2usecs(cycle_t nsec) 67ns2usecs(cycle_t nsec)
55{ 68{
@@ -60,7 +73,9 @@ ns2usecs(cycle_t nsec)
60 73
61cycle_t ftrace_now(int cpu) 74cycle_t ftrace_now(int cpu)
62{ 75{
63 return cpu_clock(cpu); 76 u64 ts = ring_buffer_time_stamp(cpu);
77 ring_buffer_normalize_time_stamp(cpu, &ts);
78 return ts;
64} 79}
65 80
66/* 81/*
@@ -100,11 +115,18 @@ static int tracer_enabled = 1;
100int ftrace_function_enabled; 115int ftrace_function_enabled;
101 116
102/* 117/*
103 * trace_nr_entries is the number of entries that is allocated 118 * trace_buf_size is the size in bytes that is allocated
104 * for a buffer. Note, the number of entries is always rounded 119 * for a buffer. Note, the number of bytes is always rounded
105 * to ENTRIES_PER_PAGE. 120 * to page size.
121 *
122 * This number is purposely set to a low number of 16384.
123 * If a dump on oops happens, it is much appreciated
124 * not to have to wait for all that output. This can be
125 * configured at both boot time and run time.
106 */ 126 */
107static unsigned long trace_nr_entries = 65536UL; 127#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
128
129static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
108 130
109/* trace_types holds a link list of available tracers. */ 131/* trace_types holds a link list of available tracers. */
110static struct tracer *trace_types __read_mostly; 132static struct tracer *trace_types __read_mostly;
@@ -133,24 +155,6 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
133/* trace_flags holds iter_ctrl options */ 155/* trace_flags holds iter_ctrl options */
134unsigned long trace_flags = TRACE_ITER_PRINT_PARENT; 156unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
135 157
136static notrace void no_trace_init(struct trace_array *tr)
137{
138 int cpu;
139
140 ftrace_function_enabled = 0;
141 if(tr->ctrl)
142 for_each_online_cpu(cpu)
143 tracing_reset(tr->data[cpu]);
144 tracer_enabled = 0;
145}
146
147/* dummy trace to disable tracing */
148static struct tracer no_tracer __read_mostly = {
149 .name = "none",
150 .init = no_trace_init
151};
152
153
154/** 158/**
155 * trace_wake_up - wake up tasks waiting for trace input 159 * trace_wake_up - wake up tasks waiting for trace input
156 * 160 *
@@ -167,23 +171,21 @@ void trace_wake_up(void)
167 wake_up(&trace_wait); 171 wake_up(&trace_wait);
168} 172}
169 173
170#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry)) 174static int __init set_buf_size(char *str)
171
172static int __init set_nr_entries(char *str)
173{ 175{
174 unsigned long nr_entries; 176 unsigned long buf_size;
175 int ret; 177 int ret;
176 178
177 if (!str) 179 if (!str)
178 return 0; 180 return 0;
179 ret = strict_strtoul(str, 0, &nr_entries); 181 ret = strict_strtoul(str, 0, &buf_size);
180 /* nr_entries can not be zero */ 182 /* buf_size can not be zero */
181 if (ret < 0 || nr_entries == 0) 183 if (ret < 0 || buf_size == 0)
182 return 0; 184 return 0;
183 trace_nr_entries = nr_entries; 185 trace_buf_size = buf_size;
184 return 1; 186 return 1;
185} 187}
186__setup("trace_entries=", set_nr_entries); 188__setup("trace_buf_size=", set_buf_size);
187 189
188unsigned long nsecs_to_usecs(unsigned long nsecs) 190unsigned long nsecs_to_usecs(unsigned long nsecs)
189{ 191{
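
With this change the boot parameter takes a byte count instead of an entry
count; for example (illustrative value):

	trace_buf_size=1048576

asks for 1 MB per trace buffer, rounded to page size as the comment above
describes.
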
@@ -191,21 +193,6 @@ unsigned long nsecs_to_usecs(unsigned long nsecs)
191} 193}
192 194
193/* 195/*
194 * trace_flag_type is an enumeration that holds different
195 * states when a trace occurs. These are:
196 * IRQS_OFF - interrupts were disabled
197 * NEED_RESCED - reschedule is requested
198 * HARDIRQ - inside an interrupt handler
199 * SOFTIRQ - inside a softirq handler
200 */
201enum trace_flag_type {
202 TRACE_FLAG_IRQS_OFF = 0x01,
203 TRACE_FLAG_NEED_RESCHED = 0x02,
204 TRACE_FLAG_HARDIRQ = 0x04,
205 TRACE_FLAG_SOFTIRQ = 0x08,
206};
207
208/*
209 * TRACE_ITER_SYM_MASK masks the options in trace_flags that 196 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
210 * control the output of kernel symbols. 197 * control the output of kernel symbols.
211 */ 198 */
@@ -224,6 +211,7 @@ static const char *trace_options[] = {
224 "block", 211 "block",
225 "stacktrace", 212 "stacktrace",
226 "sched-tree", 213 "sched-tree",
214 "ftrace_printk",
227 NULL 215 NULL
228}; 216};
229 217
@@ -266,54 +254,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
266 tracing_record_cmdline(current); 254 tracing_record_cmdline(current);
267} 255}
268 256
269#define CHECK_COND(cond) \
270 if (unlikely(cond)) { \
271 tracing_disabled = 1; \
272 WARN_ON(1); \
273 return -1; \
274 }
275
276/**
277 * check_pages - integrity check of trace buffers
278 *
279 * As a safty measure we check to make sure the data pages have not
280 * been corrupted.
281 */
282int check_pages(struct trace_array_cpu *data)
283{
284 struct page *page, *tmp;
285
286 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
287 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
288
289 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
290 CHECK_COND(page->lru.next->prev != &page->lru);
291 CHECK_COND(page->lru.prev->next != &page->lru);
292 }
293
294 return 0;
295}
296
297/**
298 * head_page - page address of the first page in per_cpu buffer.
299 *
300 * head_page returns the page address of the first page in
301 * a per_cpu buffer. This also preforms various consistency
302 * checks to make sure the buffer has not been corrupted.
303 */
304void *head_page(struct trace_array_cpu *data)
305{
306 struct page *page;
307
308 if (list_empty(&data->trace_pages))
309 return NULL;
310
311 page = list_entry(data->trace_pages.next, struct page, lru);
312 BUG_ON(&page->lru == &data->trace_pages);
313
314 return page_address(page);
315}
316
317/** 257/**
318 * trace_seq_printf - sequence printing of trace information 258 * trace_seq_printf - sequence printing of trace information
319 * @s: trace sequence descriptor 259 * @s: trace sequence descriptor
@@ -395,28 +335,23 @@ trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
395 return len; 335 return len;
396} 336}
397 337
398#define HEX_CHARS 17 338#define MAX_MEMHEX_BYTES 8
399static const char hex2asc[] = "0123456789abcdef"; 339#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
400 340
401static int 341static int
402trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) 342trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
403{ 343{
404 unsigned char hex[HEX_CHARS]; 344 unsigned char hex[HEX_CHARS];
405 unsigned char *data = mem; 345 unsigned char *data = mem;
406 unsigned char byte;
407 int i, j; 346 int i, j;
408 347
409 BUG_ON(len >= HEX_CHARS);
410
411#ifdef __BIG_ENDIAN 348#ifdef __BIG_ENDIAN
412 for (i = 0, j = 0; i < len; i++) { 349 for (i = 0, j = 0; i < len; i++) {
413#else 350#else
414 for (i = len-1, j = 0; i >= 0; i--) { 351 for (i = len-1, j = 0; i >= 0; i--) {
415#endif 352#endif
416 byte = data[i]; 353 hex[j++] = hex_asc_hi(data[i]);
417 354 hex[j++] = hex_asc_lo(data[i]);
418 hex[j++] = hex2asc[byte & 0x0f];
419 hex[j++] = hex2asc[byte >> 4];
420 } 355 }
421 hex[j++] = ' '; 356 hex[j++] = ' ';
422 357
@@ -460,34 +395,6 @@ trace_print_seq(struct seq_file *m, struct trace_seq *s)
460 trace_seq_reset(s); 395 trace_seq_reset(s);
461} 396}
462 397
463/*
464 * flip the trace buffers between two trace descriptors.
465 * This usually is the buffers between the global_trace and
466 * the max_tr to record a snapshot of a current trace.
467 *
468 * The ftrace_max_lock must be held.
469 */
470static void
471flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
472{
473 struct list_head flip_pages;
474
475 INIT_LIST_HEAD(&flip_pages);
476
477 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
478 sizeof(struct trace_array_cpu) -
479 offsetof(struct trace_array_cpu, trace_head_idx));
480
481 check_pages(tr1);
482 check_pages(tr2);
483 list_splice_init(&tr1->trace_pages, &flip_pages);
484 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
485 list_splice_init(&flip_pages, &tr2->trace_pages);
486 BUG_ON(!list_empty(&flip_pages));
487 check_pages(tr1);
488 check_pages(tr2);
489}
490
491/** 398/**
492 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 399 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
493 * @tr: tracer 400 * @tr: tracer
@@ -500,17 +407,17 @@ flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
500void 407void
501update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 408update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
502{ 409{
503 struct trace_array_cpu *data; 410 struct ring_buffer *buf = tr->buffer;
504 int i;
505 411
506 WARN_ON_ONCE(!irqs_disabled()); 412 WARN_ON_ONCE(!irqs_disabled());
507 __raw_spin_lock(&ftrace_max_lock); 413 __raw_spin_lock(&ftrace_max_lock);
508 /* clear out all the previous traces */ 414
509 for_each_tracing_cpu(i) { 415 tr->buffer = max_tr.buffer;
510 data = tr->data[i]; 416 max_tr.buffer = buf;
511 flip_trace(max_tr.data[i], data); 417
512 tracing_reset(data); 418 ftrace_disable_cpu();
513 } 419 ring_buffer_reset(tr->buffer);
420 ftrace_enable_cpu();
514 421
515 __update_max_tr(tr, tsk, cpu); 422 __update_max_tr(tr, tsk, cpu);
516 __raw_spin_unlock(&ftrace_max_lock); 423 __raw_spin_unlock(&ftrace_max_lock);
@@ -527,16 +434,19 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
527void 434void
528update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 435update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
529{ 436{
530 struct trace_array_cpu *data = tr->data[cpu]; 437 int ret;
531 int i;
532 438
533 WARN_ON_ONCE(!irqs_disabled()); 439 WARN_ON_ONCE(!irqs_disabled());
534 __raw_spin_lock(&ftrace_max_lock); 440 __raw_spin_lock(&ftrace_max_lock);
535 for_each_tracing_cpu(i)
536 tracing_reset(max_tr.data[i]);
537 441
538 flip_trace(max_tr.data[cpu], data); 442 ftrace_disable_cpu();
539 tracing_reset(data); 443
444 ring_buffer_reset(max_tr.buffer);
445 ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
446
447 ftrace_enable_cpu();
448
449 WARN_ON_ONCE(ret);
540 450
541 __update_max_tr(tr, tsk, cpu); 451 __update_max_tr(tr, tsk, cpu);
542 __raw_spin_unlock(&ftrace_max_lock); 452 __raw_spin_unlock(&ftrace_max_lock);
@@ -573,7 +483,6 @@ int register_tracer(struct tracer *type)
573#ifdef CONFIG_FTRACE_STARTUP_TEST 483#ifdef CONFIG_FTRACE_STARTUP_TEST
574 if (type->selftest) { 484 if (type->selftest) {
575 struct tracer *saved_tracer = current_trace; 485 struct tracer *saved_tracer = current_trace;
576 struct trace_array_cpu *data;
577 struct trace_array *tr = &global_trace; 486 struct trace_array *tr = &global_trace;
578 int saved_ctrl = tr->ctrl; 487 int saved_ctrl = tr->ctrl;
579 int i; 488 int i;
@@ -585,10 +494,7 @@ int register_tracer(struct tracer *type)
585 * If we fail, we do not register this tracer. 494 * If we fail, we do not register this tracer.
586 */ 495 */
587 for_each_tracing_cpu(i) { 496 for_each_tracing_cpu(i) {
588 data = tr->data[i]; 497 tracing_reset(tr, i);
589 if (!head_page(data))
590 continue;
591 tracing_reset(data);
592 } 498 }
593 current_trace = type; 499 current_trace = type;
594 tr->ctrl = 0; 500 tr->ctrl = 0;
@@ -604,10 +510,7 @@ int register_tracer(struct tracer *type)
604 } 510 }
605 /* Only reset on passing, to avoid touching corrupted buffers */ 511 /* Only reset on passing, to avoid touching corrupted buffers */
606 for_each_tracing_cpu(i) { 512 for_each_tracing_cpu(i) {
607 data = tr->data[i]; 513 tracing_reset(tr, i);
608 if (!head_page(data))
609 continue;
610 tracing_reset(data);
611 } 514 }
612 printk(KERN_CONT "PASSED\n"); 515 printk(KERN_CONT "PASSED\n");
613 } 516 }
@@ -653,13 +556,11 @@ void unregister_tracer(struct tracer *type)
653 mutex_unlock(&trace_types_lock); 556 mutex_unlock(&trace_types_lock);
654} 557}
655 558
656void tracing_reset(struct trace_array_cpu *data) 559void tracing_reset(struct trace_array *tr, int cpu)
657{ 560{
658 data->trace_idx = 0; 561 ftrace_disable_cpu();
659 data->overrun = 0; 562 ring_buffer_reset_cpu(tr->buffer, cpu);
660 data->trace_head = data->trace_tail = head_page(data); 563 ftrace_enable_cpu();
661 data->trace_head_idx = 0;
662 data->trace_tail_idx = 0;
663} 564}
664 565
665#define SAVED_CMDLINES 128 566#define SAVED_CMDLINES 128
@@ -745,82 +646,16 @@ void tracing_record_cmdline(struct task_struct *tsk)
745 trace_save_cmdline(tsk); 646 trace_save_cmdline(tsk);
746} 647}
747 648
748static inline struct list_head * 649void
749trace_next_list(struct trace_array_cpu *data, struct list_head *next) 650tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
750{ 651 int pc)
751 /*
752 * Roundrobin - but skip the head (which is not a real page):
753 */
754 next = next->next;
755 if (unlikely(next == &data->trace_pages))
756 next = next->next;
757 BUG_ON(next == &data->trace_pages);
758
759 return next;
760}
761
762static inline void *
763trace_next_page(struct trace_array_cpu *data, void *addr)
764{
765 struct list_head *next;
766 struct page *page;
767
768 page = virt_to_page(addr);
769
770 next = trace_next_list(data, &page->lru);
771 page = list_entry(next, struct page, lru);
772
773 return page_address(page);
774}
775
776static inline struct trace_entry *
777tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
778{
779 unsigned long idx, idx_next;
780 struct trace_entry *entry;
781
782 data->trace_idx++;
783 idx = data->trace_head_idx;
784 idx_next = idx + 1;
785
786 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
787
788 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
789
790 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
791 data->trace_head = trace_next_page(data, data->trace_head);
792 idx_next = 0;
793 }
794
795 if (data->trace_head == data->trace_tail &&
796 idx_next == data->trace_tail_idx) {
797 /* overrun */
798 data->overrun++;
799 data->trace_tail_idx++;
800 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
801 data->trace_tail =
802 trace_next_page(data, data->trace_tail);
803 data->trace_tail_idx = 0;
804 }
805 }
806
807 data->trace_head_idx = idx_next;
808
809 return entry;
810}
811
812static inline void
813tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
814{ 652{
815 struct task_struct *tsk = current; 653 struct task_struct *tsk = current;
816 unsigned long pc;
817
818 pc = preempt_count();
819 654
820 entry->preempt_count = pc & 0xff; 655 entry->preempt_count = pc & 0xff;
821 entry->pid = (tsk) ? tsk->pid : 0; 656 entry->pid = (tsk) ? tsk->pid : 0;
822 entry->t = ftrace_now(raw_smp_processor_id()); 657 entry->flags =
823 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | 658 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
824 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | 659 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
825 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 660 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
826 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 661 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -828,145 +663,139 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
828 663
829void 664void
830trace_function(struct trace_array *tr, struct trace_array_cpu *data, 665trace_function(struct trace_array *tr, struct trace_array_cpu *data,
831 unsigned long ip, unsigned long parent_ip, unsigned long flags) 666 unsigned long ip, unsigned long parent_ip, unsigned long flags,
667 int pc)
832{ 668{
833 struct trace_entry *entry; 669 struct ring_buffer_event *event;
670 struct ftrace_entry *entry;
834 unsigned long irq_flags; 671 unsigned long irq_flags;
835 672
836 raw_local_irq_save(irq_flags); 673 /* If we are reading the ring buffer, don't trace */
837 __raw_spin_lock(&data->lock); 674 if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
838 entry = tracing_get_trace_entry(tr, data); 675 return;
839 tracing_generic_entry_update(entry, flags); 676
840 entry->type = TRACE_FN; 677 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
841 entry->fn.ip = ip; 678 &irq_flags);
842 entry->fn.parent_ip = parent_ip; 679 if (!event)
843 __raw_spin_unlock(&data->lock); 680 return;
844 raw_local_irq_restore(irq_flags); 681 entry = ring_buffer_event_data(event);
682 tracing_generic_entry_update(&entry->ent, flags, pc);
683 entry->ent.type = TRACE_FN;
684 entry->ip = ip;
685 entry->parent_ip = parent_ip;
686 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
845} 687}
846 688
847void 689void
848ftrace(struct trace_array *tr, struct trace_array_cpu *data, 690ftrace(struct trace_array *tr, struct trace_array_cpu *data,
849 unsigned long ip, unsigned long parent_ip, unsigned long flags) 691 unsigned long ip, unsigned long parent_ip, unsigned long flags,
692 int pc)
850{ 693{
851 if (likely(!atomic_read(&data->disabled))) 694 if (likely(!atomic_read(&data->disabled)))
852 trace_function(tr, data, ip, parent_ip, flags); 695 trace_function(tr, data, ip, parent_ip, flags, pc);
853} 696}
854 697
855#ifdef CONFIG_MMIOTRACE 698static void ftrace_trace_stack(struct trace_array *tr,
856void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, 699 struct trace_array_cpu *data,
857 struct mmiotrace_rw *rw) 700 unsigned long flags,
701 int skip, int pc)
858{ 702{
859 struct trace_entry *entry; 703 struct ring_buffer_event *event;
704 struct stack_entry *entry;
705 struct stack_trace trace;
860 unsigned long irq_flags; 706 unsigned long irq_flags;
861 707
862 raw_local_irq_save(irq_flags); 708 if (!(trace_flags & TRACE_ITER_STACKTRACE))
863 __raw_spin_lock(&data->lock); 709 return;
864
865 entry = tracing_get_trace_entry(tr, data);
866 tracing_generic_entry_update(entry, 0);
867 entry->type = TRACE_MMIO_RW;
868 entry->mmiorw = *rw;
869
870 __raw_spin_unlock(&data->lock);
871 raw_local_irq_restore(irq_flags);
872
873 trace_wake_up();
874}
875
876void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
877 struct mmiotrace_map *map)
878{
879 struct trace_entry *entry;
880 unsigned long irq_flags;
881 710
882 raw_local_irq_save(irq_flags); 711 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
883 __raw_spin_lock(&data->lock); 712 &irq_flags);
713 if (!event)
714 return;
715 entry = ring_buffer_event_data(event);
716 tracing_generic_entry_update(&entry->ent, flags, pc);
717 entry->ent.type = TRACE_STACK;
884 718
885 entry = tracing_get_trace_entry(tr, data); 719 memset(&entry->caller, 0, sizeof(entry->caller));
886 tracing_generic_entry_update(entry, 0);
887 entry->type = TRACE_MMIO_MAP;
888 entry->mmiomap = *map;
889 720
890 __raw_spin_unlock(&data->lock); 721 trace.nr_entries = 0;
891 raw_local_irq_restore(irq_flags); 722 trace.max_entries = FTRACE_STACK_ENTRIES;
723 trace.skip = skip;
724 trace.entries = entry->caller;
892 725
893 trace_wake_up(); 726 save_stack_trace(&trace);
727 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
894} 728}
895#endif
896 729
897void __trace_stack(struct trace_array *tr, 730void __trace_stack(struct trace_array *tr,
898 struct trace_array_cpu *data, 731 struct trace_array_cpu *data,
899 unsigned long flags, 732 unsigned long flags,
900 int skip) 733 int skip)
901{ 734{
902 struct trace_entry *entry; 735 ftrace_trace_stack(tr, data, flags, skip, preempt_count());
903 struct stack_trace trace;
904
905 if (!(trace_flags & TRACE_ITER_STACKTRACE))
906 return;
907
908 entry = tracing_get_trace_entry(tr, data);
909 tracing_generic_entry_update(entry, flags);
910 entry->type = TRACE_STACK;
911
912 memset(&entry->stack, 0, sizeof(entry->stack));
913
914 trace.nr_entries = 0;
915 trace.max_entries = FTRACE_STACK_ENTRIES;
916 trace.skip = skip;
917 trace.entries = entry->stack.caller;
918
919 save_stack_trace(&trace);
920} 736}
921 737
922void 738static void
923__trace_special(void *__tr, void *__data, 739ftrace_trace_special(void *__tr, void *__data,
924 unsigned long arg1, unsigned long arg2, unsigned long arg3) 740 unsigned long arg1, unsigned long arg2, unsigned long arg3,
741 int pc)
925{ 742{
743 struct ring_buffer_event *event;
926 struct trace_array_cpu *data = __data; 744 struct trace_array_cpu *data = __data;
927 struct trace_array *tr = __tr; 745 struct trace_array *tr = __tr;
928 struct trace_entry *entry; 746 struct special_entry *entry;
929 unsigned long irq_flags; 747 unsigned long irq_flags;
930 748
931 raw_local_irq_save(irq_flags); 749 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
932 __raw_spin_lock(&data->lock); 750 &irq_flags);
933 entry = tracing_get_trace_entry(tr, data); 751 if (!event)
934 tracing_generic_entry_update(entry, 0); 752 return;
935 entry->type = TRACE_SPECIAL; 753 entry = ring_buffer_event_data(event);
936 entry->special.arg1 = arg1; 754 tracing_generic_entry_update(&entry->ent, 0, pc);
937 entry->special.arg2 = arg2; 755 entry->ent.type = TRACE_SPECIAL;
938 entry->special.arg3 = arg3; 756 entry->arg1 = arg1;
939 __trace_stack(tr, data, irq_flags, 4); 757 entry->arg2 = arg2;
940 __raw_spin_unlock(&data->lock); 758 entry->arg3 = arg3;
941 raw_local_irq_restore(irq_flags); 759 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
760 ftrace_trace_stack(tr, data, irq_flags, 4, pc);
942 761
943 trace_wake_up(); 762 trace_wake_up();
944} 763}
945 764
946void 765void
766__trace_special(void *__tr, void *__data,
767 unsigned long arg1, unsigned long arg2, unsigned long arg3)
768{
769 ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count());
770}
771
772void
947tracing_sched_switch_trace(struct trace_array *tr, 773tracing_sched_switch_trace(struct trace_array *tr,
948 struct trace_array_cpu *data, 774 struct trace_array_cpu *data,
949 struct task_struct *prev, 775 struct task_struct *prev,
950 struct task_struct *next, 776 struct task_struct *next,
951 unsigned long flags) 777 unsigned long flags, int pc)
952{ 778{
953 struct trace_entry *entry; 779 struct ring_buffer_event *event;
780 struct ctx_switch_entry *entry;
954 unsigned long irq_flags; 781 unsigned long irq_flags;
955 782
956 raw_local_irq_save(irq_flags); 783 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
957 __raw_spin_lock(&data->lock); 784 &irq_flags);
958 entry = tracing_get_trace_entry(tr, data); 785 if (!event)
959 tracing_generic_entry_update(entry, flags); 786 return;
960 entry->type = TRACE_CTX; 787 entry = ring_buffer_event_data(event);
961 entry->ctx.prev_pid = prev->pid; 788 tracing_generic_entry_update(&entry->ent, flags, pc);
962 entry->ctx.prev_prio = prev->prio; 789 entry->ent.type = TRACE_CTX;
963 entry->ctx.prev_state = prev->state; 790 entry->prev_pid = prev->pid;
964 entry->ctx.next_pid = next->pid; 791 entry->prev_prio = prev->prio;
965 entry->ctx.next_prio = next->prio; 792 entry->prev_state = prev->state;
966 entry->ctx.next_state = next->state; 793 entry->next_pid = next->pid;
967 __trace_stack(tr, data, flags, 5); 794 entry->next_prio = next->prio;
968 __raw_spin_unlock(&data->lock); 795 entry->next_state = next->state;
969 raw_local_irq_restore(irq_flags); 796 entry->next_cpu = task_cpu(next);
797 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
798 ftrace_trace_stack(tr, data, flags, 5, pc);
970} 799}
971 800
972void 801void
@@ -974,25 +803,28 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
974 struct trace_array_cpu *data, 803 struct trace_array_cpu *data,
975 struct task_struct *wakee, 804 struct task_struct *wakee,
976 struct task_struct *curr, 805 struct task_struct *curr,
977 unsigned long flags) 806 unsigned long flags, int pc)
978{ 807{
979 struct trace_entry *entry; 808 struct ring_buffer_event *event;
809 struct ctx_switch_entry *entry;
980 unsigned long irq_flags; 810 unsigned long irq_flags;
981 811
982 raw_local_irq_save(irq_flags); 812 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
983 __raw_spin_lock(&data->lock); 813 &irq_flags);
984 entry = tracing_get_trace_entry(tr, data); 814 if (!event)
985 tracing_generic_entry_update(entry, flags); 815 return;
986 entry->type = TRACE_WAKE; 816 entry = ring_buffer_event_data(event);
987 entry->ctx.prev_pid = curr->pid; 817 tracing_generic_entry_update(&entry->ent, flags, pc);
988 entry->ctx.prev_prio = curr->prio; 818 entry->ent.type = TRACE_WAKE;
989 entry->ctx.prev_state = curr->state; 819 entry->prev_pid = curr->pid;
990 entry->ctx.next_pid = wakee->pid; 820 entry->prev_prio = curr->prio;
991 entry->ctx.next_prio = wakee->prio; 821 entry->prev_state = curr->state;
992 entry->ctx.next_state = wakee->state; 822 entry->next_pid = wakee->pid;
993 __trace_stack(tr, data, flags, 6); 823 entry->next_prio = wakee->prio;
994 __raw_spin_unlock(&data->lock); 824 entry->next_state = wakee->state;
995 raw_local_irq_restore(irq_flags); 825 entry->next_cpu = task_cpu(wakee);
826 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
827 ftrace_trace_stack(tr, data, flags, 6, pc);
996 828
997 trace_wake_up(); 829 trace_wake_up();
998} 830}
@@ -1002,23 +834,21 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1002{ 834{
1003 struct trace_array *tr = &global_trace; 835 struct trace_array *tr = &global_trace;
1004 struct trace_array_cpu *data; 836 struct trace_array_cpu *data;
1005 unsigned long flags;
1006 long disabled;
1007 int cpu; 837 int cpu;
838 int pc;
1008 839
1009 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl) 840 if (tracing_disabled || !tr->ctrl)
1010 return; 841 return;
1011 842
1012 local_irq_save(flags); 843 pc = preempt_count();
844 preempt_disable_notrace();
1013 cpu = raw_smp_processor_id(); 845 cpu = raw_smp_processor_id();
1014 data = tr->data[cpu]; 846 data = tr->data[cpu];
1015 disabled = atomic_inc_return(&data->disabled);
1016 847
1017 if (likely(disabled == 1)) 848 if (likely(!atomic_read(&data->disabled)))
1018 __trace_special(tr, data, arg1, arg2, arg3); 849 ftrace_trace_special(tr, data, arg1, arg2, arg3, pc);
1019 850
1020 atomic_dec(&data->disabled); 851 preempt_enable_notrace();
1021 local_irq_restore(flags);
1022} 852}
1023 853
1024#ifdef CONFIG_FTRACE 854#ifdef CONFIG_FTRACE
@@ -1029,7 +859,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1029 struct trace_array_cpu *data; 859 struct trace_array_cpu *data;
1030 unsigned long flags; 860 unsigned long flags;
1031 long disabled; 861 long disabled;
1032 int cpu; 862 int cpu, resched;
863 int pc;
1033 864
1034 if (unlikely(!ftrace_function_enabled)) 865 if (unlikely(!ftrace_function_enabled))
1035 return; 866 return;
@@ -1037,16 +868,22 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
1037 if (skip_trace(ip)) 868 if (skip_trace(ip))
1038 return; 869 return;
1039 870
1040 local_irq_save(flags); 871 pc = preempt_count();
872 resched = need_resched();
873 preempt_disable_notrace();
874 local_save_flags(flags);
1041 cpu = raw_smp_processor_id(); 875 cpu = raw_smp_processor_id();
1042 data = tr->data[cpu]; 876 data = tr->data[cpu];
1043 disabled = atomic_inc_return(&data->disabled); 877 disabled = atomic_inc_return(&data->disabled);
1044 878
1045 if (likely(disabled == 1)) 879 if (likely(disabled == 1))
1046 trace_function(tr, data, ip, parent_ip, flags); 880 trace_function(tr, data, ip, parent_ip, flags, pc);
1047 881
1048 atomic_dec(&data->disabled); 882 atomic_dec(&data->disabled);
1049 local_irq_restore(flags); 883 if (resched)
884 preempt_enable_no_resched_notrace();
885 else
886 preempt_enable_notrace();
1050} 887}
1051 888
1052static struct ftrace_ops trace_ops __read_mostly = 889static struct ftrace_ops trace_ops __read_mostly =
@@ -1073,117 +910,101 @@ enum trace_file_type {
1073 TRACE_FILE_LAT_FMT = 1, 910 TRACE_FILE_LAT_FMT = 1,
1074}; 911};
1075 912
1076static struct trace_entry * 913static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1077trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1078 struct trace_iterator *iter, int cpu)
1079{ 914{
1080 struct page *page; 915 /* Don't allow ftrace to trace into the ring buffers */
1081 struct trace_entry *array; 916 ftrace_disable_cpu();
1082 917
1083 if (iter->next_idx[cpu] >= tr->entries || 918 iter->idx++;
1084 iter->next_idx[cpu] >= data->trace_idx || 919 if (iter->buffer_iter[iter->cpu])
1085 (data->trace_head == data->trace_tail && 920 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1086 data->trace_head_idx == data->trace_tail_idx))
1087 return NULL;
1088 921
1089 if (!iter->next_page[cpu]) { 922 ftrace_enable_cpu();
1090 /* Initialize the iterator for this cpu trace buffer */ 923}
1091 WARN_ON(!data->trace_tail); 924
1092 page = virt_to_page(data->trace_tail); 925static struct trace_entry *
1093 iter->next_page[cpu] = &page->lru; 926peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1094 iter->next_page_idx[cpu] = data->trace_tail_idx; 927{
1095 } 928 struct ring_buffer_event *event;
929 struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1096 930
1097 page = list_entry(iter->next_page[cpu], struct page, lru); 931 /* Don't allow ftrace to trace into the ring buffers */
1098 BUG_ON(&data->trace_pages == &page->lru); 932 ftrace_disable_cpu();
933
934 if (buf_iter)
935 event = ring_buffer_iter_peek(buf_iter, ts);
936 else
937 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1099 938
1100 array = page_address(page); 939 ftrace_enable_cpu();
1101 940
1102 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE); 941 return event ? ring_buffer_event_data(event) : NULL;
1103 return &array[iter->next_page_idx[cpu]];
1104} 942}
1105 943
1106static struct trace_entry * 944static struct trace_entry *
1107find_next_entry(struct trace_iterator *iter, int *ent_cpu) 945__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1108{ 946{
1109 struct trace_array *tr = iter->tr; 947 struct ring_buffer *buffer = iter->tr->buffer;
1110 struct trace_entry *ent, *next = NULL; 948 struct trace_entry *ent, *next = NULL;
949 u64 next_ts = 0, ts;
1111 int next_cpu = -1; 950 int next_cpu = -1;
1112 int cpu; 951 int cpu;
1113 952
1114 for_each_tracing_cpu(cpu) { 953 for_each_tracing_cpu(cpu) {
1115 if (!head_page(tr->data[cpu])) 954
955 if (ring_buffer_empty_cpu(buffer, cpu))
1116 continue; 956 continue;
1117 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu); 957
958 ent = peek_next_entry(iter, cpu, &ts);
959
1118 /* 960 /*
1119 * Pick the entry with the smallest timestamp: 961 * Pick the entry with the smallest timestamp:
1120 */ 962 */
1121 if (ent && (!next || ent->t < next->t)) { 963 if (ent && (!next || ts < next_ts)) {
1122 next = ent; 964 next = ent;
1123 next_cpu = cpu; 965 next_cpu = cpu;
966 next_ts = ts;
1124 } 967 }
1125 } 968 }
1126 969
1127 if (ent_cpu) 970 if (ent_cpu)
1128 *ent_cpu = next_cpu; 971 *ent_cpu = next_cpu;
1129 972
973 if (ent_ts)
974 *ent_ts = next_ts;
975
1130 return next; 976 return next;
1131} 977}
1132 978
1133static void trace_iterator_increment(struct trace_iterator *iter) 979/* Find the next real entry, without updating the iterator itself */
980static struct trace_entry *
981find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1134{ 982{
1135 iter->idx++; 983 return __find_next_entry(iter, ent_cpu, ent_ts);
1136 iter->next_idx[iter->cpu]++;
1137 iter->next_page_idx[iter->cpu]++;
1138
1139 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1140 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1141
1142 iter->next_page_idx[iter->cpu] = 0;
1143 iter->next_page[iter->cpu] =
1144 trace_next_list(data, iter->next_page[iter->cpu]);
1145 }
1146} 984}
1147 985
1148static void trace_consume(struct trace_iterator *iter) 986/* Find the next real entry, and increment the iterator to the next entry */
987static void *find_next_entry_inc(struct trace_iterator *iter)
1149{ 988{
1150 struct trace_array_cpu *data = iter->tr->data[iter->cpu]; 989 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1151 990
1152 data->trace_tail_idx++; 991 if (iter->ent)
1153 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) { 992 trace_iterator_increment(iter, iter->cpu);
1154 data->trace_tail = trace_next_page(data, data->trace_tail);
1155 data->trace_tail_idx = 0;
1156 }
1157 993
1158 /* Check if we empty it, then reset the index */ 994 return iter->ent ? iter : NULL;
1159 if (data->trace_head == data->trace_tail &&
1160 data->trace_head_idx == data->trace_tail_idx)
1161 data->trace_idx = 0;
1162} 995}
1163 996
1164static void *find_next_entry_inc(struct trace_iterator *iter) 997static void trace_consume(struct trace_iterator *iter)
1165{ 998{
1166 struct trace_entry *next; 999 /* Don't allow ftrace to trace into the ring buffers */
1167 int next_cpu = -1; 1000 ftrace_disable_cpu();
1168 1001 ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1169 next = find_next_entry(iter, &next_cpu); 1002 ftrace_enable_cpu();
1170
1171 iter->prev_ent = iter->ent;
1172 iter->prev_cpu = iter->cpu;
1173
1174 iter->ent = next;
1175 iter->cpu = next_cpu;
1176
1177 if (next)
1178 trace_iterator_increment(iter);
1179
1180 return next ? iter : NULL;
1181} 1003}
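The split above leaves two read modes over the same buffer: seq-file readers walk per-CPU iterators without disturbing the buffer, while trace_pipe destructively consumes entries as it prints them. A hedged sketch of the distinction, using the ring-buffer calls the way this patch uses them (read_one() itself is not a function from the patch):

    static void *read_one(struct ring_buffer *buffer, int cpu, int destructive)
    {
            struct ring_buffer_event *event;
            u64 ts;

            /* Non-destructive look at the oldest event on this CPU. */
            event = ring_buffer_peek(buffer, cpu, &ts);

            /* trace_pipe's mode: the entry is gone after this call. */
            if (event && destructive)
                    ring_buffer_consume(buffer, cpu, &ts);

            return event ? ring_buffer_event_data(event) : NULL;
    }

The ftrace_disable_cpu()/ftrace_enable_cpu() bracketing in the real code keeps the function tracer from recursing into the ring buffer while it is being read.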
1182 1004
1183static void *s_next(struct seq_file *m, void *v, loff_t *pos) 1005static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1184{ 1006{
1185 struct trace_iterator *iter = m->private; 1007 struct trace_iterator *iter = m->private;
1186 void *last_ent = iter->ent;
1187 int i = (int)*pos; 1008 int i = (int)*pos;
1188 void *ent; 1009 void *ent;
1189 1010
@@ -1203,9 +1024,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1203 1024
1204 iter->pos = *pos; 1025 iter->pos = *pos;
1205 1026
1206 if (last_ent && !ent)
1207 seq_puts(m, "\n\nvim:ft=help\n");
1208
1209 return ent; 1027 return ent;
1210} 1028}
1211 1029
@@ -1214,7 +1032,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1214 struct trace_iterator *iter = m->private; 1032 struct trace_iterator *iter = m->private;
1215 void *p = NULL; 1033 void *p = NULL;
1216 loff_t l = 0; 1034 loff_t l = 0;
1217 int i; 1035 int cpu;
1218 1036
1219 mutex_lock(&trace_types_lock); 1037 mutex_lock(&trace_types_lock);
1220 1038
@@ -1233,14 +1051,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1233 iter->ent = NULL; 1051 iter->ent = NULL;
1234 iter->cpu = 0; 1052 iter->cpu = 0;
1235 iter->idx = -1; 1053 iter->idx = -1;
1236 iter->prev_ent = NULL;
1237 iter->prev_cpu = -1;
1238 1054
1239 for_each_tracing_cpu(i) { 1055 ftrace_disable_cpu();
1240 iter->next_idx[i] = 0; 1056
1241 iter->next_page[i] = NULL; 1057 for_each_tracing_cpu(cpu) {
1058 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1242 } 1059 }
1243 1060
1061 ftrace_enable_cpu();
1062
1244 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1063 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1245 ; 1064 ;
1246 1065
@@ -1334,21 +1153,21 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1334 1153
1335static void print_lat_help_header(struct seq_file *m) 1154static void print_lat_help_header(struct seq_file *m)
1336{ 1155{
1337 seq_puts(m, "# _------=> CPU# \n"); 1156 seq_puts(m, "# _------=> CPU# \n");
1338 seq_puts(m, "# / _-----=> irqs-off \n"); 1157 seq_puts(m, "# / _-----=> irqs-off \n");
1339 seq_puts(m, "# | / _----=> need-resched \n"); 1158 seq_puts(m, "# | / _----=> need-resched \n");
1340 seq_puts(m, "# || / _---=> hardirq/softirq \n"); 1159 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1341 seq_puts(m, "# ||| / _--=> preempt-depth \n"); 1160 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1342 seq_puts(m, "# |||| / \n"); 1161 seq_puts(m, "# |||| / \n");
1343 seq_puts(m, "# ||||| delay \n"); 1162 seq_puts(m, "# ||||| delay \n");
1344 seq_puts(m, "# cmd pid ||||| time | caller \n"); 1163 seq_puts(m, "# cmd pid ||||| time | caller \n");
1345 seq_puts(m, "# \\ / ||||| \\ | / \n"); 1164 seq_puts(m, "# \\ / ||||| \\ | / \n");
1346} 1165}
1347 1166
1348static void print_func_help_header(struct seq_file *m) 1167static void print_func_help_header(struct seq_file *m)
1349{ 1168{
1350 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); 1169 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1351 seq_puts(m, "# | | | | |\n"); 1170 seq_puts(m, "# | | | | |\n");
1352} 1171}
1353 1172
1354 1173
@@ -1359,23 +1178,16 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1359 struct trace_array *tr = iter->tr; 1178 struct trace_array *tr = iter->tr;
1360 struct trace_array_cpu *data = tr->data[tr->cpu]; 1179 struct trace_array_cpu *data = tr->data[tr->cpu];
1361 struct tracer *type = current_trace; 1180 struct tracer *type = current_trace;
1362 unsigned long total = 0; 1181 unsigned long total;
1363 unsigned long entries = 0; 1182 unsigned long entries;
1364 int cpu;
1365 const char *name = "preemption"; 1183 const char *name = "preemption";
1366 1184
1367 if (type) 1185 if (type)
1368 name = type->name; 1186 name = type->name;
1369 1187
1370 for_each_tracing_cpu(cpu) { 1188 entries = ring_buffer_entries(iter->tr->buffer);
1371 if (head_page(tr->data[cpu])) { 1189 total = entries +
1372 total += tr->data[cpu]->trace_idx; 1190 ring_buffer_overruns(iter->tr->buffer);
1373 if (tr->data[cpu]->trace_idx > tr->entries)
1374 entries += tr->entries;
1375 else
1376 entries += tr->data[cpu]->trace_idx;
1377 }
1378 }
1379 1191
1380 seq_printf(m, "%s latency trace v1.1.5 on %s\n", 1192 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1381 name, UTS_RELEASE); 1193 name, UTS_RELEASE);
@@ -1432,7 +1244,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1432 comm = trace_find_cmdline(entry->pid); 1244 comm = trace_find_cmdline(entry->pid);
1433 1245
1434 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); 1246 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1435 trace_seq_printf(s, "%d", cpu); 1247 trace_seq_printf(s, "%3d", cpu);
1436 trace_seq_printf(s, "%c%c", 1248 trace_seq_printf(s, "%c%c",
1437 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', 1249 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1438 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); 1250 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
@@ -1461,7 +1273,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1461unsigned long preempt_mark_thresh = 100; 1273unsigned long preempt_mark_thresh = 100;
1462 1274
1463static void 1275static void
1464lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs, 1276lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
1465 unsigned long rel_usecs) 1277 unsigned long rel_usecs)
1466{ 1278{
1467 trace_seq_printf(s, " %4lldus", abs_usecs); 1279 trace_seq_printf(s, " %4lldus", abs_usecs);
@@ -1475,34 +1287,76 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1475 1287
1476static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 1288static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1477 1289
1478static int 1290/*
1291 * The message is supposed to contain an ending newline.
1292 * If the printing stops prematurely, try to add a newline of our own.
1293 */
1294void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1295{
1296 struct trace_entry *ent;
1297 struct trace_field_cont *cont;
1298 bool ok = true;
1299
1300 ent = peek_next_entry(iter, iter->cpu, NULL);
1301 if (!ent || ent->type != TRACE_CONT) {
1302 trace_seq_putc(s, '\n');
1303 return;
1304 }
1305
1306 do {
1307 cont = (struct trace_field_cont *)ent;
1308 if (ok)
1309 ok = (trace_seq_printf(s, "%s", cont->buf) > 0);
1310
1311 ftrace_disable_cpu();
1312
1313 if (iter->buffer_iter[iter->cpu])
1314 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1315 else
1316 ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
1317
1318 ftrace_enable_cpu();
1319
1320 ent = peek_next_entry(iter, iter->cpu, NULL);
1321 } while (ent && ent->type == TRACE_CONT);
1322
1323 if (!ok)
1324 trace_seq_putc(s, '\n');
1325}
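trace_seq_print_cont() exists because a single ftrace_printk() message can be larger than one ring-buffer event: the head entry sets TRACE_FLAG_CONT in its flags, and the overflow arrives as follow-up entries of type TRACE_CONT. A sketch of the assumed record shape (the real definition lives in trace.h):

    struct trace_field_cont {          /* assumed layout */
            struct trace_entry ent;    /* ent.type == TRACE_CONT */
            char buf[];                /* next chunk of the message */
    };

The loop above keeps consuming such chunks until it peeks a non-CONT entry, then appends a newline of its own only if the message was cut short.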
1326
1327static enum print_line_t
1479print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) 1328print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1480{ 1329{
1481 struct trace_seq *s = &iter->seq; 1330 struct trace_seq *s = &iter->seq;
1482 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1331 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1483 struct trace_entry *next_entry = find_next_entry(iter, NULL); 1332 struct trace_entry *next_entry;
1484 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); 1333 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1485 struct trace_entry *entry = iter->ent; 1334 struct trace_entry *entry = iter->ent;
1486 unsigned long abs_usecs; 1335 unsigned long abs_usecs;
1487 unsigned long rel_usecs; 1336 unsigned long rel_usecs;
1337 u64 next_ts;
1488 char *comm; 1338 char *comm;
1489 int S, T; 1339 int S, T;
1490 int i; 1340 int i;
1491 unsigned state; 1341 unsigned state;
1492 1342
1343 if (entry->type == TRACE_CONT)
1344 return TRACE_TYPE_HANDLED;
1345
1346 next_entry = find_next_entry(iter, NULL, &next_ts);
1493 if (!next_entry) 1347 if (!next_entry)
1494 next_entry = entry; 1348 next_ts = iter->ts;
1495 rel_usecs = ns2usecs(next_entry->t - entry->t); 1349 rel_usecs = ns2usecs(next_ts - iter->ts);
1496 abs_usecs = ns2usecs(entry->t - iter->tr->time_start); 1350 abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
1497 1351
1498 if (verbose) { 1352 if (verbose) {
1499 comm = trace_find_cmdline(entry->pid); 1353 comm = trace_find_cmdline(entry->pid);
1500 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]" 1354 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1501 " %ld.%03ldms (+%ld.%03ldms): ", 1355 " %ld.%03ldms (+%ld.%03ldms): ",
1502 comm, 1356 comm,
1503 entry->pid, cpu, entry->flags, 1357 entry->pid, cpu, entry->flags,
1504 entry->preempt_count, trace_idx, 1358 entry->preempt_count, trace_idx,
1505 ns2usecs(entry->t), 1359 ns2usecs(iter->ts),
1506 abs_usecs/1000, 1360 abs_usecs/1000,
1507 abs_usecs % 1000, rel_usecs/1000, 1361 abs_usecs % 1000, rel_usecs/1000,
1508 rel_usecs % 1000); 1362 rel_usecs % 1000);
@@ -1511,52 +1365,85 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1511 lat_print_timestamp(s, abs_usecs, rel_usecs); 1365 lat_print_timestamp(s, abs_usecs, rel_usecs);
1512 } 1366 }
1513 switch (entry->type) { 1367 switch (entry->type) {
1514 case TRACE_FN: 1368 case TRACE_FN: {
1515 seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1369 struct ftrace_entry *field;
1370
1371 trace_assign_type(field, entry);
1372
1373 seq_print_ip_sym(s, field->ip, sym_flags);
1516 trace_seq_puts(s, " ("); 1374 trace_seq_puts(s, " (");
1517 if (kretprobed(entry->fn.parent_ip)) 1375 if (kretprobed(field->parent_ip))
1518 trace_seq_puts(s, KRETPROBE_MSG); 1376 trace_seq_puts(s, KRETPROBE_MSG);
1519 else 1377 else
1520 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags); 1378 seq_print_ip_sym(s, field->parent_ip, sym_flags);
1521 trace_seq_puts(s, ")\n"); 1379 trace_seq_puts(s, ")\n");
1522 break; 1380 break;
1381 }
1523 case TRACE_CTX: 1382 case TRACE_CTX:
1524 case TRACE_WAKE: 1383 case TRACE_WAKE: {
1525 T = entry->ctx.next_state < sizeof(state_to_char) ? 1384 struct ctx_switch_entry *field;
1526 state_to_char[entry->ctx.next_state] : 'X'; 1385
1386 trace_assign_type(field, entry);
1527 1387
1528 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0; 1388 T = field->next_state < sizeof(state_to_char) ?
1389 state_to_char[field->next_state] : 'X';
1390
1391 state = field->prev_state ?
1392 __ffs(field->prev_state) + 1 : 0;
1529 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X'; 1393 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1530 comm = trace_find_cmdline(entry->ctx.next_pid); 1394 comm = trace_find_cmdline(field->next_pid);
1531 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n", 1395 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1532 entry->ctx.prev_pid, 1396 field->prev_pid,
1533 entry->ctx.prev_prio, 1397 field->prev_prio,
1534 S, entry->type == TRACE_CTX ? "==>" : " +", 1398 S, entry->type == TRACE_CTX ? "==>" : " +",
1535 entry->ctx.next_pid, 1399 field->next_cpu,
1536 entry->ctx.next_prio, 1400 field->next_pid,
1401 field->next_prio,
1537 T, comm); 1402 T, comm);
1538 break; 1403 break;
1539 case TRACE_SPECIAL: 1404 }
1405 case TRACE_SPECIAL: {
1406 struct special_entry *field;
1407
1408 trace_assign_type(field, entry);
1409
1540 trace_seq_printf(s, "# %ld %ld %ld\n", 1410 trace_seq_printf(s, "# %ld %ld %ld\n",
1541 entry->special.arg1, 1411 field->arg1,
1542 entry->special.arg2, 1412 field->arg2,
1543 entry->special.arg3); 1413 field->arg3);
1544 break; 1414 break;
1545 case TRACE_STACK: 1415 }
1416 case TRACE_STACK: {
1417 struct stack_entry *field;
1418
1419 trace_assign_type(field, entry);
1420
1546 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1421 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1547 if (i) 1422 if (i)
1548 trace_seq_puts(s, " <= "); 1423 trace_seq_puts(s, " <= ");
1549 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags); 1424 seq_print_ip_sym(s, field->caller[i], sym_flags);
1550 } 1425 }
1551 trace_seq_puts(s, "\n"); 1426 trace_seq_puts(s, "\n");
1552 break; 1427 break;
1428 }
1429 case TRACE_PRINT: {
1430 struct print_entry *field;
1431
1432 trace_assign_type(field, entry);
1433
1434 seq_print_ip_sym(s, field->ip, sym_flags);
1435 trace_seq_printf(s, ": %s", field->buf);
1436 if (entry->flags & TRACE_FLAG_CONT)
1437 trace_seq_print_cont(s, iter);
1438 break;
1439 }
1553 default: 1440 default:
1554 trace_seq_printf(s, "Unknown type %d\n", entry->type); 1441 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1555 } 1442 }
1556 return 1; 1443 return TRACE_TYPE_HANDLED;
1557} 1444}
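Every case above follows the same downcast pattern: the generic struct trace_entry header is embedded as the first member of each concrete record, so trace_assign_type() can hand back a typed pointer selected by entry->type. A self-contained illustration of the idiom (assign_type() is a stand-in; the kernel's macro lives in trace.h and may add debug checking):

    struct trace_entry  { unsigned char type; };
    struct ftrace_entry { struct trace_entry ent; unsigned long ip; };

    /* The header is the first member, so the downcast is a plain
     * pointer reinterpretation chosen by ent->type. */
    #define assign_type(var, ent)   ((var) = (void *)(ent))

    /* Valid only when entry really heads an ftrace_entry record. */
    static unsigned long entry_ip(struct trace_entry *entry)
    {
            struct ftrace_entry *field;

            assign_type(field, entry);
            return field->ip;
    }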
1558 1445
1559static int print_trace_fmt(struct trace_iterator *iter) 1446static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1560{ 1447{
1561 struct trace_seq *s = &iter->seq; 1448 struct trace_seq *s = &iter->seq;
1562 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); 1449 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1571,90 +1458,126 @@ static int print_trace_fmt(struct trace_iterator *iter)
1571 1458
1572 entry = iter->ent; 1459 entry = iter->ent;
1573 1460
1461 if (entry->type == TRACE_CONT)
1462 return TRACE_TYPE_HANDLED;
1463
1574 comm = trace_find_cmdline(iter->ent->pid); 1464 comm = trace_find_cmdline(iter->ent->pid);
1575 1465
1576 t = ns2usecs(entry->t); 1466 t = ns2usecs(iter->ts);
1577 usec_rem = do_div(t, 1000000ULL); 1467 usec_rem = do_div(t, 1000000ULL);
1578 secs = (unsigned long)t; 1468 secs = (unsigned long)t;
1579 1469
1580 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); 1470 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1581 if (!ret) 1471 if (!ret)
1582 return 0; 1472 return TRACE_TYPE_PARTIAL_LINE;
1583 ret = trace_seq_printf(s, "[%02d] ", iter->cpu); 1473 ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1584 if (!ret) 1474 if (!ret)
1585 return 0; 1475 return TRACE_TYPE_PARTIAL_LINE;
1586 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); 1476 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1587 if (!ret) 1477 if (!ret)
1588 return 0; 1478 return TRACE_TYPE_PARTIAL_LINE;
1589 1479
1590 switch (entry->type) { 1480 switch (entry->type) {
1591 case TRACE_FN: 1481 case TRACE_FN: {
1592 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags); 1482 struct ftrace_entry *field;
1483
1484 trace_assign_type(field, entry);
1485
1486 ret = seq_print_ip_sym(s, field->ip, sym_flags);
1593 if (!ret) 1487 if (!ret)
1594 return 0; 1488 return TRACE_TYPE_PARTIAL_LINE;
1595 if ((sym_flags & TRACE_ITER_PRINT_PARENT) && 1489 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1596 entry->fn.parent_ip) { 1490 field->parent_ip) {
1597 ret = trace_seq_printf(s, " <-"); 1491 ret = trace_seq_printf(s, " <-");
1598 if (!ret) 1492 if (!ret)
1599 return 0; 1493 return TRACE_TYPE_PARTIAL_LINE;
1600 if (kretprobed(entry->fn.parent_ip)) 1494 if (kretprobed(field->parent_ip))
1601 ret = trace_seq_puts(s, KRETPROBE_MSG); 1495 ret = trace_seq_puts(s, KRETPROBE_MSG);
1602 else 1496 else
1603 ret = seq_print_ip_sym(s, entry->fn.parent_ip, 1497 ret = seq_print_ip_sym(s,
1498 field->parent_ip,
1604 sym_flags); 1499 sym_flags);
1605 if (!ret) 1500 if (!ret)
1606 return 0; 1501 return TRACE_TYPE_PARTIAL_LINE;
1607 } 1502 }
1608 ret = trace_seq_printf(s, "\n"); 1503 ret = trace_seq_printf(s, "\n");
1609 if (!ret) 1504 if (!ret)
1610 return 0; 1505 return TRACE_TYPE_PARTIAL_LINE;
1611 break; 1506 break;
1507 }
1612 case TRACE_CTX: 1508 case TRACE_CTX:
1613 case TRACE_WAKE: 1509 case TRACE_WAKE: {
1614 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1510 struct ctx_switch_entry *field;
1615 state_to_char[entry->ctx.prev_state] : 'X'; 1511
1616 T = entry->ctx.next_state < sizeof(state_to_char) ? 1512 trace_assign_type(field, entry);
1617 state_to_char[entry->ctx.next_state] : 'X'; 1513
1618 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n", 1514 S = field->prev_state < sizeof(state_to_char) ?
1619 entry->ctx.prev_pid, 1515 state_to_char[field->prev_state] : 'X';
1620 entry->ctx.prev_prio, 1516 T = field->next_state < sizeof(state_to_char) ?
1517 state_to_char[field->next_state] : 'X';
1518 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1519 field->prev_pid,
1520 field->prev_prio,
1621 S, 1521 S,
1622 entry->type == TRACE_CTX ? "==>" : " +", 1522 entry->type == TRACE_CTX ? "==>" : " +",
1623 entry->ctx.next_pid, 1523 field->next_cpu,
1624 entry->ctx.next_prio, 1524 field->next_pid,
1525 field->next_prio,
1625 T); 1526 T);
1626 if (!ret) 1527 if (!ret)
1627 return 0; 1528 return TRACE_TYPE_PARTIAL_LINE;
1628 break; 1529 break;
1629 case TRACE_SPECIAL: 1530 }
1531 case TRACE_SPECIAL: {
1532 struct special_entry *field;
1533
1534 trace_assign_type(field, entry);
1535
1630 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1536 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1631 entry->special.arg1, 1537 field->arg1,
1632 entry->special.arg2, 1538 field->arg2,
1633 entry->special.arg3); 1539 field->arg3);
1634 if (!ret) 1540 if (!ret)
1635 return 0; 1541 return TRACE_TYPE_PARTIAL_LINE;
1636 break; 1542 break;
1637 case TRACE_STACK: 1543 }
1544 case TRACE_STACK: {
1545 struct stack_entry *field;
1546
1547 trace_assign_type(field, entry);
1548
1638 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { 1549 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1639 if (i) { 1550 if (i) {
1640 ret = trace_seq_puts(s, " <= "); 1551 ret = trace_seq_puts(s, " <= ");
1641 if (!ret) 1552 if (!ret)
1642 return 0; 1553 return TRACE_TYPE_PARTIAL_LINE;
1643 } 1554 }
1644 ret = seq_print_ip_sym(s, entry->stack.caller[i], 1555 ret = seq_print_ip_sym(s, field->caller[i],
1645 sym_flags); 1556 sym_flags);
1646 if (!ret) 1557 if (!ret)
1647 return 0; 1558 return TRACE_TYPE_PARTIAL_LINE;
1648 } 1559 }
1649 ret = trace_seq_puts(s, "\n"); 1560 ret = trace_seq_puts(s, "\n");
1650 if (!ret) 1561 if (!ret)
1651 return 0; 1562 return TRACE_TYPE_PARTIAL_LINE;
1652 break; 1563 break;
1653 } 1564 }
1654 return 1; 1565 case TRACE_PRINT: {
1566 struct print_entry *field;
1567
1568 trace_assign_type(field, entry);
1569
1570 seq_print_ip_sym(s, field->ip, sym_flags);
1571 trace_seq_printf(s, ": %s", field->buf);
1572 if (entry->flags & TRACE_FLAG_CONT)
1573 trace_seq_print_cont(s, iter);
1574 break;
1575 }
1576 }
1577 return TRACE_TYPE_HANDLED;
1655} 1578}
1656 1579
1657static int print_raw_fmt(struct trace_iterator *iter) 1580static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1658{ 1581{
1659 struct trace_seq *s = &iter->seq; 1582 struct trace_seq *s = &iter->seq;
1660 struct trace_entry *entry; 1583 struct trace_entry *entry;
@@ -1663,47 +1586,77 @@ static int print_raw_fmt(struct trace_iterator *iter)
1663 1586
1664 entry = iter->ent; 1587 entry = iter->ent;
1665 1588
1589 if (entry->type == TRACE_CONT)
1590 return TRACE_TYPE_HANDLED;
1591
1666 ret = trace_seq_printf(s, "%d %d %llu ", 1592 ret = trace_seq_printf(s, "%d %d %llu ",
1667 entry->pid, iter->cpu, entry->t); 1593 entry->pid, iter->cpu, iter->ts);
1668 if (!ret) 1594 if (!ret)
1669 return 0; 1595 return TRACE_TYPE_PARTIAL_LINE;
1670 1596
1671 switch (entry->type) { 1597 switch (entry->type) {
1672 case TRACE_FN: 1598 case TRACE_FN: {
1599 struct ftrace_entry *field;
1600
1601 trace_assign_type(field, entry);
1602
1673 ret = trace_seq_printf(s, "%x %x\n", 1603 ret = trace_seq_printf(s, "%x %x\n",
1674 entry->fn.ip, entry->fn.parent_ip); 1604 field->ip,
1605 field->parent_ip);
1675 if (!ret) 1606 if (!ret)
1676 return 0; 1607 return TRACE_TYPE_PARTIAL_LINE;
1677 break; 1608 break;
1609 }
1678 case TRACE_CTX: 1610 case TRACE_CTX:
1679 case TRACE_WAKE: 1611 case TRACE_WAKE: {
1680 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1612 struct ctx_switch_entry *field;
1681 state_to_char[entry->ctx.prev_state] : 'X'; 1613
1682 T = entry->ctx.next_state < sizeof(state_to_char) ? 1614 trace_assign_type(field, entry);
1683 state_to_char[entry->ctx.next_state] : 'X'; 1615
1616 S = field->prev_state < sizeof(state_to_char) ?
1617 state_to_char[field->prev_state] : 'X';
1618 T = field->next_state < sizeof(state_to_char) ?
1619 state_to_char[field->next_state] : 'X';
1684 if (entry->type == TRACE_WAKE) 1620 if (entry->type == TRACE_WAKE)
1685 S = '+'; 1621 S = '+';
1686 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n", 1622 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1687 entry->ctx.prev_pid, 1623 field->prev_pid,
1688 entry->ctx.prev_prio, 1624 field->prev_prio,
1689 S, 1625 S,
1690 entry->ctx.next_pid, 1626 field->next_cpu,
1691 entry->ctx.next_prio, 1627 field->next_pid,
1628 field->next_prio,
1692 T); 1629 T);
1693 if (!ret) 1630 if (!ret)
1694 return 0; 1631 return TRACE_TYPE_PARTIAL_LINE;
1695 break; 1632 break;
1633 }
1696 case TRACE_SPECIAL: 1634 case TRACE_SPECIAL:
1697 case TRACE_STACK: 1635 case TRACE_STACK: {
1636 struct special_entry *field;
1637
1638 trace_assign_type(field, entry);
1639
1698 ret = trace_seq_printf(s, "# %ld %ld %ld\n", 1640 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1699 entry->special.arg1, 1641 field->arg1,
1700 entry->special.arg2, 1642 field->arg2,
1701 entry->special.arg3); 1643 field->arg3);
1702 if (!ret) 1644 if (!ret)
1703 return 0; 1645 return TRACE_TYPE_PARTIAL_LINE;
1704 break; 1646 break;
1705 } 1647 }
1706 return 1; 1648 case TRACE_PRINT: {
1649 struct print_entry *field;
1650
1651 trace_assign_type(field, entry);
1652
1653 trace_seq_printf(s, "# %lx %s", field->ip, field->buf);
1654 if (entry->flags & TRACE_FLAG_CONT)
1655 trace_seq_print_cont(s, iter);
1656 break;
1657 }
1658 }
1659 return TRACE_TYPE_HANDLED;
1707} 1660}
1708 1661
1709#define SEQ_PUT_FIELD_RET(s, x) \ 1662#define SEQ_PUT_FIELD_RET(s, x) \
@@ -1714,11 +1667,12 @@ do { \
1714 1667
1715#define SEQ_PUT_HEX_FIELD_RET(s, x) \ 1668#define SEQ_PUT_HEX_FIELD_RET(s, x) \
1716do { \ 1669do { \
1670 BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \
1717 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ 1671 if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \
1718 return 0; \ 1672 return 0; \
1719} while (0) 1673} while (0)
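The BUILD_BUG_ON added to SEQ_PUT_HEX_FIELD_RET turns a would-be runtime truncation into a compile failure: no field wider than MAX_MEMHEX_BYTES can be hex-dumped. For readers unfamiliar with the idiom, a minimal freestanding equivalent (MY_BUILD_BUG_ON and example() are illustrative, not kernel code):

    /* The array type has negative size when cond is true,
     * so compilation fails at the offending call site. */
    #define MY_BUILD_BUG_ON(cond)   ((void)sizeof(char[1 - 2 * !!(cond)]))

    static void example(void)
    {
            unsigned long long x = 0;

            MY_BUILD_BUG_ON(sizeof(x) > 16);   /* fine: 8 <= 16 */
            /* MY_BUILD_BUG_ON(sizeof(x) > 4);    would break the build */
            (void)x;
    }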
1720 1674
1721static int print_hex_fmt(struct trace_iterator *iter) 1675static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1722{ 1676{
1723 struct trace_seq *s = &iter->seq; 1677 struct trace_seq *s = &iter->seq;
1724 unsigned char newline = '\n'; 1678 unsigned char newline = '\n';
@@ -1727,97 +1681,139 @@ static int print_hex_fmt(struct trace_iterator *iter)
1727 1681
1728 entry = iter->ent; 1682 entry = iter->ent;
1729 1683
1684 if (entry->type == TRACE_CONT)
1685 return TRACE_TYPE_HANDLED;
1686
1730 SEQ_PUT_HEX_FIELD_RET(s, entry->pid); 1687 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1731 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); 1688 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1732 SEQ_PUT_HEX_FIELD_RET(s, entry->t); 1689 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1733 1690
1734 switch (entry->type) { 1691 switch (entry->type) {
1735 case TRACE_FN: 1692 case TRACE_FN: {
1736 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip); 1693 struct ftrace_entry *field;
1737 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1694
1695 trace_assign_type(field, entry);
1696
1697 SEQ_PUT_HEX_FIELD_RET(s, field->ip);
1698 SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip);
1738 break; 1699 break;
1700 }
1739 case TRACE_CTX: 1701 case TRACE_CTX:
1740 case TRACE_WAKE: 1702 case TRACE_WAKE: {
1741 S = entry->ctx.prev_state < sizeof(state_to_char) ? 1703 struct ctx_switch_entry *field;
1742 state_to_char[entry->ctx.prev_state] : 'X'; 1704
1743 T = entry->ctx.next_state < sizeof(state_to_char) ? 1705 trace_assign_type(field, entry);
1744 state_to_char[entry->ctx.next_state] : 'X'; 1706
1707 S = field->prev_state < sizeof(state_to_char) ?
1708 state_to_char[field->prev_state] : 'X';
1709 T = field->next_state < sizeof(state_to_char) ?
1710 state_to_char[field->next_state] : 'X';
1745 if (entry->type == TRACE_WAKE) 1711 if (entry->type == TRACE_WAKE)
1746 S = '+'; 1712 S = '+';
1747 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid); 1713 SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid);
1748 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio); 1714 SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio);
1749 SEQ_PUT_HEX_FIELD_RET(s, S); 1715 SEQ_PUT_HEX_FIELD_RET(s, S);
1750 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid); 1716 SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu);
1751 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio); 1717 SEQ_PUT_HEX_FIELD_RET(s, field->next_pid);
1752 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip); 1718 SEQ_PUT_HEX_FIELD_RET(s, field->next_prio);
1753 SEQ_PUT_HEX_FIELD_RET(s, T); 1719 SEQ_PUT_HEX_FIELD_RET(s, T);
1754 break; 1720 break;
1721 }
1755 case TRACE_SPECIAL: 1722 case TRACE_SPECIAL:
1756 case TRACE_STACK: 1723 case TRACE_STACK: {
1757 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1); 1724 struct special_entry *field;
1758 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2); 1725
1759 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3); 1726 trace_assign_type(field, entry);
1727
1728 SEQ_PUT_HEX_FIELD_RET(s, field->arg1);
1729 SEQ_PUT_HEX_FIELD_RET(s, field->arg2);
1730 SEQ_PUT_HEX_FIELD_RET(s, field->arg3);
1760 break; 1731 break;
1761 } 1732 }
1733 }
1762 SEQ_PUT_FIELD_RET(s, newline); 1734 SEQ_PUT_FIELD_RET(s, newline);
1763 1735
1764 return 1; 1736 return TRACE_TYPE_HANDLED;
1765} 1737}
1766 1738
1767static int print_bin_fmt(struct trace_iterator *iter) 1739static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1768{ 1740{
1769 struct trace_seq *s = &iter->seq; 1741 struct trace_seq *s = &iter->seq;
1770 struct trace_entry *entry; 1742 struct trace_entry *entry;
1771 1743
1772 entry = iter->ent; 1744 entry = iter->ent;
1773 1745
1746 if (entry->type == TRACE_CONT)
1747 return TRACE_TYPE_HANDLED;
1748
1774 SEQ_PUT_FIELD_RET(s, entry->pid); 1749 SEQ_PUT_FIELD_RET(s, entry->pid);
1775 SEQ_PUT_FIELD_RET(s, entry->cpu); 1750 SEQ_PUT_FIELD_RET(s, iter->cpu);
1776 SEQ_PUT_FIELD_RET(s, entry->t); 1751 SEQ_PUT_FIELD_RET(s, iter->ts);
1777 1752
1778 switch (entry->type) { 1753 switch (entry->type) {
1779 case TRACE_FN: 1754 case TRACE_FN: {
1780 SEQ_PUT_FIELD_RET(s, entry->fn.ip); 1755 struct ftrace_entry *field;
1781 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip); 1756
1757 trace_assign_type(field, entry);
1758
1759 SEQ_PUT_FIELD_RET(s, field->ip);
1760 SEQ_PUT_FIELD_RET(s, field->parent_ip);
1782 break; 1761 break;
1783 case TRACE_CTX: 1762 }
1784 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid); 1763 case TRACE_CTX: {
1785 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio); 1764 struct ctx_switch_entry *field;
1786 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state); 1765
1787 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid); 1766 trace_assign_type(field, entry);
1788 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio); 1767
1789 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state); 1768 SEQ_PUT_FIELD_RET(s, field->prev_pid);
1769 SEQ_PUT_FIELD_RET(s, field->prev_prio);
1770 SEQ_PUT_FIELD_RET(s, field->prev_state);
1771 SEQ_PUT_FIELD_RET(s, field->next_pid);
1772 SEQ_PUT_FIELD_RET(s, field->next_prio);
1773 SEQ_PUT_FIELD_RET(s, field->next_state);
1790 break; 1774 break;
1775 }
1791 case TRACE_SPECIAL: 1776 case TRACE_SPECIAL:
1792 case TRACE_STACK: 1777 case TRACE_STACK: {
1793 SEQ_PUT_FIELD_RET(s, entry->special.arg1); 1778 struct special_entry *field;
1794 SEQ_PUT_FIELD_RET(s, entry->special.arg2); 1779
1795 SEQ_PUT_FIELD_RET(s, entry->special.arg3); 1780 trace_assign_type(field, entry);
1781
1782 SEQ_PUT_FIELD_RET(s, field->arg1);
1783 SEQ_PUT_FIELD_RET(s, field->arg2);
1784 SEQ_PUT_FIELD_RET(s, field->arg3);
1796 break; 1785 break;
1797 } 1786 }
1787 }
1798 return 1; 1788 return 1;
1799} 1789}
1800 1790
1801static int trace_empty(struct trace_iterator *iter) 1791static int trace_empty(struct trace_iterator *iter)
1802{ 1792{
1803 struct trace_array_cpu *data;
1804 int cpu; 1793 int cpu;
1805 1794
1806 for_each_tracing_cpu(cpu) { 1795 for_each_tracing_cpu(cpu) {
1807 data = iter->tr->data[cpu]; 1796 if (iter->buffer_iter[cpu]) {
1808 1797 if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1809 if (head_page(data) && data->trace_idx && 1798 return 0;
1810 (data->trace_tail != data->trace_head || 1799 } else {
1811 data->trace_tail_idx != data->trace_head_idx)) 1800 if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1812 return 0; 1801 return 0;
1802 }
1813 } 1803 }
1804
1814 return 1; 1805 return 1;
1815} 1806}
1816 1807
1817static int print_trace_line(struct trace_iterator *iter) 1808static enum print_line_t print_trace_line(struct trace_iterator *iter)
1818{ 1809{
1819 if (iter->trace && iter->trace->print_line) 1810 enum print_line_t ret;
1820 return iter->trace->print_line(iter); 1811
1812 if (iter->trace && iter->trace->print_line) {
1813 ret = iter->trace->print_line(iter);
1814 if (ret != TRACE_TYPE_UNHANDLED)
1815 return ret;
1816 }
1821 1817
1822 if (trace_flags & TRACE_ITER_BIN) 1818 if (trace_flags & TRACE_ITER_BIN)
1823 return print_bin_fmt(iter); 1819 return print_bin_fmt(iter);
@@ -1873,6 +1869,8 @@ static struct trace_iterator *
1873__tracing_open(struct inode *inode, struct file *file, int *ret) 1869__tracing_open(struct inode *inode, struct file *file, int *ret)
1874{ 1870{
1875 struct trace_iterator *iter; 1871 struct trace_iterator *iter;
1872 struct seq_file *m;
1873 int cpu;
1876 1874
1877 if (tracing_disabled) { 1875 if (tracing_disabled) {
1878 *ret = -ENODEV; 1876 *ret = -ENODEV;
@@ -1893,28 +1891,45 @@ __tracing_open(struct inode *inode, struct file *file, int *ret)
1893 iter->trace = current_trace; 1891 iter->trace = current_trace;
1894 iter->pos = -1; 1892 iter->pos = -1;
1895 1893
1894 for_each_tracing_cpu(cpu) {
1895
1896 iter->buffer_iter[cpu] =
1897 ring_buffer_read_start(iter->tr->buffer, cpu);
1898
1899 if (!iter->buffer_iter[cpu])
1900 goto fail_buffer;
1901 }
1902
1896 /* TODO stop tracer */ 1903 /* TODO stop tracer */
1897 *ret = seq_open(file, &tracer_seq_ops); 1904 *ret = seq_open(file, &tracer_seq_ops);
1898 if (!*ret) { 1905 if (*ret)
1899 struct seq_file *m = file->private_data; 1906 goto fail_buffer;
1900 m->private = iter;
1901 1907
1902 /* stop the trace while dumping */ 1908 m = file->private_data;
1903 if (iter->tr->ctrl) { 1909 m->private = iter;
1904 tracer_enabled = 0;
1905 ftrace_function_enabled = 0;
1906 }
1907 1910
1908 if (iter->trace && iter->trace->open) 1911 /* stop the trace while dumping */
1909 iter->trace->open(iter); 1912 if (iter->tr->ctrl) {
1910 } else { 1913 tracer_enabled = 0;
1911 kfree(iter); 1914 ftrace_function_enabled = 0;
1912 iter = NULL;
1913 } 1915 }
1916
1917 if (iter->trace && iter->trace->open)
1918 iter->trace->open(iter);
1919
1914 mutex_unlock(&trace_types_lock); 1920 mutex_unlock(&trace_types_lock);
1915 1921
1916 out: 1922 out:
1917 return iter; 1923 return iter;
1924
1925 fail_buffer:
1926 for_each_tracing_cpu(cpu) {
1927 if (iter->buffer_iter[cpu])
1928 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1929 }
1930 mutex_unlock(&trace_types_lock);
1931
1932 return ERR_PTR(-ENOMEM);
1918} 1933}
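__tracing_open() now takes an all-or-nothing approach: it starts one reader iterator per CPU and, on any failure, unwinds through a single label that releases whatever was already taken. Reduced to its skeleton (a sketch using the names from this hunk, and assuming iter was zero-allocated so untouched slots are NULL):

    for_each_tracing_cpu(cpu) {
            iter->buffer_iter[cpu] =
                    ring_buffer_read_start(iter->tr->buffer, cpu);
            if (!iter->buffer_iter[cpu])
                    goto fail_buffer;
    }
    /* ... seq_open(), stop tracing, return iter ... */

    fail_buffer:
            for_each_tracing_cpu(cpu)
                    if (iter->buffer_iter[cpu])   /* NULL slots never started */
                            ring_buffer_read_finish(iter->buffer_iter[cpu]);
            return ERR_PTR(-ENOMEM);

tracing_release() below performs the matching per-CPU ring_buffer_read_finish() walk on the success path.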
1919 1934
1920int tracing_open_generic(struct inode *inode, struct file *filp) 1935int tracing_open_generic(struct inode *inode, struct file *filp)
@@ -1930,8 +1945,14 @@ int tracing_release(struct inode *inode, struct file *file)
1930{ 1945{
1931 struct seq_file *m = (struct seq_file *)file->private_data; 1946 struct seq_file *m = (struct seq_file *)file->private_data;
1932 struct trace_iterator *iter = m->private; 1947 struct trace_iterator *iter = m->private;
1948 int cpu;
1933 1949
1934 mutex_lock(&trace_types_lock); 1950 mutex_lock(&trace_types_lock);
1951 for_each_tracing_cpu(cpu) {
1952 if (iter->buffer_iter[cpu])
1953 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1954 }
1955
1935 if (iter->trace && iter->trace->close) 1956 if (iter->trace && iter->trace->close)
1936 iter->trace->close(iter); 1957 iter->trace->close(iter);
1937 1958
@@ -2356,9 +2377,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2356 struct tracer *t; 2377 struct tracer *t;
2357 char buf[max_tracer_type_len+1]; 2378 char buf[max_tracer_type_len+1];
2358 int i; 2379 int i;
 2380 ssize_t ret;
2359 2381
2360 if (cnt > max_tracer_type_len) 2382 if (cnt > max_tracer_type_len)
2361 cnt = max_tracer_type_len; 2383 cnt = max_tracer_type_len;
2384 ret = cnt;
2362 2385
2363 if (copy_from_user(&buf, ubuf, cnt)) 2386 if (copy_from_user(&buf, ubuf, cnt))
2364 return -EFAULT; 2387 return -EFAULT;
@@ -2374,7 +2397,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2374 if (strcmp(t->name, buf) == 0) 2397 if (strcmp(t->name, buf) == 0)
2375 break; 2398 break;
2376 } 2399 }
2377 if (!t || t == current_trace) 2400 if (!t) {
2401 ret = -EINVAL;
2402 goto out;
2403 }
2404 if (t == current_trace)
2378 goto out; 2405 goto out;
2379 2406
2380 if (current_trace && current_trace->reset) 2407 if (current_trace && current_trace->reset)
@@ -2387,9 +2414,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2387 out: 2414 out:
2388 mutex_unlock(&trace_types_lock); 2415 mutex_unlock(&trace_types_lock);
2389 2416
2390 filp->f_pos += cnt; 2417 if (ret == cnt)
2418 filp->f_pos += cnt;
2391 2419
2392 return cnt; 2420 return ret;
2393} 2421}
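The rewrite also fixes the return-value contract of the current_tracer file: an unknown tracer name now yields -EINVAL instead of being silently swallowed, and f_pos advances only when the whole write was accepted. Seen from userspace (a sketch; the path assumes debugfs is mounted at /sys/kernel/debug):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/debug/tracing/current_tracer",
                          O_WRONLY);

            if (fd < 0)
                    return 1;
            /* Previously this write appeared to succeed; with the
             * fix it fails with EINVAL. */
            if (write(fd, "no_such_tracer", 14) < 0)
                    perror("write");
            close(fd);
            return 0;
    }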
2394 2422
2395static ssize_t 2423static ssize_t
@@ -2504,20 +2532,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2504 size_t cnt, loff_t *ppos) 2532 size_t cnt, loff_t *ppos)
2505{ 2533{
2506 struct trace_iterator *iter = filp->private_data; 2534 struct trace_iterator *iter = filp->private_data;
2507 struct trace_array_cpu *data;
2508 static cpumask_t mask;
2509 unsigned long flags;
2510#ifdef CONFIG_FTRACE
2511 int ftrace_save;
2512#endif
2513 int cpu;
2514 ssize_t sret; 2535 ssize_t sret;
2515 2536
2516 /* return any leftover data */ 2537 /* return any leftover data */
2517 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2538 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2518 if (sret != -EBUSY) 2539 if (sret != -EBUSY)
2519 return sret; 2540 return sret;
2520 sret = 0;
2521 2541
2522 trace_seq_reset(&iter->seq); 2542 trace_seq_reset(&iter->seq);
2523 2543
@@ -2528,6 +2548,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2528 goto out; 2548 goto out;
2529 } 2549 }
2530 2550
2551waitagain:
2552 sret = 0;
2531 while (trace_empty(iter)) { 2553 while (trace_empty(iter)) {
2532 2554
2533 if ((filp->f_flags & O_NONBLOCK)) { 2555 if ((filp->f_flags & O_NONBLOCK)) {
@@ -2592,46 +2614,12 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2592 offsetof(struct trace_iterator, seq)); 2614 offsetof(struct trace_iterator, seq));
2593 iter->pos = -1; 2615 iter->pos = -1;
2594 2616
2595 /*
2596 * We need to stop all tracing on all CPUs to read
2597 * the next buffer. This is a bit expensive, but is
2598 * not done often. We fill all that we can read,
2599 * and then release the locks again.
2600 */
2601
2602 cpus_clear(mask);
2603 local_irq_save(flags);
2604#ifdef CONFIG_FTRACE
2605 ftrace_save = ftrace_enabled;
2606 ftrace_enabled = 0;
2607#endif
2608 smp_wmb();
2609 for_each_tracing_cpu(cpu) {
2610 data = iter->tr->data[cpu];
2611
2612 if (!head_page(data) || !data->trace_idx)
2613 continue;
2614
2615 atomic_inc(&data->disabled);
2616 cpu_set(cpu, mask);
2617 }
2618
2619 for_each_cpu_mask(cpu, mask) {
2620 data = iter->tr->data[cpu];
2621 __raw_spin_lock(&data->lock);
2622
2623 if (data->overrun > iter->last_overrun[cpu])
2624 iter->overrun[cpu] +=
2625 data->overrun - iter->last_overrun[cpu];
2626 iter->last_overrun[cpu] = data->overrun;
2627 }
2628
2629 while (find_next_entry_inc(iter) != NULL) { 2617 while (find_next_entry_inc(iter) != NULL) {
2630 int ret; 2618 enum print_line_t ret;
2631 int len = iter->seq.len; 2619 int len = iter->seq.len;
2632 2620
2633 ret = print_trace_line(iter); 2621 ret = print_trace_line(iter);
2634 if (!ret) { 2622 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2635 /* don't print partial lines */ 2623 /* don't print partial lines */
2636 iter->seq.len = len; 2624 iter->seq.len = len;
2637 break; 2625 break;
@@ -2643,26 +2631,17 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
2643 break; 2631 break;
2644 } 2632 }
2645 2633
2646 for_each_cpu_mask(cpu, mask) {
2647 data = iter->tr->data[cpu];
2648 __raw_spin_unlock(&data->lock);
2649 }
2650
2651 for_each_cpu_mask(cpu, mask) {
2652 data = iter->tr->data[cpu];
2653 atomic_dec(&data->disabled);
2654 }
2655#ifdef CONFIG_FTRACE
2656 ftrace_enabled = ftrace_save;
2657#endif
2658 local_irq_restore(flags);
2659
2660 /* Now copy what we have to the user */ 2634 /* Now copy what we have to the user */
2661 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 2635 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2662 if (iter->seq.readpos >= iter->seq.len) 2636 if (iter->seq.readpos >= iter->seq.len)
2663 trace_seq_reset(&iter->seq); 2637 trace_seq_reset(&iter->seq);
2638
2639 /*
2640 * If there was nothing to send to the user, in spite of consuming trace
2641 * entries, go back to wait for more entries.
2642 */
2664 if (sret == -EBUSY) 2643 if (sret == -EBUSY)
2665 sret = 0; 2644 goto waitagain;
2666 2645
2667out: 2646out:
2668 mutex_unlock(&trace_types_lock); 2647 mutex_unlock(&trace_types_lock);
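The waitagain label closes a subtle hole: a read could consume entries (for example, nothing but TRACE_CONT glue) yet render zero printable bytes, and the old code returned that as an empty read. Now the function loops back and waits for real data. The control flow, reduced to a sketch of the function above (the elided middle is the wait-and-render loop):

    static ssize_t read_pipe_skeleton(struct trace_iterator *iter,
                                      char __user *ubuf, size_t cnt)
    {
            ssize_t sret;

    waitagain:
            sret = 0;
            /* ... block until entries exist, render into iter->seq ... */
            sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
            if (sret == -EBUSY)     /* consumed entries, copied nothing */
                    goto waitagain;
            return sret;
    }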
@@ -2688,7 +2667,8 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2688{ 2667{
2689 unsigned long val; 2668 unsigned long val;
2690 char buf[64]; 2669 char buf[64];
2691 int i, ret; 2670 int ret;
2671 struct trace_array *tr = filp->private_data;
2692 2672
2693 if (cnt >= sizeof(buf)) 2673 if (cnt >= sizeof(buf))
2694 return -EINVAL; 2674 return -EINVAL;
@@ -2708,59 +2688,38 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2708 2688
2709 mutex_lock(&trace_types_lock); 2689 mutex_lock(&trace_types_lock);
2710 2690
2711 if (current_trace != &no_tracer) { 2691 if (tr->ctrl) {
2712 cnt = -EBUSY; 2692 cnt = -EBUSY;
2713 pr_info("ftrace: set current_tracer to none" 2693 pr_info("ftrace: please disable tracing"
2714 " before modifying buffer size\n"); 2694 " before modifying buffer size\n");
2715 goto out; 2695 goto out;
2716 } 2696 }
2717 2697
2718 if (val > global_trace.entries) { 2698 if (val != global_trace.entries) {
2719 long pages_requested; 2699 ret = ring_buffer_resize(global_trace.buffer, val);
2720 unsigned long freeable_pages; 2700 if (ret < 0) {
2721 2701 cnt = ret;
2722 /* make sure we have enough memory before mapping */
2723 pages_requested =
2724 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2725
2726 /* account for each buffer (and max_tr) */
2727 pages_requested *= tracing_nr_buffers * 2;
2728
2729 /* Check for overflow */
2730 if (pages_requested < 0) {
2731 cnt = -ENOMEM;
2732 goto out;
2733 }
2734
2735 freeable_pages = determine_dirtyable_memory();
2736
2737 /* we only allow to request 1/4 of useable memory */
2738 if (pages_requested >
2739 ((freeable_pages + tracing_pages_allocated) / 4)) {
2740 cnt = -ENOMEM;
2741 goto out; 2702 goto out;
2742 } 2703 }
2743 2704
2744 while (global_trace.entries < val) { 2705 ret = ring_buffer_resize(max_tr.buffer, val);
2745 if (trace_alloc_page()) { 2706 if (ret < 0) {
2746 cnt = -ENOMEM; 2707 int r;
2747 goto out; 2708 cnt = ret;
2709 r = ring_buffer_resize(global_trace.buffer,
2710 global_trace.entries);
2711 if (r < 0) {
 2712 /* AARGH! We are left with a
 2713 * different-sized max buffer! */
2714 WARN_ON(1);
2715 tracing_disabled = 1;
2748 } 2716 }
2749 /* double check that we don't go over the known pages */ 2717 goto out;
2750 if (tracing_pages_allocated > pages_requested)
2751 break;
2752 } 2718 }
2753 2719
2754 } else { 2720 global_trace.entries = val;
2755 /* include the number of entries in val (inc of page entries) */
2756 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2757 trace_free_page();
2758 } 2721 }
2759 2722
2760 /* check integrity */
2761 for_each_tracing_cpu(i)
2762 check_pages(global_trace.data[i]);
2763
2764 filp->f_pos += cnt; 2723 filp->f_pos += cnt;
2765 2724
2766 /* If check pages failed, return ENOMEM */ 2725 /* If check pages failed, return ENOMEM */
@@ -2773,6 +2732,52 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2773 return cnt; 2732 return cnt;
2774} 2733}
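Resizing now has to keep two ring buffers (the live buffer and max_tr) the same size, which gives the code a try-then-roll-back shape: if the second resize fails, the first is undone, and if even the rollback fails the sizes disagree and tracing is disabled outright. The shape in isolation (resize_both() is a sketch, not a function from this patch):

    static int resize_both(unsigned long val, unsigned long old)
    {
            if (ring_buffer_resize(global_trace.buffer, val) < 0)
                    return -ENOMEM;                 /* nothing changed */

            if (ring_buffer_resize(max_tr.buffer, val) < 0) {
                    /* Roll the first buffer back so both stay equal. */
                    if (ring_buffer_resize(global_trace.buffer, old) < 0) {
                            WARN_ON(1);             /* sizes now differ */
                            tracing_disabled = 1;   /* refuse to trace */
                    }
                    return -ENOMEM;
            }
            global_trace.entries = val;
            return 0;
    }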
2775 2734
2735static int mark_printk(const char *fmt, ...)
2736{
2737 int ret;
2738 va_list args;
2739 va_start(args, fmt);
2740 ret = trace_vprintk(0, fmt, args);
2741 va_end(args);
2742 return ret;
2743}
2744
2745static ssize_t
2746tracing_mark_write(struct file *filp, const char __user *ubuf,
2747 size_t cnt, loff_t *fpos)
2748{
2749 char *buf;
2750 char *end;
2751 struct trace_array *tr = &global_trace;
2752
2753 if (!tr->ctrl || tracing_disabled)
2754 return -EINVAL;
2755
2756 if (cnt > TRACE_BUF_SIZE)
2757 cnt = TRACE_BUF_SIZE;
2758
2759 buf = kmalloc(cnt + 1, GFP_KERNEL);
2760 if (buf == NULL)
2761 return -ENOMEM;
2762
2763 if (copy_from_user(buf, ubuf, cnt)) {
2764 kfree(buf);
2765 return -EFAULT;
2766 }
2767
2768 /* Cut at the first NUL or newline. */
2769 buf[cnt] = '\0';
2770 end = strchr(buf, '\n');
2771 if (end)
2772 *end = '\0';
2773
2774 cnt = mark_printk("%s\n", buf);
2775 kfree(buf);
2776 *fpos += cnt;
2777
2778 return cnt;
2779}
2780
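trace_marker gives userspace a way to drop annotations into the trace stream; each write becomes one TRACE_PRINT entry, truncated at the first newline and capped at TRACE_BUF_SIZE. A minimal user of the new file (a sketch; the path assumes debugfs is mounted at /sys/kernel/debug):

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            const char msg[] = "frame 42 rendered\n";
            int fd = open("/sys/kernel/debug/tracing/trace_marker",
                          O_WRONLY);

            if (fd < 0)
                    return 1;
            write(fd, msg, strlen(msg));   /* one TRACE_PRINT entry */
            close(fd);
            return 0;
    }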
2776static struct file_operations tracing_max_lat_fops = { 2781static struct file_operations tracing_max_lat_fops = {
2777 .open = tracing_open_generic, 2782 .open = tracing_open_generic,
2778 .read = tracing_max_lat_read, 2783 .read = tracing_max_lat_read,
@@ -2804,6 +2809,11 @@ static struct file_operations tracing_entries_fops = {
2804 .write = tracing_entries_write, 2809 .write = tracing_entries_write,
2805}; 2810};
2806 2811
2812static struct file_operations tracing_mark_fops = {
2813 .open = tracing_open_generic,
2814 .write = tracing_mark_write,
2815};
2816
2807#ifdef CONFIG_DYNAMIC_FTRACE 2817#ifdef CONFIG_DYNAMIC_FTRACE
2808 2818
2809static ssize_t 2819static ssize_t
@@ -2850,7 +2860,7 @@ struct dentry *tracing_init_dentry(void)
2850#include "trace_selftest.c" 2860#include "trace_selftest.c"
2851#endif 2861#endif
2852 2862
2853static __init void tracer_init_debugfs(void) 2863static __init int tracer_init_debugfs(void)
2854{ 2864{
2855 struct dentry *d_tracer; 2865 struct dentry *d_tracer;
2856 struct dentry *entry; 2866 struct dentry *entry;
@@ -2885,12 +2895,12 @@ static __init void tracer_init_debugfs(void)
2885 entry = debugfs_create_file("available_tracers", 0444, d_tracer, 2895 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2886 &global_trace, &show_traces_fops); 2896 &global_trace, &show_traces_fops);
2887 if (!entry) 2897 if (!entry)
2888 pr_warning("Could not create debugfs 'trace' entry\n"); 2898 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2889 2899
2890 entry = debugfs_create_file("current_tracer", 0444, d_tracer, 2900 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2891 &global_trace, &set_tracer_fops); 2901 &global_trace, &set_tracer_fops);
2892 if (!entry) 2902 if (!entry)
2893 pr_warning("Could not create debugfs 'trace' entry\n"); 2903 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2894 2904
2895 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, 2905 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2896 &tracing_max_latency, 2906 &tracing_max_latency,
@@ -2903,7 +2913,7 @@ static __init void tracer_init_debugfs(void)
2903 &tracing_thresh, &tracing_max_lat_fops); 2913 &tracing_thresh, &tracing_max_lat_fops);
2904 if (!entry) 2914 if (!entry)
2905 pr_warning("Could not create debugfs " 2915 pr_warning("Could not create debugfs "
2906 "'tracing_threash' entry\n"); 2916 "'tracing_thresh' entry\n");
2907 entry = debugfs_create_file("README", 0644, d_tracer, 2917 entry = debugfs_create_file("README", 0644, d_tracer,
2908 NULL, &tracing_readme_fops); 2918 NULL, &tracing_readme_fops);
2909 if (!entry) 2919 if (!entry)
@@ -2913,13 +2923,19 @@ static __init void tracer_init_debugfs(void)
2913 NULL, &tracing_pipe_fops); 2923 NULL, &tracing_pipe_fops);
2914 if (!entry) 2924 if (!entry)
2915 pr_warning("Could not create debugfs " 2925 pr_warning("Could not create debugfs "
2916 "'tracing_threash' entry\n"); 2926 "'trace_pipe' entry\n");
2917 2927
2918 entry = debugfs_create_file("trace_entries", 0644, d_tracer, 2928 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2919 &global_trace, &tracing_entries_fops); 2929 &global_trace, &tracing_entries_fops);
2920 if (!entry) 2930 if (!entry)
2921 pr_warning("Could not create debugfs " 2931 pr_warning("Could not create debugfs "
2922 "'tracing_threash' entry\n"); 2932 "'trace_entries' entry\n");
2933
2934 entry = debugfs_create_file("trace_marker", 0220, d_tracer,
2935 NULL, &tracing_mark_fops);
2936 if (!entry)
2937 pr_warning("Could not create debugfs "
2938 "'trace_marker' entry\n");
2923 2939
2924#ifdef CONFIG_DYNAMIC_FTRACE 2940#ifdef CONFIG_DYNAMIC_FTRACE
2925 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, 2941 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
@@ -2932,230 +2948,263 @@ static __init void tracer_init_debugfs(void)
2932#ifdef CONFIG_SYSPROF_TRACER 2948#ifdef CONFIG_SYSPROF_TRACER
2933 init_tracer_sysprof_debugfs(d_tracer); 2949 init_tracer_sysprof_debugfs(d_tracer);
2934#endif 2950#endif
2951 return 0;
2935} 2952}
2936 2953
2937static int trace_alloc_page(void) 2954int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2938{ 2955{
2956 static DEFINE_SPINLOCK(trace_buf_lock);
2957 static char trace_buf[TRACE_BUF_SIZE];
2958
2959 struct ring_buffer_event *event;
2960 struct trace_array *tr = &global_trace;
2939 struct trace_array_cpu *data; 2961 struct trace_array_cpu *data;
2940 struct page *page, *tmp; 2962 struct print_entry *entry;
2941 LIST_HEAD(pages); 2963 unsigned long flags, irq_flags;
2942 void *array; 2964 int cpu, len = 0, size, pc;
2943 unsigned pages_allocated = 0;
2944 int i;
2945 2965
2946 /* first allocate a page for each CPU */ 2966 if (!tr->ctrl || tracing_disabled)
2947 for_each_tracing_cpu(i) { 2967 return 0;
2948 array = (void *)__get_free_page(GFP_KERNEL);
2949 if (array == NULL) {
2950 printk(KERN_ERR "tracer: failed to allocate page"
2951 "for trace buffer!\n");
2952 goto free_pages;
2953 }
2954 2968
2955 pages_allocated++; 2969 pc = preempt_count();
2956 page = virt_to_page(array); 2970 preempt_disable_notrace();
2957 list_add(&page->lru, &pages); 2971 cpu = raw_smp_processor_id();
2972 data = tr->data[cpu];
2958 2973
2959/* Only allocate if we are actually using the max trace */ 2974 if (unlikely(atomic_read(&data->disabled)))
2960#ifdef CONFIG_TRACER_MAX_TRACE 2975 goto out;
2961 array = (void *)__get_free_page(GFP_KERNEL);
2962 if (array == NULL) {
2963 printk(KERN_ERR "tracer: failed to allocate page"
2964 "for trace buffer!\n");
2965 goto free_pages;
2966 }
2967 pages_allocated++;
2968 page = virt_to_page(array);
2969 list_add(&page->lru, &pages);
2970#endif
2971 }
2972 2976
2973 /* Now that we successfully allocate a page per CPU, add them */ 2977 spin_lock_irqsave(&trace_buf_lock, flags);
2974 for_each_tracing_cpu(i) { 2978 len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
2975 data = global_trace.data[i];
2976 page = list_entry(pages.next, struct page, lru);
2977 list_del_init(&page->lru);
2978 list_add_tail(&page->lru, &data->trace_pages);
2979 ClearPageLRU(page);
2980 2979
2981#ifdef CONFIG_TRACER_MAX_TRACE 2980 len = min(len, TRACE_BUF_SIZE-1);
2982 data = max_tr.data[i]; 2981 trace_buf[len] = 0;
2983 page = list_entry(pages.next, struct page, lru);
2984 list_del_init(&page->lru);
2985 list_add_tail(&page->lru, &data->trace_pages);
2986 SetPageLRU(page);
2987#endif
2988 }
2989 tracing_pages_allocated += pages_allocated;
2990 global_trace.entries += ENTRIES_PER_PAGE;
2991 2982
2992 return 0; 2983 size = sizeof(*entry) + len + 1;
2984 event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
2985 if (!event)
2986 goto out_unlock;
2987 entry = ring_buffer_event_data(event);
2988 tracing_generic_entry_update(&entry->ent, flags, pc);
2989 entry->ent.type = TRACE_PRINT;
2990 entry->ip = ip;
2993 2991
2994 free_pages: 2992 memcpy(&entry->buf, trace_buf, len);
2995 list_for_each_entry_safe(page, tmp, &pages, lru) { 2993 entry->buf[len] = 0;
2996 list_del_init(&page->lru); 2994 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
2997 __free_page(page); 2995
2998 } 2996 out_unlock:
2999 return -ENOMEM; 2997 spin_unlock_irqrestore(&trace_buf_lock, flags);
2998
2999 out:
3000 preempt_enable_notrace();
3001
3002 return len;
3000} 3003}
3004EXPORT_SYMBOL_GPL(trace_vprintk);
3001 3005
3002static int trace_free_page(void) 3006int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3003{ 3007{
3004 struct trace_array_cpu *data; 3008 int ret;
3005 struct page *page; 3009 va_list ap;
3006 struct list_head *p;
3007 int i;
3008 int ret = 0;
3009 3010
3010 /* free one page from each buffer */ 3011 if (!(trace_flags & TRACE_ITER_PRINTK))
3011 for_each_tracing_cpu(i) { 3012 return 0;
3012 data = global_trace.data[i];
3013 p = data->trace_pages.next;
3014 if (p == &data->trace_pages) {
3015 /* should never happen */
3016 WARN_ON(1);
3017 tracing_disabled = 1;
3018 ret = -1;
3019 break;
3020 }
3021 page = list_entry(p, struct page, lru);
3022 ClearPageLRU(page);
3023 list_del(&page->lru);
3024 tracing_pages_allocated--;
3025 tracing_pages_allocated--;
3026 __free_page(page);
3027 3013
3028 tracing_reset(data); 3014 va_start(ap, fmt);
3015 ret = trace_vprintk(ip, fmt, ap);
3016 va_end(ap);
3017 return ret;
3018}
3019EXPORT_SYMBOL_GPL(__ftrace_printk);
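With trace_vprintk() and __ftrace_printk() exported, kernel code can inject printf-style records into the trace buffer. Callers are expected to go through the ftrace_printk() wrapper, which supplies the caller's instruction pointer (see include/linux/ftrace.h); a hedged usage sketch (my_probe_point() is illustrative, not from this patch):

    static void my_probe_point(int state)
    {
            ftrace_printk("reached probe, state=%d\n", state);
    }

The TRACE_ITER_PRINTK flag gates the output, so such calls are near no-ops unless printk tracing is enabled.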
3029 3020
3030#ifdef CONFIG_TRACER_MAX_TRACE 3021static int trace_panic_handler(struct notifier_block *this,
3031 data = max_tr.data[i]; 3022 unsigned long event, void *unused)
3032 p = data->trace_pages.next; 3023{
3033 if (p == &data->trace_pages) { 3024 ftrace_dump();
3034 /* should never happen */ 3025 return NOTIFY_OK;
3035 WARN_ON(1); 3026}
3036 tracing_disabled = 1;
3037 ret = -1;
3038 break;
3039 }
3040 page = list_entry(p, struct page, lru);
3041 ClearPageLRU(page);
3042 list_del(&page->lru);
3043 __free_page(page);
3044 3027
3045 tracing_reset(data); 3028static struct notifier_block trace_panic_notifier = {
3046#endif 3029 .notifier_call = trace_panic_handler,
3047 } 3030 .next = NULL,
3048 global_trace.entries -= ENTRIES_PER_PAGE; 3031 .priority = 150 /* priority: INT_MAX >= x >= 0 */
3032};
3049 3033
3050 return ret; 3034static int trace_die_handler(struct notifier_block *self,
3035 unsigned long val,
3036 void *data)
3037{
3038 switch (val) {
3039 case DIE_OOPS:
3040 ftrace_dump();
3041 break;
3042 default:
3043 break;
3044 }
3045 return NOTIFY_OK;
3051} 3046}
3052 3047
3053__init static int tracer_alloc_buffers(void) 3048static struct notifier_block trace_die_notifier = {
3049 .notifier_call = trace_die_handler,
3050 .priority = 200
3051};
3052
3053/*
3054 * printk is set to a max of 1024; we really don't need it that big.
3055 * Nothing should be printing 1000 characters anyway.
3056 */
3057#define TRACE_MAX_PRINT 1000
3058
3059/*
3060 * Define KERN_TRACE here so that we have one place to modify
3061 * it if we decide to change what log level the ftrace dump
3062 * should be at.
3063 */
3064#define KERN_TRACE KERN_INFO
3065
3066static void
3067trace_printk_seq(struct trace_seq *s)
3054{ 3068{
3055 struct trace_array_cpu *data; 3069 /* Probably should print a warning here. */
3056 void *array; 3070 if (s->len >= 1000)
3057 struct page *page; 3071 s->len = 1000;
3058 int pages = 0;
3059 int ret = -ENOMEM;
3060 int i;
3061 3072
3062 /* TODO: make the number of buffers hot pluggable with CPUS */ 3073 /* should be zero-terminated, but we are paranoid. */
3063 tracing_nr_buffers = num_possible_cpus(); 3074 s->buffer[s->len] = 0;
3064 tracing_buffer_mask = cpu_possible_map;
3065 3075
3066 /* Allocate the first page for all buffers */ 3076 printk(KERN_TRACE "%s", s->buffer);
3067 for_each_tracing_cpu(i) {
3068 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3069 max_tr.data[i] = &per_cpu(max_data, i);
3070 3077
3071 array = (void *)__get_free_page(GFP_KERNEL); 3078 trace_seq_reset(s);
3072 if (array == NULL) { 3079}
3073 printk(KERN_ERR "tracer: failed to allocate page" 3080
3074 "for trace buffer!\n"); 3081
3075 goto free_buffers; 3082void ftrace_dump(void)
3076 } 3083{
3084 static DEFINE_SPINLOCK(ftrace_dump_lock);
3085 /* use static because iter can be a bit big for the stack */
3086 static struct trace_iterator iter;
3087 static cpumask_t mask;
3088 static int dump_ran;
3089 unsigned long flags;
3090 int cnt = 0, cpu;
3077 3091
3078 /* set the array to the list */ 3092 /* only one dump */
3079 INIT_LIST_HEAD(&data->trace_pages); 3093 spin_lock_irqsave(&ftrace_dump_lock, flags);
3080 page = virt_to_page(array); 3094 if (dump_ran)
3081 list_add(&page->lru, &data->trace_pages); 3095 goto out;
3082 /* use the LRU flag to differentiate the two buffers */
3083 ClearPageLRU(page);
3084 3096
3085 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 3097 dump_ran = 1;
3086 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3087 3098
3088/* Only allocate if we are actually using the max trace */ 3099 /* No turning back! */
3089#ifdef CONFIG_TRACER_MAX_TRACE 3100 ftrace_kill_atomic();
3090 array = (void *)__get_free_page(GFP_KERNEL);
3091 if (array == NULL) {
3092 printk(KERN_ERR "tracer: failed to allocate page"
3093 "for trace buffer!\n");
3094 goto free_buffers;
3095 }
3096 3101
3097 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages); 3102 for_each_tracing_cpu(cpu) {
3098 page = virt_to_page(array); 3103 atomic_inc(&global_trace.data[cpu]->disabled);
3099 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3100 SetPageLRU(page);
3101#endif
3102 } 3104 }
3103 3105
3106 printk(KERN_TRACE "Dumping ftrace buffer:\n");
3107
3108 iter.tr = &global_trace;
3109 iter.trace = current_trace;
3110
3104 /* 3111 /*
3105 * Since we allocate by orders of pages, we may be able to 3112 * We need to stop all tracing on all CPUs to read
3106 * round up a bit. 3113 * the next buffer. This is a bit expensive, but is
3114 * not done often. We fill all that we can read,
3115 * and then release the locks again.
3107 */ 3116 */
3108 global_trace.entries = ENTRIES_PER_PAGE;
3109 pages++;
3110 3117
3111 while (global_trace.entries < trace_nr_entries) { 3118 cpus_clear(mask);
3112 if (trace_alloc_page()) 3119
3113 break; 3120 while (!trace_empty(&iter)) {
3114 pages++; 3121
3122 if (!cnt)
3123 printk(KERN_TRACE "---------------------------------\n");
3124
3125 cnt++;
3126
3127 /* reset all but tr, trace, and overruns */
3128 memset(&iter.seq, 0,
3129 sizeof(struct trace_iterator) -
3130 offsetof(struct trace_iterator, seq));
3131 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3132 iter.pos = -1;
3133
3134 if (find_next_entry_inc(&iter) != NULL) {
3135 print_trace_line(&iter);
3136 trace_consume(&iter);
3137 }
3138
3139 trace_printk_seq(&iter.seq);
3115 } 3140 }
3116 max_tr.entries = global_trace.entries;
3117 3141
3118 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n", 3142 if (!cnt)
3119 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE); 3143 printk(KERN_TRACE " (ftrace buffer empty)\n");
3120 pr_info(" actual entries %ld\n", global_trace.entries); 3144 else
3145 printk(KERN_TRACE "---------------------------------\n");
3146
3147 out:
3148 spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3149}
3150
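ftrace_dump() must behave sanely when several CPUs oops or panic at once, hence the dump_ran guard: whoever takes the spinlock first does the dump, everyone else falls straight through. The one-shot pattern in isolation (names are illustrative):

    static DEFINE_SPINLOCK(once_lock);
    static int once_ran;

    static void run_once(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&once_lock, flags);
            if (once_ran)
                    goto out;
            once_ran = 1;
            /* ... work that must happen at most once, even if
             * several CPUs crash concurrently ... */
    out:
            spin_unlock_irqrestore(&once_lock, flags);
    }

Note that ftrace_kill_atomic() runs before the buffers are touched, so the dump itself cannot generate new function-trace entries.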
3151__init static int tracer_alloc_buffers(void)
3152{
3153 struct trace_array_cpu *data;
3154 int i;
3155
3156 /* TODO: make the number of buffers hotpluggable with CPUs */
3157 tracing_buffer_mask = cpu_possible_map;
3158
3159 global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3160 TRACE_BUFFER_FLAGS);
3161 if (!global_trace.buffer) {
3162 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3163 WARN_ON(1);
3164 return 0;
3165 }
3166 global_trace.entries = ring_buffer_size(global_trace.buffer);
3121 3167
3122 tracer_init_debugfs(); 3168#ifdef CONFIG_TRACER_MAX_TRACE
3169 max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3170 TRACE_BUFFER_FLAGS);
3171 if (!max_tr.buffer) {
3172 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3173 WARN_ON(1);
3174 ring_buffer_free(global_trace.buffer);
3175 return 0;
3176 }
3177 max_tr.entries = ring_buffer_size(max_tr.buffer);
3178 WARN_ON(max_tr.entries != global_trace.entries);
3179#endif
3180
3181 /* Allocate the first page for all buffers */
3182 for_each_tracing_cpu(i) {
3183 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3184 max_tr.data[i] = &per_cpu(max_data, i);
3185 }
3123 3186
3124 trace_init_cmdlines(); 3187 trace_init_cmdlines();
3125 3188
3126 register_tracer(&no_tracer); 3189 register_tracer(&nop_trace);
3127 current_trace = &no_tracer; 3190#ifdef CONFIG_BOOT_TRACER
3191 register_tracer(&boot_tracer);
3192 current_trace = &boot_tracer;
3193 current_trace->init(&global_trace);
3194#else
3195 current_trace = &nop_trace;
3196#endif
3128 3197
3129 /* All seems OK, enable tracing */ 3198 /* All seems OK, enable tracing */
3130 global_trace.ctrl = tracer_enabled; 3199 global_trace.ctrl = tracer_enabled;
3131 tracing_disabled = 0; 3200 tracing_disabled = 0;
3132 3201
3133 return 0; 3202 atomic_notifier_chain_register(&panic_notifier_list,
3203 &trace_panic_notifier);
3134 3204
3135 free_buffers: 3205 register_die_notifier(&trace_die_notifier);
3136 for (i-- ; i >= 0; i--) {
3137 struct page *page, *tmp;
3138 struct trace_array_cpu *data = global_trace.data[i];
3139 3206
3140 if (data) { 3207 return 0;
3141 list_for_each_entry_safe(page, tmp,
3142 &data->trace_pages, lru) {
3143 list_del_init(&page->lru);
3144 __free_page(page);
3145 }
3146 }
3147
3148#ifdef CONFIG_TRACER_MAX_TRACE
3149 data = max_tr.data[i];
3150 if (data) {
3151 list_for_each_entry_safe(page, tmp,
3152 &data->trace_pages, lru) {
3153 list_del_init(&page->lru);
3154 __free_page(page);
3155 }
3156 }
3157#endif
3158 }
3159 return ret;
3160} 3208}
3161fs_initcall(tracer_alloc_buffers); 3209early_initcall(tracer_alloc_buffers);
3210fs_initcall(tracer_init_debugfs);
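
tracer_alloc_buffers() treats the two ring buffers as a unit: if the max_tr buffer cannot be allocated, the global one is freed again rather than leaving the tracer half-initialized. A small sketch of that all-or-nothing pattern, using plain malloc/free stand-ins rather than the ring-buffer API:

    #include <stdio.h>
    #include <stdlib.h>

    struct buffers {
        void *global;   /* stand-in for global_trace.buffer */
        void *max;      /* stand-in for max_tr.buffer */
    };

    static int alloc_buffers(struct buffers *b, size_t size)
    {
        b->global = malloc(size);
        if (!b->global)
            return -1;

        b->max = malloc(size);
        if (!b->max) {
            /* undo the first allocation: all or nothing */
            free(b->global);
            b->global = NULL;
            return -1;
        }
        return 0;
    }

    int main(void)
    {
        struct buffers b;

        if (alloc_buffers(&b, 1 << 20))
            return 1;
        puts("both buffers allocated");
        free(b.max);
        free(b.global);
        return 0;
    }
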
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f69f86788c2..f1f99572cde 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -5,7 +5,9 @@
5#include <asm/atomic.h> 5#include <asm/atomic.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/clocksource.h> 7#include <linux/clocksource.h>
8#include <linux/ring_buffer.h>
8#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/ftrace.h>
9 11
10enum trace_type { 12enum trace_type {
11 __TRACE_FIRST_TYPE = 0, 13 __TRACE_FIRST_TYPE = 0,
@@ -13,38 +15,60 @@ enum trace_type {
13 TRACE_FN, 15 TRACE_FN,
14 TRACE_CTX, 16 TRACE_CTX,
15 TRACE_WAKE, 17 TRACE_WAKE,
18 TRACE_CONT,
16 TRACE_STACK, 19 TRACE_STACK,
20 TRACE_PRINT,
17 TRACE_SPECIAL, 21 TRACE_SPECIAL,
18 TRACE_MMIO_RW, 22 TRACE_MMIO_RW,
19 TRACE_MMIO_MAP, 23 TRACE_MMIO_MAP,
24 TRACE_BOOT,
20 25
21 __TRACE_LAST_TYPE 26 __TRACE_LAST_TYPE
22}; 27};
23 28
24/* 29/*
30 * The trace entry - the most basic unit of tracing. This is what
31 * is printed in the end as a single line in the trace output, such as:
32 *
33 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
34 */
35struct trace_entry {
36 unsigned char type;
37 unsigned char cpu;
38 unsigned char flags;
39 unsigned char preempt_count;
40 int pid;
41};
42
43/*
 25 * Function trace entry - function address and parent function address: 44
26 */ 45 */
27struct ftrace_entry { 46struct ftrace_entry {
47 struct trace_entry ent;
28 unsigned long ip; 48 unsigned long ip;
29 unsigned long parent_ip; 49 unsigned long parent_ip;
30}; 50};
51extern struct tracer boot_tracer;
31 52
32/* 53/*
33 * Context switch trace entry - which task (and prio) we switched from/to: 54 * Context switch trace entry - which task (and prio) we switched from/to:
34 */ 55 */
35struct ctx_switch_entry { 56struct ctx_switch_entry {
57 struct trace_entry ent;
36 unsigned int prev_pid; 58 unsigned int prev_pid;
37 unsigned char prev_prio; 59 unsigned char prev_prio;
38 unsigned char prev_state; 60 unsigned char prev_state;
39 unsigned int next_pid; 61 unsigned int next_pid;
40 unsigned char next_prio; 62 unsigned char next_prio;
41 unsigned char next_state; 63 unsigned char next_state;
64 unsigned int next_cpu;
42}; 65};
43 66
44/* 67/*
45 * Special (free-form) trace entry: 68 * Special (free-form) trace entry:
46 */ 69 */
47struct special_entry { 70struct special_entry {
71 struct trace_entry ent;
48 unsigned long arg1; 72 unsigned long arg1;
49 unsigned long arg2; 73 unsigned long arg2;
50 unsigned long arg3; 74 unsigned long arg3;
@@ -57,33 +81,60 @@ struct special_entry {
57#define FTRACE_STACK_ENTRIES 8 81#define FTRACE_STACK_ENTRIES 8
58 82
59struct stack_entry { 83struct stack_entry {
84 struct trace_entry ent;
60 unsigned long caller[FTRACE_STACK_ENTRIES]; 85 unsigned long caller[FTRACE_STACK_ENTRIES];
61}; 86};
62 87
63/* 88/*
64 * The trace entry - the most basic unit of tracing. This is what 89 * ftrace_printk entry:
65 * is printed in the end as a single line in the trace output, such as:
66 *
67 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
68 */ 90 */
69struct trace_entry { 91struct print_entry {
70 char type; 92 struct trace_entry ent;
71 char cpu; 93 unsigned long ip;
72 char flags; 94 char buf[];
73 char preempt_count; 95};
74 int pid; 96
75 cycle_t t; 97#define TRACE_OLD_SIZE 88
76 union { 98
77 struct ftrace_entry fn; 99struct trace_field_cont {
78 struct ctx_switch_entry ctx; 100 unsigned char type;
79 struct special_entry special; 101 /* Temporary till we get rid of this completely */
80 struct stack_entry stack; 102 char buf[TRACE_OLD_SIZE - 1];
81 struct mmiotrace_rw mmiorw; 103};
82 struct mmiotrace_map mmiomap; 104
83 }; 105struct trace_mmiotrace_rw {
106 struct trace_entry ent;
107 struct mmiotrace_rw rw;
84}; 108};
85 109
86#define TRACE_ENTRY_SIZE sizeof(struct trace_entry) 110struct trace_mmiotrace_map {
111 struct trace_entry ent;
112 struct mmiotrace_map map;
113};
114
115struct trace_boot {
116 struct trace_entry ent;
117 struct boot_trace initcall;
118};
119
120/*
121 * trace_flag_type is an enumeration that holds different
122 * states when a trace occurs. These are:
123 * IRQS_OFF - interrupts were disabled
 124 * NEED_RESCHED - reschedule is requested
125 * HARDIRQ - inside an interrupt handler
126 * SOFTIRQ - inside a softirq handler
127 * CONT - multiple entries hold the trace item
128 */
129enum trace_flag_type {
130 TRACE_FLAG_IRQS_OFF = 0x01,
131 TRACE_FLAG_NEED_RESCHED = 0x02,
132 TRACE_FLAG_HARDIRQ = 0x04,
133 TRACE_FLAG_SOFTIRQ = 0x08,
134 TRACE_FLAG_CONT = 0x10,
135};
136
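
Because each flag is a distinct power of two, several of these states can be ORed into the single flags byte of struct trace_entry and tested independently. A quick sketch (constants copied from the enum above, everything else illustrative):

    #include <stdio.h>

    enum {
        FLAG_IRQS_OFF     = 0x01,
        FLAG_NEED_RESCHED = 0x02,
        FLAG_HARDIRQ      = 0x04,
        FLAG_SOFTIRQ      = 0x08,
        FLAG_CONT         = 0x10,
    };

    int main(void)
    {
        unsigned char flags = 0;

        /* record two independent states in one byte */
        flags |= FLAG_IRQS_OFF | FLAG_HARDIRQ;

        if (flags & FLAG_HARDIRQ)
            puts("logged from hardirq context");
        if (!(flags & FLAG_CONT))
            puts("entry fits in a single record");
        return 0;
    }
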
137#define TRACE_BUF_SIZE 1024
87 138
88/* 139/*
89 * The CPU trace array - it consists of thousands of trace entries 140 * The CPU trace array - it consists of thousands of trace entries
@@ -91,16 +142,9 @@ struct trace_entry {
91 * the trace, etc.) 142 * the trace, etc.)
92 */ 143 */
93struct trace_array_cpu { 144struct trace_array_cpu {
94 struct list_head trace_pages;
95 atomic_t disabled; 145 atomic_t disabled;
96 raw_spinlock_t lock;
97 struct lock_class_key lock_key;
98 146
99 /* these fields get copied into max-trace: */ 147 /* these fields get copied into max-trace: */
100 unsigned trace_head_idx;
101 unsigned trace_tail_idx;
102 void *trace_head; /* producer */
103 void *trace_tail; /* consumer */
104 unsigned long trace_idx; 148 unsigned long trace_idx;
105 unsigned long overrun; 149 unsigned long overrun;
106 unsigned long saved_latency; 150 unsigned long saved_latency;
@@ -124,6 +168,7 @@ struct trace_iterator;
124 * They have on/off state as well: 168 * They have on/off state as well:
125 */ 169 */
126struct trace_array { 170struct trace_array {
171 struct ring_buffer *buffer;
127 unsigned long entries; 172 unsigned long entries;
128 long ctrl; 173 long ctrl;
129 int cpu; 174 int cpu;
@@ -132,6 +177,56 @@ struct trace_array {
132 struct trace_array_cpu *data[NR_CPUS]; 177 struct trace_array_cpu *data[NR_CPUS];
133}; 178};
134 179
180#define FTRACE_CMP_TYPE(var, type) \
181 __builtin_types_compatible_p(typeof(var), type *)
182
183#undef IF_ASSIGN
184#define IF_ASSIGN(var, entry, etype, id) \
185 if (FTRACE_CMP_TYPE(var, etype)) { \
186 var = (typeof(var))(entry); \
187 WARN_ON(id && (entry)->type != id); \
188 break; \
189 }
190
191/* Will cause compile errors if type is not found. */
192extern void __ftrace_bad_type(void);
193
194/*
195 * The trace_assign_type is a verifier that the entry type is
196 * the same as the type being assigned. To add new types simply
197 * add a line with the following format:
198 *
199 * IF_ASSIGN(var, ent, type, id);
200 *
201 * Where "type" is the trace type that includes the trace_entry
202 * as the "ent" item. And "id" is the trace identifier that is
203 * used in the trace_type enum.
204 *
205 * If the type can have more than one id, then use zero.
206 */
207#define trace_assign_type(var, ent) \
208 do { \
209 IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \
210 IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
211 IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \
212 IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
213 IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
214 IF_ASSIGN(var, ent, struct special_entry, 0); \
215 IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
216 TRACE_MMIO_RW); \
217 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
218 TRACE_MMIO_MAP); \
219 IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \
220 __ftrace_bad_type(); \
221 } while (0)
222
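
The macro works because __builtin_types_compatible_p() folds to a constant at compile time: for the matching type the assignment branch is taken and the call to __ftrace_bad_type() becomes dead code, while a wrong type leaves the call in place and the undefined symbol fails the link. A userspace sketch of the same trick, with made-up names; it needs an optimizing build (e.g. gcc -O2), as the kernel always has, so the dead call is actually dropped:

    #include <stdio.h>

    struct entry    { int type; };
    struct fn_entry { struct entry ent; unsigned long ip; };

    extern void bad_type(void);     /* deliberately never defined */

    #define assign_type(var, ent)                                   \
        do {                                                        \
            if (__builtin_types_compatible_p(typeof(var),           \
                                             struct fn_entry *)) {  \
                var = (typeof(var))(ent);                           \
                break;                                              \
            }                                                       \
            bad_type();     /* link error on a wrong type */        \
        } while (0)

    int main(void)
    {
        struct fn_entry fe = { { 1 }, 0x1234 };
        struct entry *ent = &fe.ent;
        struct fn_entry *field;

        assign_type(field, ent);    /* type-checked downcast */
        printf("ip=0x%lx\n", field->ip);
        return 0;
    }
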
223/* Return values for print_line callback */
224enum print_line_t {
225 TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */
226 TRACE_TYPE_HANDLED = 1,
227 TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */
228};
229
135/* 230/*
136 * A specific tracer, represented by methods that operate on a trace array: 231 * A specific tracer, represented by methods that operate on a trace array:
137 */ 232 */
@@ -152,7 +247,7 @@ struct tracer {
152 int (*selftest)(struct tracer *trace, 247 int (*selftest)(struct tracer *trace,
153 struct trace_array *tr); 248 struct trace_array *tr);
154#endif 249#endif
155 int (*print_line)(struct trace_iterator *iter); 250 enum print_line_t (*print_line)(struct trace_iterator *iter);
156 struct tracer *next; 251 struct tracer *next;
157 int print_max; 252 int print_max;
158}; 253};
@@ -171,57 +266,58 @@ struct trace_iterator {
171 struct trace_array *tr; 266 struct trace_array *tr;
172 struct tracer *trace; 267 struct tracer *trace;
173 void *private; 268 void *private;
174 long last_overrun[NR_CPUS]; 269 struct ring_buffer_iter *buffer_iter[NR_CPUS];
175 long overrun[NR_CPUS];
176 270
177 /* The below is zeroed out in pipe_read */ 271 /* The below is zeroed out in pipe_read */
178 struct trace_seq seq; 272 struct trace_seq seq;
179 struct trace_entry *ent; 273 struct trace_entry *ent;
180 int cpu; 274 int cpu;
181 275 u64 ts;
182 struct trace_entry *prev_ent;
183 int prev_cpu;
184 276
185 unsigned long iter_flags; 277 unsigned long iter_flags;
186 loff_t pos; 278 loff_t pos;
187 unsigned long next_idx[NR_CPUS];
188 struct list_head *next_page[NR_CPUS];
189 unsigned next_page_idx[NR_CPUS];
190 long idx; 279 long idx;
191}; 280};
192 281
193void tracing_reset(struct trace_array_cpu *data); 282void trace_wake_up(void);
283void tracing_reset(struct trace_array *tr, int cpu);
194int tracing_open_generic(struct inode *inode, struct file *filp); 284int tracing_open_generic(struct inode *inode, struct file *filp);
195struct dentry *tracing_init_dentry(void); 285struct dentry *tracing_init_dentry(void);
196void init_tracer_sysprof_debugfs(struct dentry *d_tracer); 286void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
197 287
288struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
289 struct trace_array_cpu *data);
290void tracing_generic_entry_update(struct trace_entry *entry,
291 unsigned long flags,
292 int pc);
293
198void ftrace(struct trace_array *tr, 294void ftrace(struct trace_array *tr,
199 struct trace_array_cpu *data, 295 struct trace_array_cpu *data,
200 unsigned long ip, 296 unsigned long ip,
201 unsigned long parent_ip, 297 unsigned long parent_ip,
202 unsigned long flags); 298 unsigned long flags, int pc);
203void tracing_sched_switch_trace(struct trace_array *tr, 299void tracing_sched_switch_trace(struct trace_array *tr,
204 struct trace_array_cpu *data, 300 struct trace_array_cpu *data,
205 struct task_struct *prev, 301 struct task_struct *prev,
206 struct task_struct *next, 302 struct task_struct *next,
207 unsigned long flags); 303 unsigned long flags, int pc);
208void tracing_record_cmdline(struct task_struct *tsk); 304void tracing_record_cmdline(struct task_struct *tsk);
209 305
210void tracing_sched_wakeup_trace(struct trace_array *tr, 306void tracing_sched_wakeup_trace(struct trace_array *tr,
211 struct trace_array_cpu *data, 307 struct trace_array_cpu *data,
212 struct task_struct *wakee, 308 struct task_struct *wakee,
213 struct task_struct *cur, 309 struct task_struct *cur,
214 unsigned long flags); 310 unsigned long flags, int pc);
215void trace_special(struct trace_array *tr, 311void trace_special(struct trace_array *tr,
216 struct trace_array_cpu *data, 312 struct trace_array_cpu *data,
217 unsigned long arg1, 313 unsigned long arg1,
218 unsigned long arg2, 314 unsigned long arg2,
219 unsigned long arg3); 315 unsigned long arg3, int pc);
220void trace_function(struct trace_array *tr, 316void trace_function(struct trace_array *tr,
221 struct trace_array_cpu *data, 317 struct trace_array_cpu *data,
222 unsigned long ip, 318 unsigned long ip,
223 unsigned long parent_ip, 319 unsigned long parent_ip,
224 unsigned long flags); 320 unsigned long flags, int pc);
225 321
226void tracing_start_cmdline_record(void); 322void tracing_start_cmdline_record(void);
227void tracing_stop_cmdline_record(void); 323void tracing_stop_cmdline_record(void);
@@ -268,51 +364,33 @@ extern unsigned long ftrace_update_tot_cnt;
268extern int DYN_FTRACE_TEST_NAME(void); 364extern int DYN_FTRACE_TEST_NAME(void);
269#endif 365#endif
270 366
271#ifdef CONFIG_MMIOTRACE
272extern void __trace_mmiotrace_rw(struct trace_array *tr,
273 struct trace_array_cpu *data,
274 struct mmiotrace_rw *rw);
275extern void __trace_mmiotrace_map(struct trace_array *tr,
276 struct trace_array_cpu *data,
277 struct mmiotrace_map *map);
278#endif
279
280#ifdef CONFIG_FTRACE_STARTUP_TEST 367#ifdef CONFIG_FTRACE_STARTUP_TEST
281#ifdef CONFIG_FTRACE
282extern int trace_selftest_startup_function(struct tracer *trace, 368extern int trace_selftest_startup_function(struct tracer *trace,
283 struct trace_array *tr); 369 struct trace_array *tr);
284#endif
285#ifdef CONFIG_IRQSOFF_TRACER
286extern int trace_selftest_startup_irqsoff(struct tracer *trace, 370extern int trace_selftest_startup_irqsoff(struct tracer *trace,
287 struct trace_array *tr); 371 struct trace_array *tr);
288#endif
289#ifdef CONFIG_PREEMPT_TRACER
290extern int trace_selftest_startup_preemptoff(struct tracer *trace, 372extern int trace_selftest_startup_preemptoff(struct tracer *trace,
291 struct trace_array *tr); 373 struct trace_array *tr);
292#endif
293#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
294extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace, 374extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
295 struct trace_array *tr); 375 struct trace_array *tr);
296#endif
297#ifdef CONFIG_SCHED_TRACER
298extern int trace_selftest_startup_wakeup(struct tracer *trace, 376extern int trace_selftest_startup_wakeup(struct tracer *trace,
299 struct trace_array *tr); 377 struct trace_array *tr);
300#endif 378extern int trace_selftest_startup_nop(struct tracer *trace,
301#ifdef CONFIG_CONTEXT_SWITCH_TRACER 379 struct trace_array *tr);
302extern int trace_selftest_startup_sched_switch(struct tracer *trace, 380extern int trace_selftest_startup_sched_switch(struct tracer *trace,
303 struct trace_array *tr); 381 struct trace_array *tr);
304#endif
305#ifdef CONFIG_SYSPROF_TRACER
306extern int trace_selftest_startup_sysprof(struct tracer *trace, 382extern int trace_selftest_startup_sysprof(struct tracer *trace,
307 struct trace_array *tr); 383 struct trace_array *tr);
308#endif
309#endif /* CONFIG_FTRACE_STARTUP_TEST */ 384#endif /* CONFIG_FTRACE_STARTUP_TEST */
310 385
311extern void *head_page(struct trace_array_cpu *data); 386extern void *head_page(struct trace_array_cpu *data);
312extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); 387extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
388extern void trace_seq_print_cont(struct trace_seq *s,
389 struct trace_iterator *iter);
313extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, 390extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
314 size_t cnt); 391 size_t cnt);
315extern long ns2usecs(cycle_t nsec); 392extern long ns2usecs(cycle_t nsec);
393extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args);
316 394
317extern unsigned long trace_flags; 395extern unsigned long trace_flags;
318 396
@@ -334,6 +412,9 @@ enum trace_iterator_flags {
334 TRACE_ITER_BLOCK = 0x80, 412 TRACE_ITER_BLOCK = 0x80,
335 TRACE_ITER_STACKTRACE = 0x100, 413 TRACE_ITER_STACKTRACE = 0x100,
336 TRACE_ITER_SCHED_TREE = 0x200, 414 TRACE_ITER_SCHED_TREE = 0x200,
415 TRACE_ITER_PRINTK = 0x400,
337}; 416};
338 417
418extern struct tracer nop_trace;
419
339#endif /* _LINUX_KERNEL_TRACE_H */ 420#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
new file mode 100644
index 00000000000..d0a5e50eeff
--- /dev/null
+++ b/kernel/trace/trace_boot.c
@@ -0,0 +1,126 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12
13#include "trace.h"
14
15static struct trace_array *boot_trace;
16static int trace_boot_enabled;
17
18
19/* Should be started after do_pre_smp_initcalls() in init/main.c */
20void start_boot_trace(void)
21{
22 trace_boot_enabled = 1;
23}
24
25void stop_boot_trace(void)
26{
27 trace_boot_enabled = 0;
28}
29
30void reset_boot_trace(struct trace_array *tr)
31{
32 stop_boot_trace();
33}
34
35static void boot_trace_init(struct trace_array *tr)
36{
37 int cpu;
38 boot_trace = tr;
39
40 trace_boot_enabled = 0;
41
42 for_each_cpu_mask(cpu, cpu_possible_map)
43 tracing_reset(tr, cpu);
44}
45
46static void boot_trace_ctrl_update(struct trace_array *tr)
47{
48 if (tr->ctrl)
49 start_boot_trace();
50 else
51 stop_boot_trace();
52}
53
54static enum print_line_t initcall_print_line(struct trace_iterator *iter)
55{
56 int ret;
57 struct trace_entry *entry = iter->ent;
58 struct trace_boot *field = (struct trace_boot *)entry;
59 struct boot_trace *it = &field->initcall;
60 struct trace_seq *s = &iter->seq;
61 struct timespec calltime = ktime_to_timespec(it->calltime);
62 struct timespec rettime = ktime_to_timespec(it->rettime);
63
64 if (entry->type == TRACE_BOOT) {
65 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
66 calltime.tv_sec,
67 calltime.tv_nsec,
68 it->func, it->caller);
69 if (!ret)
70 return TRACE_TYPE_PARTIAL_LINE;
71
72 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
73 "returned %d after %lld msecs\n",
74 rettime.tv_sec,
75 rettime.tv_nsec,
76 it->func, it->result, it->duration);
77
78 if (!ret)
79 return TRACE_TYPE_PARTIAL_LINE;
80 return TRACE_TYPE_HANDLED;
81 }
82 return TRACE_TYPE_UNHANDLED;
83}
84
85struct tracer boot_tracer __read_mostly =
86{
87 .name = "initcall",
88 .init = boot_trace_init,
89 .reset = reset_boot_trace,
90 .ctrl_update = boot_trace_ctrl_update,
91 .print_line = initcall_print_line,
92};
93
94void trace_boot(struct boot_trace *it, initcall_t fn)
95{
96 struct ring_buffer_event *event;
97 struct trace_boot *entry;
98 struct trace_array_cpu *data;
99 unsigned long irq_flags;
100 struct trace_array *tr = boot_trace;
101
102 if (!trace_boot_enabled)
103 return;
104
105 /* Get its name now since this function could
106 * disappear because it is in the .init section.
107 */
108 sprint_symbol(it->func, (unsigned long)fn);
109 preempt_disable();
110 data = tr->data[smp_processor_id()];
111
112 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
113 &irq_flags);
114 if (!event)
115 goto out;
116 entry = ring_buffer_event_data(event);
117 tracing_generic_entry_update(&entry->ent, 0, 0);
118 entry->ent.type = TRACE_BOOT;
119 entry->initcall = *it;
120 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
121
122 trace_wake_up();
123
124 out:
125 preempt_enable();
126}
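
trace_boot() shows the write path every tracer in this patch now uses: reserve an event in the ring buffer, fill it in place, then commit it so readers can see it, and simply drop the event if reservation fails. A toy userspace model of that reserve/fill/commit flow (this is not the kernel ring-buffer API, just the shape of it):

    #include <stdio.h>
    #include <string.h>

    #define BUF_SIZE 256

    static char   buf[BUF_SIZE];
    static size_t head;     /* next byte a writer may reserve */
    static size_t commit;   /* bytes already visible to readers */

    static void *reserve(size_t len)
    {
        if (head + len > BUF_SIZE)
            return NULL;    /* like a failed lock_reserve */
        void *slot = &buf[head];
        head += len;
        return slot;
    }

    static void commit_all(void)
    {
        commit = head;      /* publish what was reserved */
    }

    struct boot_entry {
        int  type;
        char func[32];
    };

    int main(void)
    {
        struct boot_entry *e = reserve(sizeof(*e));

        if (!e)
            return 1;       /* drop the event, as the tracers do */
        e->type = 1;
        snprintf(e->func, sizeof(e->func), "do_one_initcall");
        commit_all();
        printf("committed %zu bytes\n", commit);
        return 0;
    }
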
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 31214489797..e90eb0c2c56 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -23,7 +23,7 @@ static void function_reset(struct trace_array *tr)
23 tr->time_start = ftrace_now(tr->cpu); 23 tr->time_start = ftrace_now(tr->cpu);
24 24
25 for_each_online_cpu(cpu) 25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]); 26 tracing_reset(tr, cpu);
27} 27}
28 28
29static void start_function_trace(struct trace_array *tr) 29static void start_function_trace(struct trace_array *tr)
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 421d6fe3650..a7db7f040ae 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
95 disabled = atomic_inc_return(&data->disabled); 95 disabled = atomic_inc_return(&data->disabled);
96 96
97 if (likely(disabled == 1)) 97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags); 98 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
99 99
100 atomic_dec(&data->disabled); 100 atomic_dec(&data->disabled);
101} 101}
@@ -130,6 +130,7 @@ check_critical_timing(struct trace_array *tr,
130 unsigned long latency, t0, t1; 130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta; 131 cycle_t T0, T1, delta;
132 unsigned long flags; 132 unsigned long flags;
133 int pc;
133 134
134 /* 135 /*
135 * usecs conversion is slow so we try to delay the conversion 136 * usecs conversion is slow so we try to delay the conversion
@@ -141,6 +142,8 @@ check_critical_timing(struct trace_array *tr,
141 142
142 local_save_flags(flags); 143 local_save_flags(flags);
143 144
145 pc = preempt_count();
146
144 if (!report_latency(delta)) 147 if (!report_latency(delta))
145 goto out; 148 goto out;
146 149
@@ -150,7 +153,7 @@ check_critical_timing(struct trace_array *tr,
150 if (!report_latency(delta)) 153 if (!report_latency(delta))
151 goto out_unlock; 154 goto out_unlock;
152 155
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 156 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
154 157
155 latency = nsecs_to_usecs(delta); 158 latency = nsecs_to_usecs(delta);
156 159
@@ -173,8 +176,8 @@ out_unlock:
173out: 176out:
174 data->critical_sequence = max_sequence; 177 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu); 178 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data); 179 tracing_reset(tr, cpu);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags); 180 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc);
178} 181}
179 182
180static inline void 183static inline void
@@ -203,11 +206,11 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
203 data->critical_sequence = max_sequence; 206 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu); 207 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip; 208 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data); 209 tracing_reset(tr, cpu);
207 210
208 local_save_flags(flags); 211 local_save_flags(flags);
209 212
210 trace_function(tr, data, ip, parent_ip, flags); 213 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
211 214
212 per_cpu(tracing_cpu, cpu) = 1; 215 per_cpu(tracing_cpu, cpu) = 1;
213 216
@@ -234,14 +237,14 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
234 237
235 data = tr->data[cpu]; 238 data = tr->data[cpu];
236 239
237 if (unlikely(!data) || unlikely(!head_page(data)) || 240 if (unlikely(!data) ||
238 !data->critical_start || atomic_read(&data->disabled)) 241 !data->critical_start || atomic_read(&data->disabled))
239 return; 242 return;
240 243
241 atomic_inc(&data->disabled); 244 atomic_inc(&data->disabled);
242 245
243 local_save_flags(flags); 246 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags); 247 trace_function(tr, data, ip, parent_ip, flags, preempt_count());
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu); 248 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0; 249 data->critical_start = 0;
247 atomic_dec(&data->disabled); 250 atomic_dec(&data->disabled);
@@ -253,12 +256,14 @@ void start_critical_timings(void)
253 if (preempt_trace() || irq_trace()) 256 if (preempt_trace() || irq_trace())
254 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); 257 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
255} 258}
259EXPORT_SYMBOL_GPL(start_critical_timings);
256 260
257void stop_critical_timings(void) 261void stop_critical_timings(void)
258{ 262{
259 if (preempt_trace() || irq_trace()) 263 if (preempt_trace() || irq_trace())
260 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); 264 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
261} 265}
266EXPORT_SYMBOL_GPL(stop_critical_timings);
262 267
263#ifdef CONFIG_IRQSOFF_TRACER 268#ifdef CONFIG_IRQSOFF_TRACER
264#ifdef CONFIG_PROVE_LOCKING 269#ifdef CONFIG_PROVE_LOCKING
@@ -337,12 +342,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller);
337#ifdef CONFIG_PREEMPT_TRACER 342#ifdef CONFIG_PREEMPT_TRACER
338void trace_preempt_on(unsigned long a0, unsigned long a1) 343void trace_preempt_on(unsigned long a0, unsigned long a1)
339{ 344{
340 stop_critical_timing(a0, a1); 345 if (preempt_trace())
346 stop_critical_timing(a0, a1);
341} 347}
342 348
343void trace_preempt_off(unsigned long a0, unsigned long a1) 349void trace_preempt_off(unsigned long a0, unsigned long a1)
344{ 350{
345 start_critical_timing(a0, a1); 351 if (preempt_trace())
352 start_critical_timing(a0, a1);
346} 353}
347#endif /* CONFIG_PREEMPT_TRACER */ 354#endif /* CONFIG_PREEMPT_TRACER */
348 355
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index b13dc19dcbb..f28484618ff 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -27,7 +27,7 @@ static void mmio_reset_data(struct trace_array *tr)
27 tr->time_start = ftrace_now(tr->cpu); 27 tr->time_start = ftrace_now(tr->cpu);
28 28
29 for_each_online_cpu(cpu) 29 for_each_online_cpu(cpu)
30 tracing_reset(tr->data[cpu]); 30 tracing_reset(tr, cpu);
31} 31}
32 32
33static void mmio_trace_init(struct trace_array *tr) 33static void mmio_trace_init(struct trace_array *tr)
@@ -130,10 +130,14 @@ static unsigned long count_overruns(struct trace_iterator *iter)
130{ 130{
131 int cpu; 131 int cpu;
132 unsigned long cnt = 0; 132 unsigned long cnt = 0;
133/* FIXME: */
134#if 0
133 for_each_online_cpu(cpu) { 135 for_each_online_cpu(cpu) {
134 cnt += iter->overrun[cpu]; 136 cnt += iter->overrun[cpu];
135 iter->overrun[cpu] = 0; 137 iter->overrun[cpu] = 0;
136 } 138 }
139#endif
140 (void)cpu;
137 return cnt; 141 return cnt;
138} 142}
139 143
@@ -171,17 +175,21 @@ print_out:
171 return (ret == -EBUSY) ? 0 : ret; 175 return (ret == -EBUSY) ? 0 : ret;
172} 176}
173 177
174static int mmio_print_rw(struct trace_iterator *iter) 178static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
175{ 179{
176 struct trace_entry *entry = iter->ent; 180 struct trace_entry *entry = iter->ent;
177 struct mmiotrace_rw *rw = &entry->mmiorw; 181 struct trace_mmiotrace_rw *field;
182 struct mmiotrace_rw *rw;
178 struct trace_seq *s = &iter->seq; 183 struct trace_seq *s = &iter->seq;
179 unsigned long long t = ns2usecs(entry->t); 184 unsigned long long t = ns2usecs(iter->ts);
180 unsigned long usec_rem = do_div(t, 1000000ULL); 185 unsigned long usec_rem = do_div(t, 1000000ULL);
181 unsigned secs = (unsigned long)t; 186 unsigned secs = (unsigned long)t;
182 int ret = 1; 187 int ret = 1;
183 188
184 switch (entry->mmiorw.opcode) { 189 trace_assign_type(field, entry);
190 rw = &field->rw;
191
192 switch (rw->opcode) {
185 case MMIO_READ: 193 case MMIO_READ:
186 ret = trace_seq_printf(s, 194 ret = trace_seq_printf(s,
187 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", 195 "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n",
@@ -209,21 +217,25 @@ static int mmio_print_rw(struct trace_iterator *iter)
209 break; 217 break;
210 } 218 }
211 if (ret) 219 if (ret)
212 return 1; 220 return TRACE_TYPE_HANDLED;
213 return 0; 221 return TRACE_TYPE_PARTIAL_LINE;
214} 222}
215 223
216static int mmio_print_map(struct trace_iterator *iter) 224static enum print_line_t mmio_print_map(struct trace_iterator *iter)
217{ 225{
218 struct trace_entry *entry = iter->ent; 226 struct trace_entry *entry = iter->ent;
219 struct mmiotrace_map *m = &entry->mmiomap; 227 struct trace_mmiotrace_map *field;
228 struct mmiotrace_map *m;
220 struct trace_seq *s = &iter->seq; 229 struct trace_seq *s = &iter->seq;
221 unsigned long long t = ns2usecs(entry->t); 230 unsigned long long t = ns2usecs(iter->ts);
222 unsigned long usec_rem = do_div(t, 1000000ULL); 231 unsigned long usec_rem = do_div(t, 1000000ULL);
223 unsigned secs = (unsigned long)t; 232 unsigned secs = (unsigned long)t;
224 int ret = 1; 233 int ret;
225 234
226 switch (entry->mmiorw.opcode) { 235 trace_assign_type(field, entry);
236 m = &field->map;
237
238 switch (m->opcode) {
227 case MMIO_PROBE: 239 case MMIO_PROBE:
228 ret = trace_seq_printf(s, 240 ret = trace_seq_printf(s,
229 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", 241 "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n",
@@ -241,20 +253,43 @@ static int mmio_print_map(struct trace_iterator *iter)
241 break; 253 break;
242 } 254 }
243 if (ret) 255 if (ret)
244 return 1; 256 return TRACE_TYPE_HANDLED;
245 return 0; 257 return TRACE_TYPE_PARTIAL_LINE;
258}
259
260static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
261{
262 struct trace_entry *entry = iter->ent;
263 struct print_entry *print = (struct print_entry *)entry;
264 const char *msg = print->buf;
265 struct trace_seq *s = &iter->seq;
266 unsigned long long t = ns2usecs(iter->ts);
267 unsigned long usec_rem = do_div(t, 1000000ULL);
268 unsigned secs = (unsigned long)t;
269 int ret;
270
271 /* The trailing newline must be in the message. */
272 ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg);
273 if (!ret)
274 return TRACE_TYPE_PARTIAL_LINE;
275
276 if (entry->flags & TRACE_FLAG_CONT)
277 trace_seq_print_cont(s, iter);
278
279 return TRACE_TYPE_HANDLED;
246} 280}
247 281
248/* return 0 to abort printing without consuming current entry in pipe mode */ 282static enum print_line_t mmio_print_line(struct trace_iterator *iter)
249static int mmio_print_line(struct trace_iterator *iter)
250{ 283{
251 switch (iter->ent->type) { 284 switch (iter->ent->type) {
252 case TRACE_MMIO_RW: 285 case TRACE_MMIO_RW:
253 return mmio_print_rw(iter); 286 return mmio_print_rw(iter);
254 case TRACE_MMIO_MAP: 287 case TRACE_MMIO_MAP:
255 return mmio_print_map(iter); 288 return mmio_print_map(iter);
289 case TRACE_PRINT:
290 return mmio_print_mark(iter);
256 default: 291 default:
257 return 1; /* ignore unknown entries */ 292 return TRACE_TYPE_HANDLED; /* ignore unknown entries */
258 } 293 }
259} 294}
260 295
@@ -276,6 +311,27 @@ __init static int init_mmio_trace(void)
276} 311}
277device_initcall(init_mmio_trace); 312device_initcall(init_mmio_trace);
278 313
314static void __trace_mmiotrace_rw(struct trace_array *tr,
315 struct trace_array_cpu *data,
316 struct mmiotrace_rw *rw)
317{
318 struct ring_buffer_event *event;
319 struct trace_mmiotrace_rw *entry;
320 unsigned long irq_flags;
321
322 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
323 &irq_flags);
324 if (!event)
325 return;
326 entry = ring_buffer_event_data(event);
327 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
328 entry->ent.type = TRACE_MMIO_RW;
329 entry->rw = *rw;
330 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
331
332 trace_wake_up();
333}
334
279void mmio_trace_rw(struct mmiotrace_rw *rw) 335void mmio_trace_rw(struct mmiotrace_rw *rw)
280{ 336{
281 struct trace_array *tr = mmio_trace_array; 337 struct trace_array *tr = mmio_trace_array;
@@ -283,6 +339,27 @@ void mmio_trace_rw(struct mmiotrace_rw *rw)
283 __trace_mmiotrace_rw(tr, data, rw); 339 __trace_mmiotrace_rw(tr, data, rw);
284} 340}
285 341
342static void __trace_mmiotrace_map(struct trace_array *tr,
343 struct trace_array_cpu *data,
344 struct mmiotrace_map *map)
345{
346 struct ring_buffer_event *event;
347 struct trace_mmiotrace_map *entry;
348 unsigned long irq_flags;
349
350 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
351 &irq_flags);
352 if (!event)
353 return;
354 entry = ring_buffer_event_data(event);
355 tracing_generic_entry_update(&entry->ent, 0, preempt_count());
356 entry->ent.type = TRACE_MMIO_MAP;
357 entry->map = *map;
358 ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
359
360 trace_wake_up();
361}
362
286void mmio_trace_mapping(struct mmiotrace_map *map) 363void mmio_trace_mapping(struct mmiotrace_map *map)
287{ 364{
288 struct trace_array *tr = mmio_trace_array; 365 struct trace_array *tr = mmio_trace_array;
@@ -293,3 +370,8 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
293 __trace_mmiotrace_map(tr, data, map); 370 __trace_mmiotrace_map(tr, data, map);
294 preempt_enable(); 371 preempt_enable();
295} 372}
373
374int mmio_trace_printk(const char *fmt, va_list args)
375{
376 return trace_vprintk(0, fmt, args);
377}
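
mmio_trace_printk() is a thin varargs shim: the caller's format and va_list are handed straight to trace_vprintk(), which does the real work. The same delegation idiom in plain C, with invented names:

    #include <stdarg.h>
    #include <stdio.h>

    static int my_vlog(const char *fmt, va_list args)
    {
        return vfprintf(stderr, fmt, args);
    }

    static int my_log(const char *fmt, ...)
    {
        va_list args;
        int ret;

        va_start(args, fmt);
        ret = my_vlog(fmt, args);   /* hand the va_list straight down */
        va_end(args);
        return ret;
    }

    int main(void)
    {
        return my_log("MARK %lu.%06lu %s\n", 5UL, 42UL, "hello") < 0;
    }
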
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
new file mode 100644
index 00000000000..4592b486251
--- /dev/null
+++ b/kernel/trace/trace_nop.c
@@ -0,0 +1,64 @@
1/*
2 * nop tracer
3 *
4 * Copyright (C) 2008 Steven Noonan <steven@uplinklabs.net>
5 *
6 */
7
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12
13#include "trace.h"
14
15static struct trace_array *ctx_trace;
16
17static void start_nop_trace(struct trace_array *tr)
18{
19 /* Nothing to do! */
20}
21
22static void stop_nop_trace(struct trace_array *tr)
23{
24 /* Nothing to do! */
25}
26
27static void nop_trace_init(struct trace_array *tr)
28{
29 int cpu;
30 ctx_trace = tr;
31
32 for_each_online_cpu(cpu)
33 tracing_reset(tr, cpu);
34
35 if (tr->ctrl)
36 start_nop_trace(tr);
37}
38
39static void nop_trace_reset(struct trace_array *tr)
40{
41 if (tr->ctrl)
42 stop_nop_trace(tr);
43}
44
45static void nop_trace_ctrl_update(struct trace_array *tr)
46{
47 /* When starting a new trace, reset the buffers */
48 if (tr->ctrl)
49 start_nop_trace(tr);
50 else
51 stop_nop_trace(tr);
52}
53
54struct tracer nop_trace __read_mostly =
55{
56 .name = "nop",
57 .init = nop_trace_init,
58 .reset = nop_trace_reset,
59 .ctrl_update = nop_trace_ctrl_update,
60#ifdef CONFIG_FTRACE_SELFTEST
61 .selftest = trace_selftest_startup_nop,
62#endif
63};
64
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa..b8f56beb1a6 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
11#include <linux/uaccess.h> 11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h> 12#include <linux/ftrace.h>
13#include <trace/sched.h>
14 14
15#include "trace.h" 15#include "trace.h"
16 16
@@ -19,15 +19,16 @@ static int __read_mostly tracer_enabled;
19static atomic_t sched_ref; 19static atomic_t sched_ref;
20 20
21static void 21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev, 22probe_sched_switch(struct rq *__rq, struct task_struct *prev,
23 struct task_struct *next) 23 struct task_struct *next)
24{ 24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data; 25 struct trace_array_cpu *data;
28 unsigned long flags; 26 unsigned long flags;
29 long disabled;
30 int cpu; 27 int cpu;
28 int pc;
29
30 if (!atomic_read(&sched_ref))
31 return;
31 32
32 tracing_record_cmdline(prev); 33 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next); 34 tracing_record_cmdline(next);
@@ -35,97 +36,41 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
35 if (!tracer_enabled) 36 if (!tracer_enabled)
36 return; 37 return;
37 38
39 pc = preempt_count();
38 local_irq_save(flags); 40 local_irq_save(flags);
39 cpu = raw_smp_processor_id(); 41 cpu = raw_smp_processor_id();
40 data = tr->data[cpu]; 42 data = ctx_trace->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42 43
43 if (likely(disabled == 1)) 44 if (likely(!atomic_read(&data->disabled)))
44 tracing_sched_switch_trace(tr, data, prev, next, flags); 45 tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc);
45 46
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags); 47 local_irq_restore(flags);
48} 48}
49 49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void 50static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct 51probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
78 task_struct *curr)
79{ 52{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data; 53 struct trace_array_cpu *data;
83 unsigned long flags; 54 unsigned long flags;
84 long disabled; 55 int cpu, pc;
85 int cpu;
86 56
87 if (!tracer_enabled) 57 if (!likely(tracer_enabled))
88 return; 58 return;
89 59
90 tracing_record_cmdline(curr); 60 pc = preempt_count();
61 tracing_record_cmdline(current);
91 62
92 local_irq_save(flags); 63 local_irq_save(flags);
93 cpu = raw_smp_processor_id(); 64 cpu = raw_smp_processor_id();
94 data = tr->data[cpu]; 65 data = ctx_trace->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96 66
97 if (likely(disabled == 1)) 67 if (likely(!atomic_read(&data->disabled)))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); 68 tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
69 flags, pc);
99 70
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags); 71 local_irq_restore(flags);
102} 72}
103 73
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr) 74static void sched_switch_reset(struct trace_array *tr)
130{ 75{
131 int cpu; 76 int cpu;
@@ -133,67 +78,47 @@ static void sched_switch_reset(struct trace_array *tr)
133 tr->time_start = ftrace_now(tr->cpu); 78 tr->time_start = ftrace_now(tr->cpu);
134 79
135 for_each_online_cpu(cpu) 80 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]); 81 tracing_reset(tr, cpu);
137} 82}
138 83
139static int tracing_sched_register(void) 84static int tracing_sched_register(void)
140{ 85{
141 int ret; 86 int ret;
142 87
143 ret = marker_probe_register("kernel_sched_wakeup", 88 ret = register_trace_sched_wakeup(probe_sched_wakeup);
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) { 89 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker" 90 pr_info("wakeup trace: Couldn't activate tracepoint"
149 " probe to kernel_sched_wakeup\n"); 91 " probe to kernel_sched_wakeup\n");
150 return ret; 92 return ret;
151 } 93 }
152 94
153 ret = marker_probe_register("kernel_sched_wakeup_new", 95 ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) { 96 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker" 97 pr_info("wakeup trace: Couldn't activate tracepoint"
159 " probe to kernel_sched_wakeup_new\n"); 98 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe; 99 goto fail_deprobe;
161 } 100 }
162 101
163 ret = marker_probe_register("kernel_sched_schedule", 102 ret = register_trace_sched_switch(probe_sched_switch);
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) { 103 if (ret) {
169 pr_info("sched trace: Couldn't add marker" 104 pr_info("sched trace: Couldn't activate tracepoint"
170 " probe to kernel_sched_schedule\n"); 105 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new; 106 goto fail_deprobe_wake_new;
172 } 107 }
173 108
174 return ret; 109 return ret;
175fail_deprobe_wake_new: 110fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new", 111 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe: 112fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup", 113 unregister_trace_sched_wakeup(probe_sched_wakeup);
181 wake_up_callback,
182 &ctx_trace);
183 return ret; 114 return ret;
184} 115}
185 116
186static void tracing_sched_unregister(void) 117static void tracing_sched_unregister(void)
187{ 118{
188 marker_probe_unregister("kernel_sched_schedule", 119 unregister_trace_sched_switch(probe_sched_switch);
189 sched_switch_callback, 120 unregister_trace_sched_wakeup_new(probe_sched_wakeup);
190 &ctx_trace); 121 unregister_trace_sched_wakeup(probe_sched_wakeup);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197} 122}
198 123
199static void tracing_start_sched_switch(void) 124static void tracing_start_sched_switch(void)
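
Note the registration sequence above: each tracepoint is attached in turn, and a failure unwinds only the probes already registered by falling through the goto labels in reverse order. A stand-alone sketch of that staged-unwind idiom (reg/unreg are stubs, not kernel calls):

    #include <stdio.h>

    static int  reg(const char *name)   { printf("register %s\n", name); return 0; }
    static void unreg(const char *name) { printf("unregister %s\n", name); }

    static int setup(void)
    {
        int ret;

        ret = reg("sched_wakeup");
        if (ret)
            return ret;

        ret = reg("sched_wakeup_new");
        if (ret)
            goto fail_deprobe;

        ret = reg("sched_switch");
        if (ret)
            goto fail_deprobe_wake_new;

        return 0;

    fail_deprobe_wake_new:
        unreg("sched_wakeup_new");
    fail_deprobe:
        unreg("sched_wakeup");
        return ret;
    }

    int main(void)
    {
        return setup();
    }
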
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c8d61df447..fe4a252c236 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/ftrace.h> 17#include <linux/ftrace.h>
18#include <linux/marker.h> 18#include <trace/sched.h>
19 19
20#include "trace.h" 20#include "trace.h"
21 21
@@ -26,7 +26,8 @@ static struct task_struct *wakeup_task;
26static int wakeup_cpu; 26static int wakeup_cpu;
27static unsigned wakeup_prio = -1; 27static unsigned wakeup_prio = -1;
28 28
29static DEFINE_SPINLOCK(wakeup_lock); 29static raw_spinlock_t wakeup_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
30 31
31static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
32 33
@@ -43,10 +44,12 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
43 long disabled; 44 long disabled;
44 int resched; 45 int resched;
45 int cpu; 46 int cpu;
47 int pc;
46 48
47 if (likely(!wakeup_task)) 49 if (likely(!wakeup_task))
48 return; 50 return;
49 51
52 pc = preempt_count();
50 resched = need_resched(); 53 resched = need_resched();
51 preempt_disable_notrace(); 54 preempt_disable_notrace();
52 55
@@ -56,7 +59,8 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
56 if (unlikely(disabled != 1)) 59 if (unlikely(disabled != 1))
57 goto out; 60 goto out;
58 61
59 spin_lock_irqsave(&wakeup_lock, flags); 62 local_irq_save(flags);
63 __raw_spin_lock(&wakeup_lock);
60 64
61 if (unlikely(!wakeup_task)) 65 if (unlikely(!wakeup_task))
62 goto unlock; 66 goto unlock;
@@ -68,10 +72,11 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
68 if (task_cpu(wakeup_task) != cpu) 72 if (task_cpu(wakeup_task) != cpu)
69 goto unlock; 73 goto unlock;
70 74
71 trace_function(tr, data, ip, parent_ip, flags); 75 trace_function(tr, data, ip, parent_ip, flags, pc);
72 76
73 unlock: 77 unlock:
74 spin_unlock_irqrestore(&wakeup_lock, flags); 78 __raw_spin_unlock(&wakeup_lock);
79 local_irq_restore(flags);
75 80
76 out: 81 out:
77 atomic_dec(&data->disabled); 82 atomic_dec(&data->disabled);
@@ -109,17 +114,18 @@ static int report_latency(cycle_t delta)
109} 114}
110 115
111static void notrace 116static void notrace
112wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, 117probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
113 struct task_struct *next) 118 struct task_struct *next)
114{ 119{
115 unsigned long latency = 0, t0 = 0, t1 = 0; 120 unsigned long latency = 0, t0 = 0, t1 = 0;
116 struct trace_array **ptr = private;
117 struct trace_array *tr = *ptr;
118 struct trace_array_cpu *data; 121 struct trace_array_cpu *data;
119 cycle_t T0, T1, delta; 122 cycle_t T0, T1, delta;
120 unsigned long flags; 123 unsigned long flags;
121 long disabled; 124 long disabled;
122 int cpu; 125 int cpu;
126 int pc;
127
128 tracing_record_cmdline(prev);
123 129
124 if (unlikely(!tracer_enabled)) 130 if (unlikely(!tracer_enabled))
125 return; 131 return;
@@ -136,22 +142,25 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
136 if (next != wakeup_task) 142 if (next != wakeup_task)
137 return; 143 return;
138 144
145 pc = preempt_count();
146
139 /* The task we are waiting for is waking up */ 147 /* The task we are waiting for is waking up */
140 data = tr->data[wakeup_cpu]; 148 data = wakeup_trace->data[wakeup_cpu];
141 149
142 /* disable local data, not wakeup_cpu data */ 150 /* disable local data, not wakeup_cpu data */
143 cpu = raw_smp_processor_id(); 151 cpu = raw_smp_processor_id();
144 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 152 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
145 if (likely(disabled != 1)) 153 if (likely(disabled != 1))
146 goto out; 154 goto out;
147 155
148 spin_lock_irqsave(&wakeup_lock, flags); 156 local_irq_save(flags);
157 __raw_spin_lock(&wakeup_lock);
149 158
150 /* We could race with grabbing wakeup_lock */ 159 /* We could race with grabbing wakeup_lock */
151 if (unlikely(!tracer_enabled || next != wakeup_task)) 160 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock; 161 goto out_unlock;
153 162
154 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); 163 trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
155 164
156 /* 165 /*
157 * usecs conversion is slow so we try to delay the conversion 166 * usecs conversion is slow so we try to delay the conversion
@@ -170,38 +179,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
170 t0 = nsecs_to_usecs(T0); 179 t0 = nsecs_to_usecs(T0);
171 t1 = nsecs_to_usecs(T1); 180 t1 = nsecs_to_usecs(T1);
172 181
173 update_max_tr(tr, wakeup_task, wakeup_cpu); 182 update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
174 183
175out_unlock: 184out_unlock:
176 __wakeup_reset(tr); 185 __wakeup_reset(wakeup_trace);
177 spin_unlock_irqrestore(&wakeup_lock, flags); 186 __raw_spin_unlock(&wakeup_lock);
187 local_irq_restore(flags);
178out: 188out:
179 atomic_dec(&tr->data[cpu]->disabled); 189 atomic_dec(&wakeup_trace->data[cpu]->disabled);
180}
181
182static notrace void
183sched_switch_callback(void *probe_data, void *call_data,
184 const char *format, va_list *args)
185{
186 struct task_struct *prev;
187 struct task_struct *next;
188 struct rq *__rq;
189
190 /* skip prev_pid %d next_pid %d prev_state %ld */
191 (void)va_arg(*args, int);
192 (void)va_arg(*args, int);
193 (void)va_arg(*args, long);
194 __rq = va_arg(*args, typeof(__rq));
195 prev = va_arg(*args, typeof(prev));
196 next = va_arg(*args, typeof(next));
197
198 tracing_record_cmdline(prev);
199
200 /*
201 * If tracer_switch_func only points to the local
202 * switch func, it still needs the ptr passed to it.
203 */
204 wakeup_sched_switch(probe_data, __rq, prev, next);
205} 190}
206 191
207static void __wakeup_reset(struct trace_array *tr) 192static void __wakeup_reset(struct trace_array *tr)
@@ -209,11 +194,9 @@ static void __wakeup_reset(struct trace_array *tr)
209 struct trace_array_cpu *data; 194 struct trace_array_cpu *data;
210 int cpu; 195 int cpu;
211 196
212 assert_spin_locked(&wakeup_lock);
213
214 for_each_possible_cpu(cpu) { 197 for_each_possible_cpu(cpu) {
215 data = tr->data[cpu]; 198 data = tr->data[cpu];
216 tracing_reset(data); 199 tracing_reset(tr, cpu);
217 } 200 }
218 201
219 wakeup_cpu = -1; 202 wakeup_cpu = -1;
@@ -229,37 +212,46 @@ static void wakeup_reset(struct trace_array *tr)
229{ 212{
230 unsigned long flags; 213 unsigned long flags;
231 214
232 spin_lock_irqsave(&wakeup_lock, flags); 215 local_irq_save(flags);
216 __raw_spin_lock(&wakeup_lock);
233 __wakeup_reset(tr); 217 __wakeup_reset(tr);
234 spin_unlock_irqrestore(&wakeup_lock, flags); 218 __raw_spin_unlock(&wakeup_lock);
219 local_irq_restore(flags);
235} 220}
236 221
237static void 222static void
238wakeup_check_start(struct trace_array *tr, struct task_struct *p, 223probe_wakeup(struct rq *rq, struct task_struct *p)
239 struct task_struct *curr)
240{ 224{
241 int cpu = smp_processor_id(); 225 int cpu = smp_processor_id();
242 unsigned long flags; 226 unsigned long flags;
243 long disabled; 227 long disabled;
228 int pc;
229
230 if (likely(!tracer_enabled))
231 return;
232
233 tracing_record_cmdline(p);
234 tracing_record_cmdline(current);
244 235
245 if (likely(!rt_task(p)) || 236 if (likely(!rt_task(p)) ||
246 p->prio >= wakeup_prio || 237 p->prio >= wakeup_prio ||
247 p->prio >= curr->prio) 238 p->prio >= current->prio)
248 return; 239 return;
249 240
250 disabled = atomic_inc_return(&tr->data[cpu]->disabled); 241 pc = preempt_count();
242 disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
251 if (unlikely(disabled != 1)) 243 if (unlikely(disabled != 1))
252 goto out; 244 goto out;
253 245
254 /* interrupts should be off from try_to_wake_up */ 246 /* interrupts should be off from try_to_wake_up */
255 spin_lock(&wakeup_lock); 247 __raw_spin_lock(&wakeup_lock);
256 248
257 /* check for races. */ 249 /* check for races. */
258 if (!tracer_enabled || p->prio >= wakeup_prio) 250 if (!tracer_enabled || p->prio >= wakeup_prio)
259 goto out_locked; 251 goto out_locked;
260 252
261 /* reset the trace */ 253 /* reset the trace */
262 __wakeup_reset(tr); 254 __wakeup_reset(wakeup_trace);
263 255
264 wakeup_cpu = task_cpu(p); 256 wakeup_cpu = task_cpu(p);
265 wakeup_prio = p->prio; 257 wakeup_prio = p->prio;
@@ -269,74 +261,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
269 261
270 local_save_flags(flags); 262 local_save_flags(flags);
271 263
272 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); 264 wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
273 trace_function(tr, tr->data[wakeup_cpu], 265 trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
274 CALLER_ADDR1, CALLER_ADDR2, flags); 266 CALLER_ADDR1, CALLER_ADDR2, flags, pc);
275 267
276out_locked: 268out_locked:
277 spin_unlock(&wakeup_lock); 269 __raw_spin_unlock(&wakeup_lock);
278out: 270out:
279 atomic_dec(&tr->data[cpu]->disabled); 271 atomic_dec(&wakeup_trace->data[cpu]->disabled);
280}
281
282static notrace void
283wake_up_callback(void *probe_data, void *call_data,
284 const char *format, va_list *args)
285{
286 struct trace_array **ptr = probe_data;
287 struct trace_array *tr = *ptr;
288 struct task_struct *curr;
289 struct task_struct *task;
290 struct rq *__rq;
291
292 if (likely(!tracer_enabled))
293 return;
294
295 /* Skip pid %d state %ld */
296 (void)va_arg(*args, int);
297 (void)va_arg(*args, long);
298 /* now get the meat: "rq %p task %p rq->curr %p" */
299 __rq = va_arg(*args, typeof(__rq));
300 task = va_arg(*args, typeof(task));
301 curr = va_arg(*args, typeof(curr));
302
303 tracing_record_cmdline(task);
304 tracing_record_cmdline(curr);
305
306 wakeup_check_start(tr, task, curr);
307} 272}
308 273
309static void start_wakeup_tracer(struct trace_array *tr) 274static void start_wakeup_tracer(struct trace_array *tr)
310{ 275{
311 int ret; 276 int ret;
312 277
313 ret = marker_probe_register("kernel_sched_wakeup", 278 ret = register_trace_sched_wakeup(probe_wakeup);
314 "pid %d state %ld ## rq %p task %p rq->curr %p",
315 wake_up_callback,
316 &wakeup_trace);
317 if (ret) { 279 if (ret) {
318 pr_info("wakeup trace: Couldn't add marker" 280 pr_info("wakeup trace: Couldn't activate tracepoint"
319 " probe to kernel_sched_wakeup\n"); 281 " probe to kernel_sched_wakeup\n");
320 return; 282 return;
321 } 283 }
322 284
323 ret = marker_probe_register("kernel_sched_wakeup_new", 285 ret = register_trace_sched_wakeup_new(probe_wakeup);
324 "pid %d state %ld ## rq %p task %p rq->curr %p",
325 wake_up_callback,
326 &wakeup_trace);
327 if (ret) { 286 if (ret) {
328 pr_info("wakeup trace: Couldn't add marker" 287 pr_info("wakeup trace: Couldn't activate tracepoint"
329 " probe to kernel_sched_wakeup_new\n"); 288 " probe to kernel_sched_wakeup_new\n");
330 goto fail_deprobe; 289 goto fail_deprobe;
331 } 290 }
332 291
333 ret = marker_probe_register("kernel_sched_schedule", 292 ret = register_trace_sched_switch(probe_wakeup_sched_switch);
334 "prev_pid %d next_pid %d prev_state %ld "
335 "## rq %p prev %p next %p",
336 sched_switch_callback,
337 &wakeup_trace);
338 if (ret) { 293 if (ret) {
339 pr_info("sched trace: Couldn't add marker" 294 pr_info("sched trace: Couldn't activate tracepoint"
340 " probe to kernel_sched_schedule\n"); 295 " probe to kernel_sched_schedule\n");
341 goto fail_deprobe_wake_new; 296 goto fail_deprobe_wake_new;
342 } 297 }
@@ -358,28 +313,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
358 313
359 return; 314 return;
360fail_deprobe_wake_new: 315fail_deprobe_wake_new:
361 marker_probe_unregister("kernel_sched_wakeup_new", 316 unregister_trace_sched_wakeup_new(probe_wakeup);
362 wake_up_callback,
363 &wakeup_trace);
364fail_deprobe: 317fail_deprobe:
365 marker_probe_unregister("kernel_sched_wakeup", 318 unregister_trace_sched_wakeup(probe_wakeup);
366 wake_up_callback,
367 &wakeup_trace);
368} 319}
369 320
370static void stop_wakeup_tracer(struct trace_array *tr) 321static void stop_wakeup_tracer(struct trace_array *tr)
371{ 322{
372 tracer_enabled = 0; 323 tracer_enabled = 0;
373 unregister_ftrace_function(&trace_ops); 324 unregister_ftrace_function(&trace_ops);
374 marker_probe_unregister("kernel_sched_schedule", 325 unregister_trace_sched_switch(probe_wakeup_sched_switch);
375 sched_switch_callback, 326 unregister_trace_sched_wakeup_new(probe_wakeup);
376 &wakeup_trace); 327 unregister_trace_sched_wakeup(probe_wakeup);
377 marker_probe_unregister("kernel_sched_wakeup_new",
378 wake_up_callback,
379 &wakeup_trace);
380 marker_probe_unregister("kernel_sched_wakeup",
381 wake_up_callback,
382 &wakeup_trace);
383} 328}
384 329
385static void wakeup_tracer_init(struct trace_array *tr) 330static void wakeup_tracer_init(struct trace_array *tr)
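
The switch from spin_lock_irqsave() to local_irq_save() plus __raw_spin_lock() matters because wakeup_lock is taken from inside the tracer itself: a lockdep-instrumented lock would recurse back into tracing. A raw spinlock is just an uninstrumented test-and-set loop; roughly, in userspace terms (using GCC's __sync builtins, not the kernel API):

    #include <stdio.h>

    static volatile int lock;

    static void raw_lock(void)
    {
        /* bare test-and-set spin loop, no instrumentation at all */
        while (__sync_lock_test_and_set(&lock, 1))
            ;
    }

    static void raw_unlock(void)
    {
        __sync_lock_release(&lock);
    }

    int main(void)
    {
        raw_lock();
        puts("in critical section");
        raw_unlock();
        return 0;
    }
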
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 0911b7e073b..09cf230d7ec 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -9,65 +9,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
9 case TRACE_FN: 9 case TRACE_FN:
10 case TRACE_CTX: 10 case TRACE_CTX:
11 case TRACE_WAKE: 11 case TRACE_WAKE:
12 case TRACE_CONT:
12 case TRACE_STACK: 13 case TRACE_STACK:
14 case TRACE_PRINT:
13 case TRACE_SPECIAL: 15 case TRACE_SPECIAL:
14 return 1; 16 return 1;
15 } 17 }
16 return 0; 18 return 0;
17} 19}
18 20
19static int 21static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
20trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
21{ 22{
22 struct trace_entry *entries; 23 struct ring_buffer_event *event;
23 struct page *page; 24 struct trace_entry *entry;
24 int idx = 0;
25 int i;
26 25
27 BUG_ON(list_empty(&data->trace_pages)); 26 while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
28 page = list_entry(data->trace_pages.next, struct page, lru); 27 entry = ring_buffer_event_data(event);
29 entries = page_address(page);
30 28
31 check_pages(data); 29 if (!trace_valid_entry(entry)) {
32 if (head_page(data) != entries)
33 goto failed;
34
35 /*
36 * The starting trace buffer always has valid elements,
37 * if any element exists.
38 */
39 entries = head_page(data);
40
41 for (i = 0; i < tr->entries; i++) {
42
43 if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
44 printk(KERN_CONT ".. invalid entry %d ", 30 printk(KERN_CONT ".. invalid entry %d ",
45 entries[idx].type); 31 entry->type);
46 goto failed; 32 goto failed;
47 } 33 }
48
49 idx++;
50 if (idx >= ENTRIES_PER_PAGE) {
51 page = virt_to_page(entries);
52 if (page->lru.next == &data->trace_pages) {
53 if (i != tr->entries - 1) {
54 printk(KERN_CONT ".. entries buffer mismatch");
55 goto failed;
56 }
57 } else {
58 page = list_entry(page->lru.next, struct page, lru);
59 entries = page_address(page);
60 }
61 idx = 0;
62 }
63 } 34 }
64
65 page = virt_to_page(entries);
66 if (page->lru.next != &data->trace_pages) {
67 printk(KERN_CONT ".. too many entries");
68 goto failed;
69 }
70
71 return 0; 35 return 0;
72 36
73 failed: 37 failed:
@@ -89,13 +53,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
89 /* Don't allow flipping of max traces now */ 53 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags); 54 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock); 55 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95 56
96 cnt += tr->data[cpu]->trace_idx; 57 cnt = ring_buffer_entries(tr->buffer);
97 58
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]); 59 for_each_possible_cpu(cpu) {
60 ret = trace_test_buffer_cpu(tr, cpu);
99 if (ret) 61 if (ret)
100 break; 62 break;
101 } 63 }
@@ -120,11 +82,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr, 82 struct trace_array *tr,
121 int (*func)(void)) 83 int (*func)(void))
122{ 84{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled; 85 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled; 86 int save_tracer_enabled = tracer_enabled;
87 unsigned long count;
127 char *func_name; 88 char *func_name;
89 int ret;
128 90
129 /* The ftrace test PASSED */ 91 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n"); 92 printk(KERN_CONT "PASSED\n");
@@ -157,6 +119,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
157 /* enable tracing */ 119 /* enable tracing */
158 tr->ctrl = 1; 120 tr->ctrl = 1;
159 trace->init(tr); 121 trace->init(tr);
122
160 /* Sleep for a 1/10 of a second */ 123 /* Sleep for a 1/10 of a second */
161 msleep(100); 124 msleep(100);
162 125
@@ -212,10 +175,10 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
212int 175int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) 176trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{ 177{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled; 178 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled; 179 int save_tracer_enabled = tracer_enabled;
180 unsigned long count;
181 int ret;
219 182
220 /* make sure msleep has been recorded */ 183 /* make sure msleep has been recorded */
221 msleep(1); 184 msleep(1);
@@ -415,6 +378,15 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
415} 378}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */ 379#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417 380
381#ifdef CONFIG_NOP_TRACER
382int
383trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr)
384{
385 /* What could possibly go wrong? */
386 return 0;
387}
388#endif
389
418#ifdef CONFIG_SCHED_TRACER 390#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data) 391static int trace_wakeup_test_thread(void *data)
420{ 392{
@@ -486,6 +458,9 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
486 458
487 wake_up_process(p); 459 wake_up_process(p);
488 460
461 /* give a little time to let the thread wake up */
462 msleep(100);
463
489 /* stop the tracing. */ 464 /* stop the tracing. */
490 tr->ctrl = 0; 465 tr->ctrl = 0;
491 trace->ctrl_update(tr); 466 trace->ctrl_update(tr);
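Two details in the selftest rework deserve a note. First, buffer validation now consumes events through the ring-buffer API instead of walking trace pages and entry indexes by hand, which is why trace_test_buffer_cpu() shrinks to a single loop. Second, the wakeup selftest gains an msleep(100) after wake_up_process() so the woken thread actually gets to run before tracing is stopped. The validation loop, reduced to its core (the same ring_buffer_consume()/ring_buffer_event_data() pairing visible in the hunk above; the NULL argument says the caller does not need the event's timestamp):

    while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
            entry = ring_buffer_event_data(event);

            /* any entry type outside trace_valid_entry() fails the test */
            if (!trace_valid_entry(entry))
                    goto failed;
    }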
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
new file mode 100644
index 00000000000..74c5d9a3afa
--- /dev/null
+++ b/kernel/trace/trace_stack.c
@@ -0,0 +1,310 @@
1/*
2 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
3 *
4 */
5#include <linux/stacktrace.h>
6#include <linux/kallsyms.h>
7#include <linux/seq_file.h>
8#include <linux/spinlock.h>
9#include <linux/uaccess.h>
10#include <linux/debugfs.h>
11#include <linux/ftrace.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/fs.h>
15#include "trace.h"
16
17#define STACK_TRACE_ENTRIES 500
18
19static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
20 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
21static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
22
23static struct stack_trace max_stack_trace = {
24 .max_entries = STACK_TRACE_ENTRIES,
25 .entries = stack_dump_trace,
26};
27
28static unsigned long max_stack_size;
29static raw_spinlock_t max_stack_lock =
30 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
31
32static int stack_trace_disabled __read_mostly;
33static DEFINE_PER_CPU(int, trace_active);
34
35static inline void check_stack(void)
36{
37 unsigned long this_size, flags;
38 unsigned long *p, *top, *start;
39 int i;
40
41 this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
42 this_size = THREAD_SIZE - this_size;
43
44 if (this_size <= max_stack_size)
45 return;
46
47 raw_local_irq_save(flags);
48 __raw_spin_lock(&max_stack_lock);
49
50 /* a race could have already updated it */
51 if (this_size <= max_stack_size)
52 goto out;
53
54 max_stack_size = this_size;
55
56 max_stack_trace.nr_entries = 0;
57 max_stack_trace.skip = 3;
58
59 save_stack_trace(&max_stack_trace);
60
61 /*
62 * Now find where in the stack these are.
63 */
64 i = 0;
65 start = &this_size;
66 top = (unsigned long *)
67 (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
68
69 /*
70 * Loop through all the entries. Some entries may for
71 * whatever reason be missing from the stack, so we may
72 * have to account for them. If they are all present,
73 * this loop will only run once. This code only runs on
74 * a new max, so it is far from a fast path.
75 */
76 while (i < max_stack_trace.nr_entries) {
77
78 stack_dump_index[i] = this_size;
79 p = start;
80
81 for (; p < top && i < max_stack_trace.nr_entries; p++) {
82 if (*p == stack_dump_trace[i]) {
83 this_size = stack_dump_index[i++] =
84 (top - p) * sizeof(unsigned long);
85 /* Start the search from here */
86 start = p + 1;
87 }
88 }
89
90 i++;
91 }
92
93 out:
94 __raw_spin_unlock(&max_stack_lock);
95 raw_local_irq_restore(flags);
96}
97
98static void
99stack_trace_call(unsigned long ip, unsigned long parent_ip)
100{
101 int cpu, resched;
102
103 if (unlikely(!ftrace_enabled || stack_trace_disabled))
104 return;
105
106 resched = need_resched();
107 preempt_disable_notrace();
108
109 cpu = raw_smp_processor_id();
110 /* no atomic needed; this variable is only modified from this CPU */
111 if (per_cpu(trace_active, cpu)++ != 0)
112 goto out;
113
114 check_stack();
115
116 out:
117 per_cpu(trace_active, cpu)--;
118 /* prevent recursion in schedule */
119 if (resched)
120 preempt_enable_no_resched_notrace();
121 else
122 preempt_enable_notrace();
123}
124
125static struct ftrace_ops trace_ops __read_mostly =
126{
127 .func = stack_trace_call,
128};
129
130static ssize_t
131stack_max_size_read(struct file *filp, char __user *ubuf,
132 size_t count, loff_t *ppos)
133{
134 unsigned long *ptr = filp->private_data;
135 char buf[64];
136 int r;
137
138 r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
139 if (r > sizeof(buf))
140 r = sizeof(buf);
141 return simple_read_from_buffer(ubuf, count, ppos, buf, r);
142}
143
144static ssize_t
145stack_max_size_write(struct file *filp, const char __user *ubuf,
146 size_t count, loff_t *ppos)
147{
148 long *ptr = filp->private_data;
149 unsigned long val, flags;
150 char buf[64];
151 int ret;
152
153 if (count >= sizeof(buf))
154 return -EINVAL;
155
156 if (copy_from_user(&buf, ubuf, count))
157 return -EFAULT;
158
159 buf[count] = 0;
160
161 ret = strict_strtoul(buf, 10, &val);
162 if (ret < 0)
163 return ret;
164
165 raw_local_irq_save(flags);
166 __raw_spin_lock(&max_stack_lock);
167 *ptr = val;
168 __raw_spin_unlock(&max_stack_lock);
169 raw_local_irq_restore(flags);
170
171 return count;
172}
173
174static struct file_operations stack_max_size_fops = {
175 .open = tracing_open_generic,
176 .read = stack_max_size_read,
177 .write = stack_max_size_write,
178};
179
180static void *
181t_next(struct seq_file *m, void *v, loff_t *pos)
182{
183 long i = (long)m->private;
184
185 (*pos)++;
186
187 i++;
188
189 if (i >= max_stack_trace.nr_entries ||
190 stack_dump_trace[i] == ULONG_MAX)
191 return NULL;
192
193 m->private = (void *)i;
194
195 return &m->private;
196}
197
198static void *t_start(struct seq_file *m, loff_t *pos)
199{
200 void *t = &m->private;
201 loff_t l = 0;
202
203 local_irq_disable();
204 __raw_spin_lock(&max_stack_lock);
205
206 for (; t && l < *pos; t = t_next(m, t, &l))
207 ;
208
209 return t;
210}
211
212static void t_stop(struct seq_file *m, void *p)
213{
214 __raw_spin_unlock(&max_stack_lock);
215 local_irq_enable();
216}
217
218static int trace_lookup_stack(struct seq_file *m, long i)
219{
220 unsigned long addr = stack_dump_trace[i];
221#ifdef CONFIG_KALLSYMS
222 char str[KSYM_SYMBOL_LEN];
223
224 sprint_symbol(str, addr);
225
226 return seq_printf(m, "%s\n", str);
227#else
228 return seq_printf(m, "%p\n", (void*)addr);
229#endif
230}
231
232static int t_show(struct seq_file *m, void *v)
233{
234 long i = *(long *)v;
235 int size;
236
237 if (i < 0) {
238 seq_printf(m, " Depth Size Location"
239 " (%d entries)\n"
240 " ----- ---- --------\n",
241 max_stack_trace.nr_entries);
242 return 0;
243 }
244
245 if (i >= max_stack_trace.nr_entries ||
246 stack_dump_trace[i] == ULONG_MAX)
247 return 0;
248
249 if (i+1 == max_stack_trace.nr_entries ||
250 stack_dump_trace[i+1] == ULONG_MAX)
251 size = stack_dump_index[i];
252 else
253 size = stack_dump_index[i] - stack_dump_index[i+1];
254
255 seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size);
256
257 trace_lookup_stack(m, i);
258
259 return 0;
260}
261
262static struct seq_operations stack_trace_seq_ops = {
263 .start = t_start,
264 .next = t_next,
265 .stop = t_stop,
266 .show = t_show,
267};
268
269static int stack_trace_open(struct inode *inode, struct file *file)
270{
271 int ret;
272
273 ret = seq_open(file, &stack_trace_seq_ops);
274 if (!ret) {
275 struct seq_file *m = file->private_data;
276 m->private = (void *)-1;
277 }
278
279 return ret;
280}
281
282static struct file_operations stack_trace_fops = {
283 .open = stack_trace_open,
284 .read = seq_read,
285 .llseek = seq_lseek,
286};
287
288static __init int stack_trace_init(void)
289{
290 struct dentry *d_tracer;
291 struct dentry *entry;
292
293 d_tracer = tracing_init_dentry();
294
295 entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
296 &max_stack_size, &stack_max_size_fops);
297 if (!entry)
298 pr_warning("Could not create debugfs 'stack_max_size' entry\n");
299
300 entry = debugfs_create_file("stack_trace", 0444, d_tracer,
301 NULL, &stack_trace_fops);
302 if (!entry)
303 pr_warning("Could not create debugfs 'stack_trace' entry\n");
304
305 register_ftrace_function(&trace_ops);
306
307 return 0;
308}
309
310device_initcall(stack_trace_init);
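The new stack tracer measures use of the current thread stack as THREAD_SIZE minus the offset of a local variable within it; with an 8 KB THREAD_SIZE, for instance, a local sitting at offset 0x1a00 means 8192 - 6656 = 1536 bytes are in use (THREAD_SIZE is architecture-specific, so the numbers are illustrative). check_stack() then matches each address returned by save_stack_trace() against words on the stack to record a per-entry depth in stack_dump_index[], and t_show() turns adjacent depths into per-frame sizes. That last computation, pulled out of t_show() into a helper for readability (frame_size() itself is not part of the patch):

    /* Size of frame i: the difference between successive recorded
     * depths; the deepest (last) entry keeps its full depth. */
    static int frame_size(int i)
    {
            if (i + 1 == max_stack_trace.nr_entries ||
                stack_dump_trace[i + 1] == ULONG_MAX)
                    return stack_dump_index[i];

            return stack_dump_index[i] - stack_dump_index[i + 1];
    }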
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 2301e1e7c60..9587d3bcba5 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -161,7 +161,7 @@ static void timer_notify(struct pt_regs *regs, int cpu)
161 __trace_special(tr, data, 2, regs->ip, 0); 161 __trace_special(tr, data, 2, regs->ip, 0);
162 162
163 while (i < sample_max_depth) { 163 while (i < sample_max_depth) {
164 frame.next_fp = 0; 164 frame.next_fp = NULL;
165 frame.return_address = 0; 165 frame.return_address = 0;
166 if (!copy_stack_frame(fp, &frame)) 166 if (!copy_stack_frame(fp, &frame))
167 break; 167 break;
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
202 202
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn; 204 hrtimer->function = stack_trace_timer_fn;
205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; 205 hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
206 206
207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); 207 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
208} 208}
@@ -241,7 +241,7 @@ static void stack_reset(struct trace_array *tr)
241 tr->time_start = ftrace_now(tr->cpu); 241 tr->time_start = ftrace_now(tr->cpu);
242 242
243 for_each_online_cpu(cpu) 243 for_each_online_cpu(cpu)
244 tracing_reset(tr->data[cpu]); 244 tracing_reset(tr, cpu);
245} 245}
246 246
247static void start_stack_trace(struct trace_array *tr) 247static void start_stack_trace(struct trace_array *tr)
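The trace_sysprof.c hunks are mechanical fallout from the rest of the series: NULL rather than 0 for the next_fp pointer, the renamed HRTIMER_CB_IRQSAFE_PERCPU callback mode, and tracing_reset() addressing buffers by CPU id now that the per-cpu pages live behind the ring-buffer API. The new reset idiom, as it appears in stack_reset() above:

    /* reset one ring buffer per online CPU */
    for_each_online_cpu(cpu)
            tracing_reset(tr, cpu);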