aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile14
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/lockdep.c33
-rw-r--r--kernel/marker.c30
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/sched.c57
-rw-r--r--kernel/semaphore.c1
-rw-r--r--kernel/spinlock.c2
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/trace/Kconfig127
-rw-r--r--kernel/trace/Makefile22
-rw-r--r--kernel/trace/ftrace.c1710
-rw-r--r--kernel/trace/trace.c3100
-rw-r--r--kernel/trace/trace.h313
-rw-r--r--kernel/trace/trace_functions.c78
-rw-r--r--kernel/trace/trace_irqsoff.c486
-rw-r--r--kernel/trace/trace_sched_switch.c286
-rw-r--r--kernel/trace/trace_sched_wakeup.c447
-rw-r--r--kernel/trace/trace_selftest.c540
-rw-r--r--kernel/trace/trace_selftest_dynamic.c7
20 files changed, 7240 insertions, 28 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938addb9d..ca2433e84873 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,18 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -pg -mno-spe
15
16ifdef CONFIG_FTRACE
17# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg
19CFLAGS_REMOVE_lockdep_proc.o = -pg
20CFLAGS_REMOVE_mutex-debug.o = -pg
21CFLAGS_REMOVE_rtmutex-debug.o = -pg
22CFLAGS_REMOVE_cgroup-debug.o = -pg
23CFLAGS_REMOVE_sched_clock.o = -pg
24endif
25
14obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o 26obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
15obj-$(CONFIG_STACKTRACE) += stacktrace.o 27obj-$(CONFIG_STACKTRACE) += stacktrace.o
16obj-y += time/ 28obj-y += time/
@@ -69,6 +81,8 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
69obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o 81obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
70obj-$(CONFIG_MARKERS) += marker.o 82obj-$(CONFIG_MARKERS) += marker.o
71obj-$(CONFIG_LATENCYTOP) += latencytop.o 83obj-$(CONFIG_LATENCYTOP) += latencytop.o
84obj-$(CONFIG_FTRACE) += trace/
85obj-$(CONFIG_TRACING) += trace/
72 86
73ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 87ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
74# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 88# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/fork.c b/kernel/fork.c
index 19908b26cf80..d66d676dc362 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -909,7 +909,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
909 909
910 rt_mutex_init_task(p); 910 rt_mutex_init_task(p);
911 911
912#ifdef CONFIG_TRACE_IRQFLAGS 912#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_LOCKDEP)
913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); 913 DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 914 DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
915#endif 915#endif
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81a4e4a3f087..65548eff029e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -39,6 +39,7 @@
39#include <linux/irqflags.h> 39#include <linux/irqflags.h>
40#include <linux/utsname.h> 40#include <linux/utsname.h>
41#include <linux/hash.h> 41#include <linux/hash.h>
42#include <linux/ftrace.h>
42 43
43#include <asm/sections.h> 44#include <asm/sections.h>
44 45
@@ -81,6 +82,8 @@ static int graph_lock(void)
81 __raw_spin_unlock(&lockdep_lock); 82 __raw_spin_unlock(&lockdep_lock);
82 return 0; 83 return 0;
83 } 84 }
85 /* prevent any recursions within lockdep from causing deadlocks */
86 current->lockdep_recursion++;
84 return 1; 87 return 1;
85} 88}
86 89
@@ -89,6 +92,7 @@ static inline int graph_unlock(void)
89 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock)) 92 if (debug_locks && !__raw_spin_is_locked(&lockdep_lock))
90 return DEBUG_LOCKS_WARN_ON(1); 93 return DEBUG_LOCKS_WARN_ON(1);
91 94
95 current->lockdep_recursion--;
92 __raw_spin_unlock(&lockdep_lock); 96 __raw_spin_unlock(&lockdep_lock);
93 return 0; 97 return 0;
94} 98}
@@ -982,7 +986,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
982 return 1; 986 return 1;
983} 987}
984 988
985#ifdef CONFIG_TRACE_IRQFLAGS 989#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
986/* 990/*
987 * Forwards and backwards subgraph searching, for the purposes of 991 * Forwards and backwards subgraph searching, for the purposes of
988 * proving that two subgraphs can be connected by a new dependency 992 * proving that two subgraphs can be connected by a new dependency
@@ -1680,7 +1684,7 @@ valid_state(struct task_struct *curr, struct held_lock *this,
1680static int mark_lock(struct task_struct *curr, struct held_lock *this, 1684static int mark_lock(struct task_struct *curr, struct held_lock *this,
1681 enum lock_usage_bit new_bit); 1685 enum lock_usage_bit new_bit);
1682 1686
1683#ifdef CONFIG_TRACE_IRQFLAGS 1687#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1684 1688
1685/* 1689/*
1686 * print irq inversion bug: 1690 * print irq inversion bug:
@@ -2013,11 +2017,13 @@ void early_boot_irqs_on(void)
2013/* 2017/*
2014 * Hardirqs will be enabled: 2018 * Hardirqs will be enabled:
2015 */ 2019 */
2016void trace_hardirqs_on(void) 2020void trace_hardirqs_on_caller(unsigned long a0)
2017{ 2021{
2018 struct task_struct *curr = current; 2022 struct task_struct *curr = current;
2019 unsigned long ip; 2023 unsigned long ip;
2020 2024
2025 time_hardirqs_on(CALLER_ADDR0, a0);
2026
2021 if (unlikely(!debug_locks || current->lockdep_recursion)) 2027 if (unlikely(!debug_locks || current->lockdep_recursion))
2022 return; 2028 return;
2023 2029
@@ -2055,16 +2061,23 @@ void trace_hardirqs_on(void)
2055 curr->hardirq_enable_event = ++curr->irq_events; 2061 curr->hardirq_enable_event = ++curr->irq_events;
2056 debug_atomic_inc(&hardirqs_on_events); 2062 debug_atomic_inc(&hardirqs_on_events);
2057} 2063}
2064EXPORT_SYMBOL(trace_hardirqs_on_caller);
2058 2065
2066void trace_hardirqs_on(void)
2067{
2068 trace_hardirqs_on_caller(CALLER_ADDR0);
2069}
2059EXPORT_SYMBOL(trace_hardirqs_on); 2070EXPORT_SYMBOL(trace_hardirqs_on);
2060 2071
2061/* 2072/*
2062 * Hardirqs were disabled: 2073 * Hardirqs were disabled:
2063 */ 2074 */
2064void trace_hardirqs_off(void) 2075void trace_hardirqs_off_caller(unsigned long a0)
2065{ 2076{
2066 struct task_struct *curr = current; 2077 struct task_struct *curr = current;
2067 2078
2079 time_hardirqs_off(CALLER_ADDR0, a0);
2080
2068 if (unlikely(!debug_locks || current->lockdep_recursion)) 2081 if (unlikely(!debug_locks || current->lockdep_recursion))
2069 return; 2082 return;
2070 2083
@@ -2082,7 +2095,12 @@ void trace_hardirqs_off(void)
2082 } else 2095 } else
2083 debug_atomic_inc(&redundant_hardirqs_off); 2096 debug_atomic_inc(&redundant_hardirqs_off);
2084} 2097}
2098EXPORT_SYMBOL(trace_hardirqs_off_caller);
2085 2099
2100void trace_hardirqs_off(void)
2101{
2102 trace_hardirqs_off_caller(CALLER_ADDR0);
2103}
2086EXPORT_SYMBOL(trace_hardirqs_off); 2104EXPORT_SYMBOL(trace_hardirqs_off);
2087 2105
2088/* 2106/*
@@ -2246,7 +2264,7 @@ static inline int separate_irq_context(struct task_struct *curr,
2246 * Mark a lock with a usage bit, and validate the state transition: 2264 * Mark a lock with a usage bit, and validate the state transition:
2247 */ 2265 */
2248static int mark_lock(struct task_struct *curr, struct held_lock *this, 2266static int mark_lock(struct task_struct *curr, struct held_lock *this,
2249 enum lock_usage_bit new_bit) 2267 enum lock_usage_bit new_bit)
2250{ 2268{
2251 unsigned int new_mask = 1 << new_bit, ret = 1; 2269 unsigned int new_mask = 1 << new_bit, ret = 1;
2252 2270
@@ -2686,7 +2704,7 @@ static void check_flags(unsigned long flags)
2686 * and also avoid lockdep recursion: 2704 * and also avoid lockdep recursion:
2687 */ 2705 */
2688void lock_acquire(struct lockdep_map *lock, unsigned int subclass, 2706void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2689 int trylock, int read, int check, unsigned long ip) 2707 int trylock, int read, int check, unsigned long ip)
2690{ 2708{
2691 unsigned long flags; 2709 unsigned long flags;
2692 2710
@@ -2708,7 +2726,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2708 2726
2709EXPORT_SYMBOL_GPL(lock_acquire); 2727EXPORT_SYMBOL_GPL(lock_acquire);
2710 2728
2711void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) 2729void lock_release(struct lockdep_map *lock, int nested,
2730 unsigned long ip)
2712{ 2731{
2713 unsigned long flags; 2732 unsigned long flags;
2714 2733
diff --git a/kernel/marker.c b/kernel/marker.c
index b5a9fe1d50d5..1abfb923b761 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -55,8 +55,8 @@ static DEFINE_MUTEX(markers_mutex);
55struct marker_entry { 55struct marker_entry {
56 struct hlist_node hlist; 56 struct hlist_node hlist;
57 char *format; 57 char *format;
58 void (*call)(const struct marker *mdata, /* Probe wrapper */ 58 /* Probe wrapper */
59 void *call_private, const char *fmt, ...); 59 void (*call)(const struct marker *mdata, void *call_private, ...);
60 struct marker_probe_closure single; 60 struct marker_probe_closure single;
61 struct marker_probe_closure *multi; 61 struct marker_probe_closure *multi;
62 int refcount; /* Number of times armed. 0 if disarmed. */ 62 int refcount; /* Number of times armed. 0 if disarmed. */
@@ -91,15 +91,13 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
91 * marker_probe_cb Callback that prepares the variable argument list for probes. 91 * marker_probe_cb Callback that prepares the variable argument list for probes.
92 * @mdata: pointer of type struct marker 92 * @mdata: pointer of type struct marker
93 * @call_private: caller site private data 93 * @call_private: caller site private data
94 * @fmt: format string
95 * @...: Variable argument list. 94 * @...: Variable argument list.
96 * 95 *
97 * Since we do not use "typical" pointer based RCU in the 1 argument case, we 96 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
98 * need to put a full smp_rmb() in this branch. This is why we do not use 97 * need to put a full smp_rmb() in this branch. This is why we do not use
99 * rcu_dereference() for the pointer read. 98 * rcu_dereference() for the pointer read.
100 */ 99 */
101void marker_probe_cb(const struct marker *mdata, void *call_private, 100void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
102 const char *fmt, ...)
103{ 101{
104 va_list args; 102 va_list args;
105 char ptype; 103 char ptype;
@@ -120,8 +118,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
120 /* Must read the ptr before private data. They are not data 118 /* Must read the ptr before private data. They are not data
121 * dependant, so we put an explicit smp_rmb() here. */ 119 * dependant, so we put an explicit smp_rmb() here. */
122 smp_rmb(); 120 smp_rmb();
123 va_start(args, fmt); 121 va_start(args, call_private);
124 func(mdata->single.probe_private, call_private, fmt, &args); 122 func(mdata->single.probe_private, call_private, mdata->format,
123 &args);
125 va_end(args); 124 va_end(args);
126 } else { 125 } else {
127 struct marker_probe_closure *multi; 126 struct marker_probe_closure *multi;
@@ -136,9 +135,9 @@ void marker_probe_cb(const struct marker *mdata, void *call_private,
136 smp_read_barrier_depends(); 135 smp_read_barrier_depends();
137 multi = mdata->multi; 136 multi = mdata->multi;
138 for (i = 0; multi[i].func; i++) { 137 for (i = 0; multi[i].func; i++) {
139 va_start(args, fmt); 138 va_start(args, call_private);
140 multi[i].func(multi[i].probe_private, call_private, fmt, 139 multi[i].func(multi[i].probe_private, call_private,
141 &args); 140 mdata->format, &args);
142 va_end(args); 141 va_end(args);
143 } 142 }
144 } 143 }
@@ -150,13 +149,11 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
150 * marker_probe_cb_noarg Callback that does not prepare the variable argument list. 149 * marker_probe_cb_noarg Callback that does not prepare the variable argument list.
151 * @mdata: pointer of type struct marker 150 * @mdata: pointer of type struct marker
152 * @call_private: caller site private data 151 * @call_private: caller site private data
153 * @fmt: format string
154 * @...: Variable argument list. 152 * @...: Variable argument list.
155 * 153 *
156 * Should be connected to markers "MARK_NOARGS". 154 * Should be connected to markers "MARK_NOARGS".
157 */ 155 */
158void marker_probe_cb_noarg(const struct marker *mdata, 156void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
159 void *call_private, const char *fmt, ...)
160{ 157{
161 va_list args; /* not initialized */ 158 va_list args; /* not initialized */
162 char ptype; 159 char ptype;
@@ -172,7 +169,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
172 /* Must read the ptr before private data. They are not data 169 /* Must read the ptr before private data. They are not data
173 * dependant, so we put an explicit smp_rmb() here. */ 170 * dependant, so we put an explicit smp_rmb() here. */
174 smp_rmb(); 171 smp_rmb();
175 func(mdata->single.probe_private, call_private, fmt, &args); 172 func(mdata->single.probe_private, call_private, mdata->format,
173 &args);
176 } else { 174 } else {
177 struct marker_probe_closure *multi; 175 struct marker_probe_closure *multi;
178 int i; 176 int i;
@@ -186,8 +184,8 @@ void marker_probe_cb_noarg(const struct marker *mdata,
186 smp_read_barrier_depends(); 184 smp_read_barrier_depends();
187 multi = mdata->multi; 185 multi = mdata->multi;
188 for (i = 0; multi[i].func; i++) 186 for (i = 0; multi[i].func; i++)
189 multi[i].func(multi[i].probe_private, call_private, fmt, 187 multi[i].func(multi[i].probe_private, call_private,
190 &args); 188 mdata->format, &args);
191 } 189 }
192 preempt_enable(); 190 preempt_enable();
193} 191}
diff --git a/kernel/printk.c b/kernel/printk.c
index e2129e83fd75..75ef3af39132 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1041,7 +1041,9 @@ void release_console_sem(void)
1041 _log_end = log_end; 1041 _log_end = log_end;
1042 con_start = log_end; /* Flush */ 1042 con_start = log_end; /* Flush */
1043 spin_unlock(&logbuf_lock); 1043 spin_unlock(&logbuf_lock);
1044 stop_critical_timings(); /* don't trace print latency */
1044 call_console_drivers(_con_start, _log_end); 1045 call_console_drivers(_con_start, _log_end);
1046 start_critical_timings();
1045 local_irq_restore(flags); 1047 local_irq_restore(flags);
1046 } 1048 }
1047 console_locked = 0; 1049 console_locked = 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 94ead43eda62..42899dce837d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -70,6 +70,7 @@
70#include <linux/bootmem.h> 70#include <linux/bootmem.h>
71#include <linux/debugfs.h> 71#include <linux/debugfs.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/ftrace.h>
73 74
74#include <asm/tlb.h> 75#include <asm/tlb.h>
75#include <asm/irq_regs.h> 76#include <asm/irq_regs.h>
@@ -607,6 +608,24 @@ static inline void update_rq_clock(struct rq *rq)
607# define const_debug static const 608# define const_debug static const
608#endif 609#endif
609 610
611/**
612 * runqueue_is_locked
613 *
614 * Returns true if the current cpu runqueue is locked.
615 * This interface allows printk to be called with the runqueue lock
616 * held and know whether or not it is OK to wake up the klogd.
617 */
618int runqueue_is_locked(void)
619{
620 int cpu = get_cpu();
621 struct rq *rq = cpu_rq(cpu);
622 int ret;
623
624 ret = spin_is_locked(&rq->lock);
625 put_cpu();
626 return ret;
627}
628
610/* 629/*
611 * Debugging: various feature bits 630 * Debugging: various feature bits
612 */ 631 */
@@ -831,7 +850,7 @@ static unsigned long long __cpu_clock(int cpu)
831 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu 850 * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
832 * clock constructed from sched_clock(): 851 * clock constructed from sched_clock():
833 */ 852 */
834unsigned long long cpu_clock(int cpu) 853unsigned long long notrace cpu_clock(int cpu)
835{ 854{
836 unsigned long long prev_cpu_time, time, delta_time; 855 unsigned long long prev_cpu_time, time, delta_time;
837 unsigned long flags; 856 unsigned long flags;
@@ -2149,6 +2168,9 @@ out_activate:
2149 success = 1; 2168 success = 1;
2150 2169
2151out_running: 2170out_running:
2171 trace_mark(kernel_sched_wakeup,
2172 "pid %d state %ld ## rq %p task %p rq->curr %p",
2173 p->pid, p->state, rq, p, rq->curr);
2152 check_preempt_curr(rq, p); 2174 check_preempt_curr(rq, p);
2153 2175
2154 p->state = TASK_RUNNING; 2176 p->state = TASK_RUNNING;
@@ -2279,6 +2301,9 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2279 p->sched_class->task_new(rq, p); 2301 p->sched_class->task_new(rq, p);
2280 inc_nr_running(p, rq); 2302 inc_nr_running(p, rq);
2281 } 2303 }
2304 trace_mark(kernel_sched_wakeup_new,
2305 "pid %d state %ld ## rq %p task %p rq->curr %p",
2306 p->pid, p->state, rq, p, rq->curr);
2282 check_preempt_curr(rq, p); 2307 check_preempt_curr(rq, p);
2283#ifdef CONFIG_SMP 2308#ifdef CONFIG_SMP
2284 if (p->sched_class->task_wake_up) 2309 if (p->sched_class->task_wake_up)
@@ -2451,6 +2476,11 @@ context_switch(struct rq *rq, struct task_struct *prev,
2451 struct mm_struct *mm, *oldmm; 2476 struct mm_struct *mm, *oldmm;
2452 2477
2453 prepare_task_switch(rq, prev, next); 2478 prepare_task_switch(rq, prev, next);
2479 trace_mark(kernel_sched_schedule,
2480 "prev_pid %d next_pid %d prev_state %ld "
2481 "## rq %p prev %p next %p",
2482 prev->pid, next->pid, prev->state,
2483 rq, prev, next);
2454 mm = next->mm; 2484 mm = next->mm;
2455 oldmm = prev->active_mm; 2485 oldmm = prev->active_mm;
2456 /* 2486 /*
@@ -4021,26 +4051,44 @@ void scheduler_tick(void)
4021#endif 4051#endif
4022} 4052}
4023 4053
4024#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT) 4054#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
4055 defined(CONFIG_PREEMPT_TRACER))
4056
4057static inline unsigned long get_parent_ip(unsigned long addr)
4058{
4059 if (in_lock_functions(addr)) {
4060 addr = CALLER_ADDR2;
4061 if (in_lock_functions(addr))
4062 addr = CALLER_ADDR3;
4063 }
4064 return addr;
4065}
4025 4066
4026void __kprobes add_preempt_count(int val) 4067void __kprobes add_preempt_count(int val)
4027{ 4068{
4069#ifdef CONFIG_DEBUG_PREEMPT
4028 /* 4070 /*
4029 * Underflow? 4071 * Underflow?
4030 */ 4072 */
4031 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) 4073 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
4032 return; 4074 return;
4075#endif
4033 preempt_count() += val; 4076 preempt_count() += val;
4077#ifdef CONFIG_DEBUG_PREEMPT
4034 /* 4078 /*
4035 * Spinlock count overflowing soon? 4079 * Spinlock count overflowing soon?
4036 */ 4080 */
4037 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= 4081 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
4038 PREEMPT_MASK - 10); 4082 PREEMPT_MASK - 10);
4083#endif
4084 if (preempt_count() == val)
4085 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4039} 4086}
4040EXPORT_SYMBOL(add_preempt_count); 4087EXPORT_SYMBOL(add_preempt_count);
4041 4088
4042void __kprobes sub_preempt_count(int val) 4089void __kprobes sub_preempt_count(int val)
4043{ 4090{
4091#ifdef CONFIG_DEBUG_PREEMPT
4044 /* 4092 /*
4045 * Underflow? 4093 * Underflow?
4046 */ 4094 */
@@ -4052,7 +4100,10 @@ void __kprobes sub_preempt_count(int val)
4052 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && 4100 if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
4053 !(preempt_count() & PREEMPT_MASK))) 4101 !(preempt_count() & PREEMPT_MASK)))
4054 return; 4102 return;
4103#endif
4055 4104
4105 if (preempt_count() == val)
4106 trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
4056 preempt_count() -= val; 4107 preempt_count() -= val;
4057} 4108}
4058EXPORT_SYMBOL(sub_preempt_count); 4109EXPORT_SYMBOL(sub_preempt_count);
@@ -5384,7 +5435,7 @@ out_unlock:
5384 return retval; 5435 return retval;
5385} 5436}
5386 5437
5387static const char stat_nam[] = "RSDTtZX"; 5438static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
5388 5439
5389void sched_show_task(struct task_struct *p) 5440void sched_show_task(struct task_struct *p)
5390{ 5441{
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 5c2942e768cd..aaaeae8244e7 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -31,6 +31,7 @@
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <linux/semaphore.h> 32#include <linux/semaphore.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
34#include <linux/ftrace.h>
34 35
35static noinline void __down(struct semaphore *sem); 36static noinline void __down(struct semaphore *sem);
36static noinline int __down_interruptible(struct semaphore *sem); 37static noinline int __down_interruptible(struct semaphore *sem);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index ae28c8245123..a1fb54c93cdd 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -436,7 +436,7 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
436} 436}
437EXPORT_SYMBOL(_spin_trylock_bh); 437EXPORT_SYMBOL(_spin_trylock_bh);
438 438
439int in_lock_functions(unsigned long addr) 439notrace int in_lock_functions(unsigned long addr)
440{ 440{
441 /* Linker adds these: start and end of __lockfunc functions */ 441 /* Linker adds these: start and end of __lockfunc functions */
442 extern char __lock_text_start[], __lock_text_end[]; 442 extern char __lock_text_start[], __lock_text_end[];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..efaf7c5500e9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -46,6 +46,7 @@
46#include <linux/nfs_fs.h> 46#include <linux/nfs_fs.h>
47#include <linux/acpi.h> 47#include <linux/acpi.h>
48#include <linux/reboot.h> 48#include <linux/reboot.h>
49#include <linux/ftrace.h>
49 50
50#include <asm/uaccess.h> 51#include <asm/uaccess.h>
51#include <asm/processor.h> 52#include <asm/processor.h>
@@ -455,6 +456,16 @@ static struct ctl_table kern_table[] = {
455 .mode = 0644, 456 .mode = 0644,
456 .proc_handler = &proc_dointvec, 457 .proc_handler = &proc_dointvec,
457 }, 458 },
459#ifdef CONFIG_FTRACE
460 {
461 .ctl_name = CTL_UNNUMBERED,
462 .procname = "ftrace_enabled",
463 .data = &ftrace_enabled,
464 .maxlen = sizeof(int),
465 .mode = 0644,
466 .proc_handler = &ftrace_enable_sysctl,
467 },
468#endif
458#ifdef CONFIG_KMOD 469#ifdef CONFIG_KMOD
459 { 470 {
460 .ctl_name = KERN_MODPROBE, 471 .ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
new file mode 100644
index 000000000000..5c2295b29f2c
--- /dev/null
+++ b/kernel/trace/Kconfig
@@ -0,0 +1,127 @@
1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
3#
4config HAVE_FTRACE
5 bool
6
7config HAVE_DYNAMIC_FTRACE
8 bool
9
10config TRACER_MAX_TRACE
11 bool
12
13config TRACING
14 bool
15 select DEBUG_FS
16 select STACKTRACE
17
18config FTRACE
19 bool "Kernel Function Tracer"
20 depends on HAVE_FTRACE
21 select FRAME_POINTER
22 select TRACING
23 select CONTEXT_SWITCH_TRACER
24 help
25 Enable the kernel to trace every kernel function. This is done
26 by using a compiler feature to insert a small, 5-byte No-Operation
27 instruction to the beginning of every kernel function, which NOP
28 sequence is then dynamically patched into a tracer call when
29 tracing is enabled by the administrator. If it's runtime disabled
30 (the bootup default), then the overhead of the instructions is very
31 small and not measurable even in micro-benchmarks.
32
33config IRQSOFF_TRACER
34 bool "Interrupts-off Latency Tracer"
35 default n
36 depends on TRACE_IRQFLAGS_SUPPORT
37 depends on GENERIC_TIME
38 depends on HAVE_FTRACE
39 select TRACE_IRQFLAGS
40 select TRACING
41 select TRACER_MAX_TRACE
42 help
43 This option measures the time spent in irqs-off critical
44 sections, with microsecond accuracy.
45
46 The default measurement method is a maximum search, which is
47 disabled by default and can be runtime (re-)started
48 via:
49
50 echo 0 > /debugfs/tracing/tracing_max_latency
51
52 (Note that kernel size and overhead increase with this option
53 enabled. This option and the preempt-off timing option can be
54 used together or separately.)
55
56config PREEMPT_TRACER
57 bool "Preemption-off Latency Tracer"
58 default n
59 depends on GENERIC_TIME
60 depends on PREEMPT
61 depends on HAVE_FTRACE
62 select TRACING
63 select TRACER_MAX_TRACE
64 help
65 This option measures the time spent in preemption off critical
66 sections, with microsecond accuracy.
67
68 The default measurement method is a maximum search, which is
69 disabled by default and can be runtime (re-)started
70 via:
71
72 echo 0 > /debugfs/tracing/tracing_max_latency
73
74 (Note that kernel size and overhead increase with this option
75 enabled. This option and the irqs-off timing option can be
76 used together or separately.)
77
78config SCHED_TRACER
79 bool "Scheduling Latency Tracer"
80 depends on HAVE_FTRACE
81 select TRACING
82 select CONTEXT_SWITCH_TRACER
83 select TRACER_MAX_TRACE
84 help
85 This tracer tracks the latency of the highest priority task
86 to be scheduled in, starting from the point it has woken up.
87
88config CONTEXT_SWITCH_TRACER
89 bool "Trace process context switches"
90 depends on HAVE_FTRACE
91 select TRACING
92 select MARKERS
93 help
94 This tracer gets called from the context switch and records
95 all switching of tasks.
96
97config DYNAMIC_FTRACE
98 bool "enable/disable ftrace tracepoints dynamically"
99 depends on FTRACE
100 depends on HAVE_DYNAMIC_FTRACE
101 default y
102 help
103 This option will modify all the calls to ftrace dynamically
104 (will patch them out of the binary image and replaces them
105 with a No-Op instruction) as they are called. A table is
106 created to dynamically enable them again.
107
108 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
109 has native performance as long as no tracing is active.
110
111 The changes to the code are done by a kernel thread that
112 wakes up once a second and checks to see if any ftrace calls
113 were made. If so, it runs stop_machine (stops all CPUs)
114 and modifies the code to jump over the call to ftrace.
115
116config FTRACE_SELFTEST
117 bool
118
119config FTRACE_STARTUP_TEST
120 bool "Perform a startup test on ftrace"
121 depends on TRACING
122 select FTRACE_SELFTEST
123 help
124 This option performs a series of startup tests on ftrace. On bootup
125 a series of tests are made to verify that the tracer is
126 functioning properly. It will do tests on all the configured
127 tracers of ftrace.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
new file mode 100644
index 000000000000..d9efbbfa2bdf
--- /dev/null
+++ b/kernel/trace/Makefile
@@ -0,0 +1,22 @@
1
2# Do not instrument the tracer itself:
3
4ifdef CONFIG_FTRACE
5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7
8# selftest needs instrumentation
9CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o
11endif
12
13obj-$(CONFIG_FTRACE) += libftrace.o
14
15obj-$(CONFIG_TRACING) += trace.o
16obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
17obj-$(CONFIG_FTRACE) += trace_functions.o
18obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
19obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
20obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
21
22libftrace-y := ftrace.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..0f271c45cd02
--- /dev/null
+++ b/kernel/trace/ftrace.c
@@ -0,0 +1,1710 @@
1/*
2 * Infrastructure for profiling code inserted by 'gcc -pg'.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2004-2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally ported from the -rt patch by:
8 * Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code in the latency_tracer, that is:
11 *
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 William Lee Irwin III
14 */
15
16#include <linux/stop_machine.h>
17#include <linux/clocksource.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/debugfs.h>
21#include <linux/hardirq.h>
22#include <linux/kthread.h>
23#include <linux/uaccess.h>
24#include <linux/kprobes.h>
25#include <linux/ftrace.h>
26#include <linux/sysctl.h>
27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h>
30
31#include <asm/ftrace.h>
32
33#include "trace.h"
34
35/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled;
38
39/*
40 * ftrace_disabled is set when an anomaly is discovered.
41 * ftrace_disabled is much stronger than ftrace_enabled.
42 */
43static int ftrace_disabled __read_mostly;
44
45static DEFINE_SPINLOCK(ftrace_lock);
46static DEFINE_MUTEX(ftrace_sysctl_lock);
47
48static struct ftrace_ops ftrace_list_end __read_mostly =
49{
50 .func = ftrace_stub,
51};
52
53static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
54ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
55
56static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
57{
58 struct ftrace_ops *op = ftrace_list;
59
60 /* in case someone actually ports this to alpha! */
61 read_barrier_depends();
62
63 while (op != &ftrace_list_end) {
64 /* silly alpha */
65 read_barrier_depends();
66 op->func(ip, parent_ip);
67 op = op->next;
68 };
69}
70
/**
 * clear_ftrace_function - reset the ftrace function
 *
 * This points ftrace_trace_function back at ftrace_stub and in
 * essence stops tracing. There may be lag before the change is
 * observed everywhere.
 */
void clear_ftrace_function(void)
{
	ftrace_trace_function = ftrace_stub;
}
81
82static int __register_ftrace_function(struct ftrace_ops *ops)
83{
84 /* Should never be called by interrupts */
85 spin_lock(&ftrace_lock);
86
87 ops->next = ftrace_list;
88 /*
89 * We are entering ops into the ftrace_list but another
90 * CPU might be walking that list. We need to make sure
91 * the ops->next pointer is valid before another CPU sees
92 * the ops pointer included into the ftrace_list.
93 */
94 smp_wmb();
95 ftrace_list = ops;
96
97 if (ftrace_enabled) {
98 /*
99 * For one func, simply call it directly.
100 * For more than one func, call the chain.
101 */
102 if (ops->next == &ftrace_list_end)
103 ftrace_trace_function = ops->func;
104 else
105 ftrace_trace_function = ftrace_list_func;
106 }
107
108 spin_unlock(&ftrace_lock);
109
110 return 0;
111}
112
113static int __unregister_ftrace_function(struct ftrace_ops *ops)
114{
115 struct ftrace_ops **p;
116 int ret = 0;
117
118 spin_lock(&ftrace_lock);
119
120 /*
121 * If we are removing the last function, then simply point
122 * to the ftrace_stub.
123 */
124 if (ftrace_list == ops && ops->next == &ftrace_list_end) {
125 ftrace_trace_function = ftrace_stub;
126 ftrace_list = &ftrace_list_end;
127 goto out;
128 }
129
130 for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next)
131 if (*p == ops)
132 break;
133
134 if (*p != ops) {
135 ret = -1;
136 goto out;
137 }
138
139 *p = (*p)->next;
140
141 if (ftrace_enabled) {
142 /* If we only have one func left, then call that directly */
143 if (ftrace_list == &ftrace_list_end ||
144 ftrace_list->next == &ftrace_list_end)
145 ftrace_trace_function = ftrace_list->func;
146 }
147
148 out:
149 spin_unlock(&ftrace_lock);
150
151 return ret;
152}
153
154#ifdef CONFIG_DYNAMIC_FTRACE
155
156static struct task_struct *ftraced_task;
157
158enum {
159 FTRACE_ENABLE_CALLS = (1 << 0),
160 FTRACE_DISABLE_CALLS = (1 << 1),
161 FTRACE_UPDATE_TRACE_FUNC = (1 << 2),
162 FTRACE_ENABLE_MCOUNT = (1 << 3),
163 FTRACE_DISABLE_MCOUNT = (1 << 4),
164};
165
166static int ftrace_filtered;
167static int tracing_on;
168static int frozen_record_count;
169
170static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
171
172static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
173
174static DEFINE_SPINLOCK(ftrace_shutdown_lock);
175static DEFINE_MUTEX(ftraced_lock);
176static DEFINE_MUTEX(ftrace_regex_lock);
177
/*
 * One page worth of dyn_ftrace records. Pages are chained through
 * ->next; ->index is the number of slots in records[] handed out so
 * far (it is compared against ENTRIES_PER_PAGE by the allocator).
 */
struct ftrace_page {
	struct ftrace_page *next;
	unsigned long index;
	struct dyn_ftrace records[];
};
183
184#define ENTRIES_PER_PAGE \
185 ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace))
186
187/* estimate from running different kernels */
188#define NR_TO_INIT 10000
189
190static struct ftrace_page *ftrace_pages_start;
191static struct ftrace_page *ftrace_pages;
192
193static int ftraced_trigger;
194static int ftraced_suspend;
195static int ftraced_stop;
196
197static int ftrace_record_suspend;
198
199static struct dyn_ftrace *ftrace_free_records;
200
201
#ifdef CONFIG_KPROBES
/* Mark @rec frozen and account for it, unless it already is. */
static inline void freeze_record(struct dyn_ftrace *rec)
{
	if (rec->flags & FTRACE_FL_FROZEN)
		return;

	rec->flags |= FTRACE_FL_FROZEN;
	frozen_record_count++;
}

/* Clear the frozen mark on @rec and drop it from the count, if set. */
static inline void unfreeze_record(struct dyn_ftrace *rec)
{
	if (!(rec->flags & FTRACE_FL_FROZEN))
		return;

	rec->flags &= ~FTRACE_FL_FROZEN;
	frozen_record_count--;
}

/* Non-zero when @rec carries FTRACE_FL_FROZEN. */
static inline int record_frozen(struct dyn_ftrace *rec)
{
	return rec->flags & FTRACE_FL_FROZEN;
}
#else
/* Without kprobes, records are never frozen. */
# define freeze_record(rec)			({ 0; })
# define unfreeze_record(rec)			({ 0; })
# define record_frozen(rec)			({ 0; })
#endif /* CONFIG_KPROBES */
228
229int skip_trace(unsigned long ip)
230{
231 unsigned long fl;
232 struct dyn_ftrace *rec;
233 struct hlist_node *t;
234 struct hlist_head *head;
235
236 if (frozen_record_count == 0)
237 return 0;
238
239 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
240 hlist_for_each_entry_rcu(rec, t, head, node) {
241 if (rec->ip == ip) {
242 if (record_frozen(rec)) {
243 if (rec->flags & FTRACE_FL_FAILED)
244 return 1;
245
246 if (!(rec->flags & FTRACE_FL_CONVERTED))
247 return 1;
248
249 if (!tracing_on || !ftrace_enabled)
250 return 1;
251
252 if (ftrace_filtered) {
253 fl = rec->flags & (FTRACE_FL_FILTER |
254 FTRACE_FL_NOTRACE);
255 if (!fl || (fl & FTRACE_FL_NOTRACE))
256 return 1;
257 }
258 }
259 break;
260 }
261 }
262
263 return 0;
264}
265
266static inline int
267ftrace_ip_in_hash(unsigned long ip, unsigned long key)
268{
269 struct dyn_ftrace *p;
270 struct hlist_node *t;
271 int found = 0;
272
273 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
274 if (p->ip == ip) {
275 found = 1;
276 break;
277 }
278 }
279
280 return found;
281}
282
/* Insert @node at the head of bucket @key of ftrace_hash (RCU add). */
static inline void
ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
{
	hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
}
288
/*
 * Unlink @node from its hash bucket.
 * called from kstop_machine
 */
static inline void ftrace_del_hash(struct dyn_ftrace *node)
{
	hlist_del(&node->node);
}
294
295static void ftrace_free_rec(struct dyn_ftrace *rec)
296{
297 /* no locking, only called from kstop_machine */
298
299 rec->ip = (unsigned long)ftrace_free_records;
300 ftrace_free_records = rec;
301 rec->flags |= FTRACE_FL_FREE;
302}
303
304static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
305{
306 struct dyn_ftrace *rec;
307
308 /* First check for freed records */
309 if (ftrace_free_records) {
310 rec = ftrace_free_records;
311
312 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
313 WARN_ON_ONCE(1);
314 ftrace_free_records = NULL;
315 ftrace_disabled = 1;
316 ftrace_enabled = 0;
317 return NULL;
318 }
319
320 ftrace_free_records = (void *)rec->ip;
321 memset(rec, 0, sizeof(*rec));
322 return rec;
323 }
324
325 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
326 if (!ftrace_pages->next)
327 return NULL;
328 ftrace_pages = ftrace_pages->next;
329 }
330
331 return &ftrace_pages->records[ftrace_pages->index++];
332}
333
334static void
335ftrace_record_ip(unsigned long ip)
336{
337 struct dyn_ftrace *node;
338 unsigned long flags;
339 unsigned long key;
340 int resched;
341 int atomic;
342 int cpu;
343
344 if (!ftrace_enabled || ftrace_disabled)
345 return;
346
347 resched = need_resched();
348 preempt_disable_notrace();
349
350 /*
351 * We simply need to protect against recursion.
352 * Use the the raw version of smp_processor_id and not
353 * __get_cpu_var which can call debug hooks that can
354 * cause a recursive crash here.
355 */
356 cpu = raw_smp_processor_id();
357 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
358 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
359 goto out;
360
361 if (unlikely(ftrace_record_suspend))
362 goto out;
363
364 key = hash_long(ip, FTRACE_HASHBITS);
365
366 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
367
368 if (ftrace_ip_in_hash(ip, key))
369 goto out;
370
371 atomic = irqs_disabled();
372
373 spin_lock_irqsave(&ftrace_shutdown_lock, flags);
374
375 /* This ip may have hit the hash before the lock */
376 if (ftrace_ip_in_hash(ip, key))
377 goto out_unlock;
378
379 node = ftrace_alloc_dyn_node(ip);
380 if (!node)
381 goto out_unlock;
382
383 node->ip = ip;
384
385 ftrace_add_hash(node, key);
386
387 ftraced_trigger = 1;
388
389 out_unlock:
390 spin_unlock_irqrestore(&ftrace_shutdown_lock, flags);
391 out:
392 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
393
394 /* prevent recursion with scheduler */
395 if (resched)
396 preempt_enable_no_resched_notrace();
397 else
398 preempt_enable_notrace();
399}
400
401#define FTRACE_ADDR ((long)(ftrace_caller))
402
/*
 * Decide whether the call site described by @rec has to be switched
 * between the nop and the call to FTRACE_ADDR, update
 * FTRACE_FL_ENABLED accordingly and patch the code.
 *
 * @old/@new: instruction expected at the site / instruction to write.
 *            The caller pre-loads the nop on one side; the call
 *            instruction is filled in here.
 * @enable:   non-zero to turn tracing of call sites on, zero to turn
 *            it off.
 *
 * Returns 0 when nothing needs changing, otherwise the result of
 * ftrace_modify_code().
 */
static int
__ftrace_replace_code(struct dyn_ftrace *rec,
		      unsigned char *old, unsigned char *new, int enable)
{
	unsigned long ip, fl;

	ip = rec->ip;

	if (ftrace_filtered && enable) {
		/*
		 * If filtering is on:
		 *
		 * If this record is set to be filtered and
		 * is enabled then do nothing.
		 *
		 * If this record is set to be filtered and
		 * it is not enabled, enable it.
		 *
		 * If this record is not set to be filtered
		 * and it is not enabled do nothing.
		 *
		 * If this record is set not to trace then
		 * do nothing.
		 *
		 * If this record is set not to trace and
		 * it is enabled then disable it.
		 *
		 * If this record is not set to be filtered and
		 * it is enabled, disable it.
		 */

		fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
				   FTRACE_FL_ENABLED);

		/* The four "do nothing" combinations from the list above. */
		if ((fl ==  (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
		    (fl ==  (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
		    !fl || (fl == FTRACE_FL_NOTRACE))
			return 0;

		/*
		 * If it is enabled disable it,
		 * otherwise enable it!
		 */
		if (fl & FTRACE_FL_ENABLED) {
			/* swap new and old */
			new = old;
			old = ftrace_call_replace(ip, FTRACE_ADDR);
			rec->flags &= ~FTRACE_FL_ENABLED;
		} else {
			new = ftrace_call_replace(ip, FTRACE_ADDR);
			rec->flags |= FTRACE_FL_ENABLED;
		}
	} else {

		if (enable) {
			/*
			 * If this record is set not to trace and is
			 * not enabled, do nothing.
			 */
			fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
			if (fl == FTRACE_FL_NOTRACE)
				return 0;

			new = ftrace_call_replace(ip, FTRACE_ADDR);
		} else
			old = ftrace_call_replace(ip, FTRACE_ADDR);

		/* Skip sites that are already in the requested state. */
		if (enable) {
			if (rec->flags & FTRACE_FL_ENABLED)
				return 0;
			rec->flags |= FTRACE_FL_ENABLED;
		} else {
			if (!(rec->flags & FTRACE_FL_ENABLED))
				return 0;
			rec->flags &= ~FTRACE_FL_ENABLED;
		}
	}

	return ftrace_modify_code(ip, old, new);
}
483
484static void ftrace_replace_code(int enable)
485{
486 int i, failed;
487 unsigned char *new = NULL, *old = NULL;
488 struct dyn_ftrace *rec;
489 struct ftrace_page *pg;
490
491 if (enable)
492 old = ftrace_nop_replace();
493 else
494 new = ftrace_nop_replace();
495
496 for (pg = ftrace_pages_start; pg; pg = pg->next) {
497 for (i = 0; i < pg->index; i++) {
498 rec = &pg->records[i];
499
500 /* don't modify code that has already faulted */
501 if (rec->flags & FTRACE_FL_FAILED)
502 continue;
503
504 /* ignore updates to this record's mcount site */
505 if (get_kprobe((void *)rec->ip)) {
506 freeze_record(rec);
507 continue;
508 } else {
509 unfreeze_record(rec);
510 }
511
512 failed = __ftrace_replace_code(rec, old, new, enable);
513 if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
514 rec->flags |= FTRACE_FL_FAILED;
515 if ((system_state == SYSTEM_BOOTING) ||
516 !core_kernel_text(rec->ip)) {
517 ftrace_del_hash(rec);
518 ftrace_free_rec(rec);
519 }
520 }
521 }
522 }
523}
524
525static void ftrace_shutdown_replenish(void)
526{
527 if (ftrace_pages->next)
528 return;
529
530 /* allocate another page */
531 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
532}
533
534static int
535ftrace_code_disable(struct dyn_ftrace *rec)
536{
537 unsigned long ip;
538 unsigned char *nop, *call;
539 int failed;
540
541 ip = rec->ip;
542
543 nop = ftrace_nop_replace();
544 call = ftrace_call_replace(ip, MCOUNT_ADDR);
545
546 failed = ftrace_modify_code(ip, call, nop);
547 if (failed) {
548 rec->flags |= FTRACE_FL_FAILED;
549 return 0;
550 }
551 return 1;
552}
553
554static int __ftrace_update_code(void *ignore);
555
556static int __ftrace_modify_code(void *data)
557{
558 unsigned long addr;
559 int *command = data;
560
561 if (*command & FTRACE_ENABLE_CALLS) {
562 /*
563 * Update any recorded ips now that we have the
564 * machine stopped
565 */
566 __ftrace_update_code(NULL);
567 ftrace_replace_code(1);
568 tracing_on = 1;
569 } else if (*command & FTRACE_DISABLE_CALLS) {
570 ftrace_replace_code(0);
571 tracing_on = 0;
572 }
573
574 if (*command & FTRACE_UPDATE_TRACE_FUNC)
575 ftrace_update_ftrace_func(ftrace_trace_function);
576
577 if (*command & FTRACE_ENABLE_MCOUNT) {
578 addr = (unsigned long)ftrace_record_ip;
579 ftrace_mcount_set(&addr);
580 } else if (*command & FTRACE_DISABLE_MCOUNT) {
581 addr = (unsigned long)ftrace_stub;
582 ftrace_mcount_set(&addr);
583 }
584
585 return 0;
586}
587
/*
 * Run __ftrace_modify_code() through stop_machine_run(), passing it
 * @command (a bitmask of the FTRACE_* command flags).
 */
static void ftrace_run_update_code(int command)
{
	stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
}
592
/*
 * Set ftraced_stop so the ftraced thread skips its periodic update,
 * then call ftrace_force_update().
 */
void ftrace_disable_daemon(void)
{
	/* Stop the daemon from calling kstop_machine */
	mutex_lock(&ftraced_lock);
	ftraced_stop = 1;
	mutex_unlock(&ftraced_lock);

	ftrace_force_update();
}
602
/*
 * Clear ftraced_stop so the ftraced thread may run its periodic
 * update again, then call ftrace_force_update().
 */
void ftrace_enable_daemon(void)
{
	mutex_lock(&ftraced_lock);
	ftraced_stop = 0;
	mutex_unlock(&ftraced_lock);

	ftrace_force_update();
}
611
612static ftrace_func_t saved_ftrace_func;
613
614static void ftrace_startup(void)
615{
616 int command = 0;
617
618 if (unlikely(ftrace_disabled))
619 return;
620
621 mutex_lock(&ftraced_lock);
622 ftraced_suspend++;
623 if (ftraced_suspend == 1)
624 command |= FTRACE_ENABLE_CALLS;
625
626 if (saved_ftrace_func != ftrace_trace_function) {
627 saved_ftrace_func = ftrace_trace_function;
628 command |= FTRACE_UPDATE_TRACE_FUNC;
629 }
630
631 if (!command || !ftrace_enabled)
632 goto out;
633
634 ftrace_run_update_code(command);
635 out:
636 mutex_unlock(&ftraced_lock);
637}
638
639static void ftrace_shutdown(void)
640{
641 int command = 0;
642
643 if (unlikely(ftrace_disabled))
644 return;
645
646 mutex_lock(&ftraced_lock);
647 ftraced_suspend--;
648 if (!ftraced_suspend)
649 command |= FTRACE_DISABLE_CALLS;
650
651 if (saved_ftrace_func != ftrace_trace_function) {
652 saved_ftrace_func = ftrace_trace_function;
653 command |= FTRACE_UPDATE_TRACE_FUNC;
654 }
655
656 if (!command || !ftrace_enabled)
657 goto out;
658
659 ftrace_run_update_code(command);
660 out:
661 mutex_unlock(&ftraced_lock);
662}
663
664static void ftrace_startup_sysctl(void)
665{
666 int command = FTRACE_ENABLE_MCOUNT;
667
668 if (unlikely(ftrace_disabled))
669 return;
670
671 mutex_lock(&ftraced_lock);
672 /* Force update next time */
673 saved_ftrace_func = NULL;
674 /* ftraced_suspend is true if we want ftrace running */
675 if (ftraced_suspend)
676 command |= FTRACE_ENABLE_CALLS;
677
678 ftrace_run_update_code(command);
679 mutex_unlock(&ftraced_lock);
680}
681
682static void ftrace_shutdown_sysctl(void)
683{
684 int command = FTRACE_DISABLE_MCOUNT;
685
686 if (unlikely(ftrace_disabled))
687 return;
688
689 mutex_lock(&ftraced_lock);
690 /* ftraced_suspend is true if ftrace is running */
691 if (ftraced_suspend)
692 command |= FTRACE_DISABLE_CALLS;
693
694 ftrace_run_update_code(command);
695 mutex_unlock(&ftraced_lock);
696}
697
698static cycle_t ftrace_update_time;
699static unsigned long ftrace_update_cnt;
700unsigned long ftrace_update_tot_cnt;
701
/*
 * Convert every newly recorded call site (replace the mcount call
 * with a nop) by walking all hash buckets. Recording is suspended and
 * ftrace_enabled is cleared for the duration and restored afterwards.
 * Updates ftrace_update_cnt, ftrace_update_tot_cnt and
 * ftrace_update_time, and clears ftraced_trigger.
 *
 * @ignore: unused. Always returns 0.
 */
static int __ftrace_update_code(void *ignore)
{
	int i, save_ftrace_enabled;
	cycle_t start, stop;
	struct dyn_ftrace *p;
	struct hlist_node *t, *n;
	struct hlist_head *head, temp_list;

	/* Don't be recording funcs now */
	ftrace_record_suspend++;
	save_ftrace_enabled = ftrace_enabled;
	ftrace_enabled = 0;

	start = ftrace_now(raw_smp_processor_id());
	ftrace_update_cnt = 0;

	/* No locks needed, the machine is stopped! */
	for (i = 0; i < FTRACE_HASHSIZE; i++) {
		/* temp_list collects the kprobe'd records of this bucket */
		INIT_HLIST_HEAD(&temp_list);
		head = &ftrace_hash[i];

		/* all CPUS are stopped, we are safe to modify code */
		hlist_for_each_entry_safe(p, t, n, head, node) {
			/* Skip over failed records which have not been
			 * freed. */
			if (p->flags & FTRACE_FL_FAILED)
				continue;

			/* Unconverted records are always at the head of the
			 * hash bucket. Once we encounter a converted record,
			 * simply skip over to the next bucket. Saves ftraced
			 * some processor cycles (ftrace does its bid for
			 * global warming :-p ). */
			if (p->flags & (FTRACE_FL_CONVERTED))
				break;

			/* Ignore updates to this record's mcount site.
			 * Reintroduce this record at the head of this
			 * bucket to attempt to "convert" it again if
			 * the kprobe on it is unregistered before the
			 * next run. */
			if (get_kprobe((void *)p->ip)) {
				ftrace_del_hash(p);
				INIT_HLIST_NODE(&p->node);
				hlist_add_head(&p->node, &temp_list);
				freeze_record(p);
				continue;
			} else {
				unfreeze_record(p);
			}

			/* convert record (i.e, patch mcount-call with NOP) */
			if (ftrace_code_disable(p)) {
				p->flags |= FTRACE_FL_CONVERTED;
				ftrace_update_cnt++;
			} else {
				/* Failed: release the record during boot
				 * or when it is outside core kernel text. */
				if ((system_state == SYSTEM_BOOTING) ||
				    !core_kernel_text(p->ip)) {
					ftrace_del_hash(p);
					ftrace_free_rec(p);
				}
			}
		}

		/* Put the set-aside records back at the head of the bucket. */
		hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
			hlist_del(&p->node);
			INIT_HLIST_NODE(&p->node);
			hlist_add_head(&p->node, head);
		}
	}

	stop = ftrace_now(raw_smp_processor_id());
	ftrace_update_time = stop - start;
	ftrace_update_tot_cnt += ftrace_update_cnt;
	ftraced_trigger = 0;

	ftrace_enabled = save_ftrace_enabled;
	ftrace_record_suspend--;

	return 0;
}
783
784static int ftrace_update_code(void)
785{
786 if (unlikely(ftrace_disabled) ||
787 !ftrace_enabled || !ftraced_trigger)
788 return 0;
789
790 stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
791
792 return 1;
793}
794
795static int ftraced(void *ignore)
796{
797 unsigned long usecs;
798
799 while (!kthread_should_stop()) {
800
801 set_current_state(TASK_INTERRUPTIBLE);
802
803 /* check once a second */
804 schedule_timeout(HZ);
805
806 if (unlikely(ftrace_disabled))
807 continue;
808
809 mutex_lock(&ftrace_sysctl_lock);
810 mutex_lock(&ftraced_lock);
811 if (!ftraced_suspend && !ftraced_stop &&
812 ftrace_update_code()) {
813 usecs = nsecs_to_usecs(ftrace_update_time);
814 if (ftrace_update_tot_cnt > 100000) {
815 ftrace_update_tot_cnt = 0;
816 pr_info("hm, dftrace overflow: %lu change%s"
817 " (%lu total) in %lu usec%s\n",
818 ftrace_update_cnt,
819 ftrace_update_cnt != 1 ? "s" : "",
820 ftrace_update_tot_cnt,
821 usecs, usecs != 1 ? "s" : "");
822 ftrace_disabled = 1;
823 WARN_ON_ONCE(1);
824 }
825 }
826 mutex_unlock(&ftraced_lock);
827 mutex_unlock(&ftrace_sysctl_lock);
828
829 ftrace_shutdown_replenish();
830 }
831 __set_current_state(TASK_RUNNING);
832 return 0;
833}
834
835static int __init ftrace_dyn_table_alloc(void)
836{
837 struct ftrace_page *pg;
838 int cnt;
839 int i;
840
841 /* allocate a few pages */
842 ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL);
843 if (!ftrace_pages_start)
844 return -1;
845
846 /*
847 * Allocate a few more pages.
848 *
849 * TODO: have some parser search vmlinux before
850 * final linking to find all calls to ftrace.
851 * Then we can:
852 * a) know how many pages to allocate.
853 * and/or
854 * b) set up the table then.
855 *
856 * The dynamic code is still necessary for
857 * modules.
858 */
859
860 pg = ftrace_pages = ftrace_pages_start;
861
862 cnt = NR_TO_INIT / ENTRIES_PER_PAGE;
863
864 for (i = 0; i < cnt; i++) {
865 pg->next = (void *)get_zeroed_page(GFP_KERNEL);
866
867 /* If we fail, we'll try later anyway */
868 if (!pg->next)
869 break;
870
871 pg = pg->next;
872 }
873
874 return 0;
875}
876
877enum {
878 FTRACE_ITER_FILTER = (1 << 0),
879 FTRACE_ITER_CONT = (1 << 1),
880 FTRACE_ITER_NOTRACE = (1 << 2),
881 FTRACE_ITER_FAILURES = (1 << 3),
882};
883
884#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */
885
/* Per-open state of the record listing and filter files. */
struct ftrace_iterator {
	loff_t			pos;		/* last position stored by t_next() */
	struct ftrace_page	*pg;		/* page the cursor is on */
	unsigned		idx;		/* next record index within pg */
	unsigned		flags;		/* FTRACE_ITER_* bits */
	unsigned char		buffer[FTRACE_BUFF_MAX+1];	/* pattern being collected by writes */
	unsigned		buffer_idx;	/* number of bytes used in buffer */
	unsigned		filtered;	/* count of patterns handed to ftrace_match() */
};
895
896static void *
897t_next(struct seq_file *m, void *v, loff_t *pos)
898{
899 struct ftrace_iterator *iter = m->private;
900 struct dyn_ftrace *rec = NULL;
901
902 (*pos)++;
903
904 retry:
905 if (iter->idx >= iter->pg->index) {
906 if (iter->pg->next) {
907 iter->pg = iter->pg->next;
908 iter->idx = 0;
909 goto retry;
910 }
911 } else {
912 rec = &iter->pg->records[iter->idx++];
913 if ((!(iter->flags & FTRACE_ITER_FAILURES) &&
914 (rec->flags & FTRACE_FL_FAILED)) ||
915
916 ((iter->flags & FTRACE_ITER_FAILURES) &&
917 (!(rec->flags & FTRACE_FL_FAILED) ||
918 (rec->flags & FTRACE_FL_FREE))) ||
919
920 ((iter->flags & FTRACE_ITER_FILTER) &&
921 !(rec->flags & FTRACE_FL_FILTER)) ||
922
923 ((iter->flags & FTRACE_ITER_NOTRACE) &&
924 !(rec->flags & FTRACE_FL_NOTRACE))) {
925 rec = NULL;
926 goto retry;
927 }
928 }
929
930 iter->pos = *pos;
931
932 return rec;
933}
934
935static void *t_start(struct seq_file *m, loff_t *pos)
936{
937 struct ftrace_iterator *iter = m->private;
938 void *p = NULL;
939 loff_t l = -1;
940
941 if (*pos != iter->pos) {
942 for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
943 ;
944 } else {
945 l = *pos;
946 p = t_next(m, p, &l);
947 }
948
949 return p;
950}
951
/* seq_file ->stop: nothing to release. */
static void t_stop(struct seq_file *m, void *p)
{
}
955
956static int t_show(struct seq_file *m, void *v)
957{
958 struct dyn_ftrace *rec = v;
959 char str[KSYM_SYMBOL_LEN];
960
961 if (!rec)
962 return 0;
963
964 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
965
966 seq_printf(m, "%s\n", str);
967
968 return 0;
969}
970
/* seq_file callbacks shared by the record listing and filter files. */
static struct seq_operations show_ftrace_seq_ops = {
	.start = t_start,
	.next = t_next,
	.stop = t_stop,
	.show = t_show,
};
977
978static int
979ftrace_avail_open(struct inode *inode, struct file *file)
980{
981 struct ftrace_iterator *iter;
982 int ret;
983
984 if (unlikely(ftrace_disabled))
985 return -ENODEV;
986
987 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
988 if (!iter)
989 return -ENOMEM;
990
991 iter->pg = ftrace_pages_start;
992 iter->pos = -1;
993
994 ret = seq_open(file, &show_ftrace_seq_ops);
995 if (!ret) {
996 struct seq_file *m = file->private_data;
997
998 m->private = iter;
999 } else {
1000 kfree(iter);
1001 }
1002
1003 return ret;
1004}
1005
/*
 * Release handler matching ftrace_avail_open(): tear down the
 * seq_file and free the iterator. Always returns 0.
 */
int ftrace_avail_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = (struct seq_file *)file->private_data;
	/* fetch the iterator before seq_release() is called on the file */
	struct ftrace_iterator *iter = m->private;

	seq_release(inode, file);
	kfree(iter);

	return 0;
}
1016
1017static int
1018ftrace_failures_open(struct inode *inode, struct file *file)
1019{
1020 int ret;
1021 struct seq_file *m;
1022 struct ftrace_iterator *iter;
1023
1024 ret = ftrace_avail_open(inode, file);
1025 if (!ret) {
1026 m = (struct seq_file *)file->private_data;
1027 iter = (struct ftrace_iterator *)m->private;
1028 iter->flags = FTRACE_ITER_FAILURES;
1029 }
1030
1031 return ret;
1032}
1033
1034
1035static void ftrace_filter_reset(int enable)
1036{
1037 struct ftrace_page *pg;
1038 struct dyn_ftrace *rec;
1039 unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1040 unsigned i;
1041
1042 /* keep kstop machine from running */
1043 preempt_disable();
1044 if (enable)
1045 ftrace_filtered = 0;
1046 pg = ftrace_pages_start;
1047 while (pg) {
1048 for (i = 0; i < pg->index; i++) {
1049 rec = &pg->records[i];
1050 if (rec->flags & FTRACE_FL_FAILED)
1051 continue;
1052 rec->flags &= ~type;
1053 }
1054 pg = pg->next;
1055 }
1056 preempt_enable();
1057}
1058
1059static int
1060ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1061{
1062 struct ftrace_iterator *iter;
1063 int ret = 0;
1064
1065 if (unlikely(ftrace_disabled))
1066 return -ENODEV;
1067
1068 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1069 if (!iter)
1070 return -ENOMEM;
1071
1072 mutex_lock(&ftrace_regex_lock);
1073 if ((file->f_mode & FMODE_WRITE) &&
1074 !(file->f_flags & O_APPEND))
1075 ftrace_filter_reset(enable);
1076
1077 if (file->f_mode & FMODE_READ) {
1078 iter->pg = ftrace_pages_start;
1079 iter->pos = -1;
1080 iter->flags = enable ? FTRACE_ITER_FILTER :
1081 FTRACE_ITER_NOTRACE;
1082
1083 ret = seq_open(file, &show_ftrace_seq_ops);
1084 if (!ret) {
1085 struct seq_file *m = file->private_data;
1086 m->private = iter;
1087 } else
1088 kfree(iter);
1089 } else
1090 file->private_data = iter;
1091 mutex_unlock(&ftrace_regex_lock);
1092
1093 return ret;
1094}
1095
/* Open handler for the filter file: ftrace_regex_open() with enable = 1. */
static int
ftrace_filter_open(struct inode *inode, struct file *file)
{
	return ftrace_regex_open(inode, file, 1);
}
1101
/* Open handler for the notrace file: ftrace_regex_open() with enable = 0. */
static int
ftrace_notrace_open(struct inode *inode, struct file *file)
{
	return ftrace_regex_open(inode, file, 0);
}
1107
1108static ssize_t
1109ftrace_regex_read(struct file *file, char __user *ubuf,
1110 size_t cnt, loff_t *ppos)
1111{
1112 if (file->f_mode & FMODE_READ)
1113 return seq_read(file, ubuf, cnt, ppos);
1114 else
1115 return -EPERM;
1116}
1117
1118static loff_t
1119ftrace_regex_lseek(struct file *file, loff_t offset, int origin)
1120{
1121 loff_t ret;
1122
1123 if (file->f_mode & FMODE_READ)
1124 ret = seq_lseek(file, offset, origin);
1125 else
1126 file->f_pos = ret = 1;
1127
1128 return ret;
1129}
1130
1131enum {
1132 MATCH_FULL,
1133 MATCH_FRONT_ONLY,
1134 MATCH_MIDDLE_ONLY,
1135 MATCH_END_ONLY,
1136};
1137
1138static void
1139ftrace_match(unsigned char *buff, int len, int enable)
1140{
1141 char str[KSYM_SYMBOL_LEN];
1142 char *search = NULL;
1143 struct ftrace_page *pg;
1144 struct dyn_ftrace *rec;
1145 int type = MATCH_FULL;
1146 unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE;
1147 unsigned i, match = 0, search_len = 0;
1148
1149 for (i = 0; i < len; i++) {
1150 if (buff[i] == '*') {
1151 if (!i) {
1152 search = buff + i + 1;
1153 type = MATCH_END_ONLY;
1154 search_len = len - (i + 1);
1155 } else {
1156 if (type == MATCH_END_ONLY) {
1157 type = MATCH_MIDDLE_ONLY;
1158 } else {
1159 match = i;
1160 type = MATCH_FRONT_ONLY;
1161 }
1162 buff[i] = 0;
1163 break;
1164 }
1165 }
1166 }
1167
1168 /* keep kstop machine from running */
1169 preempt_disable();
1170 if (enable)
1171 ftrace_filtered = 1;
1172 pg = ftrace_pages_start;
1173 while (pg) {
1174 for (i = 0; i < pg->index; i++) {
1175 int matched = 0;
1176 char *ptr;
1177
1178 rec = &pg->records[i];
1179 if (rec->flags & FTRACE_FL_FAILED)
1180 continue;
1181 kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
1182 switch (type) {
1183 case MATCH_FULL:
1184 if (strcmp(str, buff) == 0)
1185 matched = 1;
1186 break;
1187 case MATCH_FRONT_ONLY:
1188 if (memcmp(str, buff, match) == 0)
1189 matched = 1;
1190 break;
1191 case MATCH_MIDDLE_ONLY:
1192 if (strstr(str, search))
1193 matched = 1;
1194 break;
1195 case MATCH_END_ONLY:
1196 ptr = strstr(str, search);
1197 if (ptr && (ptr[search_len] == 0))
1198 matched = 1;
1199 break;
1200 }
1201 if (matched)
1202 rec->flags |= flag;
1203 }
1204 pg = pg->next;
1205 }
1206 preempt_enable();
1207}
1208
1209static ssize_t
1210ftrace_regex_write(struct file *file, const char __user *ubuf,
1211 size_t cnt, loff_t *ppos, int enable)
1212{
1213 struct ftrace_iterator *iter;
1214 char ch;
1215 size_t read = 0;
1216 ssize_t ret;
1217
1218 if (!cnt || cnt < 0)
1219 return 0;
1220
1221 mutex_lock(&ftrace_regex_lock);
1222
1223 if (file->f_mode & FMODE_READ) {
1224 struct seq_file *m = file->private_data;
1225 iter = m->private;
1226 } else
1227 iter = file->private_data;
1228
1229 if (!*ppos) {
1230 iter->flags &= ~FTRACE_ITER_CONT;
1231 iter->buffer_idx = 0;
1232 }
1233
1234 ret = get_user(ch, ubuf++);
1235 if (ret)
1236 goto out;
1237 read++;
1238 cnt--;
1239
1240 if (!(iter->flags & ~FTRACE_ITER_CONT)) {
1241 /* skip white space */
1242 while (cnt && isspace(ch)) {
1243 ret = get_user(ch, ubuf++);
1244 if (ret)
1245 goto out;
1246 read++;
1247 cnt--;
1248 }
1249
1250 if (isspace(ch)) {
1251 file->f_pos += read;
1252 ret = read;
1253 goto out;
1254 }
1255
1256 iter->buffer_idx = 0;
1257 }
1258
1259 while (cnt && !isspace(ch)) {
1260 if (iter->buffer_idx < FTRACE_BUFF_MAX)
1261 iter->buffer[iter->buffer_idx++] = ch;
1262 else {
1263 ret = -EINVAL;
1264 goto out;
1265 }
1266 ret = get_user(ch, ubuf++);
1267 if (ret)
1268 goto out;
1269 read++;
1270 cnt--;
1271 }
1272
1273 if (isspace(ch)) {
1274 iter->filtered++;
1275 iter->buffer[iter->buffer_idx] = 0;
1276 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1277 iter->buffer_idx = 0;
1278 } else
1279 iter->flags |= FTRACE_ITER_CONT;
1280
1281
1282 file->f_pos += read;
1283
1284 ret = read;
1285 out:
1286 mutex_unlock(&ftrace_regex_lock);
1287
1288 return ret;
1289}
1290
/* Write handler for the filter file: ftrace_regex_write() with enable = 1. */
static ssize_t
ftrace_filter_write(struct file *file, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return ftrace_regex_write(file, ubuf, cnt, ppos, 1);
}
1297
/* Write handler for the notrace file: ftrace_regex_write() with enable = 0. */
static ssize_t
ftrace_notrace_write(struct file *file, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	return ftrace_regex_write(file, ubuf, cnt, ppos, 0);
}
1304
/*
 * In-kernel entry point behind ftrace_set_filter()/ftrace_set_notrace():
 * under ftrace_regex_lock, optionally reset the selected flag on all
 * records (@reset) and then apply pattern @buf of length @len, if
 * @buf is non-NULL. @enable selects filter (non-zero) or notrace
 * (zero). Does nothing when ftrace is disabled.
 */
static void
ftrace_set_regex(unsigned char *buf, int len, int reset, int enable)
{
	if (unlikely(ftrace_disabled))
		return;

	mutex_lock(&ftrace_regex_lock);
	if (reset)
		ftrace_filter_reset(enable);
	if (buf)
		ftrace_match(buf, len, enable);
	mutex_unlock(&ftrace_regex_lock);
}
1318
/**
 * ftrace_set_filter - set a function to filter on in ftrace
 * @buf: the string that holds the function filter text.
 * @len: the length of the string.
 * @reset: non zero to reset all filters before applying this filter.
 *
 * Filters denote which functions should be enabled when tracing is enabled.
 * If @buf is NULL and reset is set, all functions will be enabled for tracing.
 */
void ftrace_set_filter(unsigned char *buf, int len, int reset)
{
	ftrace_set_regex(buf, len, reset, 1);
}
1332
/**
 * ftrace_set_notrace - set a function to not trace in ftrace
 * @buf: the string that holds the function notrace text.
 * @len: the length of the string.
 * @reset: non zero to reset all filters before applying this filter.
 *
 * Notrace Filters denote which functions should not be enabled when tracing
 * is enabled. If @buf is NULL and reset is set, all functions will be enabled
 * for tracing.
 */
void ftrace_set_notrace(unsigned char *buf, int len, int reset)
{
	ftrace_set_regex(buf, len, reset, 0);
}
1347
1348static int
1349ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1350{
1351 struct seq_file *m = (struct seq_file *)file->private_data;
1352 struct ftrace_iterator *iter;
1353
1354 mutex_lock(&ftrace_regex_lock);
1355 if (file->f_mode & FMODE_READ) {
1356 iter = m->private;
1357
1358 seq_release(inode, file);
1359 } else
1360 iter = file->private_data;
1361
1362 if (iter->buffer_idx) {
1363 iter->filtered++;
1364 iter->buffer[iter->buffer_idx] = 0;
1365 ftrace_match(iter->buffer, iter->buffer_idx, enable);
1366 }
1367
1368 mutex_lock(&ftrace_sysctl_lock);
1369 mutex_lock(&ftraced_lock);
1370 if (iter->filtered && ftraced_suspend && ftrace_enabled)
1371 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1372 mutex_unlock(&ftraced_lock);
1373 mutex_unlock(&ftrace_sysctl_lock);
1374
1375 kfree(iter);
1376 mutex_unlock(&ftrace_regex_lock);
1377 return 0;
1378}
1379
1380static int
1381ftrace_filter_release(struct inode *inode, struct file *file)
1382{
1383 return ftrace_regex_release(inode, file, 1);
1384}
1385
1386static int
1387ftrace_notrace_release(struct inode *inode, struct file *file)
1388{
1389 return ftrace_regex_release(inode, file, 0);
1390}
1391
1392static ssize_t
1393ftraced_read(struct file *filp, char __user *ubuf,
1394 size_t cnt, loff_t *ppos)
1395{
1396 /* don't worry about races */
1397 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1398 int r = strlen(buf);
1399
1400 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1401}
1402
1403static ssize_t
1404ftraced_write(struct file *filp, const char __user *ubuf,
1405 size_t cnt, loff_t *ppos)
1406{
1407 char buf[64];
1408 long val;
1409 int ret;
1410
1411 if (cnt >= sizeof(buf))
1412 return -EINVAL;
1413
1414 if (copy_from_user(&buf, ubuf, cnt))
1415 return -EFAULT;
1416
1417 if (strncmp(buf, "enable", 6) == 0)
1418 val = 1;
1419 else if (strncmp(buf, "disable", 7) == 0)
1420 val = 0;
1421 else {
1422 buf[cnt] = 0;
1423
1424 ret = strict_strtoul(buf, 10, &val);
1425 if (ret < 0)
1426 return ret;
1427
1428 val = !!val;
1429 }
1430
1431 if (val)
1432 ftrace_enable_daemon();
1433 else
1434 ftrace_disable_daemon();
1435
1436 filp->f_pos += cnt;
1437
1438 return cnt;
1439}
1440
1441static struct file_operations ftrace_avail_fops = {
1442 .open = ftrace_avail_open,
1443 .read = seq_read,
1444 .llseek = seq_lseek,
1445 .release = ftrace_avail_release,
1446};
1447
1448static struct file_operations ftrace_failures_fops = {
1449 .open = ftrace_failures_open,
1450 .read = seq_read,
1451 .llseek = seq_lseek,
1452 .release = ftrace_avail_release,
1453};
1454
1455static struct file_operations ftrace_filter_fops = {
1456 .open = ftrace_filter_open,
1457 .read = ftrace_regex_read,
1458 .write = ftrace_filter_write,
1459 .llseek = ftrace_regex_lseek,
1460 .release = ftrace_filter_release,
1461};
1462
1463static struct file_operations ftrace_notrace_fops = {
1464 .open = ftrace_notrace_open,
1465 .read = ftrace_regex_read,
1466 .write = ftrace_notrace_write,
1467 .llseek = ftrace_regex_lseek,
1468 .release = ftrace_notrace_release,
1469};
1470
1471static struct file_operations ftraced_fops = {
1472 .open = tracing_open_generic,
1473 .read = ftraced_read,
1474 .write = ftraced_write,
1475};
1476
1477/**
1478 * ftrace_force_update - force an update to all recording ftrace functions
1479 */
1480int ftrace_force_update(void)
1481{
1482 int ret = 0;
1483
1484 if (unlikely(ftrace_disabled))
1485 return -ENODEV;
1486
1487 mutex_lock(&ftrace_sysctl_lock);
1488 mutex_lock(&ftraced_lock);
1489
1490 /*
1491 * If ftraced_trigger is not set, then there is nothing
1492 * to update.
1493 */
1494 if (ftraced_trigger && !ftrace_update_code())
1495 ret = -EBUSY;
1496
1497 mutex_unlock(&ftraced_lock);
1498 mutex_unlock(&ftrace_sysctl_lock);
1499
1500 return ret;
1501}
1502
1503static void ftrace_force_shutdown(void)
1504{
1505 struct task_struct *task;
1506 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1507
1508 mutex_lock(&ftraced_lock);
1509 task = ftraced_task;
1510 ftraced_task = NULL;
1511 ftraced_suspend = -1;
1512 ftrace_run_update_code(command);
1513 mutex_unlock(&ftraced_lock);
1514
1515 if (task)
1516 kthread_stop(task);
1517}
1518
1519static __init int ftrace_init_debugfs(void)
1520{
1521 struct dentry *d_tracer;
1522 struct dentry *entry;
1523
1524 d_tracer = tracing_init_dentry();
1525
1526 entry = debugfs_create_file("available_filter_functions", 0444,
1527 d_tracer, NULL, &ftrace_avail_fops);
1528 if (!entry)
1529 pr_warning("Could not create debugfs "
1530 "'available_filter_functions' entry\n");
1531
1532 entry = debugfs_create_file("failures", 0444,
1533 d_tracer, NULL, &ftrace_failures_fops);
1534 if (!entry)
1535 pr_warning("Could not create debugfs 'failures' entry\n");
1536
1537 entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
1538 NULL, &ftrace_filter_fops);
1539 if (!entry)
1540 pr_warning("Could not create debugfs "
1541 "'set_ftrace_filter' entry\n");
1542
1543 entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
1544 NULL, &ftrace_notrace_fops);
1545 if (!entry)
1546 pr_warning("Could not create debugfs "
1547 "'set_ftrace_notrace' entry\n");
1548
1549 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1550 NULL, &ftraced_fops);
1551 if (!entry)
1552 pr_warning("Could not create debugfs "
1553 "'ftraced_enabled' entry\n");
1554 return 0;
1555}
1556
1557fs_initcall(ftrace_init_debugfs);
1558
1559static int __init ftrace_dynamic_init(void)
1560{
1561 struct task_struct *p;
1562 unsigned long addr;
1563 int ret;
1564
1565 addr = (unsigned long)ftrace_record_ip;
1566
1567 stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
1568
1569 /* ftrace_dyn_arch_init places the return code in addr */
1570 if (addr) {
1571 ret = (int)addr;
1572 goto failed;
1573 }
1574
1575 ret = ftrace_dyn_table_alloc();
1576 if (ret)
1577 goto failed;
1578
1579 p = kthread_run(ftraced, NULL, "ftraced");
1580 if (IS_ERR(p)) {
1581 ret = -1;
1582 goto failed;
1583 }
1584
1585 last_ftrace_enabled = ftrace_enabled = 1;
1586 ftraced_task = p;
1587
1588 return 0;
1589
1590 failed:
1591 ftrace_disabled = 1;
1592 return ret;
1593}
1594
1595core_initcall(ftrace_dynamic_init);
1596#else
1597# define ftrace_startup() do { } while (0)
1598# define ftrace_shutdown() do { } while (0)
1599# define ftrace_startup_sysctl() do { } while (0)
1600# define ftrace_shutdown_sysctl() do { } while (0)
1601# define ftrace_force_shutdown() do { } while (0)
1602#endif /* CONFIG_DYNAMIC_FTRACE */
1603
1604/**
1605 * ftrace_kill - totally shutdown ftrace
1606 *
1607 * This is a safety measure. If something was detected that seems
1608 * wrong, calling this function will keep ftrace from doing
1609 * any more modifications, and updates.
1610 * used when something went wrong.
1611 */
1612void ftrace_kill(void)
1613{
1614 mutex_lock(&ftrace_sysctl_lock);
1615 ftrace_disabled = 1;
1616 ftrace_enabled = 0;
1617
1618 clear_ftrace_function();
1619 mutex_unlock(&ftrace_sysctl_lock);
1620
1621 /* Try to totally disable ftrace */
1622 ftrace_force_shutdown();
1623}
1624
1625/**
1626 * register_ftrace_function - register a function for profiling
1627 * @ops - ops structure that holds the function for profiling.
1628 *
1629 * Register a function to be called by all functions in the
1630 * kernel.
1631 *
1632 * Note: @ops->func and all the functions it calls must be labeled
1633 * with "notrace", otherwise it will go into a
1634 * recursive loop.
1635 */
1636int register_ftrace_function(struct ftrace_ops *ops)
1637{
1638 int ret;
1639
1640 if (unlikely(ftrace_disabled))
1641 return -1;
1642
1643 mutex_lock(&ftrace_sysctl_lock);
1644 ret = __register_ftrace_function(ops);
1645 ftrace_startup();
1646 mutex_unlock(&ftrace_sysctl_lock);
1647
1648 return ret;
1649}
1650
1651/**
1652 * unregister_ftrace_function - unregister a function for profiling.
1653 * @ops - ops structure that holds the function to unregister
1654 *
1655 * Unregister a function that was added to be called by ftrace profiling.
1656 */
1657int unregister_ftrace_function(struct ftrace_ops *ops)
1658{
1659 int ret;
1660
1661 mutex_lock(&ftrace_sysctl_lock);
1662 ret = __unregister_ftrace_function(ops);
1663 ftrace_shutdown();
1664 mutex_unlock(&ftrace_sysctl_lock);
1665
1666 return ret;
1667}
1668
1669int
1670ftrace_enable_sysctl(struct ctl_table *table, int write,
1671 struct file *file, void __user *buffer, size_t *lenp,
1672 loff_t *ppos)
1673{
1674 int ret;
1675
1676 if (unlikely(ftrace_disabled))
1677 return -ENODEV;
1678
1679 mutex_lock(&ftrace_sysctl_lock);
1680
1681 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
1682
1683 if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
1684 goto out;
1685
1686 last_ftrace_enabled = ftrace_enabled;
1687
1688 if (ftrace_enabled) {
1689
1690 ftrace_startup_sysctl();
1691
1692 /* we are starting ftrace again */
1693 if (ftrace_list != &ftrace_list_end) {
1694 if (ftrace_list->next == &ftrace_list_end)
1695 ftrace_trace_function = ftrace_list->func;
1696 else
1697 ftrace_trace_function = ftrace_list_func;
1698 }
1699
1700 } else {
1701 /* stopping ftrace calls (just send to ftrace_stub) */
1702 ftrace_trace_function = ftrace_stub;
1703
1704 ftrace_shutdown_sysctl();
1705 }
1706
1707 out:
1708 mutex_unlock(&ftrace_sysctl_lock);
1709 return ret;
1710}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
new file mode 100644
index 000000000000..9ade79369bfb
--- /dev/null
+++ b/kernel/trace/trace.c
@@ -0,0 +1,3100 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 William Lee Irwin III
13 */
14#include <linux/utsrelease.h>
15#include <linux/kallsyms.h>
16#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18#include <linux/pagemap.h>
19#include <linux/hardirq.h>
20#include <linux/linkage.h>
21#include <linux/uaccess.h>
22#include <linux/ftrace.h>
23#include <linux/module.h>
24#include <linux/percpu.h>
25#include <linux/ctype.h>
26#include <linux/init.h>
27#include <linux/poll.h>
28#include <linux/gfp.h>
29#include <linux/fs.h>
30#include <linux/kprobes.h>
31#include <linux/writeback.h>
32
33#include <linux/stacktrace.h>
34
35#include "trace.h"
36
37unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX;
38unsigned long __read_mostly tracing_thresh;
39
40static unsigned long __read_mostly tracing_nr_buffers;
41static cpumask_t __read_mostly tracing_buffer_mask;
42
43#define for_each_tracing_cpu(cpu) \
44 for_each_cpu_mask(cpu, tracing_buffer_mask)
45
46static int trace_alloc_page(void);
47static int trace_free_page(void);
48
49static int tracing_disabled = 1;
50
51static unsigned long tracing_pages_allocated;
52
53long
54ns2usecs(cycle_t nsec)
55{
56 nsec += 500;
57 do_div(nsec, 1000);
58 return nsec;
59}
60
61cycle_t ftrace_now(int cpu)
62{
63 return cpu_clock(cpu);
64}
65
66/*
67 * The global_trace is the descriptor that holds the tracing
68 * buffers for the live tracing. For each CPU, it contains
69 * a link list of pages that will store trace entries. The
70 * page descriptor of the pages in the memory is used to hold
71 * the link list by linking the lru item in the page descriptor
72 * to each of the pages in the buffer per CPU.
73 *
74 * For each active CPU there is a data field that holds the
75 * pages for the buffer for that CPU. Each CPU has the same number
76 * of pages allocated for its buffer.
77 */
78static struct trace_array global_trace;
79
80static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
81
82/*
83 * The max_tr is used to snapshot the global_trace when a maximum
84 * latency is reached. Some tracers will use this to store a maximum
85 * trace while it continues examining live traces.
86 *
87 * The buffers for the max_tr are set up the same as the global_trace.
88 * When a snapshot is taken, the link list of the max_tr is swapped
89 * with the link list of the global_trace and the buffers are reset for
90 * the global_trace so the tracing can continue.
91 */
92static struct trace_array max_tr;
93
94static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
95
96/* tracer_enabled is used to toggle activation of a tracer */
97static int tracer_enabled = 1;
98
99/*
100 * trace_nr_entries is the number of entries that is allocated
101 * for a buffer. Note, the number of entries is always rounded
102 * to ENTRIES_PER_PAGE.
103 */
104static unsigned long trace_nr_entries = 65536UL;
105
106/* trace_types holds a link list of available tracers. */
107static struct tracer *trace_types __read_mostly;
108
109/* current_trace points to the tracer that is currently active */
110static struct tracer *current_trace __read_mostly;
111
112/*
113 * max_tracer_type_len is used to simplify the allocating of
114 * buffers to read userspace tracer names. We keep track of
115 * the longest tracer name registered.
116 */
117static int max_tracer_type_len;
118
119/*
120 * trace_types_lock is used to protect the trace_types list.
121 * This lock is also used to keep user access serialized.
122 * Accesses from userspace will grab this lock while userspace
123 * activities happen inside the kernel.
124 */
125static DEFINE_MUTEX(trace_types_lock);
126
127/* trace_wait is a waitqueue for tasks blocked on trace_poll */
128static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
129
130/* trace_flags holds iter_ctrl options */
131unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
132
133static notrace void no_trace_init(struct trace_array *tr)
134{
135 int cpu;
136
137 if(tr->ctrl)
138 for_each_online_cpu(cpu)
139 tracing_reset(tr->data[cpu]);
140 tracer_enabled = 0;
141}
142
143/* dummy trace to disable tracing */
144static struct tracer no_tracer __read_mostly = {
145 .name = "none",
146 .init = no_trace_init
147};
148
149
150/**
151 * trace_wake_up - wake up tasks waiting for trace input
152 *
153 * Simply wakes up any task that is blocked on the trace_wait
154 * queue. These is used with trace_poll for tasks polling the trace.
155 */
156void trace_wake_up(void)
157{
158 /*
159 * The runqueue_is_locked() can fail, but this is the best we
160 * have for now:
161 */
162 if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
163 wake_up(&trace_wait);
164}
165
166#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
167
168static int __init set_nr_entries(char *str)
169{
170 unsigned long nr_entries;
171 int ret;
172
173 if (!str)
174 return 0;
175 ret = strict_strtoul(str, 0, &nr_entries);
176 /* nr_entries can not be zero */
177 if (ret < 0 || nr_entries == 0)
178 return 0;
179 trace_nr_entries = nr_entries;
180 return 1;
181}
182__setup("trace_entries=", set_nr_entries);
183
/* Convert nanoseconds to whole microseconds, discarding the remainder. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	enum { NSECS_IN_ONE_USEC = 1000 };
	unsigned long usecs = nsecs / NSECS_IN_ONE_USEC;

	return usecs;
}
188
189/*
190 * trace_flag_type is an enumeration that holds different
191 * states when a trace occurs. These are:
192 * IRQS_OFF - interrupts were disabled
193 * NEED_RESCHED - reschedule is requested
194 * HARDIRQ - inside an interrupt handler
195 * SOFTIRQ - inside a softirq handler
196 */
197enum trace_flag_type {
198 TRACE_FLAG_IRQS_OFF = 0x01,
199 TRACE_FLAG_NEED_RESCHED = 0x02,
200 TRACE_FLAG_HARDIRQ = 0x04,
201 TRACE_FLAG_SOFTIRQ = 0x08,
202};
203
204/*
205 * TRACE_ITER_SYM_MASK masks the options in trace_flags that
206 * control the output of kernel symbols.
207 */
208#define TRACE_ITER_SYM_MASK \
209 (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
210
211/* These must match the bit positions in trace_iterator_flags */
212static const char *trace_options[] = {
213 "print-parent",
214 "sym-offset",
215 "sym-addr",
216 "verbose",
217 "raw",
218 "hex",
219 "bin",
220 "block",
221 "stacktrace",
222 "sched-tree",
223 NULL
224};
225
226/*
227 * ftrace_max_lock is used to protect the swapping of buffers
228 * when taking a max snapshot. The buffers themselves are
229 * protected by per_cpu spinlocks. But the action of the swap
230 * needs its own lock.
231 *
232 * This is defined as a raw_spinlock_t in order to help
233 * with performance when lockdep debugging is enabled.
234 */
235static raw_spinlock_t ftrace_max_lock =
236 (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
237
238/*
239 * Copy the new maximum trace into the separate maximum-trace
240 * structure. (this way the maximum trace is permanently saved,
241 * for later retrieval via /debugfs/tracing/latency_trace)
242 */
243static void
244__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
245{
246 struct trace_array_cpu *data = tr->data[cpu];
247
248 max_tr.cpu = cpu;
249 max_tr.time_start = data->preempt_timestamp;
250
251 data = max_tr.data[cpu];
252 data->saved_latency = tracing_max_latency;
253
254 memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
255 data->pid = tsk->pid;
256 data->uid = tsk->uid;
257 data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
258 data->policy = tsk->policy;
259 data->rt_priority = tsk->rt_priority;
260
261 /* record this tasks comm */
262 tracing_record_cmdline(current);
263}
264
265#define CHECK_COND(cond) \
266 if (unlikely(cond)) { \
267 tracing_disabled = 1; \
268 WARN_ON(1); \
269 return -1; \
270 }
271
272/**
273 * check_pages - integrity check of trace buffers
274 *
275 * As a safety measure we check to make sure the data pages have not
276 * been corrupted.
277 */
278int check_pages(struct trace_array_cpu *data)
279{
280 struct page *page, *tmp;
281
282 CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
283 CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
284
285 list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
286 CHECK_COND(page->lru.next->prev != &page->lru);
287 CHECK_COND(page->lru.prev->next != &page->lru);
288 }
289
290 return 0;
291}
292
293/**
294 * head_page - page address of the first page in per_cpu buffer.
295 *
296 * head_page returns the page address of the first page in
297 * a per_cpu buffer. This also preforms various consistency
298 * checks to make sure the buffer has not been corrupted.
299 */
300void *head_page(struct trace_array_cpu *data)
301{
302 struct page *page;
303
304 if (list_empty(&data->trace_pages))
305 return NULL;
306
307 page = list_entry(data->trace_pages.next, struct page, lru);
308 BUG_ON(&page->lru == &data->trace_pages);
309
310 return page_address(page);
311}
312
313/**
314 * trace_seq_printf - sequence printing of trace information
315 * @s: trace sequence descriptor
316 * @fmt: printf format string
317 *
318 * The tracer may use either sequence operations or its own
319 * copy to user routines. To simplify formatting of a trace
320 * trace_seq_printf is used to store strings into a special
321 * buffer (@s). Then the output may be either used by
322 * the sequencer or pulled into another buffer.
323 */
324int
325trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
326{
327 int len = (PAGE_SIZE - 1) - s->len;
328 va_list ap;
329 int ret;
330
331 if (!len)
332 return 0;
333
334 va_start(ap, fmt);
335 ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
336 va_end(ap);
337
338 /* If we can't write it all, don't bother writing anything */
339 if (ret >= len)
340 return 0;
341
342 s->len += ret;
343
344 return len;
345}
346
347/**
348 * trace_seq_puts - trace sequence printing of simple string
349 * @s: trace sequence descriptor
350 * @str: simple string to record
351 *
352 * The tracer may use either the sequence operations or its own
353 * copy to user routines. This function records a simple string
354 * into a special buffer (@s) for later retrieval by a sequencer
355 * or other mechanism.
356 */
357static int
358trace_seq_puts(struct trace_seq *s, const char *str)
359{
360 int len = strlen(str);
361
362 if (len > ((PAGE_SIZE - 1) - s->len))
363 return 0;
364
365 memcpy(s->buffer + s->len, str, len);
366 s->len += len;
367
368 return len;
369}
370
371static int
372trace_seq_putc(struct trace_seq *s, unsigned char c)
373{
374 if (s->len >= (PAGE_SIZE - 1))
375 return 0;
376
377 s->buffer[s->len++] = c;
378
379 return 1;
380}
381
382static int
383trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
384{
385 if (len > ((PAGE_SIZE - 1) - s->len))
386 return 0;
387
388 memcpy(s->buffer + s->len, mem, len);
389 s->len += len;
390
391 return len;
392}
393
394#define HEX_CHARS 17
395static const char hex2asc[] = "0123456789abcdef";
396
397static int
398trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
399{
400 unsigned char hex[HEX_CHARS];
401 unsigned char *data = mem;
402 unsigned char byte;
403 int i, j;
404
405 BUG_ON(len >= HEX_CHARS);
406
407#ifdef __BIG_ENDIAN
408 for (i = 0, j = 0; i < len; i++) {
409#else
410 for (i = len-1, j = 0; i >= 0; i--) {
411#endif
412 byte = data[i];
413
414 hex[j++] = hex2asc[byte & 0x0f];
415 hex[j++] = hex2asc[byte >> 4];
416 }
417 hex[j++] = ' ';
418
419 return trace_seq_putmem(s, hex, j);
420}
421
422static void
423trace_seq_reset(struct trace_seq *s)
424{
425 s->len = 0;
426 s->readpos = 0;
427}
428
429ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
430{
431 int len;
432 int ret;
433
434 if (s->len <= s->readpos)
435 return -EBUSY;
436
437 len = s->len - s->readpos;
438 if (cnt > len)
439 cnt = len;
440 ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
441 if (ret)
442 return -EFAULT;
443
444 s->readpos += len;
445 return cnt;
446}
447
448static void
449trace_print_seq(struct seq_file *m, struct trace_seq *s)
450{
451 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
452
453 s->buffer[len] = 0;
454 seq_puts(m, s->buffer);
455
456 trace_seq_reset(s);
457}
458
459/*
460 * flip the trace buffers between two trace descriptors.
461 * This usually is the buffers between the global_trace and
462 * the max_tr to record a snapshot of a current trace.
463 *
464 * The ftrace_max_lock must be held.
465 */
466static void
467flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
468{
469 struct list_head flip_pages;
470
471 INIT_LIST_HEAD(&flip_pages);
472
473 memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
474 sizeof(struct trace_array_cpu) -
475 offsetof(struct trace_array_cpu, trace_head_idx));
476
477 check_pages(tr1);
478 check_pages(tr2);
479 list_splice_init(&tr1->trace_pages, &flip_pages);
480 list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
481 list_splice_init(&flip_pages, &tr2->trace_pages);
482 BUG_ON(!list_empty(&flip_pages));
483 check_pages(tr1);
484 check_pages(tr2);
485}
486
487/**
488 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
489 * @tr: tracer
490 * @tsk: the task with the latency
491 * @cpu: The cpu that initiated the trace.
492 *
493 * Flip the buffers between the @tr and the max_tr and record information
494 * about which task was the cause of this latency.
495 */
496void
497update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
498{
499 struct trace_array_cpu *data;
500 int i;
501
502 WARN_ON_ONCE(!irqs_disabled());
503 __raw_spin_lock(&ftrace_max_lock);
504 /* clear out all the previous traces */
505 for_each_tracing_cpu(i) {
506 data = tr->data[i];
507 flip_trace(max_tr.data[i], data);
508 tracing_reset(data);
509 }
510
511 __update_max_tr(tr, tsk, cpu);
512 __raw_spin_unlock(&ftrace_max_lock);
513}
514
515/**
516 * update_max_tr_single - only copy one trace over, and reset the rest
517 * @tr - tracer
518 * @tsk - task with the latency
519 * @cpu - the cpu of the buffer to copy.
520 *
521 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
522 */
523void
524update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
525{
526 struct trace_array_cpu *data = tr->data[cpu];
527 int i;
528
529 WARN_ON_ONCE(!irqs_disabled());
530 __raw_spin_lock(&ftrace_max_lock);
531 for_each_tracing_cpu(i)
532 tracing_reset(max_tr.data[i]);
533
534 flip_trace(max_tr.data[cpu], data);
535 tracing_reset(data);
536
537 __update_max_tr(tr, tsk, cpu);
538 __raw_spin_unlock(&ftrace_max_lock);
539}
540
541/**
542 * register_tracer - register a tracer with the ftrace system.
543 * @type - the plugin for the tracer
544 *
545 * Register a new plugin tracer.
546 */
547int register_tracer(struct tracer *type)
548{
549 struct tracer *t;
550 int len;
551 int ret = 0;
552
553 if (!type->name) {
554 pr_info("Tracer must have a name\n");
555 return -1;
556 }
557
558 mutex_lock(&trace_types_lock);
559 for (t = trace_types; t; t = t->next) {
560 if (strcmp(type->name, t->name) == 0) {
561 /* already found */
562 pr_info("Trace %s already registered\n",
563 type->name);
564 ret = -1;
565 goto out;
566 }
567 }
568
569#ifdef CONFIG_FTRACE_STARTUP_TEST
570 if (type->selftest) {
571 struct tracer *saved_tracer = current_trace;
572 struct trace_array_cpu *data;
573 struct trace_array *tr = &global_trace;
574 int saved_ctrl = tr->ctrl;
575 int i;
576 /*
577 * Run a selftest on this tracer.
578 * Here we reset the trace buffer, and set the current
579 * tracer to be this tracer. The tracer can then run some
580 * internal tracing to verify that everything is in order.
581 * If we fail, we do not register this tracer.
582 */
583 for_each_tracing_cpu(i) {
584 data = tr->data[i];
585 if (!head_page(data))
586 continue;
587 tracing_reset(data);
588 }
589 current_trace = type;
590 tr->ctrl = 0;
591 /* the test is responsible for initializing and enabling */
592 pr_info("Testing tracer %s: ", type->name);
593 ret = type->selftest(type, tr);
594 /* the test is responsible for resetting too */
595 current_trace = saved_tracer;
596 tr->ctrl = saved_ctrl;
597 if (ret) {
598 printk(KERN_CONT "FAILED!\n");
599 goto out;
600 }
601 /* Only reset on passing, to avoid touching corrupted buffers */
602 for_each_tracing_cpu(i) {
603 data = tr->data[i];
604 if (!head_page(data))
605 continue;
606 tracing_reset(data);
607 }
608 printk(KERN_CONT "PASSED\n");
609 }
610#endif
611
612 type->next = trace_types;
613 trace_types = type;
614 len = strlen(type->name);
615 if (len > max_tracer_type_len)
616 max_tracer_type_len = len;
617
618 out:
619 mutex_unlock(&trace_types_lock);
620
621 return ret;
622}
623
624void unregister_tracer(struct tracer *type)
625{
626 struct tracer **t;
627 int len;
628
629 mutex_lock(&trace_types_lock);
630 for (t = &trace_types; *t; t = &(*t)->next) {
631 if (*t == type)
632 goto found;
633 }
634 pr_info("Trace %s not registered\n", type->name);
635 goto out;
636
637 found:
638 *t = (*t)->next;
639 if (strlen(type->name) != max_tracer_type_len)
640 goto out;
641
642 max_tracer_type_len = 0;
643 for (t = &trace_types; *t; t = &(*t)->next) {
644 len = strlen((*t)->name);
645 if (len > max_tracer_type_len)
646 max_tracer_type_len = len;
647 }
648 out:
649 mutex_unlock(&trace_types_lock);
650}
651
652void tracing_reset(struct trace_array_cpu *data)
653{
654 data->trace_idx = 0;
655 data->overrun = 0;
656 data->trace_head = data->trace_tail = head_page(data);
657 data->trace_head_idx = 0;
658 data->trace_tail_idx = 0;
659}
660
661#define SAVED_CMDLINES 128
662static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
663static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
664static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
665static int cmdline_idx;
666static DEFINE_SPINLOCK(trace_cmdline_lock);
667
668/* temporary disable recording */
669atomic_t trace_record_cmdline_disabled __read_mostly;
670
671static void trace_init_cmdlines(void)
672{
673 memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
674 memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
675 cmdline_idx = 0;
676}
677
678void trace_stop_cmdline_recording(void);
679
680static void trace_save_cmdline(struct task_struct *tsk)
681{
682 unsigned map;
683 unsigned idx;
684
685 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
686 return;
687
688 /*
689 * It's not the end of the world if we don't get
690 * the lock, but we also don't want to spin
691 * nor do we want to disable interrupts,
692 * so if we miss here, then better luck next time.
693 */
694 if (!spin_trylock(&trace_cmdline_lock))
695 return;
696
697 idx = map_pid_to_cmdline[tsk->pid];
698 if (idx >= SAVED_CMDLINES) {
699 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
700
701 map = map_cmdline_to_pid[idx];
702 if (map <= PID_MAX_DEFAULT)
703 map_pid_to_cmdline[map] = (unsigned)-1;
704
705 map_pid_to_cmdline[tsk->pid] = idx;
706
707 cmdline_idx = idx;
708 }
709
710 memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
711
712 spin_unlock(&trace_cmdline_lock);
713}
714
715static char *trace_find_cmdline(int pid)
716{
717 char *cmdline = "<...>";
718 unsigned map;
719
720 if (!pid)
721 return "<idle>";
722
723 if (pid > PID_MAX_DEFAULT)
724 goto out;
725
726 map = map_pid_to_cmdline[pid];
727 if (map >= SAVED_CMDLINES)
728 goto out;
729
730 cmdline = saved_cmdlines[map];
731
732 out:
733 return cmdline;
734}
735
736void tracing_record_cmdline(struct task_struct *tsk)
737{
738 if (atomic_read(&trace_record_cmdline_disabled))
739 return;
740
741 trace_save_cmdline(tsk);
742}
743
744static inline struct list_head *
745trace_next_list(struct trace_array_cpu *data, struct list_head *next)
746{
747 /*
748 * Roundrobin - but skip the head (which is not a real page):
749 */
750 next = next->next;
751 if (unlikely(next == &data->trace_pages))
752 next = next->next;
753 BUG_ON(next == &data->trace_pages);
754
755 return next;
756}
757
758static inline void *
759trace_next_page(struct trace_array_cpu *data, void *addr)
760{
761 struct list_head *next;
762 struct page *page;
763
764 page = virt_to_page(addr);
765
766 next = trace_next_list(data, &page->lru);
767 page = list_entry(next, struct page, lru);
768
769 return page_address(page);
770}
771
772static inline struct trace_entry *
773tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
774{
775 unsigned long idx, idx_next;
776 struct trace_entry *entry;
777
778 data->trace_idx++;
779 idx = data->trace_head_idx;
780 idx_next = idx + 1;
781
782 BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
783
784 entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
785
786 if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
787 data->trace_head = trace_next_page(data, data->trace_head);
788 idx_next = 0;
789 }
790
791 if (data->trace_head == data->trace_tail &&
792 idx_next == data->trace_tail_idx) {
793 /* overrun */
794 data->overrun++;
795 data->trace_tail_idx++;
796 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
797 data->trace_tail =
798 trace_next_page(data, data->trace_tail);
799 data->trace_tail_idx = 0;
800 }
801 }
802
803 data->trace_head_idx = idx_next;
804
805 return entry;
806}
807
808static inline void
809tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
810{
811 struct task_struct *tsk = current;
812 unsigned long pc;
813
814 pc = preempt_count();
815
816 entry->preempt_count = pc & 0xff;
817 entry->pid = (tsk) ? tsk->pid : 0;
818 entry->t = ftrace_now(raw_smp_processor_id());
819 entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
820 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
821 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
822 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
823}
824
825void
826trace_function(struct trace_array *tr, struct trace_array_cpu *data,
827 unsigned long ip, unsigned long parent_ip, unsigned long flags)
828{
829 struct trace_entry *entry;
830 unsigned long irq_flags;
831
832 raw_local_irq_save(irq_flags);
833 __raw_spin_lock(&data->lock);
834 entry = tracing_get_trace_entry(tr, data);
835 tracing_generic_entry_update(entry, flags);
836 entry->type = TRACE_FN;
837 entry->fn.ip = ip;
838 entry->fn.parent_ip = parent_ip;
839 __raw_spin_unlock(&data->lock);
840 raw_local_irq_restore(irq_flags);
841}
842
843void
844ftrace(struct trace_array *tr, struct trace_array_cpu *data,
845 unsigned long ip, unsigned long parent_ip, unsigned long flags)
846{
847 if (likely(!atomic_read(&data->disabled)))
848 trace_function(tr, data, ip, parent_ip, flags);
849}
850
851void __trace_stack(struct trace_array *tr,
852 struct trace_array_cpu *data,
853 unsigned long flags,
854 int skip)
855{
856 struct trace_entry *entry;
857 struct stack_trace trace;
858
859 if (!(trace_flags & TRACE_ITER_STACKTRACE))
860 return;
861
862 entry = tracing_get_trace_entry(tr, data);
863 tracing_generic_entry_update(entry, flags);
864 entry->type = TRACE_STACK;
865
866 memset(&entry->stack, 0, sizeof(entry->stack));
867
868 trace.nr_entries = 0;
869 trace.max_entries = FTRACE_STACK_ENTRIES;
870 trace.skip = skip;
871 trace.entries = entry->stack.caller;
872
873 save_stack_trace(&trace);
874}
875
876void
877__trace_special(void *__tr, void *__data,
878 unsigned long arg1, unsigned long arg2, unsigned long arg3)
879{
880 struct trace_array_cpu *data = __data;
881 struct trace_array *tr = __tr;
882 struct trace_entry *entry;
883 unsigned long irq_flags;
884
885 raw_local_irq_save(irq_flags);
886 __raw_spin_lock(&data->lock);
887 entry = tracing_get_trace_entry(tr, data);
888 tracing_generic_entry_update(entry, 0);
889 entry->type = TRACE_SPECIAL;
890 entry->special.arg1 = arg1;
891 entry->special.arg2 = arg2;
892 entry->special.arg3 = arg3;
893 __trace_stack(tr, data, irq_flags, 4);
894 __raw_spin_unlock(&data->lock);
895 raw_local_irq_restore(irq_flags);
896
897 trace_wake_up();
898}
899
900void
901tracing_sched_switch_trace(struct trace_array *tr,
902 struct trace_array_cpu *data,
903 struct task_struct *prev,
904 struct task_struct *next,
905 unsigned long flags)
906{
907 struct trace_entry *entry;
908 unsigned long irq_flags;
909
910 raw_local_irq_save(irq_flags);
911 __raw_spin_lock(&data->lock);
912 entry = tracing_get_trace_entry(tr, data);
913 tracing_generic_entry_update(entry, flags);
914 entry->type = TRACE_CTX;
915 entry->ctx.prev_pid = prev->pid;
916 entry->ctx.prev_prio = prev->prio;
917 entry->ctx.prev_state = prev->state;
918 entry->ctx.next_pid = next->pid;
919 entry->ctx.next_prio = next->prio;
920 entry->ctx.next_state = next->state;
921 __trace_stack(tr, data, flags, 5);
922 __raw_spin_unlock(&data->lock);
923 raw_local_irq_restore(irq_flags);
924}
925
926void
927tracing_sched_wakeup_trace(struct trace_array *tr,
928 struct trace_array_cpu *data,
929 struct task_struct *wakee,
930 struct task_struct *curr,
931 unsigned long flags)
932{
933 struct trace_entry *entry;
934 unsigned long irq_flags;
935
936 raw_local_irq_save(irq_flags);
937 __raw_spin_lock(&data->lock);
938 entry = tracing_get_trace_entry(tr, data);
939 tracing_generic_entry_update(entry, flags);
940 entry->type = TRACE_WAKE;
941 entry->ctx.prev_pid = curr->pid;
942 entry->ctx.prev_prio = curr->prio;
943 entry->ctx.prev_state = curr->state;
944 entry->ctx.next_pid = wakee->pid;
945 entry->ctx.next_prio = wakee->prio;
946 entry->ctx.next_state = wakee->state;
947 __trace_stack(tr, data, flags, 6);
948 __raw_spin_unlock(&data->lock);
949 raw_local_irq_restore(irq_flags);
950
951 trace_wake_up();
952}
953
954void
955ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
956{
957 struct trace_array *tr = &global_trace;
958 struct trace_array_cpu *data;
959 unsigned long flags;
960 long disabled;
961 int cpu;
962
963 if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
964 return;
965
966 local_irq_save(flags);
967 cpu = raw_smp_processor_id();
968 data = tr->data[cpu];
969 disabled = atomic_inc_return(&data->disabled);
970
971 if (likely(disabled == 1))
972 __trace_special(tr, data, arg1, arg2, arg3);
973
974 atomic_dec(&data->disabled);
975 local_irq_restore(flags);
976}
977
978#ifdef CONFIG_FTRACE
979static void
980function_trace_call(unsigned long ip, unsigned long parent_ip)
981{
982 struct trace_array *tr = &global_trace;
983 struct trace_array_cpu *data;
984 unsigned long flags;
985 long disabled;
986 int cpu;
987
988 if (unlikely(!tracer_enabled))
989 return;
990
991 if (skip_trace(ip))
992 return;
993
994 local_irq_save(flags);
995 cpu = raw_smp_processor_id();
996 data = tr->data[cpu];
997 disabled = atomic_inc_return(&data->disabled);
998
999 if (likely(disabled == 1))
1000 trace_function(tr, data, ip, parent_ip, flags);
1001
1002 atomic_dec(&data->disabled);
1003 local_irq_restore(flags);
1004}
1005
1006static struct ftrace_ops trace_ops __read_mostly =
1007{
1008 .func = function_trace_call,
1009};
1010
1011void tracing_start_function_trace(void)
1012{
1013 register_ftrace_function(&trace_ops);
1014}
1015
1016void tracing_stop_function_trace(void)
1017{
1018 unregister_ftrace_function(&trace_ops);
1019}
1020#endif
1021
/* Bits stored in trace_iterator->iter_flags describing the opened file. */
enum trace_file_type {
	/* file was opened through the latency-format entry point */
	TRACE_FILE_LAT_FMT	= 1,
};
1025
1026static struct trace_entry *
1027trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1028 struct trace_iterator *iter, int cpu)
1029{
1030 struct page *page;
1031 struct trace_entry *array;
1032
1033 if (iter->next_idx[cpu] >= tr->entries ||
1034 iter->next_idx[cpu] >= data->trace_idx ||
1035 (data->trace_head == data->trace_tail &&
1036 data->trace_head_idx == data->trace_tail_idx))
1037 return NULL;
1038
1039 if (!iter->next_page[cpu]) {
1040 /* Initialize the iterator for this cpu trace buffer */
1041 WARN_ON(!data->trace_tail);
1042 page = virt_to_page(data->trace_tail);
1043 iter->next_page[cpu] = &page->lru;
1044 iter->next_page_idx[cpu] = data->trace_tail_idx;
1045 }
1046
1047 page = list_entry(iter->next_page[cpu], struct page, lru);
1048 BUG_ON(&data->trace_pages == &page->lru);
1049
1050 array = page_address(page);
1051
1052 WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1053 return &array[iter->next_page_idx[cpu]];
1054}
1055
1056static struct trace_entry *
1057find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1058{
1059 struct trace_array *tr = iter->tr;
1060 struct trace_entry *ent, *next = NULL;
1061 int next_cpu = -1;
1062 int cpu;
1063
1064 for_each_tracing_cpu(cpu) {
1065 if (!head_page(tr->data[cpu]))
1066 continue;
1067 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1068 /*
1069 * Pick the entry with the smallest timestamp:
1070 */
1071 if (ent && (!next || ent->t < next->t)) {
1072 next = ent;
1073 next_cpu = cpu;
1074 }
1075 }
1076
1077 if (ent_cpu)
1078 *ent_cpu = next_cpu;
1079
1080 return next;
1081}
1082
1083static void trace_iterator_increment(struct trace_iterator *iter)
1084{
1085 iter->idx++;
1086 iter->next_idx[iter->cpu]++;
1087 iter->next_page_idx[iter->cpu]++;
1088
1089 if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1090 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1091
1092 iter->next_page_idx[iter->cpu] = 0;
1093 iter->next_page[iter->cpu] =
1094 trace_next_list(data, iter->next_page[iter->cpu]);
1095 }
1096}
1097
1098static void trace_consume(struct trace_iterator *iter)
1099{
1100 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1101
1102 data->trace_tail_idx++;
1103 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1104 data->trace_tail = trace_next_page(data, data->trace_tail);
1105 data->trace_tail_idx = 0;
1106 }
1107
1108 /* Check if we empty it, then reset the index */
1109 if (data->trace_head == data->trace_tail &&
1110 data->trace_head_idx == data->trace_tail_idx)
1111 data->trace_idx = 0;
1112}
1113
1114static void *find_next_entry_inc(struct trace_iterator *iter)
1115{
1116 struct trace_entry *next;
1117 int next_cpu = -1;
1118
1119 next = find_next_entry(iter, &next_cpu);
1120
1121 iter->prev_ent = iter->ent;
1122 iter->prev_cpu = iter->cpu;
1123
1124 iter->ent = next;
1125 iter->cpu = next_cpu;
1126
1127 if (next)
1128 trace_iterator_increment(iter);
1129
1130 return next ? iter : NULL;
1131}
1132
1133static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1134{
1135 struct trace_iterator *iter = m->private;
1136 void *last_ent = iter->ent;
1137 int i = (int)*pos;
1138 void *ent;
1139
1140 (*pos)++;
1141
1142 /* can't go backwards */
1143 if (iter->idx > i)
1144 return NULL;
1145
1146 if (iter->idx < 0)
1147 ent = find_next_entry_inc(iter);
1148 else
1149 ent = iter;
1150
1151 while (ent && iter->idx < i)
1152 ent = find_next_entry_inc(iter);
1153
1154 iter->pos = *pos;
1155
1156 if (last_ent && !ent)
1157 seq_puts(m, "\n\nvim:ft=help\n");
1158
1159 return ent;
1160}
1161
1162static void *s_start(struct seq_file *m, loff_t *pos)
1163{
1164 struct trace_iterator *iter = m->private;
1165 void *p = NULL;
1166 loff_t l = 0;
1167 int i;
1168
1169 mutex_lock(&trace_types_lock);
1170
1171 if (!current_trace || current_trace != iter->trace) {
1172 mutex_unlock(&trace_types_lock);
1173 return NULL;
1174 }
1175
1176 atomic_inc(&trace_record_cmdline_disabled);
1177
1178 /* let the tracer grab locks here if needed */
1179 if (current_trace->start)
1180 current_trace->start(iter);
1181
1182 if (*pos != iter->pos) {
1183 iter->ent = NULL;
1184 iter->cpu = 0;
1185 iter->idx = -1;
1186 iter->prev_ent = NULL;
1187 iter->prev_cpu = -1;
1188
1189 for_each_tracing_cpu(i) {
1190 iter->next_idx[i] = 0;
1191 iter->next_page[i] = NULL;
1192 }
1193
1194 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1195 ;
1196
1197 } else {
1198 l = *pos - 1;
1199 p = s_next(m, p, &l);
1200 }
1201
1202 return p;
1203}
1204
1205static void s_stop(struct seq_file *m, void *p)
1206{
1207 struct trace_iterator *iter = m->private;
1208
1209 atomic_dec(&trace_record_cmdline_disabled);
1210
1211 /* let the tracer release locks here if needed */
1212 if (current_trace && current_trace == iter->trace && iter->trace->stop)
1213 iter->trace->stop(iter);
1214
1215 mutex_unlock(&trace_types_lock);
1216}
1217
/* Printed in place of a caller address that was replaced by a kretprobe */
#define KRETPROBE_MSG "[unknown/kretprobe'd]"

/*
 * Return non-zero if @addr is the kretprobe trampoline; always 0 when
 * kretprobes are not configured.
 */
static inline int kretprobed(unsigned long addr)
{
#ifdef CONFIG_KRETPROBES
	return addr == (unsigned long)kretprobe_trampoline;
#else
	return 0;
#endif /* CONFIG_KRETPROBES */
}
1231
/*
 * Print the symbol name for @address (no offset) through @fmt.
 * Without CONFIG_KALLSYMS nothing is printed and 1 is returned.
 */
static int
seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char name[KSYM_SYMBOL_LEN];

	kallsyms_lookup(address, NULL, NULL, NULL, name);

	return trace_seq_printf(s, fmt, name);
#else
	return 1;
#endif
}
1244
/*
 * Print @address as formatted by sprint_symbol() through @fmt.
 * Without CONFIG_KALLSYMS nothing is printed and 1 is returned.
 */
static int
seq_print_sym_offset(struct trace_seq *s, const char *fmt,
		     unsigned long address)
{
#ifdef CONFIG_KALLSYMS
	char name[KSYM_SYMBOL_LEN];

	sprint_symbol(name, address);

	return trace_seq_printf(s, fmt, name);
#else
	return 1;
#endif
}
1257
/* printf format for an instruction pointer, zero-padded to word width */
#ifdef CONFIG_64BIT
# define IP_FMT "%016lx"
#else
# define IP_FMT "%08lx"
#endif
1263
1264static int
1265seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1266{
1267 int ret;
1268
1269 if (!ip)
1270 return trace_seq_printf(s, "0");
1271
1272 if (sym_flags & TRACE_ITER_SYM_OFFSET)
1273 ret = seq_print_sym_offset(s, "%s", ip);
1274 else
1275 ret = seq_print_sym_short(s, "%s", ip);
1276
1277 if (!ret)
1278 return 0;
1279
1280 if (sym_flags & TRACE_ITER_SYM_ADDR)
1281 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1282 return ret;
1283}
1284
1285static void print_lat_help_header(struct seq_file *m)
1286{
1287 seq_puts(m, "# _------=> CPU# \n");
1288 seq_puts(m, "# / _-----=> irqs-off \n");
1289 seq_puts(m, "# | / _----=> need-resched \n");
1290 seq_puts(m, "# || / _---=> hardirq/softirq \n");
1291 seq_puts(m, "# ||| / _--=> preempt-depth \n");
1292 seq_puts(m, "# |||| / \n");
1293 seq_puts(m, "# ||||| delay \n");
1294 seq_puts(m, "# cmd pid ||||| time | caller \n");
1295 seq_puts(m, "# \\ / ||||| \\ | / \n");
1296}
1297
1298static void print_func_help_header(struct seq_file *m)
1299{
1300 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n");
1301 seq_puts(m, "# | | | | |\n");
1302}
1303
1304
1305static void
1306print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1307{
1308 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1309 struct trace_array *tr = iter->tr;
1310 struct trace_array_cpu *data = tr->data[tr->cpu];
1311 struct tracer *type = current_trace;
1312 unsigned long total = 0;
1313 unsigned long entries = 0;
1314 int cpu;
1315 const char *name = "preemption";
1316
1317 if (type)
1318 name = type->name;
1319
1320 for_each_tracing_cpu(cpu) {
1321 if (head_page(tr->data[cpu])) {
1322 total += tr->data[cpu]->trace_idx;
1323 if (tr->data[cpu]->trace_idx > tr->entries)
1324 entries += tr->entries;
1325 else
1326 entries += tr->data[cpu]->trace_idx;
1327 }
1328 }
1329
1330 seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1331 name, UTS_RELEASE);
1332 seq_puts(m, "-----------------------------------"
1333 "---------------------------------\n");
1334 seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1335 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1336 nsecs_to_usecs(data->saved_latency),
1337 entries,
1338 total,
1339 tr->cpu,
1340#if defined(CONFIG_PREEMPT_NONE)
1341 "server",
1342#elif defined(CONFIG_PREEMPT_VOLUNTARY)
1343 "desktop",
1344#elif defined(CONFIG_PREEMPT_DESKTOP)
1345 "preempt",
1346#else
1347 "unknown",
1348#endif
1349 /* These are reserved for later use */
1350 0, 0, 0, 0);
1351#ifdef CONFIG_SMP
1352 seq_printf(m, " #P:%d)\n", num_online_cpus());
1353#else
1354 seq_puts(m, ")\n");
1355#endif
1356 seq_puts(m, " -----------------\n");
1357 seq_printf(m, " | task: %.16s-%d "
1358 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1359 data->comm, data->pid, data->uid, data->nice,
1360 data->policy, data->rt_priority);
1361 seq_puts(m, " -----------------\n");
1362
1363 if (data->critical_start) {
1364 seq_puts(m, " => started at: ");
1365 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1366 trace_print_seq(m, &iter->seq);
1367 seq_puts(m, "\n => ended at: ");
1368 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1369 trace_print_seq(m, &iter->seq);
1370 seq_puts(m, "\n");
1371 }
1372
1373 seq_puts(m, "\n");
1374}
1375
1376static void
1377lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1378{
1379 int hardirq, softirq;
1380 char *comm;
1381
1382 comm = trace_find_cmdline(entry->pid);
1383
1384 trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1385 trace_seq_printf(s, "%d", cpu);
1386 trace_seq_printf(s, "%c%c",
1387 (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1388 ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1389
1390 hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1391 softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1392 if (hardirq && softirq) {
1393 trace_seq_putc(s, 'H');
1394 } else {
1395 if (hardirq) {
1396 trace_seq_putc(s, 'h');
1397 } else {
1398 if (softirq)
1399 trace_seq_putc(s, 's');
1400 else
1401 trace_seq_putc(s, '.');
1402 }
1403 }
1404
1405 if (entry->preempt_count)
1406 trace_seq_printf(s, "%x", entry->preempt_count);
1407 else
1408 trace_seq_puts(s, ".");
1409}
1410
/* Deltas (in usecs) above this are flagged with '!' in latency output */
unsigned long preempt_mark_thresh = 100;

/*
 * Print the absolute timestamp followed by a delay marker:
 * '!' if the gap to the next entry exceeds preempt_mark_thresh,
 * '+' if it exceeds 1 usec, blank otherwise.
 */
static void
lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
		    unsigned long rel_usecs)
{
	const char *marker;

	trace_seq_printf(s, " %4lldus", abs_usecs);

	if (rel_usecs > preempt_mark_thresh)
		marker = "!: ";
	else if (rel_usecs > 1)
		marker = "+: ";
	else
		marker = " : ";

	trace_seq_puts(s, marker);
}
1425
1426static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1427
1428static int
1429print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1430{
1431 struct trace_seq *s = &iter->seq;
1432 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1433 struct trace_entry *next_entry = find_next_entry(iter, NULL);
1434 unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1435 struct trace_entry *entry = iter->ent;
1436 unsigned long abs_usecs;
1437 unsigned long rel_usecs;
1438 char *comm;
1439 int S, T;
1440 int i;
1441 unsigned state;
1442
1443 if (!next_entry)
1444 next_entry = entry;
1445 rel_usecs = ns2usecs(next_entry->t - entry->t);
1446 abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1447
1448 if (verbose) {
1449 comm = trace_find_cmdline(entry->pid);
1450 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1451 " %ld.%03ldms (+%ld.%03ldms): ",
1452 comm,
1453 entry->pid, cpu, entry->flags,
1454 entry->preempt_count, trace_idx,
1455 ns2usecs(entry->t),
1456 abs_usecs/1000,
1457 abs_usecs % 1000, rel_usecs/1000,
1458 rel_usecs % 1000);
1459 } else {
1460 lat_print_generic(s, entry, cpu);
1461 lat_print_timestamp(s, abs_usecs, rel_usecs);
1462 }
1463 switch (entry->type) {
1464 case TRACE_FN:
1465 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1466 trace_seq_puts(s, " (");
1467 if (kretprobed(entry->fn.parent_ip))
1468 trace_seq_puts(s, KRETPROBE_MSG);
1469 else
1470 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1471 trace_seq_puts(s, ")\n");
1472 break;
1473 case TRACE_CTX:
1474 case TRACE_WAKE:
1475 T = entry->ctx.next_state < sizeof(state_to_char) ?
1476 state_to_char[entry->ctx.next_state] : 'X';
1477
1478 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1479 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1480 comm = trace_find_cmdline(entry->ctx.next_pid);
1481 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1482 entry->ctx.prev_pid,
1483 entry->ctx.prev_prio,
1484 S, entry->type == TRACE_CTX ? "==>" : " +",
1485 entry->ctx.next_pid,
1486 entry->ctx.next_prio,
1487 T, comm);
1488 break;
1489 case TRACE_SPECIAL:
1490 trace_seq_printf(s, "# %ld %ld %ld\n",
1491 entry->special.arg1,
1492 entry->special.arg2,
1493 entry->special.arg3);
1494 break;
1495 case TRACE_STACK:
1496 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1497 if (i)
1498 trace_seq_puts(s, " <= ");
1499 seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1500 }
1501 trace_seq_puts(s, "\n");
1502 break;
1503 default:
1504 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1505 }
1506 return 1;
1507}
1508
1509static int print_trace_fmt(struct trace_iterator *iter)
1510{
1511 struct trace_seq *s = &iter->seq;
1512 unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1513 struct trace_entry *entry;
1514 unsigned long usec_rem;
1515 unsigned long long t;
1516 unsigned long secs;
1517 char *comm;
1518 int ret;
1519 int S, T;
1520 int i;
1521
1522 entry = iter->ent;
1523
1524 comm = trace_find_cmdline(iter->ent->pid);
1525
1526 t = ns2usecs(entry->t);
1527 usec_rem = do_div(t, 1000000ULL);
1528 secs = (unsigned long)t;
1529
1530 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1531 if (!ret)
1532 return 0;
1533 ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1534 if (!ret)
1535 return 0;
1536 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1537 if (!ret)
1538 return 0;
1539
1540 switch (entry->type) {
1541 case TRACE_FN:
1542 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1543 if (!ret)
1544 return 0;
1545 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1546 entry->fn.parent_ip) {
1547 ret = trace_seq_printf(s, " <-");
1548 if (!ret)
1549 return 0;
1550 if (kretprobed(entry->fn.parent_ip))
1551 ret = trace_seq_puts(s, KRETPROBE_MSG);
1552 else
1553 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1554 sym_flags);
1555 if (!ret)
1556 return 0;
1557 }
1558 ret = trace_seq_printf(s, "\n");
1559 if (!ret)
1560 return 0;
1561 break;
1562 case TRACE_CTX:
1563 case TRACE_WAKE:
1564 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1565 state_to_char[entry->ctx.prev_state] : 'X';
1566 T = entry->ctx.next_state < sizeof(state_to_char) ?
1567 state_to_char[entry->ctx.next_state] : 'X';
1568 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1569 entry->ctx.prev_pid,
1570 entry->ctx.prev_prio,
1571 S,
1572 entry->type == TRACE_CTX ? "==>" : " +",
1573 entry->ctx.next_pid,
1574 entry->ctx.next_prio,
1575 T);
1576 if (!ret)
1577 return 0;
1578 break;
1579 case TRACE_SPECIAL:
1580 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1581 entry->special.arg1,
1582 entry->special.arg2,
1583 entry->special.arg3);
1584 if (!ret)
1585 return 0;
1586 break;
1587 case TRACE_STACK:
1588 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1589 if (i) {
1590 ret = trace_seq_puts(s, " <= ");
1591 if (!ret)
1592 return 0;
1593 }
1594 ret = seq_print_ip_sym(s, entry->stack.caller[i],
1595 sym_flags);
1596 if (!ret)
1597 return 0;
1598 }
1599 ret = trace_seq_puts(s, "\n");
1600 if (!ret)
1601 return 0;
1602 break;
1603 }
1604 return 1;
1605}
1606
1607static int print_raw_fmt(struct trace_iterator *iter)
1608{
1609 struct trace_seq *s = &iter->seq;
1610 struct trace_entry *entry;
1611 int ret;
1612 int S, T;
1613
1614 entry = iter->ent;
1615
1616 ret = trace_seq_printf(s, "%d %d %llu ",
1617 entry->pid, iter->cpu, entry->t);
1618 if (!ret)
1619 return 0;
1620
1621 switch (entry->type) {
1622 case TRACE_FN:
1623 ret = trace_seq_printf(s, "%x %x\n",
1624 entry->fn.ip, entry->fn.parent_ip);
1625 if (!ret)
1626 return 0;
1627 break;
1628 case TRACE_CTX:
1629 case TRACE_WAKE:
1630 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1631 state_to_char[entry->ctx.prev_state] : 'X';
1632 T = entry->ctx.next_state < sizeof(state_to_char) ?
1633 state_to_char[entry->ctx.next_state] : 'X';
1634 if (entry->type == TRACE_WAKE)
1635 S = '+';
1636 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1637 entry->ctx.prev_pid,
1638 entry->ctx.prev_prio,
1639 S,
1640 entry->ctx.next_pid,
1641 entry->ctx.next_prio,
1642 T);
1643 if (!ret)
1644 return 0;
1645 break;
1646 case TRACE_SPECIAL:
1647 case TRACE_STACK:
1648 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1649 entry->special.arg1,
1650 entry->special.arg2,
1651 entry->special.arg3);
1652 if (!ret)
1653 return 0;
1654 break;
1655 }
1656 return 1;
1657}
1658
/*
 * Append the raw bytes of lvalue @x to trace_seq @s.
 * NOTE: makes the *calling function* return 0 when the write fails.
 */
#define SEQ_PUT_FIELD_RET(s, x)					\
do {								\
	if (trace_seq_putmem(s, &(x), sizeof(x)) == 0)		\
		return 0;					\
} while (0)

/*
 * Same as SEQ_PUT_FIELD_RET(), but goes through trace_seq_putmem_hex().
 * NOTE: makes the *calling function* return 0 when the write fails.
 */
#define SEQ_PUT_HEX_FIELD_RET(s, x)				\
do {								\
	if (trace_seq_putmem_hex(s, &(x), sizeof(x)) == 0)	\
		return 0;					\
} while (0)
1670
1671static int print_hex_fmt(struct trace_iterator *iter)
1672{
1673 struct trace_seq *s = &iter->seq;
1674 unsigned char newline = '\n';
1675 struct trace_entry *entry;
1676 int S, T;
1677
1678 entry = iter->ent;
1679
1680 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1681 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1682 SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1683
1684 switch (entry->type) {
1685 case TRACE_FN:
1686 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1687 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1688 break;
1689 case TRACE_CTX:
1690 case TRACE_WAKE:
1691 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1692 state_to_char[entry->ctx.prev_state] : 'X';
1693 T = entry->ctx.next_state < sizeof(state_to_char) ?
1694 state_to_char[entry->ctx.next_state] : 'X';
1695 if (entry->type == TRACE_WAKE)
1696 S = '+';
1697 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1698 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1699 SEQ_PUT_HEX_FIELD_RET(s, S);
1700 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1701 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1702 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1703 SEQ_PUT_HEX_FIELD_RET(s, T);
1704 break;
1705 case TRACE_SPECIAL:
1706 case TRACE_STACK:
1707 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1708 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1709 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1710 break;
1711 }
1712 SEQ_PUT_FIELD_RET(s, newline);
1713
1714 return 1;
1715}
1716
1717static int print_bin_fmt(struct trace_iterator *iter)
1718{
1719 struct trace_seq *s = &iter->seq;
1720 struct trace_entry *entry;
1721
1722 entry = iter->ent;
1723
1724 SEQ_PUT_FIELD_RET(s, entry->pid);
1725 SEQ_PUT_FIELD_RET(s, entry->cpu);
1726 SEQ_PUT_FIELD_RET(s, entry->t);
1727
1728 switch (entry->type) {
1729 case TRACE_FN:
1730 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1731 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1732 break;
1733 case TRACE_CTX:
1734 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1735 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1736 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1737 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1738 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1739 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1740 break;
1741 case TRACE_SPECIAL:
1742 case TRACE_STACK:
1743 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1744 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1745 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1746 break;
1747 }
1748 return 1;
1749}
1750
1751static int trace_empty(struct trace_iterator *iter)
1752{
1753 struct trace_array_cpu *data;
1754 int cpu;
1755
1756 for_each_tracing_cpu(cpu) {
1757 data = iter->tr->data[cpu];
1758
1759 if (head_page(data) && data->trace_idx &&
1760 (data->trace_tail != data->trace_head ||
1761 data->trace_tail_idx != data->trace_head_idx))
1762 return 0;
1763 }
1764 return 1;
1765}
1766
1767static int print_trace_line(struct trace_iterator *iter)
1768{
1769 if (iter->trace && iter->trace->print_line)
1770 return iter->trace->print_line(iter);
1771
1772 if (trace_flags & TRACE_ITER_BIN)
1773 return print_bin_fmt(iter);
1774
1775 if (trace_flags & TRACE_ITER_HEX)
1776 return print_hex_fmt(iter);
1777
1778 if (trace_flags & TRACE_ITER_RAW)
1779 return print_raw_fmt(iter);
1780
1781 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1782 return print_lat_fmt(iter, iter->idx, iter->cpu);
1783
1784 return print_trace_fmt(iter);
1785}
1786
1787static int s_show(struct seq_file *m, void *v)
1788{
1789 struct trace_iterator *iter = v;
1790
1791 if (iter->ent == NULL) {
1792 if (iter->tr) {
1793 seq_printf(m, "# tracer: %s\n", iter->trace->name);
1794 seq_puts(m, "#\n");
1795 }
1796 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1797 /* print nothing if the buffers are empty */
1798 if (trace_empty(iter))
1799 return 0;
1800 print_trace_header(m, iter);
1801 if (!(trace_flags & TRACE_ITER_VERBOSE))
1802 print_lat_help_header(m);
1803 } else {
1804 if (!(trace_flags & TRACE_ITER_VERBOSE))
1805 print_func_help_header(m);
1806 }
1807 } else {
1808 print_trace_line(iter);
1809 trace_print_seq(m, &iter->seq);
1810 }
1811
1812 return 0;
1813}
1814
1815static struct seq_operations tracer_seq_ops = {
1816 .start = s_start,
1817 .next = s_next,
1818 .stop = s_stop,
1819 .show = s_show,
1820};
1821
1822static struct trace_iterator *
1823__tracing_open(struct inode *inode, struct file *file, int *ret)
1824{
1825 struct trace_iterator *iter;
1826
1827 if (tracing_disabled) {
1828 *ret = -ENODEV;
1829 return NULL;
1830 }
1831
1832 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1833 if (!iter) {
1834 *ret = -ENOMEM;
1835 goto out;
1836 }
1837
1838 mutex_lock(&trace_types_lock);
1839 if (current_trace && current_trace->print_max)
1840 iter->tr = &max_tr;
1841 else
1842 iter->tr = inode->i_private;
1843 iter->trace = current_trace;
1844 iter->pos = -1;
1845
1846 /* TODO stop tracer */
1847 *ret = seq_open(file, &tracer_seq_ops);
1848 if (!*ret) {
1849 struct seq_file *m = file->private_data;
1850 m->private = iter;
1851
1852 /* stop the trace while dumping */
1853 if (iter->tr->ctrl)
1854 tracer_enabled = 0;
1855
1856 if (iter->trace && iter->trace->open)
1857 iter->trace->open(iter);
1858 } else {
1859 kfree(iter);
1860 iter = NULL;
1861 }
1862 mutex_unlock(&trace_types_lock);
1863
1864 out:
1865 return iter;
1866}
1867
1868int tracing_open_generic(struct inode *inode, struct file *filp)
1869{
1870 if (tracing_disabled)
1871 return -ENODEV;
1872
1873 filp->private_data = inode->i_private;
1874 return 0;
1875}
1876
1877int tracing_release(struct inode *inode, struct file *file)
1878{
1879 struct seq_file *m = (struct seq_file *)file->private_data;
1880 struct trace_iterator *iter = m->private;
1881
1882 mutex_lock(&trace_types_lock);
1883 if (iter->trace && iter->trace->close)
1884 iter->trace->close(iter);
1885
1886 /* reenable tracing if it was previously enabled */
1887 if (iter->tr->ctrl)
1888 tracer_enabled = 1;
1889 mutex_unlock(&trace_types_lock);
1890
1891 seq_release(inode, file);
1892 kfree(iter);
1893 return 0;
1894}
1895
/* ->open of the default-format trace file; returns 0 or a negative errno. */
static int tracing_open(struct inode *inode, struct file *file)
{
	int err;

	/* the iterator is reachable via the seq_file, only the status matters */
	__tracing_open(inode, file, &err);

	return err;
}
1904
1905static int tracing_lt_open(struct inode *inode, struct file *file)
1906{
1907 struct trace_iterator *iter;
1908 int ret;
1909
1910 iter = __tracing_open(inode, file, &ret);
1911
1912 if (!ret)
1913 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1914
1915 return ret;
1916}
1917
1918
1919static void *
1920t_next(struct seq_file *m, void *v, loff_t *pos)
1921{
1922 struct tracer *t = m->private;
1923
1924 (*pos)++;
1925
1926 if (t)
1927 t = t->next;
1928
1929 m->private = t;
1930
1931 return t;
1932}
1933
1934static void *t_start(struct seq_file *m, loff_t *pos)
1935{
1936 struct tracer *t = m->private;
1937 loff_t l = 0;
1938
1939 mutex_lock(&trace_types_lock);
1940 for (; t && l < *pos; t = t_next(m, t, &l))
1941 ;
1942
1943 return t;
1944}
1945
1946static void t_stop(struct seq_file *m, void *p)
1947{
1948 mutex_unlock(&trace_types_lock);
1949}
1950
1951static int t_show(struct seq_file *m, void *v)
1952{
1953 struct tracer *t = v;
1954
1955 if (!t)
1956 return 0;
1957
1958 seq_printf(m, "%s", t->name);
1959 if (t->next)
1960 seq_putc(m, ' ');
1961 else
1962 seq_putc(m, '\n');
1963
1964 return 0;
1965}
1966
1967static struct seq_operations show_traces_seq_ops = {
1968 .start = t_start,
1969 .next = t_next,
1970 .stop = t_stop,
1971 .show = t_show,
1972};
1973
1974static int show_traces_open(struct inode *inode, struct file *file)
1975{
1976 int ret;
1977
1978 if (tracing_disabled)
1979 return -ENODEV;
1980
1981 ret = seq_open(file, &show_traces_seq_ops);
1982 if (!ret) {
1983 struct seq_file *m = file->private_data;
1984 m->private = trace_types;
1985 }
1986
1987 return ret;
1988}
1989
1990static struct file_operations tracing_fops = {
1991 .open = tracing_open,
1992 .read = seq_read,
1993 .llseek = seq_lseek,
1994 .release = tracing_release,
1995};
1996
1997static struct file_operations tracing_lt_fops = {
1998 .open = tracing_lt_open,
1999 .read = seq_read,
2000 .llseek = seq_lseek,
2001 .release = tracing_release,
2002};
2003
2004static struct file_operations show_traces_fops = {
2005 .open = show_traces_open,
2006 .read = seq_read,
2007 .release = seq_release,
2008};
2009
2010/*
2011 * Only trace on a CPU if the bitmask is set:
2012 */
2013static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2014
2015/*
2016 * When tracing/tracing_cpu_mask is modified then this holds
2017 * the new bitmask we are about to install:
2018 */
2019static cpumask_t tracing_cpumask_new;
2020
2021/*
2022 * The tracer itself will not take this lock, but still we want
2023 * to provide a consistent cpumask to user-space:
2024 */
2025static DEFINE_MUTEX(tracing_cpumask_update_lock);
2026
2027/*
2028 * Temporary storage for the character representation of the
2029 * CPU bitmask (and one more byte for the newline):
2030 */
2031static char mask_str[NR_CPUS + 1];
2032
2033static ssize_t
2034tracing_cpumask_read(struct file *filp, char __user *ubuf,
2035 size_t count, loff_t *ppos)
2036{
2037 int len;
2038
2039 mutex_lock(&tracing_cpumask_update_lock);
2040
2041 len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2042 if (count - len < 2) {
2043 count = -EINVAL;
2044 goto out_err;
2045 }
2046 len += sprintf(mask_str + len, "\n");
2047 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2048
2049out_err:
2050 mutex_unlock(&tracing_cpumask_update_lock);
2051
2052 return count;
2053}
2054
2055static ssize_t
2056tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2057 size_t count, loff_t *ppos)
2058{
2059 int err, cpu;
2060
2061 mutex_lock(&tracing_cpumask_update_lock);
2062 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2063 if (err)
2064 goto err_unlock;
2065
2066 raw_local_irq_disable();
2067 __raw_spin_lock(&ftrace_max_lock);
2068 for_each_tracing_cpu(cpu) {
2069 /*
2070 * Increase/decrease the disabled counter if we are
2071 * about to flip a bit in the cpumask:
2072 */
2073 if (cpu_isset(cpu, tracing_cpumask) &&
2074 !cpu_isset(cpu, tracing_cpumask_new)) {
2075 atomic_inc(&global_trace.data[cpu]->disabled);
2076 }
2077 if (!cpu_isset(cpu, tracing_cpumask) &&
2078 cpu_isset(cpu, tracing_cpumask_new)) {
2079 atomic_dec(&global_trace.data[cpu]->disabled);
2080 }
2081 }
2082 __raw_spin_unlock(&ftrace_max_lock);
2083 raw_local_irq_enable();
2084
2085 tracing_cpumask = tracing_cpumask_new;
2086
2087 mutex_unlock(&tracing_cpumask_update_lock);
2088
2089 return count;
2090
2091err_unlock:
2092 mutex_unlock(&tracing_cpumask_update_lock);
2093
2094 return err;
2095}
2096
/*
 * File operations behind the "tracing_cpumask" debugfs file
 * (see tracer_init_debugfs()).
 */
static struct file_operations tracing_cpumask_fops = {
	.open = tracing_open_generic,
	.read = tracing_cpumask_read,
	.write = tracing_cpumask_write,
};
2102
2103static ssize_t
2104tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2105 size_t cnt, loff_t *ppos)
2106{
2107 char *buf;
2108 int r = 0;
2109 int len = 0;
2110 int i;
2111
2112 /* calulate max size */
2113 for (i = 0; trace_options[i]; i++) {
2114 len += strlen(trace_options[i]);
2115 len += 3; /* "no" and space */
2116 }
2117
2118 /* +2 for \n and \0 */
2119 buf = kmalloc(len + 2, GFP_KERNEL);
2120 if (!buf)
2121 return -ENOMEM;
2122
2123 for (i = 0; trace_options[i]; i++) {
2124 if (trace_flags & (1 << i))
2125 r += sprintf(buf + r, "%s ", trace_options[i]);
2126 else
2127 r += sprintf(buf + r, "no%s ", trace_options[i]);
2128 }
2129
2130 r += sprintf(buf + r, "\n");
2131 WARN_ON(r >= len + 2);
2132
2133 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2134
2135 kfree(buf);
2136
2137 return r;
2138}
2139
2140static ssize_t
2141tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2142 size_t cnt, loff_t *ppos)
2143{
2144 char buf[64];
2145 char *cmp = buf;
2146 int neg = 0;
2147 int i;
2148
2149 if (cnt >= sizeof(buf))
2150 return -EINVAL;
2151
2152 if (copy_from_user(&buf, ubuf, cnt))
2153 return -EFAULT;
2154
2155 buf[cnt] = 0;
2156
2157 if (strncmp(buf, "no", 2) == 0) {
2158 neg = 1;
2159 cmp += 2;
2160 }
2161
2162 for (i = 0; trace_options[i]; i++) {
2163 int len = strlen(trace_options[i]);
2164
2165 if (strncmp(cmp, trace_options[i], len) == 0) {
2166 if (neg)
2167 trace_flags &= ~(1 << i);
2168 else
2169 trace_flags |= (1 << i);
2170 break;
2171 }
2172 }
2173 /*
2174 * If no option could be set, return an error:
2175 */
2176 if (!trace_options[i])
2177 return -EINVAL;
2178
2179 filp->f_pos += cnt;
2180
2181 return cnt;
2182}
2183
/*
 * File operations behind the "iter_ctrl" debugfs file
 * (see tracer_init_debugfs()).
 */
static struct file_operations tracing_iter_fops = {
	.open = tracing_open_generic,
	.read = tracing_iter_ctrl_read,
	.write = tracing_iter_ctrl_write,
};
2189
/*
 * Text served by the "README" debugfs file: a short shell transcript
 * showing how to mount debugfs, select a tracer and capture a trace.
 * Lines starting with "# " are commands, the others are their output.
 */
static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# mkdir /debug\n"
	"# mount -t debugfs nodev /debug\n\n"
	"# cat /debug/tracing/available_tracers\n"
	"wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
	"# cat /debug/tracing/current_tracer\n"
	"none\n"
	"# echo sched_switch > /debug/tracing/current_tracer\n"
	"# cat /debug/tracing/current_tracer\n"
	"sched_switch\n"
	"# cat /debug/tracing/iter_ctrl\n"
	"noprint-parent nosym-offset nosym-addr noverbose\n"
	"# echo print-parent > /debug/tracing/iter_ctrl\n"
	"# echo 1 > /debug/tracing/tracing_enabled\n"
	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
	"# echo 0 > /debug/tracing/tracing_enabled\n"
;
2208
2209static ssize_t
2210tracing_readme_read(struct file *filp, char __user *ubuf,
2211 size_t cnt, loff_t *ppos)
2212{
2213 return simple_read_from_buffer(ubuf, cnt, ppos,
2214 readme_msg, strlen(readme_msg));
2215}
2216
/*
 * File operations behind the "README" debugfs file
 * (see tracer_init_debugfs()). There is no write handler.
 */
static struct file_operations tracing_readme_fops = {
	.open = tracing_open_generic,
	.read = tracing_readme_read,
};
2221
2222static ssize_t
2223tracing_ctrl_read(struct file *filp, char __user *ubuf,
2224 size_t cnt, loff_t *ppos)
2225{
2226 struct trace_array *tr = filp->private_data;
2227 char buf[64];
2228 int r;
2229
2230 r = sprintf(buf, "%ld\n", tr->ctrl);
2231 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2232}
2233
2234static ssize_t
2235tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2236 size_t cnt, loff_t *ppos)
2237{
2238 struct trace_array *tr = filp->private_data;
2239 char buf[64];
2240 long val;
2241 int ret;
2242
2243 if (cnt >= sizeof(buf))
2244 return -EINVAL;
2245
2246 if (copy_from_user(&buf, ubuf, cnt))
2247 return -EFAULT;
2248
2249 buf[cnt] = 0;
2250
2251 ret = strict_strtoul(buf, 10, &val);
2252 if (ret < 0)
2253 return ret;
2254
2255 val = !!val;
2256
2257 mutex_lock(&trace_types_lock);
2258 if (tr->ctrl ^ val) {
2259 if (val)
2260 tracer_enabled = 1;
2261 else
2262 tracer_enabled = 0;
2263
2264 tr->ctrl = val;
2265
2266 if (current_trace && current_trace->ctrl_update)
2267 current_trace->ctrl_update(tr);
2268 }
2269 mutex_unlock(&trace_types_lock);
2270
2271 filp->f_pos += cnt;
2272
2273 return cnt;
2274}
2275
2276static ssize_t
2277tracing_set_trace_read(struct file *filp, char __user *ubuf,
2278 size_t cnt, loff_t *ppos)
2279{
2280 char buf[max_tracer_type_len+2];
2281 int r;
2282
2283 mutex_lock(&trace_types_lock);
2284 if (current_trace)
2285 r = sprintf(buf, "%s\n", current_trace->name);
2286 else
2287 r = sprintf(buf, "\n");
2288 mutex_unlock(&trace_types_lock);
2289
2290 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2291}
2292
2293static ssize_t
2294tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2295 size_t cnt, loff_t *ppos)
2296{
2297 struct trace_array *tr = &global_trace;
2298 struct tracer *t;
2299 char buf[max_tracer_type_len+1];
2300 int i;
2301
2302 if (cnt > max_tracer_type_len)
2303 cnt = max_tracer_type_len;
2304
2305 if (copy_from_user(&buf, ubuf, cnt))
2306 return -EFAULT;
2307
2308 buf[cnt] = 0;
2309
2310 /* strip ending whitespace. */
2311 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2312 buf[i] = 0;
2313
2314 mutex_lock(&trace_types_lock);
2315 for (t = trace_types; t; t = t->next) {
2316 if (strcmp(t->name, buf) == 0)
2317 break;
2318 }
2319 if (!t || t == current_trace)
2320 goto out;
2321
2322 if (current_trace && current_trace->reset)
2323 current_trace->reset(tr);
2324
2325 current_trace = t;
2326 if (t->init)
2327 t->init(tr);
2328
2329 out:
2330 mutex_unlock(&trace_types_lock);
2331
2332 filp->f_pos += cnt;
2333
2334 return cnt;
2335}
2336
2337static ssize_t
2338tracing_max_lat_read(struct file *filp, char __user *ubuf,
2339 size_t cnt, loff_t *ppos)
2340{
2341 unsigned long *ptr = filp->private_data;
2342 char buf[64];
2343 int r;
2344
2345 r = snprintf(buf, sizeof(buf), "%ld\n",
2346 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2347 if (r > sizeof(buf))
2348 r = sizeof(buf);
2349 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2350}
2351
2352static ssize_t
2353tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2354 size_t cnt, loff_t *ppos)
2355{
2356 long *ptr = filp->private_data;
2357 char buf[64];
2358 long val;
2359 int ret;
2360
2361 if (cnt >= sizeof(buf))
2362 return -EINVAL;
2363
2364 if (copy_from_user(&buf, ubuf, cnt))
2365 return -EFAULT;
2366
2367 buf[cnt] = 0;
2368
2369 ret = strict_strtoul(buf, 10, &val);
2370 if (ret < 0)
2371 return ret;
2372
2373 *ptr = val * 1000;
2374
2375 return cnt;
2376}
2377
2378static atomic_t tracing_reader;
2379
2380static int tracing_open_pipe(struct inode *inode, struct file *filp)
2381{
2382 struct trace_iterator *iter;
2383
2384 if (tracing_disabled)
2385 return -ENODEV;
2386
2387 /* We only allow for reader of the pipe */
2388 if (atomic_inc_return(&tracing_reader) != 1) {
2389 atomic_dec(&tracing_reader);
2390 return -EBUSY;
2391 }
2392
2393 /* create a buffer to store the information to pass to userspace */
2394 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2395 if (!iter)
2396 return -ENOMEM;
2397
2398 mutex_lock(&trace_types_lock);
2399 iter->tr = &global_trace;
2400 iter->trace = current_trace;
2401 filp->private_data = iter;
2402
2403 if (iter->trace->pipe_open)
2404 iter->trace->pipe_open(iter);
2405 mutex_unlock(&trace_types_lock);
2406
2407 return 0;
2408}
2409
2410static int tracing_release_pipe(struct inode *inode, struct file *file)
2411{
2412 struct trace_iterator *iter = file->private_data;
2413
2414 kfree(iter);
2415 atomic_dec(&tracing_reader);
2416
2417 return 0;
2418}
2419
2420static unsigned int
2421tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2422{
2423 struct trace_iterator *iter = filp->private_data;
2424
2425 if (trace_flags & TRACE_ITER_BLOCK) {
2426 /*
2427 * Always select as readable when in blocking mode
2428 */
2429 return POLLIN | POLLRDNORM;
2430 } else {
2431 if (!trace_empty(iter))
2432 return POLLIN | POLLRDNORM;
2433 poll_wait(filp, &trace_wait, poll_table);
2434 if (!trace_empty(iter))
2435 return POLLIN | POLLRDNORM;
2436
2437 return 0;
2438 }
2439}
2440
/*
 * Consumer reader: read handler for the "trace_pipe" debugfs file.
 *
 * Order of operations:
 *  1) hand out whatever is still buffered in iter->seq from a
 *     previous call;
 *  2) give the tracer's own read() hook a chance to handle the call;
 *  3) wait (or fail with -EAGAIN for O_NONBLOCK) until the buffers
 *     have entries;
 *  4) with interrupts off and the per-CPU buffers locked, format and
 *     consume entries into iter->seq;
 *  5) copy the formatted text to user space.
 *
 * Returns the number of bytes handed out, 0 for end of file, or a
 * negative errno (-EAGAIN, -EINTR, or whatever the hooks/copy return).
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	struct trace_array_cpu *data;
	/* CPUs whose buffers get locked below; used with trace_types_lock held */
	static cpumask_t mask;
	unsigned long flags;
#ifdef CONFIG_FTRACE
	int ftrace_save;
#endif
	int cpu;
	ssize_t sret;

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	/* -EBUSY is treated as "nothing was buffered", anything else is final */
	if (sret != -EBUSY)
		return sret;
	sret = 0;

	trace_seq_reset(&iter->seq);

	mutex_lock(&trace_types_lock);
	/* a tracer may take over the read entirely */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			sret = -EAGAIN;
			goto out;
		}

		/*
		 * This is a make-shift waitqueue. The reason we don't use
		 * an actual wait queue is because:
		 *  1) we only ever have one waiter
		 *  2) the tracing, traces all functions, we don't want
		 *     the overhead of calling wake_up and friends
		 *     (and tracing them too)
		 *     Anyway, this is really very primitive wakeup.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		iter->tr->waiter = current;

		/* do not sleep with the lock held */
		mutex_unlock(&trace_types_lock);

		/* sleep for 100 msecs, and try again. */
		schedule_timeout(HZ/10);

		mutex_lock(&trace_types_lock);

		iter->tr->waiter = NULL;

		if (signal_pending(current)) {
			sret = -EINTR;
			goto out;
		}

		/* the tracer was switched while we slept: return (sret is 0) */
		if (iter->trace != current_trace)
			goto out;

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_enabled && iter->pos)
			break;

		continue;
	}

	/* stop when tracing is finished */
	if (trace_empty(iter))
		goto out;

	/* never format more than fits into the page sized seq buffer */
	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/*
	 * reset all but tr, trace, and overruns: everything from ->seq to
	 * the end of struct trace_iterator is cleared, so this depends on
	 * the field order of that structure.
	 */
	memset(&iter->seq, 0,
	       sizeof(struct trace_iterator) -
	       offsetof(struct trace_iterator, seq));
	iter->pos = -1;

	/*
	 * We need to stop all tracing on all CPUS to read the
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	cpus_clear(mask);
	local_irq_save(flags);
#ifdef CONFIG_FTRACE
	ftrace_save = ftrace_enabled;
	ftrace_enabled = 0;
#endif
	smp_wmb();
	/* pass 1: disable tracing on every CPU that has data to read */
	for_each_tracing_cpu(cpu) {
		data = iter->tr->data[cpu];

		if (!head_page(data) || !data->trace_idx)
			continue;

		atomic_inc(&data->disabled);
		cpu_set(cpu, mask);
	}

	/* pass 2: lock those buffers and account for new overruns */
	for_each_cpu_mask(cpu, mask) {
		data = iter->tr->data[cpu];
		__raw_spin_lock(&data->lock);

		if (data->overrun > iter->last_overrun[cpu])
			iter->overrun[cpu] +=
				data->overrun - iter->last_overrun[cpu];
		iter->last_overrun[cpu] = data->overrun;
	}

	/* format and consume entries until cnt bytes are ready or we run dry */
	while (find_next_entry_inc(iter) != NULL) {
		int ret;
		int len = iter->seq.len;

		ret = print_trace_line(iter);
		if (!ret) {
			/* don't print partial lines */
			iter->seq.len = len;
			break;
		}

		trace_consume(iter);

		if (iter->seq.len >= cnt)
			break;
	}

	/* undo pass 2 and pass 1 */
	for_each_cpu_mask(cpu, mask) {
		data = iter->tr->data[cpu];
		__raw_spin_unlock(&data->lock);
	}

	for_each_cpu_mask(cpu, mask) {
		data = iter->tr->data[cpu];
		atomic_dec(&data->disabled);
	}
#ifdef CONFIG_FTRACE
	ftrace_enabled = ftrace_save;
#endif
	local_irq_restore(flags);

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.readpos >= iter->seq.len)
		trace_seq_reset(&iter->seq);
	if (sret == -EBUSY)
		sret = 0;

out:
	mutex_unlock(&trace_types_lock);

	return sret;
}
2614
2615static ssize_t
2616tracing_entries_read(struct file *filp, char __user *ubuf,
2617 size_t cnt, loff_t *ppos)
2618{
2619 struct trace_array *tr = filp->private_data;
2620 char buf[64];
2621 int r;
2622
2623 r = sprintf(buf, "%lu\n", tr->entries);
2624 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2625}
2626
2627static ssize_t
2628tracing_entries_write(struct file *filp, const char __user *ubuf,
2629 size_t cnt, loff_t *ppos)
2630{
2631 unsigned long val;
2632 char buf[64];
2633 int i, ret;
2634
2635 if (cnt >= sizeof(buf))
2636 return -EINVAL;
2637
2638 if (copy_from_user(&buf, ubuf, cnt))
2639 return -EFAULT;
2640
2641 buf[cnt] = 0;
2642
2643 ret = strict_strtoul(buf, 10, &val);
2644 if (ret < 0)
2645 return ret;
2646
2647 /* must have at least 1 entry */
2648 if (!val)
2649 return -EINVAL;
2650
2651 mutex_lock(&trace_types_lock);
2652
2653 if (current_trace != &no_tracer) {
2654 cnt = -EBUSY;
2655 pr_info("ftrace: set current_tracer to none"
2656 " before modifying buffer size\n");
2657 goto out;
2658 }
2659
2660 if (val > global_trace.entries) {
2661 long pages_requested;
2662 unsigned long freeable_pages;
2663
2664 /* make sure we have enough memory before mapping */
2665 pages_requested =
2666 (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2667
2668 /* account for each buffer (and max_tr) */
2669 pages_requested *= tracing_nr_buffers * 2;
2670
2671 /* Check for overflow */
2672 if (pages_requested < 0) {
2673 cnt = -ENOMEM;
2674 goto out;
2675 }
2676
2677 freeable_pages = determine_dirtyable_memory();
2678
2679 /* we only allow to request 1/4 of useable memory */
2680 if (pages_requested >
2681 ((freeable_pages + tracing_pages_allocated) / 4)) {
2682 cnt = -ENOMEM;
2683 goto out;
2684 }
2685
2686 while (global_trace.entries < val) {
2687 if (trace_alloc_page()) {
2688 cnt = -ENOMEM;
2689 goto out;
2690 }
2691 /* double check that we don't go over the known pages */
2692 if (tracing_pages_allocated > pages_requested)
2693 break;
2694 }
2695
2696 } else {
2697 /* include the number of entries in val (inc of page entries) */
2698 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2699 trace_free_page();
2700 }
2701
2702 /* check integrity */
2703 for_each_tracing_cpu(i)
2704 check_pages(global_trace.data[i]);
2705
2706 filp->f_pos += cnt;
2707
2708 /* If check pages failed, return ENOMEM */
2709 if (tracing_disabled)
2710 cnt = -ENOMEM;
2711 out:
2712 max_tr.entries = global_trace.entries;
2713 mutex_unlock(&trace_types_lock);
2714
2715 return cnt;
2716}
2717
/*
 * Shared by the "tracing_max_latency" and "tracing_thresh" debugfs
 * files; private_data points at the unsigned long being exported.
 */
static struct file_operations tracing_max_lat_fops = {
	.open = tracing_open_generic,
	.read = tracing_max_lat_read,
	.write = tracing_max_lat_write,
};

/* Backs the "tracing_enabled" debugfs file. */
static struct file_operations tracing_ctrl_fops = {
	.open = tracing_open_generic,
	.read = tracing_ctrl_read,
	.write = tracing_ctrl_write,
};

/* Backs the "current_tracer" debugfs file. */
static struct file_operations set_tracer_fops = {
	.open = tracing_open_generic,
	.read = tracing_set_trace_read,
	.write = tracing_set_trace_write,
};

/*
 * Backs the "trace_pipe" debugfs file, the consuming reader. It has
 * its own open/release pair because it allocates a per-open iterator
 * and allows only a single opener.
 */
static struct file_operations tracing_pipe_fops = {
	.open = tracing_open_pipe,
	.poll = tracing_poll_pipe,
	.read = tracing_read_pipe,
	.release = tracing_release_pipe,
};

/* Backs the "trace_entries" debugfs file. */
static struct file_operations tracing_entries_fops = {
	.open = tracing_open_generic,
	.read = tracing_entries_read,
	.write = tracing_entries_write,
};
2748
2749#ifdef CONFIG_DYNAMIC_FTRACE
2750
2751static ssize_t
2752tracing_read_long(struct file *filp, char __user *ubuf,
2753 size_t cnt, loff_t *ppos)
2754{
2755 unsigned long *p = filp->private_data;
2756 char buf[64];
2757 int r;
2758
2759 r = sprintf(buf, "%ld\n", *p);
2760
2761 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2762}
2763
/*
 * Read-only file operations exporting a single unsigned long; used
 * for the "dyn_ftrace_total_info" debugfs file.
 */
static struct file_operations tracing_read_long_fops = {
	.open = tracing_open_generic,
	.read = tracing_read_long,
};
2768#endif
2769
2770static struct dentry *d_tracer;
2771
2772struct dentry *tracing_init_dentry(void)
2773{
2774 static int once;
2775
2776 if (d_tracer)
2777 return d_tracer;
2778
2779 d_tracer = debugfs_create_dir("tracing", NULL);
2780
2781 if (!d_tracer && !once) {
2782 once = 1;
2783 pr_warning("Could not create debugfs directory 'tracing'\n");
2784 return NULL;
2785 }
2786
2787 return d_tracer;
2788}
2789
2790#ifdef CONFIG_FTRACE_SELFTEST
2791/* Let selftest have access to static functions in this file */
2792#include "trace_selftest.c"
2793#endif
2794
2795static __init void tracer_init_debugfs(void)
2796{
2797 struct dentry *d_tracer;
2798 struct dentry *entry;
2799
2800 d_tracer = tracing_init_dentry();
2801
2802 entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2803 &global_trace, &tracing_ctrl_fops);
2804 if (!entry)
2805 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2806
2807 entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2808 NULL, &tracing_iter_fops);
2809 if (!entry)
2810 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2811
2812 entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2813 NULL, &tracing_cpumask_fops);
2814 if (!entry)
2815 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2816
2817 entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2818 &global_trace, &tracing_lt_fops);
2819 if (!entry)
2820 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2821
2822 entry = debugfs_create_file("trace", 0444, d_tracer,
2823 &global_trace, &tracing_fops);
2824 if (!entry)
2825 pr_warning("Could not create debugfs 'trace' entry\n");
2826
2827 entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2828 &global_trace, &show_traces_fops);
2829 if (!entry)
2830 pr_warning("Could not create debugfs 'trace' entry\n");
2831
2832 entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2833 &global_trace, &set_tracer_fops);
2834 if (!entry)
2835 pr_warning("Could not create debugfs 'trace' entry\n");
2836
2837 entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2838 &tracing_max_latency,
2839 &tracing_max_lat_fops);
2840 if (!entry)
2841 pr_warning("Could not create debugfs "
2842 "'tracing_max_latency' entry\n");
2843
2844 entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2845 &tracing_thresh, &tracing_max_lat_fops);
2846 if (!entry)
2847 pr_warning("Could not create debugfs "
2848 "'tracing_threash' entry\n");
2849 entry = debugfs_create_file("README", 0644, d_tracer,
2850 NULL, &tracing_readme_fops);
2851 if (!entry)
2852 pr_warning("Could not create debugfs 'README' entry\n");
2853
2854 entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2855 NULL, &tracing_pipe_fops);
2856 if (!entry)
2857 pr_warning("Could not create debugfs "
2858 "'tracing_threash' entry\n");
2859
2860 entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2861 &global_trace, &tracing_entries_fops);
2862 if (!entry)
2863 pr_warning("Could not create debugfs "
2864 "'tracing_threash' entry\n");
2865
2866#ifdef CONFIG_DYNAMIC_FTRACE
2867 entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2868 &ftrace_update_tot_cnt,
2869 &tracing_read_long_fops);
2870 if (!entry)
2871 pr_warning("Could not create debugfs "
2872 "'dyn_ftrace_total_info' entry\n");
2873#endif
2874}
2875
2876static int trace_alloc_page(void)
2877{
2878 struct trace_array_cpu *data;
2879 struct page *page, *tmp;
2880 LIST_HEAD(pages);
2881 void *array;
2882 unsigned pages_allocated = 0;
2883 int i;
2884
2885 /* first allocate a page for each CPU */
2886 for_each_tracing_cpu(i) {
2887 array = (void *)__get_free_page(GFP_KERNEL);
2888 if (array == NULL) {
2889 printk(KERN_ERR "tracer: failed to allocate page"
2890 "for trace buffer!\n");
2891 goto free_pages;
2892 }
2893
2894 pages_allocated++;
2895 page = virt_to_page(array);
2896 list_add(&page->lru, &pages);
2897
2898/* Only allocate if we are actually using the max trace */
2899#ifdef CONFIG_TRACER_MAX_TRACE
2900 array = (void *)__get_free_page(GFP_KERNEL);
2901 if (array == NULL) {
2902 printk(KERN_ERR "tracer: failed to allocate page"
2903 "for trace buffer!\n");
2904 goto free_pages;
2905 }
2906 pages_allocated++;
2907 page = virt_to_page(array);
2908 list_add(&page->lru, &pages);
2909#endif
2910 }
2911
2912 /* Now that we successfully allocate a page per CPU, add them */
2913 for_each_tracing_cpu(i) {
2914 data = global_trace.data[i];
2915 page = list_entry(pages.next, struct page, lru);
2916 list_del_init(&page->lru);
2917 list_add_tail(&page->lru, &data->trace_pages);
2918 ClearPageLRU(page);
2919
2920#ifdef CONFIG_TRACER_MAX_TRACE
2921 data = max_tr.data[i];
2922 page = list_entry(pages.next, struct page, lru);
2923 list_del_init(&page->lru);
2924 list_add_tail(&page->lru, &data->trace_pages);
2925 SetPageLRU(page);
2926#endif
2927 }
2928 tracing_pages_allocated += pages_allocated;
2929 global_trace.entries += ENTRIES_PER_PAGE;
2930
2931 return 0;
2932
2933 free_pages:
2934 list_for_each_entry_safe(page, tmp, &pages, lru) {
2935 list_del_init(&page->lru);
2936 __free_page(page);
2937 }
2938 return -ENOMEM;
2939}
2940
2941static int trace_free_page(void)
2942{
2943 struct trace_array_cpu *data;
2944 struct page *page;
2945 struct list_head *p;
2946 int i;
2947 int ret = 0;
2948
2949 /* free one page from each buffer */
2950 for_each_tracing_cpu(i) {
2951 data = global_trace.data[i];
2952 p = data->trace_pages.next;
2953 if (p == &data->trace_pages) {
2954 /* should never happen */
2955 WARN_ON(1);
2956 tracing_disabled = 1;
2957 ret = -1;
2958 break;
2959 }
2960 page = list_entry(p, struct page, lru);
2961 ClearPageLRU(page);
2962 list_del(&page->lru);
2963 tracing_pages_allocated--;
2964 tracing_pages_allocated--;
2965 __free_page(page);
2966
2967 tracing_reset(data);
2968
2969#ifdef CONFIG_TRACER_MAX_TRACE
2970 data = max_tr.data[i];
2971 p = data->trace_pages.next;
2972 if (p == &data->trace_pages) {
2973 /* should never happen */
2974 WARN_ON(1);
2975 tracing_disabled = 1;
2976 ret = -1;
2977 break;
2978 }
2979 page = list_entry(p, struct page, lru);
2980 ClearPageLRU(page);
2981 list_del(&page->lru);
2982 __free_page(page);
2983
2984 tracing_reset(data);
2985#endif
2986 }
2987 global_trace.entries -= ENTRIES_PER_PAGE;
2988
2989 return ret;
2990}
2991
2992__init static int tracer_alloc_buffers(void)
2993{
2994 struct trace_array_cpu *data;
2995 void *array;
2996 struct page *page;
2997 int pages = 0;
2998 int ret = -ENOMEM;
2999 int i;
3000
3001 /* TODO: make the number of buffers hot pluggable with CPUS */
3002 tracing_nr_buffers = num_possible_cpus();
3003 tracing_buffer_mask = cpu_possible_map;
3004
3005 /* Allocate the first page for all buffers */
3006 for_each_tracing_cpu(i) {
3007 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3008 max_tr.data[i] = &per_cpu(max_data, i);
3009
3010 array = (void *)__get_free_page(GFP_KERNEL);
3011 if (array == NULL) {
3012 printk(KERN_ERR "tracer: failed to allocate page"
3013 "for trace buffer!\n");
3014 goto free_buffers;
3015 }
3016
3017 /* set the array to the list */
3018 INIT_LIST_HEAD(&data->trace_pages);
3019 page = virt_to_page(array);
3020 list_add(&page->lru, &data->trace_pages);
3021 /* use the LRU flag to differentiate the two buffers */
3022 ClearPageLRU(page);
3023
3024 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3025 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3026
3027/* Only allocate if we are actually using the max trace */
3028#ifdef CONFIG_TRACER_MAX_TRACE
3029 array = (void *)__get_free_page(GFP_KERNEL);
3030 if (array == NULL) {
3031 printk(KERN_ERR "tracer: failed to allocate page"
3032 "for trace buffer!\n");
3033 goto free_buffers;
3034 }
3035
3036 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3037 page = virt_to_page(array);
3038 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3039 SetPageLRU(page);
3040#endif
3041 }
3042
3043 /*
3044 * Since we allocate by orders of pages, we may be able to
3045 * round up a bit.
3046 */
3047 global_trace.entries = ENTRIES_PER_PAGE;
3048 pages++;
3049
3050 while (global_trace.entries < trace_nr_entries) {
3051 if (trace_alloc_page())
3052 break;
3053 pages++;
3054 }
3055 max_tr.entries = global_trace.entries;
3056
3057 pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3058 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3059 pr_info(" actual entries %ld\n", global_trace.entries);
3060
3061 tracer_init_debugfs();
3062
3063 trace_init_cmdlines();
3064
3065 register_tracer(&no_tracer);
3066 current_trace = &no_tracer;
3067
3068 /* All seems OK, enable tracing */
3069 global_trace.ctrl = tracer_enabled;
3070 tracing_disabled = 0;
3071
3072 return 0;
3073
3074 free_buffers:
3075 for (i-- ; i >= 0; i--) {
3076 struct page *page, *tmp;
3077 struct trace_array_cpu *data = global_trace.data[i];
3078
3079 if (data) {
3080 list_for_each_entry_safe(page, tmp,
3081 &data->trace_pages, lru) {
3082 list_del_init(&page->lru);
3083 __free_page(page);
3084 }
3085 }
3086
3087#ifdef CONFIG_TRACER_MAX_TRACE
3088 data = max_tr.data[i];
3089 if (data) {
3090 list_for_each_entry_safe(page, tmp,
3091 &data->trace_pages, lru) {
3092 list_del_init(&page->lru);
3093 __free_page(page);
3094 }
3095 }
3096#endif
3097 }
3098 return ret;
3099}
3100fs_initcall(tracer_alloc_buffers);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
new file mode 100644
index 000000000000..6b8bd8800d04
--- /dev/null
+++ b/kernel/trace/trace.h
@@ -0,0 +1,313 @@
1#ifndef _LINUX_KERNEL_TRACE_H
2#define _LINUX_KERNEL_TRACE_H
3
4#include <linux/fs.h>
5#include <asm/atomic.h>
6#include <linux/sched.h>
7#include <linux/clocksource.h>
8
/*
 * Kinds of trace entries. The __TRACE_FIRST_TYPE and
 * __TRACE_LAST_TYPE values only bracket the real types.
 */
enum trace_type {
	__TRACE_FIRST_TYPE = 0,

	TRACE_FN,
	TRACE_CTX,
	TRACE_WAKE,
	TRACE_STACK,
	TRACE_SPECIAL,

	__TRACE_LAST_TYPE
};
20
/*
 * Function trace entry - function address and parent function address:
 */
struct ftrace_entry {
	unsigned long ip;
	unsigned long parent_ip;
};
28
/*
 * Context switch trace entry - which task (pid, prio and state) we
 * switched from and which one we switched to:
 */
struct ctx_switch_entry {
	unsigned int prev_pid;
	unsigned char prev_prio;
	unsigned char prev_state;
	unsigned int next_pid;
	unsigned char next_prio;
	unsigned char next_state;
};
40
/*
 * Special (free-form) trace entry: three values whose meaning is up
 * to whoever records them (see trace_special()):
 */
struct special_entry {
	unsigned long arg1;
	unsigned long arg2;
	unsigned long arg3;
};
49
/*
 * Stack-trace entry: a fixed number of caller addresses.
 */

#define FTRACE_STACK_ENTRIES 8

struct stack_entry {
	unsigned long caller[FTRACE_STACK_ENTRIES];
};
59
/*
 * The trace entry - the most basic unit of tracing. This is what
 * is printed in the end as a single line in the trace output, such as:
 *
 * bash-15816 [01] 235.197585: idle_cpu <- irq_enter
 *
 * A small common header is followed by a union with one payload
 * layout per kind of entry.
 */
struct trace_entry {
	char type;		/* presumably an enum trace_type value - confirm in trace.c */
	char cpu;
	char flags;
	char preempt_count;
	int pid;
	cycle_t t;		/* presumably the timestamp of the entry - confirm in trace.c */
	union {
		struct ftrace_entry fn;
		struct ctx_switch_entry ctx;
		struct special_entry special;
		struct stack_entry stack;
	};
};

/* Size of one entry; the payload union is as large as its biggest member. */
#define TRACE_ENTRY_SIZE sizeof(struct trace_entry)
82
/*
 * The CPU trace array - it consists of thousands of trace entries
 * plus some other descriptor data: (for example which task started
 * the trace, etc.)
 */
struct trace_array_cpu {
	/* pages holding the entries, linked through page->lru */
	struct list_head trace_pages;
	/* non-zero while tracing into this buffer is switched off */
	atomic_t disabled;
	/* taken by the pipe reader while it consumes entries */
	raw_spinlock_t lock;
	struct lock_class_key lock_key;

	/* these fields get copied into max-trace: */
	unsigned trace_head_idx;
	unsigned trace_tail_idx;
	void *trace_head; /* producer */
	void *trace_tail; /* consumer */
	unsigned long trace_idx;
	unsigned long overrun;
	unsigned long saved_latency;
	unsigned long critical_start;
	unsigned long critical_end;
	unsigned long critical_sequence;
	unsigned long nice;
	unsigned long policy;
	unsigned long rt_priority;
	cycle_t preempt_timestamp;
	pid_t pid;
	uid_t uid;
	char comm[TASK_COMM_LEN];
};
113
114struct trace_iterator;
115
/*
 * The trace array - an array of per-CPU trace arrays. This is the
 * highest level data structure that individual tracers deal with.
 * They have on/off state as well:
 */
struct trace_array {
	/* entries per buffer; exported through the "trace_entries" file */
	unsigned long entries;
	/* on/off state; exported through the "tracing_enabled" file */
	long ctrl;
	int cpu;
	cycle_t time_start;
	/* task sleeping in the trace_pipe reader, NULL when there is none */
	struct task_struct *waiter;
	struct trace_array_cpu *data[NR_CPUS];
};
129
/*
 * A specific tracer, represented by methods that operate on a trace array.
 * Every hook is optional from the point of view of the callers visible
 * in trace.c, which test the pointer before calling it.
 */
struct tracer {
	/* name written to / shown by the "current_tracer" file */
	const char *name;
	/* called when this tracer becomes the current one */
	void (*init)(struct trace_array *tr);
	/* called on the old tracer before another one is selected */
	void (*reset)(struct trace_array *tr);
	void (*open)(struct trace_iterator *iter);
	/* called when the "trace_pipe" file is opened */
	void (*pipe_open)(struct trace_iterator *iter);
	void (*close)(struct trace_iterator *iter);
	void (*start)(struct trace_iterator *iter);
	void (*stop)(struct trace_iterator *iter);
	/* lets the tracer take over a read of "trace_pipe"; non-zero ends the read */
	ssize_t (*read)(struct trace_iterator *iter,
			struct file *filp, char __user *ubuf,
			size_t cnt, loff_t *ppos);
	/* called after tr->ctrl was changed through "tracing_enabled" */
	void (*ctrl_update)(struct trace_array *tr);
#ifdef CONFIG_FTRACE_STARTUP_TEST
	int (*selftest)(struct tracer *trace,
			struct trace_array *tr);
#endif
	int (*print_line)(struct trace_iterator *iter);
	/* next tracer in the list of registered tracers */
	struct tracer *next;
	int print_max;
};
154
/*
 * One page of formatted output: "len" bytes of buffer are filled,
 * "readpos" is compared against "len" by the pipe reader to decide
 * whether the buffer has been handed out completely.
 */
struct trace_seq {
	unsigned char buffer[PAGE_SIZE];
	unsigned int len;
	unsigned int readpos;
};
160
/*
 * Trace iterator - used by printout routines who present trace
 * results to users and which routines might sleep, etc:
 */
struct trace_iterator {
	struct trace_array *tr;
	struct tracer *trace;
	void *private;
	long last_overrun[NR_CPUS];
	long overrun[NR_CPUS];

	/*
	 * The below is zeroed out in pipe_read: tracing_read_pipe()
	 * clears everything from "seq" to the end of the structure with
	 * one memset(), so fields that must survive a read belong above
	 * this point.
	 */
	struct trace_seq seq;
	struct trace_entry *ent;
	int cpu;

	struct trace_entry *prev_ent;
	int prev_cpu;

	unsigned long iter_flags;
	loff_t pos;
	unsigned long next_idx[NR_CPUS];
	struct list_head *next_page[NR_CPUS];
	unsigned next_page_idx[NR_CPUS];
	long idx;
};
187
/*
 * Core tracer interface used by the individual tracer plugins.
 * Only the declarations live here.
 */

void tracing_reset(struct trace_array_cpu *data);
int tracing_open_generic(struct inode *inode, struct file *filp);
/* returns the "tracing" debugfs directory, creating it on first use */
struct dentry *tracing_init_dentry(void);

/* recording helpers, one per kind of trace entry */
void ftrace(struct trace_array *tr,
			    struct trace_array_cpu *data,
			    unsigned long ip,
			    unsigned long parent_ip,
			    unsigned long flags);
void tracing_sched_switch_trace(struct trace_array *tr,
				struct trace_array_cpu *data,
				struct task_struct *prev,
				struct task_struct *next,
				unsigned long flags);
void tracing_record_cmdline(struct task_struct *tsk);

void tracing_sched_wakeup_trace(struct trace_array *tr,
				struct trace_array_cpu *data,
				struct task_struct *wakee,
				struct task_struct *cur,
				unsigned long flags);
void trace_special(struct trace_array *tr,
		   struct trace_array_cpu *data,
		   unsigned long arg1,
		   unsigned long arg2,
		   unsigned long arg3);
void trace_function(struct trace_array *tr,
		    struct trace_array_cpu *data,
		    unsigned long ip,
		    unsigned long parent_ip,
		    unsigned long flags);

/* start/stop pairs and tracer registration */
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);

extern unsigned long nsecs_to_usecs(unsigned long nsecs);

/*
 * Exported through the "tracing_max_latency" and "tracing_thresh"
 * debugfs files, which read and write them in microseconds while
 * the stored value is scaled by 1000.
 */
extern unsigned long tracing_max_latency;
extern unsigned long tracing_thresh;

void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
			  struct task_struct *tsk, int cpu);

extern cycle_t ftrace_now(int cpu);
236
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
/*
 * Callback type for context switch notifications: gets the private
 * pointer of its tracer_switch_ops plus the previous and next task.
 * NOTE(review): __rq is presumably the scheduler runqueue - confirm
 * against the caller.
 */
typedef void
(*tracer_switch_func_t)(void *private,
			void *__rq,
			struct task_struct *prev,
			struct task_struct *next);

/* One registered context switch callback; "next" chains them into a list. */
struct tracer_switch_ops {
	tracer_switch_func_t func;
	void *private;
	struct tracer_switch_ops *next;
};

#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
251
#ifdef CONFIG_DYNAMIC_FTRACE
/* exported through the "dyn_ftrace_total_info" debugfs file */
extern unsigned long ftrace_update_tot_cnt;
/* name of the function used by the dynamic ftrace selftest */
#define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func
extern int DYN_FTRACE_TEST_NAME(void);
#endif

/*
 * Startup selftests, one per tracer; each is only declared when its
 * tracer is configured in. They are meant for the ->selftest hook of
 * struct tracer.
 */
#ifdef CONFIG_FTRACE_STARTUP_TEST
#ifdef CONFIG_FTRACE
extern int trace_selftest_startup_function(struct tracer *trace,
					   struct trace_array *tr);
#endif
#ifdef CONFIG_IRQSOFF_TRACER
extern int trace_selftest_startup_irqsoff(struct tracer *trace,
					  struct trace_array *tr);
#endif
#ifdef CONFIG_PREEMPT_TRACER
extern int trace_selftest_startup_preemptoff(struct tracer *trace,
					     struct trace_array *tr);
#endif
#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
extern int trace_selftest_startup_preemptirqsoff(struct tracer *trace,
						 struct trace_array *tr);
#endif
#ifdef CONFIG_SCHED_TRACER
extern int trace_selftest_startup_wakeup(struct tracer *trace,
					 struct trace_array *tr);
#endif
#ifdef CONFIG_CONTEXT_SWITCH_TRACER
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
					       struct trace_array *tr);
#endif
#endif /* CONFIG_FTRACE_STARTUP_TEST */

/* helpers shared with the tracer plugins for walking and printing buffers */
extern void *head_page(struct trace_array_cpu *data);
extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...);
extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
				 size_t cnt);
extern long ns2usecs(cycle_t nsec);

/* option bits, see enum trace_iterator_flags; edited through "iter_ctrl" */
extern unsigned long trace_flags;
292
/*
 * trace_iterator_flags is an enumeration that defines bit
 * positions into trace_flags that controls the output.
 *
 * NOTE: These bits must match the trace_options array in
 *       trace.c: the "iter_ctrl" handlers map entry i of that
 *       array to bit (1 << i) of trace_flags.
 */
enum trace_iterator_flags {
	TRACE_ITER_PRINT_PARENT = 0x01,
	TRACE_ITER_SYM_OFFSET = 0x02,
	TRACE_ITER_SYM_ADDR = 0x04,
	TRACE_ITER_VERBOSE = 0x08,
	TRACE_ITER_RAW = 0x10,
	TRACE_ITER_HEX = 0x20,
	TRACE_ITER_BIN = 0x40,
	TRACE_ITER_BLOCK = 0x80,
	TRACE_ITER_STACKTRACE = 0x100,
	TRACE_ITER_SCHED_TREE = 0x200,
};
312
313#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
new file mode 100644
index 000000000000..7ee7dcd76b7d
--- /dev/null
+++ b/kernel/trace/trace_functions.c
@@ -0,0 +1,78 @@
1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/debugfs.h>
13#include <linux/uaccess.h>
14#include <linux/ftrace.h>
15#include <linux/fs.h>
16
17#include "trace.h"
18
19static void function_reset(struct trace_array *tr)
20{
21 int cpu;
22
23 tr->time_start = ftrace_now(tr->cpu);
24
25 for_each_online_cpu(cpu)
26 tracing_reset(tr->data[cpu]);
27}
28
/*
 * Begin function tracing on @tr: reset the buffers first, then turn on
 * cmdline recording and finally the function tracer itself.
 */
static void start_function_trace(struct trace_array *tr)
{
	function_reset(tr);

	tracing_start_cmdline_record();
	tracing_start_function_trace();
}

/*
 * End function tracing.  The two facilities are switched off in the
 * reverse order of start_function_trace(); @tr is not used.
 */
static void stop_function_trace(struct trace_array *tr)
{
	tracing_stop_function_trace();
	tracing_stop_cmdline_record();
}
41
42static void function_trace_init(struct trace_array *tr)
43{
44 if (tr->ctrl)
45 start_function_trace(tr);
46}
47
48static void function_trace_reset(struct trace_array *tr)
49{
50 if (tr->ctrl)
51 stop_function_trace(tr);
52}
53
54static void function_trace_ctrl_update(struct trace_array *tr)
55{
56 if (tr->ctrl)
57 start_function_trace(tr);
58 else
59 stop_function_trace(tr);
60}
61
62static struct tracer function_trace __read_mostly =
63{
64 .name = "ftrace",
65 .init = function_trace_init,
66 .reset = function_trace_reset,
67 .ctrl_update = function_trace_ctrl_update,
68#ifdef CONFIG_FTRACE_SELFTEST
69 .selftest = trace_selftest_startup_function,
70#endif
71};
72
73static __init int init_function_trace(void)
74{
75 return register_tracer(&function_trace);
76}
77
78device_initcall(init_function_trace);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
new file mode 100644
index 000000000000..421d6fe3650e
--- /dev/null
+++ b/kernel/trace/trace_irqsoff.c
@@ -0,0 +1,486 @@
1/*
2 * trace irqs off critical timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * From code in the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/kallsyms.h>
13#include <linux/debugfs.h>
14#include <linux/uaccess.h>
15#include <linux/module.h>
16#include <linux/ftrace.h>
17#include <linux/fs.h>
18
19#include "trace.h"
20
21static struct trace_array *irqsoff_trace __read_mostly;
22static int tracer_enabled __read_mostly;
23
24static DEFINE_PER_CPU(int, tracing_cpu);
25
26static DEFINE_SPINLOCK(max_trace_lock);
27
/* bits stored in trace_type to select what is being timed */
enum {
	TRACER_IRQS_OFF		= 0x2,
	TRACER_PREEMPT_OFF	= 0x4,
};
32
33static int trace_type __read_mostly;
34
#ifdef CONFIG_PREEMPT_TRACER
/*
 * Returns 1 when preempt-off timing is selected in trace_type and
 * preempt_count() is non-zero, 0 otherwise.
 */
static inline int
preempt_trace(void)
{
	if (!(trace_type & TRACER_PREEMPT_OFF))
		return 0;

	return preempt_count() != 0;
}
#else
# define preempt_trace() (0)
#endif
44
#ifdef CONFIG_IRQSOFF_TRACER
/*
 * Returns 1 when irqs-off timing is selected in trace_type and
 * irqs_disabled() reports true, 0 otherwise.
 */
static inline int
irq_trace(void)
{
	if (!(trace_type & TRACER_IRQS_OFF))
		return 0;

	return irqs_disabled() != 0;
}
#else
# define irq_trace() (0)
#endif
55
56/*
57 * Sequence count - we record it when starting a measurement and
58 * skip the latency if the sequence has changed - some other section
59 * did a maximum and could disturb our measurement with serial console
60 * printouts, etc. Truly coinciding maximum latencies should be rare
61 * and what happens together happens separately as well, so this doesn't
62 * decrease the validity of the maximum found:
63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence;
65
66#ifdef CONFIG_FTRACE
67/*
68 * irqsoff uses its own tracer function to keep the overhead down:
69 */
70static void
71irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
72{
73 struct trace_array *tr = irqsoff_trace;
74 struct trace_array_cpu *data;
75 unsigned long flags;
76 long disabled;
77 int cpu;
78
79 /*
80 * Does not matter if we preempt. We test the flags
81 * afterward, to see if irqs are disabled or not.
82 * If we preempt and get a false positive, the flags
83 * test will fail.
84 */
85 cpu = raw_smp_processor_id();
86 if (likely(!per_cpu(tracing_cpu, cpu)))
87 return;
88
89 local_save_flags(flags);
90 /* slight chance to get a false positive on tracing_cpu */
91 if (!irqs_disabled_flags(flags))
92 return;
93
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 trace_function(tr, data, ip, parent_ip, flags);
99
100 atomic_dec(&data->disabled);
101}
102
103static struct ftrace_ops trace_ops __read_mostly =
104{
105 .func = irqsoff_tracer_call,
106};
107#endif /* CONFIG_FTRACE */
108
109/*
110 * Should this new latency be reported/recorded?
111 */
112static int report_latency(cycle_t delta)
113{
114 if (tracing_thresh) {
115 if (delta < tracing_thresh)
116 return 0;
117 } else {
118 if (delta <= tracing_max_latency)
119 return 0;
120 }
121 return 1;
122}
123
124static void
125check_critical_timing(struct trace_array *tr,
126 struct trace_array_cpu *data,
127 unsigned long parent_ip,
128 int cpu)
129{
130 unsigned long latency, t0, t1;
131 cycle_t T0, T1, delta;
132 unsigned long flags;
133
134 /*
135 * usecs conversion is slow so we try to delay the conversion
136 * as long as possible:
137 */
138 T0 = data->preempt_timestamp;
139 T1 = ftrace_now(cpu);
140 delta = T1-T0;
141
142 local_save_flags(flags);
143
144 if (!report_latency(delta))
145 goto out;
146
147 spin_lock_irqsave(&max_trace_lock, flags);
148
149 /* check if we are still the max latency */
150 if (!report_latency(delta))
151 goto out_unlock;
152
153 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
154
155 latency = nsecs_to_usecs(delta);
156
157 if (data->critical_sequence != max_sequence)
158 goto out_unlock;
159
160 tracing_max_latency = delta;
161 t0 = nsecs_to_usecs(T0);
162 t1 = nsecs_to_usecs(T1);
163
164 data->critical_end = parent_ip;
165
166 update_max_tr_single(tr, current, cpu);
167
168 max_sequence++;
169
170out_unlock:
171 spin_unlock_irqrestore(&max_trace_lock, flags);
172
173out:
174 data->critical_sequence = max_sequence;
175 data->preempt_timestamp = ftrace_now(cpu);
176 tracing_reset(data);
177 trace_function(tr, data, CALLER_ADDR0, parent_ip, flags);
178}
179
180static inline void
181start_critical_timing(unsigned long ip, unsigned long parent_ip)
182{
183 int cpu;
184 struct trace_array *tr = irqsoff_trace;
185 struct trace_array_cpu *data;
186 unsigned long flags;
187
188 if (likely(!tracer_enabled))
189 return;
190
191 cpu = raw_smp_processor_id();
192
193 if (per_cpu(tracing_cpu, cpu))
194 return;
195
196 data = tr->data[cpu];
197
198 if (unlikely(!data) || atomic_read(&data->disabled))
199 return;
200
201 atomic_inc(&data->disabled);
202
203 data->critical_sequence = max_sequence;
204 data->preempt_timestamp = ftrace_now(cpu);
205 data->critical_start = parent_ip ? : ip;
206 tracing_reset(data);
207
208 local_save_flags(flags);
209
210 trace_function(tr, data, ip, parent_ip, flags);
211
212 per_cpu(tracing_cpu, cpu) = 1;
213
214 atomic_dec(&data->disabled);
215}
216
217static inline void
218stop_critical_timing(unsigned long ip, unsigned long parent_ip)
219{
220 int cpu;
221 struct trace_array *tr = irqsoff_trace;
222 struct trace_array_cpu *data;
223 unsigned long flags;
224
225 cpu = raw_smp_processor_id();
226 /* Always clear the tracing cpu on stopping the trace */
227 if (unlikely(per_cpu(tracing_cpu, cpu)))
228 per_cpu(tracing_cpu, cpu) = 0;
229 else
230 return;
231
232 if (!tracer_enabled)
233 return;
234
235 data = tr->data[cpu];
236
237 if (unlikely(!data) || unlikely(!head_page(data)) ||
238 !data->critical_start || atomic_read(&data->disabled))
239 return;
240
241 atomic_inc(&data->disabled);
242
243 local_save_flags(flags);
244 trace_function(tr, data, ip, parent_ip, flags);
245 check_critical_timing(tr, data, parent_ip ? : ip, cpu);
246 data->critical_start = 0;
247 atomic_dec(&data->disabled);
248}
249
250/* start and stop critical timings used to for stoppage (in idle) */
251void start_critical_timings(void)
252{
253 if (preempt_trace() || irq_trace())
254 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
255}
256
257void stop_critical_timings(void)
258{
259 if (preempt_trace() || irq_trace())
260 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
261}
262
263#ifdef CONFIG_IRQSOFF_TRACER
264#ifdef CONFIG_PROVE_LOCKING
/*
 * Hard-irq on/off hooks for the CONFIG_PROVE_LOCKING build.  They
 * act only when irq_trace() is true and preempt_trace() is not.
 */
void time_hardirqs_on(unsigned long a0, unsigned long a1)
{
	if (preempt_trace() || !irq_trace())
		return;

	stop_critical_timing(a0, a1);
}

void time_hardirqs_off(unsigned long a0, unsigned long a1)
{
	if (preempt_trace() || !irq_trace())
		return;

	start_critical_timing(a0, a1);
}
276
277#else /* !CONFIG_PROVE_LOCKING */
278
279/*
280 * Stubs:
281 */
282
/* Empty implementations, built only when CONFIG_PROVE_LOCKING is off. */

void early_boot_irqs_off(void)
{
	/* intentionally empty */
}

void early_boot_irqs_on(void)
{
	/* intentionally empty */
}

void trace_softirqs_on(unsigned long ip)
{
	/* intentionally empty */
}

void trace_softirqs_off(unsigned long ip)
{
	/* intentionally empty */
}

inline void print_irqtrace_events(struct task_struct *curr)
{
	/* intentionally empty */
}
302
303/*
304 * We are only interested in hardirq on/off events:
305 */
306void trace_hardirqs_on(void)
307{
308 if (!preempt_trace() && irq_trace())
309 stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
310}
311EXPORT_SYMBOL(trace_hardirqs_on);
312
313void trace_hardirqs_off(void)
314{
315 if (!preempt_trace() && irq_trace())
316 start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
317}
318EXPORT_SYMBOL(trace_hardirqs_off);
319
320void trace_hardirqs_on_caller(unsigned long caller_addr)
321{
322 if (!preempt_trace() && irq_trace())
323 stop_critical_timing(CALLER_ADDR0, caller_addr);
324}
325EXPORT_SYMBOL(trace_hardirqs_on_caller);
326
327void trace_hardirqs_off_caller(unsigned long caller_addr)
328{
329 if (!preempt_trace() && irq_trace())
330 start_critical_timing(CALLER_ADDR0, caller_addr);
331}
332EXPORT_SYMBOL(trace_hardirqs_off_caller);
333
334#endif /* CONFIG_PROVE_LOCKING */
335#endif /* CONFIG_IRQSOFF_TRACER */
336
337#ifdef CONFIG_PREEMPT_TRACER
/* preemption re-enabled: forward straight to stop_critical_timing() */
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
	stop_critical_timing(a0, a1);
}

/* preemption disabled: forward straight to start_critical_timing() */
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
	start_critical_timing(a0, a1);
}
347#endif /* CONFIG_PREEMPT_TRACER */
348
349static void start_irqsoff_tracer(struct trace_array *tr)
350{
351 register_ftrace_function(&trace_ops);
352 tracer_enabled = 1;
353}
354
355static void stop_irqsoff_tracer(struct trace_array *tr)
356{
357 tracer_enabled = 0;
358 unregister_ftrace_function(&trace_ops);
359}
360
361static void __irqsoff_tracer_init(struct trace_array *tr)
362{
363 irqsoff_trace = tr;
364 /* make sure that the tracer is visible */
365 smp_wmb();
366
367 if (tr->ctrl)
368 start_irqsoff_tracer(tr);
369}
370
371static void irqsoff_tracer_reset(struct trace_array *tr)
372{
373 if (tr->ctrl)
374 stop_irqsoff_tracer(tr);
375}
376
377static void irqsoff_tracer_ctrl_update(struct trace_array *tr)
378{
379 if (tr->ctrl)
380 start_irqsoff_tracer(tr);
381 else
382 stop_irqsoff_tracer(tr);
383}
384
385static void irqsoff_tracer_open(struct trace_iterator *iter)
386{
387 /* stop the trace while dumping */
388 if (iter->tr->ctrl)
389 stop_irqsoff_tracer(iter->tr);
390}
391
392static void irqsoff_tracer_close(struct trace_iterator *iter)
393{
394 if (iter->tr->ctrl)
395 start_irqsoff_tracer(iter->tr);
396}
397
#ifdef CONFIG_IRQSOFF_TRACER
/* ->init hook of "irqsoff": time irqs-off sections only */
static void irqsoff_tracer_init(struct trace_array *tr)
{
	trace_type = TRACER_IRQS_OFF;

	__irqsoff_tracer_init(tr);
}

/* Tracer descriptor registered under the name "irqsoff". */
static struct tracer irqsoff_tracer __read_mostly =
{
	.name		= "irqsoff",
	.init		= irqsoff_tracer_init,
	.reset		= irqsoff_tracer_reset,
	.open		= irqsoff_tracer_open,
	.close		= irqsoff_tracer_close,
	.ctrl_update	= irqsoff_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_irqsoff,
#endif
};
# define register_irqsoff(trace) register_tracer(&trace)
#else
/* tracer not configured: registration compiles to nothing */
# define register_irqsoff(trace) do { } while (0)
#endif
422
#ifdef CONFIG_PREEMPT_TRACER
/* ->init hook of "preemptoff": time preempt-off sections only */
static void preemptoff_tracer_init(struct trace_array *tr)
{
	trace_type = TRACER_PREEMPT_OFF;

	__irqsoff_tracer_init(tr);
}

/* Tracer descriptor registered under the name "preemptoff". */
static struct tracer preemptoff_tracer __read_mostly =
{
	.name		= "preemptoff",
	.init		= preemptoff_tracer_init,
	.reset		= irqsoff_tracer_reset,
	.open		= irqsoff_tracer_open,
	.close		= irqsoff_tracer_close,
	.ctrl_update	= irqsoff_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_preemptoff,
#endif
};
# define register_preemptoff(trace) register_tracer(&trace)
#else
/* tracer not configured: registration compiles to nothing */
# define register_preemptoff(trace) do { } while (0)
#endif
448
#if defined(CONFIG_IRQSOFF_TRACER) && \
	defined(CONFIG_PREEMPT_TRACER)

/* ->init hook of "preemptirqsoff": time both kinds of section */
static void preemptirqsoff_tracer_init(struct trace_array *tr)
{
	trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;

	__irqsoff_tracer_init(tr);
}

/* Tracer descriptor registered under the name "preemptirqsoff". */
static struct tracer preemptirqsoff_tracer __read_mostly =
{
	.name		= "preemptirqsoff",
	.init		= preemptirqsoff_tracer_init,
	.reset		= irqsoff_tracer_reset,
	.open		= irqsoff_tracer_open,
	.close		= irqsoff_tracer_close,
	.ctrl_update	= irqsoff_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_preemptirqsoff,
#endif
};

# define register_preemptirqsoff(trace) register_tracer(&trace)
#else
/* needs both tracers configured: otherwise compiles to nothing */
# define register_preemptirqsoff(trace) do { } while (0)
#endif
477
478__init static int init_irqsoff_tracer(void)
479{
480 register_irqsoff(irqsoff_tracer);
481 register_preemptoff(preemptoff_tracer);
482 register_preemptirqsoff(preemptirqsoff_tracer);
483
484 return 0;
485}
486device_initcall(init_irqsoff_tracer);
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 000000000000..93a662009151
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,286 @@
1/*
2 * trace context switch
3 *
4 * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
5 *
6 */
7#include <linux/module.h>
8#include <linux/fs.h>
9#include <linux/debugfs.h>
10#include <linux/kallsyms.h>
11#include <linux/uaccess.h>
12#include <linux/marker.h>
13#include <linux/ftrace.h>
14
15#include "trace.h"
16
17static struct trace_array *ctx_trace;
18static int __read_mostly tracer_enabled;
19static atomic_t sched_ref;
20
21static void
22sched_switch_func(void *private, void *__rq, struct task_struct *prev,
23 struct task_struct *next)
24{
25 struct trace_array **ptr = private;
26 struct trace_array *tr = *ptr;
27 struct trace_array_cpu *data;
28 unsigned long flags;
29 long disabled;
30 int cpu;
31
32 tracing_record_cmdline(prev);
33 tracing_record_cmdline(next);
34
35 if (!tracer_enabled)
36 return;
37
38 local_irq_save(flags);
39 cpu = raw_smp_processor_id();
40 data = tr->data[cpu];
41 disabled = atomic_inc_return(&data->disabled);
42
43 if (likely(disabled == 1))
44 tracing_sched_switch_trace(tr, data, prev, next, flags);
45
46 atomic_dec(&data->disabled);
47 local_irq_restore(flags);
48}
49
50static notrace void
51sched_switch_callback(void *probe_data, void *call_data,
52 const char *format, va_list *args)
53{
54 struct task_struct *prev;
55 struct task_struct *next;
56 struct rq *__rq;
57
58 if (!atomic_read(&sched_ref))
59 return;
60
61 /* skip prev_pid %d next_pid %d prev_state %ld */
62 (void)va_arg(*args, int);
63 (void)va_arg(*args, int);
64 (void)va_arg(*args, long);
65 __rq = va_arg(*args, typeof(__rq));
66 prev = va_arg(*args, typeof(prev));
67 next = va_arg(*args, typeof(next));
68
69 /*
70 * If tracer_switch_func only points to the local
71 * switch func, it still needs the ptr passed to it.
72 */
73 sched_switch_func(probe_data, __rq, prev, next);
74}
75
76static void
77wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
78 task_struct *curr)
79{
80 struct trace_array **ptr = private;
81 struct trace_array *tr = *ptr;
82 struct trace_array_cpu *data;
83 unsigned long flags;
84 long disabled;
85 int cpu;
86
87 if (!tracer_enabled)
88 return;
89
90 tracing_record_cmdline(curr);
91
92 local_irq_save(flags);
93 cpu = raw_smp_processor_id();
94 data = tr->data[cpu];
95 disabled = atomic_inc_return(&data->disabled);
96
97 if (likely(disabled == 1))
98 tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
99
100 atomic_dec(&data->disabled);
101 local_irq_restore(flags);
102}
103
104static notrace void
105wake_up_callback(void *probe_data, void *call_data,
106 const char *format, va_list *args)
107{
108 struct task_struct *curr;
109 struct task_struct *task;
110 struct rq *__rq;
111
112 if (likely(!tracer_enabled))
113 return;
114
115 /* Skip pid %d state %ld */
116 (void)va_arg(*args, int);
117 (void)va_arg(*args, long);
118 /* now get the meat: "rq %p task %p rq->curr %p" */
119 __rq = va_arg(*args, typeof(__rq));
120 task = va_arg(*args, typeof(task));
121 curr = va_arg(*args, typeof(curr));
122
123 tracing_record_cmdline(task);
124 tracing_record_cmdline(curr);
125
126 wakeup_func(probe_data, __rq, task, curr);
127}
128
129static void sched_switch_reset(struct trace_array *tr)
130{
131 int cpu;
132
133 tr->time_start = ftrace_now(tr->cpu);
134
135 for_each_online_cpu(cpu)
136 tracing_reset(tr->data[cpu]);
137}
138
139static int tracing_sched_register(void)
140{
141 int ret;
142
143 ret = marker_probe_register("kernel_sched_wakeup",
144 "pid %d state %ld ## rq %p task %p rq->curr %p",
145 wake_up_callback,
146 &ctx_trace);
147 if (ret) {
148 pr_info("wakeup trace: Couldn't add marker"
149 " probe to kernel_sched_wakeup\n");
150 return ret;
151 }
152
153 ret = marker_probe_register("kernel_sched_wakeup_new",
154 "pid %d state %ld ## rq %p task %p rq->curr %p",
155 wake_up_callback,
156 &ctx_trace);
157 if (ret) {
158 pr_info("wakeup trace: Couldn't add marker"
159 " probe to kernel_sched_wakeup_new\n");
160 goto fail_deprobe;
161 }
162
163 ret = marker_probe_register("kernel_sched_schedule",
164 "prev_pid %d next_pid %d prev_state %ld "
165 "## rq %p prev %p next %p",
166 sched_switch_callback,
167 &ctx_trace);
168 if (ret) {
169 pr_info("sched trace: Couldn't add marker"
170 " probe to kernel_sched_schedule\n");
171 goto fail_deprobe_wake_new;
172 }
173
174 return ret;
175fail_deprobe_wake_new:
176 marker_probe_unregister("kernel_sched_wakeup_new",
177 wake_up_callback,
178 &ctx_trace);
179fail_deprobe:
180 marker_probe_unregister("kernel_sched_wakeup",
181 wake_up_callback,
182 &ctx_trace);
183 return ret;
184}
185
186static void tracing_sched_unregister(void)
187{
188 marker_probe_unregister("kernel_sched_schedule",
189 sched_switch_callback,
190 &ctx_trace);
191 marker_probe_unregister("kernel_sched_wakeup_new",
192 wake_up_callback,
193 &ctx_trace);
194 marker_probe_unregister("kernel_sched_wakeup",
195 wake_up_callback,
196 &ctx_trace);
197}
198
199static void tracing_start_sched_switch(void)
200{
201 long ref;
202
203 ref = atomic_inc_return(&sched_ref);
204 if (ref == 1)
205 tracing_sched_register();
206}
207
208static void tracing_stop_sched_switch(void)
209{
210 long ref;
211
212 ref = atomic_dec_and_test(&sched_ref);
213 if (ref)
214 tracing_sched_unregister();
215}
216
/*
 * Non-static entry points for cmdline recording.  They simply take
 * and drop a reference on the scheduler probes.
 */
void tracing_start_cmdline_record(void)
{
	tracing_start_sched_switch();
}

void tracing_stop_cmdline_record(void)
{
	tracing_stop_sched_switch();
}
226
227static void start_sched_trace(struct trace_array *tr)
228{
229 sched_switch_reset(tr);
230 tracer_enabled = 1;
231 tracing_start_cmdline_record();
232}
233
234static void stop_sched_trace(struct trace_array *tr)
235{
236 tracing_stop_cmdline_record();
237 tracer_enabled = 0;
238}
239
240static void sched_switch_trace_init(struct trace_array *tr)
241{
242 ctx_trace = tr;
243
244 if (tr->ctrl)
245 start_sched_trace(tr);
246}
247
248static void sched_switch_trace_reset(struct trace_array *tr)
249{
250 if (tr->ctrl)
251 stop_sched_trace(tr);
252}
253
254static void sched_switch_trace_ctrl_update(struct trace_array *tr)
255{
256 /* When starting a new trace, reset the buffers */
257 if (tr->ctrl)
258 start_sched_trace(tr);
259 else
260 stop_sched_trace(tr);
261}
262
263static struct tracer sched_switch_trace __read_mostly =
264{
265 .name = "sched_switch",
266 .init = sched_switch_trace_init,
267 .reset = sched_switch_trace_reset,
268 .ctrl_update = sched_switch_trace_ctrl_update,
269#ifdef CONFIG_FTRACE_SELFTEST
270 .selftest = trace_selftest_startup_sched_switch,
271#endif
272};
273
274__init static int init_sched_switch_trace(void)
275{
276 int ret = 0;
277
278 if (atomic_read(&sched_ref))
279 ret = tracing_sched_register();
280 if (ret) {
281 pr_info("error registering scheduler trace\n");
282 return ret;
283 }
284 return register_tracer(&sched_switch_trace);
285}
286device_initcall(init_sched_switch_trace);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
new file mode 100644
index 000000000000..bf7e91caef57
--- /dev/null
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -0,0 +1,447 @@
1/*
2 * trace task wakeup timings
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Based on code from the latency_tracer, that is:
8 *
9 * Copyright (C) 2004-2006 Ingo Molnar
10 * Copyright (C) 2004 William Lee Irwin III
11 */
12#include <linux/module.h>
13#include <linux/fs.h>
14#include <linux/debugfs.h>
15#include <linux/kallsyms.h>
16#include <linux/uaccess.h>
17#include <linux/ftrace.h>
18#include <linux/marker.h>
19
20#include "trace.h"
21
22static struct trace_array *wakeup_trace;
23static int __read_mostly tracer_enabled;
24
25static struct task_struct *wakeup_task;
26static int wakeup_cpu;
27static unsigned wakeup_prio = -1;
28
29static DEFINE_SPINLOCK(wakeup_lock);
30
31static void __wakeup_reset(struct trace_array *tr);
32
33#ifdef CONFIG_FTRACE
34/*
35 * irqsoff uses its own tracer function to keep the overhead down:
36 */
37static void
38wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
39{
40 struct trace_array *tr = wakeup_trace;
41 struct trace_array_cpu *data;
42 unsigned long flags;
43 long disabled;
44 int resched;
45 int cpu;
46
47 if (likely(!wakeup_task))
48 return;
49
50 resched = need_resched();
51 preempt_disable_notrace();
52
53 cpu = raw_smp_processor_id();
54 data = tr->data[cpu];
55 disabled = atomic_inc_return(&data->disabled);
56 if (unlikely(disabled != 1))
57 goto out;
58
59 spin_lock_irqsave(&wakeup_lock, flags);
60
61 if (unlikely(!wakeup_task))
62 goto unlock;
63
64 /*
65 * The task can't disappear because it needs to
66 * wake up first, and we have the wakeup_lock.
67 */
68 if (task_cpu(wakeup_task) != cpu)
69 goto unlock;
70
71 trace_function(tr, data, ip, parent_ip, flags);
72
73 unlock:
74 spin_unlock_irqrestore(&wakeup_lock, flags);
75
76 out:
77 atomic_dec(&data->disabled);
78
79 /*
80 * To prevent recursion from the scheduler, if the
81 * resched flag was set before we entered, then
82 * don't reschedule.
83 */
84 if (resched)
85 preempt_enable_no_resched_notrace();
86 else
87 preempt_enable_notrace();
88}
89
90static struct ftrace_ops trace_ops __read_mostly =
91{
92 .func = wakeup_tracer_call,
93};
94#endif /* CONFIG_FTRACE */
95
96/*
97 * Should this new latency be reported/recorded?
98 */
99static int report_latency(cycle_t delta)
100{
101 if (tracing_thresh) {
102 if (delta < tracing_thresh)
103 return 0;
104 } else {
105 if (delta <= tracing_max_latency)
106 return 0;
107 }
108 return 1;
109}
110
111static void notrace
112wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
113 struct task_struct *next)
114{
115 unsigned long latency = 0, t0 = 0, t1 = 0;
116 struct trace_array **ptr = private;
117 struct trace_array *tr = *ptr;
118 struct trace_array_cpu *data;
119 cycle_t T0, T1, delta;
120 unsigned long flags;
121 long disabled;
122 int cpu;
123
124 if (unlikely(!tracer_enabled))
125 return;
126
127 /*
128 * When we start a new trace, we set wakeup_task to NULL
129 * and then set tracer_enabled = 1. We want to make sure
130 * that another CPU does not see the tracer_enabled = 1
131 * and the wakeup_task with an older task, that might
132 * actually be the same as next.
133 */
134 smp_rmb();
135
136 if (next != wakeup_task)
137 return;
138
139 /* The task we are waiting for is waking up */
140 data = tr->data[wakeup_cpu];
141
142 /* disable local data, not wakeup_cpu data */
143 cpu = raw_smp_processor_id();
144 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
145 if (likely(disabled != 1))
146 goto out;
147
148 spin_lock_irqsave(&wakeup_lock, flags);
149
150 /* We could race with grabbing wakeup_lock */
151 if (unlikely(!tracer_enabled || next != wakeup_task))
152 goto out_unlock;
153
154 trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
155
156 /*
157 * usecs conversion is slow so we try to delay the conversion
158 * as long as possible:
159 */
160 T0 = data->preempt_timestamp;
161 T1 = ftrace_now(cpu);
162 delta = T1-T0;
163
164 if (!report_latency(delta))
165 goto out_unlock;
166
167 latency = nsecs_to_usecs(delta);
168
169 tracing_max_latency = delta;
170 t0 = nsecs_to_usecs(T0);
171 t1 = nsecs_to_usecs(T1);
172
173 update_max_tr(tr, wakeup_task, wakeup_cpu);
174
175out_unlock:
176 __wakeup_reset(tr);
177 spin_unlock_irqrestore(&wakeup_lock, flags);
178out:
179 atomic_dec(&tr->data[cpu]->disabled);
180}
181
182static notrace void
183sched_switch_callback(void *probe_data, void *call_data,
184 const char *format, va_list *args)
185{
186 struct task_struct *prev;
187 struct task_struct *next;
188 struct rq *__rq;
189
190 /* skip prev_pid %d next_pid %d prev_state %ld */
191 (void)va_arg(*args, int);
192 (void)va_arg(*args, int);
193 (void)va_arg(*args, long);
194 __rq = va_arg(*args, typeof(__rq));
195 prev = va_arg(*args, typeof(prev));
196 next = va_arg(*args, typeof(next));
197
198 tracing_record_cmdline(prev);
199
200 /*
201 * If tracer_switch_func only points to the local
202 * switch func, it still needs the ptr passed to it.
203 */
204 wakeup_sched_switch(probe_data, __rq, prev, next);
205}
206
207static void __wakeup_reset(struct trace_array *tr)
208{
209 struct trace_array_cpu *data;
210 int cpu;
211
212 assert_spin_locked(&wakeup_lock);
213
214 for_each_possible_cpu(cpu) {
215 data = tr->data[cpu];
216 tracing_reset(data);
217 }
218
219 wakeup_cpu = -1;
220 wakeup_prio = -1;
221
222 if (wakeup_task)
223 put_task_struct(wakeup_task);
224
225 wakeup_task = NULL;
226}
227
228static void wakeup_reset(struct trace_array *tr)
229{
230 unsigned long flags;
231
232 spin_lock_irqsave(&wakeup_lock, flags);
233 __wakeup_reset(tr);
234 spin_unlock_irqrestore(&wakeup_lock, flags);
235}
236
237static void
238wakeup_check_start(struct trace_array *tr, struct task_struct *p,
239 struct task_struct *curr)
240{
241 int cpu = smp_processor_id();
242 unsigned long flags;
243 long disabled;
244
245 if (likely(!rt_task(p)) ||
246 p->prio >= wakeup_prio ||
247 p->prio >= curr->prio)
248 return;
249
250 disabled = atomic_inc_return(&tr->data[cpu]->disabled);
251 if (unlikely(disabled != 1))
252 goto out;
253
254 /* interrupts should be off from try_to_wake_up */
255 spin_lock(&wakeup_lock);
256
257 /* check for races. */
258 if (!tracer_enabled || p->prio >= wakeup_prio)
259 goto out_locked;
260
261 /* reset the trace */
262 __wakeup_reset(tr);
263
264 wakeup_cpu = task_cpu(p);
265 wakeup_prio = p->prio;
266
267 wakeup_task = p;
268 get_task_struct(wakeup_task);
269
270 local_save_flags(flags);
271
272 tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
273 trace_function(tr, tr->data[wakeup_cpu],
274 CALLER_ADDR1, CALLER_ADDR2, flags);
275
276out_locked:
277 spin_unlock(&wakeup_lock);
278out:
279 atomic_dec(&tr->data[cpu]->disabled);
280}
281
282static notrace void
283wake_up_callback(void *probe_data, void *call_data,
284 const char *format, va_list *args)
285{
286 struct trace_array **ptr = probe_data;
287 struct trace_array *tr = *ptr;
288 struct task_struct *curr;
289 struct task_struct *task;
290 struct rq *__rq;
291
292 if (likely(!tracer_enabled))
293 return;
294
295 /* Skip pid %d state %ld */
296 (void)va_arg(*args, int);
297 (void)va_arg(*args, long);
298 /* now get the meat: "rq %p task %p rq->curr %p" */
299 __rq = va_arg(*args, typeof(__rq));
300 task = va_arg(*args, typeof(task));
301 curr = va_arg(*args, typeof(curr));
302
303 tracing_record_cmdline(task);
304 tracing_record_cmdline(curr);
305
306 wakeup_check_start(tr, task, curr);
307}
308
309static void start_wakeup_tracer(struct trace_array *tr)
310{
311 int ret;
312
313 ret = marker_probe_register("kernel_sched_wakeup",
314 "pid %d state %ld ## rq %p task %p rq->curr %p",
315 wake_up_callback,
316 &wakeup_trace);
317 if (ret) {
318 pr_info("wakeup trace: Couldn't add marker"
319 " probe to kernel_sched_wakeup\n");
320 return;
321 }
322
323 ret = marker_probe_register("kernel_sched_wakeup_new",
324 "pid %d state %ld ## rq %p task %p rq->curr %p",
325 wake_up_callback,
326 &wakeup_trace);
327 if (ret) {
328 pr_info("wakeup trace: Couldn't add marker"
329 " probe to kernel_sched_wakeup_new\n");
330 goto fail_deprobe;
331 }
332
333 ret = marker_probe_register("kernel_sched_schedule",
334 "prev_pid %d next_pid %d prev_state %ld "
335 "## rq %p prev %p next %p",
336 sched_switch_callback,
337 &wakeup_trace);
338 if (ret) {
339 pr_info("sched trace: Couldn't add marker"
340 " probe to kernel_sched_schedule\n");
341 goto fail_deprobe_wake_new;
342 }
343
344 wakeup_reset(tr);
345
346 /*
347 * Don't let the tracer_enabled = 1 show up before
348 * the wakeup_task is reset. This may be overkill since
349 * wakeup_reset does a spin_unlock after setting the
350 * wakeup_task to NULL, but I want to be safe.
351 * This is a slow path anyway.
352 */
353 smp_wmb();
354
355 tracer_enabled = 1;
356 register_ftrace_function(&trace_ops);
357
358 return;
359fail_deprobe_wake_new:
360 marker_probe_unregister("kernel_sched_wakeup_new",
361 wake_up_callback,
362 &wakeup_trace);
363fail_deprobe:
364 marker_probe_unregister("kernel_sched_wakeup",
365 wake_up_callback,
366 &wakeup_trace);
367}
368
369static void stop_wakeup_tracer(struct trace_array *tr)
370{
371 tracer_enabled = 0;
372 unregister_ftrace_function(&trace_ops);
373 marker_probe_unregister("kernel_sched_schedule",
374 sched_switch_callback,
375 &wakeup_trace);
376 marker_probe_unregister("kernel_sched_wakeup_new",
377 wake_up_callback,
378 &wakeup_trace);
379 marker_probe_unregister("kernel_sched_wakeup",
380 wake_up_callback,
381 &wakeup_trace);
382}
383
384static void wakeup_tracer_init(struct trace_array *tr)
385{
386 wakeup_trace = tr;
387
388 if (tr->ctrl)
389 start_wakeup_tracer(tr);
390}
391
392static void wakeup_tracer_reset(struct trace_array *tr)
393{
394 if (tr->ctrl) {
395 stop_wakeup_tracer(tr);
396 /* make sure we put back any tasks we are tracing */
397 wakeup_reset(tr);
398 }
399}
400
401static void wakeup_tracer_ctrl_update(struct trace_array *tr)
402{
403 if (tr->ctrl)
404 start_wakeup_tracer(tr);
405 else
406 stop_wakeup_tracer(tr);
407}
408
409static void wakeup_tracer_open(struct trace_iterator *iter)
410{
411 /* stop the trace while dumping */
412 if (iter->tr->ctrl)
413 stop_wakeup_tracer(iter->tr);
414}
415
416static void wakeup_tracer_close(struct trace_iterator *iter)
417{
418 /* forget about any processes we were recording */
419 if (iter->tr->ctrl)
420 start_wakeup_tracer(iter->tr);
421}
422
423static struct tracer wakeup_tracer __read_mostly =
424{
425 .name = "wakeup",
426 .init = wakeup_tracer_init,
427 .reset = wakeup_tracer_reset,
428 .open = wakeup_tracer_open,
429 .close = wakeup_tracer_close,
430 .ctrl_update = wakeup_tracer_ctrl_update,
431 .print_max = 1,
432#ifdef CONFIG_FTRACE_SELFTEST
433 .selftest = trace_selftest_startup_wakeup,
434#endif
435};
436
437__init static int init_wakeup_tracer(void)
438{
439 int ret;
440
441 ret = register_tracer(&wakeup_tracer);
442 if (ret)
443 return ret;
444
445 return 0;
446}
447device_initcall(init_wakeup_tracer);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
new file mode 100644
index 000000000000..18c5423bc977
--- /dev/null
+++ b/kernel/trace/trace_selftest.c
@@ -0,0 +1,540 @@
1/* Include in trace.c */
2
3#include <linux/kthread.h>
4#include <linux/delay.h>
5
6static inline int trace_valid_entry(struct trace_entry *entry)
7{
8 switch (entry->type) {
9 case TRACE_FN:
10 case TRACE_CTX:
11 case TRACE_WAKE:
12 case TRACE_STACK:
13 case TRACE_SPECIAL:
14 return 1;
15 }
16 return 0;
17}
18
/*
 * Sanity-check the trace buffer of a single CPU.
 *
 * @tr:   trace array the buffer belongs to; tr->entries is the number of
 *        entry slots that are walked.
 * @data: per-CPU buffer state (page list, head page, trace_idx).
 *
 * Walks tr->entries slots across the pages linked on data->trace_pages
 * and verifies that
 *  - the head page is the first page on the list,
 *  - each of the first data->trace_idx slots has a type accepted by
 *    trace_valid_entry(),
 *  - the page list ends exactly when the last slot has been visited.
 *
 * Returns 0 when everything checks out. On any inconsistency a message
 * is printed, tracing_disabled is set to 1 and -1 is returned.
 */
static int
trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
{
	struct trace_entry *entries;
	struct page *page;
	int idx = 0;
	int i;

	/* the buffer must own at least one page */
	BUG_ON(list_empty(&data->trace_pages));
	page = list_entry(data->trace_pages.next, struct page, lru);
	entries = page_address(page);

	check_pages(data);
	/* the head page is expected to be the first page on the list */
	if (head_page(data) != entries)
		goto failed;

	/*
	 * The starting trace buffer always has valid elements,
	 * if any element exists.
	 */
	entries = head_page(data);

	for (i = 0; i < tr->entries; i++) {

		/* only slots below trace_idx are checked for a valid type */
		if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
			printk(KERN_CONT ".. invalid entry %d ",
				entries[idx].type);
			goto failed;
		}

		idx++;
		/* ran off the end of the current page */
		if (idx >= ENTRIES_PER_PAGE) {
			page = virt_to_page(entries);
			if (page->lru.next == &data->trace_pages) {
				/*
				 * No further page on the list: that is only
				 * fine if this was the very last slot.
				 */
				if (i != tr->entries - 1) {
					printk(KERN_CONT ".. entries buffer mismatch");
					goto failed;
				}
			} else {
				/* continue on the next page of the list */
				page = list_entry(page->lru.next, struct page, lru);
				entries = page_address(page);
			}
			idx = 0;
		}
	}

	/* after the walk we must be sitting on the last page of the list */
	page = virt_to_page(entries);
	if (page->lru.next != &data->trace_pages) {
		printk(KERN_CONT ".. too many entries");
		goto failed;
	}

	return 0;

 failed:
	/* disable tracing */
	tracing_disabled = 1;
	printk(KERN_CONT ".. corrupted trace buffer .. ");
	return -1;
}
79
80/*
81 * Test the trace buffer to see if all the elements
82 * are still sane.
83 */
84static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
85{
86 unsigned long flags, cnt = 0;
87 int cpu, ret = 0;
88
89 /* Don't allow flipping of max traces now */
90 raw_local_irq_save(flags);
91 __raw_spin_lock(&ftrace_max_lock);
92 for_each_possible_cpu(cpu) {
93 if (!head_page(tr->data[cpu]))
94 continue;
95
96 cnt += tr->data[cpu]->trace_idx;
97
98 ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
99 if (ret)
100 break;
101 }
102 __raw_spin_unlock(&ftrace_max_lock);
103 raw_local_irq_restore(flags);
104
105 if (count)
106 *count = cnt;
107
108 return ret;
109}
110
111#ifdef CONFIG_FTRACE
112
113#ifdef CONFIG_DYNAMIC_FTRACE
114
115#define __STR(x) #x
116#define STR(x) __STR(x)
117
118/* Test dynamic code modification and ftrace filters */
119int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
120 struct trace_array *tr,
121 int (*func)(void))
122{
123 unsigned long count;
124 int ret;
125 int save_ftrace_enabled = ftrace_enabled;
126 int save_tracer_enabled = tracer_enabled;
127 char *func_name;
128
129 /* The ftrace test PASSED */
130 printk(KERN_CONT "PASSED\n");
131 pr_info("Testing dynamic ftrace: ");
132
133 /* enable tracing, and record the filter function */
134 ftrace_enabled = 1;
135 tracer_enabled = 1;
136
137 /* passed in by parameter to fool gcc from optimizing */
138 func();
139
140 /* update the records */
141 ret = ftrace_force_update();
142 if (ret) {
143 printk(KERN_CONT ".. ftraced failed .. ");
144 return ret;
145 }
146
147 /*
148 * Some archs *cough*PowerPC*cough* add charachters to the
149 * start of the function names. We simply put a '*' to
150 * accomodate them.
151 */
152 func_name = "*" STR(DYN_FTRACE_TEST_NAME);
153
154 /* filter only on our function */
155 ftrace_set_filter(func_name, strlen(func_name), 1);
156
157 /* enable tracing */
158 tr->ctrl = 1;
159 trace->init(tr);
160 /* Sleep for a 1/10 of a second */
161 msleep(100);
162
163 /* we should have nothing in the buffer */
164 ret = trace_test_buffer(tr, &count);
165 if (ret)
166 goto out;
167
168 if (count) {
169 ret = -1;
170 printk(KERN_CONT ".. filter did not filter .. ");
171 goto out;
172 }
173
174 /* call our function again */
175 func();
176
177 /* sleep again */
178 msleep(100);
179
180 /* stop the tracing. */
181 tr->ctrl = 0;
182 trace->ctrl_update(tr);
183 ftrace_enabled = 0;
184
185 /* check the trace buffer */
186 ret = trace_test_buffer(tr, &count);
187 trace->reset(tr);
188
189 /* we should only have one item */
190 if (!ret && count != 1) {
191 printk(KERN_CONT ".. filter failed count=%ld ..", count);
192 ret = -1;
193 goto out;
194 }
195 out:
196 ftrace_enabled = save_ftrace_enabled;
197 tracer_enabled = save_tracer_enabled;
198
199 /* Enable tracing on all functions again */
200 ftrace_set_filter(NULL, 0, 1);
201
202 return ret;
203}
204#else
205# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
206#endif /* CONFIG_DYNAMIC_FTRACE */
207/*
208 * Simple verification test of ftrace function tracer.
209 * Enable ftrace, sleep 1/10 second, and then read the trace
210 * buffer to see if all is in order.
211 */
212int
213trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
214{
215 unsigned long count;
216 int ret;
217 int save_ftrace_enabled = ftrace_enabled;
218 int save_tracer_enabled = tracer_enabled;
219
220 /* make sure msleep has been recorded */
221 msleep(1);
222
223 /* force the recorded functions to be traced */
224 ret = ftrace_force_update();
225 if (ret) {
226 printk(KERN_CONT ".. ftraced failed .. ");
227 return ret;
228 }
229
230 /* start the tracing */
231 ftrace_enabled = 1;
232 tracer_enabled = 1;
233
234 tr->ctrl = 1;
235 trace->init(tr);
236 /* Sleep for a 1/10 of a second */
237 msleep(100);
238 /* stop the tracing. */
239 tr->ctrl = 0;
240 trace->ctrl_update(tr);
241 ftrace_enabled = 0;
242
243 /* check the trace buffer */
244 ret = trace_test_buffer(tr, &count);
245 trace->reset(tr);
246
247 if (!ret && !count) {
248 printk(KERN_CONT ".. no entries found ..");
249 ret = -1;
250 goto out;
251 }
252
253 ret = trace_selftest_startup_dynamic_tracing(trace, tr,
254 DYN_FTRACE_TEST_NAME);
255
256 out:
257 ftrace_enabled = save_ftrace_enabled;
258 tracer_enabled = save_tracer_enabled;
259
260 /* kill ftrace totally if we failed */
261 if (ret)
262 ftrace_kill();
263
264 return ret;
265}
266#endif /* CONFIG_FTRACE */
267
268#ifdef CONFIG_IRQSOFF_TRACER
269int
270trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
271{
272 unsigned long save_max = tracing_max_latency;
273 unsigned long count;
274 int ret;
275
276 /* start the tracing */
277 tr->ctrl = 1;
278 trace->init(tr);
279 /* reset the max latency */
280 tracing_max_latency = 0;
281 /* disable interrupts for a bit */
282 local_irq_disable();
283 udelay(100);
284 local_irq_enable();
285 /* stop the tracing. */
286 tr->ctrl = 0;
287 trace->ctrl_update(tr);
288 /* check both trace buffers */
289 ret = trace_test_buffer(tr, NULL);
290 if (!ret)
291 ret = trace_test_buffer(&max_tr, &count);
292 trace->reset(tr);
293
294 if (!ret && !count) {
295 printk(KERN_CONT ".. no entries found ..");
296 ret = -1;
297 }
298
299 tracing_max_latency = save_max;
300
301 return ret;
302}
303#endif /* CONFIG_IRQSOFF_TRACER */
304
305#ifdef CONFIG_PREEMPT_TRACER
306int
307trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
308{
309 unsigned long save_max = tracing_max_latency;
310 unsigned long count;
311 int ret;
312
313 /* start the tracing */
314 tr->ctrl = 1;
315 trace->init(tr);
316 /* reset the max latency */
317 tracing_max_latency = 0;
318 /* disable preemption for a bit */
319 preempt_disable();
320 udelay(100);
321 preempt_enable();
322 /* stop the tracing. */
323 tr->ctrl = 0;
324 trace->ctrl_update(tr);
325 /* check both trace buffers */
326 ret = trace_test_buffer(tr, NULL);
327 if (!ret)
328 ret = trace_test_buffer(&max_tr, &count);
329 trace->reset(tr);
330
331 if (!ret && !count) {
332 printk(KERN_CONT ".. no entries found ..");
333 ret = -1;
334 }
335
336 tracing_max_latency = save_max;
337
338 return ret;
339}
340#endif /* CONFIG_PREEMPT_TRACER */
341
342#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
343int
344trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
345{
346 unsigned long save_max = tracing_max_latency;
347 unsigned long count;
348 int ret;
349
350 /* start the tracing */
351 tr->ctrl = 1;
352 trace->init(tr);
353
354 /* reset the max latency */
355 tracing_max_latency = 0;
356
357 /* disable preemption and interrupts for a bit */
358 preempt_disable();
359 local_irq_disable();
360 udelay(100);
361 preempt_enable();
362 /* reverse the order of preempt vs irqs */
363 local_irq_enable();
364
365 /* stop the tracing. */
366 tr->ctrl = 0;
367 trace->ctrl_update(tr);
368 /* check both trace buffers */
369 ret = trace_test_buffer(tr, NULL);
370 if (ret)
371 goto out;
372
373 ret = trace_test_buffer(&max_tr, &count);
374 if (ret)
375 goto out;
376
377 if (!ret && !count) {
378 printk(KERN_CONT ".. no entries found ..");
379 ret = -1;
380 goto out;
381 }
382
383 /* do the test by disabling interrupts first this time */
384 tracing_max_latency = 0;
385 tr->ctrl = 1;
386 trace->ctrl_update(tr);
387 preempt_disable();
388 local_irq_disable();
389 udelay(100);
390 preempt_enable();
391 /* reverse the order of preempt vs irqs */
392 local_irq_enable();
393
394 /* stop the tracing. */
395 tr->ctrl = 0;
396 trace->ctrl_update(tr);
397 /* check both trace buffers */
398 ret = trace_test_buffer(tr, NULL);
399 if (ret)
400 goto out;
401
402 ret = trace_test_buffer(&max_tr, &count);
403
404 if (!ret && !count) {
405 printk(KERN_CONT ".. no entries found ..");
406 ret = -1;
407 goto out;
408 }
409
410 out:
411 trace->reset(tr);
412 tracing_max_latency = save_max;
413
414 return ret;
415}
416#endif /* CONFIG_IRQSOFF_TRACER && CONFIG_PREEMPT_TRACER */
417
418#ifdef CONFIG_SCHED_TRACER
419static int trace_wakeup_test_thread(void *data)
420{
421 /* Make this a RT thread, doesn't need to be too high */
422 struct sched_param param = { .sched_priority = 5 };
423 struct completion *x = data;
424
425 sched_setscheduler(current, SCHED_FIFO, &param);
426
427 /* Make it know we have a new prio */
428 complete(x);
429
430 /* now go to sleep and let the test wake us up */
431 set_current_state(TASK_INTERRUPTIBLE);
432 schedule();
433
434 /* we are awake, now wait to disappear */
435 while (!kthread_should_stop()) {
436 /*
437 * This is an RT task, do short sleeps to let
438 * others run.
439 */
440 msleep(100);
441 }
442
443 return 0;
444}
445
446int
447trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
448{
449 unsigned long save_max = tracing_max_latency;
450 struct task_struct *p;
451 struct completion isrt;
452 unsigned long count;
453 int ret;
454
455 init_completion(&isrt);
456
457 /* create a high prio thread */
458 p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
459 if (IS_ERR(p)) {
460 printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
461 return -1;
462 }
463
464 /* make sure the thread is running at an RT prio */
465 wait_for_completion(&isrt);
466
467 /* start the tracing */
468 tr->ctrl = 1;
469 trace->init(tr);
470 /* reset the max latency */
471 tracing_max_latency = 0;
472
473 /* sleep to let the RT thread sleep too */
474 msleep(100);
475
476 /*
477 * Yes this is slightly racy. It is possible that for some
478 * strange reason that the RT thread we created, did not
479 * call schedule for 100ms after doing the completion,
480 * and we do a wakeup on a task that already is awake.
481 * But that is extremely unlikely, and the worst thing that
482 * happens in such a case, is that we disable tracing.
483 * Honestly, if this race does happen something is horrible
484 * wrong with the system.
485 */
486
487 wake_up_process(p);
488
489 /* stop the tracing. */
490 tr->ctrl = 0;
491 trace->ctrl_update(tr);
492 /* check both trace buffers */
493 ret = trace_test_buffer(tr, NULL);
494 if (!ret)
495 ret = trace_test_buffer(&max_tr, &count);
496
497
498 trace->reset(tr);
499
500 tracing_max_latency = save_max;
501
502 /* kill the thread */
503 kthread_stop(p);
504
505 if (!ret && !count) {
506 printk(KERN_CONT ".. no entries found ..");
507 ret = -1;
508 }
509
510 return ret;
511}
512#endif /* CONFIG_SCHED_TRACER */
513
514#ifdef CONFIG_CONTEXT_SWITCH_TRACER
515int
516trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
517{
518 unsigned long count;
519 int ret;
520
521 /* start the tracing */
522 tr->ctrl = 1;
523 trace->init(tr);
524 /* Sleep for a 1/10 of a second */
525 msleep(100);
526 /* stop the tracing. */
527 tr->ctrl = 0;
528 trace->ctrl_update(tr);
529 /* check the trace buffer */
530 ret = trace_test_buffer(tr, &count);
531 trace->reset(tr);
532
533 if (!ret && !count) {
534 printk(KERN_CONT ".. no entries found ..");
535 ret = -1;
536 }
537
538 return ret;
539}
540#endif /* CONFIG_CONTEXT_SWITCH_TRACER */
diff --git a/kernel/trace/trace_selftest_dynamic.c b/kernel/trace/trace_selftest_dynamic.c
new file mode 100644
index 000000000000..54dd77cce5bf
--- /dev/null
+++ b/kernel/trace/trace_selftest_dynamic.c
@@ -0,0 +1,7 @@
1#include "trace.h"
2
/*
 * Empty function used as the target of the dynamic ftrace self test:
 * trace_selftest_startup_function() passes it to
 * trace_selftest_startup_dynamic_tracing(), which sets the ftrace filter
 * on its name and calls it through a function pointer.
 *
 * Always returns 0.
 */
int DYN_FTRACE_TEST_NAME(void)
{
	/* used to call mcount */
	return 0;
}