diff options
author | Steven Rostedt <srostedt@redhat.com> | 2008-05-12 15:20:42 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2008-05-23 14:32:36 -0400 |
commit | 352ad25aa4a189c667cb2af333948d34692a2d27 (patch) | |
tree | 83a11c626b3818711d3b88061964c9ea603f7b68 /kernel/trace | |
parent | 35e8e302e5d6e32675df2fc1dd3a53dfa6630dc1 (diff) |
ftrace: tracer for scheduler wakeup latency
This patch adds the tracer that tracks the wakeup latency of the
highest priority waking task.
"wakeup" is added to /debugfs/tracing/available_tracers
Also added to /debugfs/tracing
tracing_max_latency
holds the current max latency for the wakeup
wakeup_thresh
if set to other than zero, a log will be recorded
for every wakeup that takes longer than the number
entered in here (usecs for all counters)
(deletes previous trace)
Examples:
(with ftrace_enabled = 0)
============
preemption latency trace v1.1.5 on 2.6.24-rc8
Signed-off-by: Ingo Molnar <mingo@elte.hu>
--------------------------------------------------------------------
latency: 26 us, #2/2, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2)
-----------------
| task: migration/0-3 (uid:0 nice:-5 policy:1 rt_prio:99)
-----------------
_------=> CPU#
/ _-----=> irqs-off
| / _----=> need-resched
|| / _---=> hardirq/softirq
||| / _--=> preempt-depth
|||| /
||||| delay
cmd pid ||||| time | caller
\ / ||||| \ | /
quilt-8551 0d..3 0us+: wake_up_process+0x15/0x17 <ffffffff80233e80> (sched_exec+0xc9/0x100 <ffffffff80235343>)
quilt-8551 0d..4 26us : sched_switch_callback+0x73/0x81 <ffffffff80338d2f> (schedule+0x483/0x6d5 <ffffffff8048b3ee>)
vim:ft=help
============
(with ftrace_enabled = 1)
============
preemption latency trace v1.1.5 on 2.6.24-rc8
--------------------------------------------------------------------
latency: 36 us, #45/45, CPU#0 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2)
-----------------
| task: migration/1-5 (uid:0 nice:-5 policy:1 rt_prio:99)
-----------------
_------=> CPU#
/ _-----=> irqs-off
| / _----=> need-resched
|| / _---=> hardirq/softirq
||| / _--=> preempt-depth
|||| /
||||| delay
cmd pid ||||| time | caller
\ / ||||| \ | /
bash-10653 1d..3 0us : wake_up_process+0x15/0x17 <ffffffff80233e80> (sched_exec+0xc9/0x100 <ffffffff80235343>)
bash-10653 1d..3 1us : try_to_wake_up+0x271/0x2e7 <ffffffff80233dcf> (sub_preempt_count+0xc/0x7a <ffffffff8023309e>)
bash-10653 1d..2 2us : try_to_wake_up+0x296/0x2e7 <ffffffff80233df4> (update_rq_clock+0x9/0x20 <ffffffff802303f3>)
bash-10653 1d..2 2us : update_rq_clock+0x1e/0x20 <ffffffff80230408> (__update_rq_clock+0xc/0x90 <ffffffff80230366>)
bash-10653 1d..2 3us : __update_rq_clock+0x1b/0x90 <ffffffff80230375> (sched_clock+0x9/0x29 <ffffffff80214529>)
bash-10653 1d..2 4us : try_to_wake_up+0x2a6/0x2e7 <ffffffff80233e04> (activate_task+0xc/0x3f <ffffffff8022ffca>)
bash-10653 1d..2 4us : activate_task+0x2d/0x3f <ffffffff8022ffeb> (enqueue_task+0xe/0x66 <ffffffff8022ff66>)
bash-10653 1d..2 5us : enqueue_task+0x5b/0x66 <ffffffff8022ffb3> (enqueue_task_rt+0x9/0x3c <ffffffff80233351>)
bash-10653 1d..2 6us : try_to_wake_up+0x2ba/0x2e7 <ffffffff80233e18> (check_preempt_wakeup+0x12/0x99 <ffffffff80234f84>)
[...]
bash-10653 1d..5 33us : tracing_record_cmdline+0xcf/0xd4 <ffffffff80338aad> (_spin_unlock+0x9/0x33 <ffffffff8048d3ec>)
bash-10653 1d..5 34us : _spin_unlock+0x19/0x33 <ffffffff8048d3fc> (sub_preempt_count+0xc/0x7a <ffffffff8023309e>)
bash-10653 1d..4 35us : wakeup_sched_switch+0x65/0x2ff <ffffffff80339f66> (_spin_lock_irqsave+0xc/0xa9 <ffffffff8048d08b>)
bash-10653 1d..4 35us : _spin_lock_irqsave+0x19/0xa9 <ffffffff8048d098> (add_preempt_count+0xe/0x77 <ffffffff8023311a>)
bash-10653 1d..4 36us : sched_switch_callback+0x73/0x81 <ffffffff80338d2f> (schedule+0x483/0x6d5 <ffffffff8048b3ee>)
vim:ft=help
============
The [...] was added here to not waste your email box space.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 13 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 310 |
3 files changed, 324 insertions, 0 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d6aa92866cd..892ecc94a82b 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -4,6 +4,9 @@ | |||
4 | config HAVE_FTRACE | 4 | config HAVE_FTRACE |
5 | bool | 5 | bool |
6 | 6 | ||
7 | config TRACER_MAX_TRACE | ||
8 | bool | ||
9 | |||
7 | config TRACING | 10 | config TRACING |
8 | bool | 11 | bool |
9 | select DEBUG_FS | 12 | select DEBUG_FS |
@@ -23,6 +26,16 @@ config FTRACE | |||
23 | (the bootup default), then the overhead of the instructions is very | 26 | (the bootup default), then the overhead of the instructions is very |
24 | small and not measurable even in micro-benchmarks. | 27 | small and not measurable even in micro-benchmarks. |
25 | 28 | ||
29 | config SCHED_TRACER | ||
30 | bool "Scheduling Latency Tracer" | ||
31 | depends on DEBUG_KERNEL | ||
32 | select TRACING | ||
33 | select CONTEXT_SWITCH_TRACER | ||
34 | select TRACER_MAX_TRACE | ||
35 | help | ||
36 | This tracer tracks the latency of the highest priority task | ||
37 | to be scheduled in, starting from the point it has woken up. | ||
38 | |||
26 | config CONTEXT_SWITCH_TRACER | 39 | config CONTEXT_SWITCH_TRACER |
27 | bool "Trace process context switches" | 40 | bool "Trace process context switches" |
28 | depends on DEBUG_KERNEL | 41 | depends on DEBUG_KERNEL |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6b54ceb7f16e..5508cdb19aea 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -3,5 +3,6 @@ obj-$(CONFIG_FTRACE) += libftrace.o | |||
3 | obj-$(CONFIG_TRACING) += trace.o | 3 | obj-$(CONFIG_TRACING) += trace.o |
4 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o | 4 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o |
5 | obj-$(CONFIG_FTRACE) += trace_functions.o | 5 | obj-$(CONFIG_FTRACE) += trace_functions.o |
6 | obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | ||
6 | 7 | ||
7 | libftrace-y := ftrace.o | 8 | libftrace-y := ftrace.o |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c new file mode 100644 index 000000000000..7c3ccefcf4c3 --- /dev/null +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -0,0 +1,310 @@ | |||
1 | /* | ||
2 | * trace task wakeup timings | ||
3 | * | ||
4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> | ||
5 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | ||
6 | * | ||
7 | * Based on code from the latency_tracer, that is: | ||
8 | * | ||
9 | * Copyright (C) 2004-2006 Ingo Molnar | ||
10 | * Copyright (C) 2004 William Lee Irwin III | ||
11 | */ | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/debugfs.h> | ||
15 | #include <linux/kallsyms.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/ftrace.h> | ||
18 | |||
19 | #include "trace.h" | ||
20 | |||
21 | static struct trace_array *wakeup_trace; | ||
22 | static int __read_mostly tracer_enabled; | ||
23 | |||
24 | static struct task_struct *wakeup_task; | ||
25 | static int wakeup_cpu; | ||
26 | static unsigned wakeup_prio = -1; | ||
27 | |||
28 | static DEFINE_SPINLOCK(wakeup_lock); | ||
29 | |||
30 | static void notrace __wakeup_reset(struct trace_array *tr); | ||
31 | |||
32 | /* | ||
33 | * Should this new latency be reported/recorded? | ||
34 | */ | ||
35 | static int notrace report_latency(cycle_t delta) | ||
36 | { | ||
37 | if (tracing_thresh) { | ||
38 | if (delta < tracing_thresh) | ||
39 | return 0; | ||
40 | } else { | ||
41 | if (delta <= tracing_max_latency) | ||
42 | return 0; | ||
43 | } | ||
44 | return 1; | ||
45 | } | ||
46 | |||
47 | void notrace | ||
48 | wakeup_sched_switch(struct task_struct *prev, struct task_struct *next) | ||
49 | { | ||
50 | unsigned long latency = 0, t0 = 0, t1 = 0; | ||
51 | struct trace_array *tr = wakeup_trace; | ||
52 | struct trace_array_cpu *data; | ||
53 | cycle_t T0, T1, delta; | ||
54 | unsigned long flags; | ||
55 | long disabled; | ||
56 | int cpu; | ||
57 | |||
58 | if (unlikely(!tracer_enabled)) | ||
59 | return; | ||
60 | |||
61 | /* | ||
62 | * When we start a new trace, we set wakeup_task to NULL | ||
63 | * and then set tracer_enabled = 1. We want to make sure | ||
64 | * that another CPU does not see the tracer_enabled = 1 | ||
65 | * and the wakeup_task with an older task, that might | ||
66 | * actually be the same as next. | ||
67 | */ | ||
68 | smp_rmb(); | ||
69 | |||
70 | if (next != wakeup_task) | ||
71 | return; | ||
72 | |||
73 | /* The task we are waitng for is waking up */ | ||
74 | data = tr->data[wakeup_cpu]; | ||
75 | |||
76 | /* disable local data, not wakeup_cpu data */ | ||
77 | cpu = raw_smp_processor_id(); | ||
78 | disabled = atomic_inc_return(&tr->data[cpu]->disabled); | ||
79 | if (likely(disabled != 1)) | ||
80 | goto out; | ||
81 | |||
82 | spin_lock_irqsave(&wakeup_lock, flags); | ||
83 | |||
84 | /* We could race with grabbing wakeup_lock */ | ||
85 | if (unlikely(!tracer_enabled || next != wakeup_task)) | ||
86 | goto out_unlock; | ||
87 | |||
88 | ftrace(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); | ||
89 | |||
90 | /* | ||
91 | * usecs conversion is slow so we try to delay the conversion | ||
92 | * as long as possible: | ||
93 | */ | ||
94 | T0 = data->preempt_timestamp; | ||
95 | T1 = now(cpu); | ||
96 | delta = T1-T0; | ||
97 | |||
98 | if (!report_latency(delta)) | ||
99 | goto out_unlock; | ||
100 | |||
101 | latency = nsecs_to_usecs(delta); | ||
102 | |||
103 | tracing_max_latency = delta; | ||
104 | t0 = nsecs_to_usecs(T0); | ||
105 | t1 = nsecs_to_usecs(T1); | ||
106 | |||
107 | update_max_tr(tr, wakeup_task, wakeup_cpu); | ||
108 | |||
109 | if (tracing_thresh) { | ||
110 | printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency " | ||
111 | "violates %lu us threshold.\n" | ||
112 | " => started at timestamp %lu: ", | ||
113 | wakeup_task->comm, wakeup_task->pid, | ||
114 | raw_smp_processor_id(), | ||
115 | latency, nsecs_to_usecs(tracing_thresh), t0); | ||
116 | } else { | ||
117 | printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum " | ||
118 | "wakeup latency.\n => started at timestamp %lu: ", | ||
119 | wakeup_task->comm, wakeup_task->pid, | ||
120 | cpu, latency, t0); | ||
121 | } | ||
122 | |||
123 | printk(KERN_CONT " ended at timestamp %lu: ", t1); | ||
124 | dump_stack(); | ||
125 | t1 = nsecs_to_usecs(now(cpu)); | ||
126 | printk(KERN_CONT " dump-end timestamp %lu\n\n", t1); | ||
127 | |||
128 | out_unlock: | ||
129 | __wakeup_reset(tr); | ||
130 | spin_unlock_irqrestore(&wakeup_lock, flags); | ||
131 | out: | ||
132 | atomic_dec(&tr->data[cpu]->disabled); | ||
133 | } | ||
134 | |||
135 | static void notrace __wakeup_reset(struct trace_array *tr) | ||
136 | { | ||
137 | struct trace_array_cpu *data; | ||
138 | int cpu; | ||
139 | |||
140 | assert_spin_locked(&wakeup_lock); | ||
141 | |||
142 | for_each_possible_cpu(cpu) { | ||
143 | data = tr->data[cpu]; | ||
144 | tracing_reset(data); | ||
145 | } | ||
146 | |||
147 | wakeup_cpu = -1; | ||
148 | wakeup_prio = -1; | ||
149 | |||
150 | if (wakeup_task) | ||
151 | put_task_struct(wakeup_task); | ||
152 | |||
153 | wakeup_task = NULL; | ||
154 | } | ||
155 | |||
156 | static void notrace wakeup_reset(struct trace_array *tr) | ||
157 | { | ||
158 | unsigned long flags; | ||
159 | |||
160 | spin_lock_irqsave(&wakeup_lock, flags); | ||
161 | __wakeup_reset(tr); | ||
162 | spin_unlock_irqrestore(&wakeup_lock, flags); | ||
163 | } | ||
164 | |||
165 | static notrace void | ||
166 | wakeup_check_start(struct trace_array *tr, struct task_struct *p, | ||
167 | struct task_struct *curr) | ||
168 | { | ||
169 | int cpu = smp_processor_id(); | ||
170 | unsigned long flags; | ||
171 | long disabled; | ||
172 | |||
173 | if (likely(!rt_task(p)) || | ||
174 | p->prio >= wakeup_prio || | ||
175 | p->prio >= curr->prio) | ||
176 | return; | ||
177 | |||
178 | disabled = atomic_inc_return(&tr->data[cpu]->disabled); | ||
179 | if (unlikely(disabled != 1)) | ||
180 | goto out; | ||
181 | |||
182 | /* interrupts should be off from try_to_wake_up */ | ||
183 | spin_lock(&wakeup_lock); | ||
184 | |||
185 | /* check for races. */ | ||
186 | if (!tracer_enabled || p->prio >= wakeup_prio) | ||
187 | goto out_locked; | ||
188 | |||
189 | /* reset the trace */ | ||
190 | __wakeup_reset(tr); | ||
191 | |||
192 | wakeup_cpu = task_cpu(p); | ||
193 | wakeup_prio = p->prio; | ||
194 | |||
195 | wakeup_task = p; | ||
196 | get_task_struct(wakeup_task); | ||
197 | |||
198 | local_save_flags(flags); | ||
199 | |||
200 | tr->data[wakeup_cpu]->preempt_timestamp = now(cpu); | ||
201 | ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); | ||
202 | |||
203 | out_locked: | ||
204 | spin_unlock(&wakeup_lock); | ||
205 | out: | ||
206 | atomic_dec(&tr->data[cpu]->disabled); | ||
207 | } | ||
208 | |||
209 | notrace void | ||
210 | ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) | ||
211 | { | ||
212 | if (likely(!tracer_enabled)) | ||
213 | return; | ||
214 | |||
215 | wakeup_check_start(wakeup_trace, wakee, curr); | ||
216 | } | ||
217 | |||
218 | notrace void | ||
219 | ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) | ||
220 | { | ||
221 | if (likely(!tracer_enabled)) | ||
222 | return; | ||
223 | |||
224 | wakeup_check_start(wakeup_trace, wakee, curr); | ||
225 | } | ||
226 | |||
227 | static notrace void start_wakeup_tracer(struct trace_array *tr) | ||
228 | { | ||
229 | wakeup_reset(tr); | ||
230 | |||
231 | /* | ||
232 | * Don't let the tracer_enabled = 1 show up before | ||
233 | * the wakeup_task is reset. This may be overkill since | ||
234 | * wakeup_reset does a spin_unlock after setting the | ||
235 | * wakeup_task to NULL, but I want to be safe. | ||
236 | * This is a slow path anyway. | ||
237 | */ | ||
238 | smp_wmb(); | ||
239 | |||
240 | tracer_enabled = 1; | ||
241 | |||
242 | return; | ||
243 | } | ||
244 | |||
245 | static notrace void stop_wakeup_tracer(struct trace_array *tr) | ||
246 | { | ||
247 | tracer_enabled = 0; | ||
248 | } | ||
249 | |||
250 | static notrace void wakeup_tracer_init(struct trace_array *tr) | ||
251 | { | ||
252 | wakeup_trace = tr; | ||
253 | |||
254 | if (tr->ctrl) | ||
255 | start_wakeup_tracer(tr); | ||
256 | } | ||
257 | |||
258 | static notrace void wakeup_tracer_reset(struct trace_array *tr) | ||
259 | { | ||
260 | if (tr->ctrl) { | ||
261 | stop_wakeup_tracer(tr); | ||
262 | /* make sure we put back any tasks we are tracing */ | ||
263 | wakeup_reset(tr); | ||
264 | } | ||
265 | } | ||
266 | |||
267 | static void wakeup_tracer_ctrl_update(struct trace_array *tr) | ||
268 | { | ||
269 | if (tr->ctrl) | ||
270 | start_wakeup_tracer(tr); | ||
271 | else | ||
272 | stop_wakeup_tracer(tr); | ||
273 | } | ||
274 | |||
275 | static void notrace wakeup_tracer_open(struct trace_iterator *iter) | ||
276 | { | ||
277 | /* stop the trace while dumping */ | ||
278 | if (iter->tr->ctrl) | ||
279 | stop_wakeup_tracer(iter->tr); | ||
280 | } | ||
281 | |||
282 | static void notrace wakeup_tracer_close(struct trace_iterator *iter) | ||
283 | { | ||
284 | /* forget about any processes we were recording */ | ||
285 | if (iter->tr->ctrl) | ||
286 | start_wakeup_tracer(iter->tr); | ||
287 | } | ||
288 | |||
289 | static struct tracer wakeup_tracer __read_mostly = | ||
290 | { | ||
291 | .name = "wakeup", | ||
292 | .init = wakeup_tracer_init, | ||
293 | .reset = wakeup_tracer_reset, | ||
294 | .open = wakeup_tracer_open, | ||
295 | .close = wakeup_tracer_close, | ||
296 | .ctrl_update = wakeup_tracer_ctrl_update, | ||
297 | .print_max = 1, | ||
298 | }; | ||
299 | |||
300 | __init static int init_wakeup_tracer(void) | ||
301 | { | ||
302 | int ret; | ||
303 | |||
304 | ret = register_tracer(&wakeup_tracer); | ||
305 | if (ret) | ||
306 | return ret; | ||
307 | |||
308 | return 0; | ||
309 | } | ||
310 | device_initcall(init_wakeup_tracer); | ||