diff options
-rw-r--r-- | kernel/trace/Kconfig | 8 | ||||
-rw-r--r-- | kernel/trace/Makefile | 1 | ||||
-rw-r--r-- | kernel/trace/trace.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace.h | 6 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 23 | ||||
-rw-r--r-- | kernel/trace/trace_sysprof.c | 363 |
6 files changed, 404 insertions, 0 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5c2295b29f2c..263e9e6bbd60 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -75,6 +75,14 @@ config PREEMPT_TRACER | |||
75 | enabled. This option and the irqs-off timing option can be | 75 | enabled. This option and the irqs-off timing option can be |
76 | used together or separately.) | 76 | used together or separately.) |
77 | 77 | ||
78 | config SYSPROF_TRACER | ||
79 | bool "Sysprof Tracer" | ||
80 | depends on X86 | ||
81 | select TRACING | ||
82 | help | ||
83 | This tracer provides the trace needed by the 'Sysprof' userspace | ||
84 | tool. | ||
85 | |||
78 | config SCHED_TRACER | 86 | config SCHED_TRACER |
79 | bool "Scheduling Latency Tracer" | 87 | bool "Scheduling Latency Tracer" |
80 | depends on HAVE_FTRACE | 88 | depends on HAVE_FTRACE |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c44a7dce9086..71d17de17288 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -14,6 +14,7 @@ obj-$(CONFIG_FTRACE) += libftrace.o | |||
14 | 14 | ||
15 | obj-$(CONFIG_TRACING) += trace.o | 15 | obj-$(CONFIG_TRACING) += trace.o |
16 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o | 16 | obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o |
17 | obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o | ||
17 | obj-$(CONFIG_FTRACE) += trace_functions.o | 18 | obj-$(CONFIG_FTRACE) += trace_functions.o |
18 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o | 19 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o |
19 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o | 20 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a875600733b..e46de641ea44 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -2913,6 +2913,9 @@ static __init void tracer_init_debugfs(void) | |||
2913 | pr_warning("Could not create debugfs " | 2913 | pr_warning("Could not create debugfs " |
2914 | "'dyn_ftrace_total_info' entry\n"); | 2914 | "'dyn_ftrace_total_info' entry\n"); |
2915 | #endif | 2915 | #endif |
2916 | #ifdef CONFIG_SYSPROF_TRACER | ||
2917 | init_tracer_sysprof_debugfs(d_tracer); | ||
2918 | #endif | ||
2916 | } | 2919 | } |
2917 | 2920 | ||
2918 | static int trace_alloc_page(void) | 2921 | static int trace_alloc_page(void) |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4966e6a964fe..8cb215b239d5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -193,6 +193,8 @@ struct trace_iterator { | |||
193 | void tracing_reset(struct trace_array_cpu *data); | 193 | void tracing_reset(struct trace_array_cpu *data); |
194 | int tracing_open_generic(struct inode *inode, struct file *filp); | 194 | int tracing_open_generic(struct inode *inode, struct file *filp); |
195 | struct dentry *tracing_init_dentry(void); | 195 | struct dentry *tracing_init_dentry(void); |
196 | void init_tracer_sysprof_debugfs(struct dentry *d_tracer); | ||
197 | |||
196 | void ftrace(struct trace_array *tr, | 198 | void ftrace(struct trace_array *tr, |
197 | struct trace_array_cpu *data, | 199 | struct trace_array_cpu *data, |
198 | unsigned long ip, | 200 | unsigned long ip, |
@@ -294,6 +296,10 @@ extern int trace_selftest_startup_wakeup(struct tracer *trace, | |||
294 | extern int trace_selftest_startup_sched_switch(struct tracer *trace, | 296 | extern int trace_selftest_startup_sched_switch(struct tracer *trace, |
295 | struct trace_array *tr); | 297 | struct trace_array *tr); |
296 | #endif | 298 | #endif |
299 | #ifdef CONFIG_SYSPROF_TRACER | ||
300 | extern int trace_selftest_startup_sysprof(struct tracer *trace, | ||
301 | struct trace_array *tr); | ||
302 | #endif | ||
297 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 303 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
298 | 304 | ||
299 | extern void *head_page(struct trace_array_cpu *data); | 305 | extern void *head_page(struct trace_array_cpu *data); |
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 18c5423bc977..0911b7e073bf 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -538,3 +538,26 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr | |||
538 | return ret; | 538 | return ret; |
539 | } | 539 | } |
540 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ | 540 | #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ |
541 | |||
#ifdef CONFIG_SYSPROF_TRACER
/*
 * Startup selftest for the sysprof tracer.
 *
 * Turns the tracer on, lets it run for 100 msecs, turns it off again
 * and hands the resulting buffer to trace_test_buffer().
 *
 * Returns whatever trace_test_buffer() reported.
 */
int
trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
{
	unsigned long nr_entries;
	int err;

	/* Enable tracing and let the tracer set itself up */
	tr->ctrl = 1;
	trace->init(tr);

	/* Let it run for a tenth of a second */
	msleep(100);

	/* Disable tracing again before inspecting the buffer */
	tr->ctrl = 0;
	trace->ctrl_update(tr);

	/* The entry count is collected but deliberately not evaluated here */
	err = trace_test_buffer(tr, &nr_entries);
	trace->reset(tr);

	return err;
}
#endif /* CONFIG_SYSPROF_TRACER */
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c new file mode 100644 index 000000000000..2301e1e7c606 --- /dev/null +++ b/kernel/trace/trace_sysprof.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * trace stack traces | ||
3 | * | ||
4 | * Copyright (C) 2004-2008, Soeren Sandmann | ||
5 | * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> | ||
6 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | ||
7 | */ | ||
8 | #include <linux/kallsyms.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/hrtimer.h> | ||
11 | #include <linux/uaccess.h> | ||
12 | #include <linux/ftrace.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/irq.h> | ||
15 | #include <linux/fs.h> | ||
16 | |||
17 | #include <asm/stacktrace.h> | ||
18 | |||
19 | #include "trace.h" | ||
20 | |||
21 | static struct trace_array *sysprof_trace; | ||
22 | static int __read_mostly tracer_enabled; | ||
23 | |||
24 | /* | ||
25 | * 1 msec sample interval by default: | ||
26 | */ | ||
27 | static unsigned long sample_period = 1000000; | ||
28 | static const unsigned int sample_max_depth = 512; | ||
29 | |||
30 | static DEFINE_MUTEX(sample_timer_lock); | ||
31 | /* | ||
32 | * Per CPU hrtimers that do the profiling: | ||
33 | */ | ||
34 | static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); | ||
35 | |||
36 | struct stack_frame { | ||
37 | const void __user *next_fp; | ||
38 | unsigned long return_address; | ||
39 | }; | ||
40 | |||
41 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
42 | { | ||
43 | int ret; | ||
44 | |||
45 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
46 | return 0; | ||
47 | |||
48 | ret = 1; | ||
49 | pagefault_disable(); | ||
50 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
51 | ret = 0; | ||
52 | pagefault_enable(); | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
/*
 * Context passed to the stacktrace callbacks while the kernel stack
 * of a sample is being walked.
 */
struct backtrace_info {
	struct trace_array_cpu	*data;	/* per-CPU buffer the entries go to */
	struct trace_array	*tr;	/* trace array being written */
	int			pos;	/* number of entries recorded so far */
};
62 | |||
/*
 * stacktrace_ops callback for warnings that carry a symbol.
 * The sampler has no use for them, so they are dropped.
 */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Nothing to do: warnings are ignored */
}
68 | |||
/*
 * stacktrace_ops callback for plain warnings.
 * The sampler has no use for them, so they are dropped.
 */
static void backtrace_warning(void *data, char *msg)
{
	/* Nothing to do: warnings are ignored */
}
73 | |||
/*
 * stacktrace_ops callback for a named stack.
 *
 * Always returns -1: IRQ stacks are not handled for now.
 */
static int backtrace_stack(void *data, char *name)
{
	return -1;
}
79 | |||
80 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
81 | { | ||
82 | struct backtrace_info *info = data; | ||
83 | |||
84 | if (info->pos < sample_max_depth && reliable) { | ||
85 | __trace_special(info->tr, info->data, 1, addr, 0); | ||
86 | |||
87 | info->pos++; | ||
88 | } | ||
89 | } | ||
90 | |||
91 | const static struct stacktrace_ops backtrace_ops = { | ||
92 | .warning = backtrace_warning, | ||
93 | .warning_symbol = backtrace_warning_symbol, | ||
94 | .stack = backtrace_stack, | ||
95 | .address = backtrace_address, | ||
96 | }; | ||
97 | |||
98 | static int | ||
99 | trace_kernel(struct pt_regs *regs, struct trace_array *tr, | ||
100 | struct trace_array_cpu *data) | ||
101 | { | ||
102 | struct backtrace_info info; | ||
103 | unsigned long bp; | ||
104 | char *stack; | ||
105 | |||
106 | info.tr = tr; | ||
107 | info.data = data; | ||
108 | info.pos = 1; | ||
109 | |||
110 | __trace_special(info.tr, info.data, 1, regs->ip, 0); | ||
111 | |||
112 | stack = ((char *)regs + sizeof(struct pt_regs)); | ||
113 | #ifdef CONFIG_FRAME_POINTER | ||
114 | bp = regs->bp; | ||
115 | #else | ||
116 | bp = 0; | ||
117 | #endif | ||
118 | |||
119 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); | ||
120 | |||
121 | return info.pos; | ||
122 | } | ||
123 | |||
124 | static void timer_notify(struct pt_regs *regs, int cpu) | ||
125 | { | ||
126 | struct trace_array_cpu *data; | ||
127 | struct stack_frame frame; | ||
128 | struct trace_array *tr; | ||
129 | const void __user *fp; | ||
130 | int is_user; | ||
131 | int i; | ||
132 | |||
133 | if (!regs) | ||
134 | return; | ||
135 | |||
136 | tr = sysprof_trace; | ||
137 | data = tr->data[cpu]; | ||
138 | is_user = user_mode(regs); | ||
139 | |||
140 | if (!current || current->pid == 0) | ||
141 | return; | ||
142 | |||
143 | if (is_user && current->state != TASK_RUNNING) | ||
144 | return; | ||
145 | |||
146 | __trace_special(tr, data, 0, 0, current->pid); | ||
147 | |||
148 | if (!is_user) | ||
149 | i = trace_kernel(regs, tr, data); | ||
150 | else | ||
151 | i = 0; | ||
152 | |||
153 | /* | ||
154 | * Trace user stack if we are not a kernel thread | ||
155 | */ | ||
156 | if (current->mm && i < sample_max_depth) { | ||
157 | regs = (struct pt_regs *)current->thread.sp0 - 1; | ||
158 | |||
159 | fp = (void __user *)regs->bp; | ||
160 | |||
161 | __trace_special(tr, data, 2, regs->ip, 0); | ||
162 | |||
163 | while (i < sample_max_depth) { | ||
164 | frame.next_fp = 0; | ||
165 | frame.return_address = 0; | ||
166 | if (!copy_stack_frame(fp, &frame)) | ||
167 | break; | ||
168 | if ((unsigned long)fp < regs->sp) | ||
169 | break; | ||
170 | |||
171 | __trace_special(tr, data, 2, frame.return_address, | ||
172 | (unsigned long)fp); | ||
173 | fp = frame.next_fp; | ||
174 | |||
175 | i++; | ||
176 | } | ||
177 | |||
178 | } | ||
179 | |||
180 | /* | ||
181 | * Special trace entry if we overflow the max depth: | ||
182 | */ | ||
183 | if (i == sample_max_depth) | ||
184 | __trace_special(tr, data, -1, -1, -1); | ||
185 | |||
186 | __trace_special(tr, data, 3, current->pid, i); | ||
187 | } | ||
188 | |||
189 | static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) | ||
190 | { | ||
191 | /* trace here */ | ||
192 | timer_notify(get_irq_regs(), smp_processor_id()); | ||
193 | |||
194 | hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); | ||
195 | |||
196 | return HRTIMER_RESTART; | ||
197 | } | ||
198 | |||
199 | static void start_stack_timer(int cpu) | ||
200 | { | ||
201 | struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | ||
202 | |||
203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
204 | hrtimer->function = stack_trace_timer_fn; | ||
205 | hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
206 | |||
207 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); | ||
208 | } | ||
209 | |||
210 | static void start_stack_timers(void) | ||
211 | { | ||
212 | cpumask_t saved_mask = current->cpus_allowed; | ||
213 | int cpu; | ||
214 | |||
215 | for_each_online_cpu(cpu) { | ||
216 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
217 | start_stack_timer(cpu); | ||
218 | } | ||
219 | set_cpus_allowed_ptr(current, &saved_mask); | ||
220 | } | ||
221 | |||
222 | static void stop_stack_timer(int cpu) | ||
223 | { | ||
224 | struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | ||
225 | |||
226 | hrtimer_cancel(hrtimer); | ||
227 | } | ||
228 | |||
229 | static void stop_stack_timers(void) | ||
230 | { | ||
231 | int cpu; | ||
232 | |||
233 | for_each_online_cpu(cpu) | ||
234 | stop_stack_timer(cpu); | ||
235 | } | ||
236 | |||
237 | static void stack_reset(struct trace_array *tr) | ||
238 | { | ||
239 | int cpu; | ||
240 | |||
241 | tr->time_start = ftrace_now(tr->cpu); | ||
242 | |||
243 | for_each_online_cpu(cpu) | ||
244 | tracing_reset(tr->data[cpu]); | ||
245 | } | ||
246 | |||
247 | static void start_stack_trace(struct trace_array *tr) | ||
248 | { | ||
249 | mutex_lock(&sample_timer_lock); | ||
250 | stack_reset(tr); | ||
251 | start_stack_timers(); | ||
252 | tracer_enabled = 1; | ||
253 | mutex_unlock(&sample_timer_lock); | ||
254 | } | ||
255 | |||
256 | static void stop_stack_trace(struct trace_array *tr) | ||
257 | { | ||
258 | mutex_lock(&sample_timer_lock); | ||
259 | stop_stack_timers(); | ||
260 | tracer_enabled = 0; | ||
261 | mutex_unlock(&sample_timer_lock); | ||
262 | } | ||
263 | |||
264 | static void stack_trace_init(struct trace_array *tr) | ||
265 | { | ||
266 | sysprof_trace = tr; | ||
267 | |||
268 | if (tr->ctrl) | ||
269 | start_stack_trace(tr); | ||
270 | } | ||
271 | |||
272 | static void stack_trace_reset(struct trace_array *tr) | ||
273 | { | ||
274 | if (tr->ctrl) | ||
275 | stop_stack_trace(tr); | ||
276 | } | ||
277 | |||
278 | static void stack_trace_ctrl_update(struct trace_array *tr) | ||
279 | { | ||
280 | /* When starting a new trace, reset the buffers */ | ||
281 | if (tr->ctrl) | ||
282 | start_stack_trace(tr); | ||
283 | else | ||
284 | stop_stack_trace(tr); | ||
285 | } | ||
286 | |||
287 | static struct tracer stack_trace __read_mostly = | ||
288 | { | ||
289 | .name = "sysprof", | ||
290 | .init = stack_trace_init, | ||
291 | .reset = stack_trace_reset, | ||
292 | .ctrl_update = stack_trace_ctrl_update, | ||
293 | #ifdef CONFIG_FTRACE_SELFTEST | ||
294 | .selftest = trace_selftest_startup_sysprof, | ||
295 | #endif | ||
296 | }; | ||
297 | |||
298 | __init static int init_stack_trace(void) | ||
299 | { | ||
300 | return register_tracer(&stack_trace); | ||
301 | } | ||
302 | device_initcall(init_stack_trace); | ||
303 | |||
304 | #define MAX_LONG_DIGITS 22 | ||
305 | |||
306 | static ssize_t | ||
307 | sysprof_sample_read(struct file *filp, char __user *ubuf, | ||
308 | size_t cnt, loff_t *ppos) | ||
309 | { | ||
310 | char buf[MAX_LONG_DIGITS]; | ||
311 | int r; | ||
312 | |||
313 | r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); | ||
314 | |||
315 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
316 | } | ||
317 | |||
318 | static ssize_t | ||
319 | sysprof_sample_write(struct file *filp, const char __user *ubuf, | ||
320 | size_t cnt, loff_t *ppos) | ||
321 | { | ||
322 | char buf[MAX_LONG_DIGITS]; | ||
323 | unsigned long val; | ||
324 | |||
325 | if (cnt > MAX_LONG_DIGITS-1) | ||
326 | cnt = MAX_LONG_DIGITS-1; | ||
327 | |||
328 | if (copy_from_user(&buf, ubuf, cnt)) | ||
329 | return -EFAULT; | ||
330 | |||
331 | buf[cnt] = 0; | ||
332 | |||
333 | val = simple_strtoul(buf, NULL, 10); | ||
334 | /* | ||
335 | * Enforce a minimum sample period of 100 usecs: | ||
336 | */ | ||
337 | if (val < 100) | ||
338 | val = 100; | ||
339 | |||
340 | mutex_lock(&sample_timer_lock); | ||
341 | stop_stack_timers(); | ||
342 | sample_period = val * 1000; | ||
343 | start_stack_timers(); | ||
344 | mutex_unlock(&sample_timer_lock); | ||
345 | |||
346 | return cnt; | ||
347 | } | ||
348 | |||
349 | static struct file_operations sysprof_sample_fops = { | ||
350 | .read = sysprof_sample_read, | ||
351 | .write = sysprof_sample_write, | ||
352 | }; | ||
353 | |||
354 | void init_tracer_sysprof_debugfs(struct dentry *d_tracer) | ||
355 | { | ||
356 | struct dentry *entry; | ||
357 | |||
358 | entry = debugfs_create_file("sysprof_sample_period", 0644, | ||
359 | d_tracer, NULL, &sysprof_sample_fops); | ||
360 | if (entry) | ||
361 | return; | ||
362 | pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); | ||
363 | } | ||