diff options
Diffstat (limited to 'kernel/trace/trace_sysprof.c')
-rw-r--r-- | kernel/trace/trace_sysprof.c | 363 |
1 files changed, 363 insertions, 0 deletions
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c new file mode 100644 index 000000000000..2301e1e7c606 --- /dev/null +++ b/kernel/trace/trace_sysprof.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * trace stack traces | ||
3 | * | ||
4 | * Copyright (C) 2004-2008, Soeren Sandmann | ||
5 | * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> | ||
6 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | ||
7 | */ | ||
8 | #include <linux/kallsyms.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/hrtimer.h> | ||
11 | #include <linux/uaccess.h> | ||
12 | #include <linux/ftrace.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/irq.h> | ||
15 | #include <linux/fs.h> | ||
16 | |||
17 | #include <asm/stacktrace.h> | ||
18 | |||
19 | #include "trace.h" | ||
20 | |||
/* Trace array the sampler writes into; assigned in stack_trace_init(). */
static struct trace_array *sysprof_trace;
/* Set to 1 by start_stack_trace() and to 0 by stop_stack_trace(). */
static int __read_mostly tracer_enabled;

/*
 * Sample interval in nanoseconds; 1 msec by default.
 * Rewritten by sysprof_sample_write() under sample_timer_lock.
 */
static unsigned long sample_period = 1000000;
/* Upper bound on the number of stack entries recorded per sample. */
static const unsigned int sample_max_depth = 512;

/* Serializes starting/stopping the timers and updates of sample_period. */
static DEFINE_MUTEX(sample_timer_lock);
/*
 * Per CPU hrtimers that do the profiling:
 */
static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35 | |||
/*
 * One frame record as copied from the user stack by copy_stack_frame().
 * NOTE(review): presumably mirrors the frame-pointer layout of the
 * architecture (saved frame pointer followed by the return address) --
 * confirm before reordering fields.
 */
struct stack_frame {
	const void __user	*next_fp;	/* frame pointer of the caller */
	unsigned long		return_address;	/* address recorded for this frame */
};
40 | |||
41 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
42 | { | ||
43 | int ret; | ||
44 | |||
45 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
46 | return 0; | ||
47 | |||
48 | ret = 1; | ||
49 | pagefault_disable(); | ||
50 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
51 | ret = 0; | ||
52 | pagefault_enable(); | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
/*
 * Cookie handed to dump_trace() and received back by the backtrace_ops
 * callbacks as their @data argument.
 */
struct backtrace_info {
	struct trace_array_cpu	*data;	/* per-CPU buffer the entries go to */
	struct trace_array	*tr;	/* owning trace array */
	int			pos;	/* number of entries recorded so far */
};
62 | |||
/*
 * stacktrace_ops.warning_symbol callback: the sampler deliberately
 * drops every warning the stack walker reports.
 */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	(void)data;
	(void)msg;
	(void)symbol;
}
68 | |||
/*
 * stacktrace_ops.warning callback: warnings from the stack walker are
 * intentionally discarded.
 */
static void backtrace_warning(void *data, char *msg)
{
	(void)data;
	(void)msg;
}
73 | |||
/*
 * stacktrace_ops.stack callback.  Always answers -1: IRQ stacks are not
 * followed for now.
 */
static int backtrace_stack(void *data, char *name)
{
	const int skip_stack = -1;

	(void)data;
	(void)name;

	return skip_stack;
}
79 | |||
80 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
81 | { | ||
82 | struct backtrace_info *info = data; | ||
83 | |||
84 | if (info->pos < sample_max_depth && reliable) { | ||
85 | __trace_special(info->tr, info->data, 1, addr, 0); | ||
86 | |||
87 | info->pos++; | ||
88 | } | ||
89 | } | ||
90 | |||
91 | const static struct stacktrace_ops backtrace_ops = { | ||
92 | .warning = backtrace_warning, | ||
93 | .warning_symbol = backtrace_warning_symbol, | ||
94 | .stack = backtrace_stack, | ||
95 | .address = backtrace_address, | ||
96 | }; | ||
97 | |||
98 | static int | ||
99 | trace_kernel(struct pt_regs *regs, struct trace_array *tr, | ||
100 | struct trace_array_cpu *data) | ||
101 | { | ||
102 | struct backtrace_info info; | ||
103 | unsigned long bp; | ||
104 | char *stack; | ||
105 | |||
106 | info.tr = tr; | ||
107 | info.data = data; | ||
108 | info.pos = 1; | ||
109 | |||
110 | __trace_special(info.tr, info.data, 1, regs->ip, 0); | ||
111 | |||
112 | stack = ((char *)regs + sizeof(struct pt_regs)); | ||
113 | #ifdef CONFIG_FRAME_POINTER | ||
114 | bp = regs->bp; | ||
115 | #else | ||
116 | bp = 0; | ||
117 | #endif | ||
118 | |||
119 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info); | ||
120 | |||
121 | return info.pos; | ||
122 | } | ||
123 | |||
124 | static void timer_notify(struct pt_regs *regs, int cpu) | ||
125 | { | ||
126 | struct trace_array_cpu *data; | ||
127 | struct stack_frame frame; | ||
128 | struct trace_array *tr; | ||
129 | const void __user *fp; | ||
130 | int is_user; | ||
131 | int i; | ||
132 | |||
133 | if (!regs) | ||
134 | return; | ||
135 | |||
136 | tr = sysprof_trace; | ||
137 | data = tr->data[cpu]; | ||
138 | is_user = user_mode(regs); | ||
139 | |||
140 | if (!current || current->pid == 0) | ||
141 | return; | ||
142 | |||
143 | if (is_user && current->state != TASK_RUNNING) | ||
144 | return; | ||
145 | |||
146 | __trace_special(tr, data, 0, 0, current->pid); | ||
147 | |||
148 | if (!is_user) | ||
149 | i = trace_kernel(regs, tr, data); | ||
150 | else | ||
151 | i = 0; | ||
152 | |||
153 | /* | ||
154 | * Trace user stack if we are not a kernel thread | ||
155 | */ | ||
156 | if (current->mm && i < sample_max_depth) { | ||
157 | regs = (struct pt_regs *)current->thread.sp0 - 1; | ||
158 | |||
159 | fp = (void __user *)regs->bp; | ||
160 | |||
161 | __trace_special(tr, data, 2, regs->ip, 0); | ||
162 | |||
163 | while (i < sample_max_depth) { | ||
164 | frame.next_fp = 0; | ||
165 | frame.return_address = 0; | ||
166 | if (!copy_stack_frame(fp, &frame)) | ||
167 | break; | ||
168 | if ((unsigned long)fp < regs->sp) | ||
169 | break; | ||
170 | |||
171 | __trace_special(tr, data, 2, frame.return_address, | ||
172 | (unsigned long)fp); | ||
173 | fp = frame.next_fp; | ||
174 | |||
175 | i++; | ||
176 | } | ||
177 | |||
178 | } | ||
179 | |||
180 | /* | ||
181 | * Special trace entry if we overflow the max depth: | ||
182 | */ | ||
183 | if (i == sample_max_depth) | ||
184 | __trace_special(tr, data, -1, -1, -1); | ||
185 | |||
186 | __trace_special(tr, data, 3, current->pid, i); | ||
187 | } | ||
188 | |||
189 | static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) | ||
190 | { | ||
191 | /* trace here */ | ||
192 | timer_notify(get_irq_regs(), smp_processor_id()); | ||
193 | |||
194 | hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); | ||
195 | |||
196 | return HRTIMER_RESTART; | ||
197 | } | ||
198 | |||
199 | static void start_stack_timer(int cpu) | ||
200 | { | ||
201 | struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | ||
202 | |||
203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
204 | hrtimer->function = stack_trace_timer_fn; | ||
205 | hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; | ||
206 | |||
207 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); | ||
208 | } | ||
209 | |||
/*
 * Start the sampling timer of every online CPU.
 *
 * The calling task's CPU affinity is narrowed to one CPU at a time
 * before that CPU's timer is started, and the original mask is restored
 * afterwards.
 * NOTE(review): presumably this is done so each hrtimer is queued on the
 * CPU it samples -- confirm against the hrtimer implementation.
 */
static void start_stack_timers(void)
{
	/* Remember the caller's affinity so it can be put back below. */
	cpumask_t saved_mask = current->cpus_allowed;
	int cpu;

	for_each_online_cpu(cpu) {
		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
		start_stack_timer(cpu);
	}
	set_cpus_allowed_ptr(current, &saved_mask);
}
221 | |||
222 | static void stop_stack_timer(int cpu) | ||
223 | { | ||
224 | struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | ||
225 | |||
226 | hrtimer_cancel(hrtimer); | ||
227 | } | ||
228 | |||
229 | static void stop_stack_timers(void) | ||
230 | { | ||
231 | int cpu; | ||
232 | |||
233 | for_each_online_cpu(cpu) | ||
234 | stop_stack_timer(cpu); | ||
235 | } | ||
236 | |||
237 | static void stack_reset(struct trace_array *tr) | ||
238 | { | ||
239 | int cpu; | ||
240 | |||
241 | tr->time_start = ftrace_now(tr->cpu); | ||
242 | |||
243 | for_each_online_cpu(cpu) | ||
244 | tracing_reset(tr->data[cpu]); | ||
245 | } | ||
246 | |||
/*
 * Reset the buffers of @tr, start the per-CPU sampling timers and mark
 * the tracer as enabled.  Everything happens under sample_timer_lock,
 * the same lock sysprof_sample_write() takes to change the period.
 */
static void start_stack_trace(struct trace_array *tr)
{
	mutex_lock(&sample_timer_lock);
	/* Reset before the timers start so no new sample is wiped. */
	stack_reset(tr);
	start_stack_timers();
	tracer_enabled = 1;
	mutex_unlock(&sample_timer_lock);
}
255 | |||
/*
 * Cancel the per-CPU sampling timers and mark the tracer as disabled,
 * under sample_timer_lock.  @tr is not used.
 */
static void stop_stack_trace(struct trace_array *tr)
{
	mutex_lock(&sample_timer_lock);
	stop_stack_timers();
	tracer_enabled = 0;
	mutex_unlock(&sample_timer_lock);
}
263 | |||
264 | static void stack_trace_init(struct trace_array *tr) | ||
265 | { | ||
266 | sysprof_trace = tr; | ||
267 | |||
268 | if (tr->ctrl) | ||
269 | start_stack_trace(tr); | ||
270 | } | ||
271 | |||
272 | static void stack_trace_reset(struct trace_array *tr) | ||
273 | { | ||
274 | if (tr->ctrl) | ||
275 | stop_stack_trace(tr); | ||
276 | } | ||
277 | |||
278 | static void stack_trace_ctrl_update(struct trace_array *tr) | ||
279 | { | ||
280 | /* When starting a new trace, reset the buffers */ | ||
281 | if (tr->ctrl) | ||
282 | start_stack_trace(tr); | ||
283 | else | ||
284 | stop_stack_trace(tr); | ||
285 | } | ||
286 | |||
/*
 * Tracer descriptor registered by init_stack_trace() under the name
 * "sysprof".  The selftest hook is only wired up when
 * CONFIG_FTRACE_SELFTEST is set.
 */
static struct tracer stack_trace __read_mostly =
{
	.name		= "sysprof",
	.init		= stack_trace_init,
	.reset		= stack_trace_reset,
	.ctrl_update	= stack_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_sysprof,
#endif
};
297 | |||
298 | __init static int init_stack_trace(void) | ||
299 | { | ||
300 | return register_tracer(&stack_trace); | ||
301 | } | ||
302 | device_initcall(init_stack_trace); | ||
303 | |||
/*
 * Size of the on-stack text buffers used by the sample-period read and
 * write handlers: room for a 20-digit decimal number, a newline and the
 * terminating NUL.
 */
#define MAX_LONG_DIGITS 22
305 | |||
306 | static ssize_t | ||
307 | sysprof_sample_read(struct file *filp, char __user *ubuf, | ||
308 | size_t cnt, loff_t *ppos) | ||
309 | { | ||
310 | char buf[MAX_LONG_DIGITS]; | ||
311 | int r; | ||
312 | |||
313 | r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); | ||
314 | |||
315 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
316 | } | ||
317 | |||
318 | static ssize_t | ||
319 | sysprof_sample_write(struct file *filp, const char __user *ubuf, | ||
320 | size_t cnt, loff_t *ppos) | ||
321 | { | ||
322 | char buf[MAX_LONG_DIGITS]; | ||
323 | unsigned long val; | ||
324 | |||
325 | if (cnt > MAX_LONG_DIGITS-1) | ||
326 | cnt = MAX_LONG_DIGITS-1; | ||
327 | |||
328 | if (copy_from_user(&buf, ubuf, cnt)) | ||
329 | return -EFAULT; | ||
330 | |||
331 | buf[cnt] = 0; | ||
332 | |||
333 | val = simple_strtoul(buf, NULL, 10); | ||
334 | /* | ||
335 | * Enforce a minimum sample period of 100 usecs: | ||
336 | */ | ||
337 | if (val < 100) | ||
338 | val = 100; | ||
339 | |||
340 | mutex_lock(&sample_timer_lock); | ||
341 | stop_stack_timers(); | ||
342 | sample_period = val * 1000; | ||
343 | start_stack_timers(); | ||
344 | mutex_unlock(&sample_timer_lock); | ||
345 | |||
346 | return cnt; | ||
347 | } | ||
348 | |||
349 | static struct file_operations sysprof_sample_fops = { | ||
350 | .read = sysprof_sample_read, | ||
351 | .write = sysprof_sample_write, | ||
352 | }; | ||
353 | |||
354 | void init_tracer_sysprof_debugfs(struct dentry *d_tracer) | ||
355 | { | ||
356 | struct dentry *entry; | ||
357 | |||
358 | entry = debugfs_create_file("sysprof_sample_period", 0644, | ||
359 | d_tracer, NULL, &sysprof_sample_fops); | ||
360 | if (entry) | ||
361 | return; | ||
362 | pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); | ||
363 | } | ||