diff options
Diffstat (limited to 'kernel/trace/trace_sysprof.c')
-rw-r--r-- | kernel/trace/trace_sysprof.c | 365 |
1 files changed, 365 insertions, 0 deletions
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c new file mode 100644 index 000000000000..63528086337c --- /dev/null +++ b/kernel/trace/trace_sysprof.c | |||
@@ -0,0 +1,365 @@ | |||
1 | /* | ||
2 | * trace stack traces | ||
3 | * | ||
4 | * Copyright (C) 2004-2008, Soeren Sandmann | ||
5 | * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com> | ||
6 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> | ||
7 | */ | ||
8 | #include <linux/kallsyms.h> | ||
9 | #include <linux/debugfs.h> | ||
10 | #include <linux/hrtimer.h> | ||
11 | #include <linux/uaccess.h> | ||
12 | #include <linux/ftrace.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/irq.h> | ||
15 | #include <linux/fs.h> | ||
16 | |||
17 | #include <asm/stacktrace.h> | ||
18 | |||
19 | #include "trace.h" | ||
20 | |||
/* The trace array sysprof samples are recorded into */
static struct trace_array *sysprof_trace;
/* Nonzero while the per-CPU sampling timers are running */
static int __read_mostly tracer_enabled;

/*
 * 1 msec sample interval by default:
 */
static unsigned long sample_period = 1000000;
/* Upper bound on trace entries recorded per sample */
static const unsigned int sample_max_depth = 512;

/* Serializes timer start/stop and sample_period updates */
static DEFINE_MUTEX(sample_timer_lock);
/*
 * Per CPU hrtimers that do the profiling:
 */
static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);

/*
 * Layout of one user-space stack frame as produced by
 * frame-pointer-based code: saved frame pointer, then the
 * return address.
 */
struct stack_frame {
	const void __user *next_fp;
	unsigned long return_address;
};
40 | |||
41 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
42 | { | ||
43 | int ret; | ||
44 | |||
45 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
46 | return 0; | ||
47 | |||
48 | ret = 1; | ||
49 | pagefault_disable(); | ||
50 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
51 | ret = 0; | ||
52 | pagefault_enable(); | ||
53 | |||
54 | return ret; | ||
55 | } | ||
56 | |||
/*
 * Cursor threaded through the dump_trace() callbacks: where entries
 * are recorded (tr/data) and how many have been recorded so far (pos).
 */
struct backtrace_info {
	struct trace_array_cpu *data;
	struct trace_array *tr;
	int pos;
};
62 | |||
/*
 * dump_trace() callbacks: warnings from the stack walker are
 * deliberately dropped — a partial backtrace is still useful.
 */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}

static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}
73 | |||
/*
 * dump_trace() callback invoked when the walker reaches another
 * stack; -1 presumably tells it not to descend — TODO confirm against
 * the stacktrace_ops contract in asm/stacktrace.h.
 */
static int backtrace_stack(void *data, char *name)
{
	/* Don't bother with IRQ stacks for now */
	return -1;
}
79 | |||
80 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
81 | { | ||
82 | struct backtrace_info *info = data; | ||
83 | |||
84 | if (info->pos < sample_max_depth && reliable) { | ||
85 | __trace_special(info->tr, info->data, 1, addr, 0); | ||
86 | |||
87 | info->pos++; | ||
88 | } | ||
89 | } | ||
90 | |||
91 | const static struct stacktrace_ops backtrace_ops = { | ||
92 | .warning = backtrace_warning, | ||
93 | .warning_symbol = backtrace_warning_symbol, | ||
94 | .stack = backtrace_stack, | ||
95 | .address = backtrace_address, | ||
96 | }; | ||
97 | |||
/*
 * Record a kernel-mode backtrace for the interrupted context.
 *
 * Emits the interrupted ip first (entry type 1), then walks the
 * kernel stack with dump_trace(), whose backtrace_ops callbacks
 * append further reliable addresses.
 *
 * Returns the number of entries recorded; info.pos starts at 1 to
 * account for the initial ip entry.
 */
static int
trace_kernel(struct pt_regs *regs, struct trace_array *tr,
	     struct trace_array_cpu *data)
{
	struct backtrace_info info;
	unsigned long bp;
	char *stack;

	info.tr = tr;
	info.data = data;
	info.pos = 1;

	__trace_special(info.tr, info.data, 1, regs->ip, 0);

	/* The stack to walk begins right above the saved registers */
	stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
	bp = regs->bp;
#else
	/* No frame pointers: pass 0 and let dump_trace() cope */
	bp = 0;
#endif

	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);

	return info.pos;
}
123 | |||
/*
 * Take one profiling sample of the current CPU from hrtimer (hard-irq)
 * context: emit the current pid, a kernel backtrace if we interrupted
 * kernel mode, and a frame-pointer walk of the user stack.
 *
 * Entry encoding (second argument to __trace_special):
 *   0  - start of sample (arg3 = pid)
 *   1  - kernel text address
 *   2  - user text address (arg3 = frame pointer)
 *   3  - end of sample (arg3 = entry count)
 *   -1 - sample hit the sample_max_depth limit
 */
static void timer_notify(struct pt_regs *regs, int cpu)
{
	struct trace_array_cpu *data;
	struct stack_frame frame;
	struct trace_array *tr;
	const void __user *fp;
	int is_user;
	int i;

	/* No register snapshot available for this interrupt */
	if (!regs)
		return;

	tr = sysprof_trace;
	data = tr->data[cpu];
	is_user = user_mode(regs);

	/* Skip if there is no task, or it is the idle task (pid 0) */
	if (!current || current->pid == 0)
		return;

	/* A non-running task interrupted in user mode isn't sampled */
	if (is_user && current->state != TASK_RUNNING)
		return;

	__trace_special(tr, data, 0, 0, current->pid);

	if (!is_user)
		i = trace_kernel(regs, tr, data);
	else
		i = 0;

	/*
	 * Trace user stack if we are not a kernel thread
	 */
	if (current->mm && i < sample_max_depth) {
		/*
		 * The saved user registers sit at the top of the kernel
		 * stack, just below thread.sp0.
		 */
		regs = (struct pt_regs *)current->thread.sp0 - 1;

		fp = (void __user *)regs->bp;

		__trace_special(tr, data, 2, regs->ip, 0);

		while (i < sample_max_depth) {
			frame.next_fp = 0;
			frame.return_address = 0;
			if (!copy_stack_frame(fp, &frame))
				break;
			/* Sanity check: fp below the user sp means a bogus chain */
			if ((unsigned long)fp < regs->sp)
				break;

			__trace_special(tr, data, 2, frame.return_address,
					(unsigned long)fp);
			fp = frame.next_fp;

			i++;
		}

	}

	/*
	 * Special trace entry if we overflow the max depth:
	 */
	if (i == sample_max_depth)
		__trace_special(tr, data, -1, -1, -1);

	__trace_special(tr, data, 3, current->pid, i);
}
188 | |||
189 | static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer) | ||
190 | { | ||
191 | /* trace here */ | ||
192 | timer_notify(get_irq_regs(), smp_processor_id()); | ||
193 | |||
194 | hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); | ||
195 | |||
196 | return HRTIMER_RESTART; | ||
197 | } | ||
198 | |||
/*
 * Initialize and arm the sampling hrtimer for @cpu. The caller
 * (start_stack_timers) migrates itself to @cpu first, so the relative
 * timer is started on the local CPU.
 */
static void start_stack_timer(int cpu)
{
	struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);

	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = stack_trace_timer_fn;
	/* Run the callback straight from hard-irq context */
	hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;

	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
}
209 | |||
/*
 * Arm the sampling hrtimer on every online CPU. hrtimers are
 * CPU-local, so temporarily pin ourselves to each CPU in turn, start
 * its timer there, then restore the original affinity.
 *
 * Callers hold sample_timer_lock.
 */
static void start_stack_timers(void)
{
	cpumask_t saved_mask = current->cpus_allowed;
	int cpu;

	for_each_online_cpu(cpu) {
		cpumask_of_cpu_ptr(new_mask, cpu);

		set_cpus_allowed_ptr(current, new_mask);
		start_stack_timer(cpu);
	}
	set_cpus_allowed_ptr(current, &saved_mask);
}
223 | |||
224 | static void stop_stack_timer(int cpu) | ||
225 | { | ||
226 | struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu); | ||
227 | |||
228 | hrtimer_cancel(hrtimer); | ||
229 | } | ||
230 | |||
/*
 * Cancel the sampling hrtimer on every online CPU. Callers hold
 * sample_timer_lock.
 */
static void stop_stack_timers(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		stop_stack_timer(cpu);
}
238 | |||
/*
 * Restamp the trace start time and reset the per-CPU trace buffers of
 * all online CPUs.
 */
static void stack_reset(struct trace_array *tr)
{
	int cpu;

	tr->time_start = ftrace_now(tr->cpu);

	for_each_online_cpu(cpu)
		tracing_reset(tr->data[cpu]);
}
248 | |||
/*
 * Begin sysprof sampling: reset the buffers and start the per-CPU
 * timers under sample_timer_lock.
 */
static void start_stack_trace(struct trace_array *tr)
{
	mutex_lock(&sample_timer_lock);
	stack_reset(tr);
	start_stack_timers();
	tracer_enabled = 1;
	mutex_unlock(&sample_timer_lock);
}
257 | |||
/*
 * Stop sysprof sampling: cancel the per-CPU timers under
 * sample_timer_lock.
 */
static void stop_stack_trace(struct trace_array *tr)
{
	mutex_lock(&sample_timer_lock);
	stop_stack_timers();
	tracer_enabled = 0;
	mutex_unlock(&sample_timer_lock);
}
265 | |||
/*
 * Tracer init callback: remember the trace array and begin sampling
 * if the tracer is already switched on.
 */
static void stack_trace_init(struct trace_array *tr)
{
	sysprof_trace = tr;

	if (tr->ctrl)
		start_stack_trace(tr);
}
273 | |||
/*
 * Tracer reset callback: stop sampling if it is currently running.
 */
static void stack_trace_reset(struct trace_array *tr)
{
	if (tr->ctrl)
		stop_stack_trace(tr);
}
279 | |||
/*
 * Tracer on/off switch; tr->ctrl already reflects the new state.
 */
static void stack_trace_ctrl_update(struct trace_array *tr)
{
	/* When starting a new trace, reset the buffers */
	if (tr->ctrl)
		start_stack_trace(tr);
	else
		stop_stack_trace(tr);
}
288 | |||
/*
 * Registration record for the "sysprof" tracer: hooks the ftrace
 * core's init/reset/ctrl_update callbacks.
 */
static struct tracer stack_trace __read_mostly =
{
	.name		= "sysprof",
	.init		= stack_trace_init,
	.reset		= stack_trace_reset,
	.ctrl_update	= stack_trace_ctrl_update,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_sysprof,
#endif
};
299 | |||
300 | __init static int init_stack_trace(void) | ||
301 | { | ||
302 | return register_tracer(&stack_trace); | ||
303 | } | ||
304 | device_initcall(init_stack_trace); | ||
305 | |||
306 | #define MAX_LONG_DIGITS 22 | ||
307 | |||
308 | static ssize_t | ||
309 | sysprof_sample_read(struct file *filp, char __user *ubuf, | ||
310 | size_t cnt, loff_t *ppos) | ||
311 | { | ||
312 | char buf[MAX_LONG_DIGITS]; | ||
313 | int r; | ||
314 | |||
315 | r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period)); | ||
316 | |||
317 | return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | ||
318 | } | ||
319 | |||
320 | static ssize_t | ||
321 | sysprof_sample_write(struct file *filp, const char __user *ubuf, | ||
322 | size_t cnt, loff_t *ppos) | ||
323 | { | ||
324 | char buf[MAX_LONG_DIGITS]; | ||
325 | unsigned long val; | ||
326 | |||
327 | if (cnt > MAX_LONG_DIGITS-1) | ||
328 | cnt = MAX_LONG_DIGITS-1; | ||
329 | |||
330 | if (copy_from_user(&buf, ubuf, cnt)) | ||
331 | return -EFAULT; | ||
332 | |||
333 | buf[cnt] = 0; | ||
334 | |||
335 | val = simple_strtoul(buf, NULL, 10); | ||
336 | /* | ||
337 | * Enforce a minimum sample period of 100 usecs: | ||
338 | */ | ||
339 | if (val < 100) | ||
340 | val = 100; | ||
341 | |||
342 | mutex_lock(&sample_timer_lock); | ||
343 | stop_stack_timers(); | ||
344 | sample_period = val * 1000; | ||
345 | start_stack_timers(); | ||
346 | mutex_unlock(&sample_timer_lock); | ||
347 | |||
348 | return cnt; | ||
349 | } | ||
350 | |||
351 | static struct file_operations sysprof_sample_fops = { | ||
352 | .read = sysprof_sample_read, | ||
353 | .write = sysprof_sample_write, | ||
354 | }; | ||
355 | |||
356 | void init_tracer_sysprof_debugfs(struct dentry *d_tracer) | ||
357 | { | ||
358 | struct dentry *entry; | ||
359 | |||
360 | entry = debugfs_create_file("sysprof_sample_period", 0644, | ||
361 | d_tracer, NULL, &sysprof_sample_fops); | ||
362 | if (entry) | ||
363 | return; | ||
364 | pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); | ||
365 | } | ||