path: root/kernel/trace/fgraph.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-31 14:46:59 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-31 14:46:59 -0500
commit	495d714ad140e1732e66c45d0409054b24c1a0d6 (patch)
tree	373ec6619adea47d848d36f140b32def27164bbd /kernel/trace/fgraph.c
parent	f12e840c819bab42621685558a01d3f46ab9a226 (diff)
parent	3d739c1f6156c70eb0548aa288dcfbac9e0bd162 (diff)
Merge tag 'trace-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing updates from Steven Rostedt:

 - Rework of the kprobe/uprobe and synthetic events to consolidate all
   the dynamic event code. This will make changes in the future easier.

 - Partial rewrite of the function graph tracing infrastructure. This
   will allow multiple users to hook onto functions and get the
   callback (return) of the function. This is the ground work for
   having kprobes and the function graph tracer use one code base.

 - Clean up of the histogram code that will facilitate adding more
   features to the histograms in the future.

 - Addition of str_has_prefix() and a few use cases. There currently is
   a similar function strstart() that is used in a few places, but it
   only returns a bool and not a length. These instances will be
   removed in the future to use str_has_prefix() instead.

 - A few other various clean ups as well.

* tag 'trace-v4.21' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (57 commits)
  tracing: Use the return of str_has_prefix() to remove open coded numbers
  tracing: Have the historgram use the result of str_has_prefix() for len of prefix
  tracing: Use str_has_prefix() instead of using fixed sizes
  tracing: Use str_has_prefix() helper for histogram code
  string.h: Add str_has_prefix() helper function
  tracing: Make function 'ftrace_exports' static
  tracing: Simplify printf'ing in seq_print_sym
  tracing: Avoid -Wformat-nonliteral warning
  tracing: Merge seq_print_sym_short() and seq_print_sym_offset()
  tracing: Add hist trigger comments for variable-related fields
  tracing: Remove hist trigger synth_var_refs
  tracing: Use hist trigger's var_ref array to destroy var_refs
  tracing: Remove open-coding of hist trigger var_ref management
  tracing: Use var_refs[] for hist trigger reference checking
  tracing: Change strlen to sizeof for hist trigger static strings
  tracing: Remove unnecessary hist trigger struct field
  tracing: Fix ftrace_graph_get_ret_stack() to use task and not current
  seq_buf: Use size_t for len in seq_buf_puts()
  seq_buf: Make seq_buf_puts() null-terminate the buffer
  arm64: Use ftrace_graph_get_ret_stack() instead of curr_ret_stack
  ...
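The new fgraph.c below exposes the multi-user ground work through register_ftrace_graph() and unregister_ftrace_graph(), which now take a struct fgraph_ops carrying the entry and return callbacks (see gops->entryfunc and gops->retfunc in the code). A minimal sketch of how a tracer would hook in with this API; the callback names and bodies are made up for illustration, only the signatures and the register/unregister calls come from the patch itself:

	/* entry callback: nonzero return means "trace this function's return" */
	static int my_entry(struct ftrace_graph_ent *trace)
	{
		return 1;
	}

	/* return callback: calltime, rettime and depth are filled in by fgraph */
	static void my_return(struct ftrace_graph_ret *trace)
	{
	}

	static struct fgraph_ops my_gops = {
		.entryfunc	= my_entry,
		.retfunc	= my_return,
	};

	int err;

	/* only one user can be registered at a time in this version */
	err = register_ftrace_graph(&my_gops);
	...
	unregister_ftrace_graph(&my_gops);

The str_has_prefix() helper mentioned above returns the length of the prefix on a match instead of just a bool, so a caller can both test for the prefix and skip past it without hard-coding its length. Roughly the idea (a sketch of the shape of the include/linux/string.h helper and a hist-trigger-style caller, not a verbatim copy; the "onmatch(" prefix is only an example):

	static inline size_t str_has_prefix(const char *str, const char *prefix)
	{
		size_t len = strlen(prefix);
		return strncmp(str, prefix, len) == 0 ? len : 0;
	}

	size_t len;

	len = str_has_prefix(str, "onmatch(");	/* 0 if no match */
	if (len)
		action_str = str + len;		/* no open-coded 8 */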
Diffstat (limited to 'kernel/trace/fgraph.c')
-rw-r--r--  kernel/trace/fgraph.c  626
1 file changed, 626 insertions, 0 deletions
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
new file mode 100644
index 000000000000..8dfd5021b933
--- /dev/null
+++ b/kernel/trace/fgraph.c
@@ -0,0 +1,626 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Infrastructure to hook into function calls and returns.
4 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 * Mostly borrowed from function tracer which
6 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
7 *
8 * Highly modified by Steven Rostedt (VMware).
9 */
10#include <linux/suspend.h>
11#include <linux/ftrace.h>
12#include <linux/slab.h>
13
14#include <trace/events/sched.h>
15
16#include "ftrace_internal.h"
17
18#ifdef CONFIG_DYNAMIC_FTRACE
19#define ASSIGN_OPS_HASH(opsname, val) \
20 .func_hash = val, \
21 .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
22#else
23#define ASSIGN_OPS_HASH(opsname, val)
24#endif
25
26static bool kill_ftrace_graph;
27int ftrace_graph_active;
28
29/* Enabled by default (can be cleared by function_graph tracer flags) */
30static bool fgraph_sleep_time = true;
31
32/**
33 * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
34 *
35 * ftrace_graph_stop() is called when a severe error is detected in
36 * the function graph tracing. This function is called by the critical
37 * paths of function graph to keep those paths from doing any more harm.
38 */
39bool ftrace_graph_is_dead(void)
40{
41 return kill_ftrace_graph;
42}
43
44/**
45 * ftrace_graph_stop - set to permanently disable function graph tracing
46 *
47 * In case of an error in function graph tracing, this is called
48 * to try to keep function graph tracing from causing any more harm.
49 * Usually this is pretty severe and this is called to try to at least
50 * get a warning out to the user.
51 */
52void ftrace_graph_stop(void)
53{
54 kill_ftrace_graph = true;
55}
56
57/* Add a function return address to the trace stack on thread info. */
58static int
59ftrace_push_return_trace(unsigned long ret, unsigned long func,
60 unsigned long frame_pointer, unsigned long *retp)
61{
62 unsigned long long calltime;
63 int index;
64
65 if (unlikely(ftrace_graph_is_dead()))
66 return -EBUSY;
67
68 if (!current->ret_stack)
69 return -EBUSY;
70
71 /*
72 * We must make sure the ret_stack is tested before we read
73 * anything else.
74 */
75 smp_rmb();
76
77 /* The return trace stack is full */
78 if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
79 atomic_inc(&current->trace_overrun);
80 return -EBUSY;
81 }
82
83 calltime = trace_clock_local();
84
85 index = ++current->curr_ret_stack;
86 barrier();
87 current->ret_stack[index].ret = ret;
88 current->ret_stack[index].func = func;
89 current->ret_stack[index].calltime = calltime;
90#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
91 current->ret_stack[index].fp = frame_pointer;
92#endif
93#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
94 current->ret_stack[index].retp = retp;
95#endif
96 return 0;
97}
98
99int function_graph_enter(unsigned long ret, unsigned long func,
100 unsigned long frame_pointer, unsigned long *retp)
101{
102 struct ftrace_graph_ent trace;
103
104 trace.func = func;
105 trace.depth = ++current->curr_ret_depth;
106
107 if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
108 goto out;
109
110 /* Only trace if the calling function expects to */
111 if (!ftrace_graph_entry(&trace))
112 goto out_ret;
113
114 return 0;
115 out_ret:
116 current->curr_ret_stack--;
117 out:
118 current->curr_ret_depth--;
119 return -EBUSY;
120}
121
122/* Retrieve a function return address from the trace stack on thread info. */
123static void
124ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
125 unsigned long frame_pointer)
126{
127 int index;
128
129 index = current->curr_ret_stack;
130
131 if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
132 ftrace_graph_stop();
133 WARN_ON(1);
134 /* Might as well panic, otherwise we have nowhere to go */
135 *ret = (unsigned long)panic;
136 return;
137 }
138
139#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
140 /*
141 * The arch may choose to record the frame pointer used
142 * and check it here to make sure that it is what we expect it
143 * to be. If gcc does not set the placeholder of the return
144 * address in the frame pointer, and does a copy instead, then
145 * the function graph trace will fail. This test detects this
146 * case.
147 *
148 * Currently, x86_32 compiled with -Os (optimize for size) makes the
149 * latest gcc do the above.
150 *
151 * Note, -mfentry does not use frame pointers, and this test
152 * is not needed if CC_USING_FENTRY is set.
153 */
154 if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
155 ftrace_graph_stop();
156 WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
157 " from func %ps return to %lx\n",
158 current->ret_stack[index].fp,
159 frame_pointer,
160 (void *)current->ret_stack[index].func,
161 current->ret_stack[index].ret);
162 *ret = (unsigned long)panic;
163 return;
164 }
165#endif
166
167 *ret = current->ret_stack[index].ret;
168 trace->func = current->ret_stack[index].func;
169 trace->calltime = current->ret_stack[index].calltime;
170 trace->overrun = atomic_read(&current->trace_overrun);
171 trace->depth = current->curr_ret_depth--;
172 /*
173 * We still want to trace interrupts coming in if
174 * max_depth is set to 1. Make sure the decrement is
175 * seen before ftrace_graph_return.
176 */
177 barrier();
178}
179
180/*
181 * Hibernation protection.
182 * The state of the current task is too unstable during
183 * suspend/restore to disk. We want to protect against that.
184 */
185static int
186ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
187 void *unused)
188{
189 switch (state) {
190 case PM_HIBERNATION_PREPARE:
191 pause_graph_tracing();
192 break;
193
194 case PM_POST_HIBERNATION:
195 unpause_graph_tracing();
196 break;
197 }
198 return NOTIFY_DONE;
199}
200
201static struct notifier_block ftrace_suspend_notifier = {
202 .notifier_call = ftrace_suspend_notifier_call,
203};
204
205/*
206 * Send the trace to the ring-buffer.
207 * @return the original return address.
208 */
209unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
210{
211 struct ftrace_graph_ret trace;
212 unsigned long ret;
213
214 ftrace_pop_return_trace(&trace, &ret, frame_pointer);
215 trace.rettime = trace_clock_local();
216 ftrace_graph_return(&trace);
217 /*
218 * The ftrace_graph_return() may still access the current
219 * ret_stack structure; we need to make sure the update of
220 * curr_ret_stack is after that.
221 */
222 barrier();
223 current->curr_ret_stack--;
224
225 if (unlikely(!ret)) {
226 ftrace_graph_stop();
227 WARN_ON(1);
228 /* Might as well panic. What else to do? */
229 ret = (unsigned long)panic;
230 }
231
232 return ret;
233}
234
235/**
236 * ftrace_graph_get_ret_stack - return the entry of the shadow stack
237 * @task: The task to read the shadow stack from
238 * @idx: Index down the shadow stack
239 *
240 * Return the ret_stack entry on the shadow stack of @task at the
241 * call graph index @idx, starting with zero. If @idx is zero, it
242 * will return the last saved ret_stack entry. If it is greater than
243 * zero, it will return the corresponding ret_stack for the depth
244 * of saved return addresses.
245 */
246struct ftrace_ret_stack *
247ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
248{
249 idx = task->curr_ret_stack - idx;
250
251 if (idx >= 0 && idx <= task->curr_ret_stack)
252 return &task->ret_stack[idx];
253
254 return NULL;
255}
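/*
 * A minimal usage sketch (illustration only, not part of this patch):
 * an architecture backtrace routine can walk the shadow stack through
 * the helper above instead of indexing curr_ret_stack itself. The
 * @task here is assumed to be one that is not currently running.
 *
 *	struct ftrace_ret_stack *ret_stack;
 *	int i = 0;
 *
 *	while ((ret_stack = ftrace_graph_get_ret_stack(task, i++)))
 *		pr_info("return %pS from %pS\n",
 *			(void *)ret_stack->ret, (void *)ret_stack->func);
 */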
256
257/**
258 * ftrace_graph_ret_addr - convert a potentially modified stack return address
259 * to its original value
260 *
261 * This function can be called by stack unwinding code to convert a found stack
262 * return address ('ret') to its original value, in case the function graph
263 * tracer has modified it to be 'return_to_handler'. If the address hasn't
264 * been modified, the unchanged value of 'ret' is returned.
265 *
266 * 'idx' is a state variable which should be initialized by the caller to zero
267 * before the first call.
268 *
269 * 'retp' is a pointer to the return address on the stack. It's ignored if
270 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
271 */
272#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
273unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
274 unsigned long ret, unsigned long *retp)
275{
276 int index = task->curr_ret_stack;
277 int i;
278
279 if (ret != (unsigned long)return_to_handler)
280 return ret;
281
282 if (index < 0)
283 return ret;
284
285 for (i = 0; i <= index; i++)
286 if (task->ret_stack[i].retp == retp)
287 return task->ret_stack[i].ret;
288
289 return ret;
290}
291#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
292unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
293 unsigned long ret, unsigned long *retp)
294{
295 int task_idx;
296
297 if (ret != (unsigned long)return_to_handler)
298 return ret;
299
300 task_idx = task->curr_ret_stack;
301
302 if (!task->ret_stack || task_idx < *idx)
303 return ret;
304
305 task_idx -= *idx;
306 (*idx)++;
307
308 return task->ret_stack[task_idx].ret;
309}
310#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
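/*
 * A minimal usage sketch (illustration only, not part of this patch):
 * an unwinder keeps one index variable per stack walk, initialized to
 * zero, and filters every return address it reads through
 * ftrace_graph_ret_addr(). The unwind_one_frame() helper and the addr
 * and retp variables below are hypothetical stand-ins for however the
 * unwinder reads a return address and remembers where it read it from.
 *
 *	int graph_idx = 0;
 *	unsigned long addr;
 *	unsigned long *retp;
 *
 *	while (unwind_one_frame(&addr, &retp)) {
 *		addr = ftrace_graph_ret_addr(task, &graph_idx, addr, retp);
 *		pr_info("%pS\n", (void *)addr);
 *	}
 */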
311
312static struct ftrace_ops graph_ops = {
313 .func = ftrace_stub,
314 .flags = FTRACE_OPS_FL_RECURSION_SAFE |
315 FTRACE_OPS_FL_INITIALIZED |
316 FTRACE_OPS_FL_PID |
317 FTRACE_OPS_FL_STUB,
318#ifdef FTRACE_GRAPH_TRAMP_ADDR
319 .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
320 /* trampoline_size is only needed for dynamically allocated tramps */
321#endif
322 ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
323};
324
325void ftrace_graph_sleep_time_control(bool enable)
326{
327 fgraph_sleep_time = enable;
328}
329
330int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
331{
332 return 0;
333}
334
335/* The callbacks that hook a function */
336trace_func_graph_ret_t ftrace_graph_return =
337 (trace_func_graph_ret_t)ftrace_stub;
338trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
339static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
340
341/* Try to assign a return stack to each of FTRACE_RETSTACK_ALLOC_SIZE tasks. */
342static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
343{
344 int i;
345 int ret = 0;
346 int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
347 struct task_struct *g, *t;
348
349 for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
350 ret_stack_list[i] =
351 kmalloc_array(FTRACE_RETFUNC_DEPTH,
352 sizeof(struct ftrace_ret_stack),
353 GFP_KERNEL);
354 if (!ret_stack_list[i]) {
355 start = 0;
356 end = i;
357 ret = -ENOMEM;
358 goto free;
359 }
360 }
361
362 read_lock(&tasklist_lock);
363 do_each_thread(g, t) {
364 if (start == end) {
365 ret = -EAGAIN;
366 goto unlock;
367 }
368
369 if (t->ret_stack == NULL) {
370 atomic_set(&t->tracing_graph_pause, 0);
371 atomic_set(&t->trace_overrun, 0);
372 t->curr_ret_stack = -1;
373 t->curr_ret_depth = -1;
374 /* Make sure the tasks see the -1 first: */
375 smp_wmb();
376 t->ret_stack = ret_stack_list[start++];
377 }
378 } while_each_thread(g, t);
379
380unlock:
381 read_unlock(&tasklist_lock);
382free:
383 for (i = start; i < end; i++)
384 kfree(ret_stack_list[i]);
385 return ret;
386}
387
388static void
389ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
390 struct task_struct *prev, struct task_struct *next)
391{
392 unsigned long long timestamp;
393 int index;
394
395 /*
396 * Does the user want to count the time a function was asleep?
397 * If so, do not update the time stamps.
398 */
399 if (fgraph_sleep_time)
400 return;
401
402 timestamp = trace_clock_local();
403
404 prev->ftrace_timestamp = timestamp;
405
406 /* only process tasks that we timestamped */
407 if (!next->ftrace_timestamp)
408 return;
409
410 /*
411 * Update all the counters in next to make up for the
412 * time next was sleeping.
413 */
414 timestamp -= next->ftrace_timestamp;
415
416 for (index = next->curr_ret_stack; index >= 0; index--)
417 next->ret_stack[index].calltime += timestamp;
418}
419
420static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
421{
422 if (!ftrace_ops_test(&global_ops, trace->func, NULL))
423 return 0;
424 return __ftrace_graph_entry(trace);
425}
426
427/*
428 * The function graph tracer should only trace the functions defined
429 * by set_ftrace_filter and set_ftrace_notrace. If another function
430 * tracer ops is registered, the graph tracer needs to test the
431 * function against the global ops, and not just trace any function
432 * that any ftrace_ops has registered.
433 */
434void update_function_graph_func(void)
435{
436 struct ftrace_ops *op;
437 bool do_test = false;
438
439 /*
440 * The graph and global ops share the same set of functions
441 * to test. If any other ops is on the list, then
442 * the graph tracing needs to test if it's the function
443 * it should call.
444 */
445 do_for_each_ftrace_op(op, ftrace_ops_list) {
446 if (op != &global_ops && op != &graph_ops &&
447 op != &ftrace_list_end) {
448 do_test = true;
449 /* in double loop, break out with goto */
450 goto out;
451 }
452 } while_for_each_ftrace_op(op);
453 out:
454 if (do_test)
455 ftrace_graph_entry = ftrace_graph_entry_test;
456 else
457 ftrace_graph_entry = __ftrace_graph_entry;
458}
459
460static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
461
462static void
463graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
464{
465 atomic_set(&t->tracing_graph_pause, 0);
466 atomic_set(&t->trace_overrun, 0);
467 t->ftrace_timestamp = 0;
468 /* make curr_ret_stack visible before we add the ret_stack */
469 smp_wmb();
470 t->ret_stack = ret_stack;
471}
472
473/*
474 * Allocate a return stack for the idle task. May be the first
475 * time through, or it may be done by CPU hotplug online.
476 */
477void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
478{
479 t->curr_ret_stack = -1;
480 t->curr_ret_depth = -1;
481 /*
482 * The idle task has no parent; it either has its own
483 * stack or no stack at all.
484 */
485 if (t->ret_stack)
486 WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
487
488 if (ftrace_graph_active) {
489 struct ftrace_ret_stack *ret_stack;
490
491 ret_stack = per_cpu(idle_ret_stack, cpu);
492 if (!ret_stack) {
493 ret_stack =
494 kmalloc_array(FTRACE_RETFUNC_DEPTH,
495 sizeof(struct ftrace_ret_stack),
496 GFP_KERNEL);
497 if (!ret_stack)
498 return;
499 per_cpu(idle_ret_stack, cpu) = ret_stack;
500 }
501 graph_init_task(t, ret_stack);
502 }
503}
504
505/* Allocate a return stack for a newly created task */
506void ftrace_graph_init_task(struct task_struct *t)
507{
508 /* Make sure we do not use the parent ret_stack */
509 t->ret_stack = NULL;
510 t->curr_ret_stack = -1;
511 t->curr_ret_depth = -1;
512
513 if (ftrace_graph_active) {
514 struct ftrace_ret_stack *ret_stack;
515
516 ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
517 sizeof(struct ftrace_ret_stack),
518 GFP_KERNEL);
519 if (!ret_stack)
520 return;
521 graph_init_task(t, ret_stack);
522 }
523}
524
525void ftrace_graph_exit_task(struct task_struct *t)
526{
527 struct ftrace_ret_stack *ret_stack = t->ret_stack;
528
529 t->ret_stack = NULL;
530 /* NULL must become visible to IRQs before we free it: */
531 barrier();
532
533 kfree(ret_stack);
534}
535
536/* Allocate a return stack for each task */
537static int start_graph_tracing(void)
538{
539 struct ftrace_ret_stack **ret_stack_list;
540 int ret, cpu;
541
542 ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
543 sizeof(struct ftrace_ret_stack *),
544 GFP_KERNEL);
545
546 if (!ret_stack_list)
547 return -ENOMEM;
548
549 /* The cpu_boot init_task->ret_stack will never be freed */
550 for_each_online_cpu(cpu) {
551 if (!idle_task(cpu)->ret_stack)
552 ftrace_graph_init_idle_task(idle_task(cpu), cpu);
553 }
554
555 do {
556 ret = alloc_retstack_tasklist(ret_stack_list);
557 } while (ret == -EAGAIN);
558
559 if (!ret) {
560 ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
561 if (ret)
562 pr_info("ftrace_graph: Couldn't activate tracepoint"
563 " probe to kernel_sched_switch\n");
564 }
565
566 kfree(ret_stack_list);
567 return ret;
568}
569
570int register_ftrace_graph(struct fgraph_ops *gops)
571{
572 int ret = 0;
573
574 mutex_lock(&ftrace_lock);
575
576 /* we currently allow only one tracer registered at a time */
577 if (ftrace_graph_active) {
578 ret = -EBUSY;
579 goto out;
580 }
581
582 register_pm_notifier(&ftrace_suspend_notifier);
583
584 ftrace_graph_active++;
585 ret = start_graph_tracing();
586 if (ret) {
587 ftrace_graph_active--;
588 goto out;
589 }
590
591 ftrace_graph_return = gops->retfunc;
592
593 /*
594 * Update the indirect function to the entryfunc, and the
595 * function that gets called to the entry_test first. Then
596 * call the update fgraph entry function to determine if
597 * the entryfunc should be called directly or not.
598 */
599 __ftrace_graph_entry = gops->entryfunc;
600 ftrace_graph_entry = ftrace_graph_entry_test;
601 update_function_graph_func();
602
603 ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
604out:
605 mutex_unlock(&ftrace_lock);
606 return ret;
607}
608
609void unregister_ftrace_graph(struct fgraph_ops *gops)
610{
611 mutex_lock(&ftrace_lock);
612
613 if (unlikely(!ftrace_graph_active))
614 goto out;
615
616 ftrace_graph_active--;
617 ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
618 ftrace_graph_entry = ftrace_graph_entry_stub;
619 __ftrace_graph_entry = ftrace_graph_entry_stub;
620 ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
621 unregister_pm_notifier(&ftrace_suspend_notifier);
622 unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
623
624 out:
625 mutex_unlock(&ftrace_lock);
626}