author	Steven Rostedt (Red Hat) <rostedt@goodmis.org>	2016-06-23 14:03:47 -0400
committer	Steven Rostedt <rostedt@goodmis.org>	2016-06-23 18:48:56 -0400
commit	be54f69c26193de31053190761e521903b89d098 (patch)
tree	001283d319ccad1acc68ba950476fd7ccac540cb
parent	9a51933e360897d9b3867c9b09dd5ccf7493e97e (diff)
tracing: Skip more functions when doing stack tracing of events
# echo 1 > options/stacktrace
# echo 1 > events/sched/sched_switch/enable
# cat trace
          <idle>-0     [002] d..2  1982.525169: <stack trace>
 => save_stack_trace
 => __ftrace_trace_stack
 => trace_buffer_unlock_commit_regs
 => event_trigger_unlock_commit
 => trace_event_buffer_commit
 => trace_event_raw_event_sched_switch
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => start_secondary

The above shows that we are seeing 6 functions before ever making it to
the caller of the sched_switch event.

# echo stacktrace > events/sched/sched_switch/trigger
# cat trace
          <idle>-0     [002] d..3  2146.335208: <stack trace>
 => trace_event_buffer_commit
 => trace_event_raw_event_sched_switch
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => start_secondary

The stacktrace trigger isn't as bad, because it adds its own skip to the
stack tracing, but it still shows two extra functions.

One issue is that if the stacktrace passes its own "regs" then there should
be no addition to the skip, as the regs will not include the functions
being called. This was an issue that was fixed by commit 7717c6be6999
("tracing: Fix stacktrace skip depth in trace_buffer_unlock_commit_regs()")
as adding the skip number for kprobes made the probes not have any stack
at all.

But since this is only an issue when regs is being used, a skip should be
added if regs is NULL. Now we have:

# echo 1 > options/stacktrace
# echo 1 > events/sched/sched_switch/enable
# cat trace
          <idle>-0     [000] d..2  1297.676333: <stack trace>
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => rest_init
 => start_kernel
 => x86_64_start_reservations
 => x86_64_start_kernel

# echo stacktrace > events/sched/sched_switch/trigger
# cat trace
          <idle>-0     [002] d..3  1370.759745: <stack trace>
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => cpu_startup_entry
 => start_secondary

And kprobes are not touched.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
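The change boils down to a generic pattern: when a stack trace is captured from inside the tracing code itself, the first few frames belong to the tracing machinery and should be skipped so the trace starts at the real caller. A minimal user-space sketch of that idea, assuming glibc's backtrace()/backtrace_symbols() as a stand-in for the kernel's save_stack_trace(); print_stack() and commit_event() are hypothetical names, not kernel functions (build with gcc -rdynamic to get symbol names):

/*
 * User-space sketch only, not the kernel implementation: drop the first
 * "skip" frames of a captured stack so the helpers themselves do not
 * appear in the output, the same role trace.skip plays in the kernel.
 */
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

static void print_stack(int skip)
{
	void *frames[32];
	int n = backtrace(frames, 32);
	char **names = backtrace_symbols(frames, n);

	if (!names)
		return;
	/* Drop the machinery frames, analogous to trace.skip in the kernel. */
	for (int i = skip; i < n; i++)
		printf("=> %s\n", names[i]);
	free(names);
}

static void commit_event(void)
{
	/* Skip print_stack() and commit_event(), like the !regs case. */
	print_stack(2);
}

int main(void)
{
	commit_event();
	return 0;
}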
-rw-r--r--	kernel/trace/trace.c	19
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 45e6747589c6..3d9f31b576f3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2118,7 +2118,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 {
 	__buffer_unlock_commit(buffer, event);
 
-	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+	/*
+	 * If regs is not set, then skip the following callers:
+	 *   trace_buffer_unlock_commit_regs
+	 *   event_trigger_unlock_commit
+	 *   trace_event_buffer_commit
+	 *   trace_event_raw_event_sched_switch
+	 * Note, we can still get here via blktrace, wakeup tracer
+	 * and mmiotrace, but that's ok if they lose a function or
+	 * two. They are not that meaningful.
+	 */
+	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
 
@@ -2169,6 +2179,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	trace.skip = skip;
 
 	/*
+	 * Add two, for this function and the call to save_stack_trace()
+	 * If regs is set, then these functions will not be in the way.
+	 */
+	if (!regs)
+		trace.skip += 2;
+
+	/*
 	 * Since events can happen in NMIs there's no safe way to
 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
 	 * or NMI comes in, it will just have to use the default
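Taken together, the two hunks drop exactly the six machinery frames seen in the first changelog trace when regs is NULL: four from the event commit path and two from the stack-capture helpers. A tiny illustration of that arithmetic, using made-up macro names that do not exist in the kernel source:

#include <stdio.h>

/* Illustration only: these names are not from the kernel. */
#define EVENT_PATH_SKIP    4  /* skip passed by trace_buffer_unlock_commit_regs() when regs == NULL */
#define STACK_HELPER_SKIP  2  /* added in __ftrace_trace_stack() for itself and save_stack_trace() */

int main(void)
{
	/* 4 + 2 == 6, matching the six extra functions in the first changelog trace. */
	printf("frames dropped when regs is NULL: %d\n",
	       EVENT_PATH_SKIP + STACK_HELPER_SKIP);
	return 0;
}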