author     Steven Rostedt (Red Hat) <rostedt@goodmis.org>  2013-03-13 23:34:22 -0400
committer  Steven Rostedt <rostedt@goodmis.org>            2013-03-15 00:36:07 -0400
commit     4df297129f622bdc18935c856f42b9ddd18f9f28
tree       71e6271f1ab2f2ba260df6c44aaed8ba156ac54a  /kernel/trace
parent     d4ecbfc49b4b1d4b597fb5ba9e4fa25d62f105c5
tracing: Remove most or all of stack tracer stack size from stack_max_size
Currently, the depth reported in the stack tracer stack_trace file
does not match the stack_max_size file. This is because stack_max_size
includes the overhead of the stack tracer itself while the depth does
not.

The first time a max is triggered, a calculation is now performed that
figures out the overhead of the stack tracer and subtracts it from
the stack_max_size variable. The overhead is stored and is subtracted
from the reported stack size when comparing for a new max.
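
The overhead measurement itself is simple pointer arithmetic: once the
traced function's return address is found on the stack, everything
between the tracer's own stack pointer and that word belongs to the
tracer. A minimal user-space sketch of the idea (the fake stack, the
sentinel value, and the 5-word offset are all invented for
illustration; the real logic lives in check_stack() in the patch
below):

#include <stdio.h>

int main(void)
{
	unsigned long fake_stack[16] = { 0 };
	unsigned long traced_ret = 0xdeadbeef;	/* pretend return address */
	unsigned long *stack = fake_stack;	/* where the tracer runs */
	unsigned long *p;
	long tracer_frame = 0;

	/* the traced function's return address sits 5 words up the stack */
	fake_stack[5] = traced_ret;

	/* walk toward the top of the stack looking for the recorded ip */
	for (p = stack; p < fake_stack + 16; p++) {
		if (*p == traced_ret) {
			/* everything below this word is tracer overhead */
			tracer_frame = (p - stack) * sizeof(unsigned long);
			break;
		}
	}

	printf("tracer overhead: %ld bytes\n", tracer_frame);	/* 40 on LP64 */
	return 0;
}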
Now the stack_max_size corresponds to the reported depth:
# cat stack_max_size
4640
# cat stack_trace
Depth Size Location (48 entries)
----- ---- --------
0) 4640 32 _raw_spin_lock+0x18/0x24
1) 4608 112 ____cache_alloc+0xb7/0x22d
2) 4496 80 kmem_cache_alloc+0x63/0x12f
3) 4416 16 mempool_alloc_slab+0x15/0x17
[...]
While testing against an older gcc on x86 that uses mcount instead
of fentry, I found that passing in ip + MCOUNT_INSN_SIZE lets the
stack trace go one function deeper, recovering a frame that was
missing before.
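
The reason the fix-up helps: the function tracer reports the address
of the mcount call site, but what actually sits on the stack is the
return address, one call instruction further on, so a search for the
raw ip can never match. A toy sketch of that arithmetic (the address
is made up and MCOUNT_INSN_SIZE is hard-coded to x86's 5-byte call;
this is illustration, not kernel code):

#include <stdio.h>

#define MCOUNT_INSN_SIZE 5	/* x86 "call mcount" is a 5-byte instruction */

int main(void)
{
	/* made-up call-site address, as the function tracer reports it */
	unsigned long call_site = 0xc1000100UL;
	/* the CPU pushed the *return* address: the next instruction */
	unsigned long on_stack = call_site + 5;

	/* searching the stack for the raw ip never matches ... */
	printf("raw ip matches:      %d\n", call_site == on_stack);
	/* ... but ip + MCOUNT_INSN_SIZE does */
	printf("adjusted ip matches: %d\n",
	       call_site + MCOUNT_INSN_SIZE == on_stack);
	return 0;
}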
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
 kernel/trace/trace_stack.c | 75
 1 file changed, 54 insertions(+), 21 deletions(-)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index ea28e4b0ed58..aab277b67fa9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -20,27 +20,24 @@
 
 #define STACK_TRACE_ENTRIES 500
 
-/*
- * If fentry is used, then the function being traced will
- * jump to fentry directly before it sets up its stack frame.
- * We need to ignore that one and record the parent. Since
- * the stack frame for the traced function wasn't set up yet,
- * the stack_trace wont see the parent. That needs to be added
- * manually to stack_dump_trace[] as the first element.
- */
 #ifdef CC_USING_FENTRY
-# define add_func	1
+# define fentry		1
 #else
-# define add_func	0
+# define fentry		0
 #endif
 
 static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
	 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
 
+/*
+ * Reserve one entry for the passed in ip. This will allow
+ * us to remove most or all of the stack size overhead
+ * added by the stack tracer itself.
+ */
 static struct stack_trace max_stack_trace = {
-	.max_entries		= STACK_TRACE_ENTRIES - add_func,
-	.entries		= &stack_dump_trace[add_func],
+	.max_entries		= STACK_TRACE_ENTRIES - 1,
+	.entries		= &stack_dump_trace[1],
 };
 
 static unsigned long max_stack_size;
@@ -58,10 +55,14 @@ check_stack(unsigned long ip, unsigned long *stack)
 {
 	unsigned long this_size, flags;
 	unsigned long *p, *top, *start;
+	static int tracer_frame;
+	int frame_size = ACCESS_ONCE(tracer_frame);
 	int i;
 
 	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
 	this_size = THREAD_SIZE - this_size;
+	/* Remove the frame of the tracer */
+	this_size -= frame_size;
 
 	if (this_size <= max_stack_size)
 		return;
@@ -73,6 +74,10 @@ check_stack(unsigned long ip, unsigned long *stack)
 	local_irq_save(flags);
 	arch_spin_lock(&max_stack_lock);
 
+	/* In case another CPU set the tracer_frame on us */
+	if (unlikely(!frame_size))
+		this_size -= tracer_frame;
+
 	/* a race could have already updated it */
 	if (this_size <= max_stack_size)
 		goto out;
@@ -85,15 +90,12 @@ check_stack(unsigned long ip, unsigned long *stack)
 	save_stack_trace(&max_stack_trace);
 
 	/*
-	 * When fentry is used, the traced function does not get
-	 * its stack frame set up, and we lose the parent.
-	 * Add that one in manally. We set up save_stack_trace()
-	 * to not touch the first element in this case.
+	 * Add the passed in ip from the function tracer.
+	 * Searching for this on the stack will skip over
+	 * most of the overhead from the stack tracer itself.
	 */
-	if (add_func) {
-		stack_dump_trace[0] = ip;
-		max_stack_trace.nr_entries++;
-	}
+	stack_dump_trace[0] = ip;
+	max_stack_trace.nr_entries++;
 
 	/*
 	 * Now find where in the stack these are.
@@ -123,6 +125,18 @@ check_stack(unsigned long ip, unsigned long *stack)
 			found = 1;
 			/* Start the search from here */
 			start = p + 1;
+			/*
+			 * We do not want to show the overhead
+			 * of the stack tracer stack in the
+			 * max stack. If we haven't figured
+			 * out what that is, then figure it out
+			 * now.
+			 */
+			if (unlikely(!tracer_frame) && i == 1) {
+				tracer_frame = (p - stack) *
+					sizeof(unsigned long);
+				max_stack_size -= tracer_frame;
+			}
 		}
 	}
 
@@ -149,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (per_cpu(trace_active, cpu)++ != 0)
 		goto out;
 
-	check_stack(parent_ip, &stack);
+	/*
+	 * When fentry is used, the traced function does not get
+	 * its stack frame set up, and we lose the parent.
+	 * The ip is pretty useless because the function tracer
+	 * was called before that function set up its stack frame.
+	 * In this case, we use the parent ip.
+	 *
+	 * By adding the return address of either the parent ip
+	 * or the current ip we can disregard most of the stack usage
+	 * caused by the stack tracer itself.
+	 *
+	 * The function tracer always reports the address of where the
+	 * mcount call was, but the stack will hold the return address.
+	 */
+	if (fentry)
+		ip = parent_ip;
+	else
+		ip += MCOUNT_INSN_SIZE;
+
+	check_stack(ip, &stack);
 
  out:
 	per_cpu(trace_active, cpu)--;