aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/trace
diff options
context:
space:
mode:
authorSteven Rostedt (Red Hat) <rostedt@goodmis.org>2013-03-13 23:34:22 -0400
committerSteven Rostedt <rostedt@goodmis.org>2013-03-15 00:36:07 -0400
commit4df297129f622bdc18935c856f42b9ddd18f9f28 (patch)
tree71e6271f1ab2f2ba260df6c44aaed8ba156ac54a /kernel/trace
parentd4ecbfc49b4b1d4b597fb5ba9e4fa25d62f105c5 (diff)
tracing: Remove most or all of stack tracer stack size from stack_max_size
Currently, the depth reported in the stack tracer stack_trace file does not match the stack_max_size file. This is because the stack_max_size includes the overhead of the stack tracer itself while the depth does not. The first time a max is triggered, a calculation is now performed that figures out the overhead of the stack tracer and subtracts it from the stack_max_size variable. The overhead is stored and is subtracted from the reported stack size for comparing for a new max. Now the stack_max_size corresponds to the reported depth: # cat stack_max_size 4640 # cat stack_trace Depth Size Location (48 entries) ----- ---- -------- 0) 4640 32 _raw_spin_lock+0x18/0x24 1) 4608 112 ____cache_alloc+0xb7/0x22d 2) 4496 80 kmem_cache_alloc+0x63/0x12f 3) 4416 16 mempool_alloc_slab+0x15/0x17 [...] While testing against an older gcc on x86 that uses mcount instead of fentry, I found that passing in ip + MCOUNT_INSN_SIZE let the stack trace show one more function deep which was missing before. Cc: stable@vger.kernel.org Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/trace_stack.c75
1 files changed, 54 insertions, 21 deletions
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index ea28e4b0ed58..aab277b67fa9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -20,27 +20,24 @@
20 20
21#define STACK_TRACE_ENTRIES 500 21#define STACK_TRACE_ENTRIES 500
22 22
23/*
24 * If fentry is used, then the function being traced will
25 * jump to fentry directly before it sets up its stack frame.
26 * We need to ignore that one and record the parent. Since
27 * the stack frame for the traced function wasn't set up yet,
28 * the stack_trace wont see the parent. That needs to be added
29 * manually to stack_dump_trace[] as the first element.
30 */
31#ifdef CC_USING_FENTRY 23#ifdef CC_USING_FENTRY
32# define add_func 1 24# define fentry 1
33#else 25#else
34# define add_func 0 26# define fentry 0
35#endif 27#endif
36 28
37static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = 29static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
38 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; 30 { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
39static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; 31static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
40 32
33/*
34 * Reserve one entry for the passed in ip. This will allow
35 * us to remove most or all of the stack size overhead
36 * added by the stack tracer itself.
37 */
41static struct stack_trace max_stack_trace = { 38static struct stack_trace max_stack_trace = {
42 .max_entries = STACK_TRACE_ENTRIES - add_func, 39 .max_entries = STACK_TRACE_ENTRIES - 1,
43 .entries = &stack_dump_trace[add_func], 40 .entries = &stack_dump_trace[1],
44}; 41};
45 42
46static unsigned long max_stack_size; 43static unsigned long max_stack_size;
@@ -58,10 +55,14 @@ check_stack(unsigned long ip, unsigned long *stack)
58{ 55{
59 unsigned long this_size, flags; 56 unsigned long this_size, flags;
60 unsigned long *p, *top, *start; 57 unsigned long *p, *top, *start;
58 static int tracer_frame;
59 int frame_size = ACCESS_ONCE(tracer_frame);
61 int i; 60 int i;
62 61
63 this_size = ((unsigned long)stack) & (THREAD_SIZE-1); 62 this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
64 this_size = THREAD_SIZE - this_size; 63 this_size = THREAD_SIZE - this_size;
64 /* Remove the frame of the tracer */
65 this_size -= frame_size;
65 66
66 if (this_size <= max_stack_size) 67 if (this_size <= max_stack_size)
67 return; 68 return;
@@ -73,6 +74,10 @@ check_stack(unsigned long ip, unsigned long *stack)
73 local_irq_save(flags); 74 local_irq_save(flags);
74 arch_spin_lock(&max_stack_lock); 75 arch_spin_lock(&max_stack_lock);
75 76
77 /* In case another CPU set the tracer_frame on us */
78 if (unlikely(!frame_size))
79 this_size -= tracer_frame;
80
76 /* a race could have already updated it */ 81 /* a race could have already updated it */
77 if (this_size <= max_stack_size) 82 if (this_size <= max_stack_size)
78 goto out; 83 goto out;
@@ -85,15 +90,12 @@ check_stack(unsigned long ip, unsigned long *stack)
85 save_stack_trace(&max_stack_trace); 90 save_stack_trace(&max_stack_trace);
86 91
87 /* 92 /*
88 * When fentry is used, the traced function does not get 93 * Add the passed in ip from the function tracer.
89 * its stack frame set up, and we lose the parent. 94 * Searching for this on the stack will skip over
90 * Add that one in manally. We set up save_stack_trace() 95 * most of the overhead from the stack tracer itself.
91 * to not touch the first element in this case.
92 */ 96 */
93 if (add_func) { 97 stack_dump_trace[0] = ip;
94 stack_dump_trace[0] = ip; 98 max_stack_trace.nr_entries++;
95 max_stack_trace.nr_entries++;
96 }
97 99
98 /* 100 /*
99 * Now find where in the stack these are. 101 * Now find where in the stack these are.
@@ -123,6 +125,18 @@ check_stack(unsigned long ip, unsigned long *stack)
123 found = 1; 125 found = 1;
124 /* Start the search from here */ 126 /* Start the search from here */
125 start = p + 1; 127 start = p + 1;
128 /*
129 * We do not want to show the overhead
130 * of the stack tracer stack in the
131 * max stack. If we haven't figured
132 * out what that is, then figure it out
133 * now.
134 */
135 if (unlikely(!tracer_frame) && i == 1) {
136 tracer_frame = (p - stack) *
137 sizeof(unsigned long);
138 max_stack_size -= tracer_frame;
139 }
126 } 140 }
127 } 141 }
128 142
@@ -149,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
149 if (per_cpu(trace_active, cpu)++ != 0) 163 if (per_cpu(trace_active, cpu)++ != 0)
150 goto out; 164 goto out;
151 165
152 check_stack(parent_ip, &stack); 166 /*
167 * When fentry is used, the traced function does not get
168 * its stack frame set up, and we lose the parent.
169 * The ip is pretty useless because the function tracer
170 * was called before that function set up its stack frame.
171 * In this case, we use the parent ip.
172 *
173 * By adding the return address of either the parent ip
174 * or the current ip we can disregard most of the stack usage
175 * caused by the stack tracer itself.
176 *
177 * The function tracer always reports the address of where the
178 * mcount call was, but the stack will hold the return address.
179 */
180 if (fentry)
181 ip = parent_ip;
182 else
183 ip += MCOUNT_INSN_SIZE;
184
185 check_stack(ip, &stack);
153 186
154 out: 187 out:
155 per_cpu(trace_active, cpu)--; 188 per_cpu(trace_active, cpu)--;