aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/thread-stack.c
diff options
context:
space:
mode:
authorAdrian Hunter <adrian.hunter@intel.com>2018-12-21 07:06:19 -0500
committerArnaldo Carvalho de Melo <acme@redhat.com>2019-01-02 09:03:17 -0500
commit256d92bc93fd40411a02be5cdba74a7bf91e6e09 (patch)
tree92c8ed48b49b018b802461536265b8772339d6c7 /tools/perf/util/thread-stack.c
parent139f42f3b3b495e61bb2cfef40e1dd5e845e3052 (diff)
perf thread-stack: Fix thread stack processing for the idle task
perf creates a single 'struct thread' to represent the idle task. That is because threads are identified by PID and TID, and the idle task always has PID == TID == 0. However, there are actually separate idle tasks for each CPU. That creates a problem for thread stack processing which assumes that each thread has a single stack, not one stack per CPU. Fix that by passing through the CPU number, and in the case of the idle "thread", pick the thread stack from an array based on the CPU number. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: http://lkml.kernel.org/r/20181221120620.9659-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/thread-stack.c')
-rw-r--r--tools/perf/util/thread-stack.c72
1 file changed, 58 insertions, 14 deletions
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 248ed3945bec..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/rbtree.h> 16#include <linux/rbtree.h>
17#include <linux/list.h> 17#include <linux/list.h>
18#include <linux/log2.h>
18#include <errno.h> 19#include <errno.h>
19#include "thread.h" 20#include "thread.h"
20#include "event.h" 21#include "event.h"
@@ -75,6 +76,16 @@ struct thread_stack {
75 unsigned int arr_sz; 76 unsigned int arr_sz;
76}; 77};
77 78
79/*
80 * Assume pid == tid == 0 identifies the idle task as defined by
81 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
82 * and therefore requires a stack for each cpu.
83 */
84static inline bool thread_stack__per_cpu(struct thread *thread)
85{
86 return !(thread->tid || thread->pid_);
87}
88
78static int thread_stack__grow(struct thread_stack *ts) 89static int thread_stack__grow(struct thread_stack *ts)
79{ 90{
80 struct thread_stack_entry *new_stack; 91 struct thread_stack_entry *new_stack;
@@ -111,13 +122,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
111 return 0; 122 return 0;
112} 123}
113 124
114static struct thread_stack *thread_stack__new(struct thread *thread, 125static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
115 struct call_return_processor *crp) 126 struct call_return_processor *crp)
116{ 127{
117 struct thread_stack *ts = thread->ts, *new_ts; 128 struct thread_stack *ts = thread->ts, *new_ts;
118 unsigned int old_sz = ts ? ts->arr_sz : 0; 129 unsigned int old_sz = ts ? ts->arr_sz : 0;
119 unsigned int new_sz = 1; 130 unsigned int new_sz = 1;
120 131
132 if (thread_stack__per_cpu(thread) && cpu > 0)
133 new_sz = roundup_pow_of_two(cpu + 1);
134
121 if (!ts || new_sz > old_sz) { 135 if (!ts || new_sz > old_sz) {
122 new_ts = calloc(new_sz, sizeof(*ts)); 136 new_ts = calloc(new_sz, sizeof(*ts));
123 if (!new_ts) 137 if (!new_ts)
@@ -130,6 +144,10 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
130 ts = new_ts; 144 ts = new_ts;
131 } 145 }
132 146
147 if (thread_stack__per_cpu(thread) && cpu > 0 &&
148 (unsigned int)cpu < ts->arr_sz)
149 ts += cpu;
150
133 if (!ts->stack && 151 if (!ts->stack &&
134 thread_stack__init(ts, thread, crp)) 152 thread_stack__init(ts, thread, crp))
135 return NULL; 153 return NULL;
@@ -137,9 +155,34 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
137 return ts; 155 return ts;
138} 156}
139 157
140static inline struct thread_stack *thread__stack(struct thread *thread) 158static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
141{ 159{
142 return thread ? thread->ts : NULL; 160 struct thread_stack *ts = thread->ts;
161
162 if (cpu < 0)
163 cpu = 0;
164
165 if (!ts || (unsigned int)cpu >= ts->arr_sz)
166 return NULL;
167
168 ts += cpu;
169
170 if (!ts->stack)
171 return NULL;
172
173 return ts;
174}
175
176static inline struct thread_stack *thread__stack(struct thread *thread,
177 int cpu)
178{
179 if (!thread)
180 return NULL;
181
182 if (thread_stack__per_cpu(thread))
183 return thread__cpu_stack(thread, cpu);
184
185 return thread->ts;
143} 186}
144 187
145static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, 188static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
@@ -270,16 +313,16 @@ int thread_stack__flush(struct thread *thread)
270 return err; 313 return err;
271} 314}
272 315
273int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, 316int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
274 u64 to_ip, u16 insn_len, u64 trace_nr) 317 u64 to_ip, u16 insn_len, u64 trace_nr)
275{ 318{
276 struct thread_stack *ts = thread__stack(thread); 319 struct thread_stack *ts = thread__stack(thread, cpu);
277 320
278 if (!thread) 321 if (!thread)
279 return -EINVAL; 322 return -EINVAL;
280 323
281 if (!ts) { 324 if (!ts) {
282 ts = thread_stack__new(thread, NULL); 325 ts = thread_stack__new(thread, cpu, NULL);
283 if (!ts) { 326 if (!ts) {
284 pr_warning("Out of memory: no thread stack\n"); 327 pr_warning("Out of memory: no thread stack\n");
285 return -ENOMEM; 328 return -ENOMEM;
@@ -329,9 +372,9 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
329 return 0; 372 return 0;
330} 373}
331 374
332void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr) 375void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
333{ 376{
334 struct thread_stack *ts = thread__stack(thread); 377 struct thread_stack *ts = thread__stack(thread, cpu);
335 378
336 if (!ts) 379 if (!ts)
337 return; 380 return;
@@ -375,10 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
375 return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; 418 return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
376} 419}
377 420
378void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, 421void thread_stack__sample(struct thread *thread, int cpu,
422 struct ip_callchain *chain,
379 size_t sz, u64 ip, u64 kernel_start) 423 size_t sz, u64 ip, u64 kernel_start)
380{ 424{
381 struct thread_stack *ts = thread__stack(thread); 425 struct thread_stack *ts = thread__stack(thread, cpu);
382 u64 context = callchain_context(ip, kernel_start); 426 u64 context = callchain_context(ip, kernel_start);
383 u64 last_context; 427 u64 last_context;
384 size_t i, j; 428 size_t i, j;
@@ -651,7 +695,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
651 struct addr_location *to_al, u64 ref, 695 struct addr_location *to_al, u64 ref,
652 struct call_return_processor *crp) 696 struct call_return_processor *crp)
653{ 697{
654 struct thread_stack *ts = thread__stack(thread); 698 struct thread_stack *ts = thread__stack(thread, sample->cpu);
655 int err = 0; 699 int err = 0;
656 700
657 if (ts && !ts->crp) { 701 if (ts && !ts->crp) {
@@ -661,7 +705,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
661 } 705 }
662 706
663 if (!ts) { 707 if (!ts) {
664 ts = thread_stack__new(thread, crp); 708 ts = thread_stack__new(thread, sample->cpu, crp);
665 if (!ts) 709 if (!ts)
666 return -ENOMEM; 710 return -ENOMEM;
667 ts->comm = comm; 711 ts->comm = comm;
@@ -726,9 +770,9 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
726 return err; 770 return err;
727} 771}
728 772
729size_t thread_stack__depth(struct thread *thread) 773size_t thread_stack__depth(struct thread *thread, int cpu)
730{ 774{
731 struct thread_stack *ts = thread__stack(thread); 775 struct thread_stack *ts = thread__stack(thread, cpu);
732 776
733 if (!ts) 777 if (!ts)
734 return 0; 778 return 0;