author	Adrian Hunter <adrian.hunter@intel.com>	2018-12-21 07:06:19 -0500
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2019-01-02 09:03:17 -0500
commit	256d92bc93fd40411a02be5cdba74a7bf91e6e09 (patch)
tree	92c8ed48b49b018b802461536265b8772339d6c7
parent	139f42f3b3b495e61bb2cfef40e1dd5e845e3052 (diff)
perf thread-stack: Fix thread stack processing for the idle task
perf creates a single 'struct thread' to represent the idle task. That is because threads are identified by PID and TID, and the idle task always has PID == TID == 0.

However, there are actually separate idle tasks for each CPU. That creates a problem for thread stack processing which assumes that each thread has a single stack, not one stack per CPU.

Fix that by passing through the CPU number, and in the case of the idle "thread", pick the thread stack from an array based on the CPU number.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20181221120620.9659-8-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--	tools/perf/builtin-script.c	4
-rw-r--r--	tools/perf/util/intel-bts.c	4
-rw-r--r--	tools/perf/util/intel-pt.c	6
-rw-r--r--	tools/perf/util/thread-stack.c	72
-rw-r--r--	tools/perf/util/thread-stack.h	8
5 files changed, 69 insertions, 25 deletions
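
Before the diff, a minimal standalone sketch (not the perf code itself) of the per-CPU stack selection the patch introduces: the idle "thread" (pid == tid == 0) gets an array of stacks indexed by CPU and sized with roundup_pow_of_two(), while every other thread keeps a single stack. The names demo_thread, demo_stack, get_stack() and the userspace roundup_pow2() helper are invented for illustration only; the real code uses struct thread, struct thread_stack and the kernel's roundup_pow_of_two() from <linux/log2.h>.

/* Hypothetical demo of the idle-task per-CPU stack indexing scheme. */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct demo_stack {
	unsigned int depth;	/* stand-in for the real stack contents */
};

struct demo_thread {
	int pid, tid;
	struct demo_stack *ts;	/* array of arr_sz stacks */
	unsigned int arr_sz;
};

/* userspace stand-in for the kernel's roundup_pow_of_two() */
static unsigned int roundup_pow2(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

/* mirrors thread_stack__per_cpu(): pid == tid == 0 means the idle task */
static bool per_cpu(const struct demo_thread *t)
{
	return !(t->tid || t->pid);
}

/* grow the array (idle task only) and return the stack for this cpu */
static struct demo_stack *get_stack(struct demo_thread *t, int cpu)
{
	unsigned int new_sz = 1;

	if (per_cpu(t) && cpu > 0)
		new_sz = roundup_pow2((unsigned int)cpu + 1);

	if (!t->ts || new_sz > t->arr_sz) {
		struct demo_stack *new_ts = calloc(new_sz, sizeof(*new_ts));

		if (!new_ts)
			return NULL;
		if (t->ts) {
			/* keep existing per-cpu entries when growing */
			for (unsigned int i = 0; i < t->arr_sz; i++)
				new_ts[i] = t->ts[i];
			free(t->ts);
		}
		t->ts = new_ts;
		t->arr_sz = new_sz;
	}

	if (per_cpu(t) && cpu > 0 && (unsigned int)cpu < t->arr_sz)
		return &t->ts[cpu];

	return &t->ts[0];
}

int main(void)
{
	struct demo_thread idle = { .pid = 0, .tid = 0 };

	/* events on different CPUs now land on different stacks */
	get_stack(&idle, 3)->depth = 7;
	get_stack(&idle, 0)->depth = 2;

	printf("arr_sz=%u cpu0=%u cpu3=%u\n", idle.arr_sz,
	       get_stack(&idle, 0)->depth, get_stack(&idle, 3)->depth);
	return 0;
}
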
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 88d52ed85ffc..d079f36d342d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1182,7 +1182,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					    struct addr_location *al, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	size_t depth = thread_stack__depth(thread);
+	size_t depth = thread_stack__depth(thread, sample->cpu);
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
@@ -1716,7 +1716,7 @@ static bool show_event(struct perf_sample *sample,
 		       struct thread *thread,
 		       struct addr_location *al)
 {
-	int depth = thread_stack__depth(thread);
+	int depth = thread_stack__depth(thread, sample->cpu);
 
 	if (!symbol_conf.graph_function)
 		return true;
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
 			continue;
 		intel_bts_get_branch_type(btsq, branch);
 		if (btsq->bts->synth_opts.thread_stack)
-			thread_stack__event(thread, btsq->sample_flags,
+			thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
 					    le64_to_cpu(branch->from),
 					    le64_to_cpu(branch->to),
 					    btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
 	    !btsq->bts->synth_opts.thread_stack && thread &&
 	    (!old_buffer || btsq->bts->sampling_mode ||
 	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
-		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+		thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
 
 	err = intel_bts_process_buffer(btsq, buffer, thread);
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 	intel_pt_prep_b_sample(pt, ptq, event, sample);
 
 	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
 				     pt->synth_opts.callchain_sz + 1,
 				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		return 0;
 
 	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
-		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
 				    state->to_ip, ptq->insn_len,
 				    state->trace_nr);
 	else
-		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
 
 	if (pt->sample_branches) {
 		err = intel_pt_synth_branch_sample(ptq);
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 248ed3945bec..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -75,6 +76,16 @@ struct thread_stack {
 	unsigned int arr_sz;
 };
 
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+	return !(thread->tid || thread->pid_);
+}
+
 static int thread_stack__grow(struct thread_stack *ts)
 {
 	struct thread_stack_entry *new_stack;
@@ -111,13 +122,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
 	return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
 					      struct call_return_processor *crp)
 {
 	struct thread_stack *ts = thread->ts, *new_ts;
 	unsigned int old_sz = ts ? ts->arr_sz : 0;
 	unsigned int new_sz = 1;
 
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
 	if (!ts || new_sz > old_sz) {
 		new_ts = calloc(new_sz, sizeof(*ts));
 		if (!new_ts)
@@ -130,6 +144,10 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 		ts = new_ts;
 	}
 
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
 	if (!ts->stack &&
 	    thread_stack__init(ts, thread, crp))
 		return NULL;
@@ -137,9 +155,34 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 	return ts;
 }
 
-static inline struct thread_stack *thread__stack(struct thread *thread)
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
 {
-	return thread ? thread->ts : NULL;
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						 int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
 }
 
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
@@ -270,16 +313,16 @@ int thread_stack__flush(struct thread *thread)
 	return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!thread)
 		return -EINVAL;
 
 	if (!ts) {
-		ts = thread_stack__new(thread, NULL);
+		ts = thread_stack__new(thread, cpu, NULL);
 		if (!ts) {
 			pr_warning("Out of memory: no thread stack\n");
 			return -ENOMEM;
@@ -329,9 +372,9 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return;
@@ -375,10 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 	u64 context = callchain_context(ip, kernel_start);
 	u64 last_context;
 	size_t i, j;
@@ -651,7 +695,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 			  struct addr_location *to_al, u64 ref,
 			  struct call_return_processor *crp)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
 	int err = 0;
 
 	if (ts && !ts->crp) {
@@ -661,7 +705,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	}
 
 	if (!ts) {
-		ts = thread_stack__new(thread, crp);
+		ts = thread_stack__new(thread, sample->cpu, crp);
 		if (!ts)
 			return -ENOMEM;
 		ts->comm = comm;
@@ -726,9 +770,9 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return 0;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index f97c00a8c251..1f626f4a1c40 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -80,14 +80,14 @@ struct call_return_processor {
 	void *data;
 };
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
 
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),