-rw-r--r--	tools/perf/builtin-script.c	 4
-rw-r--r--	tools/perf/util/intel-bts.c	 4
-rw-r--r--	tools/perf/util/intel-pt.c	 6
-rw-r--r--	tools/perf/util/thread-stack.c	72
-rw-r--r--	tools/perf/util/thread-stack.h	 8
5 files changed, 69 insertions(+), 25 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 88d52ed85ffc..d079f36d342d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1182,7 +1182,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					    struct addr_location *al, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	size_t depth = thread_stack__depth(thread);
+	size_t depth = thread_stack__depth(thread, sample->cpu);
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
@@ -1716,7 +1716,7 @@ static bool show_event(struct perf_sample *sample,
 		       struct thread *thread,
 		       struct addr_location *al)
 {
-	int depth = thread_stack__depth(thread);
+	int depth = thread_stack__depth(thread, sample->cpu);
 
 	if (!symbol_conf.graph_function)
 		return true;
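
Both perf script call sites now look the stack up by the sample's cpu. A minimal sketch of the pattern; the indent width here is illustrative, not the patch's actual spacing logic:

	/* Illustrative only: depth-driven indentation, as used conceptually
	 * by perf_sample__fprintf_callindent(). For non-idle threads the
	 * cpu argument is simply ignored by the lookup. */
	size_t depth = thread_stack__depth(thread, sample->cpu);

	fprintf(fp, "%*s", (int)(depth * 4), "");	/* hypothetical 4-col indent */
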
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
 			continue;
 		intel_bts_get_branch_type(btsq, branch);
 		if (btsq->bts->synth_opts.thread_stack)
-			thread_stack__event(thread, btsq->sample_flags,
+			thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
 					    le64_to_cpu(branch->from),
 					    le64_to_cpu(branch->to),
 					    btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
 	    !btsq->bts->synth_opts.thread_stack && thread &&
 	    (!old_buffer || btsq->bts->sampling_mode ||
 	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
-		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+		thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
 
 	err = intel_bts_process_buffer(btsq, buffer, thread);
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 	intel_pt_prep_b_sample(pt, ptq, event, sample);
 
 	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
 				     pt->synth_opts.callchain_sz + 1,
 				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		return 0;
 
 	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
-		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
 				    state->to_ip, ptq->insn_len,
 				    state->trace_nr);
 	else
-		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
 
 	if (pt->sample_branches) {
 		err = intel_pt_synth_branch_sample(ptq);
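
With both decoders passing the queue's cpu through, idle-task activity decoded on different cpus no longer shares one stack. A hedged fragment showing the observable effect; PERF_IP_FLAG_* are perf's branch flags, and 'idle' is assumed to be the thread registered by perf_session__register_idle_thread():

	/* Sketch, not from the patch: a call pushed on cpu 0 leaves cpu 1's
	 * idle stack untouched (assuming fresh stacks and a successful push). */
	u32 flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;

	thread_stack__event(idle, 0, flags, from_ip, to_ip, insn_len, trace_nr);
	assert(thread_stack__depth(idle, 0) == 1);
	assert(thread_stack__depth(idle, 1) == 0);
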
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 248ed3945bec..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -75,6 +76,16 @@ struct thread_stack {
75 unsigned int arr_sz; 76 unsigned int arr_sz;
76}; 77};
77 78
79/*
80 * Assume pid == tid == 0 identifies the idle task as defined by
81 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
82 * and therefore requires a stack for each cpu.
83 */
84static inline bool thread_stack__per_cpu(struct thread *thread)
85{
86 return !(thread->tid || thread->pid_);
87}
88
78static int thread_stack__grow(struct thread_stack *ts) 89static int thread_stack__grow(struct thread_stack *ts)
79{ 90{
80 struct thread_stack_entry *new_stack; 91 struct thread_stack_entry *new_stack;
@@ -111,13 +122,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
 	return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
 					      struct call_return_processor *crp)
 {
 	struct thread_stack *ts = thread->ts, *new_ts;
 	unsigned int old_sz = ts ? ts->arr_sz : 0;
 	unsigned int new_sz = 1;
 
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
 	if (!ts || new_sz > old_sz) {
 		new_ts = calloc(new_sz, sizeof(*ts));
 		if (!new_ts)
@@ -130,6 +144,10 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 		ts = new_ts;
 	}
 
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
 	if (!ts->stack &&
 	    thread_stack__init(ts, thread, crp))
 		return NULL;
@@ -137,9 +155,34 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 	return ts;
 }
 
-static inline struct thread_stack *thread__stack(struct thread *thread)
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
 {
-	return thread ? thread->ts : NULL;
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						 int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
 }
 
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
@@ -270,16 +313,16 @@ int thread_stack__flush(struct thread *thread)
 	return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!thread)
 		return -EINVAL;
 
 	if (!ts) {
-		ts = thread_stack__new(thread, NULL);
+		ts = thread_stack__new(thread, cpu, NULL);
 		if (!ts) {
 			pr_warning("Out of memory: no thread stack\n");
 			return -ENOMEM;
@@ -329,9 +372,9 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return;
@@ -375,10 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 	u64 context = callchain_context(ip, kernel_start);
 	u64 last_context;
 	size_t i, j;
@@ -651,7 +695,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 			  struct addr_location *to_al, u64 ref,
 			  struct call_return_processor *crp)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
 	int err = 0;
 
 	if (ts && !ts->crp) {
@@ -661,7 +705,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	}
 
 	if (!ts) {
-		ts = thread_stack__new(thread, crp);
+		ts = thread_stack__new(thread, sample->cpu, crp);
 		if (!ts)
 			return -ENOMEM;
 		ts->comm = comm;
@@ -726,9 +770,9 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return 0;
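
The per-cpu array is sized to the next power of two that covers the highest cpu seen, so repeated growth stays cheap. A standalone sketch of the sizing rule, using a plain C stand-in for <linux/log2.h>'s roundup_pow_of_two():

	#include <stdio.h>

	/* Stand-in for roundup_pow_of_two() from <linux/log2.h>. */
	static unsigned int roundup_p2(unsigned int x)
	{
		unsigned int r = 1;

		while (r < x)
			r <<= 1;
		return r;
	}

	int main(void)
	{
		int cpu;

		/* Mirrors thread_stack__new(): new_sz stays 1 unless cpu > 0. */
		for (cpu = 0; cpu < 10; cpu++)
			printf("cpu %d -> arr_sz %u\n", cpu,
			       cpu > 0 ? roundup_p2(cpu + 1) : 1);
		return 0;
	}

For example, cpu 2 yields an array of 4 entries and cpu 8 yields 16, so a stack already exists for nearby cpus without reallocating on every new cpu.
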
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index f97c00a8c251..1f626f4a1c40 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -80,14 +80,14 @@ struct call_return_processor {
 	void *data;
 };
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
 
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
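
A hedged usage sketch of the updated API; the wrapper name and surrounding values are hypothetical, not from the patch:

	#include "util/thread-stack.h"

	/* Hypothetical helper: forward a decoded branch to the thread stack.
	 * For the idle task (pid == tid == 0) 'cpu' selects one of the
	 * per-cpu stacks; for any other thread the lookup falls back to the
	 * thread's single stack and 'cpu' has no effect. */
	static int record_branch(struct thread *thread, int cpu, u32 flags,
				 u64 from_ip, u64 to_ip, u16 insn_len,
				 u64 trace_nr)
	{
		return thread_stack__event(thread, cpu, flags, from_ip, to_ip,
					   insn_len, trace_nr);
	}
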