 tools/perf/builtin-script.c    |  4
 tools/perf/util/intel-bts.c    |  4
 tools/perf/util/intel-pt.c     |  6
 tools/perf/util/thread-stack.c | 72
 tools/perf/util/thread-stack.h |  8
 5 files changed, 69 insertions(+), 25 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 88d52ed85ffc..d079f36d342d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1182,7 +1182,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					    struct addr_location *al, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	size_t depth = thread_stack__depth(thread);
+	size_t depth = thread_stack__depth(thread, sample->cpu);
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
@@ -1716,7 +1716,7 @@ static bool show_event(struct perf_sample *sample,
 		       struct thread *thread,
 		       struct addr_location *al)
 {
-	int depth = thread_stack__depth(thread);
+	int depth = thread_stack__depth(thread, sample->cpu);
 
 	if (!symbol_conf.graph_function)
 		return true;
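
Both builtin-script call sites now pass sample->cpu, because the idle task is reported as a single thread (pid == tid == 0) even though each CPU's idle task has its own call stack. A minimal stand-alone sketch of that idea; the mini_* names and the fixed 4-CPU array are illustrative stand-ins, not perf's types:

#include <stdio.h>

struct mini_stack { size_t cnt; };		/* stand-in for struct thread_stack */

static struct mini_stack idle_stacks[4];	/* one slot per CPU */

static size_t mini_depth(int tid, int cpu)
{
	if (tid == 0)				/* idle task: per-CPU stack */
		return idle_stacks[cpu].cnt;
	return 0;				/* ordinary tasks: one stack (elided) */
}

int main(void)
{
	idle_stacks[0].cnt = 3;			/* idle on CPU 0 is 3 calls deep */
	idle_stacks[2].cnt = 1;			/* idle on CPU 2 is 1 call deep */
	printf("cpu0=%zu cpu2=%zu\n", mini_depth(0, 0), mini_depth(0, 2));
	return 0;
}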
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
 			continue;
 		intel_bts_get_branch_type(btsq, branch);
 		if (btsq->bts->synth_opts.thread_stack)
-			thread_stack__event(thread, btsq->sample_flags,
+			thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
 					    le64_to_cpu(branch->from),
 					    le64_to_cpu(branch->to),
 					    btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
 	    !btsq->bts->synth_opts.thread_stack && thread &&
 	    (!old_buffer || btsq->bts->sampling_mode ||
 	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
-		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+		thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
 
 	err = intel_bts_process_buffer(btsq, buffer, thread);
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 	intel_pt_prep_b_sample(pt, ptq, event, sample);
 
 	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
 				     pt->synth_opts.callchain_sz + 1,
 				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		return 0;
 
 	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
-		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
 				    state->to_ip, ptq->insn_len,
 				    state->trace_nr);
 	else
-		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
 
 	if (pt->sample_branches) {
 		err = intel_pt_synth_branch_sample(ptq);
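
intel-pt threads the queue's CPU through both paths: when callchain or thread-stack synthesis is enabled, every decoded branch goes to thread_stack__event(); otherwise only the trace number is updated. A rough sketch of the trace-number rule follows. It assumes (this is an inference about intent, not shown in these hunks) that a changed trace number marks a decoding discontinuity after which the stacked entries are flushed; the mini_* names are stand-ins:

#include <stdio.h>

struct mini_ts {
	unsigned long long trace_nr;
	size_t cnt;			/* number of stacked calls */
};

/* On a new trace number, drop (perf would flush) the stale entries. */
static void mini_set_trace_nr(struct mini_ts *ts, unsigned long long trace_nr)
{
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			ts->cnt = 0;
		ts->trace_nr = trace_nr;
	}
}

int main(void)
{
	struct mini_ts ts = { .trace_nr = 1, .cnt = 5 };

	mini_set_trace_nr(&ts, 1);	/* same trace: depth kept */
	printf("depth=%zu\n", ts.cnt);
	mini_set_trace_nr(&ts, 2);	/* discontinuity: depth reset */
	printf("depth=%zu\n", ts.cnt);
	return 0;
}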
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 248ed3945bec..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -75,6 +76,16 @@ struct thread_stack {
 	unsigned int arr_sz;
 };
 
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+	return !(thread->tid || thread->pid_);
+}
+
 static int thread_stack__grow(struct thread_stack *ts)
 {
 	struct thread_stack_entry *new_stack;
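
The new helper leans on perf_session__register_idle_thread() registering the idle task with pid == tid == 0; the expression !(tid || pid_) is just a compact test that both ids are zero. A tiny stand-alone check, with mini_thread as an illustrative stand-in for perf's struct thread:

#include <assert.h>
#include <stdbool.h>

struct mini_thread { int pid_, tid; };	/* stand-in for struct thread */

static bool mini_per_cpu(const struct mini_thread *t)
{
	return !(t->tid || t->pid_);	/* i.e. t->tid == 0 && t->pid_ == 0 */
}

int main(void)
{
	struct mini_thread idle = { .pid_ = 0, .tid = 0 };
	struct mini_thread task = { .pid_ = 42, .tid = 42 };

	assert(mini_per_cpu(&idle));	/* idle task: per-CPU stacks */
	assert(!mini_per_cpu(&task));	/* ordinary task: one stack */
	return 0;
}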
@@ -111,13 +122,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
 	return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
 					      struct call_return_processor *crp)
 {
 	struct thread_stack *ts = thread->ts, *new_ts;
 	unsigned int old_sz = ts ? ts->arr_sz : 0;
 	unsigned int new_sz = 1;
 
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
 	if (!ts || new_sz > old_sz) {
 		new_ts = calloc(new_sz, sizeof(*ts));
 		if (!new_ts)
@@ -130,6 +144,10 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 		ts = new_ts;
 	}
 
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
 	if (!ts->stack &&
 	    thread_stack__init(ts, thread, crp))
 		return NULL;
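
For the per-CPU case the backing array is sized to the next power of two above the highest CPU seen, so a later CPU only forces a reallocation when it crosses a power-of-two boundary. A stand-alone sketch of that sizing rule, open-coding roundup_pow_of_two() (perf takes the real one from linux/log2.h):

#include <stdio.h>

/* Open-coded equivalent of roundup_pow_of_two() for small values. */
static unsigned int mini_roundup_pow_of_two(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	static const int cpus[] = { 0, 1, 2, 5, 11 };

	for (unsigned int i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++) {
		int cpu = cpus[i];
		unsigned int arr_sz = cpu > 0 ? mini_roundup_pow_of_two(cpu + 1) : 1;

		/* prints: 0 -> 1, 1 -> 2, 2 -> 4, 5 -> 8, 11 -> 16 */
		printf("cpu %2d -> arr_sz %u\n", cpu, arr_sz);
	}
	return 0;
}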
@@ -137,9 +155,34 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 	return ts;
 }
 
-static inline struct thread_stack *thread__stack(struct thread *thread)
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
 {
-	return thread ? thread->ts : NULL;
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						 int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
 }
 
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
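
The lookup mirrors the allocation rules: an unknown CPU (cpu < 0) falls back to slot 0, and an out-of-range or never-initialized slot returns NULL so that callers allocate through thread_stack__new(). A stand-alone sketch with illustrative mini_* types:

#include <stddef.h>
#include <stdio.h>

struct mini_ts { int initialized; };	/* stand-in for struct thread_stack */

static struct mini_ts *mini_cpu_stack(struct mini_ts *arr, unsigned int arr_sz,
				      int cpu)
{
	if (cpu < 0)				/* unknown CPU: use slot 0 */
		cpu = 0;

	if (!arr || (unsigned int)cpu >= arr_sz)
		return NULL;			/* out of range: caller allocates */

	if (!arr[cpu].initialized)
		return NULL;			/* slot never set up */

	return &arr[cpu];
}

int main(void)
{
	struct mini_ts arr[4] = { [2] = { .initialized = 1 } };

	printf("cpu  2: %s\n", mini_cpu_stack(arr, 4, 2) ? "found" : "NULL");
	printf("cpu  3: %s\n", mini_cpu_stack(arr, 4, 3) ? "found" : "NULL");
	printf("cpu  9: %s\n", mini_cpu_stack(arr, 4, 9) ? "found" : "NULL");
	printf("cpu -1: %s\n", mini_cpu_stack(arr, 4, -1) ? "found" : "NULL");
	return 0;
}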
@@ -270,16 +313,16 @@ int thread_stack__flush(struct thread *thread)
 	return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!thread)
 		return -EINVAL;
 
 	if (!ts) {
-		ts = thread_stack__new(thread, NULL);
+		ts = thread_stack__new(thread, cpu, NULL);
 		if (!ts) {
 			pr_warning("Out of memory: no thread stack\n");
 			return -ENOMEM;
@@ -329,9 +372,9 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return;
@@ -375,10 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 	u64 context = callchain_context(ip, kernel_start);
 	u64 last_context;
 	size_t i, j;
@@ -651,7 +695,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 			  struct addr_location *to_al, u64 ref,
 			  struct call_return_processor *crp)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
 	int err = 0;
 
 	if (ts && !ts->crp) {
@@ -661,7 +705,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	}
 
 	if (!ts) {
-		ts = thread_stack__new(thread, crp);
+		ts = thread_stack__new(thread, sample->cpu, crp);
 		if (!ts)
 			return -ENOMEM;
 		ts->comm = comm;
@@ -726,9 +770,9 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return 0;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index f97c00a8c251..1f626f4a1c40 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -80,14 +80,14 @@ struct call_return_processor {
 	void *data;
 };
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
 
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),