 tools/perf/builtin-script.c    |  4 ++--
 tools/perf/util/intel-bts.c    |  4 ++--
 tools/perf/util/intel-pt.c     |  6 +++---
 tools/perf/util/thread-stack.c | 72 ++++++++++++++++++++++++++++++++---------
 tools/perf/util/thread-stack.h |  8 ++++----
 5 files changed, 69 insertions(+), 25 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 88d52ed85ffc..d079f36d342d 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1182,7 +1182,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					   struct addr_location *al, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->attr;
-	size_t depth = thread_stack__depth(thread);
+	size_t depth = thread_stack__depth(thread, sample->cpu);
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
@@ -1716,7 +1716,7 @@ static bool show_event(struct perf_sample *sample,
 		       struct thread *thread,
 		       struct addr_location *al)
 {
-	int depth = thread_stack__depth(thread);
+	int depth = thread_stack__depth(thread, sample->cpu);
 
 	if (!symbol_conf.graph_function)
 		return true;
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7b27d77306c2..ee6ca65f81f4 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -451,7 +451,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
 			continue;
 		intel_bts_get_branch_type(btsq, branch);
 		if (btsq->bts->synth_opts.thread_stack)
-			thread_stack__event(thread, btsq->sample_flags,
+			thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
 					    le64_to_cpu(branch->from),
 					    le64_to_cpu(branch->to),
 					    btsq->intel_pt_insn.length,
@@ -523,7 +523,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
 	    !btsq->bts->synth_opts.thread_stack && thread &&
 	    (!old_buffer || btsq->bts->sampling_mode ||
 	     (btsq->bts->snapshot_mode && !buffer->consecutive)))
-		thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+		thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
 
 	err = intel_bts_process_buffer(btsq, buffer, thread);
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 149ff361ca78..2e72373ec6df 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1174,7 +1174,7 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 	intel_pt_prep_b_sample(pt, ptq, event, sample);
 
 	if (pt->synth_opts.callchain) {
-		thread_stack__sample(ptq->thread, ptq->chain,
+		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
 				     pt->synth_opts.callchain_sz + 1,
 				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
@@ -1526,11 +1526,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		return 0;
 
 	if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
-		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
 				    state->to_ip, ptq->insn_len,
 				    state->trace_nr);
 	else
-		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
 
 	if (pt->sample_branches) {
 		err = intel_pt_synth_branch_sample(ptq);
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 248ed3945bec..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -75,6 +76,16 @@ struct thread_stack {
 	unsigned int arr_sz;
 };
 
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+	return !(thread->tid || thread->pid_);
+}
+
 static int thread_stack__grow(struct thread_stack *ts)
 {
 	struct thread_stack_entry *new_stack;
@@ -111,13 +122,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
 	return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
 					      struct call_return_processor *crp)
 {
 	struct thread_stack *ts = thread->ts, *new_ts;
 	unsigned int old_sz = ts ? ts->arr_sz : 0;
 	unsigned int new_sz = 1;
 
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
 	if (!ts || new_sz > old_sz) {
 		new_ts = calloc(new_sz, sizeof(*ts));
 		if (!new_ts)
@@ -130,6 +144,10 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 		ts = new_ts;
 	}
 
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
 	if (!ts->stack &&
 	    thread_stack__init(ts, thread, crp))
 		return NULL;
@@ -137,9 +155,34 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 	return ts;
 }
 
-static inline struct thread_stack *thread__stack(struct thread *thread)
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
 {
-	return thread ? thread->ts : NULL;
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						 int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
 }
 
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
@@ -270,16 +313,16 @@ int thread_stack__flush(struct thread *thread)
 	return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!thread)
 		return -EINVAL;
 
 	if (!ts) {
-		ts = thread_stack__new(thread, NULL);
+		ts = thread_stack__new(thread, cpu, NULL);
 		if (!ts) {
 			pr_warning("Out of memory: no thread stack\n");
 			return -ENOMEM;
@@ -329,9 +372,9 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return;
@@ -375,10 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 	u64 context = callchain_context(ip, kernel_start);
 	u64 last_context;
 	size_t i, j;
@@ -651,7 +695,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 			  struct addr_location *to_al, u64 ref,
 			  struct call_return_processor *crp)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
 	int err = 0;
 
 	if (ts && !ts->crp) {
@@ -661,7 +705,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	}
 
 	if (!ts) {
-		ts = thread_stack__new(thread, crp);
+		ts = thread_stack__new(thread, sample->cpu, crp);
 		if (!ts)
 			return -ENOMEM;
 		ts->comm = comm;
@@ -726,9 +770,9 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-	struct thread_stack *ts = thread__stack(thread);
+	struct thread_stack *ts = thread__stack(thread, cpu);
 
 	if (!ts)
 		return 0;
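The heart of the change is the sizing and indexing scheme in thread_stack__new() and thread__cpu_stack(): for the idle task, thread->ts becomes an array with one slot per cpu, grown on demand to roundup_pow_of_two(cpu + 1) entries. Below is a minimal standalone sketch of that scheme, not part of the patch; the stack_slot type and all demo_* names are invented for illustration.

/*
 * Sketch of the per-cpu array growth the patch applies to the idle task.
 * Compiles as plain C; stack_slot stands in for struct thread_stack.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct stack_slot {
	int depth;			/* stand-in for a real thread_stack */
};

/* Userspace equivalent of roundup_pow_of_two() from <linux/log2.h>. */
static unsigned int demo_roundup_pow_of_two(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

/* Grow *arr so that @cpu is a valid index, mirroring thread_stack__new(). */
static struct stack_slot *demo_get_slot(struct stack_slot **arr,
					unsigned int *arr_sz, int cpu)
{
	unsigned int new_sz = 1;

	if (cpu > 0)
		new_sz = demo_roundup_pow_of_two(cpu + 1);

	if (!*arr || new_sz > *arr_sz) {
		struct stack_slot *new_arr = calloc(new_sz, sizeof(**arr));

		if (!new_arr)
			return NULL;
		/* Preserve existing per-cpu stacks, as the patch does. */
		if (*arr)
			memcpy(new_arr, *arr, *arr_sz * sizeof(**arr));
		free(*arr);
		*arr = new_arr;
		*arr_sz = new_sz;
	}

	return *arr + (cpu > 0 ? cpu : 0);	/* cpu < 0 maps to slot 0 */
}

int main(void)
{
	struct stack_slot *arr = NULL;
	unsigned int arr_sz = 0;
	int cpus[] = { 0, 3, 1, 6 };

	for (size_t i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++) {
		struct stack_slot *slot = demo_get_slot(&arr, &arr_sz, cpus[i]);

		if (!slot)
			return 1;
		slot->depth++;
		/* cpu 3 grows the array to 4 slots, cpu 6 to 8 slots */
		printf("cpu %d -> arr_sz %u\n", cpus[i], arr_sz);
	}
	free(arr);
	return 0;
}

Rounding up to a power of two keeps the number of reallocations logarithmic in the highest cpu number seen, at the cost of at most doubling the array.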
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index f97c00a8c251..1f626f4a1c40 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -80,14 +80,14 @@ struct call_return_processor {
 	void *data;
 };
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
 
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
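For reference, here is a self-contained sketch of the dispatch that the reworked thread__stack() performs: the idle task, identified by pid == tid == 0, resolves to a per-cpu slot, while every other thread keeps its single stack. The demo_* types and names are invented; only the predicate mirrors thread_stack__per_cpu() from the patch.

#include <stdbool.h>
#include <stdio.h>

struct demo_thread {
	int pid;
	int tid;
};

static bool demo_is_idle(const struct demo_thread *t)
{
	/* same predicate as thread_stack__per_cpu() in the patch */
	return !(t->tid || t->pid);
}

static int demo_stack_index(const struct demo_thread *t, int cpu)
{
	if (!demo_is_idle(t))
		return 0;		/* normal threads: one stack */
	return cpu > 0 ? cpu : 0;	/* idle task: one slot per cpu */
}

int main(void)
{
	struct demo_thread idle = { 0, 0 }, task = { 1234, 1234 };

	printf("idle on cpu 3 -> slot %d\n", demo_stack_index(&idle, 3));	/* 3 */
	printf("task on cpu 3 -> slot %d\n", demo_stack_index(&task, 3));	/* 0 */
	printf("idle, cpu unknown  -> slot %d\n", demo_stack_index(&idle, -1));	/* 0 */
	return 0;
}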