Diffstat (limited to 'tools/perf/util/thread-stack.c')
-rw-r--r--	tools/perf/util/thread-stack.c	227
1 file changed, 164 insertions, 63 deletions
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 61a4286a74dc..d52f27f373ce 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,6 +15,7 @@
 
 #include <linux/rbtree.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <errno.h>
 #include "thread.h"
 #include "event.h"
@@ -60,6 +61,7 @@ struct thread_stack_entry {
  * @last_time: last timestamp
  * @crp: call/return processor
  * @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
  */
 struct thread_stack {
 	struct thread_stack_entry *stack;
@@ -71,8 +73,19 @@ struct thread_stack {
 	u64 last_time;
 	struct call_return_processor *crp;
 	struct comm *comm;
+	unsigned int arr_sz;
 };
 
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+	return !(thread->tid || thread->pid_);
+}
+
 static int thread_stack__grow(struct thread_stack *ts)
 {
 	struct thread_stack_entry *new_stack;
@@ -91,19 +104,14 @@ static int thread_stack__grow(struct thread_stack *ts)
 	return 0;
 }
 
-static struct thread_stack *thread_stack__new(struct thread *thread,
-					      struct call_return_processor *crp)
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+			      struct call_return_processor *crp)
 {
-	struct thread_stack *ts;
-
-	ts = zalloc(sizeof(struct thread_stack));
-	if (!ts)
-		return NULL;
+	int err;
 
-	if (thread_stack__grow(ts)) {
-		free(ts);
-		return NULL;
-	}
+	err = thread_stack__grow(ts);
+	if (err)
+		return err;
 
 	if (thread->mg && thread->mg->machine)
 		ts->kernel_start = machine__kernel_start(thread->mg->machine);
@@ -111,9 +119,72 @@ static struct thread_stack *thread_stack__new(struct thread *thread,
 		ts->kernel_start = 1ULL << 63;
 	ts->crp = crp;
 
+	return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
+					      struct call_return_processor *crp)
+{
+	struct thread_stack *ts = thread->ts, *new_ts;
+	unsigned int old_sz = ts ? ts->arr_sz : 0;
+	unsigned int new_sz = 1;
+
+	if (thread_stack__per_cpu(thread) && cpu > 0)
+		new_sz = roundup_pow_of_two(cpu + 1);
+
+	if (!ts || new_sz > old_sz) {
+		new_ts = calloc(new_sz, sizeof(*ts));
+		if (!new_ts)
+			return NULL;
+		if (ts)
+			memcpy(new_ts, ts, old_sz * sizeof(*ts));
+		new_ts->arr_sz = new_sz;
+		zfree(&thread->ts);
+		thread->ts = new_ts;
+		ts = new_ts;
+	}
+
+	if (thread_stack__per_cpu(thread) && cpu > 0 &&
+	    (unsigned int)cpu < ts->arr_sz)
+		ts += cpu;
+
+	if (!ts->stack &&
+	    thread_stack__init(ts, thread, crp))
+		return NULL;
+
 	return ts;
 }
 
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+	struct thread_stack *ts = thread->ts;
+
+	if (cpu < 0)
+		cpu = 0;
+
+	if (!ts || (unsigned int)cpu >= ts->arr_sz)
+		return NULL;
+
+	ts += cpu;
+
+	if (!ts->stack)
+		return NULL;
+
+	return ts;
+}
+
+static inline struct thread_stack *thread__stack(struct thread *thread,
+						 int cpu)
+{
+	if (!thread)
+		return NULL;
+
+	if (thread_stack__per_cpu(thread))
+		return thread__cpu_stack(thread, cpu);
+
+	return thread->ts;
+}
+
 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
 			      bool trace_end)
 {
@@ -226,25 +297,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 
 int thread_stack__flush(struct thread *thread)
 {
-	if (thread->ts)
-		return __thread_stack__flush(thread, thread->ts);
+	struct thread_stack *ts = thread->ts;
+	unsigned int pos;
+	int err = 0;
 
-	return 0;
+	if (ts) {
+		for (pos = 0; pos < ts->arr_sz; pos++) {
+			int ret = __thread_stack__flush(thread, ts + pos);
+
+			if (ret)
+				err = ret;
+		}
+	}
+
+	return err;
 }
 
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
 	if (!thread)
 		return -EINVAL;
 
-	if (!thread->ts) {
-		thread->ts = thread_stack__new(thread, NULL);
-		if (!thread->ts) {
+	if (!ts) {
+		ts = thread_stack__new(thread, cpu, NULL);
+		if (!ts) {
 			pr_warning("Out of memory: no thread stack\n");
 			return -ENOMEM;
 		}
-		thread->ts->trace_nr = trace_nr;
+		ts->trace_nr = trace_nr;
 	}
 
 	/*
@@ -252,14 +335,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	 * the stack might be completely invalid. Better to report nothing than
 	 * to report something misleading, so flush the stack.
 	 */
-	if (trace_nr != thread->ts->trace_nr) {
-		if (thread->ts->trace_nr)
-			__thread_stack__flush(thread, thread->ts);
-		thread->ts->trace_nr = trace_nr;
+	if (trace_nr != ts->trace_nr) {
+		if (ts->trace_nr)
+			__thread_stack__flush(thread, ts);
+		ts->trace_nr = trace_nr;
 	}
 
 	/* Stop here if thread_stack__process() is in use */
-	if (thread->ts->crp)
+	if (ts->crp)
 		return 0;
 
 	if (flags & PERF_IP_FLAG_CALL) {
@@ -270,7 +353,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 		ret_addr = from_ip + insn_len;
 		if (ret_addr == to_ip)
 			return 0; /* Zero-length calls are excluded */
-		return thread_stack__push(thread->ts, ret_addr,
+		return thread_stack__push(ts, ret_addr,
 					  flags & PERF_IP_FLAG_TRACE_END);
 	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
 		/*
@@ -280,32 +363,52 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 		 * address, so try to pop that. Also, do not expect a call made
 		 * when the trace ended, to return, so pop that.
 		 */
-		thread_stack__pop(thread->ts, to_ip);
-		thread_stack__pop_trace_end(thread->ts);
+		thread_stack__pop(ts, to_ip);
+		thread_stack__pop_trace_end(ts);
 	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
-		thread_stack__pop(thread->ts, to_ip);
+		thread_stack__pop(ts, to_ip);
 	}
 
 	return 0;
 }
 
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 {
-	if (!thread || !thread->ts)
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!ts)
 		return;
 
-	if (trace_nr != thread->ts->trace_nr) {
-		if (thread->ts->trace_nr)
-			__thread_stack__flush(thread, thread->ts);
-		thread->ts->trace_nr = trace_nr;
+	if (trace_nr != ts->trace_nr) {
+		if (ts->trace_nr)
+			__thread_stack__flush(thread, ts);
+		ts->trace_nr = trace_nr;
 	}
 }
 
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+	__thread_stack__flush(thread, ts);
+	zfree(&ts->stack);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+	unsigned int arr_sz = ts->arr_sz;
+
+	__thread_stack__free(thread, ts);
+	memset(ts, 0, sizeof(*ts));
+	ts->arr_sz = arr_sz;
+}
+
 void thread_stack__free(struct thread *thread)
 {
-	if (thread->ts) {
-		__thread_stack__flush(thread, thread->ts);
-		zfree(&thread->ts->stack);
+	struct thread_stack *ts = thread->ts;
+	unsigned int pos;
+
+	if (ts) {
+		for (pos = 0; pos < ts->arr_sz; pos++)
+			__thread_stack__free(thread, ts + pos);
 		zfree(&thread->ts);
 	}
 }
@@ -315,9 +418,11 @@ static inline u64 callchain_context(u64 ip, u64 kernel_start)
 	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 }
 
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+void thread_stack__sample(struct thread *thread, int cpu,
+			  struct ip_callchain *chain,
 			  size_t sz, u64 ip, u64 kernel_start)
 {
+	struct thread_stack *ts = thread__stack(thread, cpu);
 	u64 context = callchain_context(ip, kernel_start);
 	u64 last_context;
 	size_t i, j;
@@ -330,15 +435,15 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
 	chain->ips[0] = context;
 	chain->ips[1] = ip;
 
-	if (!thread || !thread->ts) {
+	if (!ts) {
 		chain->nr = 2;
 		return;
 	}
 
 	last_context = context;
 
-	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
-		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+		ip = ts->stack[ts->cnt - j].ret_addr;
 		context = callchain_context(ip, kernel_start);
 		if (context != last_context) {
 			if (i >= sz - 1)
@@ -449,7 +554,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
 	return 1;
 }
 
-static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+static int thread_stack__bottom(struct thread_stack *ts,
 				struct perf_sample *sample,
 				struct addr_location *from_al,
 				struct addr_location *to_al, u64 ref)
@@ -474,7 +579,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
 	if (!cp)
 		return -ENOMEM;
 
-	return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
 				     true, false);
 }
 
@@ -590,24 +695,19 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 			  struct addr_location *to_al, u64 ref,
 			  struct call_return_processor *crp)
 {
-	struct thread_stack *ts = thread->ts;
+	struct thread_stack *ts = thread__stack(thread, sample->cpu);
 	int err = 0;
 
-	if (ts) {
-		if (!ts->crp) {
-			/* Supersede thread_stack__event() */
-			thread_stack__free(thread);
-			thread->ts = thread_stack__new(thread, crp);
-			if (!thread->ts)
-				return -ENOMEM;
-			ts = thread->ts;
-			ts->comm = comm;
-		}
-	} else {
-		thread->ts = thread_stack__new(thread, crp);
-		if (!thread->ts)
+	if (ts && !ts->crp) {
+		/* Supersede thread_stack__event() */
+		thread_stack__reset(thread, ts);
+		ts = NULL;
+	}
+
+	if (!ts) {
+		ts = thread_stack__new(thread, sample->cpu, crp);
+		if (!ts)
 			return -ENOMEM;
-		ts = thread->ts;
 		ts->comm = comm;
 	}
 
@@ -621,8 +721,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
 	/* If the stack is empty, put the current symbol on the stack */
 	if (!ts->cnt) {
-		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
-					   ref);
+		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
 		if (err)
 			return err;
 	}
@@ -671,9 +770,11 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 	return err;
 }
 
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
 {
-	if (!thread->ts)
+	struct thread_stack *ts = thread__stack(thread, cpu);
+
+	if (!ts)
 		return 0;
-	return thread->ts->cnt;
+	return ts->cnt;
 }
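
Note: the core of this patch is how thread_stack__new() sizes the per-cpu array for the idle task (roundup_pow_of_two(cpu + 1) entries, with arr_sz kept in element 0) and how thread__stack()/thread__cpu_stack() index into it. The following is a minimal, standalone C sketch of that sizing and lookup logic only; the struct, helper names and the roundup helper are simplified stand-ins for illustration, not the perf code itself.

/*
 * Standalone sketch of the per-cpu stack-array handling used above.
 * Names here (demo_stack, stack_array_get, roundup_pow2) are invented
 * for the example.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_stack {
	unsigned int arr_sz;	/* valid only in element 0, as in the patch */
	int in_use;		/* stands in for ts->stack being allocated */
};

/* Stand-in for the kernel's roundup_pow_of_two(). */
static unsigned int roundup_pow2(unsigned int x)
{
	unsigned int r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

/* Grow (or create) the array so index 'cpu' fits, then return that entry. */
static struct demo_stack *stack_array_get(struct demo_stack **arr, int cpu)
{
	struct demo_stack *ts = *arr, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = cpu > 0 ? roundup_pow2(cpu + 1) : 1;

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*new_ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;	/* set after memcpy, in element 0 */
		free(ts);
		*arr = new_ts;
		ts = new_ts;
	}

	if (cpu > 0 && (unsigned int)cpu < ts->arr_sz)
		ts += cpu;			/* per-cpu entry for the idle task */

	ts->in_use = 1;
	return ts;
}

int main(void)
{
	struct demo_stack *arr = NULL;

	/* cpu 5 forces the array up to roundup_pow2(6) == 8 entries. */
	stack_array_get(&arr, 5);
	printf("arr_sz = %u\n", arr->arr_sz);	/* prints 8 */
	free(arr);
	return 0;
}

Rounding the array size up to a power of two means the array is reallocated only a handful of times as higher CPU numbers are seen, while threads other than the idle task keep using a single entry exactly as before.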