diff options
-rw-r--r-- | tools/perf/Documentation/perf-report.txt | 12 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 31 | ||||
-rw-r--r-- | tools/perf/builtin-stat.c | 105 | ||||
-rw-r--r-- | tools/perf/util/callchain.c | 4 | ||||
-rw-r--r-- | tools/perf/util/callchain.h | 1 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 34 | ||||
-rw-r--r-- | tools/perf/util/evsel.h | 30 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 126 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 3 |
9 files changed, 257 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 0927bf4e6c2a..dd7cccdde498 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -159,7 +159,7 @@ OPTIONS | |||
159 | --dump-raw-trace:: | 159 | --dump-raw-trace:: |
160 | Dump raw trace in ASCII. | 160 | Dump raw trace in ASCII. |
161 | 161 | ||
162 | -g [type,min[,limit],order[,key]]:: | 162 | -g [type,min[,limit],order[,key][,branch]]:: |
163 | --call-graph:: | 163 | --call-graph:: |
164 | Display call chains using type, min percent threshold, optional print | 164 | Display call chains using type, min percent threshold, optional print |
165 | limit and order. | 165 | limit and order. |
@@ -177,6 +177,11 @@ OPTIONS | |||
177 | - function: compare on functions | 177 | - function: compare on functions |
178 | - address: compare on individual code addresses | 178 | - address: compare on individual code addresses |
179 | 179 | ||
180 | branch can be: | ||
181 | - branch: include last branch information in callgraph | ||
182 | when available. It is usually more convenient to use | ||
183 | --branch-history for this. | ||
184 | |||
180 | Default: fractal,0.5,callee,function. | 185 | Default: fractal,0.5,callee,function. |
181 | 186 | ||
182 | --children:: | 187 | --children:: |
@@ -266,6 +271,11 @@ OPTIONS | |||
266 | branch stacks and it will automatically switch to the branch view mode, | 271 | branch stacks and it will automatically switch to the branch view mode, |
267 | unless --no-branch-stack is used. | 272 | unless --no-branch-stack is used. |
268 | 273 | ||
274 | --branch-history:: | ||
275 | Add the addresses of sampled taken branches to the callstack. | ||
276 | This allows examining the path the program took to reach each sample. | ||
277 | The data collection must have used -b (or -j) and -g. | ||
278 | |||
269 | --objdump=<path>:: | 279 | --objdump=<path>:: |
270 | Path to objdump binary. | 280 | Path to objdump binary. |
271 | 281 | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 140a6cd88351..39367609c707 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -226,8 +226,9 @@ static int report__setup_sample_type(struct report *rep) | |||
226 | return -EINVAL; | 226 | return -EINVAL; |
227 | } | 227 | } |
228 | if (symbol_conf.use_callchain) { | 228 | if (symbol_conf.use_callchain) { |
229 | ui__error("Selected -g but no callchain data. Did " | 229 | ui__error("Selected -g or --branch-history but no " |
230 | "you call 'perf record' without -g?\n"); | 230 | "callchain data. Did\n" |
231 | "you call 'perf record' without -g?\n"); | ||
231 | return -1; | 232 | return -1; |
232 | } | 233 | } |
233 | } else if (!rep->dont_use_callchains && | 234 | } else if (!rep->dont_use_callchains && |
@@ -575,6 +576,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
575 | struct stat st; | 576 | struct stat st; |
576 | bool has_br_stack = false; | 577 | bool has_br_stack = false; |
577 | int branch_mode = -1; | 578 | int branch_mode = -1; |
579 | bool branch_call_mode = false; | ||
578 | char callchain_default_opt[] = "fractal,0.5,callee"; | 580 | char callchain_default_opt[] = "fractal,0.5,callee"; |
579 | const char * const report_usage[] = { | 581 | const char * const report_usage[] = { |
580 | "perf report [<options>]", | 582 | "perf report [<options>]", |
@@ -637,8 +639,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
637 | "regex filter to identify parent, see: '--sort parent'"), | 639 | "regex filter to identify parent, see: '--sort parent'"), |
638 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, | 640 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, |
639 | "Only display entries with parent-match"), | 641 | "Only display entries with parent-match"), |
640 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", | 642 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", |
641 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " | 643 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " |
642 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), | 644 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), |
643 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, | 645 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, |
644 | "Accumulate callchains of children and show total overhead as well"), | 646 | "Accumulate callchains of children and show total overhead as well"), |
@@ -684,7 +686,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
684 | OPT_BOOLEAN(0, "group", &symbol_conf.event_group, | 686 | OPT_BOOLEAN(0, "group", &symbol_conf.event_group, |
685 | "Show event group information together"), | 687 | "Show event group information together"), |
686 | OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", | 688 | OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", |
687 | "use branch records for histogram filling", parse_branch_mode), | 689 | "use branch records for per branch histogram filling", |
690 | parse_branch_mode), | ||
691 | OPT_BOOLEAN(0, "branch-history", &branch_call_mode, | ||
692 | "add last branch records to call history"), | ||
688 | OPT_STRING(0, "objdump", &objdump_path, "path", | 693 | OPT_STRING(0, "objdump", &objdump_path, "path", |
689 | "objdump binary to use for disassembly and annotations"), | 694 | "objdump binary to use for disassembly and annotations"), |
690 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, | 695 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, |
@@ -745,10 +750,24 @@ repeat: | |||
745 | has_br_stack = perf_header__has_feat(&session->header, | 750 | has_br_stack = perf_header__has_feat(&session->header, |
746 | HEADER_BRANCH_STACK); | 751 | HEADER_BRANCH_STACK); |
747 | 752 | ||
748 | if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) { | 753 | /* |
754 | * Branch mode is a tristate: | ||
755 | * -1 means default, so decide based on the file having branch data. | ||
756 | * 0/1 means the user chose a mode. | ||
757 | */ | ||
758 | if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && | ||
759 | branch_call_mode == -1) { | ||
749 | sort__mode = SORT_MODE__BRANCH; | 760 | sort__mode = SORT_MODE__BRANCH; |
750 | symbol_conf.cumulate_callchain = false; | 761 | symbol_conf.cumulate_callchain = false; |
751 | } | 762 | } |
763 | if (branch_call_mode) { | ||
764 | callchain_param.key = CCKEY_ADDRESS; | ||
765 | callchain_param.branch_callstack = 1; | ||
766 | symbol_conf.use_callchain = true; | ||
767 | callchain_register_param(&callchain_param); | ||
768 | if (sort_order == NULL) | ||
769 | sort_order = "srcline,symbol,dso"; | ||
770 | } | ||
752 | 771 | ||
753 | if (report.mem_mode) { | 772 | if (report.mem_mode) { |
754 | if (sort__mode == SORT_MODE__BRANCH) { | 773 | if (sort__mode == SORT_MODE__BRANCH) { |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 055ce9232c9e..891086376381 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -388,20 +388,102 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
388 | update_stats(&runtime_itlb_cache_stats[0], count[0]); | 388 | update_stats(&runtime_itlb_cache_stats[0], count[0]); |
389 | } | 389 | } |
390 | 390 | ||
391 | static void zero_per_pkg(struct perf_evsel *counter) | ||
392 | { | ||
393 | if (counter->per_pkg_mask) | ||
394 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | ||
395 | } | ||
396 | |||
397 | static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | ||
398 | { | ||
399 | unsigned long *mask = counter->per_pkg_mask; | ||
400 | struct cpu_map *cpus = perf_evsel__cpus(counter); | ||
401 | int s; | ||
402 | |||
403 | *skip = false; | ||
404 | |||
405 | if (!counter->per_pkg) | ||
406 | return 0; | ||
407 | |||
408 | if (cpu_map__empty(cpus)) | ||
409 | return 0; | ||
410 | |||
411 | if (!mask) { | ||
412 | mask = zalloc(MAX_NR_CPUS); | ||
413 | if (!mask) | ||
414 | return -ENOMEM; | ||
415 | |||
416 | counter->per_pkg_mask = mask; | ||
417 | } | ||
418 | |||
419 | s = cpu_map__get_socket(cpus, cpu); | ||
420 | if (s < 0) | ||
421 | return -1; | ||
422 | |||
423 | *skip = test_and_set_bit(s, mask) == 1; | ||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, | ||
428 | struct perf_counts_values *count) | ||
429 | { | ||
430 | struct perf_counts_values *aggr = &evsel->counts->aggr; | ||
431 | static struct perf_counts_values zero; | ||
432 | bool skip = false; | ||
433 | |||
434 | if (check_per_pkg(evsel, cpu, &skip)) { | ||
435 | pr_err("failed to read per-pkg counter\n"); | ||
436 | return -1; | ||
437 | } | ||
438 | |||
439 | if (skip) | ||
440 | count = &zero; | ||
441 | |||
442 | switch (aggr_mode) { | ||
443 | case AGGR_CORE: | ||
444 | case AGGR_SOCKET: | ||
445 | case AGGR_NONE: | ||
446 | if (!evsel->snapshot) | ||
447 | perf_evsel__compute_deltas(evsel, cpu, count); | ||
448 | perf_counts_values__scale(count, scale, NULL); | ||
449 | evsel->counts->cpu[cpu] = *count; | ||
450 | update_shadow_stats(evsel, count->values); | ||
451 | break; | ||
452 | case AGGR_GLOBAL: | ||
453 | aggr->val += count->val; | ||
454 | if (scale) { | ||
455 | aggr->ena += count->ena; | ||
456 | aggr->run += count->run; | ||
457 | } | ||
458 | default: | ||
459 | break; | ||
460 | } | ||
461 | |||
462 | return 0; | ||
463 | } | ||
464 | |||
465 | static int read_counter(struct perf_evsel *counter); | ||
466 | |||
391 | /* | 467 | /* |
392 | * Read out the results of a single counter: | 468 | * Read out the results of a single counter: |
393 | * aggregate counts across CPUs in system-wide mode | 469 | * aggregate counts across CPUs in system-wide mode |
394 | */ | 470 | */ |
395 | static int read_counter_aggr(struct perf_evsel *counter) | 471 | static int read_counter_aggr(struct perf_evsel *counter) |
396 | { | 472 | { |
473 | struct perf_counts_values *aggr = &counter->counts->aggr; | ||
397 | struct perf_stat *ps = counter->priv; | 474 | struct perf_stat *ps = counter->priv; |
398 | u64 *count = counter->counts->aggr.values; | 475 | u64 *count = counter->counts->aggr.values; |
399 | int i; | 476 | int i; |
400 | 477 | ||
401 | if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), | 478 | aggr->val = aggr->ena = aggr->run = 0; |
402 | thread_map__nr(evsel_list->threads), scale) < 0) | 479 | |
480 | if (read_counter(counter)) | ||
403 | return -1; | 481 | return -1; |
404 | 482 | ||
483 | if (!counter->snapshot) | ||
484 | perf_evsel__compute_deltas(counter, -1, aggr); | ||
485 | perf_counts_values__scale(aggr, scale, &counter->counts->scaled); | ||
486 | |||
405 | for (i = 0; i < 3; i++) | 487 | for (i = 0; i < 3; i++) |
406 | update_stats(&ps->res_stats[i], count[i]); | 488 | update_stats(&ps->res_stats[i], count[i]); |
407 | 489 | ||
@@ -424,16 +506,21 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
424 | */ | 506 | */ |
425 | static int read_counter(struct perf_evsel *counter) | 507 | static int read_counter(struct perf_evsel *counter) |
426 | { | 508 | { |
427 | u64 *count; | 509 | int nthreads = thread_map__nr(evsel_list->threads); |
428 | int cpu; | 510 | int ncpus = perf_evsel__nr_cpus(counter); |
511 | int cpu, thread; | ||
429 | 512 | ||
430 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { | 513 | if (counter->system_wide) |
431 | if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) | 514 | nthreads = 1; |
432 | return -1; | ||
433 | 515 | ||
434 | count = counter->counts->cpu[cpu].values; | 516 | if (counter->per_pkg) |
517 | zero_per_pkg(counter); | ||
435 | 518 | ||
436 | update_shadow_stats(counter, count); | 519 | for (thread = 0; thread < nthreads; thread++) { |
520 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
521 | if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) | ||
522 | return -1; | ||
523 | } | ||
437 | } | 524 | } |
438 | 525 | ||
439 | return 0; | 526 | return 0; |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 517ed84db97a..cf524a35cc84 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) | |||
149 | callchain_param.key = CCKEY_ADDRESS; | 149 | callchain_param.key = CCKEY_ADDRESS; |
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
152 | if (!strncmp(value, "branch", strlen(value))) { | ||
153 | callchain_param.branch_callstack = 1; | ||
154 | return 0; | ||
155 | } | ||
152 | return -1; | 156 | return -1; |
153 | } | 157 | } |
154 | 158 | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3f158474c892..dbc08cf5f970 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -63,6 +63,7 @@ struct callchain_param { | |||
63 | sort_chain_func_t sort; | 63 | sort_chain_func_t sort; |
64 | enum chain_order order; | 64 | enum chain_order order; |
65 | enum chain_key key; | 65 | enum chain_key key; |
66 | bool branch_callstack; | ||
66 | }; | 67 | }; |
67 | 68 | ||
68 | extern struct callchain_param callchain_param; | 69 | extern struct callchain_param callchain_param; |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2d26b7ad6fe0..1e90c8557ede 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -954,40 +954,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | |||
954 | return 0; | 954 | return 0; |
955 | } | 955 | } |
956 | 956 | ||
957 | int __perf_evsel__read(struct perf_evsel *evsel, | ||
958 | int ncpus, int nthreads, bool scale) | ||
959 | { | ||
960 | size_t nv = scale ? 3 : 1; | ||
961 | int cpu, thread; | ||
962 | struct perf_counts_values *aggr = &evsel->counts->aggr, count; | ||
963 | |||
964 | if (evsel->system_wide) | ||
965 | nthreads = 1; | ||
966 | |||
967 | aggr->val = aggr->ena = aggr->run = 0; | ||
968 | |||
969 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
970 | for (thread = 0; thread < nthreads; thread++) { | ||
971 | if (FD(evsel, cpu, thread) < 0) | ||
972 | continue; | ||
973 | |||
974 | if (readn(FD(evsel, cpu, thread), | ||
975 | &count, nv * sizeof(u64)) < 0) | ||
976 | return -errno; | ||
977 | |||
978 | aggr->val += count.val; | ||
979 | if (scale) { | ||
980 | aggr->ena += count.ena; | ||
981 | aggr->run += count.run; | ||
982 | } | ||
983 | } | ||
984 | } | ||
985 | |||
986 | perf_evsel__compute_deltas(evsel, -1, aggr); | ||
987 | perf_counts_values__scale(aggr, scale, &evsel->counts->scaled); | ||
988 | return 0; | ||
989 | } | ||
990 | |||
991 | static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) | 957 | static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) |
992 | { | 958 | { |
993 | struct perf_evsel *leader = evsel->leader; | 959 | struct perf_evsel *leader = evsel->leader; |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b18d58da580b..38622747d130 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -93,6 +93,7 @@ struct perf_evsel { | |||
93 | bool system_wide; | 93 | bool system_wide; |
94 | bool tracking; | 94 | bool tracking; |
95 | bool per_pkg; | 95 | bool per_pkg; |
96 | unsigned long *per_pkg_mask; | ||
96 | /* parse modifier helper */ | 97 | /* parse modifier helper */ |
97 | int exclude_GH; | 98 | int exclude_GH; |
98 | int nr_members; | 99 | int nr_members; |
@@ -271,35 +272,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, | |||
271 | return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); | 272 | return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); |
272 | } | 273 | } |
273 | 274 | ||
274 | int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads, | ||
275 | bool scale); | ||
276 | |||
277 | /** | ||
278 | * perf_evsel__read - Read the aggregate results on all CPUs | ||
279 | * | ||
280 | * @evsel - event selector to read value | ||
281 | * @ncpus - Number of cpus affected, from zero | ||
282 | * @nthreads - Number of threads affected, from zero | ||
283 | */ | ||
284 | static inline int perf_evsel__read(struct perf_evsel *evsel, | ||
285 | int ncpus, int nthreads) | ||
286 | { | ||
287 | return __perf_evsel__read(evsel, ncpus, nthreads, false); | ||
288 | } | ||
289 | |||
290 | /** | ||
291 | * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled | ||
292 | * | ||
293 | * @evsel - event selector to read value | ||
294 | * @ncpus - Number of cpus affected, from zero | ||
295 | * @nthreads - Number of threads affected, from zero | ||
296 | */ | ||
297 | static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, | ||
298 | int ncpus, int nthreads) | ||
299 | { | ||
300 | return __perf_evsel__read(evsel, ncpus, nthreads, true); | ||
301 | } | ||
302 | |||
303 | int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, | 275 | int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, |
304 | struct perf_sample *sample); | 276 | struct perf_sample *sample); |
305 | 277 | ||
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b75b487574c7..15dd0a9691ce 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <stdbool.h> | 12 | #include <stdbool.h> |
13 | #include <symbol/kallsyms.h> | 13 | #include <symbol/kallsyms.h> |
14 | #include "unwind.h" | 14 | #include "unwind.h" |
15 | #include "linux/hash.h" | ||
15 | 16 | ||
16 | static void dsos__init(struct dsos *dsos) | 17 | static void dsos__init(struct dsos *dsos) |
17 | { | 18 | { |
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread, | |||
1391 | 1392 | ||
1392 | al.filtered = 0; | 1393 | al.filtered = 0; |
1393 | al.sym = NULL; | 1394 | al.sym = NULL; |
1394 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | 1395 | if (cpumode == -1) |
1396 | thread__find_cpumode_addr_location(thread, MAP__FUNCTION, | ||
1397 | ip, &al); | ||
1398 | else | ||
1399 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | ||
1395 | ip, &al); | 1400 | ip, &al); |
1396 | if (al.sym != NULL) { | 1401 | if (al.sym != NULL) { |
1397 | if (sort__has_parent && !*parent && | 1402 | if (sort__has_parent && !*parent && |
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | |||
1427 | return bi; | 1432 | return bi; |
1428 | } | 1433 | } |
1429 | 1434 | ||
1435 | #define CHASHSZ 127 | ||
1436 | #define CHASHBITS 7 | ||
1437 | #define NO_ENTRY 0xff | ||
1438 | |||
1439 | #define PERF_MAX_BRANCH_DEPTH 127 | ||
1440 | |||
1441 | /* Remove loops. */ | ||
1442 | static int remove_loops(struct branch_entry *l, int nr) | ||
1443 | { | ||
1444 | int i, j, off; | ||
1445 | unsigned char chash[CHASHSZ]; | ||
1446 | |||
1447 | memset(chash, NO_ENTRY, sizeof(chash)); | ||
1448 | |||
1449 | BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); | ||
1450 | |||
1451 | for (i = 0; i < nr; i++) { | ||
1452 | int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; | ||
1453 | |||
1454 | /* no collision handling for now */ | ||
1455 | if (chash[h] == NO_ENTRY) { | ||
1456 | chash[h] = i; | ||
1457 | } else if (l[chash[h]].from == l[i].from) { | ||
1458 | bool is_loop = true; | ||
1459 | /* check if it is a real loop */ | ||
1460 | off = 0; | ||
1461 | for (j = chash[h]; j < i && i + off < nr; j++, off++) | ||
1462 | if (l[j].from != l[i + off].from) { | ||
1463 | is_loop = false; | ||
1464 | break; | ||
1465 | } | ||
1466 | if (is_loop) { | ||
1467 | memmove(l + i, l + i + off, | ||
1468 | (nr - (i + off)) * sizeof(*l)); | ||
1469 | nr -= off; | ||
1470 | } | ||
1471 | } | ||
1472 | } | ||
1473 | return nr; | ||
1474 | } | ||
1475 | |||
1430 | static int thread__resolve_callchain_sample(struct thread *thread, | 1476 | static int thread__resolve_callchain_sample(struct thread *thread, |
1431 | struct ip_callchain *chain, | 1477 | struct ip_callchain *chain, |
1478 | struct branch_stack *branch, | ||
1432 | struct symbol **parent, | 1479 | struct symbol **parent, |
1433 | struct addr_location *root_al, | 1480 | struct addr_location *root_al, |
1434 | int max_stack) | 1481 | int max_stack) |
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
1438 | int i; | 1485 | int i; |
1439 | int j; | 1486 | int j; |
1440 | int err; | 1487 | int err; |
1441 | int skip_idx __maybe_unused; | 1488 | int skip_idx = -1; |
1489 | int first_call = 0; | ||
1490 | |||
1491 | /* | ||
1492 | * Based on DWARF debug information, some architectures skip | ||
1493 | * a callchain entry saved by the kernel. | ||
1494 | */ | ||
1495 | if (chain->nr < PERF_MAX_STACK_DEPTH) | ||
1496 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
1442 | 1497 | ||
1443 | callchain_cursor_reset(&callchain_cursor); | 1498 | callchain_cursor_reset(&callchain_cursor); |
1444 | 1499 | ||
1500 | /* | ||
1501 | * Add branches to call stack for easier browsing. This gives | ||
1502 | * more context for a sample than just the callers. | ||
1503 | * | ||
1504 | * This uses individual histograms of paths compared to the | ||
1505 | * aggregated histograms the normal LBR mode uses. | ||
1506 | * | ||
1507 | * Limitations for now: | ||
1508 | * - No extra filters | ||
1509 | * - No annotations (should annotate somehow) | ||
1510 | */ | ||
1511 | |||
1512 | if (branch && callchain_param.branch_callstack) { | ||
1513 | int nr = min(max_stack, (int)branch->nr); | ||
1514 | struct branch_entry be[nr]; | ||
1515 | |||
1516 | if (branch->nr > PERF_MAX_BRANCH_DEPTH) { | ||
1517 | pr_warning("corrupted branch chain. skipping...\n"); | ||
1518 | goto check_calls; | ||
1519 | } | ||
1520 | |||
1521 | for (i = 0; i < nr; i++) { | ||
1522 | if (callchain_param.order == ORDER_CALLEE) { | ||
1523 | be[i] = branch->entries[i]; | ||
1524 | /* | ||
1525 | * Check for overlap into the callchain. | ||
1526 | * The return address is one off compared to | ||
1527 | * the branch entry. To adjust for this, | ||
1528 | * assume the calling instruction is at most | ||
1529 | * 8 bytes long. | ||
1530 | */ | ||
1531 | if (i == skip_idx || | ||
1532 | chain->ips[first_call] >= PERF_CONTEXT_MAX) | ||
1533 | first_call++; | ||
1534 | else if (be[i].from < chain->ips[first_call] && | ||
1535 | be[i].from >= chain->ips[first_call] - 8) | ||
1536 | first_call++; | ||
1537 | } else | ||
1538 | be[i] = branch->entries[branch->nr - i - 1]; | ||
1539 | } | ||
1540 | |||
1541 | nr = remove_loops(be, nr); | ||
1542 | |||
1543 | for (i = 0; i < nr; i++) { | ||
1544 | err = add_callchain_ip(thread, parent, root_al, | ||
1545 | -1, be[i].to); | ||
1546 | if (!err) | ||
1547 | err = add_callchain_ip(thread, parent, root_al, | ||
1548 | -1, be[i].from); | ||
1549 | if (err == -EINVAL) | ||
1550 | break; | ||
1551 | if (err) | ||
1552 | return err; | ||
1553 | } | ||
1554 | chain_nr -= nr; | ||
1555 | } | ||
1556 | |||
1557 | check_calls: | ||
1445 | if (chain->nr > PERF_MAX_STACK_DEPTH) { | 1558 | if (chain->nr > PERF_MAX_STACK_DEPTH) { |
1446 | pr_warning("corrupted callchain. skipping...\n"); | 1559 | pr_warning("corrupted callchain. skipping...\n"); |
1447 | return 0; | 1560 | return 0; |
1448 | } | 1561 | } |
1449 | 1562 | ||
1450 | /* | 1563 | for (i = first_call; i < chain_nr; i++) { |
1451 | * Based on DWARF debug information, some architectures skip | ||
1452 | * a callchain entry saved by the kernel. | ||
1453 | */ | ||
1454 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
1455 | |||
1456 | for (i = 0; i < chain_nr; i++) { | ||
1457 | u64 ip; | 1564 | u64 ip; |
1458 | 1565 | ||
1459 | if (callchain_param.order == ORDER_CALLEE) | 1566 | if (callchain_param.order == ORDER_CALLEE) |
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, | |||
1517 | int max_stack) | 1624 | int max_stack) |
1518 | { | 1625 | { |
1519 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, | 1626 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, |
1627 | sample->branch_stack, | ||
1520 | parent, root_al, max_stack); | 1628 | parent, root_al, max_stack); |
1521 | if (ret) | 1629 | if (ret) |
1522 | return ret; | 1630 | return ret; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e0b297c50f9d..9d602e9c6f59 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -102,7 +102,8 @@ struct symbol_conf { | |||
102 | demangle, | 102 | demangle, |
103 | demangle_kernel, | 103 | demangle_kernel, |
104 | filter_relative, | 104 | filter_relative, |
105 | show_hist_headers; | 105 | show_hist_headers, |
106 | branch_callstack; | ||
106 | const char *vmlinux_name, | 107 | const char *vmlinux_name, |
107 | *kallsyms_name, | 108 | *kallsyms_name, |
108 | *source_prefix, | 109 | *source_prefix, |