diff options
| -rw-r--r-- | tools/perf/Documentation/perf-report.txt | 12 | ||||
| -rw-r--r-- | tools/perf/builtin-report.c | 31 | ||||
| -rw-r--r-- | tools/perf/builtin-stat.c | 105 | ||||
| -rw-r--r-- | tools/perf/util/callchain.c | 4 | ||||
| -rw-r--r-- | tools/perf/util/callchain.h | 1 | ||||
| -rw-r--r-- | tools/perf/util/evsel.c | 34 | ||||
| -rw-r--r-- | tools/perf/util/evsel.h | 30 | ||||
| -rw-r--r-- | tools/perf/util/machine.c | 126 | ||||
| -rw-r--r-- | tools/perf/util/symbol.h | 3 |
9 files changed, 257 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 0927bf4e6c2a..dd7cccdde498 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
| @@ -159,7 +159,7 @@ OPTIONS | |||
| 159 | --dump-raw-trace:: | 159 | --dump-raw-trace:: |
| 160 | Dump raw trace in ASCII. | 160 | Dump raw trace in ASCII. |
| 161 | 161 | ||
| 162 | -g [type,min[,limit],order[,key]]:: | 162 | -g [type,min[,limit],order[,key][,branch]]:: |
| 163 | --call-graph:: | 163 | --call-graph:: |
| 164 | Display call chains using type, min percent threshold, optional print | 164 | Display call chains using type, min percent threshold, optional print |
| 165 | limit and order. | 165 | limit and order. |
| @@ -177,6 +177,11 @@ OPTIONS | |||
| 177 | - function: compare on functions | 177 | - function: compare on functions |
| 178 | - address: compare on individual code addresses | 178 | - address: compare on individual code addresses |
| 179 | 179 | ||
| 180 | branch can be: | ||
| 181 | - branch: include last branch information in callgraph | ||
| 182 | when available. It is usually more convenient to use --branch-history | ||
| 183 | for this. | ||
| 184 | |||
| 180 | Default: fractal,0.5,callee,function. | 185 | Default: fractal,0.5,callee,function. |
| 181 | 186 | ||
| 182 | --children:: | 187 | --children:: |
| @@ -266,6 +271,11 @@ OPTIONS | |||
| 266 | branch stacks and it will automatically switch to the branch view mode, | 271 | branch stacks and it will automatically switch to the branch view mode, |
| 267 | unless --no-branch-stack is used. | 272 | unless --no-branch-stack is used. |
| 268 | 273 | ||
| 274 | --branch-history:: | ||
| 275 | Add the addresses of sampled taken branches to the callstack. | ||
| 276 | This allows examining the path the program took to each sample. | ||
| 277 | The data collection must have used -b (or -j) and -g. | ||
| 278 | |||
| 269 | --objdump=<path>:: | 279 | --objdump=<path>:: |
| 270 | Path to objdump binary. | 280 | Path to objdump binary. |
| 271 | 281 | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 140a6cd88351..39367609c707 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
| @@ -226,8 +226,9 @@ static int report__setup_sample_type(struct report *rep) | |||
| 226 | return -EINVAL; | 226 | return -EINVAL; |
| 227 | } | 227 | } |
| 228 | if (symbol_conf.use_callchain) { | 228 | if (symbol_conf.use_callchain) { |
| 229 | ui__error("Selected -g but no callchain data. Did " | 229 | ui__error("Selected -g or --branch-history but no " |
| 230 | "you call 'perf record' without -g?\n"); | 230 | "callchain data. Did\n" |
| 231 | "you call 'perf record' without -g?\n"); | ||
| 231 | return -1; | 232 | return -1; |
| 232 | } | 233 | } |
| 233 | } else if (!rep->dont_use_callchains && | 234 | } else if (!rep->dont_use_callchains && |
| @@ -575,6 +576,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 575 | struct stat st; | 576 | struct stat st; |
| 576 | bool has_br_stack = false; | 577 | bool has_br_stack = false; |
| 577 | int branch_mode = -1; | 578 | int branch_mode = -1; |
| 579 | bool branch_call_mode = false; | ||
| 578 | char callchain_default_opt[] = "fractal,0.5,callee"; | 580 | char callchain_default_opt[] = "fractal,0.5,callee"; |
| 579 | const char * const report_usage[] = { | 581 | const char * const report_usage[] = { |
| 580 | "perf report [<options>]", | 582 | "perf report [<options>]", |
| @@ -637,8 +639,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 637 | "regex filter to identify parent, see: '--sort parent'"), | 639 | "regex filter to identify parent, see: '--sort parent'"), |
| 638 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, | 640 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, |
| 639 | "Only display entries with parent-match"), | 641 | "Only display entries with parent-match"), |
| 640 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", | 642 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", |
| 641 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " | 643 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " |
| 642 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), | 644 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), |
| 643 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, | 645 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, |
| 644 | "Accumulate callchains of children and show total overhead as well"), | 646 | "Accumulate callchains of children and show total overhead as well"), |
| @@ -684,7 +686,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 684 | OPT_BOOLEAN(0, "group", &symbol_conf.event_group, | 686 | OPT_BOOLEAN(0, "group", &symbol_conf.event_group, |
| 685 | "Show event group information together"), | 687 | "Show event group information together"), |
| 686 | OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", | 688 | OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", |
| 687 | "use branch records for histogram filling", parse_branch_mode), | 689 | "use branch records for per branch histogram filling", |
| 690 | parse_branch_mode), | ||
| 691 | OPT_BOOLEAN(0, "branch-history", &branch_call_mode, | ||
| 692 | "add last branch records to call history"), | ||
| 688 | OPT_STRING(0, "objdump", &objdump_path, "path", | 693 | OPT_STRING(0, "objdump", &objdump_path, "path", |
| 689 | "objdump binary to use for disassembly and annotations"), | 694 | "objdump binary to use for disassembly and annotations"), |
| 690 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, | 695 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, |
| @@ -745,10 +750,24 @@ repeat: | |||
| 745 | has_br_stack = perf_header__has_feat(&session->header, | 750 | has_br_stack = perf_header__has_feat(&session->header, |
| 746 | HEADER_BRANCH_STACK); | 751 | HEADER_BRANCH_STACK); |
| 747 | 752 | ||
| 748 | if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) { | 753 | /* |
| 754 | * Branch mode is a tristate: | ||
| 755 | * -1 means default, so decide based on the file having branch data. | ||
| 756 | * 0/1 means the user chose a mode. | ||
| 757 | */ | ||
| 758 | if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && | ||
| 759 | branch_call_mode == -1) { | ||
| 749 | sort__mode = SORT_MODE__BRANCH; | 760 | sort__mode = SORT_MODE__BRANCH; |
| 750 | symbol_conf.cumulate_callchain = false; | 761 | symbol_conf.cumulate_callchain = false; |
| 751 | } | 762 | } |
| 763 | if (branch_call_mode) { | ||
| 764 | callchain_param.key = CCKEY_ADDRESS; | ||
| 765 | callchain_param.branch_callstack = 1; | ||
| 766 | symbol_conf.use_callchain = true; | ||
| 767 | callchain_register_param(&callchain_param); | ||
| 768 | if (sort_order == NULL) | ||
| 769 | sort_order = "srcline,symbol,dso"; | ||
| 770 | } | ||
| 752 | 771 | ||
| 753 | if (report.mem_mode) { | 772 | if (report.mem_mode) { |
| 754 | if (sort__mode == SORT_MODE__BRANCH) { | 773 | if (sort__mode == SORT_MODE__BRANCH) { |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 055ce9232c9e..891086376381 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -388,20 +388,102 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) | |||
| 388 | update_stats(&runtime_itlb_cache_stats[0], count[0]); | 388 | update_stats(&runtime_itlb_cache_stats[0], count[0]); |
| 389 | } | 389 | } |
| 390 | 390 | ||
| 391 | static void zero_per_pkg(struct perf_evsel *counter) | ||
| 392 | { | ||
| 393 | if (counter->per_pkg_mask) | ||
| 394 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | ||
| 395 | } | ||
| 396 | |||
| 397 | static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | ||
| 398 | { | ||
| 399 | unsigned long *mask = counter->per_pkg_mask; | ||
| 400 | struct cpu_map *cpus = perf_evsel__cpus(counter); | ||
| 401 | int s; | ||
| 402 | |||
| 403 | *skip = false; | ||
| 404 | |||
| 405 | if (!counter->per_pkg) | ||
| 406 | return 0; | ||
| 407 | |||
| 408 | if (cpu_map__empty(cpus)) | ||
| 409 | return 0; | ||
| 410 | |||
| 411 | if (!mask) { | ||
| 412 | mask = zalloc(MAX_NR_CPUS); | ||
| 413 | if (!mask) | ||
| 414 | return -ENOMEM; | ||
| 415 | |||
| 416 | counter->per_pkg_mask = mask; | ||
| 417 | } | ||
| 418 | |||
| 419 | s = cpu_map__get_socket(cpus, cpu); | ||
| 420 | if (s < 0) | ||
| 421 | return -1; | ||
| 422 | |||
| 423 | *skip = test_and_set_bit(s, mask) == 1; | ||
| 424 | return 0; | ||
| 425 | } | ||
| 426 | |||
| 427 | static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, | ||
| 428 | struct perf_counts_values *count) | ||
| 429 | { | ||
| 430 | struct perf_counts_values *aggr = &evsel->counts->aggr; | ||
| 431 | static struct perf_counts_values zero; | ||
| 432 | bool skip = false; | ||
| 433 | |||
| 434 | if (check_per_pkg(evsel, cpu, &skip)) { | ||
| 435 | pr_err("failed to read per-pkg counter\n"); | ||
| 436 | return -1; | ||
| 437 | } | ||
| 438 | |||
| 439 | if (skip) | ||
| 440 | count = &zero; | ||
| 441 | |||
| 442 | switch (aggr_mode) { | ||
| 443 | case AGGR_CORE: | ||
| 444 | case AGGR_SOCKET: | ||
| 445 | case AGGR_NONE: | ||
| 446 | if (!evsel->snapshot) | ||
| 447 | perf_evsel__compute_deltas(evsel, cpu, count); | ||
| 448 | perf_counts_values__scale(count, scale, NULL); | ||
| 449 | evsel->counts->cpu[cpu] = *count; | ||
| 450 | update_shadow_stats(evsel, count->values); | ||
| 451 | break; | ||
| 452 | case AGGR_GLOBAL: | ||
| 453 | aggr->val += count->val; | ||
| 454 | if (scale) { | ||
| 455 | aggr->ena += count->ena; | ||
| 456 | aggr->run += count->run; | ||
| 457 | } | ||
| 458 | default: | ||
| 459 | break; | ||
| 460 | } | ||
| 461 | |||
| 462 | return 0; | ||
| 463 | } | ||
| 464 | |||
| 465 | static int read_counter(struct perf_evsel *counter); | ||
| 466 | |||
| 391 | /* | 467 | /* |
| 392 | * Read out the results of a single counter: | 468 | * Read out the results of a single counter: |
| 393 | * aggregate counts across CPUs in system-wide mode | 469 | * aggregate counts across CPUs in system-wide mode |
| 394 | */ | 470 | */ |
| 395 | static int read_counter_aggr(struct perf_evsel *counter) | 471 | static int read_counter_aggr(struct perf_evsel *counter) |
| 396 | { | 472 | { |
| 473 | struct perf_counts_values *aggr = &counter->counts->aggr; | ||
| 397 | struct perf_stat *ps = counter->priv; | 474 | struct perf_stat *ps = counter->priv; |
| 398 | u64 *count = counter->counts->aggr.values; | 475 | u64 *count = counter->counts->aggr.values; |
| 399 | int i; | 476 | int i; |
| 400 | 477 | ||
| 401 | if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter), | 478 | aggr->val = aggr->ena = aggr->run = 0; |
| 402 | thread_map__nr(evsel_list->threads), scale) < 0) | 479 | |
| 480 | if (read_counter(counter)) | ||
| 403 | return -1; | 481 | return -1; |
| 404 | 482 | ||
| 483 | if (!counter->snapshot) | ||
| 484 | perf_evsel__compute_deltas(counter, -1, aggr); | ||
| 485 | perf_counts_values__scale(aggr, scale, &counter->counts->scaled); | ||
| 486 | |||
| 405 | for (i = 0; i < 3; i++) | 487 | for (i = 0; i < 3; i++) |
| 406 | update_stats(&ps->res_stats[i], count[i]); | 488 | update_stats(&ps->res_stats[i], count[i]); |
| 407 | 489 | ||
| @@ -424,16 +506,21 @@ static int read_counter_aggr(struct perf_evsel *counter) | |||
| 424 | */ | 506 | */ |
| 425 | static int read_counter(struct perf_evsel *counter) | 507 | static int read_counter(struct perf_evsel *counter) |
| 426 | { | 508 | { |
| 427 | u64 *count; | 509 | int nthreads = thread_map__nr(evsel_list->threads); |
| 428 | int cpu; | 510 | int ncpus = perf_evsel__nr_cpus(counter); |
| 511 | int cpu, thread; | ||
| 429 | 512 | ||
| 430 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { | 513 | if (counter->system_wide) |
| 431 | if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) | 514 | nthreads = 1; |
| 432 | return -1; | ||
| 433 | 515 | ||
| 434 | count = counter->counts->cpu[cpu].values; | 516 | if (counter->per_pkg) |
| 517 | zero_per_pkg(counter); | ||
| 435 | 518 | ||
| 436 | update_shadow_stats(counter, count); | 519 | for (thread = 0; thread < nthreads; thread++) { |
| 520 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
| 521 | if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) | ||
| 522 | return -1; | ||
| 523 | } | ||
| 437 | } | 524 | } |
| 438 | 525 | ||
| 439 | return 0; | 526 | return 0; |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 517ed84db97a..cf524a35cc84 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
| @@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) | |||
| 149 | callchain_param.key = CCKEY_ADDRESS; | 149 | callchain_param.key = CCKEY_ADDRESS; |
| 150 | return 0; | 150 | return 0; |
| 151 | } | 151 | } |
| 152 | if (!strncmp(value, "branch", strlen(value))) { | ||
| 153 | callchain_param.branch_callstack = 1; | ||
| 154 | return 0; | ||
| 155 | } | ||
| 152 | return -1; | 156 | return -1; |
| 153 | } | 157 | } |
| 154 | 158 | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3f158474c892..dbc08cf5f970 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
| @@ -63,6 +63,7 @@ struct callchain_param { | |||
| 63 | sort_chain_func_t sort; | 63 | sort_chain_func_t sort; |
| 64 | enum chain_order order; | 64 | enum chain_order order; |
| 65 | enum chain_key key; | 65 | enum chain_key key; |
| 66 | bool branch_callstack; | ||
| 66 | }; | 67 | }; |
| 67 | 68 | ||
| 68 | extern struct callchain_param callchain_param; | 69 | extern struct callchain_param callchain_param; |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2d26b7ad6fe0..1e90c8557ede 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
| @@ -954,40 +954,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, | |||
| 954 | return 0; | 954 | return 0; |
| 955 | } | 955 | } |
| 956 | 956 | ||
| 957 | int __perf_evsel__read(struct perf_evsel *evsel, | ||
| 958 | int ncpus, int nthreads, bool scale) | ||
| 959 | { | ||
| 960 | size_t nv = scale ? 3 : 1; | ||
| 961 | int cpu, thread; | ||
| 962 | struct perf_counts_values *aggr = &evsel->counts->aggr, count; | ||
| 963 | |||
| 964 | if (evsel->system_wide) | ||
| 965 | nthreads = 1; | ||
| 966 | |||
| 967 | aggr->val = aggr->ena = aggr->run = 0; | ||
| 968 | |||
| 969 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
| 970 | for (thread = 0; thread < nthreads; thread++) { | ||
| 971 | if (FD(evsel, cpu, thread) < 0) | ||
| 972 | continue; | ||
| 973 | |||
| 974 | if (readn(FD(evsel, cpu, thread), | ||
| 975 | &count, nv * sizeof(u64)) < 0) | ||
| 976 | return -errno; | ||
| 977 | |||
| 978 | aggr->val += count.val; | ||
| 979 | if (scale) { | ||
| 980 | aggr->ena += count.ena; | ||
| 981 | aggr->run += count.run; | ||
| 982 | } | ||
| 983 | } | ||
| 984 | } | ||
| 985 | |||
| 986 | perf_evsel__compute_deltas(evsel, -1, aggr); | ||
| 987 | perf_counts_values__scale(aggr, scale, &evsel->counts->scaled); | ||
| 988 | return 0; | ||
| 989 | } | ||
| 990 | |||
| 991 | static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) | 957 | static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) |
| 992 | { | 958 | { |
| 993 | struct perf_evsel *leader = evsel->leader; | 959 | struct perf_evsel *leader = evsel->leader; |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b18d58da580b..38622747d130 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
| @@ -93,6 +93,7 @@ struct perf_evsel { | |||
| 93 | bool system_wide; | 93 | bool system_wide; |
| 94 | bool tracking; | 94 | bool tracking; |
| 95 | bool per_pkg; | 95 | bool per_pkg; |
| 96 | unsigned long *per_pkg_mask; | ||
| 96 | /* parse modifier helper */ | 97 | /* parse modifier helper */ |
| 97 | int exclude_GH; | 98 | int exclude_GH; |
| 98 | int nr_members; | 99 | int nr_members; |
| @@ -271,35 +272,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, | |||
| 271 | return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); | 272 | return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); |
| 272 | } | 273 | } |
| 273 | 274 | ||
| 274 | int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads, | ||
| 275 | bool scale); | ||
| 276 | |||
| 277 | /** | ||
| 278 | * perf_evsel__read - Read the aggregate results on all CPUs | ||
| 279 | * | ||
| 280 | * @evsel - event selector to read value | ||
| 281 | * @ncpus - Number of cpus affected, from zero | ||
| 282 | * @nthreads - Number of threads affected, from zero | ||
| 283 | */ | ||
| 284 | static inline int perf_evsel__read(struct perf_evsel *evsel, | ||
| 285 | int ncpus, int nthreads) | ||
| 286 | { | ||
| 287 | return __perf_evsel__read(evsel, ncpus, nthreads, false); | ||
| 288 | } | ||
| 289 | |||
| 290 | /** | ||
| 291 | * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled | ||
| 292 | * | ||
| 293 | * @evsel - event selector to read value | ||
| 294 | * @ncpus - Number of cpus affected, from zero | ||
| 295 | * @nthreads - Number of threads affected, from zero | ||
| 296 | */ | ||
| 297 | static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, | ||
| 298 | int ncpus, int nthreads) | ||
| 299 | { | ||
| 300 | return __perf_evsel__read(evsel, ncpus, nthreads, true); | ||
| 301 | } | ||
| 302 | |||
| 303 | int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, | 275 | int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, |
| 304 | struct perf_sample *sample); | 276 | struct perf_sample *sample); |
| 305 | 277 | ||
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b75b487574c7..15dd0a9691ce 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <stdbool.h> | 12 | #include <stdbool.h> |
| 13 | #include <symbol/kallsyms.h> | 13 | #include <symbol/kallsyms.h> |
| 14 | #include "unwind.h" | 14 | #include "unwind.h" |
| 15 | #include "linux/hash.h" | ||
| 15 | 16 | ||
| 16 | static void dsos__init(struct dsos *dsos) | 17 | static void dsos__init(struct dsos *dsos) |
| 17 | { | 18 | { |
| @@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread, | |||
| 1391 | 1392 | ||
| 1392 | al.filtered = 0; | 1393 | al.filtered = 0; |
| 1393 | al.sym = NULL; | 1394 | al.sym = NULL; |
| 1394 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | 1395 | if (cpumode == -1) |
| 1396 | thread__find_cpumode_addr_location(thread, MAP__FUNCTION, | ||
| 1397 | ip, &al); | ||
| 1398 | else | ||
| 1399 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | ||
| 1395 | ip, &al); | 1400 | ip, &al); |
| 1396 | if (al.sym != NULL) { | 1401 | if (al.sym != NULL) { |
| 1397 | if (sort__has_parent && !*parent && | 1402 | if (sort__has_parent && !*parent && |
| @@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | |||
| 1427 | return bi; | 1432 | return bi; |
| 1428 | } | 1433 | } |
| 1429 | 1434 | ||
| 1435 | #define CHASHSZ 127 | ||
| 1436 | #define CHASHBITS 7 | ||
| 1437 | #define NO_ENTRY 0xff | ||
| 1438 | |||
| 1439 | #define PERF_MAX_BRANCH_DEPTH 127 | ||
| 1440 | |||
| 1441 | /* Remove loops. */ | ||
| 1442 | static int remove_loops(struct branch_entry *l, int nr) | ||
| 1443 | { | ||
| 1444 | int i, j, off; | ||
| 1445 | unsigned char chash[CHASHSZ]; | ||
| 1446 | |||
| 1447 | memset(chash, NO_ENTRY, sizeof(chash)); | ||
| 1448 | |||
| 1449 | BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); | ||
| 1450 | |||
| 1451 | for (i = 0; i < nr; i++) { | ||
| 1452 | int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; | ||
| 1453 | |||
| 1454 | /* no collision handling for now */ | ||
| 1455 | if (chash[h] == NO_ENTRY) { | ||
| 1456 | chash[h] = i; | ||
| 1457 | } else if (l[chash[h]].from == l[i].from) { | ||
| 1458 | bool is_loop = true; | ||
| 1459 | /* check if it is a real loop */ | ||
| 1460 | off = 0; | ||
| 1461 | for (j = chash[h]; j < i && i + off < nr; j++, off++) | ||
| 1462 | if (l[j].from != l[i + off].from) { | ||
| 1463 | is_loop = false; | ||
| 1464 | break; | ||
| 1465 | } | ||
| 1466 | if (is_loop) { | ||
| 1467 | memmove(l + i, l + i + off, | ||
| 1468 | (nr - (i + off)) * sizeof(*l)); | ||
| 1469 | nr -= off; | ||
| 1470 | } | ||
| 1471 | } | ||
| 1472 | } | ||
| 1473 | return nr; | ||
| 1474 | } | ||
| 1475 | |||
| 1430 | static int thread__resolve_callchain_sample(struct thread *thread, | 1476 | static int thread__resolve_callchain_sample(struct thread *thread, |
| 1431 | struct ip_callchain *chain, | 1477 | struct ip_callchain *chain, |
| 1478 | struct branch_stack *branch, | ||
| 1432 | struct symbol **parent, | 1479 | struct symbol **parent, |
| 1433 | struct addr_location *root_al, | 1480 | struct addr_location *root_al, |
| 1434 | int max_stack) | 1481 | int max_stack) |
| @@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
| 1438 | int i; | 1485 | int i; |
| 1439 | int j; | 1486 | int j; |
| 1440 | int err; | 1487 | int err; |
| 1441 | int skip_idx __maybe_unused; | 1488 | int skip_idx = -1; |
| 1489 | int first_call = 0; | ||
| 1490 | |||
| 1491 | /* | ||
| 1492 | * Based on DWARF debug information, some architectures skip | ||
| 1493 | * a callchain entry saved by the kernel. | ||
| 1494 | */ | ||
| 1495 | if (chain->nr < PERF_MAX_STACK_DEPTH) | ||
| 1496 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
| 1442 | 1497 | ||
| 1443 | callchain_cursor_reset(&callchain_cursor); | 1498 | callchain_cursor_reset(&callchain_cursor); |
| 1444 | 1499 | ||
| 1500 | /* | ||
| 1501 | * Add branches to call stack for easier browsing. This gives | ||
| 1502 | * more context for a sample than just the callers. | ||
| 1503 | * | ||
| 1504 | * This uses individual histograms of paths compared to the | ||
| 1505 | * aggregated histograms the normal LBR mode uses. | ||
| 1506 | * | ||
| 1507 | * Limitations for now: | ||
| 1508 | * - No extra filters | ||
| 1509 | * - No annotations (should annotate somehow) | ||
| 1510 | */ | ||
| 1511 | |||
| 1512 | if (branch && callchain_param.branch_callstack) { | ||
| 1513 | int nr = min(max_stack, (int)branch->nr); | ||
| 1514 | struct branch_entry be[nr]; | ||
| 1515 | |||
| 1516 | if (branch->nr > PERF_MAX_BRANCH_DEPTH) { | ||
| 1517 | pr_warning("corrupted branch chain. skipping...\n"); | ||
| 1518 | goto check_calls; | ||
| 1519 | } | ||
| 1520 | |||
| 1521 | for (i = 0; i < nr; i++) { | ||
| 1522 | if (callchain_param.order == ORDER_CALLEE) { | ||
| 1523 | be[i] = branch->entries[i]; | ||
| 1524 | /* | ||
| 1525 | * Check for overlap into the callchain. | ||
| 1526 | * The return address is one off compared to | ||
| 1527 | * the branch entry. To adjust for this | ||
| 1528 | * assume the calling instruction is not longer | ||
| 1529 | * than 8 bytes. | ||
| 1530 | */ | ||
| 1531 | if (i == skip_idx || | ||
| 1532 | chain->ips[first_call] >= PERF_CONTEXT_MAX) | ||
| 1533 | first_call++; | ||
| 1534 | else if (be[i].from < chain->ips[first_call] && | ||
| 1535 | be[i].from >= chain->ips[first_call] - 8) | ||
| 1536 | first_call++; | ||
| 1537 | } else | ||
| 1538 | be[i] = branch->entries[branch->nr - i - 1]; | ||
| 1539 | } | ||
| 1540 | |||
| 1541 | nr = remove_loops(be, nr); | ||
| 1542 | |||
| 1543 | for (i = 0; i < nr; i++) { | ||
| 1544 | err = add_callchain_ip(thread, parent, root_al, | ||
| 1545 | -1, be[i].to); | ||
| 1546 | if (!err) | ||
| 1547 | err = add_callchain_ip(thread, parent, root_al, | ||
| 1548 | -1, be[i].from); | ||
| 1549 | if (err == -EINVAL) | ||
| 1550 | break; | ||
| 1551 | if (err) | ||
| 1552 | return err; | ||
| 1553 | } | ||
| 1554 | chain_nr -= nr; | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | check_calls: | ||
| 1445 | if (chain->nr > PERF_MAX_STACK_DEPTH) { | 1558 | if (chain->nr > PERF_MAX_STACK_DEPTH) { |
| 1446 | pr_warning("corrupted callchain. skipping...\n"); | 1559 | pr_warning("corrupted callchain. skipping...\n"); |
| 1447 | return 0; | 1560 | return 0; |
| 1448 | } | 1561 | } |
| 1449 | 1562 | ||
| 1450 | /* | 1563 | for (i = first_call; i < chain_nr; i++) { |
| 1451 | * Based on DWARF debug information, some architectures skip | ||
| 1452 | * a callchain entry saved by the kernel. | ||
| 1453 | */ | ||
| 1454 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
| 1455 | |||
| 1456 | for (i = 0; i < chain_nr; i++) { | ||
| 1457 | u64 ip; | 1564 | u64 ip; |
| 1458 | 1565 | ||
| 1459 | if (callchain_param.order == ORDER_CALLEE) | 1566 | if (callchain_param.order == ORDER_CALLEE) |
| @@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, | |||
| 1517 | int max_stack) | 1624 | int max_stack) |
| 1518 | { | 1625 | { |
| 1519 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, | 1626 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, |
| 1627 | sample->branch_stack, | ||
| 1520 | parent, root_al, max_stack); | 1628 | parent, root_al, max_stack); |
| 1521 | if (ret) | 1629 | if (ret) |
| 1522 | return ret; | 1630 | return ret; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e0b297c50f9d..9d602e9c6f59 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
| @@ -102,7 +102,8 @@ struct symbol_conf { | |||
| 102 | demangle, | 102 | demangle, |
| 103 | demangle_kernel, | 103 | demangle_kernel, |
| 104 | filter_relative, | 104 | filter_relative, |
| 105 | show_hist_headers; | 105 | show_hist_headers, |
| 106 | branch_callstack; | ||
| 106 | const char *vmlinux_name, | 107 | const char *vmlinux_name, |
| 107 | *kallsyms_name, | 108 | *kallsyms_name, |
| 108 | *source_prefix, | 109 | *source_prefix, |
