author    Ingo Molnar <mingo@kernel.org>  2014-12-08 01:45:45 -0500
committer Ingo Molnar <mingo@kernel.org>  2014-12-08 01:45:45 -0500
commit    cfa0bd52d0ba9b852f76c7b3f1055edd5e5c7846
tree      1076abb283310578da29e08b0eda281388ce22ea
parent    e460bfdcf3b243abebc4682d4312670a4a7dc7a4
parent    09a6a1b07e5a579ef770d9728f5b158408c73c23
Merge tag 'perf-core-for-mingo-2' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

  - Support handling complete branch stacks as histograms (Andi Kleen)

Infrastructure changes:

  - Prep work for supporting per-pkg and snapshot counters in 'perf stat' (Jiri Olsa)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  tools/perf/Documentation/perf-report.txt |  12
-rw-r--r--  tools/perf/builtin-report.c              |  31
-rw-r--r--  tools/perf/builtin-stat.c                | 105
-rw-r--r--  tools/perf/util/callchain.c              |   4
-rw-r--r--  tools/perf/util/callchain.h              |   1
-rw-r--r--  tools/perf/util/evsel.c                  |  34
-rw-r--r--  tools/perf/util/evsel.h                  |  30
-rw-r--r--  tools/perf/util/machine.c                | 126
-rw-r--r--  tools/perf/util/symbol.h                 |   3
9 files changed, 257 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 0927bf4e6c2a..dd7cccdde498 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -159,7 +159,7 @@ OPTIONS
 --dump-raw-trace::
         Dump raw trace in ASCII.
 
--g [type,min[,limit],order[,key]]::
+-g [type,min[,limit],order[,key][,branch]]::
 --call-graph::
         Display call chains using type, min percent threshold, optional print
         limit and order.
@@ -177,6 +177,11 @@ OPTIONS
         - function: compare on functions
         - address: compare on individual code addresses
 
+        branch can be:
+        - branch: include last branch information in callgraph
+          when available. Usually more convenient to use --branch-history
+          for this.
+
         Default: fractal,0.5,callee,function.
 
 --children::
@@ -266,6 +271,11 @@ OPTIONS
         branch stacks and it will automatically switch to the branch view mode,
         unless --no-branch-stack is used.
 
+--branch-history::
+        Add the addresses of sampled taken branches to the callstack.
+        This allows to examine the path the program took to each sample.
+        The data collection must have used -b (or -j) and -g.
+
 --objdump=<path>::
         Path to objdump binary.
 
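The documentation hunk above implies a two-step workflow. A minimal example (the program name is hypothetical; the flags are the ones documented in this hunk):

        # record with branch stacks (-b) and callchains (-g)
        perf record -b -g ./myprog
        # then fold the sampled branches into the call history view
        perf report --branch-history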
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 140a6cd88351..39367609c707 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -226,8 +226,9 @@ static int report__setup_sample_type(struct report *rep)
 			return -EINVAL;
 		}
 		if (symbol_conf.use_callchain) {
-			ui__error("Selected -g but no callchain data. Did "
-				  "you call 'perf record' without -g?\n");
+			ui__error("Selected -g or --branch-history but no "
+				  "callchain data. Did\n"
+				  "you call 'perf record' without -g?\n");
 			return -1;
 		}
 	} else if (!rep->dont_use_callchains &&
@@ -575,6 +576,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	struct stat st;
 	bool has_br_stack = false;
 	int branch_mode = -1;
+	bool branch_call_mode = false;
 	char callchain_default_opt[] = "fractal,0.5,callee";
 	const char * const report_usage[] = {
 		"perf report [<options>]",
@@ -637,8 +639,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
 		    "Only display entries with parent-match"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
-		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]",
+		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. "
 		     "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
 	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
 		    "Accumulate callchains of children and show total overhead as well"),
@@ -684,7 +686,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		    "Show event group information together"),
 	OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
-		    "use branch records for histogram filling", parse_branch_mode),
+		    "use branch records for per branch histogram filling",
+		    parse_branch_mode),
+	OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
+		    "add last branch records to call history"),
 	OPT_STRING(0, "objdump", &objdump_path, "path",
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
@@ -745,10 +750,24 @@ repeat:
 	has_br_stack = perf_header__has_feat(&session->header,
 					     HEADER_BRANCH_STACK);
 
-	if ((branch_mode == -1 && has_br_stack) || branch_mode == 1) {
+	/*
+	 * Branch mode is a tristate:
+	 * -1 means default, so decide based on the file having branch data.
+	 * 0/1 means the user chose a mode.
+	 */
+	if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
+	    branch_call_mode == -1) {
 		sort__mode = SORT_MODE__BRANCH;
 		symbol_conf.cumulate_callchain = false;
 	}
+	if (branch_call_mode) {
+		callchain_param.key = CCKEY_ADDRESS;
+		callchain_param.branch_callstack = 1;
+		symbol_conf.use_callchain = true;
+		callchain_register_param(&callchain_param);
+		if (sort_order == NULL)
+			sort_order = "srcline,symbol,dso";
+	}
 
 	if (report.mem_mode) {
 		if (sort__mode == SORT_MODE__BRANCH) {
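The tristate comment added above is the crux of the option handling. A self-contained C sketch of the same decide-by-default pattern (not perf code; names are invented for illustration):

        #include <stdbool.h>
        #include <stdio.h>

        /* Hypothetical stand-in for branch_mode: -1 = unset (auto-detect),
         * 0 = user forced off, 1 = user forced on. */
        static bool want_branch_view(int mode, bool file_has_branch_data)
        {
                if (mode == -1)                 /* default: follow the file */
                        return file_has_branch_data;
                return mode == 1;               /* otherwise honor the user */
        }

        int main(void)
        {
                printf("%d %d %d\n",
                       want_branch_view(-1, true),   /* 1: auto, data present */
                       want_branch_view(0, true),    /* 0: forced off */
                       want_branch_view(1, false));  /* 1: forced on */
                return 0;
        }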
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 055ce9232c9e..891086376381 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -388,20 +388,102 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_itlb_cache_stats[0], count[0]);
 }
 
+static void zero_per_pkg(struct perf_evsel *counter)
+{
+	if (counter->per_pkg_mask)
+		memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+}
+
+static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
+{
+	unsigned long *mask = counter->per_pkg_mask;
+	struct cpu_map *cpus = perf_evsel__cpus(counter);
+	int s;
+
+	*skip = false;
+
+	if (!counter->per_pkg)
+		return 0;
+
+	if (cpu_map__empty(cpus))
+		return 0;
+
+	if (!mask) {
+		mask = zalloc(MAX_NR_CPUS);
+		if (!mask)
+			return -ENOMEM;
+
+		counter->per_pkg_mask = mask;
+	}
+
+	s = cpu_map__get_socket(cpus, cpu);
+	if (s < 0)
+		return -1;
+
+	*skip = test_and_set_bit(s, mask) == 1;
+	return 0;
+}
+
+static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
+		   struct perf_counts_values *count)
+{
+	struct perf_counts_values *aggr = &evsel->counts->aggr;
+	static struct perf_counts_values zero;
+	bool skip = false;
+
+	if (check_per_pkg(evsel, cpu, &skip)) {
+		pr_err("failed to read per-pkg counter\n");
+		return -1;
+	}
+
+	if (skip)
+		count = &zero;
+
+	switch (aggr_mode) {
+	case AGGR_CORE:
+	case AGGR_SOCKET:
+	case AGGR_NONE:
+		if (!evsel->snapshot)
+			perf_evsel__compute_deltas(evsel, cpu, count);
+		perf_counts_values__scale(count, scale, NULL);
+		evsel->counts->cpu[cpu] = *count;
+		update_shadow_stats(evsel, count->values);
+		break;
+	case AGGR_GLOBAL:
+		aggr->val += count->val;
+		if (scale) {
+			aggr->ena += count->ena;
+			aggr->run += count->run;
+		}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int read_counter(struct perf_evsel *counter);
+
 /*
  * Read out the results of a single counter:
  * aggregate counts across CPUs in system-wide mode
  */
 static int read_counter_aggr(struct perf_evsel *counter)
 {
+	struct perf_counts_values *aggr = &counter->counts->aggr;
 	struct perf_stat *ps = counter->priv;
 	u64 *count = counter->counts->aggr.values;
 	int i;
 
-	if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
-			       thread_map__nr(evsel_list->threads), scale) < 0)
+	aggr->val = aggr->ena = aggr->run = 0;
+
+	if (read_counter(counter))
 		return -1;
 
+	if (!counter->snapshot)
+		perf_evsel__compute_deltas(counter, -1, aggr);
+	perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
+
 	for (i = 0; i < 3; i++)
 		update_stats(&ps->res_stats[i], count[i]);
 
@@ -424,16 +506,21 @@ static int read_counter_aggr(struct perf_evsel *counter)
  */
 static int read_counter(struct perf_evsel *counter)
 {
-	u64 *count;
-	int cpu;
+	int nthreads = thread_map__nr(evsel_list->threads);
+	int ncpus = perf_evsel__nr_cpus(counter);
+	int cpu, thread;
 
-	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-		if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
-			return -1;
+	if (counter->system_wide)
+		nthreads = 1;
 
-		count = counter->counts->cpu[cpu].values;
+	if (counter->per_pkg)
+		zero_per_pkg(counter);
 
-		update_shadow_stats(counter, count);
+	for (thread = 0; thread < nthreads; thread++) {
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			if (perf_evsel__read_cb(counter, cpu, thread, read_cb))
+				return -1;
+		}
 	}
 
 	return 0;
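The per-pkg machinery above boils down to "count each package once per read pass, substituting zero for the other CPUs". A standalone C sketch of that idea (not perf code: the topology mapping is invented for illustration, and a plain flag array stands in for the test_and_set_bit() bitmask):

        #include <stdbool.h>
        #include <stdio.h>
        #include <string.h>

        #define MAX_SOCKETS 64

        /* One flag per package, reset before each read pass, mirroring
         * zero_per_pkg()/check_per_pkg() above. */
        static unsigned char socket_seen[MAX_SOCKETS];

        /* Hypothetical topology for the example: four CPUs per package. */
        static int cpu_to_socket(int cpu)
        {
                return cpu / 4;
        }

        /* True if this CPU's package was already counted, i.e. the caller
         * should use a zero count, as read_cb() does when skip is set. */
        static bool per_pkg_skip(int cpu)
        {
                int s = cpu_to_socket(cpu);
                bool seen = socket_seen[s];

                socket_seen[s] = 1;  /* non-atomic stand-in for test_and_set_bit() */
                return seen;
        }

        int main(void)
        {
                int cpu;

                memset(socket_seen, 0, sizeof(socket_seen));
                for (cpu = 0; cpu < 8; cpu++)
                        printf("cpu %d: %s\n", cpu,
                               per_pkg_skip(cpu) ? "skip (package seen)" : "read");
                return 0;
        }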
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 517ed84db97a..cf524a35cc84 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value)
 		callchain_param.key = CCKEY_ADDRESS;
 		return 0;
 	}
+	if (!strncmp(value, "branch", strlen(value))) {
+		callchain_param.branch_callstack = 1;
+		return 0;
+	}
 	return -1;
 }
 
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 3f158474c892..dbc08cf5f970 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -63,6 +63,7 @@ struct callchain_param {
 	sort_chain_func_t sort;
 	enum chain_order order;
 	enum chain_key key;
+	bool branch_callstack;
 };
 
 extern struct callchain_param callchain_param;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 2d26b7ad6fe0..1e90c8557ede 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -954,40 +954,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 	return 0;
 }
 
-int __perf_evsel__read(struct perf_evsel *evsel,
-		       int ncpus, int nthreads, bool scale)
-{
-	size_t nv = scale ? 3 : 1;
-	int cpu, thread;
-	struct perf_counts_values *aggr = &evsel->counts->aggr, count;
-
-	if (evsel->system_wide)
-		nthreads = 1;
-
-	aggr->val = aggr->ena = aggr->run = 0;
-
-	for (cpu = 0; cpu < ncpus; cpu++) {
-		for (thread = 0; thread < nthreads; thread++) {
-			if (FD(evsel, cpu, thread) < 0)
-				continue;
-
-			if (readn(FD(evsel, cpu, thread),
-				  &count, nv * sizeof(u64)) < 0)
-				return -errno;
-
-			aggr->val += count.val;
-			if (scale) {
-				aggr->ena += count.ena;
-				aggr->run += count.run;
-			}
-		}
-	}
-
-	perf_evsel__compute_deltas(evsel, -1, aggr);
-	perf_counts_values__scale(aggr, scale, &evsel->counts->scaled);
-	return 0;
-}
-
 static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
 {
 	struct perf_evsel *leader = evsel->leader;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b18d58da580b..38622747d130 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -93,6 +93,7 @@ struct perf_evsel {
 	bool system_wide;
 	bool tracking;
 	bool per_pkg;
+	unsigned long *per_pkg_mask;
 	/* parse modifier helper */
 	int exclude_GH;
 	int nr_members;
@@ -271,35 +272,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
 	return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
 }
 
-int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
-		       bool scale);
-
-/**
- * perf_evsel__read - Read the aggregate results on all CPUs
- *
- * @evsel - event selector to read value
- * @ncpus - Number of cpus affected, from zero
- * @nthreads - Number of threads affected, from zero
- */
-static inline int perf_evsel__read(struct perf_evsel *evsel,
-				   int ncpus, int nthreads)
-{
-	return __perf_evsel__read(evsel, ncpus, nthreads, false);
-}
-
-/**
- * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
- *
- * @evsel - event selector to read value
- * @ncpus - Number of cpus affected, from zero
- * @nthreads - Number of threads affected, from zero
- */
-static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
-					  int ncpus, int nthreads)
-{
-	return __perf_evsel__read(evsel, ncpus, nthreads, true);
-}
-
 int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 			     struct perf_sample *sample);
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b75b487574c7..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <symbol/kallsyms.h>
 #include "unwind.h"
+#include "linux/hash.h"
 
 static void dsos__init(struct dsos *dsos)
 {
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread,
 
 	al.filtered = 0;
 	al.sym = NULL;
-	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+	if (cpumode == -1)
+		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
+						   ip, &al);
+	else
+		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
 				   ip, &al);
 	if (al.sym != NULL) {
 		if (sort__has_parent && !*parent &&
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr)
+{
+	int i, j, off;
+	unsigned char chash[CHASHSZ];
+
+	memset(chash, NO_ENTRY, sizeof(chash));
+
+	BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+	for (i = 0; i < nr; i++) {
+		int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+		/* no collision handling for now */
+		if (chash[h] == NO_ENTRY) {
+			chash[h] = i;
+		} else if (l[chash[h]].from == l[i].from) {
+			bool is_loop = true;
+			/* check if it is a real loop */
+			off = 0;
+			for (j = chash[h]; j < i && i + off < nr; j++, off++)
+				if (l[j].from != l[i + off].from) {
+					is_loop = false;
+					break;
+				}
+			if (is_loop) {
+				memmove(l + i, l + i + off,
+					(nr - (i + off)) * sizeof(*l));
+				nr -= off;
+			}
+		}
+	}
+	return nr;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct ip_callchain *chain,
+					    struct branch_stack *branch,
 					    struct symbol **parent,
 					    struct addr_location *root_al,
 					    int max_stack)
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i;
 	int j;
 	int err;
-	int skip_idx __maybe_unused;
+	int skip_idx = -1;
+	int first_call = 0;
+
+	/*
+	 * Based on DWARF debug information, some architectures skip
+	 * a callchain entry saved by the kernel.
+	 */
+	if (chain->nr < PERF_MAX_STACK_DEPTH)
+		skip_idx = arch_skip_callchain_idx(thread, chain);
 
 	callchain_cursor_reset(&callchain_cursor);
 
+	/*
+	 * Add branches to call stack for easier browsing. This gives
+	 * more context for a sample than just the callers.
+	 *
+	 * This uses individual histograms of paths compared to the
+	 * aggregated histograms the normal LBR mode uses.
+	 *
+	 * Limitations for now:
+	 * - No extra filters
+	 * - No annotations (should annotate somehow)
+	 */
+
+	if (branch && callchain_param.branch_callstack) {
+		int nr = min(max_stack, (int)branch->nr);
+		struct branch_entry be[nr];
+
+		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+			pr_warning("corrupted branch chain. skipping...\n");
+			goto check_calls;
+		}
+
+		for (i = 0; i < nr; i++) {
+			if (callchain_param.order == ORDER_CALLEE) {
+				be[i] = branch->entries[i];
+				/*
+				 * Check for overlap into the callchain.
+				 * The return address is one off compared to
+				 * the branch entry. To adjust for this
+				 * assume the calling instruction is not longer
+				 * than 8 bytes.
+				 */
+				if (i == skip_idx ||
+				    chain->ips[first_call] >= PERF_CONTEXT_MAX)
+					first_call++;
+				else if (be[i].from < chain->ips[first_call] &&
+					 be[i].from >= chain->ips[first_call] - 8)
+					first_call++;
+			} else
+				be[i] = branch->entries[branch->nr - i - 1];
+		}
+
+		nr = remove_loops(be, nr);
+
+		for (i = 0; i < nr; i++) {
+			err = add_callchain_ip(thread, parent, root_al,
+					       -1, be[i].to);
+			if (!err)
+				err = add_callchain_ip(thread, parent, root_al,
+						       -1, be[i].from);
+			if (err == -EINVAL)
+				break;
+			if (err)
+				return err;
+		}
+		chain_nr -= nr;
+	}
+
+check_calls:
 	if (chain->nr > PERF_MAX_STACK_DEPTH) {
 		pr_warning("corrupted callchain. skipping...\n");
 		return 0;
 	}
 
-	/*
-	 * Based on DWARF debug information, some architectures skip
-	 * a callchain entry saved by the kernel.
-	 */
-	skip_idx = arch_skip_callchain_idx(thread, chain);
-
-	for (i = 0; i < chain_nr; i++) {
+	for (i = first_call; i < chain_nr; i++) {
 		u64 ip;
 
 		if (callchain_param.order == ORDER_CALLEE)
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread,
 				  int max_stack)
 {
 	int ret = thread__resolve_callchain_sample(thread, sample->callchain,
+						   sample->branch_stack,
 						   parent, root_al, max_stack);
 	if (ret)
 		return ret;
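remove_loops() above exists because branch records from a hot loop repeat the same from/to pairs, which would bloat every callstack. A simplified standalone sketch of the collapse (not perf code: linear search replaces the hash table, entries are stripped to a 'from' field, and trailing partial loops are left alone), showing {A,B,A,B,C} reducing to {A,B,C}:

        #include <stdio.h>
        #include <string.h>

        struct entry {
                unsigned long from;
        };

        /* Drop one repetition of a loop: when entry i repeats the 'from'
         * of an earlier entry j and l[j..i-1] matches l[i..i+len-1],
         * remove the second iteration. */
        static int collapse_loops(struct entry *l, int nr)
        {
                int i, j, off, len;

                for (i = 1; i < nr; i++) {
                        for (j = 0; j < i; j++) {
                                if (l[j].from != l[i].from)
                                        continue;
                                len = i - j;
                                off = 0;
                                while (off < len && i + off < nr &&
                                       l[j + off].from == l[i + off].from)
                                        off++;
                                if (off == len) {
                                        memmove(l + i, l + i + off,
                                                (nr - (i + off)) * sizeof(*l));
                                        nr -= off;
                                        i--;    /* re-examine this position */
                                }
                                break;  /* first candidate only, like the hash */
                        }
                }
                return nr;
        }

        int main(void)
        {
                struct entry b[] = { { 0xA }, { 0xB }, { 0xA }, { 0xB }, { 0xC } };
                int i, nr = collapse_loops(b, 5);

                for (i = 0; i < nr; i++)
                        printf("%#lx\n", b[i].from);    /* 0xa 0xb 0xc */
                return 0;
        }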
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index e0b297c50f9d..9d602e9c6f59 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -102,7 +102,8 @@ struct symbol_conf {
 			demangle,
 			demangle_kernel,
 			filter_relative,
-			show_hist_headers;
+			show_hist_headers,
+			branch_callstack;
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,