aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tools/perf/Documentation/perf-report.txt7
-rw-r--r--tools/perf/builtin-report.c4
-rw-r--r--tools/perf/util/callchain.c4
-rw-r--r--tools/perf/util/callchain.h1
-rw-r--r--tools/perf/util/machine.c126
-rw-r--r--tools/perf/util/symbol.h3
6 files changed, 132 insertions, 13 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 0927bf4e6c2a..22706beffabc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -159,7 +159,7 @@ OPTIONS
159--dump-raw-trace:: 159--dump-raw-trace::
160 Dump raw trace in ASCII. 160 Dump raw trace in ASCII.
161 161
162-g [type,min[,limit],order[,key]]:: 162-g [type,min[,limit],order[,key][,branch]]::
163--call-graph:: 163--call-graph::
164 Display call chains using type, min percent threshold, optional print 164 Display call chains using type, min percent threshold, optional print
165 limit and order. 165 limit and order.
@@ -177,6 +177,11 @@ OPTIONS
177 - function: compare on functions 177 - function: compare on functions
178 - address: compare on individual code addresses 178 - address: compare on individual code addresses
179 179
180 branch can be:
181 - branch: include last branch information in callgraph
182 when available. Usually more convenient to use --branch-history
183 for this.
184
180 Default: fractal,0.5,callee,function. 185 Default: fractal,0.5,callee,function.
181 186
182--children:: 187--children::
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 140a6cd88351..410d44fac64f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -637,8 +637,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
637 "regex filter to identify parent, see: '--sort parent'"), 637 "regex filter to identify parent, see: '--sort parent'"),
638 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 638 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
639 "Only display entries with parent-match"), 639 "Only display entries with parent-match"),
640 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", 640 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]",
641 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " 641 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. "
642 "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), 642 "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
643 OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, 643 OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
644 "Accumulate callchains of children and show total overhead as well"), 644 "Accumulate callchains of children and show total overhead as well"),
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 517ed84db97a..cf524a35cc84 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value)
149 callchain_param.key = CCKEY_ADDRESS; 149 callchain_param.key = CCKEY_ADDRESS;
150 return 0; 150 return 0;
151 } 151 }
152 if (!strncmp(value, "branch", strlen(value))) {
153 callchain_param.branch_callstack = 1;
154 return 0;
155 }
152 return -1; 156 return -1;
153} 157}
154 158
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 3f158474c892..dbc08cf5f970 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -63,6 +63,7 @@ struct callchain_param {
63 sort_chain_func_t sort; 63 sort_chain_func_t sort;
64 enum chain_order order; 64 enum chain_order order;
65 enum chain_key key; 65 enum chain_key key;
66 bool branch_callstack;
66}; 67};
67 68
68extern struct callchain_param callchain_param; 69extern struct callchain_param callchain_param;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b75b487574c7..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
12#include <stdbool.h> 12#include <stdbool.h>
13#include <symbol/kallsyms.h> 13#include <symbol/kallsyms.h>
14#include "unwind.h" 14#include "unwind.h"
15#include "linux/hash.h"
15 16
16static void dsos__init(struct dsos *dsos) 17static void dsos__init(struct dsos *dsos)
17{ 18{
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread,
1391 1392
1392 al.filtered = 0; 1393 al.filtered = 0;
1393 al.sym = NULL; 1394 al.sym = NULL;
1394 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 1395 if (cpumode == -1)
1396 thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
1397 ip, &al);
1398 else
1399 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1395 ip, &al); 1400 ip, &al);
1396 if (al.sym != NULL) { 1401 if (al.sym != NULL) {
1397 if (sort__has_parent && !*parent && 1402 if (sort__has_parent && !*parent &&
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
1427 return bi; 1432 return bi;
1428} 1433}
1429 1434
1435#define CHASHSZ 127
1436#define CHASHBITS 7
1437#define NO_ENTRY 0xff
1438
1439#define PERF_MAX_BRANCH_DEPTH 127
1440
1441/* Remove loops. */
1442static int remove_loops(struct branch_entry *l, int nr)
1443{
1444 int i, j, off;
1445 unsigned char chash[CHASHSZ];
1446
1447 memset(chash, NO_ENTRY, sizeof(chash));
1448
1449 BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
1450
1451 for (i = 0; i < nr; i++) {
1452 int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
1453
1454 /* no collision handling for now */
1455 if (chash[h] == NO_ENTRY) {
1456 chash[h] = i;
1457 } else if (l[chash[h]].from == l[i].from) {
1458 bool is_loop = true;
1459 /* check if it is a real loop */
1460 off = 0;
1461 for (j = chash[h]; j < i && i + off < nr; j++, off++)
1462 if (l[j].from != l[i + off].from) {
1463 is_loop = false;
1464 break;
1465 }
1466 if (is_loop) {
1467 memmove(l + i, l + i + off,
1468 (nr - (i + off)) * sizeof(*l));
1469 nr -= off;
1470 }
1471 }
1472 }
1473 return nr;
1474}
1475
1430static int thread__resolve_callchain_sample(struct thread *thread, 1476static int thread__resolve_callchain_sample(struct thread *thread,
1431 struct ip_callchain *chain, 1477 struct ip_callchain *chain,
1478 struct branch_stack *branch,
1432 struct symbol **parent, 1479 struct symbol **parent,
1433 struct addr_location *root_al, 1480 struct addr_location *root_al,
1434 int max_stack) 1481 int max_stack)
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread,
1438 int i; 1485 int i;
1439 int j; 1486 int j;
1440 int err; 1487 int err;
1441 int skip_idx __maybe_unused; 1488 int skip_idx = -1;
1489 int first_call = 0;
1490
1491 /*
1492 * Based on DWARF debug information, some architectures skip
1493 * a callchain entry saved by the kernel.
1494 */
1495 if (chain->nr < PERF_MAX_STACK_DEPTH)
1496 skip_idx = arch_skip_callchain_idx(thread, chain);
1442 1497
1443 callchain_cursor_reset(&callchain_cursor); 1498 callchain_cursor_reset(&callchain_cursor);
1444 1499
1500 /*
1501 * Add branches to call stack for easier browsing. This gives
1502 * more context for a sample than just the callers.
1503 *
1504 * This uses individual histograms of paths compared to the
1505 * aggregated histograms the normal LBR mode uses.
1506 *
1507 * Limitations for now:
1508 * - No extra filters
1509 * - No annotations (should annotate somehow)
1510 */
1511
1512 if (branch && callchain_param.branch_callstack) {
1513 int nr = min(max_stack, (int)branch->nr);
1514 struct branch_entry be[nr];
1515
1516 if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
1517 pr_warning("corrupted branch chain. skipping...\n");
1518 goto check_calls;
1519 }
1520
1521 for (i = 0; i < nr; i++) {
1522 if (callchain_param.order == ORDER_CALLEE) {
1523 be[i] = branch->entries[i];
1524 /*
1525 * Check for overlap into the callchain.
1526 * The return address is one off compared to
1527 * the branch entry. To adjust for this
1528 * assume the calling instruction is not longer
1529 * than 8 bytes.
1530 */
1531 if (i == skip_idx ||
1532 chain->ips[first_call] >= PERF_CONTEXT_MAX)
1533 first_call++;
1534 else if (be[i].from < chain->ips[first_call] &&
1535 be[i].from >= chain->ips[first_call] - 8)
1536 first_call++;
1537 } else
1538 be[i] = branch->entries[branch->nr - i - 1];
1539 }
1540
1541 nr = remove_loops(be, nr);
1542
1543 for (i = 0; i < nr; i++) {
1544 err = add_callchain_ip(thread, parent, root_al,
1545 -1, be[i].to);
1546 if (!err)
1547 err = add_callchain_ip(thread, parent, root_al,
1548 -1, be[i].from);
1549 if (err == -EINVAL)
1550 break;
1551 if (err)
1552 return err;
1553 }
1554 chain_nr -= nr;
1555 }
1556
1557check_calls:
1445 if (chain->nr > PERF_MAX_STACK_DEPTH) { 1558 if (chain->nr > PERF_MAX_STACK_DEPTH) {
1446 pr_warning("corrupted callchain. skipping...\n"); 1559 pr_warning("corrupted callchain. skipping...\n");
1447 return 0; 1560 return 0;
1448 } 1561 }
1449 1562
1450 /* 1563 for (i = first_call; i < chain_nr; i++) {
1451 * Based on DWARF debug information, some architectures skip
1452 * a callchain entry saved by the kernel.
1453 */
1454 skip_idx = arch_skip_callchain_idx(thread, chain);
1455
1456 for (i = 0; i < chain_nr; i++) {
1457 u64 ip; 1564 u64 ip;
1458 1565
1459 if (callchain_param.order == ORDER_CALLEE) 1566 if (callchain_param.order == ORDER_CALLEE)
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread,
1517 int max_stack) 1624 int max_stack)
1518{ 1625{
1519 int ret = thread__resolve_callchain_sample(thread, sample->callchain, 1626 int ret = thread__resolve_callchain_sample(thread, sample->callchain,
1627 sample->branch_stack,
1520 parent, root_al, max_stack); 1628 parent, root_al, max_stack);
1521 if (ret) 1629 if (ret)
1522 return ret; 1630 return ret;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index e0b297c50f9d..9d602e9c6f59 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -102,7 +102,8 @@ struct symbol_conf {
102 demangle, 102 demangle,
103 demangle_kernel, 103 demangle_kernel,
104 filter_relative, 104 filter_relative,
105 show_hist_headers; 105 show_hist_headers,
106 branch_callstack;
106 const char *vmlinux_name, 107 const char *vmlinux_name,
107 *kallsyms_name, 108 *kallsyms_name,
108 *source_prefix, 109 *source_prefix,