diff options
-rw-r--r-- | tools/perf/Documentation/perf-report.txt | 7 | ||||
-rw-r--r-- | tools/perf/builtin-report.c | 4 | ||||
-rw-r--r-- | tools/perf/util/callchain.c | 4 | ||||
-rw-r--r-- | tools/perf/util/callchain.h | 1 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 126 | ||||
-rw-r--r-- | tools/perf/util/symbol.h | 3 |
6 files changed, 132 insertions, 13 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 0927bf4e6c2a..22706beffabc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -159,7 +159,7 @@ OPTIONS | |||
159 | --dump-raw-trace:: | 159 | --dump-raw-trace:: |
160 | Dump raw trace in ASCII. | 160 | Dump raw trace in ASCII. |
161 | 161 | ||
162 | -g [type,min[,limit],order[,key]]:: | 162 | -g [type,min[,limit],order[,key][,branch]]:: |
163 | --call-graph:: | 163 | --call-graph:: |
164 | Display call chains using type, min percent threshold, optional print | 164 | Display call chains using type, min percent threshold, optional print |
165 | limit and order. | 165 | limit and order. |
@@ -177,6 +177,11 @@ OPTIONS | |||
177 | - function: compare on functions | 177 | - function: compare on functions |
178 | - address: compare on individual code addresses | 178 | - address: compare on individual code addresses |
179 | 179 | ||
180 | branch can be: | ||
181 | - branch: include last branch information in callgraph | ||
182 | when available. Usually more convenient to use --branch-history | ||
183 | for this. | ||
184 | |||
180 | Default: fractal,0.5,callee,function. | 185 | Default: fractal,0.5,callee,function. |
181 | 186 | ||
182 | --children:: | 187 | --children:: |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 140a6cd88351..410d44fac64f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -637,8 +637,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
637 | "regex filter to identify parent, see: '--sort parent'"), | 637 | "regex filter to identify parent, see: '--sort parent'"), |
638 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, | 638 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, |
639 | "Only display entries with parent-match"), | 639 | "Only display entries with parent-match"), |
640 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", | 640 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", |
641 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " | 641 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " |
642 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), | 642 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), |
643 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, | 643 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, |
644 | "Accumulate callchains of children and show total overhead as well"), | 644 | "Accumulate callchains of children and show total overhead as well"), |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 517ed84db97a..cf524a35cc84 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) | |||
149 | callchain_param.key = CCKEY_ADDRESS; | 149 | callchain_param.key = CCKEY_ADDRESS; |
150 | return 0; | 150 | return 0; |
151 | } | 151 | } |
152 | if (!strncmp(value, "branch", strlen(value))) { | ||
153 | callchain_param.branch_callstack = 1; | ||
154 | return 0; | ||
155 | } | ||
152 | return -1; | 156 | return -1; |
153 | } | 157 | } |
154 | 158 | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3f158474c892..dbc08cf5f970 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -63,6 +63,7 @@ struct callchain_param { | |||
63 | sort_chain_func_t sort; | 63 | sort_chain_func_t sort; |
64 | enum chain_order order; | 64 | enum chain_order order; |
65 | enum chain_key key; | 65 | enum chain_key key; |
66 | bool branch_callstack; | ||
66 | }; | 67 | }; |
67 | 68 | ||
68 | extern struct callchain_param callchain_param; | 69 | extern struct callchain_param callchain_param; |
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b75b487574c7..15dd0a9691ce 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <stdbool.h> | 12 | #include <stdbool.h> |
13 | #include <symbol/kallsyms.h> | 13 | #include <symbol/kallsyms.h> |
14 | #include "unwind.h" | 14 | #include "unwind.h" |
15 | #include "linux/hash.h" | ||
15 | 16 | ||
16 | static void dsos__init(struct dsos *dsos) | 17 | static void dsos__init(struct dsos *dsos) |
17 | { | 18 | { |
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread, | |||
1391 | 1392 | ||
1392 | al.filtered = 0; | 1393 | al.filtered = 0; |
1393 | al.sym = NULL; | 1394 | al.sym = NULL; |
1394 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | 1395 | if (cpumode == -1) |
1396 | thread__find_cpumode_addr_location(thread, MAP__FUNCTION, | ||
1397 | ip, &al); | ||
1398 | else | ||
1399 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | ||
1395 | ip, &al); | 1400 | ip, &al); |
1396 | if (al.sym != NULL) { | 1401 | if (al.sym != NULL) { |
1397 | if (sort__has_parent && !*parent && | 1402 | if (sort__has_parent && !*parent && |
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | |||
1427 | return bi; | 1432 | return bi; |
1428 | } | 1433 | } |
1429 | 1434 | ||
1435 | #define CHASHSZ 127 | ||
1436 | #define CHASHBITS 7 | ||
1437 | #define NO_ENTRY 0xff | ||
1438 | |||
1439 | #define PERF_MAX_BRANCH_DEPTH 127 | ||
1440 | |||
1441 | /* Remove loops. */ | ||
1442 | static int remove_loops(struct branch_entry *l, int nr) | ||
1443 | { | ||
1444 | int i, j, off; | ||
1445 | unsigned char chash[CHASHSZ]; | ||
1446 | |||
1447 | memset(chash, NO_ENTRY, sizeof(chash)); | ||
1448 | |||
1449 | BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); | ||
1450 | |||
1451 | for (i = 0; i < nr; i++) { | ||
1452 | int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; | ||
1453 | |||
1454 | /* no collision handling for now */ | ||
1455 | if (chash[h] == NO_ENTRY) { | ||
1456 | chash[h] = i; | ||
1457 | } else if (l[chash[h]].from == l[i].from) { | ||
1458 | bool is_loop = true; | ||
1459 | /* check if it is a real loop */ | ||
1460 | off = 0; | ||
1461 | for (j = chash[h]; j < i && i + off < nr; j++, off++) | ||
1462 | if (l[j].from != l[i + off].from) { | ||
1463 | is_loop = false; | ||
1464 | break; | ||
1465 | } | ||
1466 | if (is_loop) { | ||
1467 | memmove(l + i, l + i + off, | ||
1468 | (nr - (i + off)) * sizeof(*l)); | ||
1469 | nr -= off; | ||
1470 | } | ||
1471 | } | ||
1472 | } | ||
1473 | return nr; | ||
1474 | } | ||
1475 | |||
1430 | static int thread__resolve_callchain_sample(struct thread *thread, | 1476 | static int thread__resolve_callchain_sample(struct thread *thread, |
1431 | struct ip_callchain *chain, | 1477 | struct ip_callchain *chain, |
1478 | struct branch_stack *branch, | ||
1432 | struct symbol **parent, | 1479 | struct symbol **parent, |
1433 | struct addr_location *root_al, | 1480 | struct addr_location *root_al, |
1434 | int max_stack) | 1481 | int max_stack) |
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
1438 | int i; | 1485 | int i; |
1439 | int j; | 1486 | int j; |
1440 | int err; | 1487 | int err; |
1441 | int skip_idx __maybe_unused; | 1488 | int skip_idx = -1; |
1489 | int first_call = 0; | ||
1490 | |||
1491 | /* | ||
1492 | * Based on DWARF debug information, some architectures skip | ||
1493 | * a callchain entry saved by the kernel. | ||
1494 | */ | ||
1495 | if (chain->nr < PERF_MAX_STACK_DEPTH) | ||
1496 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
1442 | 1497 | ||
1443 | callchain_cursor_reset(&callchain_cursor); | 1498 | callchain_cursor_reset(&callchain_cursor); |
1444 | 1499 | ||
1500 | /* | ||
1501 | * Add branches to call stack for easier browsing. This gives | ||
1502 | * more context for a sample than just the callers. | ||
1503 | * | ||
1504 | * This uses individual histograms of paths compared to the | ||
1505 | * aggregated histograms the normal LBR mode uses. | ||
1506 | * | ||
1507 | * Limitations for now: | ||
1508 | * - No extra filters | ||
1509 | * - No annotations (should annotate somehow) | ||
1510 | */ | ||
1511 | |||
1512 | if (branch && callchain_param.branch_callstack) { | ||
1513 | int nr = min(max_stack, (int)branch->nr); | ||
1514 | struct branch_entry be[nr]; | ||
1515 | |||
1516 | if (branch->nr > PERF_MAX_BRANCH_DEPTH) { | ||
1517 | pr_warning("corrupted branch chain. skipping...\n"); | ||
1518 | goto check_calls; | ||
1519 | } | ||
1520 | |||
1521 | for (i = 0; i < nr; i++) { | ||
1522 | if (callchain_param.order == ORDER_CALLEE) { | ||
1523 | be[i] = branch->entries[i]; | ||
1524 | /* | ||
1525 | * Check for overlap into the callchain. | ||
1526 | * The return address is one off compared to | ||
1527 | * the branch entry. To adjust for this | ||
1528 | * assume the calling instruction is not longer | ||
1529 | * than 8 bytes. | ||
1530 | */ | ||
1531 | if (i == skip_idx || | ||
1532 | chain->ips[first_call] >= PERF_CONTEXT_MAX) | ||
1533 | first_call++; | ||
1534 | else if (be[i].from < chain->ips[first_call] && | ||
1535 | be[i].from >= chain->ips[first_call] - 8) | ||
1536 | first_call++; | ||
1537 | } else | ||
1538 | be[i] = branch->entries[branch->nr - i - 1]; | ||
1539 | } | ||
1540 | |||
1541 | nr = remove_loops(be, nr); | ||
1542 | |||
1543 | for (i = 0; i < nr; i++) { | ||
1544 | err = add_callchain_ip(thread, parent, root_al, | ||
1545 | -1, be[i].to); | ||
1546 | if (!err) | ||
1547 | err = add_callchain_ip(thread, parent, root_al, | ||
1548 | -1, be[i].from); | ||
1549 | if (err == -EINVAL) | ||
1550 | break; | ||
1551 | if (err) | ||
1552 | return err; | ||
1553 | } | ||
1554 | chain_nr -= nr; | ||
1555 | } | ||
1556 | |||
1557 | check_calls: | ||
1445 | if (chain->nr > PERF_MAX_STACK_DEPTH) { | 1558 | if (chain->nr > PERF_MAX_STACK_DEPTH) { |
1446 | pr_warning("corrupted callchain. skipping...\n"); | 1559 | pr_warning("corrupted callchain. skipping...\n"); |
1447 | return 0; | 1560 | return 0; |
1448 | } | 1561 | } |
1449 | 1562 | ||
1450 | /* | 1563 | for (i = first_call; i < chain_nr; i++) { |
1451 | * Based on DWARF debug information, some architectures skip | ||
1452 | * a callchain entry saved by the kernel. | ||
1453 | */ | ||
1454 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
1455 | |||
1456 | for (i = 0; i < chain_nr; i++) { | ||
1457 | u64 ip; | 1564 | u64 ip; |
1458 | 1565 | ||
1459 | if (callchain_param.order == ORDER_CALLEE) | 1566 | if (callchain_param.order == ORDER_CALLEE) |
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, | |||
1517 | int max_stack) | 1624 | int max_stack) |
1518 | { | 1625 | { |
1519 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, | 1626 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, |
1627 | sample->branch_stack, | ||
1520 | parent, root_al, max_stack); | 1628 | parent, root_al, max_stack); |
1521 | if (ret) | 1629 | if (ret) |
1522 | return ret; | 1630 | return ret; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e0b297c50f9d..9d602e9c6f59 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -102,7 +102,8 @@ struct symbol_conf { | |||
102 | demangle, | 102 | demangle, |
103 | demangle_kernel, | 103 | demangle_kernel, |
104 | filter_relative, | 104 | filter_relative, |
105 | show_hist_headers; | 105 | show_hist_headers, |
106 | branch_callstack; | ||
106 | const char *vmlinux_name, | 107 | const char *vmlinux_name, |
107 | *kallsyms_name, | 108 | *kallsyms_name, |
108 | *source_prefix, | 109 | *source_prefix, |