diff options
| -rw-r--r-- | tools/perf/Documentation/perf-report.txt | 7 | ||||
| -rw-r--r-- | tools/perf/builtin-report.c | 4 | ||||
| -rw-r--r-- | tools/perf/util/callchain.c | 4 | ||||
| -rw-r--r-- | tools/perf/util/callchain.h | 1 | ||||
| -rw-r--r-- | tools/perf/util/machine.c | 126 | ||||
| -rw-r--r-- | tools/perf/util/symbol.h | 3 |
6 files changed, 132 insertions, 13 deletions
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 0927bf4e6c2a..22706beffabc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
| |||
| @@ -159,7 +159,7 @@ OPTIONS | |||
| 159 | --dump-raw-trace:: | 159 | --dump-raw-trace:: |
| 160 | Dump raw trace in ASCII. | 160 | Dump raw trace in ASCII. |
| 161 | 161 | ||
| 162 | -g [type,min[,limit],order[,key]]:: | 162 | -g [type,min[,limit],order[,key][,branch]]:: |
| 163 | --call-graph:: | 163 | --call-graph:: |
| 164 | Display call chains using type, min percent threshold, optional print | 164 | Display call chains using type, min percent threshold, optional print |
| 165 | limit and order. | 165 | limit and order. |
| @@ -177,6 +177,11 @@ OPTIONS | |||
| 177 | - function: compare on functions | 177 | - function: compare on functions |
| 178 | - address: compare on individual code addresses | 178 | - address: compare on individual code addresses |
| 179 | 179 | ||
| 180 | branch can be: | ||
| 181 | - branch: include last branch information in callgraph | ||
| 182 | when available. Usually more convenient to use --branch-history | ||
| 183 | for this. | ||
| 184 | |||
| 180 | Default: fractal,0.5,callee,function. | 185 | Default: fractal,0.5,callee,function. |
| 181 | 186 | ||
| 182 | --children:: | 187 | --children:: |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 140a6cd88351..410d44fac64f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
| |||
| @@ -637,8 +637,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
| 637 | "regex filter to identify parent, see: '--sort parent'"), | 637 | "regex filter to identify parent, see: '--sort parent'"), |
| 638 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, | 638 | OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, |
| 639 | "Only display entries with parent-match"), | 639 | "Only display entries with parent-match"), |
| 640 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", | 640 | OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", |
| 641 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " | 641 | "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. " |
| 642 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), | 642 | "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), |
| 643 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, | 643 | OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, |
| 644 | "Accumulate callchains of children and show total overhead as well"), | 644 | "Accumulate callchains of children and show total overhead as well"), |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 517ed84db97a..cf524a35cc84 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
| |||
| @@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) | |||
| 149 | callchain_param.key = CCKEY_ADDRESS; | 149 | callchain_param.key = CCKEY_ADDRESS; |
| 150 | return 0; | 150 | return 0; |
| 151 | } | 151 | } |
| 152 | if (!strncmp(value, "branch", strlen(value))) { | ||
| 153 | callchain_param.branch_callstack = 1; | ||
| 154 | return 0; | ||
| 155 | } | ||
| 152 | return -1; | 156 | return -1; |
| 153 | } | 157 | } |
| 154 | 158 | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 3f158474c892..dbc08cf5f970 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
| |||
| @@ -63,6 +63,7 @@ struct callchain_param { | |||
| 63 | sort_chain_func_t sort; | 63 | sort_chain_func_t sort; |
| 64 | enum chain_order order; | 64 | enum chain_order order; |
| 65 | enum chain_key key; | 65 | enum chain_key key; |
| 66 | bool branch_callstack; | ||
| 66 | }; | 67 | }; |
| 67 | 68 | ||
| 68 | extern struct callchain_param callchain_param; | 69 | extern struct callchain_param callchain_param; |
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b75b487574c7..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
| |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <stdbool.h> | 12 | #include <stdbool.h> |
| 13 | #include <symbol/kallsyms.h> | 13 | #include <symbol/kallsyms.h> |
| 14 | #include "unwind.h" | 14 | #include "unwind.h" |
| 15 | #include "linux/hash.h" | ||
| 15 | 16 | ||
| 16 | static void dsos__init(struct dsos *dsos) | 17 | static void dsos__init(struct dsos *dsos) |
| 17 | { | 18 | { |
| @@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread, | |||
| 1391 | 1392 | ||
| 1392 | al.filtered = 0; | 1393 | al.filtered = 0; |
| 1393 | al.sym = NULL; | 1394 | al.sym = NULL; |
| 1394 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | 1395 | if (cpumode == -1) |
| 1396 | thread__find_cpumode_addr_location(thread, MAP__FUNCTION, | ||
| 1397 | ip, &al); | ||
| 1398 | else | ||
| 1399 | thread__find_addr_location(thread, cpumode, MAP__FUNCTION, | ||
| 1395 | ip, &al); | 1400 | ip, &al); |
| 1396 | if (al.sym != NULL) { | 1401 | if (al.sym != NULL) { |
| 1397 | if (sort__has_parent && !*parent && | 1402 | if (sort__has_parent && !*parent && |
| @@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, | |||
| 1427 | return bi; | 1432 | return bi; |
| 1428 | } | 1433 | } |
| 1429 | 1434 | ||
| 1435 | #define CHASHSZ 127 | ||
| 1436 | #define CHASHBITS 7 | ||
| 1437 | #define NO_ENTRY 0xff | ||
| 1438 | |||
| 1439 | #define PERF_MAX_BRANCH_DEPTH 127 | ||
| 1440 | |||
| 1441 | /* Remove loops. */ | ||
| 1442 | static int remove_loops(struct branch_entry *l, int nr) | ||
| 1443 | { | ||
| 1444 | int i, j, off; | ||
| 1445 | unsigned char chash[CHASHSZ]; | ||
| 1446 | |||
| 1447 | memset(chash, NO_ENTRY, sizeof(chash)); | ||
| 1448 | |||
| 1449 | BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); | ||
| 1450 | |||
| 1451 | for (i = 0; i < nr; i++) { | ||
| 1452 | int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; | ||
| 1453 | |||
| 1454 | /* no collision handling for now */ | ||
| 1455 | if (chash[h] == NO_ENTRY) { | ||
| 1456 | chash[h] = i; | ||
| 1457 | } else if (l[chash[h]].from == l[i].from) { | ||
| 1458 | bool is_loop = true; | ||
| 1459 | /* check if it is a real loop */ | ||
| 1460 | off = 0; | ||
| 1461 | for (j = chash[h]; j < i && i + off < nr; j++, off++) | ||
| 1462 | if (l[j].from != l[i + off].from) { | ||
| 1463 | is_loop = false; | ||
| 1464 | break; | ||
| 1465 | } | ||
| 1466 | if (is_loop) { | ||
| 1467 | memmove(l + i, l + i + off, | ||
| 1468 | (nr - (i + off)) * sizeof(*l)); | ||
| 1469 | nr -= off; | ||
| 1470 | } | ||
| 1471 | } | ||
| 1472 | } | ||
| 1473 | return nr; | ||
| 1474 | } | ||
| 1475 | |||
| 1430 | static int thread__resolve_callchain_sample(struct thread *thread, | 1476 | static int thread__resolve_callchain_sample(struct thread *thread, |
| 1431 | struct ip_callchain *chain, | 1477 | struct ip_callchain *chain, |
| 1478 | struct branch_stack *branch, | ||
| 1432 | struct symbol **parent, | 1479 | struct symbol **parent, |
| 1433 | struct addr_location *root_al, | 1480 | struct addr_location *root_al, |
| 1434 | int max_stack) | 1481 | int max_stack) |
| @@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread, | |||
| 1438 | int i; | 1485 | int i; |
| 1439 | int j; | 1486 | int j; |
| 1440 | int err; | 1487 | int err; |
| 1441 | int skip_idx __maybe_unused; | 1488 | int skip_idx = -1; |
| 1489 | int first_call = 0; | ||
| 1490 | |||
| 1491 | /* | ||
| 1492 | * Based on DWARF debug information, some architectures skip | ||
| 1493 | * a callchain entry saved by the kernel. | ||
| 1494 | */ | ||
| 1495 | if (chain->nr < PERF_MAX_STACK_DEPTH) | ||
| 1496 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
| 1442 | 1497 | ||
| 1443 | callchain_cursor_reset(&callchain_cursor); | 1498 | callchain_cursor_reset(&callchain_cursor); |
| 1444 | 1499 | ||
| 1500 | /* | ||
| 1501 | * Add branches to call stack for easier browsing. This gives | ||
| 1502 | * more context for a sample than just the callers. | ||
| 1503 | * | ||
| 1504 | * This uses individual histograms of paths compared to the | ||
| 1505 | * aggregated histograms the normal LBR mode uses. | ||
| 1506 | * | ||
| 1507 | * Limitations for now: | ||
| 1508 | * - No extra filters | ||
| 1509 | * - No annotations (should annotate somehow) | ||
| 1510 | */ | ||
| 1511 | |||
| 1512 | if (branch && callchain_param.branch_callstack) { | ||
| 1513 | int nr = min(max_stack, (int)branch->nr); | ||
| 1514 | struct branch_entry be[nr]; | ||
| 1515 | |||
| 1516 | if (branch->nr > PERF_MAX_BRANCH_DEPTH) { | ||
| 1517 | pr_warning("corrupted branch chain. skipping...\n"); | ||
| 1518 | goto check_calls; | ||
| 1519 | } | ||
| 1520 | |||
| 1521 | for (i = 0; i < nr; i++) { | ||
| 1522 | if (callchain_param.order == ORDER_CALLEE) { | ||
| 1523 | be[i] = branch->entries[i]; | ||
| 1524 | /* | ||
| 1525 | * Check for overlap into the callchain. | ||
| 1526 | * The return address is one off compared to | ||
| 1527 | * the branch entry. To adjust for this | ||
| 1528 | * assume the calling instruction is not longer | ||
| 1529 | * than 8 bytes. | ||
| 1530 | */ | ||
| 1531 | if (i == skip_idx || | ||
| 1532 | chain->ips[first_call] >= PERF_CONTEXT_MAX) | ||
| 1533 | first_call++; | ||
| 1534 | else if (be[i].from < chain->ips[first_call] && | ||
| 1535 | be[i].from >= chain->ips[first_call] - 8) | ||
| 1536 | first_call++; | ||
| 1537 | } else | ||
| 1538 | be[i] = branch->entries[branch->nr - i - 1]; | ||
| 1539 | } | ||
| 1540 | |||
| 1541 | nr = remove_loops(be, nr); | ||
| 1542 | |||
| 1543 | for (i = 0; i < nr; i++) { | ||
| 1544 | err = add_callchain_ip(thread, parent, root_al, | ||
| 1545 | -1, be[i].to); | ||
| 1546 | if (!err) | ||
| 1547 | err = add_callchain_ip(thread, parent, root_al, | ||
| 1548 | -1, be[i].from); | ||
| 1549 | if (err == -EINVAL) | ||
| 1550 | break; | ||
| 1551 | if (err) | ||
| 1552 | return err; | ||
| 1553 | } | ||
| 1554 | chain_nr -= nr; | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | check_calls: | ||
| 1445 | if (chain->nr > PERF_MAX_STACK_DEPTH) { | 1558 | if (chain->nr > PERF_MAX_STACK_DEPTH) { |
| 1446 | pr_warning("corrupted callchain. skipping...\n"); | 1559 | pr_warning("corrupted callchain. skipping...\n"); |
| 1447 | return 0; | 1560 | return 0; |
| 1448 | } | 1561 | } |
| 1449 | 1562 | ||
| 1450 | /* | 1563 | for (i = first_call; i < chain_nr; i++) { |
| 1451 | * Based on DWARF debug information, some architectures skip | ||
| 1452 | * a callchain entry saved by the kernel. | ||
| 1453 | */ | ||
| 1454 | skip_idx = arch_skip_callchain_idx(thread, chain); | ||
| 1455 | |||
| 1456 | for (i = 0; i < chain_nr; i++) { | ||
| 1457 | u64 ip; | 1564 | u64 ip; |
| 1458 | 1565 | ||
| 1459 | if (callchain_param.order == ORDER_CALLEE) | 1566 | if (callchain_param.order == ORDER_CALLEE) |
| @@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, | |||
| 1517 | int max_stack) | 1624 | int max_stack) |
| 1518 | { | 1625 | { |
| 1519 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, | 1626 | int ret = thread__resolve_callchain_sample(thread, sample->callchain, |
| 1627 | sample->branch_stack, | ||
| 1520 | parent, root_al, max_stack); | 1628 | parent, root_al, max_stack); |
| 1521 | if (ret) | 1629 | if (ret) |
| 1522 | return ret; | 1630 | return ret; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index e0b297c50f9d..9d602e9c6f59 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
| |||
| @@ -102,7 +102,8 @@ struct symbol_conf { | |||
| 102 | demangle, | 102 | demangle, |
| 103 | demangle_kernel, | 103 | demangle_kernel, |
| 104 | filter_relative, | 104 | filter_relative, |
| 105 | show_hist_headers; | 105 | show_hist_headers, |
| 106 | branch_callstack; | ||
| 106 | const char *vmlinux_name, | 107 | const char *vmlinux_name, |
| 107 | *kallsyms_name, | 108 | *kallsyms_name, |
| 108 | *source_prefix, | 109 | *source_prefix, |
