Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--  tools/perf/util/machine.c  126
1 file changed, 117 insertions(+), 9 deletions(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b75b487574c7..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <symbol/kallsyms.h>
 #include "unwind.h"
+#include "linux/hash.h"
 
 static void dsos__init(struct dsos *dsos)
 {
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread,
 
         al.filtered = 0;
         al.sym = NULL;
-        thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+        if (cpumode == -1)
+                thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
+                                                   ip, &al);
+        else
+                thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
                            ip, &al);
         if (al.sym != NULL) {
                 if (sort__has_parent && !*parent &&
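The new -1 cpumode is used below for addresses taken from the branch stack, which carry no user/kernel attribution; thread__find_cpumode_addr_location() then tries each cpumode in turn until one of the thread's maps resolves the address. A rough standalone sketch of that fallback pattern, with made-up mode names and a toy resolver (illustrative only, not the perf implementation):

/*
 * Sketch only: when the sampled cpumode is unknown (-1), probe each
 * address space in a fixed order until one resolves the address.
 */
#include <stdbool.h>
#include <stdio.h>

enum mode { MODE_USER, MODE_KERNEL, MODE_GUEST_USER, MODE_GUEST_KERNEL };

/* toy resolver: pretend only high addresses resolve, and only as kernel */
static bool resolve_in_mode(unsigned long long ip, enum mode m)
{
        return m == MODE_KERNEL && ip >= 0xffff000000000000ULL;
}

static int resolve_any_mode(unsigned long long ip)
{
        static const enum mode order[] = {
                MODE_USER, MODE_KERNEL, MODE_GUEST_USER, MODE_GUEST_KERNEL,
        };
        unsigned int i;

        for (i = 0; i < sizeof(order) / sizeof(order[0]); i++)
                if (resolve_in_mode(ip, order[i]))
                        return order[i];   /* first mode that knows the address wins */
        return -1;                         /* unresolved, like al.sym == NULL */
}

int main(void)
{
        printf("mode = %d\n", resolve_any_mode(0xffffffff81000000ULL));
        return 0;
}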
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
         return bi;
 }
 
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/* Remove loops. */
+static int remove_loops(struct branch_entry *l, int nr)
+{
+        int i, j, off;
+        unsigned char chash[CHASHSZ];
+
+        memset(chash, NO_ENTRY, sizeof(chash));
+
+        BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+        for (i = 0; i < nr; i++) {
+                int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+                /* no collision handling for now */
+                if (chash[h] == NO_ENTRY) {
+                        chash[h] = i;
+                } else if (l[chash[h]].from == l[i].from) {
+                        bool is_loop = true;
+                        /* check if it is a real loop */
+                        off = 0;
+                        for (j = chash[h]; j < i && i + off < nr; j++, off++)
+                                if (l[j].from != l[i + off].from) {
+                                        is_loop = false;
+                                        break;
+                                }
+                        if (is_loop) {
+                                memmove(l + i, l + i + off,
+                                        (nr - (i + off)) * sizeof(*l));
+                                nr -= off;
+                        }
+                }
+        }
+        return nr;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
                                             struct ip_callchain *chain,
+                                            struct branch_stack *branch,
                                             struct symbol **parent,
                                             struct addr_location *root_al,
                                             int max_stack)
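remove_loops() collapses repeated loop iterations captured by the branch stack: each entry's from address is hashed (indices live in an unsigned char, hence the BUG_ON guarding the 255 limit), and when the same source reappears and the entries in between repeat verbatim, one iteration is dropped in place. Below is a standalone sketch of the same idea with a simplified struct and a trivial modulo hash standing in for hash_64() (illustrative only, not the perf code):

/* Sketch of the loop-removal idea; simplified types and hash. */
#include <stdio.h>
#include <string.h>

#define CHASHSZ  127
#define NO_ENTRY 0xff

struct bentry { unsigned long long from, to; };

static int remove_loops(struct bentry *l, int nr)
{
        int i, j, off;
        unsigned char chash[CHASHSZ];

        memset(chash, NO_ENTRY, sizeof(chash));

        for (i = 0; i < nr; i++) {
                /* toy hash; perf uses hash_64(l[i].from, CHASHBITS) */
                int h = l[i].from % CHASHSZ;

                if (chash[h] == NO_ENTRY) {
                        chash[h] = i;          /* first time this source is seen */
                } else if (l[chash[h]].from == l[i].from) {
                        int is_loop = 1;

                        /* does the window since the last occurrence repeat exactly? */
                        off = 0;
                        for (j = chash[h]; j < i && i + off < nr; j++, off++)
                                if (l[j].from != l[i + off].from) {
                                        is_loop = 0;
                                        break;
                                }
                        if (is_loop) {         /* drop one loop iteration */
                                memmove(l + i, l + i + off,
                                        (nr - (i + off)) * sizeof(*l));
                                nr -= off;
                        }
                }
        }
        return nr;
}

int main(void)
{
        /* two iterations of a two-branch loop (sources 0x10, 0x20), then an exit */
        struct bentry b[] = {
                { 0x10, 0x20 }, { 0x20, 0x10 },
                { 0x10, 0x20 }, { 0x20, 0x10 },
                { 0x30, 0x40 },
        };
        int nr = remove_loops(b, 5);

        printf("%d entries left\n", nr);       /* prints 3: one iteration removed */
        return 0;
}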
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread,
         int i;
         int j;
         int err;
-        int skip_idx __maybe_unused;
+        int skip_idx = -1;
+        int first_call = 0;
+
+        /*
+         * Based on DWARF debug information, some architectures skip
+         * a callchain entry saved by the kernel.
+         */
+        if (chain->nr < PERF_MAX_STACK_DEPTH)
+                skip_idx = arch_skip_callchain_idx(thread, chain);
 
         callchain_cursor_reset(&callchain_cursor);
 
+        /*
+         * Add branches to call stack for easier browsing. This gives
+         * more context for a sample than just the callers.
+         *
+         * This uses individual histograms of paths compared to the
+         * aggregated histograms the normal LBR mode uses.
+         *
+         * Limitations for now:
+         * - No extra filters
+         * - No annotations (should annotate somehow)
+         */
+
+        if (branch && callchain_param.branch_callstack) {
+                int nr = min(max_stack, (int)branch->nr);
+                struct branch_entry be[nr];
+
+                if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+                        pr_warning("corrupted branch chain. skipping...\n");
+                        goto check_calls;
+                }
+
+                for (i = 0; i < nr; i++) {
+                        if (callchain_param.order == ORDER_CALLEE) {
+                                be[i] = branch->entries[i];
+                                /*
+                                 * Check for overlap into the callchain.
+                                 * The return address is one off compared to
+                                 * the branch entry. To adjust for this
+                                 * assume the calling instruction is not longer
+                                 * than 8 bytes.
+                                 */
+                                if (i == skip_idx ||
+                                    chain->ips[first_call] >= PERF_CONTEXT_MAX)
+                                        first_call++;
+                                else if (be[i].from < chain->ips[first_call] &&
+                                    be[i].from >= chain->ips[first_call] - 8)
+                                        first_call++;
+                        } else
+                                be[i] = branch->entries[branch->nr - i - 1];
+                }
+
+                nr = remove_loops(be, nr);
+
+                for (i = 0; i < nr; i++) {
+                        err = add_callchain_ip(thread, parent, root_al,
+                                               -1, be[i].to);
+                        if (!err)
+                                err = add_callchain_ip(thread, parent, root_al,
+                                                       -1, be[i].from);
+                        if (err == -EINVAL)
+                                break;
+                        if (err)
+                                return err;
+                }
+                chain_nr -= nr;
+        }
+
+check_calls:
         if (chain->nr > PERF_MAX_STACK_DEPTH) {
                 pr_warning("corrupted callchain. skipping...\n");
                 return 0;
         }
 
-        /*
-         * Based on DWARF debug information, some architectures skip
-         * a callchain entry saved by the kernel.
-         */
-        skip_idx = arch_skip_callchain_idx(thread, chain);
-
-        for (i = 0; i < chain_nr; i++) {
+        for (i = first_call; i < chain_nr; i++) {
                 u64 ip;
 
                 if (callchain_param.order == ORDER_CALLEE)
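With ORDER_CALLEE the innermost branch targets overlap the top of the sampled callchain: a callchain entry is a return address, i.e. it points just past the call whose from address the branch stack recorded. first_call therefore advances past context markers, the arch skip index, and any entry that sits within 8 bytes above a branch source, so the same frame is not added twice. A tiny numeric sketch of that 8-byte test, with made-up addresses:

/* Sketch of the overlap test used above to advance first_call. */
#include <stdio.h>

int main(void)
{
        /* hypothetical values: a call at 0x401000 whose return address is 0x401005 */
        unsigned long long branch_from = 0x401000ULL;   /* be[i].from             */
        unsigned long long chain_ip    = 0x401005ULL;   /* chain->ips[first_call] */

        /*
         * The callchain stores the return address, a few bytes past the
         * calling instruction, so the branch source lies just below it.
         * Assume a call instruction is no longer than 8 bytes.
         */
        if (branch_from < chain_ip && branch_from >= chain_ip - 8)
                printf("same frame: skip this callchain entry (first_call++)\n");
        else
                printf("distinct frames: keep both\n");
        return 0;
}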
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread,
                               int max_stack)
 {
         int ret = thread__resolve_callchain_sample(thread, sample->callchain,
+                                                   sample->branch_stack,
                                                    parent, root_al, max_stack);
         if (ret)
                 return ret;