aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/util/machine.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util/machine.c')
-rw-r--r--tools/perf/util/machine.c126
1 files changed, 117 insertions, 9 deletions
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b75b487574c7..15dd0a9691ce 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -12,6 +12,7 @@
12#include <stdbool.h> 12#include <stdbool.h>
13#include <symbol/kallsyms.h> 13#include <symbol/kallsyms.h>
14#include "unwind.h" 14#include "unwind.h"
15#include "linux/hash.h"
15 16
16static void dsos__init(struct dsos *dsos) 17static void dsos__init(struct dsos *dsos)
17{ 18{
@@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread,
1391 1392
1392 al.filtered = 0; 1393 al.filtered = 0;
1393 al.sym = NULL; 1394 al.sym = NULL;
1394 thread__find_addr_location(thread, cpumode, MAP__FUNCTION, 1395 if (cpumode == -1)
1396 thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
1397 ip, &al);
1398 else
1399 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1395 ip, &al); 1400 ip, &al);
1396 if (al.sym != NULL) { 1401 if (al.sym != NULL) {
1397 if (sort__has_parent && !*parent && 1402 if (sort__has_parent && !*parent &&
@@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
1427 return bi; 1432 return bi;
1428} 1433}
1429 1434
1435#define CHASHSZ 127
1436#define CHASHBITS 7
1437#define NO_ENTRY 0xff
1438
1439#define PERF_MAX_BRANCH_DEPTH 127
1440
1441/* Remove loops. */
1442static int remove_loops(struct branch_entry *l, int nr)
1443{
1444 int i, j, off;
1445 unsigned char chash[CHASHSZ];
1446
1447 memset(chash, NO_ENTRY, sizeof(chash));
1448
1449 BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
1450
1451 for (i = 0; i < nr; i++) {
1452 int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
1453
1454 /* no collision handling for now */
1455 if (chash[h] == NO_ENTRY) {
1456 chash[h] = i;
1457 } else if (l[chash[h]].from == l[i].from) {
1458 bool is_loop = true;
1459 /* check if it is a real loop */
1460 off = 0;
1461 for (j = chash[h]; j < i && i + off < nr; j++, off++)
1462 if (l[j].from != l[i + off].from) {
1463 is_loop = false;
1464 break;
1465 }
1466 if (is_loop) {
1467 memmove(l + i, l + i + off,
1468 (nr - (i + off)) * sizeof(*l));
1469 nr -= off;
1470 }
1471 }
1472 }
1473 return nr;
1474}
1475
1430static int thread__resolve_callchain_sample(struct thread *thread, 1476static int thread__resolve_callchain_sample(struct thread *thread,
1431 struct ip_callchain *chain, 1477 struct ip_callchain *chain,
1478 struct branch_stack *branch,
1432 struct symbol **parent, 1479 struct symbol **parent,
1433 struct addr_location *root_al, 1480 struct addr_location *root_al,
1434 int max_stack) 1481 int max_stack)
@@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread,
1438 int i; 1485 int i;
1439 int j; 1486 int j;
1440 int err; 1487 int err;
1441 int skip_idx __maybe_unused; 1488 int skip_idx = -1;
1489 int first_call = 0;
1490
1491 /*
1492 * Based on DWARF debug information, some architectures skip
1493 * a callchain entry saved by the kernel.
1494 */
1495 if (chain->nr < PERF_MAX_STACK_DEPTH)
1496 skip_idx = arch_skip_callchain_idx(thread, chain);
1442 1497
1443 callchain_cursor_reset(&callchain_cursor); 1498 callchain_cursor_reset(&callchain_cursor);
1444 1499
1500 /*
1501 * Add branches to call stack for easier browsing. This gives
1502 * more context for a sample than just the callers.
1503 *
1504 * This uses individual histograms of paths compared to the
1505 * aggregated histograms the normal LBR mode uses.
1506 *
1507 * Limitations for now:
1508 * - No extra filters
1509 * - No annotations (should annotate somehow)
1510 */
1511
1512 if (branch && callchain_param.branch_callstack) {
1513 int nr = min(max_stack, (int)branch->nr);
1514 struct branch_entry be[nr];
1515
1516 if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
1517 pr_warning("corrupted branch chain. skipping...\n");
1518 goto check_calls;
1519 }
1520
1521 for (i = 0; i < nr; i++) {
1522 if (callchain_param.order == ORDER_CALLEE) {
1523 be[i] = branch->entries[i];
1524 /*
1525 * Check for overlap into the callchain.
1526 * The return address is one off compared to
1527 * the branch entry. To adjust for this
1528 * assume the calling instruction is not longer
1529 * than 8 bytes.
1530 */
1531 if (i == skip_idx ||
1532 chain->ips[first_call] >= PERF_CONTEXT_MAX)
1533 first_call++;
1534 else if (be[i].from < chain->ips[first_call] &&
1535 be[i].from >= chain->ips[first_call] - 8)
1536 first_call++;
1537 } else
1538 be[i] = branch->entries[branch->nr - i - 1];
1539 }
1540
1541 nr = remove_loops(be, nr);
1542
1543 for (i = 0; i < nr; i++) {
1544 err = add_callchain_ip(thread, parent, root_al,
1545 -1, be[i].to);
1546 if (!err)
1547 err = add_callchain_ip(thread, parent, root_al,
1548 -1, be[i].from);
1549 if (err == -EINVAL)
1550 break;
1551 if (err)
1552 return err;
1553 }
1554 chain_nr -= nr;
1555 }
1556
1557check_calls:
1445 if (chain->nr > PERF_MAX_STACK_DEPTH) { 1558 if (chain->nr > PERF_MAX_STACK_DEPTH) {
1446 pr_warning("corrupted callchain. skipping...\n"); 1559 pr_warning("corrupted callchain. skipping...\n");
1447 return 0; 1560 return 0;
1448 } 1561 }
1449 1562
1450 /* 1563 for (i = first_call; i < chain_nr; i++) {
1451 * Based on DWARF debug information, some architectures skip
1452 * a callchain entry saved by the kernel.
1453 */
1454 skip_idx = arch_skip_callchain_idx(thread, chain);
1455
1456 for (i = 0; i < chain_nr; i++) {
1457 u64 ip; 1564 u64 ip;
1458 1565
1459 if (callchain_param.order == ORDER_CALLEE) 1566 if (callchain_param.order == ORDER_CALLEE)
@@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread,
1517 int max_stack) 1624 int max_stack)
1518{ 1625{
1519 int ret = thread__resolve_callchain_sample(thread, sample->callchain, 1626 int ret = thread__resolve_callchain_sample(thread, sample->callchain,
1627 sample->branch_stack,
1520 parent, root_al, max_stack); 1628 parent, root_al, max_stack);
1521 if (ret) 1629 if (ret)
1522 return ret; 1630 return ret;