33 files changed, 2017 insertions, 243 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8143cd7cdbfb..0dae252f7a33 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b2abfa18f137..8a89d3b7626b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index e3b897acfbc0..811084f4e422 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
 {
 	int err = 0;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index f04c2301725e..c2e27ede07ec 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
 	if (!ppmu)
 		return -ENOENT;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 10b14e3a7eb8..068b8a2759b5 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HW_CACHE:
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 614da624330c..8e16a4a21582 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&nmi_active) < 0)
 		return -ENODEV;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
		if (attr->config >= sparc_pmu->max_events)
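Note: the six hunks above add the same guard to every PMU that cannot record taken branches (alpha, arm, mips, powerpc, sh, sparc): events requesting branch-stack sampling are rejected with -EOPNOTSUPP. The predicate they call is added elsewhere in this series, in include/linux/perf_event.h (past the point where this page is truncated); a sketch of what it reduces to:

	/* sketch of the helper used by the guards above; the series defines
	 * it in include/linux/perf_event.h next to PERF_SAMPLE_BRANCH_STACK */
	static inline bool has_branch_stack(struct perf_event *event)
	{
		return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
	}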
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
 #define MSR_OFFCORE_RSP_0		0x000001a6
 #define MSR_OFFCORE_RSP_1		0x000001a7
 
+#define MSR_LBR_SELECT			0x000001c8
+#define MSR_LBR_TOS			0x000001c9
+#define MSR_LBR_NHM_FROM		0x00000680
+#define MSR_LBR_NHM_TO			0x000006c0
+#define MSR_LBR_CORE_FROM		0x00000040
+#define MSR_LBR_CORE_TO			0x00000060
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
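Note: MSR_LBR_CORE_* and MSR_LBR_NHM_* name the bases of per-entry MSR arrays: entry i of the branch stack lives at base + i, and MSR_LBR_TOS holds the index of the most recent entry. An illustrative kernel-style sketch (mirroring how intel_pmu_lbr_read_64() below uses the x86_pmu.lbr_from/lbr_to bases):

	/* illustrative only: fetch LBR entry i via the bases defined above */
	static void read_lbr_pair(int i, u64 *from, u64 *to)
	{
		rdmsrl(x86_pmu.lbr_from + i, *from);	/* e.g. MSR_LBR_NHM_FROM + i */
		rdmsrl(x86_pmu.lbr_to + i, *to);	/* e.g. MSR_LBR_NHM_TO + i */
	}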
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f8bddb5b0600..0a18d16cb58d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
 	return 0;
 }
 
+/*
+ * check that branch_sample_type is compatible with
+ * settings needed for precise_ip > 1 which implies
+ * using the LBR to capture ALL taken branches at the
+ * priv levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+	u64 m = event->attr.branch_sample_type;
+	u64 b = 0;
+
+	/* must capture all branches */
+	if (!(m & PERF_SAMPLE_BRANCH_ANY))
+		return 0;
+
+	m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+	if (!event->attr.exclude_user)
+		b |= PERF_SAMPLE_BRANCH_USER;
+
+	if (!event->attr.exclude_kernel)
+		b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+	/*
+	 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+	 */
+
+	return m == b;
+}
+
 int x86_pmu_hw_config(struct perf_event *event)
 {
 	if (event->attr.precise_ip) {
@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
+		/*
+		 * check that PEBS LBR correction does not conflict with
+		 * whatever the user is asking with attr->branch_sample_type
+		 */
+		if (event->attr.precise_ip > 1) {
+			u64 *br_type = &event->attr.branch_sample_type;
+
+			if (has_branch_stack(event)) {
+				if (!precise_br_compat(event))
+					return -EOPNOTSUPP;
+
+				/* branch_sample_type is compatible */
+
+			} else {
+				/*
+				 * user did not specify branch_sample_type
+				 *
+				 * For PEBS fixups, we capture all
+				 * the branches at the priv level of the
+				 * event.
+				 */
+				*br_type = PERF_SAMPLE_BRANCH_ANY;
+
+				if (!event->attr.exclude_user)
+					*br_type |= PERF_SAMPLE_BRANCH_USER;
+
+				if (!event->attr.exclude_kernel)
+					*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+			}
+		}
 	}
 
 	/*
@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 
+	/* mark not used */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+	event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }
 
@@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 	NULL,
 };
 
+static void x86_pmu_flush_branch_stack(void)
+{
+	if (x86_pmu.flush_branch_stack)
+		x86_pmu.flush_branch_stack();
+}
+
 static struct pmu pmu = {
 	.pmu_enable		= x86_pmu_enable,
 	.pmu_disable		= x86_pmu_disable,
 
 	.attr_groups		= x86_pmu_attr_groups,
 
 	.event_init		= x86_pmu_event_init,
 
 	.add			= x86_pmu_add,
 	.del			= x86_pmu_del,
 	.start			= x86_pmu_start,
 	.stop			= x86_pmu_stop,
 	.read			= x86_pmu_read,
 
 	.start_txn		= x86_pmu_start_txn,
 	.cancel_txn		= x86_pmu_cancel_txn,
 	.commit_txn		= x86_pmu_commit_txn,
 
 	.event_idx		= x86_pmu_event_idx,
+	.flush_branch_stack	= x86_pmu_flush_branch_stack,
 };
 
 void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
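Note: precise_br_compat() above enforces that an explicit branch_sample_type can coexist with the PEBS skid correction: the request must include BRANCH_ANY, and its priv-level bits must match the event's exclude_user/exclude_kernel settings exactly. A user-space attr that passes the check (a sketch, assuming the new ABI fields this series adds):

	struct perf_event_attr attr = {
		.type               = PERF_TYPE_HARDWARE,
		.config             = PERF_COUNT_HW_CPU_CYCLES,
		.precise_ip         = 2,	/* PEBS with LBR-based fixup */
		.exclude_kernel     = 1,	/* user level only, so ...   */
		.sample_type        = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
		.branch_sample_type = PERF_SAMPLE_BRANCH_ANY
		                    | PERF_SAMPLE_BRANCH_USER, /* ... USER only */
	};
	/* adding PERF_SAMPLE_BRANCH_KERNEL here would make m != b in
	 * precise_br_compat() and the event would be rejected */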
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 66fda0c26402..8484e77c211e 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
 
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_LBR   = 2,	/* lbr_select */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -130,6 +131,8 @@ struct cpu_hw_events {
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct er_account		*lbr_sel;
+	u64				br_sel;
 
 	/*
 	 * Intel host/guest exclude bits
@@ -344,6 +347,7 @@ struct x86_pmu {
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+	void		(*flush_branch_stack)(void);
 
 	/*
 	 * Intel Arch Perfmon v2+
@@ -365,6 +369,8 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 	int		lbr_nr;			   /* hardware stack size */
+	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
+	const int	*lbr_sel_map;		   /* lbr_select mappings */
 
 	/*
 	 * Extra registers for events
@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
 
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+	return ip > PAGE_OFFSET;
+#else
+	return (long)ip < 0;
+#endif
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
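Note: kernel_ip() is hoisted here (from perf_event_intel_ds.c, see below) so the LBR filter can share it. On x86-64 the kernel half of the canonical address space has the top bit set, hence the sign test; on 32-bit, anything above PAGE_OFFSET is kernel. A compilable user-space illustration of the 64-bit branch:

	#include <stdio.h>

	/* user-space mirror of the 64-bit branch of kernel_ip() above */
	static int is_kernel_ip(unsigned long ip)
	{
		return (long)ip < 0;	/* upper canonical half => kernel */
	}

	int main(void)
	{
		printf("%d\n", is_kernel_ip(0xffffffff81000000UL));	/* 1: kernel text */
		printf("%d\n", is_kernel_ip(0x00007f0000001000UL));	/* 0: user space  */
		return 0;
	}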
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a52430b..dd002faff7a6 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (event->attr.exclude_host && event->attr.exclude_guest)
 		/*
 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 4bd9c9ef9d42..6a84e7f28f05 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
 },
 };
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+	/* user explicitly requested branch sampling */
+	if (has_branch_stack(event))
+		return true;
+
+	/* implicit branch sampling to correct PEBS skid */
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+		return true;
+
+	return false;
+}
+
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 
+	/*
+	 * must disable before any actual event
+	 * because any event may be combined with LBR
+	 */
+	if (intel_pmu_needs_lbr_smpl(event))
+		intel_pmu_lbr_disable(event);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
+	/*
+	 * must be enabled before any actual event
+	 * because any event may be combined with LBR
+	 */
+	if (intel_pmu_needs_lbr_smpl(event))
+		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1058,6 +1084,9 @@ again:
 
 		data.period = event->hw.last_period;
 
+		if (has_branch_stack(event))
+			data.br_stack = &cpuc->lbr_stack;
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
-	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
-		return &unconstrained;
+		return NULL; /* call x86_get_event_constraints() */
 
 again:
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1157,14 +1186,10 @@ again:
 		reg->alloc = 1;
 
 		/*
-		 * All events using extra_reg are unconstrained.
-		 * Avoids calling x86_get_event_constraints()
-		 *
-		 * Must revisit if extra_reg controlling events
-		 * ever have constraints. Worst case we go through
-		 * the regular event constraint table.
+		 * need to call x86_get_event_constraints()
+		 * to check if associated event has constraints
 		 */
-		c = &unconstrained;
+		c = NULL;
 	} else if (intel_try_alt_er(event, orig_idx)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
@@ -1201,11 +1226,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
-	struct event_constraint *c = NULL;
-
-	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, event);
-
+	struct event_constraint *c = NULL, *d;
+	struct hw_perf_event_extra *xreg, *breg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE) {
+		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+		if (c == &emptyconstraint)
+			return c;
+	}
+	breg = &event->hw.branch_reg;
+	if (breg->idx != EXTRA_REG_NONE) {
+		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+		if (d == &emptyconstraint) {
+			__intel_shared_reg_put_constraints(cpuc, xreg);
+			c = d;
+		}
+	}
 	return c;
 }
 
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 	reg = &event->hw.extra_reg;
 	if (reg->idx != EXTRA_REG_NONE)
 		__intel_shared_reg_put_constraints(cpuc, reg);
+
+	reg = &event->hw.branch_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
 }
 
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		event->hw.config = alt_config;
 	}
 
+	if (intel_pmu_needs_lbr_smpl(event)) {
+		ret = intel_pmu_setup_lbr_filter(event);
+		if (ret)
+			return ret;
+	}
+
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
@@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!x86_pmu.extra_regs)
+	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
 		return NOTIFY_OK;
 
 	cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1455,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+	cpuc->lbr_sel = NULL;
+
+	if (!cpuc->shared_regs)
 		return;
 
-	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_shared_regs *pc;
+	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_shared_regs *pc;
 
-		pc = per_cpu(cpu_hw_events, i).shared_regs;
-		if (pc && pc->core_id == core_id) {
-			cpuc->kfree_on_online = cpuc->shared_regs;
-			cpuc->shared_regs = pc;
-			break;
+			pc = per_cpu(cpu_hw_events, i).shared_regs;
+			if (pc && pc->core_id == core_id) {
+				cpuc->kfree_on_online = cpuc->shared_regs;
+				cpuc->shared_regs = pc;
+				break;
+			}
 		}
+		cpuc->shared_regs->core_id = core_id;
+		cpuc->shared_regs->refcnt++;
 	}
 
-	cpuc->shared_regs->core_id = core_id;
-	cpuc->shared_regs->refcnt++;
+	if (x86_pmu.lbr_sel_map)
+		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
 	fini_debug_store_on_cpu(cpu);
 }
 
+static void intel_pmu_flush_branch_stack(void)
+{
+	/*
+	 * Intel LBR does not tag entries with the
+	 * PID of the current task, so we need to
+	 * flush it on ctxsw
+	 * For now, we simply reset it
+	 */
+	if (x86_pmu.lbr_nr)
+		intel_pmu_lbr_reset();
+}
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
@@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
+	.flush_branch_stack	= intel_pmu_flush_branch_stack,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		intel_pmu_lbr_init_nhm();
+		intel_pmu_lbr_init_snb();
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
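Note: the constraint rework above exists because LBR_SELECT is now a cross-thread shared resource (EXTRA_REG_LBR), arbitrated with the same er_account machinery as the offcore response MSRs, and an event holding a shared register may still have ordinary counter constraints. Roughly how a caller composes the results (a simplified sketch; the real intel_get_event_constraints() also consults BTS and PEBS constraints first):

	/* sketch of the caller-side contract after this change */
	static struct event_constraint *
	get_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
	{
		struct event_constraint *c;

		c = intel_shared_regs_constraints(cpuc, event);
		if (c)		/* &emptyconstraint: a shared MSR (RSP0/RSP1 or
				 * LBR_SELECT) is taken with an incompatible
				 * config; the event cannot be scheduled now */
			return c;

		/* NULL: no shared-reg objection; use the regular tables */
		return x86_get_event_constraints(cpuc, event);
	}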
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49faa40c..7f64df19e7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 
 #include <asm/perf_event.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-		intel_pmu_lbr_enable(event);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-		intel_pmu_lbr_disable(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
-	return ip > PAGE_OFFSET;
-#else
-	return (long)ip < 0;
-#endif
-}
-
 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * both formats and we don't use the other fields in this
 	 * routine.
 	 */
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_core *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
+	if (has_branch_stack(event))
+		data.br_stack = &cpuc->lbr_stack;
+
 	if (perf_event_overflow(event, &data, &regs))
 		x86_pmu_stop(event, 0);
 }
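Note: the PEBS-reported IP points to the instruction after the one that triggered the event. For precise_ip > 1 the kernel corrects this by starting from the most recent LBR branch target and decoding forward until it reaches the reported IP; the last instruction boundary before it is the precise IP. The hunks above remove the LBR enable/disable calls from the PEBS path because intel_pmu_enable/disable_event() now drive the LBR centrally via intel_pmu_needs_lbr_smpl(). Conceptually (insn_length() is a hypothetical stand-in for the insn_init()/insn_get_length() loop inside intel_pmu_pebs_fixup_ip()):

	/* conceptual sketch only, not the literal fixup code */
	prev = to = cpuc->lbr_entries[0].to;	/* most recent branch target */
	while (to < pebs_ip) {
		prev = to;
		to += insn_length(to);		/* hypothetical decode helper */
	}
	regs->ip = prev;	/* instruction that actually caused the event */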
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63bfe54..520b4265fcd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
 
 #include <asm/perf_event.h>
 #include <asm/msr.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -14,6 +15,100 @@ enum {
 };
 
 /*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
+#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT		2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT		5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
+#define LBR_FAR_BIT		8 /* do not capture far branches */
+
+#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
+#define LBR_USER	(1 << LBR_USER_BIT)
+#define LBR_JCC		(1 << LBR_JCC_BIT)
+#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN	(1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
+#define LBR_FAR		(1 << LBR_FAR_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
+#define LBR_IGN		0	/* ignored */
+
+#define LBR_ANY		 \
+	(LBR_JCC	|\
+	 LBR_REL_CALL	|\
+	 LBR_IND_CALL	|\
+	 LBR_RETURN	|\
+	 LBR_REL_JMP	|\
+	 LBR_IND_JMP	|\
+	 LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+	for ((x) = PERF_SAMPLE_BRANCH_USER; \
+	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+	X86_BR_NONE     = 0,      /* unknown */
+
+	X86_BR_USER     = 1 << 0, /* branch target is user */
+	X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
+
+	X86_BR_CALL     = 1 << 2, /* call */
+	X86_BR_RET      = 1 << 3, /* return */
+	X86_BR_SYSCALL  = 1 << 4, /* syscall */
+	X86_BR_SYSRET   = 1 << 5, /* syscall return */
+	X86_BR_INT      = 1 << 6, /* sw interrupt */
+	X86_BR_IRET     = 1 << 7, /* return from interrupt */
+	X86_BR_JCC      = 1 << 8, /* conditional */
+	X86_BR_JMP      = 1 << 9, /* jump */
+	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY       \
+	(X86_BR_CALL    |\
+	 X86_BR_RET     |\
+	 X86_BR_SYSCALL |\
+	 X86_BR_SYSRET  |\
+	 X86_BR_INT     |\
+	 X86_BR_IRET    |\
+	 X86_BR_JCC     |\
+	 X86_BR_JMP     |\
+	 X86_BR_IRQ     |\
+	 X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL		 \
+	(X86_BR_CALL		|\
+	 X86_BR_IND_CALL	|\
+	 X86_BR_SYSCALL		|\
+	 X86_BR_IRQ		|\
+	 X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
  */
@@ -21,6 +116,10 @@ enum {
 static void __intel_pmu_lbr_enable(void)
 {
 	u64 debugctl;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_sel)
+		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	 * Reset the LBR stack if we changed task context to
 	 * avoid data leaks.
 	 */
-
 	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
 		intel_pmu_lbr_reset();
 		cpuc->lbr_context = event->ctx;
 	}
+	cpuc->br_sel = event->hw.branch_reg.reg;
 
 	cpuc->lbr_users++;
 }
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-	if (cpuc->enabled && !cpuc->lbr_users)
+	if (cpuc->enabled && !cpuc->lbr_users) {
 		__intel_pmu_lbr_disable();
+		/* avoid stale pointer */
+		cpuc->lbr_context = NULL;
+	}
 }
 
 void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
 		__intel_pmu_lbr_disable();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
 static inline u64 intel_pmu_lbr_tos(void)
 {
 	u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
-		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
-		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
-		cpuc->lbr_entries[i].flags = 0;
+		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
+		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
+		cpuc->lbr_entries[i].mispred	= 0;
+		cpuc->lbr_entries[i].predicted	= 0;
+		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
 }
 
-#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
-
 /*
  * Due to lack of segmentation in Linux the effective address (offset)
  * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
-		u64 from, to, flags = 0;
+		u64 from, to, mis = 0, pred = 0;
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
 		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
-			flags = !!(from & LBR_FROM_FLAG_MISPRED);
+			mis = !!(from & LBR_FROM_FLAG_MISPRED);
+			pred = !mis;
 			from = (u64)((((s64)from) << 1) >> 1);
 		}
 
-		cpuc->lbr_entries[i].from  = from;
-		cpuc->lbr_entries[i].to    = to;
-		cpuc->lbr_entries[i].flags = flags;
+		cpuc->lbr_entries[i].from	= from;
+		cpuc->lbr_entries[i].to		= to;
+		cpuc->lbr_entries[i].mispred	= mis;
+		cpuc->lbr_entries[i].predicted	= pred;
+		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
 }
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
+
+	intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+	u64 br_type = event->attr.branch_sample_type;
+	int mask = 0;
+
+	if (br_type & PERF_SAMPLE_BRANCH_USER)
+		mask |= X86_BR_USER;
+
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+		mask |= X86_BR_KERNEL;
+
+	/* we ignore BRANCH_HV here */
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY)
+		mask |= X86_BR_ANY;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		mask |= X86_BR_ANY_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		mask |= X86_BR_IND_CALL;
+	/*
+	 * stash actual user request into reg, it may
+	 * be used by fixup code for some CPU
+	 */
+	event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg;
+	u64 br_type = event->attr.branch_sample_type;
+	u64 mask = 0, m;
+	u64 v;
+
+	for_each_branch_sample_type(m) {
+		if (!(br_type & m))
+			continue;
+
+		v = x86_pmu.lbr_sel_map[m];
+		if (v == LBR_NOT_SUPP)
+			return -EOPNOTSUPP;
+
+		if (v != LBR_IGN)
+			mask |= v;
+	}
+	reg = &event->hw.branch_reg;
+	reg->idx = EXTRA_REG_LBR;
+
+	/* LBR_SELECT operates in suppress mode so invert mask */
+	reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+	return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+	int ret = 0;
+
+	/*
+	 * no LBR on this PMU
+	 */
+	if (!x86_pmu.lbr_nr)
+		return -EOPNOTSUPP;
+
+	/*
+	 * setup SW LBR filter
+	 */
+	intel_pmu_setup_sw_lbr_filter(event);
+
+	/*
+	 * setup HW LBR filter, if any
+	 */
+	if (x86_pmu.lbr_sel_map)
+		ret = intel_pmu_setup_hw_lbr_filter(event);
+
+	return ret;
 }
 
+/*
+ * return the type of control flow change at address "from"
+ * instruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+	struct insn insn;
+	void *addr;
+	int bytes, size = MAX_INSN_SIZE;
+	int ret = X86_BR_NONE;
+	int ext, to_plm, from_plm;
+	u8 buf[MAX_INSN_SIZE];
+	int is64 = 0;
+
+	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+	/*
+	 * maybe zero if lbr did not fill up after a reset by the time
+	 * we get a PMU interrupt
+	 */
+	if (from == 0 || to == 0)
+		return X86_BR_NONE;
+
+	if (from_plm == X86_BR_USER) {
+		/*
+		 * can happen if measuring at the user level only
+		 * and we interrupt in a kernel thread, e.g., idle.
+		 */
+		if (!current->mm)
+			return X86_BR_NONE;
+
+		/* may fail if text not present */
+		bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+		if (bytes != size)
+			return X86_BR_NONE;
+
+		addr = buf;
+	} else
+		addr = (void *)from;
+
+	/*
+	 * decoder needs to know the ABI especially
+	 * on 64-bit systems running 32-bit apps
+	 */
+#ifdef CONFIG_X86_64
+	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+	insn_init(&insn, addr, is64);
+	insn_get_opcode(&insn);
+
+	switch (insn.opcode.bytes[0]) {
+	case 0xf:
+		switch (insn.opcode.bytes[1]) {
+		case 0x05: /* syscall */
+		case 0x34: /* sysenter */
+			ret = X86_BR_SYSCALL;
+			break;
+		case 0x07: /* sysret */
+		case 0x35: /* sysexit */
+			ret = X86_BR_SYSRET;
+			break;
+		case 0x80 ... 0x8f: /* conditional */
+			ret = X86_BR_JCC;
+			break;
+		default:
+			ret = X86_BR_NONE;
+		}
+		break;
+	case 0x70 ... 0x7f: /* conditional */
+		ret = X86_BR_JCC;
+		break;
+	case 0xc2: /* near ret */
+	case 0xc3: /* near ret */
+	case 0xca: /* far ret */
+	case 0xcb: /* far ret */
+		ret = X86_BR_RET;
+		break;
+	case 0xcf: /* iret */
+		ret = X86_BR_IRET;
+		break;
+	case 0xcc ... 0xce: /* int */
+		ret = X86_BR_INT;
+		break;
+	case 0xe8: /* call near rel */
+	case 0x9a: /* call far absolute */
+		ret = X86_BR_CALL;
+		break;
+	case 0xe0 ... 0xe3: /* loop jmp */
+		ret = X86_BR_JCC;
+		break;
+	case 0xe9 ... 0xeb: /* jmp */
+		ret = X86_BR_JMP;
+		break;
+	case 0xff: /* call near absolute, call far absolute ind */
+		insn_get_modrm(&insn);
+		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+		switch (ext) {
+		case 2: /* near ind call */
+		case 3: /* far ind call */
+			ret = X86_BR_IND_CALL;
+			break;
+		case 4:
+		case 5:
+			ret = X86_BR_JMP;
+			break;
+		}
+		break;
+	default:
+		ret = X86_BR_NONE;
+	}
+	/*
+	 * interrupts, traps, faults (and thus ring transition) may
+	 * occur on any instructions. Thus, to classify them correctly,
+	 * we need to first look at the from and to priv levels. If they
+	 * are different and to is in the kernel, then it indicates
+	 * a ring transition. If the from instruction is not a ring
+	 * transition instr (syscall, sysenter, int), then it means
+	 * it was a irq, trap or fault.
+	 *
+	 * we have no way of detecting kernel to kernel faults.
+	 */
+	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+		ret = X86_BR_IRQ;
+
+	/*
+	 * branch priv level determined by target as
+	 * is done by HW when LBR_SELECT is implemented
+	 */
+	if (ret != X86_BR_NONE)
+		ret |= to_plm;
+
+	return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+	u64 from, to;
+	int br_sel = cpuc->br_sel;
+	int i, j, type;
+	bool compress = false;
+
+	/* if sampling all branches, then nothing to filter */
+	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+		return;
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+		from = cpuc->lbr_entries[i].from;
+		to = cpuc->lbr_entries[i].to;
+
+		type = branch_type(from, to);
+
+		/* if type does not correspond, then discard */
+		if (type == X86_BR_NONE || (br_sel & type) != type) {
+			cpuc->lbr_entries[i].from = 0;
+			compress = true;
+		}
+	}
+
+	if (!compress)
+		return;
+
+	/* remove all entries with from=0 */
+	for (i = 0; i < cpuc->lbr_stack.nr; ) {
+		if (!cpuc->lbr_entries[i].from) {
+			j = i;
+			while (++j < cpuc->lbr_stack.nr)
+				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+			cpuc->lbr_stack.nr--;
+			if (!cpuc->lbr_entries[i].from)
+				continue;
+		}
+		i++;
+	}
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
+					| LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+	 */
+	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+	 */
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
+					| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+};
+
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("4-deep LBR, ");
 }
 
+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x680;
-	x86_pmu.lbr_to     = 0x6c0;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - workaround LBR_SEL errata (see above)
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR but that means far
+	 *   jmp need to be filtered out
+	 */
+	pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+	x86_pmu.lbr_nr	 = 16;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR but that means far
+	 *   jmp need to be filtered out
+	 */
+	pr_cont("16-deep LBR, ");
 }
 
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
+	/*
+	 * only models starting at stepping 10 seem
+	 * to have an operational LBR which can freeze
+	 * on PMU interrupt
+	 */
+	if (boot_cpu_data.x86_mask < 10) {
+		pr_cont("LBR disabled due to erratum");
+		return;
+	}
+
 	x86_pmu.lbr_nr	   = 8;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("8-deep LBR, ");
 }
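Note: the software filter compensates for the hardware's gaps: branch_type() decodes the source instruction, and intel_pmu_lbr_filter() drops any entry whose class is not in the mask stashed in br_sel, compressing the stack afterwards. For example, the NHM/WSM IND_CALL mapping must also let indirect jumps through, so the opcode check discards them later. A worked sketch of the discard test, using the names from the hunks above:

	int br_sel = X86_BR_IND_CALL | X86_BR_USER;	/* user asked: indirect calls, user level */

	int type = X86_BR_IND_CALL | X86_BR_USER;	/* decoded entry */
	/* (br_sel & type) == type  -> entry kept */

	type = X86_BR_JMP | X86_BR_USER;	/* indirect jmp the HW let through */
	/* (br_sel & type) != type  -> entry discarded, stack compressed */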
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 64426b71381f..bd9f55a5958d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
| @@ -129,11 +129,40 @@ enum perf_event_sample_format { | |||
| 129 | PERF_SAMPLE_PERIOD = 1U << 8, | 129 | PERF_SAMPLE_PERIOD = 1U << 8, |
| 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, | 130 | PERF_SAMPLE_STREAM_ID = 1U << 9, |
| 131 | PERF_SAMPLE_RAW = 1U << 10, | 131 | PERF_SAMPLE_RAW = 1U << 10, |
| 132 | PERF_SAMPLE_BRANCH_STACK = 1U << 11, | ||
| 132 | 133 | ||
| 133 | PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ | 134 | PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ |
| 134 | }; | 135 | }; |
| 135 | 136 | ||
| 136 | /* | 137 | /* |
| 138 | * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set | ||
| 139 | * | ||
| 140 | * If the user does not pass priv level information via branch_sample_type, | ||
| 141 | * the kernel uses the event's priv level. Branch and event priv levels do | ||
| 142 | * not have to match. Branch priv level is checked for permissions. | ||
| 143 | * | ||
| 144 | * The branch types can be combined, however BRANCH_ANY covers all types | ||
| 145 | * of branches and therefore it supersedes all the other types. | ||
| 146 | */ | ||
| 147 | enum perf_branch_sample_type { | ||
| 148 | PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ | ||
| 149 | PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ | ||
| 150 | PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ | ||
| 151 | |||
| 152 | PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ | ||
| 153 | PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ | ||
| 154 | PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ | ||
| 155 | PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ | ||
| 156 | |||
| 157 | PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ | ||
| 158 | }; | ||
| 159 | |||
| 160 | #define PERF_SAMPLE_BRANCH_PLM_ALL \ | ||
| 161 | (PERF_SAMPLE_BRANCH_USER|\ | ||
| 162 | PERF_SAMPLE_BRANCH_KERNEL|\ | ||
| 163 | PERF_SAMPLE_BRANCH_HV) | ||
| 164 | |||
| 165 | /* | ||
| 137 | * The format of the data returned by read() on a perf event fd, | 166 | * The format of the data returned by read() on a perf event fd, |
| 138 | * as specified by attr.read_format: | 167 | * as specified by attr.read_format: |
| 139 | * | 168 | * |
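As a usage illustration of the branch_sample_type values introduced above (not part of this patch): a minimal user-space sketch of opening an event with taken-branch sampling. The event choice, period, and filter are arbitrary assumptions.

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* sketch: sample cycles, capturing any taken call branches */
	static int open_branch_event(pid_t pid)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);	/* >= PERF_ATTR_SIZE_VER2 */
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;	/* arbitrary */
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
		/* no priv level bits set: kernel propagates the event's levels */
		attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY_CALL;

		return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
	}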
| @@ -163,6 +192,8 @@ enum perf_event_read_format { | |||
| 163 | }; | 192 | }; |
| 164 | 193 | ||
| 165 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | 194 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
| 195 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ | ||
| 196 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | ||
| 166 | 197 | ||
| 167 | /* | 198 | /* |
| 168 | * Hardware event_id to monitor via a performance monitoring event: | 199 | * Hardware event_id to monitor via a performance monitoring event: |
| @@ -240,6 +271,7 @@ struct perf_event_attr { | |||
| 240 | __u64 bp_len; | 271 | __u64 bp_len; |
| 241 | __u64 config2; /* extension of config1 */ | 272 | __u64 config2; /* extension of config1 */ |
| 242 | }; | 273 | }; |
| 274 | __u64 branch_sample_type; /* enum branch_sample_type */ | ||
| 243 | }; | 275 | }; |
| 244 | 276 | ||
| 245 | /* | 277 | /* |
| @@ -458,6 +490,8 @@ enum perf_event_type { | |||
| 458 | * | 490 | * |
| 459 | * { u32 size; | 491 | * { u32 size; |
| 460 | * char data[size];}&& PERF_SAMPLE_RAW | 492 | * char data[size];}&& PERF_SAMPLE_RAW |
| 493 | * | ||
| 494 | * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK | ||
| 461 | * }; | 495 | * }; |
| 462 | */ | 496 | */ |
| 463 | PERF_RECORD_SAMPLE = 9, | 497 | PERF_RECORD_SAMPLE = 9, |
| @@ -530,12 +564,34 @@ struct perf_raw_record { | |||
| 530 | void *data; | 564 | void *data; |
| 531 | }; | 565 | }; |
| 532 | 566 | ||
| 567 | /* | ||
| 568 | * single taken branch record layout: | ||
| 569 | * | ||
| 570 | * from: source instruction (may not always be a branch insn) | ||
| 571 | * to: branch target | ||
| 572 | * mispred: branch target was mispredicted | ||
| 573 | * predicted: branch target was predicted | ||
| 574 | * | ||
| 575 | * support for mispred, predicted is optional. When it | ||
| 576 | * is not supported, mispred = predicted = 0. | ||
| 577 | */ | ||
| 533 | struct perf_branch_entry { | 578 | struct perf_branch_entry { |
| 534 | __u64 from; | 579 | __u64 from; |
| 535 | __u64 to; | 580 | __u64 to; |
| 536 | __u64 flags; | 581 | __u64 mispred:1, /* target mispredicted */ |
| 582 | predicted:1,/* target predicted */ | ||
| 583 | reserved:62; | ||
| 537 | }; | 584 | }; |
| 538 | 585 | ||
| 586 | /* | ||
| 587 | * branch stack layout: | ||
| 588 | * nr: number of taken branches stored in entries[] | ||
| 589 | * | ||
| 590 | * Note that nr can vary from sample to sample. | ||
| 591 | * Branches (from, to) are stored from most recent | ||
| 592 | * to least recent, i.e., entries[0] contains the most | ||
| 593 | * recent branch. | ||
| 594 | */ | ||
| 539 | struct perf_branch_stack { | 595 | struct perf_branch_stack { |
| 540 | __u64 nr; | 596 | __u64 nr; |
| 541 | struct perf_branch_entry entries[0]; | 597 | struct perf_branch_entry entries[0]; |
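Read together with the two layout comments above, a hedged consumer-side sketch of walking the stack appended to a PERF_RECORD_SAMPLE, assuming the header from this patch; entries[0] is the most recent branch:

	#include <linux/perf_event.h>
	#include <stdio.h>

	/* sketch: dump one branch stack pulled out of a sample record */
	static void walk_branch_stack(const struct perf_branch_stack *bs)
	{
		__u64 i;

		for (i = 0; i < bs->nr; i++) {
			const struct perf_branch_entry *e = &bs->entries[i];

			printf("%#llx -> %#llx%s\n",
			       (unsigned long long)e->from,
			       (unsigned long long)e->to,
			       e->mispred ? " (mispredicted)" : "");
		}
	}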
| @@ -566,7 +622,9 @@ struct hw_perf_event { | |||
| 566 | unsigned long event_base; | 622 | unsigned long event_base; |
| 567 | int idx; | 623 | int idx; |
| 568 | int last_cpu; | 624 | int last_cpu; |
| 625 | |||
| 569 | struct hw_perf_event_extra extra_reg; | 626 | struct hw_perf_event_extra extra_reg; |
| 627 | struct hw_perf_event_extra branch_reg; | ||
| 570 | }; | 628 | }; |
| 571 | struct { /* software */ | 629 | struct { /* software */ |
| 572 | struct hrtimer hrtimer; | 630 | struct hrtimer hrtimer; |
| @@ -690,6 +748,11 @@ struct pmu { | |||
| 690 | * if no implementation is provided it will default to: event->hw.idx + 1. | 748 | * if no implementation is provided it will default to: event->hw.idx + 1. |
| 691 | */ | 749 | */ |
| 692 | int (*event_idx) (struct perf_event *event); /*optional */ | 750 | int (*event_idx) (struct perf_event *event); /*optional */ |
| 751 | |||
| 752 | /* | ||
| 753 | * flush branch stack on context-switches (needed in cpu-wide mode) | ||
| 754 | */ | ||
| 755 | void (*flush_branch_stack) (void); | ||
| 693 | }; | 756 | }; |
| 694 | 757 | ||
| 695 | /** | 758 | /** |
| @@ -923,7 +986,8 @@ struct perf_event_context { | |||
| 923 | u64 parent_gen; | 986 | u64 parent_gen; |
| 924 | u64 generation; | 987 | u64 generation; |
| 925 | int pin_count; | 988 | int pin_count; |
| 926 | int nr_cgroups; /* cgroup events present */ | 989 | int nr_cgroups; /* cgroup evts */ |
| 990 | int nr_branch_stack; /* branch_stack evt */ | ||
| 927 | struct rcu_head rcu_head; | 991 | struct rcu_head rcu_head; |
| 928 | }; | 992 | }; |
| 929 | 993 | ||
| @@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, | |||
| 988 | extern u64 perf_event_read_value(struct perf_event *event, | 1052 | extern u64 perf_event_read_value(struct perf_event *event, |
| 989 | u64 *enabled, u64 *running); | 1053 | u64 *enabled, u64 *running); |
| 990 | 1054 | ||
| 1055 | |||
| 991 | struct perf_sample_data { | 1056 | struct perf_sample_data { |
| 992 | u64 type; | 1057 | u64 type; |
| 993 | 1058 | ||
| @@ -1007,12 +1072,14 @@ struct perf_sample_data { | |||
| 1007 | u64 period; | 1072 | u64 period; |
| 1008 | struct perf_callchain_entry *callchain; | 1073 | struct perf_callchain_entry *callchain; |
| 1009 | struct perf_raw_record *raw; | 1074 | struct perf_raw_record *raw; |
| 1075 | struct perf_branch_stack *br_stack; | ||
| 1010 | }; | 1076 | }; |
| 1011 | 1077 | ||
| 1012 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) | 1078 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) |
| 1013 | { | 1079 | { |
| 1014 | data->addr = addr; | 1080 | data->addr = addr; |
| 1015 | data->raw = NULL; | 1081 | data->raw = NULL; |
| 1082 | data->br_stack = NULL; | ||
| 1016 | } | 1083 | } |
| 1017 | 1084 | ||
| 1018 | extern void perf_output_sample(struct perf_output_handle *handle, | 1085 | extern void perf_output_sample(struct perf_output_handle *handle, |
| @@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); | |||
| 1151 | # define perf_instruction_pointer(regs) instruction_pointer(regs) | 1218 | # define perf_instruction_pointer(regs) instruction_pointer(regs) |
| 1152 | #endif | 1219 | #endif |
| 1153 | 1220 | ||
| 1221 | static inline bool has_branch_stack(struct perf_event *event) | ||
| 1222 | { | ||
| 1223 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; | ||
| 1224 | } | ||
| 1225 | |||
| 1154 | extern int perf_output_begin(struct perf_output_handle *handle, | 1226 | extern int perf_output_begin(struct perf_output_handle *handle, |
| 1155 | struct perf_event *event, unsigned int size); | 1227 | struct perf_event *event, unsigned int size); |
| 1156 | extern void perf_output_end(struct perf_output_handle *handle); | 1228 | extern void perf_output_end(struct perf_output_handle *handle); |
diff --git a/kernel/events/core.c b/kernel/events/core.c index e8b32ac75ce3..c61234b1a988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) | |||
| 118 | PERF_FLAG_FD_OUTPUT |\ | 118 | PERF_FLAG_FD_OUTPUT |\ |
| 119 | PERF_FLAG_PID_CGROUP) | 119 | PERF_FLAG_PID_CGROUP) |
| 120 | 120 | ||
| 121 | /* | ||
| 122 | * branch priv levels that need permission checks | ||
| 123 | */ | ||
| 124 | #define PERF_SAMPLE_BRANCH_PERM_PLM \ | ||
| 125 | (PERF_SAMPLE_BRANCH_KERNEL |\ | ||
| 126 | PERF_SAMPLE_BRANCH_HV) | ||
| 127 | |||
| 121 | enum event_type_t { | 128 | enum event_type_t { |
| 122 | EVENT_FLEXIBLE = 0x1, | 129 | EVENT_FLEXIBLE = 0x1, |
| 123 | EVENT_PINNED = 0x2, | 130 | EVENT_PINNED = 0x2, |
| @@ -130,6 +137,7 @@ enum event_type_t { | |||
| 130 | */ | 137 | */ |
| 131 | struct static_key_deferred perf_sched_events __read_mostly; | 138 | struct static_key_deferred perf_sched_events __read_mostly; |
| 132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 139 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
| 140 | static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); | ||
| 133 | 141 | ||
| 134 | static atomic_t nr_mmap_events __read_mostly; | 142 | static atomic_t nr_mmap_events __read_mostly; |
| 135 | static atomic_t nr_comm_events __read_mostly; | 143 | static atomic_t nr_comm_events __read_mostly; |
| @@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 881 | if (is_cgroup_event(event)) | 889 | if (is_cgroup_event(event)) |
| 882 | ctx->nr_cgroups++; | 890 | ctx->nr_cgroups++; |
| 883 | 891 | ||
| 892 | if (has_branch_stack(event)) | ||
| 893 | ctx->nr_branch_stack++; | ||
| 894 | |||
| 884 | list_add_rcu(&event->event_entry, &ctx->event_list); | 895 | list_add_rcu(&event->event_entry, &ctx->event_list); |
| 885 | if (!ctx->nr_events) | 896 | if (!ctx->nr_events) |
| 886 | perf_pmu_rotate_start(ctx->pmu); | 897 | perf_pmu_rotate_start(ctx->pmu); |
| @@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 1020 | cpuctx->cgrp = NULL; | 1031 | cpuctx->cgrp = NULL; |
| 1021 | } | 1032 | } |
| 1022 | 1033 | ||
| 1034 | if (has_branch_stack(event)) | ||
| 1035 | ctx->nr_branch_stack--; | ||
| 1036 | |||
| 1023 | ctx->nr_events--; | 1037 | ctx->nr_events--; |
| 1024 | if (event->attr.inherit_stat) | 1038 | if (event->attr.inherit_stat) |
| 1025 | ctx->nr_stat--; | 1039 | ctx->nr_stat--; |
| @@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
| 2195 | } | 2209 | } |
| 2196 | 2210 | ||
| 2197 | /* | 2211 | /* |
| 2212 | * When sampling the branch stack in system-wide mode, it may be necessary | ||
| 2213 | * to flush the stack on context switch. This happens when the branch | ||
| 2214 | * stack does not tag its entries with the pid of the current task. | ||
| 2215 | * Otherwise it becomes impossible to associate a branch entry with a | ||
| 2216 | * task. This ambiguity is more likely to appear when the branch stack | ||
| 2217 | * supports priv level filtering and the user sets it to monitor only | ||
| 2218 | * at the user level (which could be a useful measurement in system-wide | ||
| 2219 | * mode). In that case, the risk is high of having a branch stack with | ||
| 2220 | * branches from multiple tasks. Flushing may mean dropping the existing | ||
| 2221 | * entries or stashing them somewhere in the PMU-specific code layer. | ||
| 2222 | * | ||
| 2223 | * This function provides the context switch callback to the lower code | ||
| 2224 | * layer. It is invoked ONLY when there is at least one system-wide context | ||
| 2225 | * with at least one active event using taken branch sampling. | ||
| 2226 | */ | ||
| 2227 | static void perf_branch_stack_sched_in(struct task_struct *prev, | ||
| 2228 | struct task_struct *task) | ||
| 2229 | { | ||
| 2230 | struct perf_cpu_context *cpuctx; | ||
| 2231 | struct pmu *pmu; | ||
| 2232 | unsigned long flags; | ||
| 2233 | |||
| 2234 | /* no need to flush branch stack if not changing task */ | ||
| 2235 | if (prev == task) | ||
| 2236 | return; | ||
| 2237 | |||
| 2238 | local_irq_save(flags); | ||
| 2239 | |||
| 2240 | rcu_read_lock(); | ||
| 2241 | |||
| 2242 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
| 2243 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
| 2244 | |||
| 2245 | /* | ||
| 2246 | * check if the context has at least one | ||
| 2247 | * event using PERF_SAMPLE_BRANCH_STACK | ||
| 2248 | */ | ||
| 2249 | if (cpuctx->ctx.nr_branch_stack > 0 | ||
| 2250 | && pmu->flush_branch_stack) { | ||
| 2251 | |||
| 2252 | pmu = cpuctx->ctx.pmu; | ||
| 2253 | |||
| 2254 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
| 2255 | |||
| 2256 | perf_pmu_disable(pmu); | ||
| 2257 | |||
| 2258 | pmu->flush_branch_stack(); | ||
| 2259 | |||
| 2260 | perf_pmu_enable(pmu); | ||
| 2261 | |||
| 2262 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
| 2263 | } | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | rcu_read_unlock(); | ||
| 2267 | |||
| 2268 | local_irq_restore(flags); | ||
| 2269 | } | ||
| 2270 | |||
| 2271 | /* | ||
| 2198 | * Called from scheduler to add the events of the current task | 2272 | * Called from scheduler to add the events of the current task |
| 2199 | * with interrupts disabled. | 2273 | * with interrupts disabled. |
| 2200 | * | 2274 | * |
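For illustration only: a hypothetical PMU backend wiring up the flush_branch_stack callback invoked above; my_pmu and my_pmu_reset_lbr are invented names, not part of this patch.

	/* sketch: discard stale branch buffer entries on cpu-wide ctxsw */
	static void my_pmu_flush_branch_stack(void)
	{
		/*
		 * entries recorded before the switch would otherwise be
		 * charged to the wrong task; a real driver could instead
		 * stash them in a per-task area
		 */
		my_pmu_reset_lbr();
	}

	static struct pmu my_pmu = {
		/* ...usual add/del/start/stop/read callbacks... */
		.flush_branch_stack = my_pmu_flush_branch_stack,
	};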
| @@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev, | |||
| 2225 | */ | 2299 | */ |
| 2226 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | 2300 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) |
| 2227 | perf_cgroup_sched_in(prev, task); | 2301 | perf_cgroup_sched_in(prev, task); |
| 2302 | |||
| 2303 | /* check for system-wide branch_stack events */ | ||
| 2304 | if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) | ||
| 2305 | perf_branch_stack_sched_in(prev, task); | ||
| 2228 | } | 2306 | } |
| 2229 | 2307 | ||
| 2230 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2308 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
| @@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event) | |||
| 2791 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | 2869 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); |
| 2792 | static_key_slow_dec_deferred(&perf_sched_events); | 2870 | static_key_slow_dec_deferred(&perf_sched_events); |
| 2793 | } | 2871 | } |
| 2872 | |||
| 2873 | if (has_branch_stack(event)) { | ||
| 2874 | static_key_slow_dec_deferred(&perf_sched_events); | ||
| 2875 | /* is system-wide event */ | ||
| 2876 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
| 2877 | atomic_dec(&per_cpu(perf_branch_stack_events, | ||
| 2878 | event->cpu)); | ||
| 2879 | } | ||
| 2794 | } | 2880 | } |
| 2795 | 2881 | ||
| 2796 | if (event->rb) { | 2882 | if (event->rb) { |
| @@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
| 3907 | } | 3993 | } |
| 3908 | } | 3994 | } |
| 3909 | } | 3995 | } |
| 3996 | |||
| 3997 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 3998 | if (data->br_stack) { | ||
| 3999 | size_t size; | ||
| 4000 | |||
| 4001 | size = data->br_stack->nr | ||
| 4002 | * sizeof(struct perf_branch_entry); | ||
| 4003 | |||
| 4004 | perf_output_put(handle, data->br_stack->nr); | ||
| 4005 | perf_output_copy(handle, data->br_stack->entries, size); | ||
| 4006 | } else { | ||
| 4007 | /* | ||
| 4008 | * we always store at least the value of nr | ||
| 4009 | */ | ||
| 4010 | u64 nr = 0; | ||
| 4011 | perf_output_put(handle, nr); | ||
| 4012 | } | ||
| 4013 | } | ||
| 3910 | } | 4014 | } |
| 3911 | 4015 | ||
| 3912 | void perf_prepare_sample(struct perf_event_header *header, | 4016 | void perf_prepare_sample(struct perf_event_header *header, |
| @@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
| 3949 | WARN_ON_ONCE(size & (sizeof(u64)-1)); | 4053 | WARN_ON_ONCE(size & (sizeof(u64)-1)); |
| 3950 | header->size += size; | 4054 | header->size += size; |
| 3951 | } | 4055 | } |
| 4056 | |||
| 4057 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 4058 | int size = sizeof(u64); /* nr */ | ||
| 4059 | if (data->br_stack) { | ||
| 4060 | size += data->br_stack->nr | ||
| 4061 | * sizeof(struct perf_branch_entry); | ||
| 4062 | } | ||
| 4063 | header->size += size; | ||
| 4064 | } | ||
| 3952 | } | 4065 | } |
| 3953 | 4066 | ||
| 3954 | static void perf_event_output(struct perf_event *event, | 4067 | static void perf_event_output(struct perf_event *event, |
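The prepare and output paths above must agree on the record size. A small sketch of that arithmetic, mirroring the kernel side, with a fully populated 16-deep stack as a worked example:

	/* sketch: bytes the branch stack adds to one sample record */
	static size_t branch_stack_bytes(u64 nr)
	{
		/* u64 nr header, then nr x { u64 from, to, flags-word } */
		return sizeof(u64) + nr * sizeof(struct perf_branch_entry);
	}
	/* e.g. nr == 16: 8 + 16 * 24 = 392 bytes */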
| @@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event) | |||
| 5010 | if (event->attr.type != PERF_TYPE_SOFTWARE) | 5123 | if (event->attr.type != PERF_TYPE_SOFTWARE) |
| 5011 | return -ENOENT; | 5124 | return -ENOENT; |
| 5012 | 5125 | ||
| 5126 | /* | ||
| 5127 | * no branch sampling for software events | ||
| 5128 | */ | ||
| 5129 | if (has_branch_stack(event)) | ||
| 5130 | return -EOPNOTSUPP; | ||
| 5131 | |||
| 5013 | switch (event_id) { | 5132 | switch (event_id) { |
| 5014 | case PERF_COUNT_SW_CPU_CLOCK: | 5133 | case PERF_COUNT_SW_CPU_CLOCK: |
| 5015 | case PERF_COUNT_SW_TASK_CLOCK: | 5134 | case PERF_COUNT_SW_TASK_CLOCK: |
| @@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event) | |||
| 5120 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 5239 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
| 5121 | return -ENOENT; | 5240 | return -ENOENT; |
| 5122 | 5241 | ||
| 5242 | /* | ||
| 5243 | * no branch sampling for tracepoint events | ||
| 5244 | */ | ||
| 5245 | if (has_branch_stack(event)) | ||
| 5246 | return -EOPNOTSUPP; | ||
| 5247 | |||
| 5123 | err = perf_trace_init(event); | 5248 | err = perf_trace_init(event); |
| 5124 | if (err) | 5249 | if (err) |
| 5125 | return err; | 5250 | return err; |
| @@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event) | |||
| 5345 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | 5470 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) |
| 5346 | return -ENOENT; | 5471 | return -ENOENT; |
| 5347 | 5472 | ||
| 5473 | /* | ||
| 5474 | * no branch sampling for software events | ||
| 5475 | */ | ||
| 5476 | if (has_branch_stack(event)) | ||
| 5477 | return -EOPNOTSUPP; | ||
| 5478 | |||
| 5348 | perf_swevent_init_hrtimer(event); | 5479 | perf_swevent_init_hrtimer(event); |
| 5349 | 5480 | ||
| 5350 | return 0; | 5481 | return 0; |
| @@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event) | |||
| 5419 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | 5550 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) |
| 5420 | return -ENOENT; | 5551 | return -ENOENT; |
| 5421 | 5552 | ||
| 5553 | /* | ||
| 5554 | * no branch sampling for software events | ||
| 5555 | */ | ||
| 5556 | if (has_branch_stack(event)) | ||
| 5557 | return -EOPNOTSUPP; | ||
| 5558 | |||
| 5422 | perf_swevent_init_hrtimer(event); | 5559 | perf_swevent_init_hrtimer(event); |
| 5423 | 5560 | ||
| 5424 | return 0; | 5561 | return 0; |
| @@ -5866,6 +6003,12 @@ done: | |||
| 5866 | return ERR_PTR(err); | 6003 | return ERR_PTR(err); |
| 5867 | } | 6004 | } |
| 5868 | } | 6005 | } |
| 6006 | if (has_branch_stack(event)) { | ||
| 6007 | static_key_slow_inc(&perf_sched_events.key); | ||
| 6008 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
| 6009 | atomic_inc(&per_cpu(perf_branch_stack_events, | ||
| 6010 | event->cpu)); | ||
| 6011 | } | ||
| 5869 | } | 6012 | } |
| 5870 | 6013 | ||
| 5871 | return event; | 6014 | return event; |
| @@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
| 5935 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) | 6078 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) |
| 5936 | return -EINVAL; | 6079 | return -EINVAL; |
| 5937 | 6080 | ||
| 6081 | if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 6082 | u64 mask = attr->branch_sample_type; | ||
| 6083 | |||
| 6084 | /* only using defined bits */ | ||
| 6085 | if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) | ||
| 6086 | return -EINVAL; | ||
| 6087 | |||
| 6088 | /* at least one branch bit must be set */ | ||
| 6089 | if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) | ||
| 6090 | return -EINVAL; | ||
| 6091 | |||
| 6092 | /* kernel level capture: check permissions */ | ||
| 6093 | if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) | ||
| 6094 | && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
| 6095 | return -EACCES; | ||
| 6096 | |||
| 6097 | /* propagate priv level, when not set for branch */ | ||
| 6098 | if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { | ||
| 6099 | |||
| 6100 | /* exclude_kernel checked on syscall entry */ | ||
| 6101 | if (!attr->exclude_kernel) | ||
| 6102 | mask |= PERF_SAMPLE_BRANCH_KERNEL; | ||
| 6103 | |||
| 6104 | if (!attr->exclude_user) | ||
| 6105 | mask |= PERF_SAMPLE_BRANCH_USER; | ||
| 6106 | |||
| 6107 | if (!attr->exclude_hv) | ||
| 6108 | mask |= PERF_SAMPLE_BRANCH_HV; | ||
| 6109 | /* | ||
| 6110 | * adjust user setting (for HW filter setup) | ||
| 6111 | */ | ||
| 6112 | attr->branch_sample_type = mask; | ||
| 6113 | } | ||
| 6114 | } | ||
| 5938 | out: | 6115 | out: |
| 5939 | return ret; | 6116 | return ret; |
| 5940 | 6117 | ||
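A hedged user-space mirror of the priv level propagation rule implemented above; with exclude_kernel = 1 and exclude_user = 0, a plain BRANCH_ANY request gains only the USER bit:

	/* sketch: how priv levels default when the caller sets none */
	static __u64 propagate_plm(__u64 mask, int exclude_user,
				   int exclude_kernel, int exclude_hv)
	{
		if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {
			if (!exclude_kernel)
				mask |= PERF_SAMPLE_BRANCH_KERNEL;
			if (!exclude_user)
				mask |= PERF_SAMPLE_BRANCH_USER;
			if (!exclude_hv)
				mask |= PERF_SAMPLE_BRANCH_HV;
		}
		return mask;
	}
	/* propagate_plm(PERF_SAMPLE_BRANCH_ANY, 0, 1, 1)
	 *	== (PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER) */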
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 3330022a7ac1..bb38c4d3ee12 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
| @@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp) | |||
| 581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) | 581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) |
| 582 | return -ENOENT; | 582 | return -ENOENT; |
| 583 | 583 | ||
| 584 | /* | ||
| 585 | * no branch sampling for breakpoint events | ||
| 586 | */ | ||
| 587 | if (has_branch_stack(bp)) | ||
| 588 | return -EOPNOTSUPP; | ||
| 589 | |||
| 584 | err = register_perf_hw_breakpoint(bp); | 590 | err = register_perf_hw_breakpoint(bp); |
| 585 | if (err) | 591 | if (err) |
| 586 | return err; | 592 | return err; |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a5766b4b0125..a1386b2fff00 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
| @@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha | |||
| 152 | corresponding events, i.e., they always refer to events defined earlier on the command | 152 | corresponding events, i.e., they always refer to events defined earlier on the command |
| 153 | line. | 153 | line. |
| 154 | 154 | ||
| 155 | -b:: | ||
| 156 | --branch-any:: | ||
| 157 | Enable taken branch stack sampling. Any type of taken branch may be sampled. | ||
| 158 | This is a shortcut for --branch-filter any. See --branch-filter for more info. | ||
| 159 | |||
| 160 | -j:: | ||
| 161 | --branch-filter:: | ||
| 162 | Enable taken branch stack sampling. Each sample captures a series of consecutive | ||
| 163 | taken branches. The number of branches captured with each sample depends on the | ||
| 164 | underlying hardware, the type of branches of interest, and the executed code. | ||
| 165 | It is possible to select the types of branches captured by enabling filters. The | ||
| 166 | following filters are defined: | ||
| 167 | |||
| 168 | - any: any type of branches | ||
| 169 | - any_call: any function call or system call | ||
| 170 | - any_ret: any function return or system call return | ||
| 171 | - ind_call: any indirect branch | ||
| 172 | - u: only when the branch target is at the user level | ||
| 173 | - k: only when the branch target is in the kernel | ||
| 174 | - hv: only when the target is at the hypervisor level | ||
| 175 | |||
| 176 | + | ||
| 177 | The option requires at least one branch type among any, any_call, any_ret, ind_call. | ||
| 178 | The privilege levels may be omitted, in which case the privilege levels of the associated | ||
| 179 | event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege | ||
| 180 | levels are subject to permissions. When sampling on multiple events, branch stack sampling | ||
| 181 | is enabled for all the sampling events. The sampled branch type is the same for all events. | ||
| 182 | The various filters must be specified as a comma-separated list: --branch-filter any_ret,u,k | ||
| 183 | Note that this feature may not be available on all processors. | ||
| 184 | |||
| 155 | SEE ALSO | 185 | SEE ALSO |
| 156 | -------- | 186 | -------- |
| 157 | linkperf:perf-stat[1], linkperf:perf-list[1] | 187 | linkperf:perf-stat[1], linkperf:perf-list[1] |
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b430e98712e..87feeee8b90c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
| @@ -153,6 +153,16 @@ OPTIONS | |||
| 153 | information which may be very large and thus may clutter the display. | 153 | information which may be very large and thus may clutter the display. |
| 154 | It currently includes: cpu and numa topology of the host system. | 154 | It currently includes: cpu and numa topology of the host system. |
| 155 | 155 | ||
| 156 | -b:: | ||
| 157 | --branch-stack:: | ||
| 158 | Use the addresses of sampled taken branches instead of the instruction | ||
| 159 | address to build the histograms. To generate meaningful output, the | ||
| 160 | perf.data file must have been obtained using perf record -b or | ||
| 161 | perf record --branch-filter xxx where xxx is a branch filter option. | ||
| 162 | perf report is able to auto-detect whether a perf.data file contains | ||
| 163 | branch stacks and it will automatically switch to the branch view mode, | ||
| 164 | unless --no-branch-stack is used. | ||
| 165 | |||
| 156 | SEE ALSO | 166 | SEE ALSO |
| 157 | -------- | 167 | -------- |
| 158 | linkperf:perf-stat[1], linkperf:perf-annotate[1] | 168 | linkperf:perf-stat[1], linkperf:perf-annotate[1] |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 75d230fef202..be4e1eee782e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
| @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
| 473 | if (!have_tracepoints(&evsel_list->entries)) | 473 | if (!have_tracepoints(&evsel_list->entries)) |
| 474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); | 474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); |
| 475 | 475 | ||
| 476 | if (!rec->opts.branch_stack) | ||
| 477 | perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); | ||
| 478 | |||
| 476 | if (!rec->file_new) { | 479 | if (!rec->file_new) { |
| 477 | err = perf_session__read_header(session, output); | 480 | err = perf_session__read_header(session, output); |
| 478 | if (err < 0) | 481 | if (err < 0) |
| @@ -638,6 +641,90 @@ out_delete_session: | |||
| 638 | return err; | 641 | return err; |
| 639 | } | 642 | } |
| 640 | 643 | ||
| 644 | #define BRANCH_OPT(n, m) \ | ||
| 645 | { .name = n, .mode = (m) } | ||
| 646 | |||
| 647 | #define BRANCH_END { .name = NULL } | ||
| 648 | |||
| 649 | struct branch_mode { | ||
| 650 | const char *name; | ||
| 651 | int mode; | ||
| 652 | }; | ||
| 653 | |||
| 654 | static const struct branch_mode branch_modes[] = { | ||
| 655 | BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), | ||
| 656 | BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), | ||
| 657 | BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), | ||
| 658 | BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), | ||
| 659 | BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), | ||
| 660 | BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), | ||
| 661 | BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), | ||
| 662 | BRANCH_END | ||
| 663 | }; | ||
| 664 | |||
| 665 | static int | ||
| 666 | parse_branch_stack(const struct option *opt, const char *str, int unset) | ||
| 667 | { | ||
| 668 | #define ONLY_PLM \ | ||
| 669 | (PERF_SAMPLE_BRANCH_USER |\ | ||
| 670 | PERF_SAMPLE_BRANCH_KERNEL |\ | ||
| 671 | PERF_SAMPLE_BRANCH_HV) | ||
| 672 | |||
| 673 | uint64_t *mode = (uint64_t *)opt->value; | ||
| 674 | const struct branch_mode *br; | ||
| 675 | char *s, *os = NULL, *p; | ||
| 676 | int ret = -1; | ||
| 677 | |||
| 678 | if (unset) | ||
| 679 | return 0; | ||
| 680 | |||
| 681 | /* | ||
| 682 | * cannot set it twice, -b + --branch-filter for instance | ||
| 683 | */ | ||
| 684 | if (*mode) | ||
| 685 | return -1; | ||
| 686 | |||
| 687 | /* str may be NULL in case no arg is passed to -b */ | ||
| 688 | if (str) { | ||
| 689 | /* because str is read-only */ | ||
| 690 | s = os = strdup(str); | ||
| 691 | if (!s) | ||
| 692 | return -1; | ||
| 693 | |||
| 694 | for (;;) { | ||
| 695 | p = strchr(s, ','); | ||
| 696 | if (p) | ||
| 697 | *p = '\0'; | ||
| 698 | |||
| 699 | for (br = branch_modes; br->name; br++) { | ||
| 700 | if (!strcasecmp(s, br->name)) | ||
| 701 | break; | ||
| 702 | } | ||
| 703 | if (!br->name) { | ||
| 704 | ui__warning("unknown branch filter %s," | ||
| 705 | " check man page\n", s); | ||
| 706 | goto error; | ||
| 707 | } | ||
| 708 | |||
| 709 | *mode |= br->mode; | ||
| 710 | |||
| 711 | if (!p) | ||
| 712 | break; | ||
| 713 | |||
| 714 | s = p + 1; | ||
| 715 | } | ||
| 716 | } | ||
| 717 | ret = 0; | ||
| 718 | |||
| 719 | /* default to any branch */ | ||
| 720 | if ((*mode & ~ONLY_PLM) == 0) { | ||
| 721 | *mode = PERF_SAMPLE_BRANCH_ANY; | ||
| 722 | } | ||
| 723 | error: | ||
| 724 | free(os); | ||
| 725 | return ret; | ||
| 726 | } | ||
| 727 | |||
| 641 | static const char * const record_usage[] = { | 728 | static const char * const record_usage[] = { |
| 642 | "perf record [<options>] [<command>]", | 729 | "perf record [<options>] [<command>]", |
| 643 | "perf record [<options>] -- <command> [<options>]", | 730 | "perf record [<options>] -- <command> [<options>]", |
| @@ -727,6 +814,14 @@ const struct option record_options[] = { | |||
| 727 | "monitor event in cgroup name only", | 814 | "monitor event in cgroup name only", |
| 728 | parse_cgroups), | 815 | parse_cgroups), |
| 729 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), | 816 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), |
| 817 | |||
| 818 | OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, | ||
| 819 | "branch any", "sample any taken branches", | ||
| 820 | parse_branch_stack), | ||
| 821 | |||
| 822 | OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, | ||
| 823 | "branch filter mask", "branch stack filter modes", | ||
| 824 | parse_branch_stack), | ||
| 730 | OPT_END() | 825 | OPT_END() |
| 731 | }; | 826 | }; |
| 732 | 827 | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..8e91c6eba18a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
| @@ -53,6 +53,82 @@ struct perf_report { | |||
| 53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | 53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | ||
| 57 | struct addr_location *al, | ||
| 58 | struct perf_sample *sample, | ||
| 59 | struct perf_evsel *evsel, | ||
| 60 | struct machine *machine) | ||
| 61 | { | ||
| 62 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
| 63 | struct symbol *parent = NULL; | ||
| 64 | int err = 0; | ||
| 65 | unsigned i; | ||
| 66 | struct hist_entry *he; | ||
| 67 | struct branch_info *bi, *bx; | ||
| 68 | |||
| 69 | if ((sort__has_parent || symbol_conf.use_callchain) | ||
| 70 | && sample->callchain) { | ||
| 71 | err = machine__resolve_callchain(machine, evsel, al->thread, | ||
| 72 | sample->callchain, &parent); | ||
| 73 | if (err) | ||
| 74 | return err; | ||
| 75 | } | ||
| 76 | |||
| 77 | bi = machine__resolve_bstack(machine, al->thread, | ||
| 78 | sample->branch_stack); | ||
| 79 | if (!bi) | ||
| 80 | return -ENOMEM; | ||
| 81 | |||
| 82 | for (i = 0; i < sample->branch_stack->nr; i++) { | ||
| 83 | if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) | ||
| 84 | continue; | ||
| 85 | /* | ||
| 86 | * The report shows the percentage of total branches captured | ||
| 87 | * and not events sampled. Thus we use a pseudo period of 1. | ||
| 88 | */ | ||
| 89 | he = __hists__add_branch_entry(&evsel->hists, al, parent, | ||
| 90 | &bi[i], 1); | ||
| 91 | if (he) { | ||
| 92 | struct annotation *notes; | ||
| 93 | err = -ENOMEM; | ||
| 94 | bx = he->branch_info; | ||
| 95 | if (bx->from.sym && use_browser > 0) { | ||
| 96 | notes = symbol__annotation(bx->from.sym); | ||
| 97 | if (!notes->src | ||
| 98 | && symbol__alloc_hist(bx->from.sym) < 0) | ||
| 99 | goto out; | ||
| 100 | |||
| 101 | err = symbol__inc_addr_samples(bx->from.sym, | ||
| 102 | bx->from.map, | ||
| 103 | evsel->idx, | ||
| 104 | bx->from.al_addr); | ||
| 105 | if (err) | ||
| 106 | goto out; | ||
| 107 | } | ||
| 108 | |||
| 109 | if (bx->to.sym && use_browser > 0) { | ||
| 110 | notes = symbol__annotation(bx->to.sym); | ||
| 111 | if (!notes->src | ||
| 112 | && symbol__alloc_hist(bx->to.sym) < 0) | ||
| 113 | goto out; | ||
| 114 | |||
| 115 | err = symbol__inc_addr_samples(bx->to.sym, | ||
| 116 | bx->to.map, | ||
| 117 | evsel->idx, | ||
| 118 | bx->to.al_addr); | ||
| 119 | if (err) | ||
| 120 | goto out; | ||
| 121 | } | ||
| 122 | evsel->hists.stats.total_period += 1; | ||
| 123 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); | ||
| 124 | err = 0; | ||
| 125 | } else | ||
| 126 | return -ENOMEM; | ||
| 127 | } | ||
| 128 | out: | ||
| 129 | return err; | ||
| 130 | } | ||
| 131 | |||
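To see why the pseudo period of 1 yields a branch percentage rather than an event percentage, consider assumed numbers: 1000 samples each carrying a 16-deep stack give a total_period of 16000 branches; a branch pair hit 800 times is then reported as 800 / 16000 = 5% of all captured branches, regardless of the event's sampling period.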
| 56 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, | 132 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, |
| 57 | struct addr_location *al, | 133 | struct addr_location *al, |
| 58 | struct perf_sample *sample, | 134 | struct perf_sample *sample, |
| @@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool, | |||
| 126 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) | 202 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) |
| 127 | return 0; | 203 | return 0; |
| 128 | 204 | ||
| 129 | if (al.map != NULL) | 205 | if (sort__branch_mode == 1) { |
| 130 | al.map->dso->hit = 1; | 206 | if (perf_report__add_branch_hist_entry(tool, &al, sample, |
| 207 | evsel, machine)) { | ||
| 208 | pr_debug("problem adding lbr entry, skipping event\n"); | ||
| 209 | return -1; | ||
| 210 | } | ||
| 211 | } else { | ||
| 212 | if (al.map != NULL) | ||
| 213 | al.map->dso->hit = 1; | ||
| 131 | 214 | ||
| 132 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { | 215 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { |
| 133 | pr_debug("problem incrementing symbol period, skipping event\n"); | 216 | pr_debug("problem incrementing symbol period, skipping event\n"); |
| 134 | return -1; | 217 | return -1; |
| 218 | } | ||
| 135 | } | 219 | } |
| 136 | |||
| 137 | return 0; | 220 | return 0; |
| 138 | } | 221 | } |
| 139 | 222 | ||
| @@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) | |||
| 188 | } | 271 | } |
| 189 | } | 272 | } |
| 190 | 273 | ||
| 274 | if (sort__branch_mode == 1) { | ||
| 275 | if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { | ||
| 276 | fprintf(stderr, "selected -b but no branch data." | ||
| 277 | " Did you call perf record without" | ||
| 278 | " -b?\n"); | ||
| 279 | return -1; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 191 | return 0; | 283 | return 0; |
| 192 | } | 284 | } |
| 193 | 285 | ||
| @@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) | |||
| 246 | { | 338 | { |
| 247 | int ret = -EINVAL; | 339 | int ret = -EINVAL; |
| 248 | u64 nr_samples; | 340 | u64 nr_samples; |
| 249 | struct perf_session *session; | 341 | struct perf_session *session = rep->session; |
| 250 | struct perf_evsel *pos; | 342 | struct perf_evsel *pos; |
| 251 | struct map *kernel_map; | 343 | struct map *kernel_map; |
| 252 | struct kmap *kernel_kmap; | 344 | struct kmap *kernel_kmap; |
| @@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) | |||
| 254 | 346 | ||
| 255 | signal(SIGINT, sig_handler); | 347 | signal(SIGINT, sig_handler); |
| 256 | 348 | ||
| 257 | session = perf_session__new(rep->input_name, O_RDONLY, | ||
| 258 | rep->force, false, &rep->tool); | ||
| 259 | if (session == NULL) | ||
| 260 | return -ENOMEM; | ||
| 261 | |||
| 262 | rep->session = session; | ||
| 263 | |||
| 264 | if (rep->cpu_list) { | 349 | if (rep->cpu_list) { |
| 265 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, | 350 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, |
| 266 | rep->cpu_bitmap); | 351 | rep->cpu_bitmap); |
| @@ -427,9 +512,19 @@ setup: | |||
| 427 | return 0; | 512 | return 0; |
| 428 | } | 513 | } |
| 429 | 514 | ||
| 515 | static int | ||
| 516 | parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) | ||
| 517 | { | ||
| 518 | sort__branch_mode = !unset; | ||
| 519 | return 0; | ||
| 520 | } | ||
| 521 | |||
| 430 | int cmd_report(int argc, const char **argv, const char *prefix __used) | 522 | int cmd_report(int argc, const char **argv, const char *prefix __used) |
| 431 | { | 523 | { |
| 524 | struct perf_session *session; | ||
| 432 | struct stat st; | 525 | struct stat st; |
| 526 | bool has_br_stack = false; | ||
| 527 | int ret = -1; | ||
| 433 | char callchain_default_opt[] = "fractal,0.5,callee"; | 528 | char callchain_default_opt[] = "fractal,0.5,callee"; |
| 434 | const char * const report_usage[] = { | 529 | const char * const report_usage[] = { |
| 435 | "perf report [<options>]", | 530 | "perf report [<options>]", |
| @@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 477 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, | 572 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, |
| 478 | "Use the stdio interface"), | 573 | "Use the stdio interface"), |
| 479 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 574 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
| 480 | "sort by key(s): pid, comm, dso, symbol, parent"), | 575 | "sort by key(s): pid, comm, dso, symbol, parent, dso_to," |
| 576 | " dso_from, symbol_to, symbol_from, mispredict"), | ||
| 481 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, | 577 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, |
| 482 | "Show sample percentage for different cpu modes"), | 578 | "Show sample percentage for different cpu modes"), |
| 483 | OPT_STRING('p', "parent", &parent_pattern, "regex", | 579 | OPT_STRING('p', "parent", &parent_pattern, "regex", |
| @@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 517 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 613 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
| 518 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, | 614 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, |
| 519 | "Show a column with the sum of periods"), | 615 | "Show a column with the sum of periods"), |
| 616 | OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", | ||
| 617 | "use branch records for histogram filling", parse_branch_mode), | ||
| 520 | OPT_END() | 618 | OPT_END() |
| 521 | }; | 619 | }; |
| 522 | 620 | ||
| @@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 536 | else | 634 | else |
| 537 | report.input_name = "perf.data"; | 635 | report.input_name = "perf.data"; |
| 538 | } | 636 | } |
| 637 | session = perf_session__new(report.input_name, O_RDONLY, | ||
| 638 | report.force, false, &report.tool); | ||
| 639 | if (session == NULL) | ||
| 640 | return -ENOMEM; | ||
| 539 | 641 | ||
| 540 | if (strcmp(report.input_name, "-") != 0) | 642 | report.session = session; |
| 643 | |||
| 644 | has_br_stack = perf_header__has_feat(&session->header, | ||
| 645 | HEADER_BRANCH_STACK); | ||
| 646 | |||
| 647 | if (sort__branch_mode == -1 && has_br_stack) | ||
| 648 | sort__branch_mode = 1; | ||
| 649 | |||
| 650 | /* sort__branch_mode could be 0 if --no-branch-stack */ | ||
| 651 | if (sort__branch_mode == 1) { | ||
| 652 | /* | ||
| 653 | * if no sort_order is provided, then specify | ||
| 654 | * branch-mode specific order | ||
| 655 | */ | ||
| 656 | if (sort_order == default_sort_order) | ||
| 657 | sort_order = "comm,dso_from,symbol_from," | ||
| 658 | "dso_to,symbol_to"; | ||
| 659 | |||
| 660 | } | ||
| 661 | |||
| 662 | if (strcmp(report.input_name, "-") != 0) { | ||
| 541 | setup_browser(true); | 663 | setup_browser(true); |
| 542 | else | 664 | } else { |
| 543 | use_browser = 0; | 665 | use_browser = 0; |
| 666 | } | ||
| 544 | 667 | ||
| 545 | /* | 668 | /* |
| 546 | * Only in the newt browser we are doing integrated annotation, | 669 | * Only in the newt browser we are doing integrated annotation, |
| @@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 568 | } | 691 | } |
| 569 | 692 | ||
| 570 | if (symbol__init() < 0) | 693 | if (symbol__init() < 0) |
| 571 | return -1; | 694 | goto error; |
| 572 | 695 | ||
| 573 | setup_sorting(report_usage, options); | 696 | setup_sorting(report_usage, options); |
| 574 | 697 | ||
| 575 | if (parent_pattern != default_parent_pattern) { | 698 | if (parent_pattern != default_parent_pattern) { |
| 576 | if (sort_dimension__add("parent") < 0) | 699 | if (sort_dimension__add("parent") < 0) |
| 577 | return -1; | 700 | goto error; |
| 578 | 701 | ||
| 579 | /* | 702 | /* |
| 580 | * Only show the parent fields if we explicitly | 703 | * Only show the parent fields if we explicitly |
| @@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
| 592 | if (argc) | 715 | if (argc) |
| 593 | usage_with_options(report_usage, options); | 716 | usage_with_options(report_usage, options); |
| 594 | 717 | ||
| 595 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
| 596 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); | 718 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); |
| 597 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
| 598 | 719 | ||
| 599 | return __cmd_report(&report); | 720 | if (sort__branch_mode == 1) { |
| 721 | sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); | ||
| 722 | sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); | ||
| 723 | sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); | ||
| 724 | sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); | ||
| 725 | } else { | ||
| 726 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
| 727 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
| 728 | } | ||
| 729 | |||
| 730 | ret = __cmd_report(&report); | ||
| 731 | error: | ||
| 732 | perf_session__delete(session); | ||
| 733 | return ret; | ||
| 600 | } | 734 | } |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f0227e93665d..eec392e48067 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
| @@ -179,6 +179,23 @@ struct ip_callchain { | |||
| 179 | u64 ips[0]; | 179 | u64 ips[0]; |
| 180 | }; | 180 | }; |
| 181 | 181 | ||
| 182 | struct branch_flags { | ||
| 183 | u64 mispred:1; | ||
| 184 | u64 predicted:1; | ||
| 185 | u64 reserved:62; | ||
| 186 | }; | ||
| 187 | |||
| 188 | struct branch_entry { | ||
| 189 | u64 from; | ||
| 190 | u64 to; | ||
| 191 | struct branch_flags flags; | ||
| 192 | }; | ||
| 193 | |||
| 194 | struct branch_stack { | ||
| 195 | u64 nr; | ||
| 196 | struct branch_entry entries[0]; | ||
| 197 | }; | ||
| 198 | |||
| 182 | extern bool perf_host, perf_guest; | 199 | extern bool perf_host, perf_guest; |
| 183 | extern const char perf_version_string[]; | 200 | extern const char perf_version_string[]; |
| 184 | 201 | ||
| @@ -205,6 +222,7 @@ struct perf_record_opts { | |||
| 205 | unsigned int freq; | 222 | unsigned int freq; |
| 206 | unsigned int mmap_pages; | 223 | unsigned int mmap_pages; |
| 207 | unsigned int user_freq; | 224 | unsigned int user_freq; |
| 225 | int branch_stack; | ||
| 208 | u64 default_interval; | 226 | u64 default_interval; |
| 209 | u64 user_interval; | 227 | u64 user_interval; |
| 210 | const char *cpu_list; | 228 | const char *cpu_list; |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e5..1b197280c621 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
| @@ -81,6 +81,7 @@ struct perf_sample { | |||
| 81 | u32 raw_size; | 81 | u32 raw_size; |
| 82 | void *raw_data; | 82 | void *raw_data; |
| 83 | struct ip_callchain *callchain; | 83 | struct ip_callchain *callchain; |
| 84 | struct branch_stack *branch_stack; | ||
| 84 | }; | 85 | }; |
| 85 | 86 | ||
| 86 | #define BUILD_ID_SIZE 20 | 87 | #define BUILD_ID_SIZE 20 |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 302d49a9f985..f421f7cbc0d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
| @@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
| 126 | attr->watermark = 0; | 126 | attr->watermark = 0; |
| 127 | attr->wakeup_events = 1; | 127 | attr->wakeup_events = 1; |
| 128 | } | 128 | } |
| 129 | if (opts->branch_stack) { | ||
| 130 | attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; | ||
| 131 | attr->branch_sample_type = opts->branch_stack; | ||
| 132 | } | ||
| 129 | 133 | ||
| 130 | attr->mmap = track; | 134 | attr->mmap = track; |
| 131 | attr->comm = track; | 135 | attr->comm = track; |
| @@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
| 576 | data->raw_data = (void *) pdata; | 580 | data->raw_data = (void *) pdata; |
| 577 | } | 581 | } |
| 578 | 582 | ||
| 583 | if (type & PERF_SAMPLE_BRANCH_STACK) { | ||
| 584 | u64 sz; | ||
| 585 | |||
| 586 | data->branch_stack = (struct branch_stack *)array; | ||
| 587 | array++; /* nr */ | ||
| 588 | |||
| 589 | sz = data->branch_stack->nr * sizeof(struct branch_entry); | ||
| 590 | sz /= sizeof(u64); | ||
| 591 | array += sz; | ||
| 592 | } | ||
| 579 | return 0; | 593 | return 0; |
| 580 | } | 594 | } |
| 581 | 595 | ||
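As a worked example of the cursor arithmetic above (numbers assumed): for a 16-deep stack, array++ consumes the u64 nr word, then sz = 16 * 24 / 8 = 48 words of entries are skipped, 49 words (392 bytes) in total, matching what perf_prepare_sample() reserved on the kernel side.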
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9f867d96c6a5..0d9b6da86a39 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
| @@ -1023,6 +1023,12 @@ write_it: | |||
| 1023 | return do_write_string(fd, buffer); | 1023 | return do_write_string(fd, buffer); |
| 1024 | } | 1024 | } |
| 1025 | 1025 | ||
| 1026 | static int write_branch_stack(int fd __used, struct perf_header *h __used, | ||
| 1027 | struct perf_evlist *evlist __used) | ||
| 1028 | { | ||
| 1029 | return 0; | ||
| 1030 | } | ||
| 1031 | |||
| 1026 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) | 1032 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) |
| 1027 | { | 1033 | { |
| 1028 | char *str = do_read_string(fd, ph); | 1034 | char *str = do_read_string(fd, ph); |
| @@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
| 1144 | uint64_t id; | 1150 | uint64_t id; |
| 1145 | void *buf = NULL; | 1151 | void *buf = NULL; |
| 1146 | char *str; | 1152 | char *str; |
| 1147 | u32 nre, sz, nr, i, j, msz; | 1153 | u32 nre, sz, nr, i, j; |
| 1148 | int ret; | 1154 | ssize_t ret; |
| 1155 | size_t msz; | ||
| 1149 | 1156 | ||
| 1150 | /* number of events */ | 1157 | /* number of events */ |
| 1151 | ret = read(fd, &nre, sizeof(nre)); | 1158 | ret = read(fd, &nre, sizeof(nre)); |
| @@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
| 1162 | if (ph->needs_swap) | 1169 | if (ph->needs_swap) |
| 1163 | sz = bswap_32(sz); | 1170 | sz = bswap_32(sz); |
| 1164 | 1171 | ||
| 1165 | /* | ||
| 1166 | * ensure it is at least to our ABI rev | ||
| 1167 | */ | ||
| 1168 | if (sz < (u32)sizeof(attr)) | ||
| 1169 | goto error; | ||
| 1170 | |||
| 1171 | memset(&attr, 0, sizeof(attr)); | 1172 | memset(&attr, 0, sizeof(attr)); |
| 1172 | 1173 | ||
| 1173 | /* read entire region to sync up to next field */ | 1174 | /* buffer to hold on file attr struct */ |
| 1174 | buf = malloc(sz); | 1175 | buf = malloc(sz); |
| 1175 | if (!buf) | 1176 | if (!buf) |
| 1176 | goto error; | 1177 | goto error; |
| 1177 | 1178 | ||
| 1178 | msz = sizeof(attr); | 1179 | msz = sizeof(attr); |
| 1179 | if (sz < msz) | 1180 | if (sz < (ssize_t)msz) |
| 1180 | msz = sz; | 1181 | msz = sz; |
| 1181 | 1182 | ||
| 1182 | for (i = 0 ; i < nre; i++) { | 1183 | for (i = 0 ; i < nre; i++) { |
| 1183 | 1184 | ||
| 1185 | /* | ||
| 1186 | * must read entire on-file attr struct to | ||
| 1187 | * sync up with layout. | ||
| 1188 | */ | ||
| 1184 | ret = read(fd, buf, sz); | 1189 | ret = read(fd, buf, sz); |
| 1185 | if (ret != (ssize_t)sz) | 1190 | if (ret != (ssize_t)sz) |
| 1186 | goto error; | 1191 | goto error; |
| @@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) | |||
| 1316 | free(str); | 1321 | free(str); |
| 1317 | } | 1322 | } |
| 1318 | 1323 | ||
| 1324 | static void print_branch_stack(struct perf_header *ph __used, int fd __used, | ||
| 1325 | FILE *fp) | ||
| 1326 | { | ||
| 1327 | fprintf(fp, "# contains samples with branch stack\n"); | ||
| 1328 | } | ||
| 1329 | |||
| 1319 | static int __event_process_build_id(struct build_id_event *bev, | 1330 | static int __event_process_build_id(struct build_id_event *bev, |
| 1320 | char *filename, | 1331 | char *filename, |
| 1321 | struct perf_session *session) | 1332 | struct perf_session *session) |
| @@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | |||
| 1520 | FEAT_OPA(HEADER_CMDLINE, cmdline), | 1531 | FEAT_OPA(HEADER_CMDLINE, cmdline), |
| 1521 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), | 1532 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), |
| 1522 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), | 1533 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), |
| 1534 | FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), | ||
| 1523 | }; | 1535 | }; |
| 1524 | 1536 | ||
| 1525 | struct header_print_data { | 1537 | struct header_print_data { |
| @@ -1804,35 +1816,101 @@ out_free: | |||
| 1804 | return err; | 1816 | return err; |
| 1805 | } | 1817 | } |
| 1806 | 1818 | ||
| 1807 | static int check_magic_endian(u64 *magic, struct perf_file_header *header, | 1819 | static const int attr_file_abi_sizes[] = { |
| 1808 | struct perf_header *ph) | 1820 | [0] = PERF_ATTR_SIZE_VER0, |
| 1821 | [1] = PERF_ATTR_SIZE_VER1, | ||
| 1822 | 0, | ||
| 1823 | }; | ||
| 1824 | |||
| 1825 | /* | ||
| 1826 | * In the legacy file format, the magic number does not encode endianness; | ||
| 1827 | * hdr_sz was used for that instead. But given that hdr_sz can vary based | ||
| 1828 | * on ABI revisions, we need to try all combinations for all endianness to | ||
| 1829 | * detect the endianness. | ||
| 1830 | */ | ||
| 1831 | static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
| 1809 | { | 1832 | { |
| 1810 | int ret; | 1833 | uint64_t ref_size, attr_size; |
| 1834 | int i; | ||
| 1811 | 1835 | ||
| 1812 | /* check for legacy format */ | 1836 | for (i = 0 ; attr_file_abi_sizes[i]; i++) { |
| 1813 | ret = memcmp(magic, __perf_magic1, sizeof(*magic)); | 1837 | ref_size = attr_file_abi_sizes[i] |
| 1814 | if (ret == 0) { | 1838 | + sizeof(struct perf_file_section); |
| 1815 | pr_debug("legacy perf.data format\n"); | 1839 | if (hdr_sz != ref_size) { |
| 1816 | if (!header) | 1840 | attr_size = bswap_64(hdr_sz); |
| 1817 | return -1; | 1841 | if (attr_size != ref_size) |
| 1842 | continue; | ||
| 1818 | 1843 | ||
| 1819 | if (header->attr_size != sizeof(struct perf_file_attr)) { | 1844 | ph->needs_swap = true; |
| 1820 | u64 attr_size = bswap_64(header->attr_size); | 1845 | } |
| 1846 | pr_debug("ABI%d perf.data file detected, need_swap=%d\n", | ||
| 1847 | i, | ||
| 1848 | ph->needs_swap); | ||
| 1849 | return 0; | ||
| 1850 | } | ||
| 1851 | /* could not determine endianness */ | ||
| 1852 | return -1; | ||
| 1853 | } | ||
| 1821 | 1854 | ||
| 1822 | if (attr_size != sizeof(struct perf_file_attr)) | 1855 | #define PERF_PIPE_HDR_VER0 16 |
| 1823 | return -1; | 1856 | |
| 1857 | static const size_t attr_pipe_abi_sizes[] = { | ||
| 1858 | [0] = PERF_PIPE_HDR_VER0, | ||
| 1859 | 0, | ||
| 1860 | }; | ||
| 1861 | |||
| 1862 | /* | ||
| 1863 | * In the legacy pipe format, there is an implicit assumption that endiannesss | ||
| 1864 | * between host recording the samples, and host parsing the samples is the | ||
| 1865 | * same. This is not always the case given that the pipe output may always be | ||
| 1866 | * redirected into a file and analyzed on a different machine with possibly a | ||
| 1867 | * different endianness and perf_event ABI revsions in the perf tool itself. | ||
| 1868 | */ | ||
| 1869 | static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
| 1870 | { | ||
| 1871 | u64 attr_size; | ||
| 1872 | int i; | ||
| 1873 | |||
| 1874 | for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { | ||
| 1875 | if (hdr_sz != attr_pipe_abi_sizes[i]) { | ||
| 1876 | attr_size = bswap_64(hdr_sz); | ||
| 1877 | if (attr_size != hdr_sz) | ||
| 1878 | continue; | ||
| 1824 | 1879 | ||
| 1825 | ph->needs_swap = true; | 1880 | ph->needs_swap = true; |
| 1826 | } | 1881 | } |
| 1882 | pr_debug("Pipe ABI%d perf.data file detected\n", i); | ||
| 1827 | return 0; | 1883 | return 0; |
| 1828 | } | 1884 | } |
| 1885 | return -1; | ||
| 1886 | } | ||
| 1887 | |||
| 1888 | static int check_magic_endian(u64 magic, uint64_t hdr_sz, | ||
| 1889 | bool is_pipe, struct perf_header *ph) | ||
| 1890 | { | ||
| 1891 | int ret; | ||
| 1892 | |||
| 1893 | /* check for legacy format */ | ||
| 1894 | ret = memcmp(&magic, __perf_magic1, sizeof(magic)); | ||
| 1895 | if (ret == 0) { | ||
| 1896 | pr_debug("legacy perf.data format\n"); | ||
| 1897 | if (is_pipe) | ||
| 1898 | return try_all_pipe_abis(hdr_sz, ph); | ||
| 1899 | |||
| 1900 | return try_all_file_abis(hdr_sz, ph); | ||
| 1901 | } | ||
| 1902 | /* | ||
| 1903 | * the new magic number serves two purposes: | ||
| 1904 | * - unique number to identify actual perf.data files | ||
| 1905 | * - encode endianness of file | ||
| 1906 | */ | ||
| 1829 | 1907 | ||
| 1830 | /* check magic number with same endianness */ | 1908 | /* check magic number with one endianness */ |
| 1831 | if (*magic == __perf_magic2) | 1909 | if (magic == __perf_magic2) |
| 1832 | return 0; | 1910 | return 0; |
| 1833 | 1911 | ||
| 1834 | /* check magic number but opposite endianness */ | 1912 | /* check magic number with opposite endianness */ |
| 1835 | if (*magic != __perf_magic2_sw) | 1913 | if (magic != __perf_magic2_sw) |
| 1836 | return -1; | 1914 | return -1; |
| 1837 | 1915 | ||
| 1838 | ph->needs_swap = true; | 1916 | ph->needs_swap = true; |
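A hedged sketch of the check the new-style magic enables, using byteswap.h's bswap_64 in place of the kernel helper:

	#include <byteswap.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* sketch: the magic value itself reveals the writer's byte order */
	static int detect_endianness(uint64_t magic, uint64_t native_magic,
				     bool *needs_swap)
	{
		if (magic == native_magic) {
			*needs_swap = false;
			return 0;
		}
		if (magic == bswap_64(native_magic)) {
			*needs_swap = true;
			return 0;
		}
		return -1;	/* not a new-style perf.data file */
	}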
| @@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header, | |||
| 1851 | if (ret <= 0) | 1929 | if (ret <= 0) |
| 1852 | return -1; | 1930 | return -1; |
| 1853 | 1931 | ||
| 1854 | if (check_magic_endian(&header->magic, header, ph) < 0) | 1932 | if (check_magic_endian(header->magic, |
| 1933 | header->attr_size, false, ph) < 0) { | ||
| 1934 | pr_debug("magic/endian check failed\n"); | ||
| 1855 | return -1; | 1935 | return -1; |
| 1936 | } | ||
| 1856 | 1937 | ||
| 1857 | if (ph->needs_swap) { | 1938 | if (ph->needs_swap) { |
| 1858 | mem_bswap_64(header, offsetof(struct perf_file_header, | 1939 | mem_bswap_64(header, offsetof(struct perf_file_header, |
| @@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, | |||
| 1939 | if (ret <= 0) | 2020 | if (ret <= 0) |
| 1940 | return -1; | 2021 | return -1; |
| 1941 | 2022 | ||
| 1942 | if (check_magic_endian(&header->magic, NULL, ph) < 0) | 2023 | if (check_magic_endian(header->magic, header->size, true, ph) < 0) { |
| 2024 | pr_debug("endian/magic failed\n"); | ||
| 1943 | return -1; | 2025 | return -1; |
| 2026 | } | ||
| 2027 | |||
| 2028 | if (ph->needs_swap) | ||
| 2029 | header->size = bswap_64(header->size); | ||
| 1944 | 2030 | ||
| 1945 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) | 2031 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) |
| 1946 | return -1; | 2032 | return -1; |
| 1947 | 2033 | ||
| 1948 | if (header->size != sizeof(*header)) { | ||
| 1949 | u64 size = bswap_64(header->size); | ||
| 1950 | |||
| 1951 | if (size != sizeof(*header)) | ||
| 1952 | return -1; | ||
| 1953 | |||
| 1954 | ph->needs_swap = true; | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | return 0; | 2034 | return 0; |
| 1958 | } | 2035 | } |
| 1959 | 2036 | ||
| @@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) | |||
| 1973 | return 0; | 2050 | return 0; |
| 1974 | } | 2051 | } |
| 1975 | 2052 | ||
| 2053 | static int read_attr(int fd, struct perf_header *ph, | ||
| 2054 | struct perf_file_attr *f_attr) | ||
| 2055 | { | ||
| 2056 | struct perf_event_attr *attr = &f_attr->attr; | ||
| 2057 | size_t sz, left; | ||
| 2058 | size_t our_sz = sizeof(f_attr->attr); | ||
| 2059 | int ret; | ||
| 2060 | |||
| 2061 | memset(f_attr, 0, sizeof(*f_attr)); | ||
| 2062 | |||
| 2063 | /* read minimal guaranteed structure */ | ||
| 2064 | ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); | ||
| 2065 | if (ret <= 0) { | ||
| 2066 | pr_debug("cannot read %d bytes of header attr\n", | ||
| 2067 | PERF_ATTR_SIZE_VER0); | ||
| 2068 | return -1; | ||
| 2069 | } | ||
| 2070 | |||
| 2071 | /* size of perf_event_attr on file */ | ||
| 2072 | sz = attr->size; | ||
| 2073 | |||
| 2074 | if (ph->needs_swap) | ||
| 2075 | sz = bswap_32(sz); | ||
| 2076 | |||
| 2077 | if (sz == 0) { | ||
| 2078 | /* assume ABI0 */ | ||
| 2079 | sz = PERF_ATTR_SIZE_VER0; | ||
| 2080 | } else if (sz > our_sz) { | ||
| 2081 | pr_debug("file uses a more recent and unsupported ABI" | ||
| 2082 | " (%zu bytes extra)\n", sz - our_sz); | ||
| 2083 | return -1; | ||
| 2084 | } | ||
| 2085 | /* what we have not yet read, but do know about */ | ||
| 2086 | left = sz - PERF_ATTR_SIZE_VER0; | ||
| 2087 | if (left) { | ||
| 2088 | void *ptr = attr; | ||
| 2089 | ptr += PERF_ATTR_SIZE_VER0; | ||
| 2090 | |||
| 2091 | ret = readn(fd, ptr, left); | ||
| 2092 | } | ||
| 2093 | /* read perf_file_section, ids are read in caller */ | ||
| 2094 | ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); | ||
| 2095 | |||
| 2096 | return ret <= 0 ? -1 : 0; | ||
| 2097 | } | ||
| 2098 | |||
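read_attr() follows the classic versioned-struct pattern: read the guaranteed v0 prefix, learn the on-file record size from a field inside that prefix, then read only as much of the remainder as this binary understands. A generic, self-contained sketch of the same pattern — the struct layout and names are hypothetical, not perf's:

#include <stdint.h>
#include <string.h>
#include <unistd.h>

struct rec_v0 { uint32_t size; uint32_t flags; };     /* ABI version 0 */
struct rec    { struct rec_v0 v0; uint64_t extra; };  /* current ABI */

static int read_full(int fd, void *buf, size_t len)   /* readn() stand-in */
{
        char *p = buf;
        while (len) {
                ssize_t n = read(fd, p, len);
                if (n <= 0)
                        return -1;
                p += n;
                len -= (size_t)n;
        }
        return 0;
}

static int read_versioned(int fd, struct rec *r)
{
        const size_t v0_sz = sizeof(struct rec_v0);
        size_t on_file;

        memset(r, 0, sizeof(*r));
        if (read_full(fd, r, v0_sz) < 0)              /* guaranteed minimum */
                return -1;

        on_file = r->v0.size;
        if (on_file == 0)
                on_file = v0_sz;                      /* oldest writers left it 0 */
        else if (on_file > sizeof(*r))
                return -1;                            /* file is newer than us */

        if (on_file > v0_sz)                          /* the tail we do know about */
                return read_full(fd, (char *)r + v0_sz, on_file - v0_sz);
        return 0;
}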
| 1976 | int perf_session__read_header(struct perf_session *session, int fd) | 2099 | int perf_session__read_header(struct perf_session *session, int fd) |
| 1977 | { | 2100 | { |
| 1978 | struct perf_header *header = &session->header; | 2101 | struct perf_header *header = &session->header; |
| @@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) | |||
| 1988 | if (session->fd_pipe) | 2111 | if (session->fd_pipe) |
| 1989 | return perf_header__read_pipe(session, fd); | 2112 | return perf_header__read_pipe(session, fd); |
| 1990 | 2113 | ||
| 1991 | if (perf_file_header__read(&f_header, header, fd) < 0) { | 2114 | if (perf_file_header__read(&f_header, header, fd) < 0) |
| 1992 | pr_debug("incompatible file format\n"); | ||
| 1993 | return -EINVAL; | 2115 | return -EINVAL; |
| 1994 | } | ||
| 1995 | 2116 | ||
| 1996 | nr_attrs = f_header.attrs.size / sizeof(f_attr); | 2117 | nr_attrs = f_header.attrs.size / f_header.attr_size; |
| 1997 | lseek(fd, f_header.attrs.offset, SEEK_SET); | 2118 | lseek(fd, f_header.attrs.offset, SEEK_SET); |
| 1998 | 2119 | ||
| 1999 | for (i = 0; i < nr_attrs; i++) { | 2120 | for (i = 0; i < nr_attrs; i++) { |
| 2000 | struct perf_evsel *evsel; | 2121 | struct perf_evsel *evsel; |
| 2001 | off_t tmp; | 2122 | off_t tmp; |
| 2002 | 2123 | ||
| 2003 | if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) | 2124 | if (read_attr(fd, header, &f_attr) < 0) |
| 2004 | goto out_errno; | 2125 | goto out_errno; |
| 2005 | 2126 | ||
| 2006 | if (header->needs_swap) | 2127 | if (header->needs_swap) |
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index e68f617d082f..21a6be09c129 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
| @@ -27,7 +27,7 @@ enum { | |||
| 27 | HEADER_EVENT_DESC, | 27 | HEADER_EVENT_DESC, |
| 28 | HEADER_CPU_TOPOLOGY, | 28 | HEADER_CPU_TOPOLOGY, |
| 29 | HEADER_NUMA_TOPOLOGY, | 29 | HEADER_NUMA_TOPOLOGY, |
| 30 | 30 | HEADER_BRANCH_STACK, | |
| 31 | HEADER_LAST_FEATURE, | 31 | HEADER_LAST_FEATURE, |
| 32 | HEADER_FEAT_BITS = 256, | 32 | HEADER_FEAT_BITS = 256, |
| 33 | }; | 33 | }; |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f505d1abac7..8380c3db1c92 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
| @@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) | |||
| 50 | hists__set_col_len(hists, col, 0); | 50 | hists__set_col_len(hists, col, 0); |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | static void hists__set_unres_dso_col_len(struct hists *hists, int dso) | ||
| 54 | { | ||
| 55 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
| 56 | |||
| 57 | if (hists__col_len(hists, dso) < unresolved_col_width && | ||
| 58 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
| 59 | !symbol_conf.dso_list) | ||
| 60 | hists__set_col_len(hists, dso, unresolved_col_width); | ||
| 61 | } | ||
| 62 | |||
| 53 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | 63 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) |
| 54 | { | 64 | { |
| 65 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
| 55 | u16 len; | 66 | u16 len; |
| 56 | 67 | ||
| 57 | if (h->ms.sym) | 68 | if (h->ms.sym) |
| 58 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); | 69 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); |
| 59 | else { | 70 | else |
| 60 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | 71 | hists__set_unres_dso_col_len(hists, HISTC_DSO); |
| 61 | |||
| 62 | if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && | ||
| 63 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
| 64 | !symbol_conf.dso_list) | ||
| 65 | hists__set_col_len(hists, HISTC_DSO, | ||
| 66 | unresolved_col_width); | ||
| 67 | } | ||
| 68 | 72 | ||
| 69 | len = thread__comm_len(h->thread); | 73 | len = thread__comm_len(h->thread); |
| 70 | if (hists__new_col_len(hists, HISTC_COMM, len)) | 74 | if (hists__new_col_len(hists, HISTC_COMM, len)) |
| @@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
| 74 | len = dso__name_len(h->ms.map->dso); | 78 | len = dso__name_len(h->ms.map->dso); |
| 75 | hists__new_col_len(hists, HISTC_DSO, len); | 79 | hists__new_col_len(hists, HISTC_DSO, len); |
| 76 | } | 80 | } |
| 81 | |||
| 82 | if (h->branch_info) { | ||
| 83 | int symlen; | ||
| 84 | /* | ||
| 85 | * +4 accounts for '[x] ' priv level info | ||
| 86 | * +2 accounts for the 0x prefix on raw addresses | ||
| 87 | */ | ||
| 88 | if (h->branch_info->from.sym) { | ||
| 89 | symlen = (int)h->branch_info->from.sym->namelen + 4; | ||
| 90 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
| 91 | |||
| 92 | symlen = dso__name_len(h->branch_info->from.map->dso); | ||
| 93 | hists__new_col_len(hists, HISTC_DSO_FROM, symlen); | ||
| 94 | } else { | ||
| 95 | symlen = unresolved_col_width + 4 + 2; | ||
| 96 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
| 97 | hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); | ||
| 98 | } | ||
| 99 | |||
| 100 | if (h->branch_info->to.sym) { | ||
| 101 | symlen = (int)h->branch_info->to.sym->namelen + 4; | ||
| 102 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
| 103 | |||
| 104 | symlen = dso__name_len(h->branch_info->to.map->dso); | ||
| 105 | hists__new_col_len(hists, HISTC_DSO_TO, symlen); | ||
| 106 | } else { | ||
| 107 | symlen = unresolved_col_width + 4 + 2; | ||
| 108 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
| 109 | hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); | ||
| 110 | } | ||
| 111 | } | ||
| 77 | } | 112 | } |
| 78 | 113 | ||
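For reference, the constants above work out to a fixed unresolved-address width: on a 64-bit build BITS_PER_LONG / 4 is 16 hex digits, plus 4 columns for the "[k] " privilege-level marker and 2 for the "0x" prefix. A trivial standalone recomputation (assuming an LP64 build):

#include <stdio.h>

int main(void)
{
        const unsigned bits_per_long = 64;             /* assumption: LP64 build */
        const unsigned unresolved = bits_per_long / 4; /* 16 hex digits */

        printf("%u columns\n", unresolved + 4 + 2);    /* prints "22 columns" */
        return 0;
}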
| 79 | static void hist_entry__add_cpumode_period(struct hist_entry *he, | 114 | static void hist_entry__add_cpumode_period(struct hist_entry *he, |
| @@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) | |||
| 195 | return 0; | 230 | return 0; |
| 196 | } | 231 | } |
| 197 | 232 | ||
| 198 | struct hist_entry *__hists__add_entry(struct hists *hists, | 233 | static struct hist_entry *add_hist_entry(struct hists *hists, |
| 234 | struct hist_entry *entry, | ||
| 199 | struct addr_location *al, | 235 | struct addr_location *al, |
| 200 | struct symbol *sym_parent, u64 period) | 236 | u64 period) |
| 201 | { | 237 | { |
| 202 | struct rb_node **p; | 238 | struct rb_node **p; |
| 203 | struct rb_node *parent = NULL; | 239 | struct rb_node *parent = NULL; |
| 204 | struct hist_entry *he; | 240 | struct hist_entry *he; |
| 205 | struct hist_entry entry = { | ||
| 206 | .thread = al->thread, | ||
| 207 | .ms = { | ||
| 208 | .map = al->map, | ||
| 209 | .sym = al->sym, | ||
| 210 | }, | ||
| 211 | .cpu = al->cpu, | ||
| 212 | .ip = al->addr, | ||
| 213 | .level = al->level, | ||
| 214 | .period = period, | ||
| 215 | .parent = sym_parent, | ||
| 216 | .filtered = symbol__parent_filter(sym_parent), | ||
| 217 | }; | ||
| 218 | int cmp; | 241 | int cmp; |
| 219 | 242 | ||
| 220 | pthread_mutex_lock(&hists->lock); | 243 | pthread_mutex_lock(&hists->lock); |
| @@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
| 225 | parent = *p; | 248 | parent = *p; |
| 226 | he = rb_entry(parent, struct hist_entry, rb_node_in); | 249 | he = rb_entry(parent, struct hist_entry, rb_node_in); |
| 227 | 250 | ||
| 228 | cmp = hist_entry__cmp(&entry, he); | 251 | cmp = hist_entry__cmp(entry, he); |
| 229 | 252 | ||
| 230 | if (!cmp) { | 253 | if (!cmp) { |
| 231 | he->period += period; | 254 | he->period += period; |
| @@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
| 239 | p = &(*p)->rb_right; | 262 | p = &(*p)->rb_right; |
| 240 | } | 263 | } |
| 241 | 264 | ||
| 242 | he = hist_entry__new(&entry); | 265 | he = hist_entry__new(entry); |
| 243 | if (!he) | 266 | if (!he) |
| 244 | goto out_unlock; | 267 | goto out_unlock; |
| 245 | 268 | ||
| @@ -252,6 +275,51 @@ out_unlock: | |||
| 252 | return he; | 275 | return he; |
| 253 | } | 276 | } |
| 254 | 277 | ||
| 278 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
| 279 | struct addr_location *al, | ||
| 280 | struct symbol *sym_parent, | ||
| 281 | struct branch_info *bi, | ||
| 282 | u64 period) | ||
| 283 | { | ||
| 284 | struct hist_entry entry = { | ||
| 285 | .thread = al->thread, | ||
| 286 | .ms = { | ||
| 287 | .map = bi->to.map, | ||
| 288 | .sym = bi->to.sym, | ||
| 289 | }, | ||
| 290 | .cpu = al->cpu, | ||
| 291 | .ip = bi->to.addr, | ||
| 292 | .level = al->level, | ||
| 293 | .period = period, | ||
| 294 | .parent = sym_parent, | ||
| 295 | .filtered = symbol__parent_filter(sym_parent), | ||
| 296 | .branch_info = bi, | ||
| 297 | }; | ||
| 298 | |||
| 299 | return add_hist_entry(self, &entry, al, period); | ||
| 300 | } | ||
| 301 | |||
| 302 | struct hist_entry *__hists__add_entry(struct hists *self, | ||
| 303 | struct addr_location *al, | ||
| 304 | struct symbol *sym_parent, u64 period) | ||
| 305 | { | ||
| 306 | struct hist_entry entry = { | ||
| 307 | .thread = al->thread, | ||
| 308 | .ms = { | ||
| 309 | .map = al->map, | ||
| 310 | .sym = al->sym, | ||
| 311 | }, | ||
| 312 | .cpu = al->cpu, | ||
| 313 | .ip = al->addr, | ||
| 314 | .level = al->level, | ||
| 315 | .period = period, | ||
| 316 | .parent = sym_parent, | ||
| 317 | .filtered = symbol__parent_filter(sym_parent), | ||
| 318 | }; | ||
| 319 | |||
| 320 | return add_hist_entry(self, &entry, al, period); | ||
| 321 | } | ||
| 322 | |||
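Both wrappers above materialize a hist_entry on the stack and funnel it through the shared add_hist_entry(); a branch-mode caller would invoke the first wrapper once per resolved branch. A hypothetical caller sketch — only __hists__add_branch_entry() and the struct types come from this patch:

/* hypothetical helper: one hist entry per resolved branch */
static void add_branch_entries(struct hists *hists, struct addr_location *al,
                               struct symbol *parent, struct branch_info *bi,
                               u64 nr, u64 period)
{
        u64 i;

        for (i = 0; i < nr; i++)
                if (__hists__add_branch_entry(hists, al, parent,
                                              &bi[i], period) == NULL)
                        break;                  /* allocation failed */
}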
| 255 | int64_t | 323 | int64_t |
| 256 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) | 324 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) |
| 257 | { | 325 | { |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 48e5acd1e862..9413f3e31fea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
| @@ -42,6 +42,11 @@ enum hist_column { | |||
| 42 | HISTC_COMM, | 42 | HISTC_COMM, |
| 43 | HISTC_PARENT, | 43 | HISTC_PARENT, |
| 44 | HISTC_CPU, | 44 | HISTC_CPU, |
| 45 | HISTC_MISPREDICT, | ||
| 46 | HISTC_SYMBOL_FROM, | ||
| 47 | HISTC_SYMBOL_TO, | ||
| 48 | HISTC_DSO_FROM, | ||
| 49 | HISTC_DSO_TO, | ||
| 45 | HISTC_NR_COLS, /* Last entry */ | 50 | HISTC_NR_COLS, /* Last entry */ |
| 46 | }; | 51 | }; |
| 47 | 52 | ||
| @@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, | |||
| 74 | struct hists *hists); | 79 | struct hists *hists); |
| 75 | void hist_entry__free(struct hist_entry *); | 80 | void hist_entry__free(struct hist_entry *); |
| 76 | 81 | ||
| 82 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
| 83 | struct addr_location *al, | ||
| 84 | struct symbol *sym_parent, | ||
| 85 | struct branch_info *bi, | ||
| 86 | u64 period); | ||
| 87 | |||
| 77 | void hists__output_resort(struct hists *self); | 88 | void hists__output_resort(struct hists *self); |
| 78 | void hists__output_resort_threaded(struct hists *hists); | 89 | void hists__output_resort_threaded(struct hists *hists); |
| 79 | void hists__collapse_resort(struct hists *self); | 90 | void hists__collapse_resort(struct hists *self); |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9f833cf9c6a9..002ebbf59f48 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
| @@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
| 24 | self->fd = STDIN_FILENO; | 24 | self->fd = STDIN_FILENO; |
| 25 | 25 | ||
| 26 | if (perf_session__read_header(self, self->fd) < 0) | 26 | if (perf_session__read_header(self, self->fd) < 0) |
| 27 | pr_err("incompatible file format"); | 27 | pr_err("incompatible file format (rerun with -v to learn more)"); |
| 28 | 28 | ||
| 29 | return 0; | 29 | return 0; |
| 30 | } | 30 | } |
| @@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | if (perf_session__read_header(self, self->fd) < 0) { | 58 | if (perf_session__read_header(self, self->fd) < 0) { |
| 59 | pr_err("incompatible file format"); | 59 | pr_err("incompatible file format (rerun with -v to learn more)"); |
| 60 | goto out_close; | 60 | goto out_close; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| @@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) | |||
| 229 | return 0; | 229 | return 0; |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | static const u8 cpumodes[] = { | ||
| 233 | PERF_RECORD_MISC_USER, | ||
| 234 | PERF_RECORD_MISC_KERNEL, | ||
| 235 | PERF_RECORD_MISC_GUEST_USER, | ||
| 236 | PERF_RECORD_MISC_GUEST_KERNEL | ||
| 237 | }; | ||
| 238 | #define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) | ||
| 239 | |||
| 240 | static void ip__resolve_ams(struct machine *self, struct thread *thread, | ||
| 241 | struct addr_map_symbol *ams, | ||
| 242 | u64 ip) | ||
| 243 | { | ||
| 244 | struct addr_location al; | ||
| 245 | size_t i; | ||
| 246 | u8 m; | ||
| 247 | |||
| 248 | memset(&al, 0, sizeof(al)); | ||
| 249 | |||
| 250 | for (i = 0; i < NCPUMODES; i++) { | ||
| 251 | m = cpumodes[i]; | ||
| 252 | /* | ||
| 253 | * We cannot use the header.misc hint to determine whether a | ||
| 254 | * branch stack address is user, kernel, guest, hypervisor. | ||
| 255 | * Branches may straddle the kernel/user/hypervisor boundaries. | ||
| 256 | * Thus, we try each cpumode in turn until we find a match; | ||
| 257 | * otherwise the symbol remains unknown. | ||
| 258 | */ | ||
| 259 | thread__find_addr_location(thread, self, m, MAP__FUNCTION, | ||
| 260 | ip, &al, NULL); | ||
| 261 | if (al.sym) | ||
| 262 | goto found; | ||
| 263 | } | ||
| 264 | found: | ||
| 265 | ams->addr = ip; | ||
| 266 | ams->al_addr = al.addr; | ||
| 267 | ams->sym = al.sym; | ||
| 268 | ams->map = al.map; | ||
| 269 | } | ||
| 270 | |||
| 271 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
| 272 | struct thread *thr, | ||
| 273 | struct branch_stack *bs) | ||
| 274 | { | ||
| 275 | struct branch_info *bi; | ||
| 276 | unsigned int i; | ||
| 277 | |||
| 278 | bi = calloc(bs->nr, sizeof(struct branch_info)); | ||
| 279 | if (!bi) | ||
| 280 | return NULL; | ||
| 281 | |||
| 282 | for (i = 0; i < bs->nr; i++) { | ||
| 283 | ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); | ||
| 284 | ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); | ||
| 285 | bi[i].flags = bs->entries[i].flags; | ||
| 286 | } | ||
| 287 | return bi; | ||
| 288 | } | ||
| 289 | |||
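A sketch of how a sample-processing path might drive the resolver above — the surrounding handler, variable names, and add_branch_entries() (from the earlier sketch) are hypothetical; machine__resolve_bstack() and struct branch_info are what this patch adds:

/* hypothetical fragment of a PERF_SAMPLE_BRANCH_STACK sample handler */
struct branch_info *bi;

bi = machine__resolve_bstack(machine, al.thread, sample->branch_stack);
if (bi != NULL) {
        /* bi[i].from / bi[i].to now carry resolved map + symbol */
        add_branch_entries(hists, &al, parent, bi,
                           sample->branch_stack->nr, sample->period);
        free(bi);               /* calloc()'d by machine__resolve_bstack() */
}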
| 232 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | 290 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, |
| 233 | struct thread *thread, | 291 | struct thread *thread, |
| 234 | struct ip_callchain *chain, | 292 | struct ip_callchain *chain, |
| @@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) | |||
| 697 | i, sample->callchain->ips[i]); | 755 | i, sample->callchain->ips[i]); |
| 698 | } | 756 | } |
| 699 | 757 | ||
| 758 | static void branch_stack__printf(struct perf_sample *sample) | ||
| 759 | { | ||
| 760 | uint64_t i; | ||
| 761 | |||
| 762 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); | ||
| 763 | |||
| 764 | for (i = 0; i < sample->branch_stack->nr; i++) | ||
| 765 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", | ||
| 766 | i, sample->branch_stack->entries[i].from, | ||
| 767 | sample->branch_stack->entries[i].to); | ||
| 768 | } | ||
| 769 | |||
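Given the format strings above, the dump of a two-deep branch stack would look like this (the addresses are illustrative only):

... branch stack: nr:2
.....  0: 0000000000400a2c -> 0000000000400b10
.....  1: 00000000004009f4 -> 0000000000400a20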
| 700 | static void perf_session__print_tstamp(struct perf_session *session, | 770 | static void perf_session__print_tstamp(struct perf_session *session, |
| 701 | union perf_event *event, | 771 | union perf_event *event, |
| 702 | struct perf_sample *sample) | 772 | struct perf_sample *sample) |
| @@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, | |||
| 744 | 814 | ||
| 745 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) | 815 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) |
| 746 | callchain__printf(sample); | 816 | callchain__printf(sample); |
| 817 | |||
| 818 | if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) | ||
| 819 | branch_stack__printf(sample); | ||
| 747 | } | 820 | } |
| 748 | 821 | ||
| 749 | static struct machine * | 822 | static struct machine * |
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c8d90178e7de..7a5434c00565 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
| @@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel | |||
| 73 | struct ip_callchain *chain, | 73 | struct ip_callchain *chain, |
| 74 | struct symbol **parent); | 74 | struct symbol **parent); |
| 75 | 75 | ||
| 76 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
| 77 | struct thread *thread, | ||
| 78 | struct branch_stack *bs); | ||
| 79 | |||
| 76 | bool perf_session__has_traces(struct perf_session *self, const char *msg); | 80 | bool perf_session__has_traces(struct perf_session *self, const char *msg); |
| 77 | 81 | ||
| 78 | void mem_bswap_64(void *src, int byte_size); | 82 | void mem_bswap_64(void *src, int byte_size); |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 16da30d8d765..88dbcf6f9575 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
| @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; | |||
| 8 | const char *sort_order = default_sort_order; | 8 | const char *sort_order = default_sort_order; |
| 9 | int sort__need_collapse = 0; | 9 | int sort__need_collapse = 0; |
| 10 | int sort__has_parent = 0; | 10 | int sort__has_parent = 0; |
| 11 | int sort__branch_mode = -1; /* -1 = not set */ | ||
| 11 | 12 | ||
| 12 | enum sort_type sort__first_dimension; | 13 | enum sort_type sort__first_dimension; |
| 13 | 14 | ||
| @@ -94,6 +95,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, | |||
| 94 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); | 95 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); |
| 95 | } | 96 | } |
| 96 | 97 | ||
| 98 | static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) | ||
| 99 | { | ||
| 100 | struct dso *dso_l = map_l ? map_l->dso : NULL; | ||
| 101 | struct dso *dso_r = map_r ? map_r->dso : NULL; | ||
| 102 | const char *dso_name_l, *dso_name_r; | ||
| 103 | |||
| 104 | if (!dso_l || !dso_r) | ||
| 105 | return cmp_null(dso_l, dso_r); | ||
| 106 | |||
| 107 | if (verbose) { | ||
| 108 | dso_name_l = dso_l->long_name; | ||
| 109 | dso_name_r = dso_r->long_name; | ||
| 110 | } else { | ||
| 111 | dso_name_l = dso_l->short_name; | ||
| 112 | dso_name_r = dso_r->short_name; | ||
| 113 | } | ||
| 114 | |||
| 115 | return strcmp(dso_name_l, dso_name_r); | ||
| 116 | } | ||
| 117 | |||
| 97 | struct sort_entry sort_comm = { | 118 | struct sort_entry sort_comm = { |
| 98 | .se_header = "Command", | 119 | .se_header = "Command", |
| 99 | .se_cmp = sort__comm_cmp, | 120 | .se_cmp = sort__comm_cmp, |
| @@ -107,36 +128,74 @@ struct sort_entry sort_comm = { | |||
| 107 | static int64_t | 128 | static int64_t |
| 108 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | 129 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) |
| 109 | { | 130 | { |
| 110 | struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; | 131 | return _sort__dso_cmp(left->ms.map, right->ms.map); |
| 111 | struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; | 132 | } |
| 112 | const char *dso_name_l, *dso_name_r; | ||
| 113 | 133 | ||
| 114 | if (!dso_l || !dso_r) | ||
| 115 | return cmp_null(dso_l, dso_r); | ||
| 116 | 134 | ||
| 117 | if (verbose) { | 135 | static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, |
| 118 | dso_name_l = dso_l->long_name; | 136 | u64 ip_l, u64 ip_r) |
| 119 | dso_name_r = dso_r->long_name; | 137 | { |
| 120 | } else { | 138 | if (!sym_l || !sym_r) |
| 121 | dso_name_l = dso_l->short_name; | 139 | return cmp_null(sym_l, sym_r); |
| 122 | dso_name_r = dso_r->short_name; | 140 | |
| 141 | if (sym_l == sym_r) | ||
| 142 | return 0; | ||
| 143 | |||
| 144 | if (sym_l) | ||
| 145 | ip_l = sym_l->start; | ||
| 146 | if (sym_r) | ||
| 147 | ip_r = sym_r->start; | ||
| 148 | |||
| 149 | return (int64_t)(ip_r - ip_l); | ||
| 150 | } | ||
| 151 | |||
| 152 | static int _hist_entry__dso_snprintf(struct map *map, char *bf, | ||
| 153 | size_t size, unsigned int width) | ||
| 154 | { | ||
| 155 | if (map && map->dso) { | ||
| 156 | const char *dso_name = !verbose ? map->dso->short_name : | ||
| 157 | map->dso->long_name; | ||
| 158 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | ||
| 123 | } | 159 | } |
| 124 | 160 | ||
| 125 | return strcmp(dso_name_l, dso_name_r); | 161 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); |
| 126 | } | 162 | } |
| 127 | 163 | ||
| 128 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, | 164 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, |
| 129 | size_t size, unsigned int width) | 165 | size_t size, unsigned int width) |
| 130 | { | 166 | { |
| 131 | if (self->ms.map && self->ms.map->dso) { | 167 | return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); |
| 132 | const char *dso_name = !verbose ? self->ms.map->dso->short_name : | 168 | } |
| 133 | self->ms.map->dso->long_name; | 169 | |
| 134 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | 170 | static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, |
| 171 | u64 ip, char level, char *bf, size_t size, | ||
| 172 | unsigned int width __used) | ||
| 173 | { | ||
| 174 | size_t ret = 0; | ||
| 175 | |||
| 176 | if (verbose) { | ||
| 177 | char o = map ? dso__symtab_origin(map->dso) : '!'; | ||
| 178 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
| 179 | BITS_PER_LONG / 4, ip, o); | ||
| 135 | } | 180 | } |
| 136 | 181 | ||
| 137 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); | 182 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); |
| 183 | if (sym) | ||
| 184 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
| 185 | width - ret, | ||
| 186 | sym->name); | ||
| 187 | else { | ||
| 188 | size_t len = BITS_PER_LONG / 4; | ||
| 189 | ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", | ||
| 190 | len, ip); | ||
| 191 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
| 192 | width - ret, ""); | ||
| 193 | } | ||
| 194 | |||
| 195 | return ret; | ||
| 138 | } | 196 | } |
| 139 | 197 | ||
| 198 | |||
| 140 | struct sort_entry sort_dso = { | 199 | struct sort_entry sort_dso = { |
| 141 | .se_header = "Shared Object", | 200 | .se_header = "Shared Object", |
| 142 | .se_cmp = sort__dso_cmp, | 201 | .se_cmp = sort__dso_cmp, |
| @@ -144,8 +203,14 @@ struct sort_entry sort_dso = { | |||
| 144 | .se_width_idx = HISTC_DSO, | 203 | .se_width_idx = HISTC_DSO, |
| 145 | }; | 204 | }; |
| 146 | 205 | ||
| 147 | /* --sort symbol */ | 206 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, |
| 207 | size_t size, unsigned int width __used) | ||
| 208 | { | ||
| 209 | return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, | ||
| 210 | self->level, bf, size, width); | ||
| 211 | } | ||
| 148 | 212 | ||
| 213 | /* --sort symbol */ | ||
| 149 | static int64_t | 214 | static int64_t |
| 150 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | 215 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) |
| 151 | { | 216 | { |
| @@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | |||
| 163 | ip_l = left->ms.sym->start; | 228 | ip_l = left->ms.sym->start; |
| 164 | ip_r = right->ms.sym->start; | 229 | ip_r = right->ms.sym->start; |
| 165 | 230 | ||
| 166 | return (int64_t)(ip_r - ip_l); | 231 | return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); |
| 167 | } | ||
| 168 | |||
| 169 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, | ||
| 170 | size_t size, unsigned int width __used) | ||
| 171 | { | ||
| 172 | size_t ret = 0; | ||
| 173 | |||
| 174 | if (verbose) { | ||
| 175 | char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; | ||
| 176 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
| 177 | BITS_PER_LONG / 4, self->ip, o); | ||
| 178 | } | ||
| 179 | |||
| 180 | if (!sort_dso.elide) | ||
| 181 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); | ||
| 182 | |||
| 183 | if (self->ms.sym) | ||
| 184 | ret += repsep_snprintf(bf + ret, size - ret, "%s", | ||
| 185 | self->ms.sym->name); | ||
| 186 | else | ||
| 187 | ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", | ||
| 188 | BITS_PER_LONG / 4, self->ip); | ||
| 189 | |||
| 190 | return ret; | ||
| 191 | } | 232 | } |
| 192 | 233 | ||
| 193 | struct sort_entry sort_sym = { | 234 | struct sort_entry sort_sym = { |
| @@ -246,19 +287,155 @@ struct sort_entry sort_cpu = { | |||
| 246 | .se_width_idx = HISTC_CPU, | 287 | .se_width_idx = HISTC_CPU, |
| 247 | }; | 288 | }; |
| 248 | 289 | ||
| 290 | static int64_t | ||
| 291 | sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 292 | { | ||
| 293 | return _sort__dso_cmp(left->branch_info->from.map, | ||
| 294 | right->branch_info->from.map); | ||
| 295 | } | ||
| 296 | |||
| 297 | static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, | ||
| 298 | size_t size, unsigned int width) | ||
| 299 | { | ||
| 300 | return _hist_entry__dso_snprintf(self->branch_info->from.map, | ||
| 301 | bf, size, width); | ||
| 302 | } | ||
| 303 | |||
| 304 | struct sort_entry sort_dso_from = { | ||
| 305 | .se_header = "Source Shared Object", | ||
| 306 | .se_cmp = sort__dso_from_cmp, | ||
| 307 | .se_snprintf = hist_entry__dso_from_snprintf, | ||
| 308 | .se_width_idx = HISTC_DSO_FROM, | ||
| 309 | }; | ||
| 310 | |||
| 311 | static int64_t | ||
| 312 | sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 313 | { | ||
| 314 | return _sort__dso_cmp(left->branch_info->to.map, | ||
| 315 | right->branch_info->to.map); | ||
| 316 | } | ||
| 317 | |||
| 318 | static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, | ||
| 319 | size_t size, unsigned int width) | ||
| 320 | { | ||
| 321 | return _hist_entry__dso_snprintf(self->branch_info->to.map, | ||
| 322 | bf, size, width); | ||
| 323 | } | ||
| 324 | |||
| 325 | static int64_t | ||
| 326 | sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 327 | { | ||
| 328 | struct addr_map_symbol *from_l = &left->branch_info->from; | ||
| 329 | struct addr_map_symbol *from_r = &right->branch_info->from; | ||
| 330 | |||
| 331 | if (!from_l->sym && !from_r->sym) | ||
| 332 | return right->level - left->level; | ||
| 333 | |||
| 334 | return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, | ||
| 335 | from_r->addr); | ||
| 336 | } | ||
| 337 | |||
| 338 | static int64_t | ||
| 339 | sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 340 | { | ||
| 341 | struct addr_map_symbol *to_l = &left->branch_info->to; | ||
| 342 | struct addr_map_symbol *to_r = &right->branch_info->to; | ||
| 343 | |||
| 344 | if (!to_l->sym && !to_r->sym) | ||
| 345 | return right->level - left->level; | ||
| 346 | |||
| 347 | return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); | ||
| 348 | } | ||
| 349 | |||
| 350 | static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, | ||
| 351 | size_t size, unsigned int width __used) | ||
| 352 | { | ||
| 353 | struct addr_map_symbol *from = &self->branch_info->from; | ||
| 354 | return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, | ||
| 355 | self->level, bf, size, width); | ||
| 356 | |||
| 357 | } | ||
| 358 | |||
| 359 | static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, | ||
| 360 | size_t size, unsigned int width __used) | ||
| 361 | { | ||
| 362 | struct addr_map_symbol *to = &self->branch_info->to; | ||
| 363 | return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, | ||
| 364 | self->level, bf, size, width); | ||
| 365 | |||
| 366 | } | ||
| 367 | |||
| 368 | struct sort_entry sort_dso_to = { | ||
| 369 | .se_header = "Target Shared Object", | ||
| 370 | .se_cmp = sort__dso_to_cmp, | ||
| 371 | .se_snprintf = hist_entry__dso_to_snprintf, | ||
| 372 | .se_width_idx = HISTC_DSO_TO, | ||
| 373 | }; | ||
| 374 | |||
| 375 | struct sort_entry sort_sym_from = { | ||
| 376 | .se_header = "Source Symbol", | ||
| 377 | .se_cmp = sort__sym_from_cmp, | ||
| 378 | .se_snprintf = hist_entry__sym_from_snprintf, | ||
| 379 | .se_width_idx = HISTC_SYMBOL_FROM, | ||
| 380 | }; | ||
| 381 | |||
| 382 | struct sort_entry sort_sym_to = { | ||
| 383 | .se_header = "Target Symbol", | ||
| 384 | .se_cmp = sort__sym_to_cmp, | ||
| 385 | .se_snprintf = hist_entry__sym_to_snprintf, | ||
| 386 | .se_width_idx = HISTC_SYMBOL_TO, | ||
| 387 | }; | ||
| 388 | |||
| 389 | static int64_t | ||
| 390 | sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) | ||
| 391 | { | ||
| 392 | const unsigned char mp = left->branch_info->flags.mispred != | ||
| 393 | right->branch_info->flags.mispred; | ||
| 394 | const unsigned char p = left->branch_info->flags.predicted != | ||
| 395 | right->branch_info->flags.predicted; | ||
| 396 | |||
| 397 | return mp || p; | ||
| 398 | } | ||
| 399 | |||
| 400 | static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, | ||
| 401 | size_t size, unsigned int width) { | ||
| 402 | const char *out = "N/A"; | ||
| 403 | |||
| 404 | if (self->branch_info->flags.predicted) | ||
| 405 | out = "N"; | ||
| 406 | else if (self->branch_info->flags.mispred) | ||
| 407 | out = "Y"; | ||
| 408 | |||
| 409 | return repsep_snprintf(bf, size, "%-*s", width, out); | ||
| 410 | } | ||
| 411 | |||
| 412 | struct sort_entry sort_mispredict = { | ||
| 413 | .se_header = "Branch Mispredicted", | ||
| 414 | .se_cmp = sort__mispredict_cmp, | ||
| 415 | .se_snprintf = hist_entry__mispredict_snprintf, | ||
| 416 | .se_width_idx = HISTC_MISPREDICT, | ||
| 417 | }; | ||
| 418 | |||
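Note what this comparator does and does not do: it returns 1 whenever the two entries disagree on either flag and 0 when they agree, so entries with the same prediction outcome collapse into one row while differing outcomes stay separate — and since it never returns a negative value, it imposes no real ordering between them. The snprintf above then renders each surviving row as Y, N, or N/A in the "Branch Mispredicted" column.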
| 249 | struct sort_dimension { | 419 | struct sort_dimension { |
| 250 | const char *name; | 420 | const char *name; |
| 251 | struct sort_entry *entry; | 421 | struct sort_entry *entry; |
| 252 | int taken; | 422 | int taken; |
| 253 | }; | 423 | }; |
| 254 | 424 | ||
| 425 | #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } | ||
| 426 | |||
| 255 | static struct sort_dimension sort_dimensions[] = { | 427 | static struct sort_dimension sort_dimensions[] = { |
| 256 | { .name = "pid", .entry = &sort_thread, }, | 428 | DIM(SORT_PID, "pid", sort_thread), |
| 257 | { .name = "comm", .entry = &sort_comm, }, | 429 | DIM(SORT_COMM, "comm", sort_comm), |
| 258 | { .name = "dso", .entry = &sort_dso, }, | 430 | DIM(SORT_DSO, "dso", sort_dso), |
| 259 | { .name = "symbol", .entry = &sort_sym, }, | 431 | DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), |
| 260 | { .name = "parent", .entry = &sort_parent, }, | 432 | DIM(SORT_DSO_TO, "dso_to", sort_dso_to), |
| 261 | { .name = "cpu", .entry = &sort_cpu, }, | 433 | DIM(SORT_SYM, "symbol", sort_sym), |
| 434 | DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), | ||
| 435 | DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), | ||
| 436 | DIM(SORT_PARENT, "parent", sort_parent), | ||
| 437 | DIM(SORT_CPU, "cpu", sort_cpu), | ||
| 438 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), | ||
| 262 | }; | 439 | }; |
| 263 | 440 | ||
| 264 | int sort_dimension__add(const char *tok) | 441 | int sort_dimension__add(const char *tok) |
| @@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok) | |||
| 270 | 447 | ||
| 271 | if (strncasecmp(tok, sd->name, strlen(tok))) | 448 | if (strncasecmp(tok, sd->name, strlen(tok))) |
| 272 | continue; | 449 | continue; |
| 273 | |||
| 274 | if (sd->entry == &sort_parent) { | 450 | if (sd->entry == &sort_parent) { |
| 275 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); | 451 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); |
| 276 | if (ret) { | 452 | if (ret) { |
| @@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok) | |||
| 302 | sort__first_dimension = SORT_PARENT; | 478 | sort__first_dimension = SORT_PARENT; |
| 303 | else if (!strcmp(sd->name, "cpu")) | 479 | else if (!strcmp(sd->name, "cpu")) |
| 304 | sort__first_dimension = SORT_CPU; | 480 | sort__first_dimension = SORT_CPU; |
| 481 | else if (!strcmp(sd->name, "symbol_from")) | ||
| 482 | sort__first_dimension = SORT_SYM_FROM; | ||
| 483 | else if (!strcmp(sd->name, "symbol_to")) | ||
| 484 | sort__first_dimension = SORT_SYM_TO; | ||
| 485 | else if (!strcmp(sd->name, "dso_from")) | ||
| 486 | sort__first_dimension = SORT_DSO_FROM; | ||
| 487 | else if (!strcmp(sd->name, "dso_to")) | ||
| 488 | sort__first_dimension = SORT_DSO_TO; | ||
| 489 | else if (!strcmp(sd->name, "mispredict")) | ||
| 490 | sort__first_dimension = SORT_MISPREDICT; | ||
| 305 | } | 491 | } |
| 306 | 492 | ||
| 307 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | 493 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); |
| @@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok) | |||
| 309 | 495 | ||
| 310 | return 0; | 496 | return 0; |
| 311 | } | 497 | } |
| 312 | |||
| 313 | return -ESRCH; | 498 | return -ESRCH; |
| 314 | } | 499 | } |
| 315 | 500 | ||
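With the table filled in, the new keys become valid --sort tokens; e.g. perf report --sort dso_from,symbol_from,symbol_to,mispredict groups samples by branch source/target and prediction outcome. sort_dimension__add() matches tokens case-insensitively by prefix (strncasecmp over strlen(tok)), so a short token resolves to the first table entry it prefixes.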
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae395752..472aa5a63a58 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
| @@ -31,11 +31,16 @@ extern const char *parent_pattern; | |||
| 31 | extern const char default_sort_order[]; | 31 | extern const char default_sort_order[]; |
| 32 | extern int sort__need_collapse; | 32 | extern int sort__need_collapse; |
| 33 | extern int sort__has_parent; | 33 | extern int sort__has_parent; |
| 34 | extern int sort__branch_mode; | ||
| 34 | extern char *field_sep; | 35 | extern char *field_sep; |
| 35 | extern struct sort_entry sort_comm; | 36 | extern struct sort_entry sort_comm; |
| 36 | extern struct sort_entry sort_dso; | 37 | extern struct sort_entry sort_dso; |
| 37 | extern struct sort_entry sort_sym; | 38 | extern struct sort_entry sort_sym; |
| 38 | extern struct sort_entry sort_parent; | 39 | extern struct sort_entry sort_parent; |
| 40 | extern struct sort_entry sort_dso_from; | ||
| 41 | extern struct sort_entry sort_dso_to; | ||
| 42 | extern struct sort_entry sort_sym_from; | ||
| 43 | extern struct sort_entry sort_sym_to; | ||
| 39 | extern enum sort_type sort__first_dimension; | 44 | extern enum sort_type sort__first_dimension; |
| 40 | 45 | ||
| 41 | /** | 46 | /** |
| @@ -72,6 +77,7 @@ struct hist_entry { | |||
| 72 | struct hist_entry *pair; | 77 | struct hist_entry *pair; |
| 73 | struct rb_root sorted_chain; | 78 | struct rb_root sorted_chain; |
| 74 | }; | 79 | }; |
| 80 | struct branch_info *branch_info; | ||
| 75 | struct callchain_root callchain[0]; | 81 | struct callchain_root callchain[0]; |
| 76 | }; | 82 | }; |
| 77 | 83 | ||
| @@ -82,6 +88,11 @@ enum sort_type { | |||
| 82 | SORT_SYM, | 88 | SORT_SYM, |
| 83 | SORT_PARENT, | 89 | SORT_PARENT, |
| 84 | SORT_CPU, | 90 | SORT_CPU, |
| 91 | SORT_DSO_FROM, | ||
| 92 | SORT_DSO_TO, | ||
| 93 | SORT_SYM_FROM, | ||
| 94 | SORT_SYM_TO, | ||
| 95 | SORT_MISPREDICT, | ||
| 85 | }; | 96 | }; |
| 86 | 97 | ||
| 87 | /* | 98 | /* |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2a683d4fc918..ac49ef208a5f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <stdbool.h> | 5 | #include <stdbool.h> |
| 6 | #include <stdint.h> | 6 | #include <stdint.h> |
| 7 | #include "map.h" | 7 | #include "map.h" |
| 8 | #include "../perf.h" | ||
| 8 | #include <linux/list.h> | 9 | #include <linux/list.h> |
| 9 | #include <linux/rbtree.h> | 10 | #include <linux/rbtree.h> |
| 10 | #include <stdio.h> | 11 | #include <stdio.h> |
| @@ -96,7 +97,11 @@ struct symbol_conf { | |||
| 96 | *col_width_list_str; | 97 | *col_width_list_str; |
| 97 | struct strlist *dso_list, | 98 | struct strlist *dso_list, |
| 98 | *comm_list, | 99 | *comm_list, |
| 99 | *sym_list; | 100 | *sym_list, |
| 101 | *dso_from_list, | ||
| 102 | *dso_to_list, | ||
| 103 | *sym_from_list, | ||
| 104 | *sym_to_list; | ||
| 100 | const char *symfs; | 105 | const char *symfs; |
| 101 | }; | 106 | }; |
| 102 | 107 | ||
| @@ -120,6 +125,19 @@ struct map_symbol { | |||
| 120 | bool has_children; | 125 | bool has_children; |
| 121 | }; | 126 | }; |
| 122 | 127 | ||
| 128 | struct addr_map_symbol { | ||
| 129 | struct map *map; | ||
| 130 | struct symbol *sym; | ||
| 131 | u64 addr; | ||
| 132 | u64 al_addr; | ||
| 133 | }; | ||
| 134 | |||
| 135 | struct branch_info { | ||
| 136 | struct addr_map_symbol from; | ||
| 137 | struct addr_map_symbol to; | ||
| 138 | struct branch_flags flags; | ||
| 139 | }; | ||
| 140 | |||
| 123 | struct addr_location { | 141 | struct addr_location { |
| 124 | struct thread *thread; | 142 | struct thread *thread; |
| 125 | struct map *map; | 143 | struct map *map; |
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bfba0490c098..de8ece8bcce3 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c | |||
| @@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists) | |||
| 805 | self->hists = hists; | 805 | self->hists = hists; |
| 806 | self->b.refresh = hist_browser__refresh; | 806 | self->b.refresh = hist_browser__refresh; |
| 807 | self->b.seek = ui_browser__hists_seek; | 807 | self->b.seek = ui_browser__hists_seek; |
| 808 | self->b.use_navkeypressed = true, | 808 | self->b.use_navkeypressed = true; |
| 809 | self->has_symbols = sort_sym.list.next != NULL; | 809 | if (sort__branch_mode == 1) |
| 810 | self->has_symbols = sort_sym_from.list.next != NULL; | ||
| 811 | else | ||
| 812 | self->has_symbols = sort_sym.list.next != NULL; | ||
| 810 | } | 813 | } |
| 811 | 814 | ||
| 812 | return self; | 815 | return self; |
| @@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
| 853 | return printed; | 856 | return printed; |
| 854 | } | 857 | } |
| 855 | 858 | ||
| 859 | static inline void free_popup_options(char **options, int n) | ||
| 860 | { | ||
| 861 | int i; | ||
| 862 | |||
| 863 | for (i = 0; i < n; ++i) { | ||
| 864 | free(options[i]); | ||
| 865 | options[i] = NULL; | ||
| 866 | } | ||
| 867 | } | ||
| 868 | |||
| 856 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | 869 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, |
| 857 | const char *helpline, const char *ev_name, | 870 | const char *helpline, const char *ev_name, |
| 858 | bool left_exits, | 871 | bool left_exits, |
| @@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 861 | { | 874 | { |
| 862 | struct hists *self = &evsel->hists; | 875 | struct hists *self = &evsel->hists; |
| 863 | struct hist_browser *browser = hist_browser__new(self); | 876 | struct hist_browser *browser = hist_browser__new(self); |
| 877 | struct branch_info *bi; | ||
| 864 | struct pstack *fstack; | 878 | struct pstack *fstack; |
| 879 | char *options[16]; | ||
| 880 | int nr_options = 0; | ||
| 865 | int key = -1; | 881 | int key = -1; |
| 866 | 882 | ||
| 867 | if (browser == NULL) | 883 | if (browser == NULL) |
| @@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 873 | 889 | ||
| 874 | ui_helpline__push(helpline); | 890 | ui_helpline__push(helpline); |
| 875 | 891 | ||
| 892 | memset(options, 0, sizeof(options)); | ||
| 893 | |||
| 876 | while (1) { | 894 | while (1) { |
| 877 | const struct thread *thread = NULL; | 895 | const struct thread *thread = NULL; |
| 878 | const struct dso *dso = NULL; | 896 | const struct dso *dso = NULL; |
| 879 | char *options[16]; | 897 | int choice = 0, |
| 880 | int nr_options = 0, choice = 0, i, | ||
| 881 | annotate = -2, zoom_dso = -2, zoom_thread = -2, | 898 | annotate = -2, zoom_dso = -2, zoom_thread = -2, |
| 882 | browse_map = -2; | 899 | annotate_f = -2, annotate_t = -2, browse_map = -2; |
| 900 | |||
| 901 | nr_options = 0; | ||
| 883 | 902 | ||
| 884 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); | 903 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); |
| 885 | 904 | ||
| @@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 887 | thread = hist_browser__selected_thread(browser); | 906 | thread = hist_browser__selected_thread(browser); |
| 888 | dso = browser->selection->map ? browser->selection->map->dso : NULL; | 907 | dso = browser->selection->map ? browser->selection->map->dso : NULL; |
| 889 | } | 908 | } |
| 890 | |||
| 891 | switch (key) { | 909 | switch (key) { |
| 892 | case K_TAB: | 910 | case K_TAB: |
| 893 | case K_UNTAB: | 911 | case K_UNTAB: |
| @@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 902 | if (!browser->has_symbols) { | 920 | if (!browser->has_symbols) { |
| 903 | ui_browser__warning(&browser->b, delay_secs * 2, | 921 | ui_browser__warning(&browser->b, delay_secs * 2, |
| 904 | "Annotation is only available for symbolic views, " | 922 | "Annotation is only available for symbolic views, " |
| 905 | "include \"sym\" in --sort to use it."); | 923 | "include \"sym*\" in --sort to use it."); |
| 906 | continue; | 924 | continue; |
| 907 | } | 925 | } |
| 908 | 926 | ||
| @@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 972 | if (!browser->has_symbols) | 990 | if (!browser->has_symbols) |
| 973 | goto add_exit_option; | 991 | goto add_exit_option; |
| 974 | 992 | ||
| 975 | if (browser->selection != NULL && | 993 | if (sort__branch_mode == 1) { |
| 976 | browser->selection->sym != NULL && | 994 | bi = browser->he_selection->branch_info; |
| 977 | !browser->selection->map->dso->annotate_warned && | 995 | if (browser->selection != NULL && |
| 978 | asprintf(&options[nr_options], "Annotate %s", | 996 | bi && |
| 979 | browser->selection->sym->name) > 0) | 997 | bi->from.sym != NULL && |
| 980 | annotate = nr_options++; | 998 | !bi->from.map->dso->annotate_warned && |
| 999 | asprintf(&options[nr_options], "Annotate %s", | ||
| 1000 | bi->from.sym->name) > 0) | ||
| 1001 | annotate_f = nr_options++; | ||
| 1002 | |||
| 1003 | if (browser->selection != NULL && | ||
| 1004 | bi && | ||
| 1005 | bi->to.sym != NULL && | ||
| 1006 | !bi->to.map->dso->annotate_warned && | ||
| 1007 | (bi->to.sym != bi->from.sym || | ||
| 1008 | bi->to.map->dso != bi->from.map->dso) && | ||
| 1009 | asprintf(&options[nr_options], "Annotate %s", | ||
| 1010 | bi->to.sym->name) > 0) | ||
| 1011 | annotate_t = nr_options++; | ||
| 1012 | } else { | ||
| 1013 | |||
| 1014 | if (browser->selection != NULL && | ||
| 1015 | browser->selection->sym != NULL && | ||
| 1016 | !browser->selection->map->dso->annotate_warned && | ||
| 1017 | asprintf(&options[nr_options], "Annotate %s", | ||
| 1018 | browser->selection->sym->name) > 0) | ||
| 1019 | annotate = nr_options++; | ||
| 1020 | } | ||
| 981 | 1021 | ||
| 982 | if (thread != NULL && | 1022 | if (thread != NULL && |
| 983 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", | 1023 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", |
| @@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 998 | browse_map = nr_options++; | 1038 | browse_map = nr_options++; |
| 999 | add_exit_option: | 1039 | add_exit_option: |
| 1000 | options[nr_options++] = (char *)"Exit"; | 1040 | options[nr_options++] = (char *)"Exit"; |
| 1001 | 1041 | retry_popup_menu: | |
| 1002 | choice = ui__popup_menu(nr_options, options); | 1042 | choice = ui__popup_menu(nr_options, options); |
| 1003 | 1043 | ||
| 1004 | for (i = 0; i < nr_options - 1; ++i) | ||
| 1005 | free(options[i]); | ||
| 1006 | |||
| 1007 | if (choice == nr_options - 1) | 1044 | if (choice == nr_options - 1) |
| 1008 | break; | 1045 | break; |
| 1009 | 1046 | ||
| 1010 | if (choice == -1) | 1047 | if (choice == -1) { |
| 1048 | free_popup_options(options, nr_options - 1); | ||
| 1011 | continue; | 1049 | continue; |
| 1050 | } | ||
| 1012 | 1051 | ||
| 1013 | if (choice == annotate) { | 1052 | if (choice == annotate || choice == annotate_t || choice == annotate_f) { |
| 1014 | struct hist_entry *he; | 1053 | struct hist_entry *he; |
| 1015 | int err; | 1054 | int err; |
| 1016 | do_annotate: | 1055 | do_annotate: |
| 1017 | he = hist_browser__selected_entry(browser); | 1056 | he = hist_browser__selected_entry(browser); |
| 1018 | if (he == NULL) | 1057 | if (he == NULL) |
| 1019 | continue; | 1058 | continue; |
| 1059 | |||
| 1060 | /* | ||
| 1061 | * We stash the branch_info symbol + map into the ms | ||
| 1062 | * so we don't have to rewrite all the annotation code | ||
| 1063 | * to use branch_info. | ||
| 1064 | * In branch mode, the ms struct is not used. | ||
| 1065 | */ | ||
| 1066 | if (choice == annotate_f) { | ||
| 1067 | he->ms.sym = he->branch_info->from.sym; | ||
| 1068 | he->ms.map = he->branch_info->from.map; | ||
| 1069 | } else if (choice == annotate_t) { | ||
| 1070 | he->ms.sym = he->branch_info->to.sym; | ||
| 1071 | he->ms.map = he->branch_info->to.map; | ||
| 1072 | } | ||
| 1073 | |||
| 1020 | /* | 1074 | /* |
| 1021 | * Don't let this be freed, say, by hists__decay_entry. | 1075 | * Don't let this be freed, say, by hists__decay_entry. |
| 1022 | */ | 1076 | */ |
| @@ -1024,9 +1078,18 @@ do_annotate: | |||
| 1024 | err = hist_entry__tui_annotate(he, evsel->idx, | 1078 | err = hist_entry__tui_annotate(he, evsel->idx, |
| 1025 | timer, arg, delay_secs); | 1079 | timer, arg, delay_secs); |
| 1026 | he->used = false; | 1080 | he->used = false; |
| 1081 | /* | ||
| 1082 | * Offer the option to annotate the other branch source or | ||
| 1083 | * target (if it exists) when returning from annotate. | ||
| 1084 | */ | ||
| 1085 | if ((err == 'q' || err == CTRL('c')) | ||
| 1086 | && annotate_t != -2 && annotate_f != -2) | ||
| 1087 | goto retry_popup_menu; | ||
| 1088 | |||
| 1027 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); | 1089 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); |
| 1028 | if (err) | 1090 | if (err) |
| 1029 | ui_browser__handle_resize(&browser->b); | 1091 | ui_browser__handle_resize(&browser->b); |
| 1092 | |||
| 1030 | } else if (choice == browse_map) | 1093 | } else if (choice == browse_map) |
| 1031 | map__browse(browser->selection->map); | 1094 | map__browse(browser->selection->map); |
| 1032 | else if (choice == zoom_dso) { | 1095 | else if (choice == zoom_dso) { |
| @@ -1072,6 +1135,7 @@ out_free_stack: | |||
| 1072 | pstack__delete(fstack); | 1135 | pstack__delete(fstack); |
| 1073 | out: | 1136 | out: |
| 1074 | hist_browser__delete(browser); | 1137 | hist_browser__delete(browser); |
| 1138 | free_popup_options(options, nr_options - 1); | ||
| 1075 | return key; | 1139 | return key; |
| 1076 | } | 1140 | } |
| 1077 | 1141 | ||
