author		Ingo Molnar <mingo@elte.hu>	2012-03-12 15:46:35 -0400
committer	Ingo Molnar <mingo@elte.hu>	2012-03-12 15:47:05 -0400
commit		bea95c152dee1791dd02cbc708afbb115bb00f9a (patch)
tree		af9994c42c5fdd81ba3dadd7b812e2fa85273353
parent		f9b4eeb809c6d031cc9561cc34dd691701cb2c2a (diff)
parent		24bff2dc0f77b1f186b7bdf30060caf3df191a68 (diff)
Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
33 files changed, 2017 insertions, 243 deletions
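For context, a minimal user-space sketch of how the new ABI below is consumed. This is illustrative only and not part of this commit; the helper name open_branch_sampling_event() is invented for the example, and the attribute fields are the ones added in the include/linux/perf_event.h hunk at the end of this diff:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int open_branch_sampling_event(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size		= sizeof(attr);	/* >= PERF_ATTR_SIZE_VER2 */
	attr.type		= PERF_TYPE_HARDWARE;
	attr.config		= PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period	= 100000;
	attr.sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_USER;
	attr.exclude_kernel	= 1;

	/* monitor "pid" on any CPU, no group leader, no flags */
	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}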
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8143cd7cdbfb..0dae252f7a33 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b2abfa18f137..8a89d3b7626b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
 	int err = 0;
 	atomic_t *active_events = &armpmu->active_events;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (armpmu->map_event(event) == -ENOENT)
 		return -ENOENT;
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index e3b897acfbc0..811084f4e422 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
 {
 	int err = 0;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index f04c2301725e..c2e27ede07ec 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
 	if (!ppmu)
 		return -ENOENT;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 		ev = event->attr.config;
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 10b14e3a7eb8..068b8a2759b5 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (event->attr.type) {
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HW_CACHE:
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 614da624330c..8e16a4a21582 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&nmi_active) < 0)
 		return -ENODEV;
 
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	switch (attr->type) {
 	case PERF_TYPE_HARDWARE:
 		if (attr->config >= sparc_pmu->max_events)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
 #define MSR_OFFCORE_RSP_0		0x000001a6
 #define MSR_OFFCORE_RSP_1		0x000001a7
 
+#define MSR_LBR_SELECT			0x000001c8
+#define MSR_LBR_TOS			0x000001c9
+#define MSR_LBR_NHM_FROM		0x00000680
+#define MSR_LBR_NHM_TO			0x000006c0
+#define MSR_LBR_CORE_FROM		0x00000040
+#define MSR_LBR_CORE_TO			0x00000060
+
 #define MSR_IA32_PEBS_ENABLE		0x000003f1
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f8bddb5b0600..0a18d16cb58d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
 	return 0;
 }
 
+/*
+ * check that branch_sample_type is compatible with
+ * settings needed for precise_ip > 1 which implies
+ * using the LBR to capture ALL taken branches at the
+ * priv levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+	u64 m = event->attr.branch_sample_type;
+	u64 b = 0;
+
+	/* must capture all branches */
+	if (!(m & PERF_SAMPLE_BRANCH_ANY))
+		return 0;
+
+	m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+	if (!event->attr.exclude_user)
+		b |= PERF_SAMPLE_BRANCH_USER;
+
+	if (!event->attr.exclude_kernel)
+		b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+	/*
+	 * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+	 */
+
+	return m == b;
+}
+
 int x86_pmu_hw_config(struct perf_event *event)
 {
 	if (event->attr.precise_ip) {
@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
+		/*
+		 * check that PEBS LBR correction does not conflict with
+		 * whatever the user is asking with attr->branch_sample_type
+		 */
+		if (event->attr.precise_ip > 1) {
+			u64 *br_type = &event->attr.branch_sample_type;
+
+			if (has_branch_stack(event)) {
+				if (!precise_br_compat(event))
+					return -EOPNOTSUPP;
+
+				/* branch_sample_type is compatible */
+
+			} else {
+				/*
+				 * user did not specify branch_sample_type
+				 *
+				 * For PEBS fixups, we capture all
+				 * the branches at the priv level of the
+				 * event.
+				 */
+				*br_type = PERF_SAMPLE_BRANCH_ANY;
+
+				if (!event->attr.exclude_user)
+					*br_type |= PERF_SAMPLE_BRANCH_USER;
+
+				if (!event->attr.exclude_kernel)
+					*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+			}
+		}
 	}
 
 	/*
@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 
+	/* mark not used */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+	event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }
 
@@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 	NULL,
 };
 
+static void x86_pmu_flush_branch_stack(void)
+{
+	if (x86_pmu.flush_branch_stack)
+		x86_pmu.flush_branch_stack();
+}
+
 static struct pmu pmu = {
 	.pmu_enable	= x86_pmu_enable,
 	.pmu_disable	= x86_pmu_disable,
 
 	.attr_groups	= x86_pmu_attr_groups,
 
 	.event_init	= x86_pmu_event_init,
 
 	.add		= x86_pmu_add,
 	.del		= x86_pmu_del,
 	.start		= x86_pmu_start,
 	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 
 	.start_txn	= x86_pmu_start_txn,
 	.cancel_txn	= x86_pmu_cancel_txn,
 	.commit_txn	= x86_pmu_commit_txn,
 
 	.event_idx	= x86_pmu_event_idx,
+	.flush_branch_stack	= x86_pmu_flush_branch_stack,
 };
 
 void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
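To make the precise_br_compat() rule above concrete, here is a hedged illustration (the attribute values are example choices, not taken from this commit): with precise_ip > 1, an explicit branch_sample_type is accepted only if it requests ALL taken branches at exactly the priv levels implied by exclude_user/exclude_kernel.

/* Accepted: the PEBS fixup can reuse the LBR configuration as-is. */
struct perf_event_attr ok = {
	.type			= PERF_TYPE_HARDWARE,
	.config			= PERF_COUNT_HW_CPU_CYCLES,
	.precise_ip		= 2,
	.sample_type		= PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
	.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_USER,
	.exclude_kernel		= 1,	/* priv level matches BRANCH_USER */
};

/*
 * Rejected with -EOPNOTSUPP by x86_pmu_hw_config(): no BRANCH_ANY bit,
 * so the LBR cannot both feed the PEBS fixup and honor this filter.
 */
struct perf_event_attr bad = {
	.type			= PERF_TYPE_HARDWARE,
	.config			= PERF_COUNT_HW_CPU_CYCLES,
	.precise_ip		= 2,
	.sample_type		= PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
	.branch_sample_type	= PERF_SAMPLE_BRANCH_IND_CALL,
};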
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 66fda0c26402..8484e77c211e 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
 
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_LBR   = 2,	/* lbr_select */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -130,6 +131,8 @@ struct cpu_hw_events {
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct er_account		*lbr_sel;
+	u64				br_sel;
 
 	/*
 	 * Intel host/guest exclude bits
@@ -344,6 +347,7 @@ struct x86_pmu {
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+	void		(*flush_branch_stack)(void);
 
 	/*
 	 * Intel Arch Perfmon v2+
@@ -365,6 +369,8 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
 	int		lbr_nr;			   /* hardware stack size */
+	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
+	const int	*lbr_sel_map;		   /* lbr_select mappings */
 
 	/*
 	 * Extra registers for events
@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
 
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+	return ip > PAGE_OFFSET;
+#else
+	return (long)ip < 0;
+#endif
+}
+
 #ifdef CONFIG_CPU_SUP_AMD
 
 int amd_pmu_init(void);
@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
 int p4_pmu_init(void);
 
 int p6_pmu_init(void);
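The kernel_ip() helper above, moved into this header so the LBR code can share it with the PEBS code, relies on the x86 address-space split: on 64-bit the kernel lives in the upper canonical half, so the sign bit alone separates kernel from user text. A stand-alone restatement of the 64-bit case, as a hypothetical user-space check rather than kernel code:

#include <assert.h>

/* 64-bit case of kernel_ip(): top bit set means upper canonical half. */
static int is_kernel_ip_64(unsigned long ip)
{
	return (long)ip < 0;
}

int main(void)
{
	assert(is_kernel_ip_64(0xffffffff81000000UL));	/* typical kernel text */
	assert(!is_kernel_ip_64(0x0000000000400000UL));	/* typical user text */
	return 0;
}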
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a52430b..dd002faff7a6 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	if (event->attr.exclude_host && event->attr.exclude_guest)
 		/*
 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 4bd9c9ef9d42..6a84e7f28f05 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
 },
 };
 
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+	/* user explicitly requested branch sampling */
+	if (has_branch_stack(event))
+		return true;
+
+	/* implicit branch sampling to correct PEBS skid */
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+		return true;
+
+	return false;
+}
+
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 
+	/*
+	 * must disable before any actual event
+	 * because any event may be combined with LBR
+	 */
+	if (intel_pmu_needs_lbr_smpl(event))
+		intel_pmu_lbr_disable(event);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
+	/*
+	 * must enable before any actual event
+	 * because any event may be combined with LBR
+	 */
+	if (intel_pmu_needs_lbr_smpl(event))
+		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1058,6 +1084,9 @@ again:
 
 		data.period = event->hw.last_period;
 
+		if (has_branch_stack(event))
+			data.br_stack = &cpuc->lbr_stack;
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
-	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
-		return &unconstrained;
+		return NULL; /* call x86_get_event_constraint() */
 
 again:
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1157,14 +1186,10 @@ again:
 		reg->alloc = 1;
 
 		/*
-		 * All events using extra_reg are unconstrained.
-		 * Avoids calling x86_get_event_constraints()
-		 *
-		 * Must revisit if extra_reg controlling events
-		 * ever have constraints. Worst case we go through
-		 * the regular event constraint table.
+		 * need to call x86_get_event_constraint()
+		 * to check if associated event has constraints
 		 */
-		c = &unconstrained;
+		c = NULL;
 	} else if (intel_try_alt_er(event, orig_idx)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
@@ -1201,11 +1226,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
-	struct event_constraint *c = NULL;
-
-	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, event);
-
+	struct event_constraint *c = NULL, *d;
+	struct hw_perf_event_extra *xreg, *breg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE) {
+		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+		if (c == &emptyconstraint)
+			return c;
+	}
+	breg = &event->hw.branch_reg;
+	if (breg->idx != EXTRA_REG_NONE) {
+		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+		if (d == &emptyconstraint) {
+			__intel_shared_reg_put_constraints(cpuc, xreg);
+			c = d;
+		}
+	}
 	return c;
 }
 
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 	reg = &event->hw.extra_reg;
 	if (reg->idx != EXTRA_REG_NONE)
 		__intel_shared_reg_put_constraints(cpuc, reg);
+
+	reg = &event->hw.branch_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
 }
 
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		event->hw.config = alt_config;
 	}
 
+	if (intel_pmu_needs_lbr_smpl(event)) {
+		ret = intel_pmu_setup_lbr_filter(event);
+		if (ret)
+			return ret;
+	}
+
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
@@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!x86_pmu.extra_regs)
+	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
 		return NOTIFY_OK;
 
 	cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1455,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+	cpuc->lbr_sel = NULL;
+
+	if (!cpuc->shared_regs)
 		return;
 
-	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_shared_regs *pc;
+	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_shared_regs *pc;
 
-		pc = per_cpu(cpu_hw_events, i).shared_regs;
-		if (pc && pc->core_id == core_id) {
-			cpuc->kfree_on_online = cpuc->shared_regs;
-			cpuc->shared_regs = pc;
-			break;
+			pc = per_cpu(cpu_hw_events, i).shared_regs;
+			if (pc && pc->core_id == core_id) {
+				cpuc->kfree_on_online = cpuc->shared_regs;
+				cpuc->shared_regs = pc;
+				break;
+			}
 		}
+		cpuc->shared_regs->core_id = core_id;
+		cpuc->shared_regs->refcnt++;
 	}
 
-	cpuc->shared_regs->core_id = core_id;
-	cpuc->shared_regs->refcnt++;
+	if (x86_pmu.lbr_sel_map)
+		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
 
 static void intel_pmu_cpu_dying(int cpu)
@@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu)
 	fini_debug_store_on_cpu(cpu);
 }
 
+static void intel_pmu_flush_branch_stack(void)
+{
+	/*
+	 * Intel LBR does not tag entries with the
+	 * PID of the current task, so we need to
+	 * flush it on ctxsw.
+	 * For now, we simply reset it
+	 */
+	if (x86_pmu.lbr_nr)
+		intel_pmu_lbr_reset();
+}
+
 static __initconst const struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
@@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
+	.flush_branch_stack	= intel_pmu_flush_branch_stack,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		intel_pmu_lbr_init_nhm();
+		intel_pmu_lbr_init_snb();
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49faa40c..7f64df19e7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 
 #include <asm/perf_event.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-		intel_pmu_lbr_enable(event);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
-	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
-		intel_pmu_lbr_disable(event);
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
 	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
-	return ip > PAGE_OFFSET;
-#else
-	return (long)ip < 0;
-#endif
-}
-
 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * both formats and we don't use the other fields in this
 	 * routine.
 	 */
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_core *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
+	if (has_branch_stack(event))
+		data.br_stack = &cpuc->lbr_stack;
+
 	if (perf_event_overflow(event, &data, &regs))
 		x86_pmu_stop(event, 0);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63bfe54..520b4265fcd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
 
 #include <asm/perf_event.h>
 #include <asm/msr.h>
+#include <asm/insn.h>
 
 #include "perf_event.h"
 
@@ -14,6 +15,100 @@ enum {
 };
 
 /*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
+#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT		2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT		5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
+#define LBR_FAR_BIT		8 /* do not capture far branches */
+
+#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
+#define LBR_USER	(1 << LBR_USER_BIT)
+#define LBR_JCC		(1 << LBR_JCC_BIT)
+#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN	(1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
+#define LBR_FAR		(1 << LBR_FAR_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
+#define LBR_IGN		0	/* ignored */
+
+#define LBR_ANY		 \
+	(LBR_JCC	|\
+	 LBR_REL_CALL	|\
+	 LBR_IND_CALL	|\
+	 LBR_RETURN	|\
+	 LBR_REL_JMP	|\
+	 LBR_IND_JMP	|\
+	 LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+	for ((x) = PERF_SAMPLE_BRANCH_USER; \
+	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+	X86_BR_NONE     = 0,      /* unknown */
+
+	X86_BR_USER     = 1 << 0, /* branch target is user */
+	X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
+
+	X86_BR_CALL     = 1 << 2, /* call */
+	X86_BR_RET      = 1 << 3, /* return */
+	X86_BR_SYSCALL  = 1 << 4, /* syscall */
+	X86_BR_SYSRET   = 1 << 5, /* syscall return */
+	X86_BR_INT      = 1 << 6, /* sw interrupt */
+	X86_BR_IRET     = 1 << 7, /* return from interrupt */
+	X86_BR_JCC      = 1 << 8, /* conditional */
+	X86_BR_JMP      = 1 << 9, /* jump */
+	X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY       \
+	(X86_BR_CALL    |\
+	 X86_BR_RET     |\
+	 X86_BR_SYSCALL |\
+	 X86_BR_SYSRET  |\
+	 X86_BR_INT     |\
+	 X86_BR_IRET    |\
+	 X86_BR_JCC     |\
+	 X86_BR_JMP     |\
+	 X86_BR_IRQ     |\
+	 X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL		 \
+	(X86_BR_CALL		|\
+	 X86_BR_IND_CALL	|\
+	 X86_BR_SYSCALL		|\
+	 X86_BR_IRQ		|\
+	 X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
  */
@@ -21,6 +116,10 @@ enum {
 static void __intel_pmu_lbr_enable(void)
 {
 	u64 debugctl;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_sel)
+		wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
 
 	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	 * Reset the LBR stack if we changed task context to
 	 * avoid data leaks.
 	 */
-
 	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
 		intel_pmu_lbr_reset();
 		cpuc->lbr_context = event->ctx;
 	}
+	cpuc->br_sel = event->hw.branch_reg.reg;
 
 	cpuc->lbr_users++;
 }
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 
-	if (cpuc->enabled && !cpuc->lbr_users)
+	if (cpuc->enabled && !cpuc->lbr_users) {
 		__intel_pmu_lbr_disable();
+		/* avoid stale pointer */
+		cpuc->lbr_context = NULL;
+	}
 }
 
 void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
 		__intel_pmu_lbr_disable();
 }
 
+/*
+ * TOS = most recently recorded branch
+ */
 static inline u64 intel_pmu_lbr_tos(void)
 {
 	u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
 
 		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
 		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
-		cpuc->lbr_entries[i].flags	= 0;
+		cpuc->lbr_entries[i].mispred	= 0;
+		cpuc->lbr_entries[i].predicted	= 0;
+		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
 }
 
-#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
-
 /*
  * Due to lack of segmentation in Linux the effective address (offset)
  * is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
-		u64 from, to, flags = 0;
+		u64 from, to, mis = 0, pred = 0;
 
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
 
 		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
-			flags = !!(from & LBR_FROM_FLAG_MISPRED);
+			mis = !!(from & LBR_FROM_FLAG_MISPRED);
+			pred = !mis;
 			from = (u64)((((s64)from) << 1) >> 1);
 		}
 
 		cpuc->lbr_entries[i].from	= from;
 		cpuc->lbr_entries[i].to		= to;
-		cpuc->lbr_entries[i].flags	= flags;
+		cpuc->lbr_entries[i].mispred	= mis;
+		cpuc->lbr_entries[i].predicted	= pred;
+		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;
 }
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
+
+	intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+	u64 br_type = event->attr.branch_sample_type;
+	int mask = 0;
+
+	if (br_type & PERF_SAMPLE_BRANCH_USER)
+		mask |= X86_BR_USER;
+
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+		mask |= X86_BR_KERNEL;
+
+	/* we ignore BRANCH_HV here */
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY)
+		mask |= X86_BR_ANY;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		mask |= X86_BR_ANY_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		mask |= X86_BR_IND_CALL;
+	/*
+	 * stash actual user request into reg, it may
+	 * be used by fixup code for some CPU
+	 */
+	event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg;
+	u64 br_type = event->attr.branch_sample_type;
+	u64 mask = 0, m;
+	u64 v;
+
+	for_each_branch_sample_type(m) {
+		if (!(br_type & m))
+			continue;
+
+		v = x86_pmu.lbr_sel_map[m];
+		if (v == LBR_NOT_SUPP)
+			return -EOPNOTSUPP;
+
+		if (v != LBR_IGN)
+			mask |= v;
+	}
+	reg = &event->hw.branch_reg;
+	reg->idx = EXTRA_REG_LBR;
+
+	/* LBR_SELECT operates in suppress mode so invert mask */
+	reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+	return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+	int ret = 0;
+
+	/*
+	 * no LBR on this PMU
+	 */
+	if (!x86_pmu.lbr_nr)
+		return -EOPNOTSUPP;
+
+	/*
+	 * setup SW LBR filter
+	 */
+	intel_pmu_setup_sw_lbr_filter(event);
+
+	/*
+	 * setup HW LBR filter, if any
+	 */
+	if (x86_pmu.lbr_sel_map)
+		ret = intel_pmu_setup_hw_lbr_filter(event);
+
+	return ret;
 }
 
+/*
+ * return the type of control flow change at address "from"
+ * instruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+	struct insn insn;
+	void *addr;
+	int bytes, size = MAX_INSN_SIZE;
+	int ret = X86_BR_NONE;
+	int ext, to_plm, from_plm;
+	u8 buf[MAX_INSN_SIZE];
+	int is64 = 0;
+
+	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+	/*
+	 * maybe zero if lbr did not fill up after a reset by the time
+	 * we get a PMU interrupt
+	 */
+	if (from == 0 || to == 0)
+		return X86_BR_NONE;
+
+	if (from_plm == X86_BR_USER) {
+		/*
+		 * can happen if measuring at the user level only
+		 * and we interrupt in a kernel thread, e.g., idle.
+		 */
+		if (!current->mm)
+			return X86_BR_NONE;
+
+		/* may fail if text not present */
+		bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+		if (bytes != size)
+			return X86_BR_NONE;
+
+		addr = buf;
+	} else
+		addr = (void *)from;
+
+	/*
+	 * decoder needs to know the ABI especially
+	 * on 64-bit systems running 32-bit apps
+	 */
+#ifdef CONFIG_X86_64
+	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+	insn_init(&insn, addr, is64);
+	insn_get_opcode(&insn);
+
+	switch (insn.opcode.bytes[0]) {
+	case 0xf:
+		switch (insn.opcode.bytes[1]) {
+		case 0x05: /* syscall */
+		case 0x34: /* sysenter */
+			ret = X86_BR_SYSCALL;
+			break;
+		case 0x07: /* sysret */
+		case 0x35: /* sysexit */
+			ret = X86_BR_SYSRET;
+			break;
+		case 0x80 ... 0x8f: /* conditional */
+			ret = X86_BR_JCC;
+			break;
+		default:
+			ret = X86_BR_NONE;
+		}
+		break;
+	case 0x70 ... 0x7f: /* conditional */
+		ret = X86_BR_JCC;
+		break;
+	case 0xc2: /* near ret */
+	case 0xc3: /* near ret */
+	case 0xca: /* far ret */
+	case 0xcb: /* far ret */
+		ret = X86_BR_RET;
+		break;
+	case 0xcf: /* iret */
+		ret = X86_BR_IRET;
+		break;
+	case 0xcc ... 0xce: /* int */
+		ret = X86_BR_INT;
+		break;
+	case 0xe8: /* call near rel */
+	case 0x9a: /* call far absolute */
+		ret = X86_BR_CALL;
+		break;
+	case 0xe0 ... 0xe3: /* loop jmp */
+		ret = X86_BR_JCC;
+		break;
+	case 0xe9 ... 0xeb: /* jmp */
+		ret = X86_BR_JMP;
+		break;
+	case 0xff: /* call near absolute, call far absolute ind */
+		insn_get_modrm(&insn);
+		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+		switch (ext) {
+		case 2: /* near ind call */
+		case 3: /* far ind call */
+			ret = X86_BR_IND_CALL;
+			break;
+		case 4:
+		case 5:
+			ret = X86_BR_JMP;
+			break;
+		}
+		break;
+	default:
+		ret = X86_BR_NONE;
+	}
+	/*
+	 * interrupts, traps, faults (and thus ring transition) may
+	 * occur on any instruction. Thus, to classify them correctly,
+	 * we need to first look at the from and to priv levels. If they
+	 * are different and to is in the kernel, then it indicates
+	 * a ring transition. If the from instruction is not a ring
+	 * transition instr (syscall, sysenter, int), then it means
+	 * it was an irq, trap or fault.
+	 *
+	 * we have no way of detecting kernel to kernel faults.
+	 */
+	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+		ret = X86_BR_IRQ;
+
+	/*
+	 * branch priv level determined by target as
+	 * is done by HW when LBR_SELECT is implemented
+	 */
+	if (ret != X86_BR_NONE)
+		ret |= to_plm;
+
+	return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+	u64 from, to;
+	int br_sel = cpuc->br_sel;
+	int i, j, type;
+	bool compress = false;
+
+	/* if sampling all branches, then nothing to filter */
+	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+		return;
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+		from = cpuc->lbr_entries[i].from;
+		to = cpuc->lbr_entries[i].to;
+
+		type = branch_type(from, to);
+
+		/* if type does not correspond, then discard */
+		if (type == X86_BR_NONE || (br_sel & type) != type) {
+			cpuc->lbr_entries[i].from = 0;
+			compress = true;
+		}
+	}
+
+	if (!compress)
+		return;
+
+	/* remove all entries with from=0 */
+	for (i = 0; i < cpuc->lbr_stack.nr; ) {
+		if (!cpuc->lbr_entries[i].from) {
+			j = i;
+			while (++j < cpuc->lbr_stack.nr)
+				cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+			cpuc->lbr_stack.nr--;
+			if (!cpuc->lbr_entries[i].from)
+				continue;
+		}
+		i++;
+	}
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
+					| LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+	 */
+	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+	 */
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
+					| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+};
+
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("4-deep LBR, ");
 }
 
+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x680;
-	x86_pmu.lbr_to     = 0x6c0;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - workaround LBR_SEL errata (see above)
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR but that means far
+	 *   jmps need to be filtered out
+	 */
+	pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+	x86_pmu.lbr_nr	 = 16;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - support syscall, sysret capture.
+	 *   That requires LBR_FAR but that means far
+	 *   jmps need to be filtered out
+	 */
+	pr_cont("16-deep LBR, ");
 }
 
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
+	/*
+	 * only models starting at stepping 10 seem
+	 * to have an operational LBR which can freeze
+	 * on PMU interrupt
+	 */
+	if (boot_cpu_data.x86_mask < 10) {
+		pr_cont("LBR disabled due to erratum");
+		return;
+	}
+
 	x86_pmu.lbr_nr	   = 8;
-	x86_pmu.lbr_tos    = 0x01c9;
-	x86_pmu.lbr_from   = 0x40;
-	x86_pmu.lbr_to     = 0x60;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("8-deep LBR, ");
 }
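As a worked illustration of the suppress-mode inversion in intel_pmu_setup_hw_lbr_filter() above (the requested combination is hypothetical; the constants are the ones defined in this patch):

/*
 * branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_ANY_CALL
 * on Sandy Bridge:
 *
 *   snb_lbr_sel_map[PERF_SAMPLE_BRANCH_KERNEL]   = LBR_KERNEL          = 0x001
 *   snb_lbr_sel_map[PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL
 *                                                | LBR_IND_CALL
 *                                                | LBR_FAR             = 0x118
 *   mask = 0x119
 *
 * LBR_SELECT suppresses whatever is set, so the programmed value is
 *
 *   reg->config = ~0x119 & LBR_SEL_MASK = 0x0e6
 *
 * i.e. suppress ring>0 captures, conditionals, returns and all jumps,
 * leaving exactly kernel-level call-type branches.
 */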
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 64426b71381f..bd9f55a5958d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,11 +129,40 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PERIOD			= 1U << 8,
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
+	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 
-	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
 };
 
 /*
+ * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ *
+ * If the user does not pass priv level information via branch_sample_type,
+ * the kernel uses the event's priv level. Branch and event priv levels do
+ * not have to match. Branch priv level is checked for permissions.
+ *
+ * The branch types can be combined, however BRANCH_ANY covers all types
+ * of branches and therefore it supersedes all the other types.
+ */
+enum perf_branch_sample_type {
+	PERF_SAMPLE_BRANCH_USER		= 1U << 0, /* user branches */
+	PERF_SAMPLE_BRANCH_KERNEL	= 1U << 1, /* kernel branches */
+	PERF_SAMPLE_BRANCH_HV		= 1U << 2, /* hypervisor branches */
+
+	PERF_SAMPLE_BRANCH_ANY		= 1U << 3, /* any branch types */
+	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4, /* any call branch */
+	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5, /* any return branch */
+	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6, /* indirect calls */
+
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 7, /* non-ABI */
+};
+
+#define PERF_SAMPLE_BRANCH_PLM_ALL \
+	(PERF_SAMPLE_BRANCH_USER|\
+	 PERF_SAMPLE_BRANCH_KERNEL|\
+	 PERF_SAMPLE_BRANCH_HV)
+
+/*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
  *
@@ -163,6 +192,8 @@ enum perf_event_read_format { | |||
163 | }; | 192 | }; |
164 | 193 | ||
165 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ | 194 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
195 | #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ | ||
196 | #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ | ||
166 | 197 | ||
167 | /* | 198 | /* |
168 | * Hardware event_id to monitor via a performance monitoring event: | 199 | * Hardware event_id to monitor via a performance monitoring event: |
@@ -240,6 +271,7 @@ struct perf_event_attr { | |||
240 | __u64 bp_len; | 271 | __u64 bp_len; |
241 | __u64 config2; /* extension of config1 */ | 272 | __u64 config2; /* extension of config1 */ |
242 | }; | 273 | }; |
274 | __u64 branch_sample_type; /* enum branch_sample_type */ | ||
243 | }; | 275 | }; |
244 | 276 | ||
245 | /* | 277 | /* |
@@ -458,6 +490,8 @@ enum perf_event_type { | |||
458 | * | 490 | * |
459 | * { u32 size; | 491 | * { u32 size; |
460 | * char data[size];}&& PERF_SAMPLE_RAW | 492 | * char data[size];}&& PERF_SAMPLE_RAW |
493 | * | ||
494 | * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK | ||
461 | * }; | 495 | * }; |
462 | */ | 496 | */ |
463 | PERF_RECORD_SAMPLE = 9, | 497 | PERF_RECORD_SAMPLE = 9, |
@@ -530,12 +564,34 @@ struct perf_raw_record { | |||
530 | void *data; | 564 | void *data; |
531 | }; | 565 | }; |
532 | 566 | ||
567 | /* | ||
568 | * single taken branch record layout: | ||
569 | * | ||
570 | * from: source instruction (may not always be a branch insn) | ||
571 | * to: branch target | ||
572 | * mispred: branch target was mispredicted | ||
573 | * predicted: branch target was predicted | ||
574 | * | ||
575 | * support for mispred and predicted is optional. When it | ||
576 | * is not supported, mispred = predicted = 0. | ||
577 | */ | ||
533 | struct perf_branch_entry { | 578 | struct perf_branch_entry { |
534 | __u64 from; | 579 | __u64 from; |
535 | __u64 to; | 580 | __u64 to; |
536 | __u64 flags; | 581 | __u64 mispred:1, /* target mispredicted */ |
582 | predicted:1,/* target predicted */ | ||
583 | reserved:62; | ||
537 | }; | 584 | }; |
538 | 585 | ||
586 | /* | ||
587 | * branch stack layout: | ||
588 | * nr: number of taken branches stored in entries[] | ||
589 | * | ||
590 | * Note that nr can vary from sample to sample. | ||
591 | * Branches (from, to) are stored from most recent | ||
592 | * to least recent, i.e., entries[0] contains the most | ||
593 | * recent branch. | ||
594 | */ | ||
539 | struct perf_branch_stack { | 595 | struct perf_branch_stack { |
540 | __u64 nr; | 596 | __u64 nr; |
541 | struct perf_branch_entry entries[0]; | 597 | struct perf_branch_entry entries[0]; |
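A hedged sketch of consuming this layout from a decoded sample (a hypothetical helper, not from the patch):

	#include <linux/perf_event.h>
	#include <stdio.h>

	static void dump_branch_stack(const struct perf_branch_stack *bs)
	{
		__u64 i;

		/* entries[0] is the most recent taken branch */
		for (i = 0; i < bs->nr; i++) {
			const struct perf_branch_entry *e = &bs->entries[i];

			printf("%#llx -> %#llx%s\n",
			       (unsigned long long)e->from,
			       (unsigned long long)e->to,
			       e->mispred ? " (mispredicted)" :
			       e->predicted ? " (predicted)" : "");
		}
	}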
@@ -566,7 +622,9 @@ struct hw_perf_event { | |||
566 | unsigned long event_base; | 622 | unsigned long event_base; |
567 | int idx; | 623 | int idx; |
568 | int last_cpu; | 624 | int last_cpu; |
625 | |||
569 | struct hw_perf_event_extra extra_reg; | 626 | struct hw_perf_event_extra extra_reg; |
627 | struct hw_perf_event_extra branch_reg; | ||
570 | }; | 628 | }; |
571 | struct { /* software */ | 629 | struct { /* software */ |
572 | struct hrtimer hrtimer; | 630 | struct hrtimer hrtimer; |
@@ -690,6 +748,11 @@ struct pmu { | |||
690 | * if no implementation is provided it will default to: event->hw.idx + 1. | 748 | * if no implementation is provided it will default to: event->hw.idx + 1. |
691 | */ | 749 | */ |
692 | int (*event_idx) (struct perf_event *event); /*optional */ | 750 | int (*event_idx) (struct perf_event *event); /*optional */ |
751 | |||
752 | /* | ||
753 | * flush branch stack on context-switches (needed in cpu-wide mode) | ||
754 | */ | ||
755 | void (*flush_branch_stack) (void); | ||
693 | }; | 756 | }; |
694 | 757 | ||
695 | /** | 758 | /** |
@@ -923,7 +986,8 @@ struct perf_event_context { | |||
923 | u64 parent_gen; | 986 | u64 parent_gen; |
924 | u64 generation; | 987 | u64 generation; |
925 | int pin_count; | 988 | int pin_count; |
926 | int nr_cgroups; /* cgroup events present */ | 989 | int nr_cgroups; /* cgroup evts */ |
990 | int nr_branch_stack; /* branch_stack evt */ | ||
927 | struct rcu_head rcu_head; | 991 | struct rcu_head rcu_head; |
928 | }; | 992 | }; |
929 | 993 | ||
@@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, | |||
988 | extern u64 perf_event_read_value(struct perf_event *event, | 1052 | extern u64 perf_event_read_value(struct perf_event *event, |
989 | u64 *enabled, u64 *running); | 1053 | u64 *enabled, u64 *running); |
990 | 1054 | ||
1055 | |||
991 | struct perf_sample_data { | 1056 | struct perf_sample_data { |
992 | u64 type; | 1057 | u64 type; |
993 | 1058 | ||
@@ -1007,12 +1072,14 @@ struct perf_sample_data { | |||
1007 | u64 period; | 1072 | u64 period; |
1008 | struct perf_callchain_entry *callchain; | 1073 | struct perf_callchain_entry *callchain; |
1009 | struct perf_raw_record *raw; | 1074 | struct perf_raw_record *raw; |
1075 | struct perf_branch_stack *br_stack; | ||
1010 | }; | 1076 | }; |
1011 | 1077 | ||
1012 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) | 1078 | static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) |
1013 | { | 1079 | { |
1014 | data->addr = addr; | 1080 | data->addr = addr; |
1015 | data->raw = NULL; | 1081 | data->raw = NULL; |
1082 | data->br_stack = NULL; | ||
1016 | } | 1083 | } |
1017 | 1084 | ||
1018 | extern void perf_output_sample(struct perf_output_handle *handle, | 1085 | extern void perf_output_sample(struct perf_output_handle *handle, |
@@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); | |||
1151 | # define perf_instruction_pointer(regs) instruction_pointer(regs) | 1218 | # define perf_instruction_pointer(regs) instruction_pointer(regs) |
1152 | #endif | 1219 | #endif |
1153 | 1220 | ||
1221 | static inline bool has_branch_stack(struct perf_event *event) | ||
1222 | { | ||
1223 | return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; | ||
1224 | } | ||
1225 | |||
1154 | extern int perf_output_begin(struct perf_output_handle *handle, | 1226 | extern int perf_output_begin(struct perf_output_handle *handle, |
1155 | struct perf_event *event, unsigned int size); | 1227 | struct perf_event *event, unsigned int size); |
1156 | extern void perf_output_end(struct perf_output_handle *handle); | 1228 | extern void perf_output_end(struct perf_output_handle *handle); |
diff --git a/kernel/events/core.c b/kernel/events/core.c index e8b32ac75ce3..c61234b1a988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) | |||
118 | PERF_FLAG_FD_OUTPUT |\ | 118 | PERF_FLAG_FD_OUTPUT |\ |
119 | PERF_FLAG_PID_CGROUP) | 119 | PERF_FLAG_PID_CGROUP) |
120 | 120 | ||
121 | /* | ||
122 | * branch priv levels that need permission checks | ||
123 | */ | ||
124 | #define PERF_SAMPLE_BRANCH_PERM_PLM \ | ||
125 | (PERF_SAMPLE_BRANCH_KERNEL |\ | ||
126 | PERF_SAMPLE_BRANCH_HV) | ||
127 | |||
121 | enum event_type_t { | 128 | enum event_type_t { |
122 | EVENT_FLEXIBLE = 0x1, | 129 | EVENT_FLEXIBLE = 0x1, |
123 | EVENT_PINNED = 0x2, | 130 | EVENT_PINNED = 0x2, |
@@ -130,6 +137,7 @@ enum event_type_t { | |||
130 | */ | 137 | */ |
131 | struct static_key_deferred perf_sched_events __read_mostly; | 138 | struct static_key_deferred perf_sched_events __read_mostly; |
132 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); | 139 | static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); |
140 | static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); | ||
133 | 141 | ||
134 | static atomic_t nr_mmap_events __read_mostly; | 142 | static atomic_t nr_mmap_events __read_mostly; |
135 | static atomic_t nr_comm_events __read_mostly; | 143 | static atomic_t nr_comm_events __read_mostly; |
@@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
881 | if (is_cgroup_event(event)) | 889 | if (is_cgroup_event(event)) |
882 | ctx->nr_cgroups++; | 890 | ctx->nr_cgroups++; |
883 | 891 | ||
892 | if (has_branch_stack(event)) | ||
893 | ctx->nr_branch_stack++; | ||
894 | |||
884 | list_add_rcu(&event->event_entry, &ctx->event_list); | 895 | list_add_rcu(&event->event_entry, &ctx->event_list); |
885 | if (!ctx->nr_events) | 896 | if (!ctx->nr_events) |
886 | perf_pmu_rotate_start(ctx->pmu); | 897 | perf_pmu_rotate_start(ctx->pmu); |
@@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
1020 | cpuctx->cgrp = NULL; | 1031 | cpuctx->cgrp = NULL; |
1021 | } | 1032 | } |
1022 | 1033 | ||
1034 | if (has_branch_stack(event)) | ||
1035 | ctx->nr_branch_stack--; | ||
1036 | |||
1023 | ctx->nr_events--; | 1037 | ctx->nr_events--; |
1024 | if (event->attr.inherit_stat) | 1038 | if (event->attr.inherit_stat) |
1025 | ctx->nr_stat--; | 1039 | ctx->nr_stat--; |
@@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
2195 | } | 2209 | } |
2196 | 2210 | ||
2197 | /* | 2211 | /* |
2212 | * When sampling the branch stack in system-wide mode, it may be necessary | ||
2213 | * to flush the stack on context switch. This happens when the branch | ||
2214 | * stack does not tag its entries with the pid of the current task. | ||
2215 | * Otherwise it becomes impossible to associate a branch entry with a | ||
2216 | * task. This ambiguity is more likely to appear when the branch stack | ||
2217 | * supports priv level filtering and the user sets it to monitor only | ||
2218 | * at the user level (which could be a useful measurement in system-wide | ||
2219 | * mode). In that case, the risk is high of having a branch stack with | ||
2220 | * branches from multiple tasks. Flushing may mean dropping the existing | ||
2221 | * entries or stashing them somewhere in the PMU-specific code layer. | ||
2222 | * | ||
2223 | * This function provides the context switch callback to the lower code | ||
2224 | * layer. It is invoked ONLY when there is at least one system-wide context | ||
2225 | * with at least one active event using taken branch sampling. | ||
2226 | */ | ||
2227 | static void perf_branch_stack_sched_in(struct task_struct *prev, | ||
2228 | struct task_struct *task) | ||
2229 | { | ||
2230 | struct perf_cpu_context *cpuctx; | ||
2231 | struct pmu *pmu; | ||
2232 | unsigned long flags; | ||
2233 | |||
2234 | /* no need to flush branch stack if not changing task */ | ||
2235 | if (prev == task) | ||
2236 | return; | ||
2237 | |||
2238 | local_irq_save(flags); | ||
2239 | |||
2240 | rcu_read_lock(); | ||
2241 | |||
2242 | list_for_each_entry_rcu(pmu, &pmus, entry) { | ||
2243 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
2244 | |||
2245 | /* | ||
2246 | * check if the context has at least one | ||
2247 | * event using PERF_SAMPLE_BRANCH_STACK | ||
2248 | */ | ||
2249 | if (cpuctx->ctx.nr_branch_stack > 0 | ||
2250 | && pmu->flush_branch_stack) { | ||
2251 | |||
2252 | pmu = cpuctx->ctx.pmu; | ||
2253 | |||
2254 | perf_ctx_lock(cpuctx, cpuctx->task_ctx); | ||
2255 | |||
2256 | perf_pmu_disable(pmu); | ||
2257 | |||
2258 | pmu->flush_branch_stack(); | ||
2259 | |||
2260 | perf_pmu_enable(pmu); | ||
2261 | |||
2262 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | ||
2263 | } | ||
2264 | } | ||
2265 | |||
2266 | rcu_read_unlock(); | ||
2267 | |||
2268 | local_irq_restore(flags); | ||
2269 | } | ||
2270 | |||
2271 | /* | ||
2198 | * Called from scheduler to add the events of the current task | 2272 | * Called from scheduler to add the events of the current task |
2199 | * with interrupts disabled. | 2273 | * with interrupts disabled. |
2200 | * | 2274 | * |
@@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev, | |||
2225 | */ | 2299 | */ |
2226 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) | 2300 | if (atomic_read(&__get_cpu_var(perf_cgroup_events))) |
2227 | perf_cgroup_sched_in(prev, task); | 2301 | perf_cgroup_sched_in(prev, task); |
2302 | |||
2303 | /* check for system-wide branch_stack events */ | ||
2304 | if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) | ||
2305 | perf_branch_stack_sched_in(prev, task); | ||
2228 | } | 2306 | } |
2229 | 2307 | ||
2230 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) | 2308 | static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) |
@@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event) | |||
2791 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); | 2869 | atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); |
2792 | static_key_slow_dec_deferred(&perf_sched_events); | 2870 | static_key_slow_dec_deferred(&perf_sched_events); |
2793 | } | 2871 | } |
2872 | |||
2873 | if (has_branch_stack(event)) { | ||
2874 | static_key_slow_dec_deferred(&perf_sched_events); | ||
2875 | /* is system-wide event */ | ||
2876 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
2877 | atomic_dec(&per_cpu(perf_branch_stack_events, | ||
2878 | event->cpu)); | ||
2879 | } | ||
2794 | } | 2880 | } |
2795 | 2881 | ||
2796 | if (event->rb) { | 2882 | if (event->rb) { |
@@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
3907 | } | 3993 | } |
3908 | } | 3994 | } |
3909 | } | 3995 | } |
3996 | |||
3997 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
3998 | if (data->br_stack) { | ||
3999 | size_t size; | ||
4000 | |||
4001 | size = data->br_stack->nr | ||
4002 | * sizeof(struct perf_branch_entry); | ||
4003 | |||
4004 | perf_output_put(handle, data->br_stack->nr); | ||
4005 | perf_output_copy(handle, data->br_stack->entries, size); | ||
4006 | } else { | ||
4007 | /* | ||
4008 | * we always store at least the value of nr | ||
4009 | */ | ||
4010 | u64 nr = 0; | ||
4011 | perf_output_put(handle, nr); | ||
4012 | } | ||
4013 | } | ||
3910 | } | 4014 | } |
3911 | 4015 | ||
3912 | void perf_prepare_sample(struct perf_event_header *header, | 4016 | void perf_prepare_sample(struct perf_event_header *header, |
@@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
3949 | WARN_ON_ONCE(size & (sizeof(u64)-1)); | 4053 | WARN_ON_ONCE(size & (sizeof(u64)-1)); |
3950 | header->size += size; | 4054 | header->size += size; |
3951 | } | 4055 | } |
4056 | |||
4057 | if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
4058 | int size = sizeof(u64); /* nr */ | ||
4059 | if (data->br_stack) { | ||
4060 | size += data->br_stack->nr | ||
4061 | * sizeof(struct perf_branch_entry); | ||
4062 | } | ||
4063 | header->size += size; | ||
4064 | } | ||
3952 | } | 4065 | } |
3953 | 4066 | ||
3954 | static void perf_event_output(struct perf_event *event, | 4067 | static void perf_event_output(struct perf_event *event, |
@@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event) | |||
5010 | if (event->attr.type != PERF_TYPE_SOFTWARE) | 5123 | if (event->attr.type != PERF_TYPE_SOFTWARE) |
5011 | return -ENOENT; | 5124 | return -ENOENT; |
5012 | 5125 | ||
5126 | /* | ||
5127 | * no branch sampling for software events | ||
5128 | */ | ||
5129 | if (has_branch_stack(event)) | ||
5130 | return -EOPNOTSUPP; | ||
5131 | |||
5013 | switch (event_id) { | 5132 | switch (event_id) { |
5014 | case PERF_COUNT_SW_CPU_CLOCK: | 5133 | case PERF_COUNT_SW_CPU_CLOCK: |
5015 | case PERF_COUNT_SW_TASK_CLOCK: | 5134 | case PERF_COUNT_SW_TASK_CLOCK: |
@@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event) | |||
5120 | if (event->attr.type != PERF_TYPE_TRACEPOINT) | 5239 | if (event->attr.type != PERF_TYPE_TRACEPOINT) |
5121 | return -ENOENT; | 5240 | return -ENOENT; |
5122 | 5241 | ||
5242 | /* | ||
5243 | * no branch sampling for tracepoint events | ||
5244 | */ | ||
5245 | if (has_branch_stack(event)) | ||
5246 | return -EOPNOTSUPP; | ||
5247 | |||
5123 | err = perf_trace_init(event); | 5248 | err = perf_trace_init(event); |
5124 | if (err) | 5249 | if (err) |
5125 | return err; | 5250 | return err; |
@@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event) | |||
5345 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) | 5470 | if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) |
5346 | return -ENOENT; | 5471 | return -ENOENT; |
5347 | 5472 | ||
5473 | /* | ||
5474 | * no branch sampling for software events | ||
5475 | */ | ||
5476 | if (has_branch_stack(event)) | ||
5477 | return -EOPNOTSUPP; | ||
5478 | |||
5348 | perf_swevent_init_hrtimer(event); | 5479 | perf_swevent_init_hrtimer(event); |
5349 | 5480 | ||
5350 | return 0; | 5481 | return 0; |
@@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event) | |||
5419 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) | 5550 | if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) |
5420 | return -ENOENT; | 5551 | return -ENOENT; |
5421 | 5552 | ||
5553 | /* | ||
5554 | * no branch sampling for software events | ||
5555 | */ | ||
5556 | if (has_branch_stack(event)) | ||
5557 | return -EOPNOTSUPP; | ||
5558 | |||
5422 | perf_swevent_init_hrtimer(event); | 5559 | perf_swevent_init_hrtimer(event); |
5423 | 5560 | ||
5424 | return 0; | 5561 | return 0; |
@@ -5866,6 +6003,12 @@ done: | |||
5866 | return ERR_PTR(err); | 6003 | return ERR_PTR(err); |
5867 | } | 6004 | } |
5868 | } | 6005 | } |
6006 | if (has_branch_stack(event)) { | ||
6007 | static_key_slow_inc(&perf_sched_events.key); | ||
6008 | if (!(event->attach_state & PERF_ATTACH_TASK)) | ||
6009 | atomic_inc(&per_cpu(perf_branch_stack_events, | ||
6010 | event->cpu)); | ||
6011 | } | ||
5869 | } | 6012 | } |
5870 | 6013 | ||
5871 | return event; | 6014 | return event; |
@@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
5935 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) | 6078 | if (attr->read_format & ~(PERF_FORMAT_MAX-1)) |
5936 | return -EINVAL; | 6079 | return -EINVAL; |
5937 | 6080 | ||
6081 | if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { | ||
6082 | u64 mask = attr->branch_sample_type; | ||
6083 | |||
6084 | /* only using defined bits */ | ||
6085 | if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) | ||
6086 | return -EINVAL; | ||
6087 | |||
6088 | /* at least one branch bit must be set */ | ||
6089 | if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) | ||
6090 | return -EINVAL; | ||
6091 | |||
6092 | /* kernel level capture: check permissions */ | ||
6093 | if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) | ||
6094 | && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
6095 | return -EACCES; | ||
6096 | |||
6097 | /* propagate priv level, when not set for branch */ | ||
6098 | if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { | ||
6099 | |||
6100 | /* exclude_kernel checked on syscall entry */ | ||
6101 | if (!attr->exclude_kernel) | ||
6102 | mask |= PERF_SAMPLE_BRANCH_KERNEL; | ||
6103 | |||
6104 | if (!attr->exclude_user) | ||
6105 | mask |= PERF_SAMPLE_BRANCH_USER; | ||
6106 | |||
6107 | if (!attr->exclude_hv) | ||
6108 | mask |= PERF_SAMPLE_BRANCH_HV; | ||
6109 | /* | ||
6110 | * adjust user setting (for HW filter setup) | ||
6111 | */ | ||
6112 | attr->branch_sample_type = mask; | ||
6113 | } | ||
6114 | } | ||
5938 | out: | 6115 | out: |
5939 | return ret; | 6116 | return ret; |
5940 | 6117 | ||
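To make the propagation rule concrete, a worked example (a sketch of the effect, not kernel code): the caller passes branch_sample_type = PERF_SAMPLE_BRANCH_ANY, i.e. no priv level bits, on an event with exclude_kernel = 1 and exclude_user = exclude_hv = 0:

	u64 mask = PERF_SAMPLE_BRANCH_ANY;  /* no PLM bit from the caller    */
	/* the PERM_PLM check above sees no KERNEL/HV bit: no -EACCES here   */
	/* exclude_kernel == 1: KERNEL is not propagated                     */
	mask |= PERF_SAMPLE_BRANCH_USER;    /* exclude_user == 0             */
	mask |= PERF_SAMPLE_BRANCH_HV;      /* exclude_hv == 0               */
	/* attr->branch_sample_type becomes ANY | USER | HV; the propagated  */
	/* levels ride on the event's own, already-validated priv settings   */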
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 3330022a7ac1..bb38c4d3ee12 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp) | |||
581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) | 581 | if (bp->attr.type != PERF_TYPE_BREAKPOINT) |
582 | return -ENOENT; | 582 | return -ENOENT; |
583 | 583 | ||
584 | /* | ||
585 | * no branch sampling for breakpoint events | ||
586 | */ | ||
587 | if (has_branch_stack(bp)) | ||
588 | return -EOPNOTSUPP; | ||
589 | |||
584 | err = register_perf_hw_breakpoint(bp); | 590 | err = register_perf_hw_breakpoint(bp); |
585 | if (err) | 591 | if (err) |
586 | return err; | 592 | return err; |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a5766b4b0125..a1386b2fff00 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha | |||
152 | corresponding events, i.e., they always refer to events defined earlier on the command | 152 | corresponding events, i.e., they always refer to events defined earlier on the command |
153 | line. | 153 | line. |
154 | 154 | ||
155 | -b:: | ||
156 | --branch-any:: | ||
157 | Enable taken branch stack sampling. Any type of taken branch may be sampled. | ||
158 | This is a shortcut for --branch-filter any. See --branch-filter for more information. | ||
159 | |||
160 | -j:: | ||
161 | --branch-filter:: | ||
162 | Enable taken branch stack sampling. Each sample captures a series of consecutive | ||
163 | taken branches. The number of branches captured with each sample depends on the | ||
164 | underlying hardware, the type of branches of interest, and the executed code. | ||
165 | It is possible to select the types of branches captured by enabling filters. The | ||
166 | following filters are defined: | ||
167 | |||
168 | - any: any type of branches | ||
169 | - any_call: any function call or system call | ||
170 | - any_ret: any function return or system call return | ||
171 | - ind_call: any indirect branch | ||
172 | - u: only when the branch target is at the user level | ||
173 | - k: only when the branch target is in the kernel | ||
174 | - hv: only when the target is at the hypervisor level | ||
175 | |||
176 | + | ||
177 | The option requires at least one branch type among any, any_call, any_ret, ind_call. | ||
178 | The privilege levels may be omitted, in which case the privilege levels of the associated | ||
179 | event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege | ||
180 | levels are subject to permissions. When sampling on multiple events, branch stack sampling | ||
181 | is enabled for all the sampling events. The sampled branch type is the same for all events. | ||
182 | The various filters must be specified as a comma-separated list: --branch-filter any_ret,u,k | ||
183 | Note that this feature may not be available on all processors. | ||
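For example (./myworkload is a placeholder):

	# sample any type of taken branch, same as --branch-filter any
	perf record -b -e cycles ./myworkload

	# capture only user-level calls and returns
	perf record -j any_call,any_ret,u -e cycles ./myworkload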
184 | |||
155 | SEE ALSO | 185 | SEE ALSO |
156 | -------- | 186 | -------- |
157 | linkperf:perf-stat[1], linkperf:perf-list[1] | 187 | linkperf:perf-stat[1], linkperf:perf-list[1] |
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b430e98712e..87feeee8b90c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -153,6 +153,16 @@ OPTIONS | |||
153 | information which may be very large and thus may clutter the display. | 153 | information which may be very large and thus may clutter the display. |
154 | It currently includes: cpu and numa topology of the host system. | 154 | It currently includes: cpu and numa topology of the host system. |
155 | 155 | ||
156 | -b:: | ||
157 | --branch-stack:: | ||
158 | Use the addresses of sampled taken branches instead of the instruction | ||
159 | address to build the histograms. To generate meaningful output, the | ||
160 | perf.data file must have been obtained using perf record -b or | ||
161 | perf record --branch-filter xxx where xxx is a branch filter option. | ||
162 | perf report is able to auto-detect whether a perf.data file contains | ||
163 | branch stacks and it will automatically switch to the branch view mode, | ||
164 | unless --no-branch-stack is used. | ||
165 | |||
156 | SEE ALSO | 166 | SEE ALSO |
157 | -------- | 167 | -------- |
158 | linkperf:perf-stat[1], linkperf:perf-annotate[1] | 168 | linkperf:perf-stat[1], linkperf:perf-annotate[1] |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 75d230fef202..be4e1eee782e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
473 | if (!have_tracepoints(&evsel_list->entries)) | 473 | if (!have_tracepoints(&evsel_list->entries)) |
474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); | 474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); |
475 | 475 | ||
476 | if (!rec->opts.branch_stack) | ||
477 | perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); | ||
478 | |||
476 | if (!rec->file_new) { | 479 | if (!rec->file_new) { |
477 | err = perf_session__read_header(session, output); | 480 | err = perf_session__read_header(session, output); |
478 | if (err < 0) | 481 | if (err < 0) |
@@ -638,6 +641,90 @@ out_delete_session: | |||
638 | return err; | 641 | return err; |
639 | } | 642 | } |
640 | 643 | ||
644 | #define BRANCH_OPT(n, m) \ | ||
645 | { .name = n, .mode = (m) } | ||
646 | |||
647 | #define BRANCH_END { .name = NULL } | ||
648 | |||
649 | struct branch_mode { | ||
650 | const char *name; | ||
651 | int mode; | ||
652 | }; | ||
653 | |||
654 | static const struct branch_mode branch_modes[] = { | ||
655 | BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), | ||
656 | BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), | ||
657 | BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), | ||
658 | BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), | ||
659 | BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), | ||
660 | BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), | ||
661 | BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), | ||
662 | BRANCH_END | ||
663 | }; | ||
664 | |||
665 | static int | ||
666 | parse_branch_stack(const struct option *opt, const char *str, int unset) | ||
667 | { | ||
668 | #define ONLY_PLM \ | ||
669 | (PERF_SAMPLE_BRANCH_USER |\ | ||
670 | PERF_SAMPLE_BRANCH_KERNEL |\ | ||
671 | PERF_SAMPLE_BRANCH_HV) | ||
672 | |||
673 | uint64_t *mode = (uint64_t *)opt->value; | ||
674 | const struct branch_mode *br; | ||
675 | char *s, *os = NULL, *p; | ||
676 | int ret = -1; | ||
677 | |||
678 | if (unset) | ||
679 | return 0; | ||
680 | |||
681 | /* | ||
682 | * cannot set it twice, -b + --branch-filter for instance | ||
683 | */ | ||
684 | if (*mode) | ||
685 | return -1; | ||
686 | |||
687 | /* str may be NULL in case no arg is passed to -b */ | ||
688 | if (str) { | ||
689 | /* because str is read-only */ | ||
690 | s = os = strdup(str); | ||
691 | if (!s) | ||
692 | return -1; | ||
693 | |||
694 | for (;;) { | ||
695 | p = strchr(s, ','); | ||
696 | if (p) | ||
697 | *p = '\0'; | ||
698 | |||
699 | for (br = branch_modes; br->name; br++) { | ||
700 | if (!strcasecmp(s, br->name)) | ||
701 | break; | ||
702 | } | ||
703 | if (!br->name) { | ||
704 | ui__warning("unknown branch filter %s," | ||
705 | " check man page\n", s); | ||
706 | goto error; | ||
707 | } | ||
708 | |||
709 | *mode |= br->mode; | ||
710 | |||
711 | if (!p) | ||
712 | break; | ||
713 | |||
714 | s = p + 1; | ||
715 | } | ||
716 | } | ||
717 | ret = 0; | ||
718 | |||
719 | /* default to any branch */ | ||
720 | if ((*mode & ~ONLY_PLM) == 0) { | ||
721 | *mode = PERF_SAMPLE_BRANCH_ANY; | ||
722 | } | ||
723 | error: | ||
724 | free(os); | ||
725 | return ret; | ||
726 | } | ||
727 | |||
641 | static const char * const record_usage[] = { | 728 | static const char * const record_usage[] = { |
642 | "perf record [<options>] [<command>]", | 729 | "perf record [<options>] [<command>]", |
643 | "perf record [<options>] -- <command> [<options>]", | 730 | "perf record [<options>] -- <command> [<options>]", |
@@ -727,6 +814,14 @@ const struct option record_options[] = { | |||
727 | "monitor event in cgroup name only", | 814 | "monitor event in cgroup name only", |
728 | parse_cgroups), | 815 | parse_cgroups), |
729 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), | 816 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), |
817 | |||
818 | OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, | ||
819 | "branch any", "sample any taken branches", | ||
820 | parse_branch_stack), | ||
821 | |||
822 | OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, | ||
823 | "branch filter mask", "branch stack filter modes", | ||
824 | parse_branch_stack), | ||
730 | OPT_END() | 825 | OPT_END() |
731 | }; | 826 | }; |
732 | 827 | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..8e91c6eba18a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -53,6 +53,82 @@ struct perf_report { | |||
53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); | 53 | DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); |
54 | }; | 54 | }; |
55 | 55 | ||
56 | static int perf_report__add_branch_hist_entry(struct perf_tool *tool, | ||
57 | struct addr_location *al, | ||
58 | struct perf_sample *sample, | ||
59 | struct perf_evsel *evsel, | ||
60 | struct machine *machine) | ||
61 | { | ||
62 | struct perf_report *rep = container_of(tool, struct perf_report, tool); | ||
63 | struct symbol *parent = NULL; | ||
64 | int err = 0; | ||
65 | unsigned i; | ||
66 | struct hist_entry *he; | ||
67 | struct branch_info *bi, *bx; | ||
68 | |||
69 | if ((sort__has_parent || symbol_conf.use_callchain) | ||
70 | && sample->callchain) { | ||
71 | err = machine__resolve_callchain(machine, evsel, al->thread, | ||
72 | sample->callchain, &parent); | ||
73 | if (err) | ||
74 | return err; | ||
75 | } | ||
76 | |||
77 | bi = machine__resolve_bstack(machine, al->thread, | ||
78 | sample->branch_stack); | ||
79 | if (!bi) | ||
80 | return -ENOMEM; | ||
81 | |||
82 | for (i = 0; i < sample->branch_stack->nr; i++) { | ||
83 | if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) | ||
84 | continue; | ||
85 | /* | ||
86 | * The report shows the percentage of total branches captured | ||
87 | * and not events sampled. Thus we use a pseudo period of 1. | ||
88 | */ | ||
89 | he = __hists__add_branch_entry(&evsel->hists, al, parent, | ||
90 | &bi[i], 1); | ||
91 | if (he) { | ||
92 | struct annotation *notes; | ||
93 | err = -ENOMEM; | ||
94 | bx = he->branch_info; | ||
95 | if (bx->from.sym && use_browser > 0) { | ||
96 | notes = symbol__annotation(bx->from.sym); | ||
97 | if (!notes->src | ||
98 | && symbol__alloc_hist(bx->from.sym) < 0) | ||
99 | goto out; | ||
100 | |||
101 | err = symbol__inc_addr_samples(bx->from.sym, | ||
102 | bx->from.map, | ||
103 | evsel->idx, | ||
104 | bx->from.al_addr); | ||
105 | if (err) | ||
106 | goto out; | ||
107 | } | ||
108 | |||
109 | if (bx->to.sym && use_browser > 0) { | ||
110 | notes = symbol__annotation(bx->to.sym); | ||
111 | if (!notes->src | ||
112 | && symbol__alloc_hist(bx->to.sym) < 0) | ||
113 | goto out; | ||
114 | |||
115 | err = symbol__inc_addr_samples(bx->to.sym, | ||
116 | bx->to.map, | ||
117 | evsel->idx, | ||
118 | bx->to.al_addr); | ||
119 | if (err) | ||
120 | goto out; | ||
121 | } | ||
122 | evsel->hists.stats.total_period += 1; | ||
123 | hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); | ||
124 | err = 0; | ||
125 | } else | ||
126 | return -ENOMEM; | ||
127 | } | ||
128 | out: | ||
129 | return err; | ||
130 | } | ||
131 | |||
56 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, | 132 | static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, |
57 | struct addr_location *al, | 133 | struct addr_location *al, |
58 | struct perf_sample *sample, | 134 | struct perf_sample *sample, |
@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool, | |||
126 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) | 202 | if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) |
127 | return 0; | 203 | return 0; |
128 | 204 | ||
129 | if (al.map != NULL) | 205 | if (sort__branch_mode == 1) { |
130 | al.map->dso->hit = 1; | 206 | if (perf_report__add_branch_hist_entry(tool, &al, sample, |
207 | evsel, machine)) { | ||
208 | pr_debug("problem adding lbr entry, skipping event\n"); | ||
209 | return -1; | ||
210 | } | ||
211 | } else { | ||
212 | if (al.map != NULL) | ||
213 | al.map->dso->hit = 1; | ||
131 | 214 | ||
132 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { | 215 | if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { |
133 | pr_debug("problem incrementing symbol period, skipping event\n"); | 216 | pr_debug("problem incrementing symbol period, skipping event\n"); |
134 | return -1; | 217 | return -1; |
218 | } | ||
135 | } | 219 | } |
136 | |||
137 | return 0; | 220 | return 0; |
138 | } | 221 | } |
139 | 222 | ||
@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) | |||
188 | } | 271 | } |
189 | } | 272 | } |
190 | 273 | ||
274 | if (sort__branch_mode == 1) { | ||
275 | if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { | ||
276 | fprintf(stderr, "selected -b but no branch data." | ||
277 | " Did you call perf record without" | ||
278 | " -b?\n"); | ||
279 | return -1; | ||
280 | } | ||
281 | } | ||
282 | |||
191 | return 0; | 283 | return 0; |
192 | } | 284 | } |
193 | 285 | ||
@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) | |||
246 | { | 338 | { |
247 | int ret = -EINVAL; | 339 | int ret = -EINVAL; |
248 | u64 nr_samples; | 340 | u64 nr_samples; |
249 | struct perf_session *session; | 341 | struct perf_session *session = rep->session; |
250 | struct perf_evsel *pos; | 342 | struct perf_evsel *pos; |
251 | struct map *kernel_map; | 343 | struct map *kernel_map; |
252 | struct kmap *kernel_kmap; | 344 | struct kmap *kernel_kmap; |
@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) | |||
254 | 346 | ||
255 | signal(SIGINT, sig_handler); | 347 | signal(SIGINT, sig_handler); |
256 | 348 | ||
257 | session = perf_session__new(rep->input_name, O_RDONLY, | ||
258 | rep->force, false, &rep->tool); | ||
259 | if (session == NULL) | ||
260 | return -ENOMEM; | ||
261 | |||
262 | rep->session = session; | ||
263 | |||
264 | if (rep->cpu_list) { | 349 | if (rep->cpu_list) { |
265 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, | 350 | ret = perf_session__cpu_bitmap(session, rep->cpu_list, |
266 | rep->cpu_bitmap); | 351 | rep->cpu_bitmap); |
@@ -427,9 +512,19 @@ setup: | |||
427 | return 0; | 512 | return 0; |
428 | } | 513 | } |
429 | 514 | ||
515 | static int | ||
516 | parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) | ||
517 | { | ||
518 | sort__branch_mode = !unset; | ||
519 | return 0; | ||
520 | } | ||
521 | |||
430 | int cmd_report(int argc, const char **argv, const char *prefix __used) | 522 | int cmd_report(int argc, const char **argv, const char *prefix __used) |
431 | { | 523 | { |
524 | struct perf_session *session; | ||
432 | struct stat st; | 525 | struct stat st; |
526 | bool has_br_stack = false; | ||
527 | int ret = -1; | ||
433 | char callchain_default_opt[] = "fractal,0.5,callee"; | 528 | char callchain_default_opt[] = "fractal,0.5,callee"; |
434 | const char * const report_usage[] = { | 529 | const char * const report_usage[] = { |
435 | "perf report [<options>]", | 530 | "perf report [<options>]", |
@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
477 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, | 572 | OPT_BOOLEAN(0, "stdio", &report.use_stdio, |
478 | "Use the stdio interface"), | 573 | "Use the stdio interface"), |
479 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 574 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
480 | "sort by key(s): pid, comm, dso, symbol, parent"), | 575 | "sort by key(s): pid, comm, dso, symbol, parent, dso_to," |
576 | " dso_from, symbol_to, symbol_from, mispredict"), | ||
481 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, | 577 | OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, |
482 | "Show sample percentage for different cpu modes"), | 578 | "Show sample percentage for different cpu modes"), |
483 | OPT_STRING('p', "parent", &parent_pattern, "regex", | 579 | OPT_STRING('p', "parent", &parent_pattern, "regex", |
@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
517 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 613 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
518 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, | 614 | OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, |
519 | "Show a column with the sum of periods"), | 615 | "Show a column with the sum of periods"), |
616 | OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", | ||
617 | "use branch records for histogram filling", parse_branch_mode), | ||
520 | OPT_END() | 618 | OPT_END() |
521 | }; | 619 | }; |
522 | 620 | ||
@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
536 | else | 634 | else |
537 | report.input_name = "perf.data"; | 635 | report.input_name = "perf.data"; |
538 | } | 636 | } |
637 | session = perf_session__new(report.input_name, O_RDONLY, | ||
638 | report.force, false, &report.tool); | ||
639 | if (session == NULL) | ||
640 | return -ENOMEM; | ||
539 | 641 | ||
540 | if (strcmp(report.input_name, "-") != 0) | 642 | report.session = session; |
643 | |||
644 | has_br_stack = perf_header__has_feat(&session->header, | ||
645 | HEADER_BRANCH_STACK); | ||
646 | |||
647 | if (sort__branch_mode == -1 && has_br_stack) | ||
648 | sort__branch_mode = 1; | ||
649 | |||
650 | /* sort__branch_mode could be 0 if --no-branch-stack */ | ||
651 | if (sort__branch_mode == 1) { | ||
652 | /* | ||
653 | * if no sort_order is provided, then specify | ||
654 | * branch-mode specific order | ||
655 | */ | ||
656 | if (sort_order == default_sort_order) | ||
657 | sort_order = "comm,dso_from,symbol_from," | ||
658 | "dso_to,symbol_to"; | ||
659 | |||
660 | } | ||
661 | |||
662 | if (strcmp(report.input_name, "-") != 0) { | ||
541 | setup_browser(true); | 663 | setup_browser(true); |
542 | else | 664 | } else { |
543 | use_browser = 0; | 665 | use_browser = 0; |
666 | } | ||
544 | 667 | ||
545 | /* | 668 | /* |
546 | * Only in the newt browser we are doing integrated annotation, | 669 | * Only in the newt browser we are doing integrated annotation, |
@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
568 | } | 691 | } |
569 | 692 | ||
570 | if (symbol__init() < 0) | 693 | if (symbol__init() < 0) |
571 | return -1; | 694 | goto error; |
572 | 695 | ||
573 | setup_sorting(report_usage, options); | 696 | setup_sorting(report_usage, options); |
574 | 697 | ||
575 | if (parent_pattern != default_parent_pattern) { | 698 | if (parent_pattern != default_parent_pattern) { |
576 | if (sort_dimension__add("parent") < 0) | 699 | if (sort_dimension__add("parent") < 0) |
577 | return -1; | 700 | goto error; |
578 | 701 | ||
579 | /* | 702 | /* |
580 | * Only show the parent fields if we explicitly | 703 | * Only show the parent fields if we explicitly |
@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) | |||
592 | if (argc) | 715 | if (argc) |
593 | usage_with_options(report_usage, options); | 716 | usage_with_options(report_usage, options); |
594 | 717 | ||
595 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
596 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); | 718 | sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); |
597 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
598 | 719 | ||
599 | return __cmd_report(&report); | 720 | if (sort__branch_mode == 1) { |
721 | sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); | ||
722 | sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); | ||
723 | sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); | ||
724 | sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); | ||
725 | } else { | ||
726 | sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); | ||
727 | sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); | ||
728 | } | ||
729 | |||
730 | ret = __cmd_report(&report); | ||
731 | error: | ||
732 | perf_session__delete(session); | ||
733 | return ret; | ||
600 | } | 734 | } |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f0227e93665d..eec392e48067 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -179,6 +179,23 @@ struct ip_callchain { | |||
179 | u64 ips[0]; | 179 | u64 ips[0]; |
180 | }; | 180 | }; |
181 | 181 | ||
182 | struct branch_flags { | ||
183 | u64 mispred:1; | ||
184 | u64 predicted:1; | ||
185 | u64 reserved:62; | ||
186 | }; | ||
187 | |||
188 | struct branch_entry { | ||
189 | u64 from; | ||
190 | u64 to; | ||
191 | struct branch_flags flags; | ||
192 | }; | ||
193 | |||
194 | struct branch_stack { | ||
195 | u64 nr; | ||
196 | struct branch_entry entries[0]; | ||
197 | }; | ||
198 | |||
182 | extern bool perf_host, perf_guest; | 199 | extern bool perf_host, perf_guest; |
183 | extern const char perf_version_string[]; | 200 | extern const char perf_version_string[]; |
184 | 201 | ||
@@ -205,6 +222,7 @@ struct perf_record_opts { | |||
205 | unsigned int freq; | 222 | unsigned int freq; |
206 | unsigned int mmap_pages; | 223 | unsigned int mmap_pages; |
207 | unsigned int user_freq; | 224 | unsigned int user_freq; |
225 | int branch_stack; | ||
208 | u64 default_interval; | 226 | u64 default_interval; |
209 | u64 user_interval; | 227 | u64 user_interval; |
210 | const char *cpu_list; | 228 | const char *cpu_list; |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e5..1b197280c621 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -81,6 +81,7 @@ struct perf_sample { | |||
81 | u32 raw_size; | 81 | u32 raw_size; |
82 | void *raw_data; | 82 | void *raw_data; |
83 | struct ip_callchain *callchain; | 83 | struct ip_callchain *callchain; |
84 | struct branch_stack *branch_stack; | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | #define BUILD_ID_SIZE 20 | 87 | #define BUILD_ID_SIZE 20 |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 302d49a9f985..f421f7cbc0d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
126 | attr->watermark = 0; | 126 | attr->watermark = 0; |
127 | attr->wakeup_events = 1; | 127 | attr->wakeup_events = 1; |
128 | } | 128 | } |
129 | if (opts->branch_stack) { | ||
130 | attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; | ||
131 | attr->branch_sample_type = opts->branch_stack; | ||
132 | } | ||
129 | 133 | ||
130 | attr->mmap = track; | 134 | attr->mmap = track; |
131 | attr->comm = track; | 135 | attr->comm = track; |
@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
576 | data->raw_data = (void *) pdata; | 580 | data->raw_data = (void *) pdata; |
577 | } | 581 | } |
578 | 582 | ||
583 | if (type & PERF_SAMPLE_BRANCH_STACK) { | ||
584 | u64 sz; | ||
585 | |||
586 | data->branch_stack = (struct branch_stack *)array; | ||
587 | array++; /* nr */ | ||
588 | |||
589 | sz = data->branch_stack->nr * sizeof(struct branch_entry); | ||
590 | sz /= sizeof(u64); | ||
591 | array += sz; | ||
592 | } | ||
579 | return 0; | 593 | return 0; |
580 | } | 594 | } |
581 | 595 | ||
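The skip arithmetic above in bytes, as a sketch (assuming the struct branch_entry layout added to perf.h in this patch, three u64s per entry): the branch-stack section of a sample is one u64 for nr followed by nr entries.

	/* bytes occupied by the branch-stack section of one sample */
	static inline size_t branch_stack_bytes(u64 nr)
	{
		return sizeof(u64) + nr * sizeof(struct branch_entry); /* 8 + nr * 24 */
	}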
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9f867d96c6a5..0d9b6da86a39 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -1023,6 +1023,12 @@ write_it: | |||
1023 | return do_write_string(fd, buffer); | 1023 | return do_write_string(fd, buffer); |
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | static int write_branch_stack(int fd __used, struct perf_header *h __used, | ||
1027 | struct perf_evlist *evlist __used) | ||
1028 | { | ||
1029 | return 0; | ||
1030 | } | ||
1031 | |||
1026 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) | 1032 | static void print_hostname(struct perf_header *ph, int fd, FILE *fp) |
1027 | { | 1033 | { |
1028 | char *str = do_read_string(fd, ph); | 1034 | char *str = do_read_string(fd, ph); |
@@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
1144 | uint64_t id; | 1150 | uint64_t id; |
1145 | void *buf = NULL; | 1151 | void *buf = NULL; |
1146 | char *str; | 1152 | char *str; |
1147 | u32 nre, sz, nr, i, j, msz; | 1153 | u32 nre, sz, nr, i, j; |
1148 | int ret; | 1154 | ssize_t ret; |
1155 | size_t msz; | ||
1149 | 1156 | ||
1150 | /* number of events */ | 1157 | /* number of events */ |
1151 | ret = read(fd, &nre, sizeof(nre)); | 1158 | ret = read(fd, &nre, sizeof(nre)); |
@@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) | |||
1162 | if (ph->needs_swap) | 1169 | if (ph->needs_swap) |
1163 | sz = bswap_32(sz); | 1170 | sz = bswap_32(sz); |
1164 | 1171 | ||
1165 | /* | ||
1166 | * ensure it is at least to our ABI rev | ||
1167 | */ | ||
1168 | if (sz < (u32)sizeof(attr)) | ||
1169 | goto error; | ||
1170 | |||
1171 | memset(&attr, 0, sizeof(attr)); | 1172 | memset(&attr, 0, sizeof(attr)); |
1172 | 1173 | ||
1173 | /* read entire region to sync up to next field */ | 1174 | /* buffer to hold on-file attr struct */ |
1174 | buf = malloc(sz); | 1175 | buf = malloc(sz); |
1175 | if (!buf) | 1176 | if (!buf) |
1176 | goto error; | 1177 | goto error; |
1177 | 1178 | ||
1178 | msz = sizeof(attr); | 1179 | msz = sizeof(attr); |
1179 | if (sz < msz) | 1180 | if (sz < (ssize_t)msz) |
1180 | msz = sz; | 1181 | msz = sz; |
1181 | 1182 | ||
1182 | for (i = 0 ; i < nre; i++) { | 1183 | for (i = 0 ; i < nre; i++) { |
1183 | 1184 | ||
1185 | /* | ||
1186 | * must read entire on-file attr struct to | ||
1187 | * sync up with layout. | ||
1188 | */ | ||
1184 | ret = read(fd, buf, sz); | 1189 | ret = read(fd, buf, sz); |
1185 | if (ret != (ssize_t)sz) | 1190 | if (ret != (ssize_t)sz) |
1186 | goto error; | 1191 | goto error; |
@@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) | |||
1316 | free(str); | 1321 | free(str); |
1317 | } | 1322 | } |
1318 | 1323 | ||
1324 | static void print_branch_stack(struct perf_header *ph __used, int fd __used, | ||
1325 | FILE *fp) | ||
1326 | { | ||
1327 | fprintf(fp, "# contains samples with branch stack\n"); | ||
1328 | } | ||
1329 | |||
1319 | static int __event_process_build_id(struct build_id_event *bev, | 1330 | static int __event_process_build_id(struct build_id_event *bev, |
1320 | char *filename, | 1331 | char *filename, |
1321 | struct perf_session *session) | 1332 | struct perf_session *session) |
@@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | |||
1520 | FEAT_OPA(HEADER_CMDLINE, cmdline), | 1531 | FEAT_OPA(HEADER_CMDLINE, cmdline), |
1521 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), | 1532 | FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), |
1522 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), | 1533 | FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), |
1534 | FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), | ||
1523 | }; | 1535 | }; |
1524 | 1536 | ||
1525 | struct header_print_data { | 1537 | struct header_print_data { |
@@ -1804,35 +1816,101 @@ out_free: | |||
1804 | return err; | 1816 | return err; |
1805 | } | 1817 | } |
1806 | 1818 | ||
1807 | static int check_magic_endian(u64 *magic, struct perf_file_header *header, | 1819 | static const int attr_file_abi_sizes[] = { |
1808 | struct perf_header *ph) | 1820 | [0] = PERF_ATTR_SIZE_VER0, |
1821 | [1] = PERF_ATTR_SIZE_VER1, | ||
1822 | 0, | ||
1823 | }; | ||
1824 | |||
1825 | /* | ||
1826 | * In the legacy file format, the magic number does not encode endianness; | ||
1827 | * hdr_sz was used for that instead. But given that hdr_sz can vary based | ||
1828 | * on ABI revisions, we need to try all known combinations for each | ||
1829 | * endianness to detect it. | ||
1830 | */ | ||
1831 | static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
1809 | { | 1832 | { |
1810 | int ret; | 1833 | uint64_t ref_size, attr_size; |
1834 | int i; | ||
1811 | 1835 | ||
1812 | /* check for legacy format */ | 1836 | for (i = 0 ; attr_file_abi_sizes[i]; i++) { |
1813 | ret = memcmp(magic, __perf_magic1, sizeof(*magic)); | 1837 | ref_size = attr_file_abi_sizes[i] |
1814 | if (ret == 0) { | 1838 | + sizeof(struct perf_file_section); |
1815 | pr_debug("legacy perf.data format\n"); | 1839 | if (hdr_sz != ref_size) { |
1816 | if (!header) | 1840 | attr_size = bswap_64(hdr_sz); |
1817 | return -1; | 1841 | if (attr_size != ref_size) |
1842 | continue; | ||
1818 | 1843 | ||
1819 | if (header->attr_size != sizeof(struct perf_file_attr)) { | 1844 | ph->needs_swap = true; |
1820 | u64 attr_size = bswap_64(header->attr_size); | 1845 | } |
1846 | pr_debug("ABI%d perf.data file detected, need_swap=%d\n", | ||
1847 | i, | ||
1848 | ph->needs_swap); | ||
1849 | return 0; | ||
1850 | } | ||
1851 | /* could not determine endianness */ | ||
1852 | return -1; | ||
1853 | } | ||
1821 | 1854 | ||
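A worked check of the sizes this loop compares, assuming sizeof(struct perf_file_section) is 16 (two u64s): ABI0 expects 64 + 16 = 80 bytes, ABI1 expects 72 + 16 = 88 bytes. If hdr_sz matches neither value nor its byte-swapped form, the endianness cannot be determined and the function returns -1.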
1822 | if (attr_size != sizeof(struct perf_file_attr)) | 1855 | #define PERF_PIPE_HDR_VER0 16 |
1823 | return -1; | 1856 | |
1857 | static const size_t attr_pipe_abi_sizes[] = { | ||
1858 | [0] = PERF_PIPE_HDR_VER0, | ||
1859 | 0, | ||
1860 | }; | ||
1861 | |||
1862 | /* | ||
1863 | * In the legacy pipe format, there is an implicit assumption that the | ||
1864 | * endianness of the host recording the samples and of the host parsing | ||
1865 | * them is the same. This is not always the case given that the pipe output | ||
1866 | * may be redirected into a file and analyzed on a different machine with | ||
1867 | * possibly a different endianness and perf_event ABI revisions in the perf tool itself. | ||
1868 | */ | ||
1869 | static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) | ||
1870 | { | ||
1871 | u64 attr_size; | ||
1872 | int i; | ||
1873 | |||
1874 | for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { | ||
1875 | if (hdr_sz != attr_pipe_abi_sizes[i]) { | ||
1876 | attr_size = bswap_64(hdr_sz); | ||
1877 | if (attr_size != hdr_sz) | ||
1878 | continue; | ||
1824 | 1879 | ||
1825 | ph->needs_swap = true; | 1880 | ph->needs_swap = true; |
1826 | } | 1881 | } |
1882 | pr_debug("Pipe ABI%d perf.data file detected\n", i); | ||
1827 | return 0; | 1883 | return 0; |
1828 | } | 1884 | } |
1885 | return -1; | ||
1886 | } | ||
1887 | |||
1888 | static int check_magic_endian(u64 magic, uint64_t hdr_sz, | ||
1889 | bool is_pipe, struct perf_header *ph) | ||
1890 | { | ||
1891 | int ret; | ||
1892 | |||
1893 | /* check for legacy format */ | ||
1894 | ret = memcmp(&magic, __perf_magic1, sizeof(magic)); | ||
1895 | if (ret == 0) { | ||
1896 | pr_debug("legacy perf.data format\n"); | ||
1897 | if (is_pipe) | ||
1898 | return try_all_pipe_abis(hdr_sz, ph); | ||
1899 | |||
1900 | return try_all_file_abis(hdr_sz, ph); | ||
1901 | } | ||
1902 | /* | ||
1903 | * the new magic number serves two purposes: | ||
1904 | * - unique number to identify actual perf.data files | ||
1905 | * - encode endianness of file | ||
1906 | */ | ||
1829 | 1907 | ||
1830 | /* check magic number with same endianness */ | 1908 | /* check magic number with one endianness */ |
1831 | if (*magic == __perf_magic2) | 1909 | if (magic == __perf_magic2) |
1832 | return 0; | 1910 | return 0; |
1833 | 1911 | ||
1834 | /* check magic number but opposite endianness */ | 1912 | /* check magic number with opposite endianness */ |
1835 | if (*magic != __perf_magic2_sw) | 1913 | if (magic != __perf_magic2_sw) |
1836 | return -1; | 1914 | return -1; |
1837 | 1915 | ||
1838 | ph->needs_swap = true; | 1916 | ph->needs_swap = true; |
@@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header, | |||
1851 | if (ret <= 0) | 1929 | if (ret <= 0) |
1852 | return -1; | 1930 | return -1; |
1853 | 1931 | ||
1854 | if (check_magic_endian(&header->magic, header, ph) < 0) | 1932 | if (check_magic_endian(header->magic, |
1933 | header->attr_size, false, ph) < 0) { | ||
1934 | pr_debug("magic/endian check failed\n"); | ||
1855 | return -1; | 1935 | return -1; |
1936 | } | ||
1856 | 1937 | ||
1857 | if (ph->needs_swap) { | 1938 | if (ph->needs_swap) { |
1858 | mem_bswap_64(header, offsetof(struct perf_file_header, | 1939 | mem_bswap_64(header, offsetof(struct perf_file_header, |
@@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, | |||
1939 | if (ret <= 0) | 2020 | if (ret <= 0) |
1940 | return -1; | 2021 | return -1; |
1941 | 2022 | ||
1942 | if (check_magic_endian(&header->magic, NULL, ph) < 0) | 2023 | if (check_magic_endian(header->magic, header->size, true, ph) < 0) { |
2024 | pr_debug("endian/magic failed\n"); | ||
1943 | return -1; | 2025 | return -1; |
2026 | } | ||
2027 | |||
2028 | if (ph->needs_swap) | ||
2029 | header->size = bswap_64(header->size); | ||
1944 | 2030 | ||
1945 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) | 2031 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) |
1946 | return -1; | 2032 | return -1; |
1947 | 2033 | ||
1948 | if (header->size != sizeof(*header)) { | ||
1949 | u64 size = bswap_64(header->size); | ||
1950 | |||
1951 | if (size != sizeof(*header)) | ||
1952 | return -1; | ||
1953 | |||
1954 | ph->needs_swap = true; | ||
1955 | } | ||
1956 | |||
1957 | return 0; | 2034 | return 0; |
1958 | } | 2035 | } |
1959 | 2036 | ||
@@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) | |||
1973 | return 0; | 2050 | return 0; |
1974 | } | 2051 | } |
1975 | 2052 | ||
2053 | static int read_attr(int fd, struct perf_header *ph, | ||
2054 | struct perf_file_attr *f_attr) | ||
2055 | { | ||
2056 | struct perf_event_attr *attr = &f_attr->attr; | ||
2057 | size_t sz, left; | ||
2058 | size_t our_sz = sizeof(f_attr->attr); | ||
2059 | int ret; | ||
2060 | |||
2061 | memset(f_attr, 0, sizeof(*f_attr)); | ||
2062 | |||
2063 | /* read minimal guaranteed structure */ | ||
2064 | ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); | ||
2065 | if (ret <= 0) { | ||
2066 | pr_debug("cannot read %d bytes of header attr\n", | ||
2067 | PERF_ATTR_SIZE_VER0); | ||
2068 | return -1; | ||
2069 | } | ||
2070 | |||
2071 | /* on file perf_event_attr size */ | ||
2072 | sz = attr->size; | ||
2073 | |||
2074 | if (ph->needs_swap) | ||
2075 | sz = bswap_32(sz); | ||
2076 | |||
2077 | if (sz == 0) { | ||
2078 | /* assume ABI0 */ | ||
2079 | sz = PERF_ATTR_SIZE_VER0; | ||
2080 | } else if (sz > our_sz) { | ||
2081 | pr_debug("file uses a more recent and unsupported ABI" | ||
2082 | " (%zu bytes extra)\n", sz - our_sz); | ||
2083 | return -1; | ||
2084 | } | ||
2085 | /* what we have not yet read and that we know about */ | ||
2086 | left = sz - PERF_ATTR_SIZE_VER0; | ||
2087 | if (left) { | ||
2088 | void *ptr = attr; | ||
2089 | ptr += PERF_ATTR_SIZE_VER0; | ||
2090 | |||
2091 | ret = readn(fd, ptr, left); | ||
2092 | } | ||
2093 | /* read perf_file_section, ids are read in caller */ | ||
2094 | ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); | ||
2095 | |||
2096 | return ret <= 0 ? -1 : 0; | ||
2097 | } | ||
2098 | |||
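For instance, under the ABI sizes defined earlier in this patch, a file written by an ABI1 tool carries attr->size == PERF_ATTR_SIZE_VER1 (72): read_attr() reads the 64 guaranteed bytes first, computes left = 72 - 64 = 8, and reads the remaining 8 bytes (config2) before picking up the perf_file_section with the ids.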
1976 | int perf_session__read_header(struct perf_session *session, int fd) | 2099 | int perf_session__read_header(struct perf_session *session, int fd) |
1977 | { | 2100 | { |
1978 | struct perf_header *header = &session->header; | 2101 | struct perf_header *header = &session->header; |
@@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) | |||
1988 | if (session->fd_pipe) | 2111 | if (session->fd_pipe) |
1989 | return perf_header__read_pipe(session, fd); | 2112 | return perf_header__read_pipe(session, fd); |
1990 | 2113 | ||
1991 | if (perf_file_header__read(&f_header, header, fd) < 0) { | 2114 | if (perf_file_header__read(&f_header, header, fd) < 0) |
1992 | pr_debug("incompatible file format\n"); | ||
1993 | return -EINVAL; | 2115 | return -EINVAL; |
1994 | } | ||
1995 | 2116 | ||
1996 | nr_attrs = f_header.attrs.size / sizeof(f_attr); | 2117 | nr_attrs = f_header.attrs.size / f_header.attr_size; |
1997 | lseek(fd, f_header.attrs.offset, SEEK_SET); | 2118 | lseek(fd, f_header.attrs.offset, SEEK_SET); |
1998 | 2119 | ||
1999 | for (i = 0; i < nr_attrs; i++) { | 2120 | for (i = 0; i < nr_attrs; i++) { |
2000 | struct perf_evsel *evsel; | 2121 | struct perf_evsel *evsel; |
2001 | off_t tmp; | 2122 | off_t tmp; |
2002 | 2123 | ||
2003 | if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) | 2124 | if (read_attr(fd, header, &f_attr) < 0) |
2004 | goto out_errno; | 2125 | goto out_errno; |
2005 | 2126 | ||
2006 | if (header->needs_swap) | 2127 | if (header->needs_swap) |
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index e68f617d082f..21a6be09c129 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
@@ -27,7 +27,7 @@ enum { | |||
27 | HEADER_EVENT_DESC, | 27 | HEADER_EVENT_DESC, |
28 | HEADER_CPU_TOPOLOGY, | 28 | HEADER_CPU_TOPOLOGY, |
29 | HEADER_NUMA_TOPOLOGY, | 29 | HEADER_NUMA_TOPOLOGY, |
30 | 30 | HEADER_BRANCH_STACK, | |
31 | HEADER_LAST_FEATURE, | 31 | HEADER_LAST_FEATURE, |
32 | HEADER_FEAT_BITS = 256, | 32 | HEADER_FEAT_BITS = 256, |
33 | }; | 33 | }; |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f505d1abac7..8380c3db1c92 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) | |||
50 | hists__set_col_len(hists, col, 0); | 50 | hists__set_col_len(hists, col, 0); |
51 | } | 51 | } |
52 | 52 | ||
53 | static void hists__set_unres_dso_col_len(struct hists *hists, int dso) | ||
54 | { | ||
55 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
56 | |||
57 | if (hists__col_len(hists, dso) < unresolved_col_width && | ||
58 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
59 | !symbol_conf.dso_list) | ||
60 | hists__set_col_len(hists, dso, unresolved_col_width); | ||
61 | } | ||
62 | |||
53 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | 63 | static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) |
54 | { | 64 | { |
65 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | ||
55 | u16 len; | 66 | u16 len; |
56 | 67 | ||
57 | if (h->ms.sym) | 68 | if (h->ms.sym) |
58 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); | 69 | hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); |
59 | else { | 70 | else |
60 | const unsigned int unresolved_col_width = BITS_PER_LONG / 4; | 71 | hists__set_unres_dso_col_len(hists, HISTC_DSO); |
61 | |||
62 | if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && | ||
63 | !symbol_conf.col_width_list_str && !symbol_conf.field_sep && | ||
64 | !symbol_conf.dso_list) | ||
65 | hists__set_col_len(hists, HISTC_DSO, | ||
66 | unresolved_col_width); | ||
67 | } | ||
68 | 72 | ||
69 | len = thread__comm_len(h->thread); | 73 | len = thread__comm_len(h->thread); |
70 | if (hists__new_col_len(hists, HISTC_COMM, len)) | 74 | if (hists__new_col_len(hists, HISTC_COMM, len)) |
@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
74 | len = dso__name_len(h->ms.map->dso); | 78 | len = dso__name_len(h->ms.map->dso); |
75 | hists__new_col_len(hists, HISTC_DSO, len); | 79 | hists__new_col_len(hists, HISTC_DSO, len); |
76 | } | 80 | } |
81 | |||
82 | if (h->branch_info) { | ||
83 | int symlen; | ||
84 | /* | ||
85 | * +4 accounts for '[x] ' priv level info | ||
86 | * +2 accounts for the 0x prefix on raw addresses | ||
87 | */ | ||
88 | if (h->branch_info->from.sym) { | ||
89 | symlen = (int)h->branch_info->from.sym->namelen + 4; | ||
90 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
91 | |||
92 | symlen = dso__name_len(h->branch_info->from.map->dso); | ||
93 | hists__new_col_len(hists, HISTC_DSO_FROM, symlen); | ||
94 | } else { | ||
95 | symlen = unresolved_col_width + 4 + 2; | ||
96 | hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); | ||
97 | hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); | ||
98 | } | ||
99 | |||
100 | if (h->branch_info->to.sym) { | ||
101 | symlen = (int)h->branch_info->to.sym->namelen + 4; | ||
102 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
103 | |||
104 | symlen = dso__name_len(h->branch_info->to.map->dso); | ||
105 | hists__new_col_len(hists, HISTC_DSO_TO, symlen); | ||
106 | } else { | ||
107 | symlen = unresolved_col_width + 4 + 2; | ||
108 | hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); | ||
109 | hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); | ||
110 | } | ||
111 | } | ||
77 | } | 112 | } |
78 | 113 | ||
79 | static void hist_entry__add_cpumode_period(struct hist_entry *he, | 114 | static void hist_entry__add_cpumode_period(struct hist_entry *he, |
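The hists__set_unres_dso_col_len() factoring above exists because the new branch columns need the same unresolved-address width in several places. Assuming a 64-bit build (BITS_PER_LONG == 64), the width arithmetic works out as in this small standalone illustration:

    #include <stdio.h>

    #define BITS_PER_LONG 64                /* assumed 64-bit build */

    int main(void)
    {
            const unsigned int unresolved_col_width = BITS_PER_LONG / 4;

            /* 16 hex digits + "[x] " priv tag (+4) + "0x" prefix (+2) */
            unsigned int symlen = unresolved_col_width + 4 + 2;

            /* e.g. "[k] 0x00000000004005d0" occupies 22 columns */
            printf("unresolved symbol column: %u\n", symlen);
            return 0;
    }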
@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) | |||
195 | return 0; | 230 | return 0; |
196 | } | 231 | } |
197 | 232 | ||
198 | struct hist_entry *__hists__add_entry(struct hists *hists, | 233 | static struct hist_entry *add_hist_entry(struct hists *hists, |
234 | struct hist_entry *entry, | ||
199 | struct addr_location *al, | 235 | struct addr_location *al, |
200 | struct symbol *sym_parent, u64 period) | 236 | u64 period) |
201 | { | 237 | { |
202 | struct rb_node **p; | 238 | struct rb_node **p; |
203 | struct rb_node *parent = NULL; | 239 | struct rb_node *parent = NULL; |
204 | struct hist_entry *he; | 240 | struct hist_entry *he; |
205 | struct hist_entry entry = { | ||
206 | .thread = al->thread, | ||
207 | .ms = { | ||
208 | .map = al->map, | ||
209 | .sym = al->sym, | ||
210 | }, | ||
211 | .cpu = al->cpu, | ||
212 | .ip = al->addr, | ||
213 | .level = al->level, | ||
214 | .period = period, | ||
215 | .parent = sym_parent, | ||
216 | .filtered = symbol__parent_filter(sym_parent), | ||
217 | }; | ||
218 | int cmp; | 241 | int cmp; |
219 | 242 | ||
220 | pthread_mutex_lock(&hists->lock); | 243 | pthread_mutex_lock(&hists->lock); |
@@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
225 | parent = *p; | 248 | parent = *p; |
226 | he = rb_entry(parent, struct hist_entry, rb_node_in); | 249 | he = rb_entry(parent, struct hist_entry, rb_node_in); |
227 | 250 | ||
228 | cmp = hist_entry__cmp(&entry, he); | 251 | cmp = hist_entry__cmp(entry, he); |
229 | 252 | ||
230 | if (!cmp) { | 253 | if (!cmp) { |
231 | he->period += period; | 254 | he->period += period; |
@@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, | |||
239 | p = &(*p)->rb_right; | 262 | p = &(*p)->rb_right; |
240 | } | 263 | } |
241 | 264 | ||
242 | he = hist_entry__new(&entry); | 265 | he = hist_entry__new(entry); |
243 | if (!he) | 266 | if (!he) |
244 | goto out_unlock; | 267 | goto out_unlock; |
245 | 268 | ||
@@ -252,6 +275,51 @@ out_unlock: | |||
252 | return he; | 275 | return he; |
253 | } | 276 | } |
254 | 277 | ||
278 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
279 | struct addr_location *al, | ||
280 | struct symbol *sym_parent, | ||
281 | struct branch_info *bi, | ||
282 | u64 period) | ||
283 | { | ||
284 | struct hist_entry entry = { | ||
285 | .thread = al->thread, | ||
286 | .ms = { | ||
287 | .map = bi->to.map, | ||
288 | .sym = bi->to.sym, | ||
289 | }, | ||
290 | .cpu = al->cpu, | ||
291 | .ip = bi->to.addr, | ||
292 | .level = al->level, | ||
293 | .period = period, | ||
294 | .parent = sym_parent, | ||
295 | .filtered = symbol__parent_filter(sym_parent), | ||
296 | .branch_info = bi, | ||
297 | }; | ||
298 | |||
299 | return add_hist_entry(self, &entry, al, period); | ||
300 | } | ||
301 | |||
302 | struct hist_entry *__hists__add_entry(struct hists *self, | ||
303 | struct addr_location *al, | ||
304 | struct symbol *sym_parent, u64 period) | ||
305 | { | ||
306 | struct hist_entry entry = { | ||
307 | .thread = al->thread, | ||
308 | .ms = { | ||
309 | .map = al->map, | ||
310 | .sym = al->sym, | ||
311 | }, | ||
312 | .cpu = al->cpu, | ||
313 | .ip = al->addr, | ||
314 | .level = al->level, | ||
315 | .period = period, | ||
316 | .parent = sym_parent, | ||
317 | .filtered = symbol__parent_filter(sym_parent), | ||
318 | }; | ||
319 | |||
320 | return add_hist_entry(self, &entry, al, period); | ||
321 | } | ||
322 | |||
255 | int64_t | 323 | int64_t |
256 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) | 324 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) |
257 | { | 325 | { |
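With add_hist_entry() factored out, __hists__add_entry() and the new __hists__add_branch_entry() differ only in the template they initialize; locking, the rbtree walk, period merging, and cloning all live in one place. A standalone sketch of that shape with illustrative stand-in types (the real inserter is elided to a stub):

    #include <stdint.h>
    #include <stddef.h>

    struct entry {
            uint64_t ip;
            uint64_t period;
            const void *branch_info;        /* NULL for non-branch entries */
    };

    /* One inserter; the real code locks, walks the rbtree, merges periods
     * on a match and clones the template otherwise (elided here). */
    static struct entry *insert_entry(struct entry *tmpl)
    {
            (void)tmpl;
            return NULL;
    }

    struct entry *add_branch_entry(uint64_t ip, uint64_t period,
                                   const void *bi)
    {
            struct entry e = { .ip = ip, .period = period, .branch_info = bi };

            return insert_entry(&e);
    }

    struct entry *add_entry(uint64_t ip, uint64_t period)
    {
            struct entry e = { .ip = ip, .period = period };

            return insert_entry(&e);
    }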
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 48e5acd1e862..9413f3e31fea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -42,6 +42,11 @@ enum hist_column { | |||
42 | HISTC_COMM, | 42 | HISTC_COMM, |
43 | HISTC_PARENT, | 43 | HISTC_PARENT, |
44 | HISTC_CPU, | 44 | HISTC_CPU, |
45 | HISTC_MISPREDICT, | ||
46 | HISTC_SYMBOL_FROM, | ||
47 | HISTC_SYMBOL_TO, | ||
48 | HISTC_DSO_FROM, | ||
49 | HISTC_DSO_TO, | ||
45 | HISTC_NR_COLS, /* Last entry */ | 50 | HISTC_NR_COLS, /* Last entry */ |
46 | }; | 51 | }; |
47 | 52 | ||
@@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, | |||
74 | struct hists *hists); | 79 | struct hists *hists); |
75 | void hist_entry__free(struct hist_entry *); | 80 | void hist_entry__free(struct hist_entry *); |
76 | 81 | ||
82 | struct hist_entry *__hists__add_branch_entry(struct hists *self, | ||
83 | struct addr_location *al, | ||
84 | struct symbol *sym_parent, | ||
85 | struct branch_info *bi, | ||
86 | u64 period); | ||
87 | |||
77 | void hists__output_resort(struct hists *self); | 88 | void hists__output_resort(struct hists *self); |
78 | void hists__output_resort_threaded(struct hists *hists); | 89 | void hists__output_resort_threaded(struct hists *hists); |
79 | void hists__collapse_resort(struct hists *self); | 90 | void hists__collapse_resort(struct hists *self); |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9f833cf9c6a9..002ebbf59f48 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
24 | self->fd = STDIN_FILENO; | 24 | self->fd = STDIN_FILENO; |
25 | 25 | ||
26 | if (perf_session__read_header(self, self->fd) < 0) | 26 | if (perf_session__read_header(self, self->fd) < 0) |
27 | pr_err("incompatible file format"); | 27 | pr_err("incompatible file format (rerun with -v to learn more)"); |
28 | 28 | ||
29 | return 0; | 29 | return 0; |
30 | } | 30 | } |
@@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) | |||
56 | } | 56 | } |
57 | 57 | ||
58 | if (perf_session__read_header(self, self->fd) < 0) { | 58 | if (perf_session__read_header(self, self->fd) < 0) { |
59 | pr_err("incompatible file format"); | 59 | pr_err("incompatible file format (rerun with -v to learn more)"); |
60 | goto out_close; | 60 | goto out_close; |
61 | } | 61 | } |
62 | 62 | ||
@@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) | |||
229 | return 0; | 229 | return 0; |
230 | } | 230 | } |
231 | 231 | ||
232 | static const u8 cpumodes[] = { | ||
233 | PERF_RECORD_MISC_USER, | ||
234 | PERF_RECORD_MISC_KERNEL, | ||
235 | PERF_RECORD_MISC_GUEST_USER, | ||
236 | PERF_RECORD_MISC_GUEST_KERNEL | ||
237 | }; | ||
238 | #define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) | ||
239 | |||
240 | static void ip__resolve_ams(struct machine *self, struct thread *thread, | ||
241 | struct addr_map_symbol *ams, | ||
242 | u64 ip) | ||
243 | { | ||
244 | struct addr_location al; | ||
245 | size_t i; | ||
246 | u8 m; | ||
247 | |||
248 | memset(&al, 0, sizeof(al)); | ||
249 | |||
250 | for (i = 0; i < NCPUMODES; i++) { | ||
251 | m = cpumodes[i]; | ||
252 | /* | ||
253 | * We cannot use the header.misc hint to determine whether a | ||
254 | * branch stack address is user, kernel, guest or hypervisor. | ||
255 | * Branches may straddle the kernel/user/hypervisor boundaries. | ||
256 | * Thus, we have to try each mode consecutively until we find a | ||
257 | * match; otherwise the symbol remains unknown. | ||
258 | */ | ||
259 | thread__find_addr_location(thread, self, m, MAP__FUNCTION, | ||
260 | ip, &al, NULL); | ||
261 | if (al.sym) | ||
262 | goto found; | ||
263 | } | ||
264 | found: | ||
265 | ams->addr = ip; | ||
266 | ams->al_addr = al.addr; | ||
267 | ams->sym = al.sym; | ||
268 | ams->map = al.map; | ||
269 | } | ||
270 | |||
271 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
272 | struct thread *thr, | ||
273 | struct branch_stack *bs) | ||
274 | { | ||
275 | struct branch_info *bi; | ||
276 | unsigned int i; | ||
277 | |||
278 | bi = calloc(bs->nr, sizeof(struct branch_info)); | ||
279 | if (!bi) | ||
280 | return NULL; | ||
281 | |||
282 | for (i = 0; i < bs->nr; i++) { | ||
283 | ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); | ||
284 | ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); | ||
285 | bi[i].flags = bs->entries[i].flags; | ||
286 | } | ||
287 | return bi; | ||
288 | } | ||
289 | |||
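machine__resolve_bstack() resolves both ends of every recorded branch through ip__resolve_ams(), which probes each cpumode in turn precisely because, as the comment above notes, the misc hint cannot be trusted for branch addresses. A toy standalone sketch of the probing loop, with resolve() as a hypothetical stand-in for thread__find_addr_location():

    #include <stdint.h>
    #include <stddef.h>

    enum cpumode { MODE_USER, MODE_KERNEL, MODE_GUEST_USER, MODE_GUEST_KERNEL };

    static const enum cpumode cpumodes[] = {
            MODE_USER, MODE_KERNEL, MODE_GUEST_USER, MODE_GUEST_KERNEL,
    };
    #define NCPUMODES (sizeof(cpumodes) / sizeof(cpumodes[0]))

    /* Toy stand-in for thread__find_addr_location(): pretend that only
     * high kernel addresses resolve. */
    static const char *resolve(enum cpumode m, uint64_t ip)
    {
            return (m == MODE_KERNEL && ip >= 0xffffffff80000000ull)
                    ? "toy_kernel_sym" : NULL;
    }

    /* Branches may straddle privilege boundaries, so probe every mode
     * until one resolves instead of trusting the sample's cpumode hint. */
    const char *resolve_any_mode(uint64_t ip)
    {
            size_t i;

            for (i = 0; i < NCPUMODES; i++) {
                    const char *sym = resolve(cpumodes[i], ip);

                    if (sym)
                            return sym;
            }
            return NULL;                    /* symbol stays unknown */
    }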
232 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, | 290 | int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, |
233 | struct thread *thread, | 291 | struct thread *thread, |
234 | struct ip_callchain *chain, | 292 | struct ip_callchain *chain, |
@@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) | |||
697 | i, sample->callchain->ips[i]); | 755 | i, sample->callchain->ips[i]); |
698 | } | 756 | } |
699 | 757 | ||
758 | static void branch_stack__printf(struct perf_sample *sample) | ||
759 | { | ||
760 | uint64_t i; | ||
761 | |||
762 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); | ||
763 | |||
764 | for (i = 0; i < sample->branch_stack->nr; i++) | ||
765 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", | ||
766 | i, sample->branch_stack->entries[i].from, | ||
767 | sample->branch_stack->entries[i].to); | ||
768 | } | ||
769 | |||
700 | static void perf_session__print_tstamp(struct perf_session *session, | 770 | static void perf_session__print_tstamp(struct perf_session *session, |
701 | union perf_event *event, | 771 | union perf_event *event, |
702 | struct perf_sample *sample) | 772 | struct perf_sample *sample) |
@@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, | |||
744 | 814 | ||
745 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) | 815 | if (session->sample_type & PERF_SAMPLE_CALLCHAIN) |
746 | callchain__printf(sample); | 816 | callchain__printf(sample); |
817 | |||
818 | if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) | ||
819 | branch_stack__printf(sample); | ||
747 | } | 820 | } |
748 | 821 | ||
749 | static struct machine * | 822 | static struct machine * |
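branch_stack__printf() gives the raw event dump one line per recorded branch whenever PERF_SAMPLE_BRANCH_STACK is present. A self-contained version of the same output logic, fed with made-up addresses:

    #include <inttypes.h>
    #include <stdio.h>

    struct branch_entry { uint64_t from, to; };

    static void print_branch_stack(const struct branch_entry *e, uint64_t nr)
    {
            uint64_t i;

            printf("... branch stack: nr:%" PRIu64 "\n", nr);
            for (i = 0; i < nr; i++)
                    printf("..... %2" PRIu64 ": %016" PRIx64 " -> %016" PRIx64 "\n",
                           i, e[i].from, e[i].to);
    }

    int main(void)
    {
            struct branch_entry bs[] = {
                    { 0x40052d, 0x400480 },     /* made-up from -> to pairs */
                    { 0x4004f2, 0x400520 },
            };

            print_branch_stack(bs, 2);
            return 0;
    }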
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c8d90178e7de..7a5434c00565 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
@@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel | |||
73 | struct ip_callchain *chain, | 73 | struct ip_callchain *chain, |
74 | struct symbol **parent); | 74 | struct symbol **parent); |
75 | 75 | ||
76 | struct branch_info *machine__resolve_bstack(struct machine *self, | ||
77 | struct thread *thread, | ||
78 | struct branch_stack *bs); | ||
79 | |||
76 | bool perf_session__has_traces(struct perf_session *self, const char *msg); | 80 | bool perf_session__has_traces(struct perf_session *self, const char *msg); |
77 | 81 | ||
78 | void mem_bswap_64(void *src, int byte_size); | 82 | void mem_bswap_64(void *src, int byte_size); |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 16da30d8d765..88dbcf6f9575 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; | |||
8 | const char *sort_order = default_sort_order; | 8 | const char *sort_order = default_sort_order; |
9 | int sort__need_collapse = 0; | 9 | int sort__need_collapse = 0; |
10 | int sort__has_parent = 0; | 10 | int sort__has_parent = 0; |
11 | int sort__branch_mode = -1; /* -1 = not set */ | ||
11 | 12 | ||
12 | enum sort_type sort__first_dimension; | 13 | enum sort_type sort__first_dimension; |
13 | 14 | ||
@@ -94,6 +95,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, | |||
94 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); | 95 | return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); |
95 | } | 96 | } |
96 | 97 | ||
98 | static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) | ||
99 | { | ||
100 | struct dso *dso_l = map_l ? map_l->dso : NULL; | ||
101 | struct dso *dso_r = map_r ? map_r->dso : NULL; | ||
102 | const char *dso_name_l, *dso_name_r; | ||
103 | |||
104 | if (!dso_l || !dso_r) | ||
105 | return cmp_null(dso_l, dso_r); | ||
106 | |||
107 | if (verbose) { | ||
108 | dso_name_l = dso_l->long_name; | ||
109 | dso_name_r = dso_r->long_name; | ||
110 | } else { | ||
111 | dso_name_l = dso_l->short_name; | ||
112 | dso_name_r = dso_r->short_name; | ||
113 | } | ||
114 | |||
115 | return strcmp(dso_name_l, dso_name_r); | ||
116 | } | ||
117 | |||
97 | struct sort_entry sort_comm = { | 118 | struct sort_entry sort_comm = { |
98 | .se_header = "Command", | 119 | .se_header = "Command", |
99 | .se_cmp = sort__comm_cmp, | 120 | .se_cmp = sort__comm_cmp, |
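The _sort__dso_cmp() factoring above lets the existing dso column and the new dso_from/dso_to columns share one comparator. A standalone sketch of its shape, with cmp_null()'s NULL handling written out inline and stand-in types:

    #include <string.h>

    struct dso { const char *short_name, *long_name; };

    static int verbose;

    /* NULL-aware compare; the name used follows the verbosity flag. */
    int dso_cmp(const struct dso *l, const struct dso *r)
    {
            if (!l && !r)
                    return 0;
            if (!l)
                    return -1;
            if (!r)
                    return 1;

            return strcmp(verbose ? l->long_name : l->short_name,
                          verbose ? r->long_name : r->short_name);
    }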
@@ -107,36 +128,74 @@ struct sort_entry sort_comm = { | |||
107 | static int64_t | 128 | static int64_t |
108 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | 129 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) |
109 | { | 130 | { |
110 | struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; | 131 | return _sort__dso_cmp(left->ms.map, right->ms.map); |
111 | struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; | 132 | } |
112 | const char *dso_name_l, *dso_name_r; | ||
113 | 133 | ||
114 | if (!dso_l || !dso_r) | ||
115 | return cmp_null(dso_l, dso_r); | ||
116 | 134 | ||
117 | if (verbose) { | 135 | static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, |
118 | dso_name_l = dso_l->long_name; | 136 | u64 ip_l, u64 ip_r) |
119 | dso_name_r = dso_r->long_name; | 137 | { |
120 | } else { | 138 | if (!sym_l || !sym_r) |
121 | dso_name_l = dso_l->short_name; | 139 | return cmp_null(sym_l, sym_r); |
122 | dso_name_r = dso_r->short_name; | 140 | |
141 | if (sym_l == sym_r) | ||
142 | return 0; | ||
143 | |||
144 | if (sym_l) | ||
145 | ip_l = sym_l->start; | ||
146 | if (sym_r) | ||
147 | ip_r = sym_r->start; | ||
148 | |||
149 | return (int64_t)(ip_r - ip_l); | ||
150 | } | ||
151 | |||
152 | static int _hist_entry__dso_snprintf(struct map *map, char *bf, | ||
153 | size_t size, unsigned int width) | ||
154 | { | ||
155 | if (map && map->dso) { | ||
156 | const char *dso_name = !verbose ? map->dso->short_name : | ||
157 | map->dso->long_name; | ||
158 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | ||
123 | } | 159 | } |
124 | 160 | ||
125 | return strcmp(dso_name_l, dso_name_r); | 161 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); |
126 | } | 162 | } |
127 | 163 | ||
128 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, | 164 | static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, |
129 | size_t size, unsigned int width) | 165 | size_t size, unsigned int width) |
130 | { | 166 | { |
131 | if (self->ms.map && self->ms.map->dso) { | 167 | return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); |
132 | const char *dso_name = !verbose ? self->ms.map->dso->short_name : | 168 | } |
133 | self->ms.map->dso->long_name; | 169 | |
134 | return repsep_snprintf(bf, size, "%-*s", width, dso_name); | 170 | static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, |
171 | u64 ip, char level, char *bf, size_t size, | ||
172 | unsigned int width __used) | ||
173 | { | ||
174 | size_t ret = 0; | ||
175 | |||
176 | if (verbose) { | ||
177 | char o = map ? dso__symtab_origin(map->dso) : '!'; | ||
178 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
179 | BITS_PER_LONG / 4, ip, o); | ||
135 | } | 180 | } |
136 | 181 | ||
137 | return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); | 182 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); |
183 | if (sym) | ||
184 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
185 | width - ret, | ||
186 | sym->name); | ||
187 | else { | ||
188 | size_t len = BITS_PER_LONG / 4; | ||
189 | ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", | ||
190 | len, ip); | ||
191 | ret += repsep_snprintf(bf + ret, size - ret, "%-*s", | ||
192 | width - ret, ""); | ||
193 | } | ||
194 | |||
195 | return ret; | ||
138 | } | 196 | } |
139 | 197 | ||
198 | |||
140 | struct sort_entry sort_dso = { | 199 | struct sort_entry sort_dso = { |
141 | .se_header = "Shared Object", | 200 | .se_header = "Shared Object", |
142 | .se_cmp = sort__dso_cmp, | 201 | .se_cmp = sort__dso_cmp, |
@@ -144,8 +203,14 @@ struct sort_entry sort_dso = { | |||
144 | .se_width_idx = HISTC_DSO, | 203 | .se_width_idx = HISTC_DSO, |
145 | }; | 204 | }; |
146 | 205 | ||
147 | /* --sort symbol */ | 206 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, |
207 | size_t size, unsigned int width __used) | ||
208 | { | ||
209 | return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, | ||
210 | self->level, bf, size, width); | ||
211 | } | ||
148 | 212 | ||
213 | /* --sort symbol */ | ||
149 | static int64_t | 214 | static int64_t |
150 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | 215 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) |
151 | { | 216 | { |
@@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | |||
163 | ip_l = left->ms.sym->start; | 228 | ip_l = left->ms.sym->start; |
164 | ip_r = right->ms.sym->start; | 229 | ip_r = right->ms.sym->start; |
165 | 230 | ||
166 | return (int64_t)(ip_r - ip_l); | 231 | return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); |
167 | } | ||
168 | |||
169 | static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, | ||
170 | size_t size, unsigned int width __used) | ||
171 | { | ||
172 | size_t ret = 0; | ||
173 | |||
174 | if (verbose) { | ||
175 | char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; | ||
176 | ret += repsep_snprintf(bf, size, "%-#*llx %c ", | ||
177 | BITS_PER_LONG / 4, self->ip, o); | ||
178 | } | ||
179 | |||
180 | if (!sort_dso.elide) | ||
181 | ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); | ||
182 | |||
183 | if (self->ms.sym) | ||
184 | ret += repsep_snprintf(bf + ret, size - ret, "%s", | ||
185 | self->ms.sym->name); | ||
186 | else | ||
187 | ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", | ||
188 | BITS_PER_LONG / 4, self->ip); | ||
189 | |||
190 | return ret; | ||
191 | } | 232 | } |
192 | 233 | ||
193 | struct sort_entry sort_sym = { | 234 | struct sort_entry sort_sym = { |
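_hist_entry__sym_snprintf() likewise becomes the single formatter behind the sym, symbol_from, and symbol_to columns: an optional verbose "address origin" prefix, the "[level]" tag, then either the symbol name or the raw address padded to the column width. A simplified standalone rendition, with cruder width handling than the real code:

    #include <stdio.h>

    #define BITS_PER_LONG 64

    int format_sym(char *bf, size_t size, int verbose,
                   unsigned long long ip, char origin, char level,
                   const char *sym_name, unsigned int width)
    {
            int ret = 0;

            if (verbose)                    /* "<addr> <symtab origin> " */
                    ret += snprintf(bf, size, "%-#*llx %c ",
                                    BITS_PER_LONG / 4, ip, origin);

            ret += snprintf(bf + ret, size - ret, "[%c] ", level);
            if (sym_name)
                    ret += snprintf(bf + ret, size - ret, "%-*s",
                                    width - ret, sym_name);
            else                            /* unresolved: raw address */
                    ret += snprintf(bf + ret, size - ret, "%-#.*llx",
                                    BITS_PER_LONG / 4, ip);
            return ret;
    }

    int main(void)
    {
            char buf[128];

            format_sym(buf, sizeof(buf), 1, 0x4005d0ULL, '.', 'u',
                       "main", 30);
            printf("%s\n", buf);
            return 0;
    }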
@@ -246,19 +287,155 @@ struct sort_entry sort_cpu = { | |||
246 | .se_width_idx = HISTC_CPU, | 287 | .se_width_idx = HISTC_CPU, |
247 | }; | 288 | }; |
248 | 289 | ||
290 | static int64_t | ||
291 | sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
292 | { | ||
293 | return _sort__dso_cmp(left->branch_info->from.map, | ||
294 | right->branch_info->from.map); | ||
295 | } | ||
296 | |||
297 | static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, | ||
298 | size_t size, unsigned int width) | ||
299 | { | ||
300 | return _hist_entry__dso_snprintf(self->branch_info->from.map, | ||
301 | bf, size, width); | ||
302 | } | ||
303 | |||
304 | struct sort_entry sort_dso_from = { | ||
305 | .se_header = "Source Shared Object", | ||
306 | .se_cmp = sort__dso_from_cmp, | ||
307 | .se_snprintf = hist_entry__dso_from_snprintf, | ||
308 | .se_width_idx = HISTC_DSO_FROM, | ||
309 | }; | ||
310 | |||
311 | static int64_t | ||
312 | sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
313 | { | ||
314 | return _sort__dso_cmp(left->branch_info->to.map, | ||
315 | right->branch_info->to.map); | ||
316 | } | ||
317 | |||
318 | static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, | ||
319 | size_t size, unsigned int width) | ||
320 | { | ||
321 | return _hist_entry__dso_snprintf(self->branch_info->to.map, | ||
322 | bf, size, width); | ||
323 | } | ||
324 | |||
325 | static int64_t | ||
326 | sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) | ||
327 | { | ||
328 | struct addr_map_symbol *from_l = &left->branch_info->from; | ||
329 | struct addr_map_symbol *from_r = &right->branch_info->from; | ||
330 | |||
331 | if (!from_l->sym && !from_r->sym) | ||
332 | return right->level - left->level; | ||
333 | |||
334 | return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, | ||
335 | from_r->addr); | ||
336 | } | ||
337 | |||
338 | static int64_t | ||
339 | sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) | ||
340 | { | ||
341 | struct addr_map_symbol *to_l = &left->branch_info->to; | ||
342 | struct addr_map_symbol *to_r = &right->branch_info->to; | ||
343 | |||
344 | if (!to_l->sym && !to_r->sym) | ||
345 | return right->level - left->level; | ||
346 | |||
347 | return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); | ||
348 | } | ||
349 | |||
350 | static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, | ||
351 | size_t size, unsigned int width __used) | ||
352 | { | ||
353 | struct addr_map_symbol *from = &self->branch_info->from; | ||
354 | return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, | ||
355 | self->level, bf, size, width); | ||
356 | |||
357 | } | ||
358 | |||
359 | static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, | ||
360 | size_t size, unsigned int width __used) | ||
361 | { | ||
362 | struct addr_map_symbol *to = &self->branch_info->to; | ||
363 | return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, | ||
364 | self->level, bf, size, width); | ||
365 | |||
366 | } | ||
367 | |||
368 | struct sort_entry sort_dso_to = { | ||
369 | .se_header = "Target Shared Object", | ||
370 | .se_cmp = sort__dso_to_cmp, | ||
371 | .se_snprintf = hist_entry__dso_to_snprintf, | ||
372 | .se_width_idx = HISTC_DSO_TO, | ||
373 | }; | ||
374 | |||
375 | struct sort_entry sort_sym_from = { | ||
376 | .se_header = "Source Symbol", | ||
377 | .se_cmp = sort__sym_from_cmp, | ||
378 | .se_snprintf = hist_entry__sym_from_snprintf, | ||
379 | .se_width_idx = HISTC_SYMBOL_FROM, | ||
380 | }; | ||
381 | |||
382 | struct sort_entry sort_sym_to = { | ||
383 | .se_header = "Target Symbol", | ||
384 | .se_cmp = sort__sym_to_cmp, | ||
385 | .se_snprintf = hist_entry__sym_to_snprintf, | ||
386 | .se_width_idx = HISTC_SYMBOL_TO, | ||
387 | }; | ||
388 | |||
389 | static int64_t | ||
390 | sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) | ||
391 | { | ||
392 | const unsigned char mp = left->branch_info->flags.mispred != | ||
393 | right->branch_info->flags.mispred; | ||
394 | const unsigned char p = left->branch_info->flags.predicted != | ||
395 | right->branch_info->flags.predicted; | ||
396 | |||
397 | return mp || p; | ||
398 | } | ||
399 | |||
400 | static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, | ||
402 | size_t size, unsigned int width) { | ||
402 | static const char *out = "N/A"; | ||
403 | |||
404 | if (self->branch_info->flags.predicted) | ||
405 | out = "N"; | ||
406 | else if (self->branch_info->flags.mispred) | ||
407 | out = "Y"; | ||
408 | |||
409 | return repsep_snprintf(bf, size, "%-*s", width, out); | ||
410 | } | ||
411 | |||
412 | struct sort_entry sort_mispredict = { | ||
413 | .se_header = "Branch Mispredicted", | ||
414 | .se_cmp = sort__mispredict_cmp, | ||
415 | .se_snprintf = hist_entry__mispredict_snprintf, | ||
416 | .se_width_idx = HISTC_MISPREDICT, | ||
417 | }; | ||
418 | |||
249 | struct sort_dimension { | 419 | struct sort_dimension { |
250 | const char *name; | 420 | const char *name; |
251 | struct sort_entry *entry; | 421 | struct sort_entry *entry; |
252 | int taken; | 422 | int taken; |
253 | }; | 423 | }; |
254 | 424 | ||
425 | #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } | ||
426 | |||
255 | static struct sort_dimension sort_dimensions[] = { | 427 | static struct sort_dimension sort_dimensions[] = { |
256 | { .name = "pid", .entry = &sort_thread, }, | 428 | DIM(SORT_PID, "pid", sort_thread), |
257 | { .name = "comm", .entry = &sort_comm, }, | 429 | DIM(SORT_COMM, "comm", sort_comm), |
258 | { .name = "dso", .entry = &sort_dso, }, | 430 | DIM(SORT_DSO, "dso", sort_dso), |
259 | { .name = "symbol", .entry = &sort_sym, }, | 431 | DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), |
260 | { .name = "parent", .entry = &sort_parent, }, | 432 | DIM(SORT_DSO_TO, "dso_to", sort_dso_to), |
261 | { .name = "cpu", .entry = &sort_cpu, }, | 433 | DIM(SORT_SYM, "symbol", sort_sym), |
434 | DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), | ||
435 | DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), | ||
436 | DIM(SORT_PARENT, "parent", sort_parent), | ||
437 | DIM(SORT_CPU, "cpu", sort_cpu), | ||
438 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), | ||
262 | }; | 439 | }; |
263 | 440 | ||
264 | int sort_dimension__add(const char *tok) | 441 | int sort_dimension__add(const char *tok) |
@@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok) | |||
270 | 447 | ||
271 | if (strncasecmp(tok, sd->name, strlen(tok))) | 448 | if (strncasecmp(tok, sd->name, strlen(tok))) |
272 | continue; | 449 | continue; |
273 | |||
274 | if (sd->entry == &sort_parent) { | 450 | if (sd->entry == &sort_parent) { |
275 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); | 451 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); |
276 | if (ret) { | 452 | if (ret) { |
@@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok) | |||
302 | sort__first_dimension = SORT_PARENT; | 478 | sort__first_dimension = SORT_PARENT; |
303 | else if (!strcmp(sd->name, "cpu")) | 479 | else if (!strcmp(sd->name, "cpu")) |
304 | sort__first_dimension = SORT_CPU; | 480 | sort__first_dimension = SORT_CPU; |
481 | else if (!strcmp(sd->name, "symbol_from")) | ||
482 | sort__first_dimension = SORT_SYM_FROM; | ||
483 | else if (!strcmp(sd->name, "symbol_to")) | ||
484 | sort__first_dimension = SORT_SYM_TO; | ||
485 | else if (!strcmp(sd->name, "dso_from")) | ||
486 | sort__first_dimension = SORT_DSO_FROM; | ||
487 | else if (!strcmp(sd->name, "dso_to")) | ||
488 | sort__first_dimension = SORT_DSO_TO; | ||
489 | else if (!strcmp(sd->name, "mispredict")) | ||
490 | sort__first_dimension = SORT_MISPREDICT; | ||
305 | } | 491 | } |
306 | 492 | ||
307 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | 493 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); |
@@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok) | |||
309 | 495 | ||
310 | return 0; | 496 | return 0; |
311 | } | 497 | } |
312 | |||
313 | return -ESRCH; | 498 | return -ESRCH; |
314 | } | 499 | } |
315 | 500 | ||
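The DIM() macro turns sort_dimensions[] into a designated-initializer array keyed by enum sort_type, keeping the table and the enum in lockstep, while sort_dimension__add() still matches tokens by case-insensitive prefix. A standalone sketch of the table plus that lookup, with a reduced enum:

    #include <stdio.h>
    #include <string.h>
    #include <strings.h>

    enum sort_type { SORT_PID, SORT_COMM, SORT_DSO, SORT_SYM, SORT_NR };

    struct sort_dimension { const char *name; };

    #define DIM(d, n) [d] = { .name = n }

    /* Designated indices keep the table in lockstep with enum sort_type. */
    static struct sort_dimension dims[SORT_NR] = {
            DIM(SORT_PID,  "pid"),
            DIM(SORT_COMM, "comm"),
            DIM(SORT_DSO,  "dso"),
            DIM(SORT_SYM,  "symbol"),
    };

    /* Prefix match, like the strncasecmp() in sort_dimension__add(). */
    static int dim_lookup(const char *tok)
    {
            int i;

            for (i = 0; i < SORT_NR; i++)
                    if (!strncasecmp(tok, dims[i].name, strlen(tok)))
                            return i;
            return -1;
    }

    int main(void)
    {
            printf("\"sym\" -> %d\n", dim_lookup("sym")); /* 3 == SORT_SYM */
            return 0;
    }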
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae395752..472aa5a63a58 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -31,11 +31,16 @@ extern const char *parent_pattern; | |||
31 | extern const char default_sort_order[]; | 31 | extern const char default_sort_order[]; |
32 | extern int sort__need_collapse; | 32 | extern int sort__need_collapse; |
33 | extern int sort__has_parent; | 33 | extern int sort__has_parent; |
34 | extern int sort__branch_mode; | ||
34 | extern char *field_sep; | 35 | extern char *field_sep; |
35 | extern struct sort_entry sort_comm; | 36 | extern struct sort_entry sort_comm; |
36 | extern struct sort_entry sort_dso; | 37 | extern struct sort_entry sort_dso; |
37 | extern struct sort_entry sort_sym; | 38 | extern struct sort_entry sort_sym; |
38 | extern struct sort_entry sort_parent; | 39 | extern struct sort_entry sort_parent; |
40 | extern struct sort_entry sort_dso_from; | ||
41 | extern struct sort_entry sort_dso_to; | ||
42 | extern struct sort_entry sort_sym_from; | ||
43 | extern struct sort_entry sort_sym_to; | ||
39 | extern enum sort_type sort__first_dimension; | 44 | extern enum sort_type sort__first_dimension; |
40 | 45 | ||
41 | /** | 46 | /** |
@@ -72,6 +77,7 @@ struct hist_entry { | |||
72 | struct hist_entry *pair; | 77 | struct hist_entry *pair; |
73 | struct rb_root sorted_chain; | 78 | struct rb_root sorted_chain; |
74 | }; | 79 | }; |
80 | struct branch_info *branch_info; | ||
75 | struct callchain_root callchain[0]; | 81 | struct callchain_root callchain[0]; |
76 | }; | 82 | }; |
77 | 83 | ||
@@ -82,6 +88,11 @@ enum sort_type { | |||
82 | SORT_SYM, | 88 | SORT_SYM, |
83 | SORT_PARENT, | 89 | SORT_PARENT, |
84 | SORT_CPU, | 90 | SORT_CPU, |
91 | SORT_DSO_FROM, | ||
92 | SORT_DSO_TO, | ||
93 | SORT_SYM_FROM, | ||
94 | SORT_SYM_TO, | ||
95 | SORT_MISPREDICT, | ||
85 | }; | 96 | }; |
86 | 97 | ||
87 | /* | 98 | /* |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2a683d4fc918..ac49ef208a5f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <stdbool.h> | 5 | #include <stdbool.h> |
6 | #include <stdint.h> | 6 | #include <stdint.h> |
7 | #include "map.h" | 7 | #include "map.h" |
8 | #include "../perf.h" | ||
8 | #include <linux/list.h> | 9 | #include <linux/list.h> |
9 | #include <linux/rbtree.h> | 10 | #include <linux/rbtree.h> |
10 | #include <stdio.h> | 11 | #include <stdio.h> |
@@ -96,7 +97,11 @@ struct symbol_conf { | |||
96 | *col_width_list_str; | 97 | *col_width_list_str; |
97 | struct strlist *dso_list, | 98 | struct strlist *dso_list, |
98 | *comm_list, | 99 | *comm_list, |
99 | *sym_list; | 100 | *sym_list, |
101 | *dso_from_list, | ||
102 | *dso_to_list, | ||
103 | *sym_from_list, | ||
104 | *sym_to_list; | ||
100 | const char *symfs; | 105 | const char *symfs; |
101 | }; | 106 | }; |
102 | 107 | ||
@@ -120,6 +125,19 @@ struct map_symbol { | |||
120 | bool has_children; | 125 | bool has_children; |
121 | }; | 126 | }; |
122 | 127 | ||
128 | struct addr_map_symbol { | ||
129 | struct map *map; | ||
130 | struct symbol *sym; | ||
131 | u64 addr; | ||
132 | u64 al_addr; | ||
133 | }; | ||
134 | |||
135 | struct branch_info { | ||
136 | struct addr_map_symbol from; | ||
137 | struct addr_map_symbol to; | ||
138 | struct branch_flags flags; | ||
139 | }; | ||
140 | |||
123 | struct addr_location { | 141 | struct addr_location { |
124 | struct thread *thread; | 142 | struct thread *thread; |
125 | struct map *map; | 143 | struct map *map; |
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bfba0490c098..de8ece8bcce3 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c | |||
@@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists) | |||
805 | self->hists = hists; | 805 | self->hists = hists; |
806 | self->b.refresh = hist_browser__refresh; | 806 | self->b.refresh = hist_browser__refresh; |
807 | self->b.seek = ui_browser__hists_seek; | 807 | self->b.seek = ui_browser__hists_seek; |
808 | self->b.use_navkeypressed = true, | 808 | self->b.use_navkeypressed = true; |
809 | self->has_symbols = sort_sym.list.next != NULL; | 809 | if (sort__branch_mode == 1) |
810 | self->has_symbols = sort_sym_from.list.next != NULL; | ||
811 | else | ||
812 | self->has_symbols = sort_sym.list.next != NULL; | ||
810 | } | 813 | } |
811 | 814 | ||
812 | return self; | 815 | return self; |
@@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
853 | return printed; | 856 | return printed; |
854 | } | 857 | } |
855 | 858 | ||
859 | static inline void free_popup_options(char **options, int n) | ||
860 | { | ||
861 | int i; | ||
862 | |||
863 | for (i = 0; i < n; ++i) { | ||
864 | free(options[i]); | ||
865 | options[i] = NULL; | ||
866 | } | ||
867 | } | ||
868 | |||
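free_popup_options() exists because options[] now outlives a single iteration of the event loop: dynamically built entries must be freed and NULLed on each early path and once more after the loop, while the trailing "Exit" string literal must never be passed to free(). A standalone sketch of that lifecycle, with strdup() standing in for the asprintf() calls in the real code:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static void free_popup_options(char **options, int n)
    {
            int i;

            for (i = 0; i < n; ++i) {
                    free(options[i]);
                    options[i] = NULL;
            }
    }

    int main(void)
    {
            char *options[16];
            int nr_options = 0;

            memset(options, 0, sizeof(options));

            /* dynamic entries (built with asprintf() in the real code) */
            options[nr_options++] = strdup("Annotate main");
            options[nr_options++] = strdup("Zoom into perf(1234) thread");
            /* static entry: a string literal that must never be freed */
            options[nr_options++] = (char *)"Exit";

            /* ... ui__popup_menu(nr_options, options) would run here ... */

            free_popup_options(options, nr_options - 1);  /* spare "Exit" */
            return 0;
    }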
856 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | 869 | static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, |
857 | const char *helpline, const char *ev_name, | 870 | const char *helpline, const char *ev_name, |
858 | bool left_exits, | 871 | bool left_exits, |
@@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
861 | { | 874 | { |
862 | struct hists *self = &evsel->hists; | 875 | struct hists *self = &evsel->hists; |
863 | struct hist_browser *browser = hist_browser__new(self); | 876 | struct hist_browser *browser = hist_browser__new(self); |
877 | struct branch_info *bi; | ||
864 | struct pstack *fstack; | 878 | struct pstack *fstack; |
879 | char *options[16]; | ||
880 | int nr_options = 0; | ||
865 | int key = -1; | 881 | int key = -1; |
866 | 882 | ||
867 | if (browser == NULL) | 883 | if (browser == NULL) |
@@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
873 | 889 | ||
874 | ui_helpline__push(helpline); | 890 | ui_helpline__push(helpline); |
875 | 891 | ||
892 | memset(options, 0, sizeof(options)); | ||
893 | |||
876 | while (1) { | 894 | while (1) { |
877 | const struct thread *thread = NULL; | 895 | const struct thread *thread = NULL; |
878 | const struct dso *dso = NULL; | 896 | const struct dso *dso = NULL; |
879 | char *options[16]; | 897 | int choice = 0, |
880 | int nr_options = 0, choice = 0, i, | ||
881 | annotate = -2, zoom_dso = -2, zoom_thread = -2, | 898 | annotate = -2, zoom_dso = -2, zoom_thread = -2, |
882 | browse_map = -2; | 899 | annotate_f = -2, annotate_t = -2, browse_map = -2; |
900 | |||
901 | nr_options = 0; | ||
883 | 902 | ||
884 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); | 903 | key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); |
885 | 904 | ||
@@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
887 | thread = hist_browser__selected_thread(browser); | 906 | thread = hist_browser__selected_thread(browser); |
888 | dso = browser->selection->map ? browser->selection->map->dso : NULL; | 907 | dso = browser->selection->map ? browser->selection->map->dso : NULL; |
889 | } | 908 | } |
890 | |||
891 | switch (key) { | 909 | switch (key) { |
892 | case K_TAB: | 910 | case K_TAB: |
893 | case K_UNTAB: | 911 | case K_UNTAB: |
@@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
902 | if (!browser->has_symbols) { | 920 | if (!browser->has_symbols) { |
903 | ui_browser__warning(&browser->b, delay_secs * 2, | 921 | ui_browser__warning(&browser->b, delay_secs * 2, |
904 | "Annotation is only available for symbolic views, " | 922 | "Annotation is only available for symbolic views, " |
905 | "include \"sym\" in --sort to use it."); | 923 | "include \"sym*\" in --sort to use it."); |
906 | continue; | 924 | continue; |
907 | } | 925 | } |
908 | 926 | ||
@@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
972 | if (!browser->has_symbols) | 990 | if (!browser->has_symbols) |
973 | goto add_exit_option; | 991 | goto add_exit_option; |
974 | 992 | ||
975 | if (browser->selection != NULL && | 993 | if (sort__branch_mode == 1) { |
976 | browser->selection->sym != NULL && | 994 | bi = browser->he_selection->branch_info; |
977 | !browser->selection->map->dso->annotate_warned && | 995 | if (browser->selection != NULL && |
978 | asprintf(&options[nr_options], "Annotate %s", | 996 | bi && |
979 | browser->selection->sym->name) > 0) | 997 | bi->from.sym != NULL && |
980 | annotate = nr_options++; | 998 | !bi->from.map->dso->annotate_warned && |
999 | asprintf(&options[nr_options], "Annotate %s", | ||
1000 | bi->from.sym->name) > 0) | ||
1001 | annotate_f = nr_options++; | ||
1002 | |||
1003 | if (browser->selection != NULL && | ||
1004 | bi && | ||
1005 | bi->to.sym != NULL && | ||
1006 | !bi->to.map->dso->annotate_warned && | ||
1007 | (bi->to.sym != bi->from.sym || | ||
1008 | bi->to.map->dso != bi->from.map->dso) && | ||
1009 | asprintf(&options[nr_options], "Annotate %s", | ||
1010 | bi->to.sym->name) > 0) | ||
1011 | annotate_t = nr_options++; | ||
1012 | } else { | ||
1013 | |||
1014 | if (browser->selection != NULL && | ||
1015 | browser->selection->sym != NULL && | ||
1016 | !browser->selection->map->dso->annotate_warned && | ||
1017 | asprintf(&options[nr_options], "Annotate %s", | ||
1018 | browser->selection->sym->name) > 0) | ||
1019 | annotate = nr_options++; | ||
1020 | } | ||
981 | 1021 | ||
982 | if (thread != NULL && | 1022 | if (thread != NULL && |
983 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", | 1023 | asprintf(&options[nr_options], "Zoom %s %s(%d) thread", |
@@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
998 | browse_map = nr_options++; | 1038 | browse_map = nr_options++; |
999 | add_exit_option: | 1039 | add_exit_option: |
1000 | options[nr_options++] = (char *)"Exit"; | 1040 | options[nr_options++] = (char *)"Exit"; |
1001 | 1041 | retry_popup_menu: | |
1002 | choice = ui__popup_menu(nr_options, options); | 1042 | choice = ui__popup_menu(nr_options, options); |
1003 | 1043 | ||
1004 | for (i = 0; i < nr_options - 1; ++i) | ||
1005 | free(options[i]); | ||
1006 | |||
1007 | if (choice == nr_options - 1) | 1044 | if (choice == nr_options - 1) |
1008 | break; | 1045 | break; |
1009 | 1046 | ||
1010 | if (choice == -1) | 1047 | if (choice == -1) { |
1048 | free_popup_options(options, nr_options - 1); | ||
1011 | continue; | 1049 | continue; |
1050 | } | ||
1012 | 1051 | ||
1013 | if (choice == annotate) { | 1052 | if (choice == annotate || choice == annotate_t || choice == annotate_f) { |
1014 | struct hist_entry *he; | 1053 | struct hist_entry *he; |
1015 | int err; | 1054 | int err; |
1016 | do_annotate: | 1055 | do_annotate: |
1017 | he = hist_browser__selected_entry(browser); | 1056 | he = hist_browser__selected_entry(browser); |
1018 | if (he == NULL) | 1057 | if (he == NULL) |
1019 | continue; | 1058 | continue; |
1059 | |||
1060 | /* | ||
1061 | * We stash the branch_info symbol + map into | ||
1062 | * the ms so we don't have to rewrite all the annotation | ||
1063 | * code to use branch_info. | ||
1064 | * In branch mode, the ms struct is not otherwise used. | ||
1065 | */ | ||
1066 | if (choice == annotate_f) { | ||
1067 | he->ms.sym = he->branch_info->from.sym; | ||
1068 | he->ms.map = he->branch_info->from.map; | ||
1069 | } else if (choice == annotate_t) { | ||
1070 | he->ms.sym = he->branch_info->to.sym; | ||
1071 | he->ms.map = he->branch_info->to.map; | ||
1072 | } | ||
1073 | |||
1020 | /* | 1074 | /* |
1021 | * Don't let this be freed, say, by hists__decay_entry. | 1075 | * Don't let this be freed, say, by hists__decay_entry. |
1022 | */ | 1076 | */ |
@@ -1024,9 +1078,18 @@ do_annotate: | |||
1024 | err = hist_entry__tui_annotate(he, evsel->idx, | 1078 | err = hist_entry__tui_annotate(he, evsel->idx, |
1025 | timer, arg, delay_secs); | 1079 | timer, arg, delay_secs); |
1026 | he->used = false; | 1080 | he->used = false; |
1081 | /* | ||
1082 | * Offer the option to annotate the other branch source or target | ||
1083 | * (if it exists) when returning from annotate. | ||
1084 | */ | ||
1085 | if ((err == 'q' || err == CTRL('c')) | ||
1086 | && annotate_t != -2 && annotate_f != -2) | ||
1087 | goto retry_popup_menu; | ||
1088 | |||
1027 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); | 1089 | ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); |
1028 | if (err) | 1090 | if (err) |
1029 | ui_browser__handle_resize(&browser->b); | 1091 | ui_browser__handle_resize(&browser->b); |
1092 | |||
1030 | } else if (choice == browse_map) | 1093 | } else if (choice == browse_map) |
1031 | map__browse(browser->selection->map); | 1094 | map__browse(browser->selection->map); |
1032 | else if (choice == zoom_dso) { | 1095 | else if (choice == zoom_dso) { |
@@ -1072,6 +1135,7 @@ out_free_stack: | |||
1072 | pstack__delete(fstack); | 1135 | pstack__delete(fstack); |
1073 | out: | 1136 | out: |
1074 | hist_browser__delete(browser); | 1137 | hist_browser__delete(browser); |
1138 | free_popup_options(options, nr_options - 1); | ||
1075 | return key; | 1139 | return key; |
1076 | } | 1140 | } |
1077 | 1141 | ||
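Finally, the retry_popup_menu label ties the branch-annotation pieces together: when the user quits the annotate view and both branch ends were offered (neither option index stayed at -2), the menu is re-shown so the other end can be annotated immediately. A toy standalone sketch of just that control flow, with annotate() standing in for hist_entry__tui_annotate():

    #include <stdio.h>

    /* Toy stand-in for hist_entry__tui_annotate(). */
    static int annotate(const char *what)
    {
            printf("annotating %s\n", what);
            return 'q';                     /* pretend the user quit out */
    }

    int main(void)
    {
            int annotate_f = 0, annotate_t = 1; /* both branch ends resolved */
            int tries = 0;
            int err;

    retry_popup_menu:
            /* the real code re-runs ui__popup_menu() here */
            err = annotate(tries ? "branch target" : "branch source");

            if (err == 'q' && annotate_f != -2 && annotate_t != -2 &&
                ++tries < 2)
                    goto retry_popup_menu;

            return 0;
    }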