author		Linus Torvalds <torvalds@linux-foundation.org>	2017-07-21 14:12:48 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-07-21 14:12:48 -0400
commit		bbcdea658f42070d25e7764f1b81785a51cb1642 (patch)
tree		ea5c21c180560918e94aa83a939c9d2507812ff5
parent		8b810a3a35eec9cd5e9245266dd0ae10de063094 (diff)
parent		df6c3db8d30fb1699ccbc403196b86324f4257af (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
"Two hw-enablement patches, two race fixes, three fixes for regressions
of semantics, plus a number of tooling fixes"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel: Add proper condition to run sched_task callbacks
perf/core: Fix locking for children siblings group read
perf/core: Fix scheduling regression of pinned groups
perf/x86/intel: Fix debug_store reset field for freq events
perf/x86/intel: Add Goldmont Plus CPU PMU support
perf/x86/intel: Enable C-state residency events for Apollo Lake
perf symbols: Accept zero as the kernel base address
Revert "perf/core: Drop kernel samples even though :u is specified"
perf annotate: Fix broken arrow at row 0 connecting jmp instruction to its target
perf evsel: State in the default event name if attr.exclude_kernel is set
perf evsel: Fix attr.exclude_kernel setting for default cycles:p
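
The two evsel fixes at the bottom of the list change the default cycles event so that unprivileged runs exclude kernel samples and say so in the event name. As an aside for readers following along, here is a minimal userspace sketch (illustrative, not part of this pull) that opens a cycles counter the same way via perf_event_open(2):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	/* Standard raw-syscall wrapper; glibc exposes no stub for this. */
	static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
				    int cpu, int group_fd, unsigned long flags)
	{
		return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		/* Unprivileged users should not sample the kernel. */
		attr.exclude_kernel = geteuid() != 0;

		fd = perf_event_open(&attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		printf("opened cycles%s\n", attr.exclude_kernel ? ":u" : "");
		close(fd);
		return 0;
	}

Run as a normal user this opens what perf would now name "cycles:u"; run as root it samples the kernel as well.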
Diffstat:
-rw-r--r--	arch/x86/events/intel/core.c	164
-rw-r--r--	arch/x86/events/intel/cstate.c	26
-rw-r--r--	arch/x86/events/intel/ds.c	22
-rw-r--r--	arch/x86/events/intel/lbr.c	4
-rw-r--r--	arch/x86/events/perf_event.h	2
-rw-r--r--	kernel/events/core.c	33
-rw-r--r--	tools/perf/ui/browser.c	2
-rw-r--r--	tools/perf/util/evsel.c	8
-rw-r--r--	tools/perf/util/machine.c	2

9 files changed, 221 insertions, 42 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa1..98b0f0729527 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs
 	},
 };
 
+static __initconst const u64 glp_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+};
+
+static __initconst const u64 glp_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event)
 	return 0;
 }
 
+static struct event_constraint counter0_constraint =
+			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+
 static struct event_constraint counter2_constraint =
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
@@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+static struct event_constraint *
+glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
+	if (event->attr.precise_ip == 3)
+		return &counter0_constraint;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
@@ -3265,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -3838,6 +3968,32 @@ __init int intel_pmu_init(void)
 		pr_cont("Goldmont events, ");
 		break;
 
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.get_event_constraints = glp_get_event_constraints;
+		x86_pmu.cpu_events = glm_events_attrs;
+		/* Goldmont Plus has 4-wide pipeline */
+		event_attr_td_total_slots_scale_glm.event_str = "4";
+		pr_cont("Goldmont plus events, ");
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
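
A note on the constraint masks above: the second argument of INTEL_ALL_EVENT_CONSTRAINT() is a counter bitmask, so the 0x1 in counter0_constraint pins an event to PMC0 (used here for reduced-skid :ppp sampling), while 0xf would allow any of the four general-purpose counters. A standalone sketch (illustrative only, not kernel code) of how such a mask decodes:

	#include <stdio.h>

	/* Print which general-purpose counters a constraint mask allows. */
	static void print_allowed_counters(unsigned long cntr_mask)
	{
		for (int i = 0; i < 4; i++)
			if (cntr_mask & (1UL << i))
				printf("PMC%d ", i);
		printf("\n");
	}

	int main(void)
	{
		print_allowed_counters(0x1);	/* counter0_constraint: PMC0 only */
		print_allowed_counters(0xf);	/* all four GP counters */
		return 0;
	}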
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 238ae3248ba5..4cf100ff2a37 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,16 +40,16 @@
  * Model specific counters:
  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
  *			 perf code: 0x00
- *			 Available model: SLM,AMT
+ *			 Available model: SLM,AMT,GLM
  *			 Scope: Core (each processor core has a MSR)
  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			       perf code: 0x01
- *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *					       SKL,KNL
+ *					       SKL,KNL,GLM
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -57,16 +57,17 @@
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
- *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL
+ *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
+ *					       GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *					       SKL,KNL
+ *					       SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
@@ -82,7 +83,7 @@
  *			       Scope: Package (physical package)
  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *			       perf code: 0x06
- *			       Available model: HSW ULT only
+ *			       Available model: HSW ULT, GLM
  *			       Scope: Package (physical package)
  *
  */
@@ -504,6 +505,17 @@ static const struct cstate_model knl_cstates __initconst = {
 };
 
 
+static const struct cstate_model glm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
 
 #define X86_CSTATES_MODEL(model, states)	\
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
@@ -546,6 +558,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
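
The new glm_cstates entry is consumed through the intel_cstates_match table: at module init the driver matches the running CPU model and pulls the per-model counter bitmaps out of the entry's driver_data. As a userspace analogue of that lookup (illustrative only; struct cpu_id and match_cpu are made-up stand-ins for the kernel's x86_cpu_id machinery):

	#include <stdio.h>

	struct cpu_id { int model; const void *driver_data; };

	static const char glm_data[] = "glm_cstates";
	static const char knl_data[] = "knl_cstates";

	/* Zero-terminated id table, like intel_cstates_match above. */
	static const struct cpu_id match_table[] = {
		{ 0x57 /* XEON_PHI_KNL */,  knl_data },
		{ 0x5c /* ATOM_GOLDMONT */, glm_data },
		{ 0 }
	};

	static const void *match_cpu(const struct cpu_id *tbl, int model)
	{
		for (; tbl->model; tbl++)
			if (tbl->model == model)
				return tbl->driver_data;
		return NULL;	/* the driver returns -ENODEV here */
	}

	int main(void)
	{
		const char *data = match_cpu(match_table, 0x5c);

		printf("model 0x5c -> %s\n", data ? data : "no match");
		return 0;
	}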
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..a322fed5f8ed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -651,6 +645,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glp_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	/* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
@@ -816,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
@@ -889,6 +897,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+	} else {
+		ds->pebs_event_reset[hwc->idx] = 0;
 	}
 }
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb261656a320..955457a30197 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53728eea1bed..476aec3a4cab 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[];
 
 extern struct event_constraint intel_glm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glp_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1538df9b2b65..426c2ffba16d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
 
 	lockdep_assert_held(&ctx->lock);
 
+	/*
+	 * It's 'group type', really, because if our group leader is
+	 * pinned, so are we.
+	 */
+	if (event->group_leader != event)
+		event = event->group_leader;
+
 	event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
 	if (!ctx->task)
 		event_type |= EVENT_CPU;
@@ -4378,7 +4385,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
+	struct perf_event_context *ctx = leader->ctx;
 	struct perf_event *sub;
+	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
 
@@ -4408,12 +4417,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
 
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 	}
 
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	return 0;
 }
 
@@ -7321,21 +7333,6 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
-	/*
-	 * Due to interrupt latency (AKA "skid"), we may enter the
-	 * kernel before taking an overflow, even if the PMU is only
-	 * counting user events.
-	 * To avoid leaking information to userspace, we must always
-	 * reject kernel samples when exclude_kernel is set.
-	 */
-	if (event->attr.exclude_kernel && !user_mode(regs))
-		return false;
-
-	return true;
-}
-
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7357,12 +7354,6 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
-	 * For security, drop the skid kernel samples if necessary.
-	 */
-	if (!sample_is_allowed(event, regs))
-		return ret;
-
-	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
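
The sibling-list walk that the new ctx->lock section protects is what services a single read() on a group leader opened with PERF_FORMAT_GROUP. A userspace sketch of such a grouped read (illustrative, error handling trimmed):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
				    int cpu, int group_fd, unsigned long flags)
	{
		return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr;
		unsigned long long buf[5];	/* nr, then (value, id) per event */
		int leader, sibling;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
		attr.exclude_kernel = 1;

		leader = perf_event_open(&attr, 0, -1, -1, 0);
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		sibling = perf_event_open(&attr, 0, -1, leader, 0);
		if (leader < 0 || sibling < 0) {
			perror("perf_event_open");
			return 1;
		}

		/* A single read() on the leader returns the whole group. */
		if (read(leader, buf, sizeof(buf)) == sizeof(buf))
			printf("nr=%llu cycles=%llu instructions=%llu\n",
			       buf[0], buf[1], buf[3]);
		return 0;
	}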
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index a4d3762cd825..83874b0e266c 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -704,7 +704,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
 			ui_browser__gotorc(browser, row, column + 1);
 			SLsmg_draw_hline(2);
 
-			if (row++ == 0)
+			if (++row == 0)
 				goto out;
 		} else
 			row = 0;
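
This one-character fix is easy to misread: "row++ == 0" tests the value before the increment, so the arrow-drawing loop bailed out on the very first row, which is what broke arrows starting at row 0. A standalone demonstration (illustrative only):

	#include <stdio.h>

	int main(void)
	{
		int row = 0;

		if (row++ == 0)		/* tests 0: true, would bail out */
			printf("post-increment: bails at row 0\n");

		row = 0;
		if (++row == 0)		/* tests 1: false, keeps drawing */
			printf("never reached\n");
		else
			printf("pre-increment: row 0 is drawn, row is now %d\n", row);
		return 0;
	}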
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 87b431886670..413f74df08de 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -273,7 +273,7 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= 1,
+		.exclude_kernel	= geteuid() != 0,
 	};
 	struct perf_evsel *evsel;
 
@@ -298,8 +298,10 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 		goto out;
 
 	/* use asprintf() because free(evsel) assumes name is allocated */
-	if (asprintf(&evsel->name, "cycles%.*s",
-		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
+	if (asprintf(&evsel->name, "cycles%s%s%.*s",
+		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+		     attr.exclude_kernel ? "u" : "",
+		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
 		goto error_free;
 out:
 	return evsel;
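
With the new format string the default event name spells out both modifiers: exclude_kernel=1 with precise_ip=3 becomes "cycles:uppp", and plain exclude_kernel=1 becomes "cycles:u". A small sketch exercising the same asprintf() format (illustrative only):

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <stdlib.h>

	static void show(int exclude_kernel, int precise_ip)
	{
		char *name = NULL;

		if (asprintf(&name, "cycles%s%s%.*s",
			     (precise_ip || exclude_kernel) ? ":" : "",
			     exclude_kernel ? "u" : "",
			     precise_ip ? precise_ip + 1 : 0, "ppp") < 0)
			return;
		printf("exclude_kernel=%d precise_ip=%d -> %s\n",
		       exclude_kernel, precise_ip, name);
		free(name);
	}

	int main(void)
	{
		show(1, 3);	/* cycles:uppp */
		show(1, 0);	/* cycles:u    */
		show(0, 0);	/* cycles      */
		return 0;
	}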
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5de2b86b9880..2e9eb6aa3ce2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2209,7 +2209,7 @@ int machine__get_kernel_start(struct machine *machine)
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
 		err = map__load(map);
-		if (map->start)
+		if (!err)
 			machine->kernel_start = map->start;
 	}
 	return err;
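
This matters because a kernel base address of zero is legitimate (hence "perf symbols: Accept zero as the kernel base address" in the shortlog) yet falsy, so gating on map->start threw it away; gating on the map__load() return value keeps it. A toy comparison (illustrative only):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long kernel_start = 1ULL << 63;	/* fallback */
		unsigned long long map_start = 0;	/* loaded base is zero */
		int err = 0;				/* map__load() succeeded */

		if (map_start)		/* old gate: never taken for base 0 */
			kernel_start = map_start;
		printf("old gate: %#llx\n", kernel_start);

		if (!err)		/* new gate: taken whenever the load worked */
			kernel_start = map_start;
		printf("new gate: %#llx\n", kernel_start);
		return 0;
	}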