author     Ingo Molnar <mingo@kernel.org>    2016-05-05 02:35:00 -0400
committer  Ingo Molnar <mingo@kernel.org>    2016-05-05 02:35:00 -0400
commit     1fb48f8e54e5ed4d3d8599ba7e83f1f60530c81c (patch)
tree       6b8c1ccdd461e211f72c674d183f5129f5fe4a5b /arch/x86/events
parent     778843f934e362ed4ed734520f60a44a78a074b4 (diff)
parent     04974df8049fc4240d22759a91e035082ccd18b4 (diff)
Merge tag 'v4.6-rc6' into x86/asm, to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/events')
-rw-r--r--   arch/x86/events/amd/core.c   |  2
-rw-r--r--   arch/x86/events/intel/core.c |  1
-rw-r--r--   arch/x86/events/intel/lbr.c  |  6
-rw-r--r--   arch/x86/events/intel/pt.c   | 75
-rw-r--r--   arch/x86/events/intel/pt.h   |  3
-rw-r--r--   arch/x86/events/intel/rapl.c |  1
6 files changed, 74 insertions(+), 14 deletions(-)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 86a9bec18dab..bd3e8421b57c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids
 /*
  * AMD Performance Monitor K7 and later.
  */
-static const u64 amd_perfmon_event_map[] =
+static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
 {
   [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
   [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
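The amd/core.c change gives the event map an explicit PERF_COUNT_HW_MAX bound, presumably so that a lookup for a generic event AMD does not map stays inside the array and reads 0 ("unsupported") rather than whatever happens to follow a shorter table. A minimal user-space sketch of the same idiom; the HW_* names and raw codes below are stand-ins for illustration, not the kernel's definitions:

#include <stdio.h>

/* Stand-ins for the generic hardware event IDs (illustrative only). */
enum hw_event_id {
	HW_CPU_CYCLES,
	HW_INSTRUCTIONS,
	HW_STALLED_CYCLES_FRONTEND,	/* deliberately left unmapped below */
	HW_MAX,
};

/*
 * Sizing the table with HW_MAX means every valid event ID indexes inside
 * the array; designated initializers leave unmapped slots at 0.
 */
static const unsigned long long event_map[HW_MAX] = {
	[HW_CPU_CYCLES]   = 0x0076,
	[HW_INSTRUCTIONS] = 0x00c0,
};

int main(void)
{
	for (int id = 0; id < HW_MAX; id++)
		printf("event %d -> 0x%04llx%s\n", id, event_map[id],
		       event_map[id] ? "" : " (not supported)");
	return 0;
}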
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 68fa55b4d42e..aff79884e17d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3639,6 +3639,7 @@ __init int intel_pmu_init(void)
 
 	case 78: /* 14nm Skylake Mobile */
 	case 94: /* 14nm Skylake Desktop */
+	case 85: /* 14nm Skylake Server */
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6c3b7c1780c9..1ca5d1e7d4f2 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -63,7 +63,7 @@ static enum {
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
-#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_SEL_MASK	0x3ff	/* valid bits in LBR_SELECT */
 #define LBR_NOT_SUPP	-1	/* LBR filter not supported */
 #define LBR_IGN		0	/* ignored */
 
@@ -610,8 +610,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 	 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
 	 * in suppress mode. So LBR_SELECT should be set to
 	 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
+	 * But the 10th bit LBR_CALL_STACK does not operate
+	 * in suppress mode.
 	 */
-	reg->config = mask ^ x86_pmu.lbr_sel_mask;
+	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
 
 	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
 	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
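The lbr.c hunk widens LBR_SEL_MASK to cover bit 9 but keeps that bit out of the XOR, since the call-stack bit is an enable bit rather than a suppress bit. A standalone sketch with concrete numbers; the bit positions and the example filter are assumptions for illustration only:

#include <stdio.h>

#define SEL_MASK	0x3ffULL	/* valid LBR_SELECT bits, 0..9 */
#define CALL_STACK	(1ULL << 9)	/* assumed: the call-stack enable bit */
#define FILTER_USER	(1ULL << 1)	/* assumed: "user-level branches" bit */

int main(void)
{
	/* Requested filter: user branches plus call-stack mode. */
	unsigned long long mask = FILTER_USER | CALL_STACK;

	/*
	 * Bits 0..8 operate in suppress mode, so they get inverted;
	 * the call-stack bit is excluded from the inversion and passes
	 * through exactly as requested.
	 */
	unsigned long long config = mask ^ (SEL_MASK & ~CALL_STACK);

	printf("mask   = 0x%03llx\n", mask);
	printf("config = 0x%03llx (call-stack bit still 0x%03llx)\n",
	       config, config & CALL_STACK);
	return 0;
}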
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf71d6b2..09a77dbc73c9 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
 	struct dev_ext_attribute *de_attrs;
 	struct attribute **attrs;
 	size_t size;
+	u64 reg;
 	int ret;
 	long i;
 
+	if (boot_cpu_has(X86_FEATURE_VMX)) {
+		/*
+		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
+		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
+		 * post-VMXON.
+		 */
+		rdmsrl(MSR_IA32_VMX_MISC, reg);
+		if (reg & BIT(14))
+			pt_pmu.vmx = true;
+	}
+
 	attrs = NULL;
 
 	for (i = 0; i < PT_CPUID_LEAVES; i++) {
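The pt_pmu_hw_init() hunk keys the new pt_pmu.vmx flag off IA32_VMX_MISC bit 14, per SDM 36.5. A tiny sketch of the same decode on a sample register value; the sample value and the macro name are made up, and actually reading the MSR requires ring 0:

#include <stdint.h>
#include <stdio.h>

#define VMX_MISC_PT_POST_VMXON	(1ULL << 14)	/* assumed name for the SDM 36.5 bit */

int main(void)
{
	uint64_t vmx_misc = 0x7004c04ULL;	/* sample value, not read from hardware */

	if (vmx_misc & VMX_MISC_PT_POST_VMXON)
		puts("PT can keep tracing after VMXON");
	else
		puts("VMXON clears RTIT_CTL.TraceEn; PT must back off");
	return 0;
}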
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
 
 	reg |= (event->attr.config & PT_CONFIG_MASK);
 
+	event->hw.config = reg;
 	wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-	u64 ctl;
+	u64 ctl = READ_ONCE(event->hw.config);
+
+	/* may be already stopped by a PMI */
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		return;
 
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-	if (start)
-		ctl |= RTIT_CTL_TRACEEN;
-	else
-		ctl &= ~RTIT_CTL_TRACEEN;
+	ctl &= ~RTIT_CTL_TRACEEN;
 	wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+	WRITE_ONCE(event->hw.config, ctl);
+
 	/*
 	 * A wrmsr that disables trace generation serializes other PT
 	 * registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
 	 * The below WMB, separating data store and aux_head store matches
 	 * the consumer's RMB that separates aux_head load and data load.
 	 */
-	if (!start)
-		wmb();
+	wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
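The comment kept at the end of pt_config_stop() describes the producer half of the AUX buffer handshake: data stores, then wmb(), then the aux_head update, matched on the consumer side by an aux_head load, rmb(), then data loads. A user-space sketch of that pairing using C11 fences as stand-ins for the kernel barriers; the buffer and names are illustrative, not the real perf ring buffer:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Toy single-producer/single-consumer handoff mirroring the wmb()/rmb()
 * pairing described in the comment above. */
static unsigned char buf[64];
static _Atomic size_t aux_head;

static void producer_publish(size_t len)
{
	for (size_t i = 0; i < len; i++)
		buf[i] = (unsigned char)i;		/* data stores */
	atomic_thread_fence(memory_order_release);	/* plays the role of wmb() */
	atomic_store_explicit(&aux_head, len, memory_order_relaxed);
}

static size_t consumer_read(void)
{
	size_t head = atomic_load_explicit(&aux_head, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* plays the role of rmb() */

	size_t sum = 0;
	for (size_t i = 0; i < head; i++)
		sum += buf[i];				/* data loads */
	return sum;
}

int main(void)
{
	producer_publish(8);
	printf("consumer sum over published bytes: %zu\n", consumer_read());
	return 0;
}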
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
 	if (!ACCESS_ONCE(pt->handle_nmi))
 		return;
 
-	pt_config_start(false);
+	/*
+	 * If VMX is on and PT does not support it, don't touch anything.
+	 */
+	if (READ_ONCE(pt->vmx_on))
+		return;
 
 	if (!event)
 		return;
 
+	pt_config_stop(event);
+
 	buf = perf_get_aux(&pt->handle);
 	if (!buf)
 		return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
 	}
 }
 
+void intel_pt_handle_vmx(int on)
+{
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	struct perf_event *event;
+	unsigned long flags;
+
+	/* PT plays nice with VMX, do nothing */
+	if (pt_pmu.vmx)
+		return;
+
+	/*
+	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
+	 * sure to not try to set it while VMX is on. Disable
+	 * interrupts to avoid racing with pmu callbacks;
+	 * concurrent PMI should be handled fine.
+	 */
+	local_irq_save(flags);
+	WRITE_ONCE(pt->vmx_on, on);
+
+	if (on) {
+		/* prevent pt_config_stop() from writing RTIT_CTL */
+		event = pt->handle.event;
+		if (event)
+			event->hw.config = 0;
+	}
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
+
 /*
  * PMU callbacks
  */
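intel_pt_handle_vmx() is exported so the VMX host side can tell PT when RTIT_CTL.TraceEn is about to be (or has stopped being) clobbered by VMXON. A hedged sketch of how a hypervisor path might bracket the transitions; the call sites, ordering, and the vmxon()/vmxoff() stubs are assumptions for illustration, not taken from this diff:

#include <stdio.h>

/* Stub stand-ins: the real intel_pt_handle_vmx() lives in pt.c, and the
 * VMXON/VMXOFF instructions can only be executed in ring 0. */
static int vmx_on_state;

static void intel_pt_handle_vmx(int on)
{
	vmx_on_state = on;
	printf("PT notified: vmx_on = %d\n", on);
}

static void vmxon(void)  { puts("VMXON (stub)"); }
static void vmxoff(void) { puts("VMXOFF (stub)"); }

static void host_vmx_enable(void)
{
	/* Tell PT first, so it stops touching RTIT_CTL before TraceEn is cleared. */
	intel_pt_handle_vmx(1);
	vmxon();
}

static void host_vmx_disable(void)
{
	vmxoff();
	/* Only after VMXOFF is it safe for PT to re-enable tracing. */
	intel_pt_handle_vmx(0);
}

int main(void)
{
	host_vmx_enable();
	host_vmx_disable();
	return 0;
}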
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
+	if (READ_ONCE(pt->vmx_on))
+		return;
+
 	if (!buf || pt_buffer_is_full(buf, pt)) {
 		event->hw.state = PERF_HES_STOPPED;
 		return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	 * see comment in intel_pt_interrupt().
 	 */
 	ACCESS_ONCE(pt->handle_nmi) = 0;
-	pt_config_start(false);
+
+	pt_config_stop(event);
 
 	if (event->hw.state == PERF_HES_STOPPED)
 		return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d205..3abb5f5cccc8 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+	bool			vmx;
 };
 
 /**
@@ -107,10 +108,12 @@ struct pt_buffer {
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
 	struct perf_output_handle handle;
 	int			handle_nmi;
+	int			vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 70c93f9b03ac..1705c9d75e44 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -718,6 +718,7 @@ static int __init rapl_pmu_init(void)
 		break;
 	case 60: /* Haswell */
 	case 69: /* Haswell-Celeron */
+	case 70: /* Haswell GT3e */
 	case 61: /* Broadwell */
 	case 71: /* Broadwell-H */
 		rapl_cntr_mask = RAPL_IDX_HSW;