aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2019-02-04 17:23:30 -0500
committerIngo Molnar <mingo@kernel.org>2019-02-11 02:00:39 -0500
commit9b545c04abd4f7246a3bde040efde587abebb23c (patch)
tree8fce94799e295c68da00c8ae76ccfdc53ddc4c43
parentf26d9db21bf9b5dbfe17a5bc3bdf4ca6c961c924 (diff)
perf/x86/kvm: Avoid unnecessary work in guest filtering
KVM added a workaround for PEBS events leaking into guests with commit: 26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry.") This uses the VT entry/exit list to add an extra disable of the PEBS_ENABLE MSR. Intel also added a fix for this issue to microcode updates on Haswell/Broadwell/Skylake. It turns out using the MSR entry/exit list makes VM exits significantly slower. The list is only needed for disabling PEBS, because the GLOBAL_CTRL change gets optimized by KVM into changing the VMCS. Check for the microcode updates that have the microcode fix for leaking PEBS, and disable the extra entry/exit list entry for PEBS_ENABLE. In addition we always clear the GLOBAL_CTRL for the PEBS counter while running in the guest, which is enough to make them never fire at the wrong side of the host/guest transition. The overhead for VM exits with the filtering active with the patch is reduced from 8% to 4%. The microcode patch has already been merged into future platforms, so this patch is a one-off fix; the quirk mechanism is used here. For other old platforms which don't have the microcode patch and quirk, the extra disable of the PEBS_ENABLE MSR is still required. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: bp@alien8.de Link: https://lkml.kernel.org/r/1549319013-4522-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/events/intel/core.c74
-rw-r--r--arch/x86/events/intel/ds.c2
-rw-r--r--arch/x86/events/perf_event.h15
3 files changed, 75 insertions, 16 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index daafb893449b..8fe2afa9c818 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
18#include <asm/hardirq.h> 18#include <asm/hardirq.h>
19#include <asm/intel-family.h> 19#include <asm/intel-family.h>
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/cpu_device_id.h>
21 22
22#include "../perf_event.h" 23#include "../perf_event.h"
23 24
@@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
3206 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; 3207 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
3207 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; 3208 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
3208 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; 3209 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
3209 /* 3210 if (x86_pmu.flags & PMU_FL_PEBS_ALL)
3210 * If PMU counter has PEBS enabled it is not enough to disable counter 3211 arr[0].guest &= ~cpuc->pebs_enabled;
3211 * on a guest entry since PEBS memory write can overshoot guest entry 3212 else
3212 * and corrupt guest memory. Disabling PEBS solves the problem. 3213 arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
3213 */ 3214 *nr = 1;
3214 arr[1].msr = MSR_IA32_PEBS_ENABLE; 3215
3215 arr[1].host = cpuc->pebs_enabled; 3216 if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
3216 arr[1].guest = 0; 3217 /*
3218 * If PMU counter has PEBS enabled it is not enough to
3219 * disable counter on a guest entry since PEBS memory
3220 * write can overshoot guest entry and corrupt guest
3221 * memory. Disabling PEBS solves the problem.
3222 *
3223 * Don't do this if the CPU already enforces it.
3224 */
3225 arr[1].msr = MSR_IA32_PEBS_ENABLE;
3226 arr[1].host = cpuc->pebs_enabled;
3227 arr[1].guest = 0;
3228 *nr = 2;
3229 }
3217 3230
3218 *nr = 2;
3219 return arr; 3231 return arr;
3220} 3232}
3221 3233
@@ -3739,6 +3751,47 @@ static __init void intel_clovertown_quirk(void)
3739 x86_pmu.pebs_constraints = NULL; 3751 x86_pmu.pebs_constraints = NULL;
3740} 3752}
3741 3753
3754static const struct x86_cpu_desc isolation_ucodes[] = {
3755 INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f),
3756 INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e),
3757 INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015),
3758 INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
3759 INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
3760 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023),
3761 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014),
3762 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010),
3763 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009),
3764 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009),
3765 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002),
3766 INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
3767 INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
3768 INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
3769 INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c),
3770 INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c),
3771 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e),
3772 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e),
3773 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e),
3774 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e),
3775 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e),
3776 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e),
3777 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e),
3778 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e),
3779 INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e),
3780 {}
3781};
3782
3783static void intel_check_pebs_isolation(void)
3784{
3785 x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
3786}
3787
3788static __init void intel_pebs_isolation_quirk(void)
3789{
3790 WARN_ON_ONCE(x86_pmu.check_microcode);
3791 x86_pmu.check_microcode = intel_check_pebs_isolation;
3792 intel_check_pebs_isolation();
3793}
3794
3742static int intel_snb_pebs_broken(int cpu) 3795static int intel_snb_pebs_broken(int cpu)
3743{ 3796{
3744 u32 rev = UINT_MAX; /* default to broken for unknown models */ 3797 u32 rev = UINT_MAX; /* default to broken for unknown models */
@@ -4431,6 +4484,7 @@ __init int intel_pmu_init(void)
4431 case INTEL_FAM6_HASWELL_ULT: 4484 case INTEL_FAM6_HASWELL_ULT:
4432 case INTEL_FAM6_HASWELL_GT3E: 4485 case INTEL_FAM6_HASWELL_GT3E:
4433 x86_add_quirk(intel_ht_bug); 4486 x86_add_quirk(intel_ht_bug);
4487 x86_add_quirk(intel_pebs_isolation_quirk);
4434 x86_pmu.late_ack = true; 4488 x86_pmu.late_ack = true;
4435 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 4489 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4436 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 4490 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4462,6 +4516,7 @@ __init int intel_pmu_init(void)
4462 case INTEL_FAM6_BROADWELL_XEON_D: 4516 case INTEL_FAM6_BROADWELL_XEON_D:
4463 case INTEL_FAM6_BROADWELL_GT3E: 4517 case INTEL_FAM6_BROADWELL_GT3E:
4464 case INTEL_FAM6_BROADWELL_X: 4518 case INTEL_FAM6_BROADWELL_X:
4519 x86_add_quirk(intel_pebs_isolation_quirk);
4465 x86_pmu.late_ack = true; 4520 x86_pmu.late_ack = true;
4466 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 4521 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4467 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 4522 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
@@ -4524,6 +4579,7 @@ __init int intel_pmu_init(void)
4524 case INTEL_FAM6_SKYLAKE_X: 4579 case INTEL_FAM6_SKYLAKE_X:
4525 case INTEL_FAM6_KABYLAKE_MOBILE: 4580 case INTEL_FAM6_KABYLAKE_MOBILE:
4526 case INTEL_FAM6_KABYLAKE_DESKTOP: 4581 case INTEL_FAM6_KABYLAKE_DESKTOP:
4582 x86_add_quirk(intel_pebs_isolation_quirk);
4527 x86_pmu.late_ack = true; 4583 x86_pmu.late_ack = true;
4528 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 4584 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4529 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 4585 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index e9acf1d2e7b2..10c99ce1fead 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1628,6 +1628,8 @@ void __init intel_ds_init(void)
1628 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); 1628 x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
1629 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); 1629 x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
1630 x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; 1630 x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
1631 if (x86_pmu.version <= 4)
1632 x86_pmu.pebs_no_isolation = 1;
1631 if (x86_pmu.pebs) { 1633 if (x86_pmu.pebs) {
1632 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; 1634 char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
1633 int format = x86_pmu.intel_cap.pebs_format; 1635 int format = x86_pmu.intel_cap.pebs_format;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b7031bfc..dea716e1f713 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,13 +601,14 @@ struct x86_pmu {
601 /* 601 /*
602 * Intel DebugStore bits 602 * Intel DebugStore bits
603 */ 603 */
604 unsigned int bts :1, 604 unsigned int bts :1,
605 bts_active :1, 605 bts_active :1,
606 pebs :1, 606 pebs :1,
607 pebs_active :1, 607 pebs_active :1,
608 pebs_broken :1, 608 pebs_broken :1,
609 pebs_prec_dist :1, 609 pebs_prec_dist :1,
610 pebs_no_tlb :1; 610 pebs_no_tlb :1,
611 pebs_no_isolation :1;
611 int pebs_record_size; 612 int pebs_record_size;
612 int pebs_buffer_size; 613 int pebs_buffer_size;
613 void (*drain_pebs)(struct pt_regs *regs); 614 void (*drain_pebs)(struct pt_regs *regs);