diff options
author | Andi Kleen <ak@linux.intel.com> | 2019-02-04 17:23:30 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-02-11 02:00:39 -0500 |
commit | 9b545c04abd4f7246a3bde040efde587abebb23c (patch) | |
tree | 8fce94799e295c68da00c8ae76ccfdc53ddc4c43 | |
parent | f26d9db21bf9b5dbfe17a5bc3bdf4ca6c961c924 (diff) |
perf/x86/kvm: Avoid unnecessary work in guest filtering
KVM added a workaround for PEBS events leaking into guests with
commit:
26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry.")
This uses the VT entry/exit list to add an extra disable of the
PEBS_ENABLE MSR.
Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.
It turns out using the MSR entry/exit list makes VM exits
significantly slower. The list is only needed for disabling
PEBS, because the GLOBAL_CTRL change gets optimized by
KVM into changing the VMCS.
Check for the microcode updates that have the microcode
fix for leaking PEBS, and disable the extra entry/exit list
entry for PEBS_ENABLE. In addition we always clear the
GLOBAL_CTRL for the PEBS counter while running in the guest,
which is enough to make them never fire at the wrong
side of the host/guest transition.
With the patch applied, the overhead for VM exits with the filtering
active is reduced from 8% to 4%.
The microcode patch has already been merged into future platforms.
This patch is a one-off; the quirk mechanism is used here.
For older platforms which don't have the microcode fix and the quirk,
the extra disable of the PEBS_ENABLE MSR is still required.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: bp@alien8.de
Link: https://lkml.kernel.org/r/1549319013-4522-2-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/events/intel/core.c | 74 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 2 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 15 |
3 files changed, 75 insertions, 16 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index daafb893449b..8fe2afa9c818 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/hardirq.h> | 18 | #include <asm/hardirq.h> |
19 | #include <asm/intel-family.h> | 19 | #include <asm/intel-family.h> |
20 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
21 | #include <asm/cpu_device_id.h> | ||
21 | 22 | ||
22 | #include "../perf_event.h" | 23 | #include "../perf_event.h" |
23 | 24 | ||
@@ -3206,16 +3207,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) | |||
3206 | arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; | 3207 | arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; |
3207 | arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; | 3208 | arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; |
3208 | arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; | 3209 | arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; |
3209 | /* | 3210 | if (x86_pmu.flags & PMU_FL_PEBS_ALL) |
3210 | * If PMU counter has PEBS enabled it is not enough to disable counter | 3211 | arr[0].guest &= ~cpuc->pebs_enabled; |
3211 | * on a guest entry since PEBS memory write can overshoot guest entry | 3212 | else |
3212 | * and corrupt guest memory. Disabling PEBS solves the problem. | 3213 | arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); |
3213 | */ | 3214 | *nr = 1; |
3214 | arr[1].msr = MSR_IA32_PEBS_ENABLE; | 3215 | |
3215 | arr[1].host = cpuc->pebs_enabled; | 3216 | if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { |
3216 | arr[1].guest = 0; | 3217 | /* |
3218 | * If PMU counter has PEBS enabled it is not enough to | ||
3219 | * disable counter on a guest entry since PEBS memory | ||
3220 | * write can overshoot guest entry and corrupt guest | ||
3221 | * memory. Disabling PEBS solves the problem. | ||
3222 | * | ||
3223 | * Don't do this if the CPU already enforces it. | ||
3224 | */ | ||
3225 | arr[1].msr = MSR_IA32_PEBS_ENABLE; | ||
3226 | arr[1].host = cpuc->pebs_enabled; | ||
3227 | arr[1].guest = 0; | ||
3228 | *nr = 2; | ||
3229 | } | ||
3217 | 3230 | ||
3218 | *nr = 2; | ||
3219 | return arr; | 3231 | return arr; |
3220 | } | 3232 | } |
3221 | 3233 | ||
@@ -3739,6 +3751,47 @@ static __init void intel_clovertown_quirk(void) | |||
3739 | x86_pmu.pebs_constraints = NULL; | 3751 | x86_pmu.pebs_constraints = NULL; |
3740 | } | 3752 | } |
3741 | 3753 | ||
3754 | static const struct x86_cpu_desc isolation_ucodes[] = { | ||
3755 | INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f), | ||
3756 | INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e), | ||
3757 | INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015), | ||
3758 | INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), | ||
3759 | INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), | ||
3760 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023), | ||
3761 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014), | ||
3762 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010), | ||
3763 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009), | ||
3764 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009), | ||
3765 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002), | ||
3766 | INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), | ||
3767 | INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), | ||
3768 | INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), | ||
3769 | INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c), | ||
3770 | INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c), | ||
3771 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e), | ||
3772 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e), | ||
3773 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e), | ||
3774 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e), | ||
3775 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e), | ||
3776 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e), | ||
3777 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e), | ||
3778 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e), | ||
3779 | INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e), | ||
3780 | {} | ||
3781 | }; | ||
3782 | |||
3783 | static void intel_check_pebs_isolation(void) | ||
3784 | { | ||
3785 | x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes); | ||
3786 | } | ||
3787 | |||
3788 | static __init void intel_pebs_isolation_quirk(void) | ||
3789 | { | ||
3790 | WARN_ON_ONCE(x86_pmu.check_microcode); | ||
3791 | x86_pmu.check_microcode = intel_check_pebs_isolation; | ||
3792 | intel_check_pebs_isolation(); | ||
3793 | } | ||
3794 | |||
3742 | static int intel_snb_pebs_broken(int cpu) | 3795 | static int intel_snb_pebs_broken(int cpu) |
3743 | { | 3796 | { |
3744 | u32 rev = UINT_MAX; /* default to broken for unknown models */ | 3797 | u32 rev = UINT_MAX; /* default to broken for unknown models */ |
@@ -4431,6 +4484,7 @@ __init int intel_pmu_init(void) | |||
4431 | case INTEL_FAM6_HASWELL_ULT: | 4484 | case INTEL_FAM6_HASWELL_ULT: |
4432 | case INTEL_FAM6_HASWELL_GT3E: | 4485 | case INTEL_FAM6_HASWELL_GT3E: |
4433 | x86_add_quirk(intel_ht_bug); | 4486 | x86_add_quirk(intel_ht_bug); |
4487 | x86_add_quirk(intel_pebs_isolation_quirk); | ||
4434 | x86_pmu.late_ack = true; | 4488 | x86_pmu.late_ack = true; |
4435 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); | 4489 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
4436 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); | 4490 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
@@ -4462,6 +4516,7 @@ __init int intel_pmu_init(void) | |||
4462 | case INTEL_FAM6_BROADWELL_XEON_D: | 4516 | case INTEL_FAM6_BROADWELL_XEON_D: |
4463 | case INTEL_FAM6_BROADWELL_GT3E: | 4517 | case INTEL_FAM6_BROADWELL_GT3E: |
4464 | case INTEL_FAM6_BROADWELL_X: | 4518 | case INTEL_FAM6_BROADWELL_X: |
4519 | x86_add_quirk(intel_pebs_isolation_quirk); | ||
4465 | x86_pmu.late_ack = true; | 4520 | x86_pmu.late_ack = true; |
4466 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); | 4521 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
4467 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); | 4522 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
@@ -4524,6 +4579,7 @@ __init int intel_pmu_init(void) | |||
4524 | case INTEL_FAM6_SKYLAKE_X: | 4579 | case INTEL_FAM6_SKYLAKE_X: |
4525 | case INTEL_FAM6_KABYLAKE_MOBILE: | 4580 | case INTEL_FAM6_KABYLAKE_MOBILE: |
4526 | case INTEL_FAM6_KABYLAKE_DESKTOP: | 4581 | case INTEL_FAM6_KABYLAKE_DESKTOP: |
4582 | x86_add_quirk(intel_pebs_isolation_quirk); | ||
4527 | x86_pmu.late_ack = true; | 4583 | x86_pmu.late_ack = true; |
4528 | memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); | 4584 | memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
4529 | memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); | 4585 | memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e9acf1d2e7b2..10c99ce1fead 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c | |||
@@ -1628,6 +1628,8 @@ void __init intel_ds_init(void) | |||
1628 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | 1628 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); |
1629 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | 1629 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); |
1630 | x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; | 1630 | x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; |
1631 | if (x86_pmu.version <= 4) | ||
1632 | x86_pmu.pebs_no_isolation = 1; | ||
1631 | if (x86_pmu.pebs) { | 1633 | if (x86_pmu.pebs) { |
1632 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | 1634 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; |
1633 | int format = x86_pmu.intel_cap.pebs_format; | 1635 | int format = x86_pmu.intel_cap.pebs_format; |
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78d7b7031bfc..dea716e1f713 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h | |||
@@ -601,13 +601,14 @@ struct x86_pmu { | |||
601 | /* | 601 | /* |
602 | * Intel DebugStore bits | 602 | * Intel DebugStore bits |
603 | */ | 603 | */ |
604 | unsigned int bts :1, | 604 | unsigned int bts :1, |
605 | bts_active :1, | 605 | bts_active :1, |
606 | pebs :1, | 606 | pebs :1, |
607 | pebs_active :1, | 607 | pebs_active :1, |
608 | pebs_broken :1, | 608 | pebs_broken :1, |
609 | pebs_prec_dist :1, | 609 | pebs_prec_dist :1, |
610 | pebs_no_tlb :1; | 610 | pebs_no_tlb :1, |
611 | pebs_no_isolation :1; | ||
611 | int pebs_record_size; | 612 | int pebs_record_size; |
612 | int pebs_buffer_size; | 613 | int pebs_buffer_size; |
613 | void (*drain_pebs)(struct pt_regs *regs); | 614 | void (*drain_pebs)(struct pt_regs *regs); |