author	KarimAllah Ahmed <karahmed@amazon.de>	2018-02-01 16:59:45 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2018-02-03 17:06:52 -0500
commit	d28b387fb74da95d69d2615732f50cceb38e9a4d (patch)
tree	fd8fcf12170450b5dee1e59171b2d71eb156fce5
parent	28c1c9fabf48d6ad596273a11c46e0d0da3e14cd (diff)
KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL
[ Based on a patch from Ashok Raj <ashok.raj@intel.com> ]
Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for
guests that will only mitigate Spectre V2 through IBRS+IBPB and will not
be using a retpoline+IBPB based approach.
To avoid the overhead of saving and restoring MSR_IA32_SPEC_CTRL for
guests that do not actually use the MSR, only start saving and restoring
it once a non-zero value is written to it.
No attempt is made to handle STIBP here, intentionally. Filtering STIBP
may be added in a future patch, which may require trapping all writes
if we don't want to pass it through directly to the guest.
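In outline, the lifecycle this adds looks as follows (a condensed sketch
of the vmx.c hunks below, with the CPUID permission checks and longer
comments trimmed):

	/* vmx_set_msr(): the first non-zero write enables passthrough */
	case MSR_IA32_SPEC_CTRL:
		vmx->spec_ctrl = data;
		if (data)
			vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
						      MSR_IA32_SPEC_CTRL,
						      MSR_TYPE_RW);
		break;

	/* vmx_vcpu_run(): restore the guest value before vmentry ... */
	if (vmx->spec_ctrl)
		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

	/* ... and after vmexit, save whatever the guest left in the MSR
	 * and clear it for the host, but only if it was passed through */
	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
	if (vmx->spec_ctrl)
		wrmsrl(MSR_IA32_SPEC_CTRL, 0);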
[dwmw2: Clean up CPUID bits, save/restore manually, handle reset]
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: kvm@vger.kernel.org
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ashok Raj <ashok.raj@intel.com>
Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de
 arch/x86/kvm/cpuid.c |   9
 arch/x86/kvm/vmx.c   | 105
 arch/x86/kvm/x86.c   |   2
 3 files changed, 110 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 1909635fe09c..13f5d4217e4f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 0x80000008.ebx */
 	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
-		F(IBPB);
+		F(IBPB) | F(IBRS);
 
 	/* cpuid 0xC0000001.edx */
 	const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -394,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
-		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES);
+		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+		F(ARCH_CAPABILITIES);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -630,9 +631,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		g_phys_as = phys_as;
 		entry->eax = g_phys_as | (virt_as << 8);
 		entry->edx = 0;
-		/* IBPB isn't necessarily present in hardware cpuid */
+		/* IBRS and IBPB aren't necessarily present in hardware cpuid */
 		if (boot_cpu_has(X86_FEATURE_IBPB))
 			entry->ebx |= F(IBPB);
+		if (boot_cpu_has(X86_FEATURE_IBRS))
+			entry->ebx |= F(IBRS);
 		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
 		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
 		break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e5f75eb7c459..bee4c49f6dd0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -595,6 +595,7 @@ struct vcpu_vmx {
 #endif
 
 	u64 arch_capabilities;
+	u64 spec_ctrl;
 
 	u32 vm_entry_controls_shadow;
 	u32 vm_exit_controls_shadow;
@@ -1911,6 +1912,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 }
 
 /*
+ * Check if MSR is intercepted for currently loaded MSR bitmap.
+ */
+static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+{
+	unsigned long *msr_bitmap;
+	int f = sizeof(unsigned long);
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return true;
+
+	msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
+
+	if (msr <= 0x1fff) {
+		return !!test_bit(msr, msr_bitmap + 0x800 / f);
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
+	}
+
+	return true;
+}
+
+/*
  * Check if MSR is intercepted for L01 MSR bitmap.
  */
 static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
@@ -3262,6 +3286,14 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC:
 		msr_info->data = guest_read_tsc(vcpu);
 		break;
+	case MSR_IA32_SPEC_CTRL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+			return 1;
+
+		msr_info->data = to_vmx(vcpu)->spec_ctrl;
+		break;
 	case MSR_IA32_ARCH_CAPABILITIES:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
@@ -3375,6 +3407,37 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC:
 		kvm_write_tsc(vcpu, msr_info);
 		break;
+	case MSR_IA32_SPEC_CTRL:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+			return 1;
+
+		/* The STIBP bit doesn't fault even if it's not advertised */
+		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+			return 1;
+
+		vmx->spec_ctrl = data;
+
+		if (!data)
+			break;
+
+		/*
+		 * For non-nested:
+		 * When it's written (to non-zero) for the first time, pass
+		 * it through.
+		 *
+		 * For nested:
+		 * The handling of the MSR bitmap for L2 guests is done in
+		 * nested_vmx_merge_msr_bitmap. We should not touch the
+		 * vmcs02.msr_bitmap here since it gets completely overwritten
+		 * in the merging. We update the vmcs01 here for L1 as well
+		 * since it will end up touching the MSR anyway now.
+		 */
+		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+					      MSR_IA32_SPEC_CTRL,
+					      MSR_TYPE_RW);
+		break;
 	case MSR_IA32_PRED_CMD:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
@@ -5700,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	u64 cr0;
 
 	vmx->rmode.vm86_active = 0;
+	vmx->spec_ctrl = 0;
 
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(vcpu, 0);
@@ -9371,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx_arm_hv_timer(vcpu);
 
+	/*
+	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
+	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
+	 * is no need to worry about the conditional branch over the wrmsr
+	 * being speculatively taken.
+	 */
+	if (vmx->spec_ctrl)
+		wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
@@ -9489,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
+	/*
+	 * We do not use IBRS in the kernel. If this vCPU has used the
+	 * SPEC_CTRL MSR it may have left it on; save the value and
+	 * turn it off. This is much more efficient than blindly adding
+	 * it to the atomic save/restore list. Especially as the former
+	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+	 *
+	 * For non-nested case:
+	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
+	 * save it.
+	 *
+	 * For nested case:
+	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
+	 * save it.
+	 */
+	if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+		rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
+	if (vmx->spec_ctrl)
+		wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
 
@@ -10113,7 +10207,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 	unsigned long *msr_bitmap_l1;
 	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
 	/*
-	 * pred_cmd is trying to verify two things:
+	 * pred_cmd & spec_ctrl are trying to verify two things:
 	 *
 	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
 	 *    ensures that we do not accidentally generate an L02 MSR bitmap
@@ -10126,9 +10220,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 	 *    the MSR.
 	 */
 	bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
+	bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
 
 	if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
-	    !pred_cmd)
+	    !pred_cmd && !spec_ctrl)
 		return false;
 
 	page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
@@ -10162,6 +10257,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 		}
 	}
 
+	if (spec_ctrl)
+		nested_vmx_disable_intercept_for_msr(
+					msr_bitmap_l1, msr_bitmap_l0,
+					MSR_IA32_SPEC_CTRL,
+					MSR_TYPE_R | MSR_TYPE_W);
+
 	if (pred_cmd)
 		nested_vmx_disable_intercept_for_msr(
 					msr_bitmap_l1, msr_bitmap_l0,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4ec142e90a34..ac381437c291 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1009,7 +1009,7 @@ static u32 msrs_to_save[] = {
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
 	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
-	MSR_IA32_ARCH_CAPABILITIES
+	MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
 };
 
 static unsigned num_msrs_to_save;
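A note on the offsets used by msr_write_intercepted() in the vmx.c hunk
above: the VMX MSR bitmap is a 4 KiB page holding read bitmaps for the
low (0x00000000-0x00001fff) and high (0xc0000000-0xc0001fff) MSR ranges
at offsets 0x000 and 0x400, and the corresponding write bitmaps at
offsets 0x800 and 0xc00, one bit per MSR, with a set bit meaning the
access is intercepted. A standalone, byte-indexed model of the same
write check (a hypothetical helper for illustration, not part of this
patch):

	#include <stdbool.h>
	#include <stdint.h>

	/* Model of msr_write_intercepted(): a set bit in the write bitmap
	 * means a guest WRMSR to this MSR still causes a vmexit. */
	static bool write_intercepted(const uint8_t bitmap[4096], uint32_t msr)
	{
		uint32_t base;

		if (msr <= 0x1fff)
			base = 0x800;	/* write bitmap, low MSR range */
		else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
			base = 0xc00;	/* write bitmap, high MSR range */
		else
			return true;	/* not covered by the bitmap */

		msr &= 0x1fff;
		return bitmap[base + msr / 8] & (1u << (msr % 8));
	}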