aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author KarimAllah Ahmed <karahmed@amazon.de> 2018-02-01 16:59:45 -0500
committer Thomas Gleixner <tglx@linutronix.de> 2018-02-03 17:06:52 -0500
commit d28b387fb74da95d69d2615732f50cceb38e9a4d (patch)
tree fd8fcf12170450b5dee1e59171b2d71eb156fce5
parent 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd (diff)
KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL
[ Based on a patch from Ashok Raj <ashok.raj@intel.com> ]

Add direct access to MSR_IA32_SPEC_CTRL for guests. This is needed for guests that will only mitigate Spectre V2 through IBRS+IBPB and will not be using a retpoline+IBPB based approach.

To avoid the overhead of saving and restoring the MSR_IA32_SPEC_CTRL for guests that do not actually use the MSR, only start saving and restoring when a non-zero value is written to it.

No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest.

[dwmw2: Clean up CPUID bits, save/restore manually, handle reset]

Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: kvm@vger.kernel.org
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ashok Raj <ashok.raj@intel.com>
Link: https://lkml.kernel.org/r/1517522386-18410-5-git-send-email-karahmed@amazon.de
-rw-r--r-- arch/x86/kvm/cpuid.c 9
-rw-r--r-- arch/x86/kvm/vmx.c 105
-rw-r--r-- arch/x86/kvm/x86.c 2
3 files changed, 110 insertions, 6 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 1909635fe09c..13f5d4217e4f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
367 367
368 /* cpuid 0x80000008.ebx */ 368 /* cpuid 0x80000008.ebx */
369 const u32 kvm_cpuid_8000_0008_ebx_x86_features = 369 const u32 kvm_cpuid_8000_0008_ebx_x86_features =
370 F(IBPB); 370 F(IBPB) | F(IBRS);
371 371
372 /* cpuid 0xC0000001.edx */ 372 /* cpuid 0xC0000001.edx */
373 const u32 kvm_cpuid_C000_0001_edx_x86_features = 373 const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -394,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
394 394
395 /* cpuid 7.0.edx*/ 395 /* cpuid 7.0.edx*/
396 const u32 kvm_cpuid_7_0_edx_x86_features = 396 const u32 kvm_cpuid_7_0_edx_x86_features =
397 F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(ARCH_CAPABILITIES); 397 F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
398 F(ARCH_CAPABILITIES);
398 399
399 /* all calls to cpuid_count() should be made on the same cpu */ 400 /* all calls to cpuid_count() should be made on the same cpu */
400 get_cpu(); 401 get_cpu();
@@ -630,9 +631,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
630 g_phys_as = phys_as; 631 g_phys_as = phys_as;
631 entry->eax = g_phys_as | (virt_as << 8); 632 entry->eax = g_phys_as | (virt_as << 8);
632 entry->edx = 0; 633 entry->edx = 0;
633 /* IBPB isn't necessarily present in hardware cpuid */ 634 /* IBRS and IBPB aren't necessarily present in hardware cpuid */
634 if (boot_cpu_has(X86_FEATURE_IBPB)) 635 if (boot_cpu_has(X86_FEATURE_IBPB))
635 entry->ebx |= F(IBPB); 636 entry->ebx |= F(IBPB);
637 if (boot_cpu_has(X86_FEATURE_IBRS))
638 entry->ebx |= F(IBRS);
636 entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; 639 entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
637 cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); 640 cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
638 break; 641 break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e5f75eb7c459..bee4c49f6dd0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -595,6 +595,7 @@ struct vcpu_vmx {
595#endif 595#endif
596 596
597 u64 arch_capabilities; 597 u64 arch_capabilities;
598 u64 spec_ctrl;
598 599
599 u32 vm_entry_controls_shadow; 600 u32 vm_entry_controls_shadow;
600 u32 vm_exit_controls_shadow; 601 u32 vm_exit_controls_shadow;
@@ -1911,6 +1912,29 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1911} 1912}
1912 1913
1913/* 1914/*
1915 * Check if MSR is intercepted for currently loaded MSR bitmap.
1916 */
1917static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
1918{
1919 unsigned long *msr_bitmap;
1920 int f = sizeof(unsigned long);
1921
1922 if (!cpu_has_vmx_msr_bitmap())
1923 return true;
1924
1925 msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
1926
1927 if (msr <= 0x1fff) {
1928 return !!test_bit(msr, msr_bitmap + 0x800 / f);
1929 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1930 msr &= 0x1fff;
1931 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
1932 }
1933
1934 return true;
1935}
1936
1937/*
1914 * Check if MSR is intercepted for L01 MSR bitmap. 1938 * Check if MSR is intercepted for L01 MSR bitmap.
1915 */ 1939 */
1916static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) 1940static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
@@ -3262,6 +3286,14 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3262 case MSR_IA32_TSC: 3286 case MSR_IA32_TSC:
3263 msr_info->data = guest_read_tsc(vcpu); 3287 msr_info->data = guest_read_tsc(vcpu);
3264 break; 3288 break;
3289 case MSR_IA32_SPEC_CTRL:
3290 if (!msr_info->host_initiated &&
3291 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
3292 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3293 return 1;
3294
3295 msr_info->data = to_vmx(vcpu)->spec_ctrl;
3296 break;
3265 case MSR_IA32_ARCH_CAPABILITIES: 3297 case MSR_IA32_ARCH_CAPABILITIES:
3266 if (!msr_info->host_initiated && 3298 if (!msr_info->host_initiated &&
3267 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) 3299 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
@@ -3375,6 +3407,37 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3375 case MSR_IA32_TSC: 3407 case MSR_IA32_TSC:
3376 kvm_write_tsc(vcpu, msr_info); 3408 kvm_write_tsc(vcpu, msr_info);
3377 break; 3409 break;
3410 case MSR_IA32_SPEC_CTRL:
3411 if (!msr_info->host_initiated &&
3412 !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
3413 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
3414 return 1;
3415
3416 /* The STIBP bit doesn't fault even if it's not advertised */
3417 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
3418 return 1;
3419
3420 vmx->spec_ctrl = data;
3421
3422 if (!data)
3423 break;
3424
3425 /*
3426 * For non-nested:
3427 * When it's written (to non-zero) for the first time, pass
3428 * it through.
3429 *
3430 * For nested:
3431 * The handling of the MSR bitmap for L2 guests is done in
3432 * nested_vmx_merge_msr_bitmap. We should not touch the
3433 * vmcs02.msr_bitmap here since it gets completely overwritten
3434 * in the merging. We update the vmcs01 here for L1 as well
3435 * since it will end up touching the MSR anyway now.
3436 */
3437 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
3438 MSR_IA32_SPEC_CTRL,
3439 MSR_TYPE_RW);
3440 break;
3378 case MSR_IA32_PRED_CMD: 3441 case MSR_IA32_PRED_CMD:
3379 if (!msr_info->host_initiated && 3442 if (!msr_info->host_initiated &&
3380 !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && 3443 !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
@@ -5700,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5700 u64 cr0; 5763 u64 cr0;
5701 5764
5702 vmx->rmode.vm86_active = 0; 5765 vmx->rmode.vm86_active = 0;
5766 vmx->spec_ctrl = 0;
5703 5767
5704 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 5768 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
5705 kvm_set_cr8(vcpu, 0); 5769 kvm_set_cr8(vcpu, 0);
@@ -9371,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9371 9435
9372 vmx_arm_hv_timer(vcpu); 9436 vmx_arm_hv_timer(vcpu);
9373 9437
9438 /*
9439 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
9440 * it's non-zero. Since vmentry is serialising on affected CPUs, there
9441 * is no need to worry about the conditional branch over the wrmsr
9442 * being speculatively taken.
9443 */
9444 if (vmx->spec_ctrl)
9445 wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9446
9374 vmx->__launched = vmx->loaded_vmcs->launched; 9447 vmx->__launched = vmx->loaded_vmcs->launched;
9375 asm( 9448 asm(
9376 /* Store host registers */ 9449 /* Store host registers */
@@ -9489,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
9489#endif 9562#endif
9490 ); 9563 );
9491 9564
9565 /*
9566 * We do not use IBRS in the kernel. If this vCPU has used the
9567 * SPEC_CTRL MSR it may have left it on; save the value and
9568 * turn it off. This is much more efficient than blindly adding
9569 * it to the atomic save/restore list. Especially as the former
9570 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
9571 *
9572 * For non-nested case:
9573 * If the L01 MSR bitmap does not intercept the MSR, then we need to
9574 * save it.
9575 *
9576 * For nested case:
9577 * If the L02 MSR bitmap does not intercept the MSR, then we need to
9578 * save it.
9579 */
9580 if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
9581 rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
9582
9583 if (vmx->spec_ctrl)
9584 wrmsrl(MSR_IA32_SPEC_CTRL, 0);
9585
9492 /* Eliminate branch target predictions from guest mode */ 9586 /* Eliminate branch target predictions from guest mode */
9493 vmexit_fill_RSB(); 9587 vmexit_fill_RSB();
9494 9588
@@ -10113,7 +10207,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
10113 unsigned long *msr_bitmap_l1; 10207 unsigned long *msr_bitmap_l1;
10114 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; 10208 unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
10115 /* 10209 /*
10116 * pred_cmd is trying to verify two things: 10210 * pred_cmd & spec_ctrl are trying to verify two things:
10117 * 10211 *
10118 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This 10212 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
10119 * ensures that we do not accidentally generate an L02 MSR bitmap 10213 * ensures that we do not accidentally generate an L02 MSR bitmap
@@ -10126,9 +10220,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
10126 * the MSR. 10220 * the MSR.
10127 */ 10221 */
10128 bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); 10222 bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
10223 bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
10129 10224
10130 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && 10225 if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
10131 !pred_cmd) 10226 !pred_cmd && !spec_ctrl)
10132 return false; 10227 return false;
10133 10228
10134 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); 10229 page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap);
@@ -10162,6 +10257,12 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
10162 } 10257 }
10163 } 10258 }
10164 10259
10260 if (spec_ctrl)
10261 nested_vmx_disable_intercept_for_msr(
10262 msr_bitmap_l1, msr_bitmap_l0,
10263 MSR_IA32_SPEC_CTRL,
10264 MSR_TYPE_R | MSR_TYPE_W);
10265
10165 if (pred_cmd) 10266 if (pred_cmd)
10166 nested_vmx_disable_intercept_for_msr( 10267 nested_vmx_disable_intercept_for_msr(
10167 msr_bitmap_l1, msr_bitmap_l0, 10268 msr_bitmap_l1, msr_bitmap_l0,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4ec142e90a34..ac381437c291 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1009,7 +1009,7 @@ static u32 msrs_to_save[] = {
1009#endif 1009#endif
1010 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, 1010 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1011 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, 1011 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1012 MSR_IA32_ARCH_CAPABILITIES 1012 MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
1013}; 1013};
1014 1014
1015static unsigned num_msrs_to_save; 1015static unsigned num_msrs_to_save;