Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/kvm_host.h        |  18
-rw-r--r--  arch/x86/include/asm/vmx.h             |   4
-rw-r--r--  arch/x86/include/asm/xsave.h           |   2
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h  |   1
-rw-r--r--  arch/x86/kernel/kvm.c                  |   1
-rw-r--r--  arch/x86/kernel/kvmclock.c             |   2
-rw-r--r--  arch/x86/kvm/cpuid.c                   |  37
-rw-r--r--  arch/x86/kvm/emulate.c                 |   8
-rw-r--r--  arch/x86/kvm/mmu.c                     |   2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h             |   7
-rw-r--r--  arch/x86/kvm/svm.c                     |  84
-rw-r--r--  arch/x86/kvm/vmx.c                     | 334
-rw-r--r--  arch/x86/kvm/x86.c                     | 145
-rw-r--r--  arch/x86/kvm/x86.h                     |   5
14 files changed, 465 insertions, 185 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83afbb7d9..fcaf9c961265 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -337,6 +337,11 @@ struct kvm_pmu {
 	u64 reprogram_pmi;
 };
 
+enum {
+	KVM_DEBUGREG_BP_ENABLED = 1,
+	KVM_DEBUGREG_WONT_EXIT = 2,
+};
+
 struct kvm_vcpu_arch {
 	/*
 	 * rip and regs accesses must go through
@@ -444,7 +449,6 @@ struct kvm_vcpu_arch {
 	} st;
 
 	u64 last_guest_tsc;
-	u64 last_kernel_ns;
 	u64 last_host_tsc;
 	u64 tsc_offset_adjustment;
 	u64 this_tsc_nsec;
@@ -464,7 +468,7 @@ struct kvm_vcpu_arch {
 	struct mtrr_state_type mtrr_state;
 	u32 pat;
 
-	int switch_db_regs;
+	unsigned switch_db_regs;
 	unsigned long db[KVM_NR_DB_REGS];
 	unsigned long dr6;
 	unsigned long dr7;
@@ -599,6 +603,8 @@ struct kvm_arch {
 	bool use_master_clock;
 	u64 master_kernel_ns;
 	cycle_t master_cycle_now;
+	struct delayed_work kvmclock_update_work;
+	struct delayed_work kvmclock_sync_work;
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
@@ -702,6 +708,7 @@ struct kvm_x86_ops {
 	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 	u64 (*get_dr6)(struct kvm_vcpu *vcpu);
 	void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
+	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
 	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -728,8 +735,8 @@ struct kvm_x86_ops {
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
-	int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
-	int (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	int (*vm_has_apicv)(struct kvm *kvm);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
@@ -765,6 +772,9 @@ struct kvm_x86_ops {
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
 	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
+	bool (*mpx_supported)(void);
+
+	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2067264fb7f5..7004d21e6219 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -85,6 +85,7 @@
 #define VM_EXIT_SAVE_IA32_EFER			0x00100000
 #define VM_EXIT_LOAD_IA32_EFER			0x00200000
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER	0x00400000
+#define VM_EXIT_CLEAR_BNDCFGS			0x00800000
 
 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR	0x00036dff
 
@@ -95,6 +96,7 @@
 #define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL	0x00002000
 #define VM_ENTRY_LOAD_IA32_PAT			0x00004000
 #define VM_ENTRY_LOAD_IA32_EFER			0x00008000
+#define VM_ENTRY_LOAD_BNDCFGS			0x00010000
 
 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR	0x000011ff
 
@@ -174,6 +176,8 @@ enum vmcs_field {
 	GUEST_PDPTR2_HIGH               = 0x0000280f,
 	GUEST_PDPTR3                    = 0x00002810,
 	GUEST_PDPTR3_HIGH               = 0x00002811,
+	GUEST_BNDCFGS                   = 0x00002812,
+	GUEST_BNDCFGS_HIGH              = 0x00002813,
 	HOST_IA32_PAT			= 0x00002c00,
 	HOST_IA32_PAT_HIGH		= 0x00002c01,
 	HOST_IA32_EFER			= 0x00002c02,
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 6c1d7411eb00..d949ef28c48b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,8 @@
 #define XSTATE_Hi16_ZMM	0x80
 
 #define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
+/* Bit 63 of XCR0 is reserved for future expansion */
+#define XSTATE_EXTEND_MASK	(~(XSTATE_FPSSE | (1ULL << 63)))
 
 #define FXSAVE_SIZE	512
 
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 4924f4be2b99..c827ace3121b 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -295,6 +295,7 @@
 #define MSR_SMI_COUNT			0x00000034
 #define MSR_IA32_FEATURE_CONTROL	0x0000003a
 #define MSR_IA32_TSC_ADJUST		0x0000003b
+#define MSR_IA32_BNDCFGS		0x00000d90
 
 #define FEATURE_CONTROL_LOCKED				(1<<0)
 #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 713f1b3bad52..0331cb389d68 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -417,7 +417,6 @@ void kvm_disable_steal_time(void)
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
-	WARN_ON(kvm_register_clock("primary cpu clock"));
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
 	kvm_spinlock_init();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e6041094ff26..d9156ceecdff 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -242,7 +242,7 @@ void __init kvmclock_init(void)
 	hv_clock = __va(mem);
 	memset(hv_clock, 0, size);
 
-	if (kvm_register_clock("boot clock")) {
+	if (kvm_register_clock("primary cpu clock")) {
 		hv_clock = NULL;
 		memblock_free(mem, size);
 		return;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e5503d8aec1d..bea60671ef8a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -28,7 +28,7 @@ static u32 xstate_required_size(u64 xstate_bv)
 	int feature_bit = 0;
 	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
-	xstate_bv &= ~XSTATE_FPSSE;
+	xstate_bv &= XSTATE_EXTEND_MASK;
 	while (xstate_bv) {
 		if (xstate_bv & 0x1) {
 			u32 eax, ebx, ecx, edx;
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv)
 	return ret;
 }
 
+u64 kvm_supported_xcr0(void)
+{
+	u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+	if (!kvm_x86_ops->mpx_supported())
+		xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+	return xcr0;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -73,9 +83,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	} else {
 		vcpu->arch.guest_supported_xcr0 =
 			(best->eax | ((u64)best->edx << 32)) &
-			host_xcr0 & KVM_SUPPORTED_XCR0;
-		vcpu->arch.guest_xstate_size =
-			xstate_required_size(vcpu->arch.guest_supported_xcr0);
+			kvm_supported_xcr0();
+		vcpu->arch.guest_xstate_size = best->ebx =
+			xstate_required_size(vcpu->arch.xcr0);
 	}
 
 	kvm_pmu_cpuid_update(vcpu);
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	entry->flags = 0;
 }
 
-static bool supported_xcr0_bit(unsigned bit)
-{
-	u64 mask = ((u64)1 << bit);
-
-	return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
 #define F(x) bit(X86_FEATURE_##x)
 
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -256,6 +259,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+	unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -303,7 +307,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	/* cpuid 7.0.ebx */
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-		F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+		F(ADX);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -436,16 +441,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	}
 	case 0xd: {
 		int idx, i;
+		u64 supported = kvm_supported_xcr0();
 
-		entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
-		entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+		entry->eax &= supported;
+		entry->edx &= supported >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		for (idx = 1, i = 1; idx < 64; ++idx) {
+			u64 mask = ((u64)1 << idx);
 			if (*nent >= maxnent)
 				goto out;
 
 			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+			if (entry[i].eax == 0 || !(supported & mask))
 				continue;
 			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 07ffca0a89e9..205b17eed93c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3668,6 +3668,10 @@ static const struct gprefix pfx_vmovntpx = {
 	I(0, em_mov), N, N, N,
 };
 
+static const struct gprefix pfx_0f_28_0f_29 = {
+	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
+};
+
 static const struct escape escape_d9 = { {
 	N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
 }, {
@@ -3870,7 +3874,9 @@ static const struct opcode twobyte_table[256] = {
 	IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
 	IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
 	N, N, N, N,
-	N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
+	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
+	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
+	N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
 	N, N, N, N,
 	/* 0x30 - 0x3F */
 	II(ImplicitOps | Priv, em_wrmsr, wrmsr),
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b531351a587..f5704d9e5ddc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3329,7 +3329,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 	arch.direct_map = vcpu->arch.mmu.direct_map;
 	arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
 
-	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+	return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
 }
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index cba218a2f08d..b1e6c1bf68d3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -913,7 +913,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
  * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
  * used by guest then tlbs are not flushed, so guest is allowed to access the
  * freed pages.
- * And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
+ * We set tlbs_dirty to let the notifier know this change and delay the flush
+ * until such a case actually happens.
  */
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -942,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 			return -EINVAL;
 
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
-			vcpu->kvm->tlbs_dirty++;
+			vcpu->kvm->tlbs_dirty = true;
 			continue;
 		}
 
@@ -957,7 +958,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		if (gfn != sp->gfns[i]) {
 			drop_spte(vcpu->kvm, &sp->spt[i]);
-			vcpu->kvm->tlbs_dirty++;
+			vcpu->kvm->tlbs_dirty = true;
 			continue;
 		}
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2de1bc09a8d4..7f4f9c2badae 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,6 +34,7 @@
 #include <asm/perf_event.h>
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
+#include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 
 #include <asm/virtext.h>
@@ -303,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
 	return vmcb->control.intercept_cr & (1U << bit);
 }
 
-static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = get_host_vmcb(svm);
 
-	vmcb->control.intercept_dr |= (1U << bit);
+	vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
		| (1 << INTERCEPT_DR1_READ)
+		| (1 << INTERCEPT_DR2_READ)
+		| (1 << INTERCEPT_DR3_READ)
+		| (1 << INTERCEPT_DR4_READ)
+		| (1 << INTERCEPT_DR5_READ)
+		| (1 << INTERCEPT_DR6_READ)
+		| (1 << INTERCEPT_DR7_READ)
+		| (1 << INTERCEPT_DR0_WRITE)
+		| (1 << INTERCEPT_DR1_WRITE)
+		| (1 << INTERCEPT_DR2_WRITE)
+		| (1 << INTERCEPT_DR3_WRITE)
+		| (1 << INTERCEPT_DR4_WRITE)
+		| (1 << INTERCEPT_DR5_WRITE)
+		| (1 << INTERCEPT_DR6_WRITE)
+		| (1 << INTERCEPT_DR7_WRITE);
 
 	recalc_intercepts(svm);
 }
 
-static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
+static inline void clr_dr_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = get_host_vmcb(svm);
 
-	vmcb->control.intercept_dr &= ~(1U << bit);
+	vmcb->control.intercept_dr = 0;
 
 	recalc_intercepts(svm);
 }
@@ -1080,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
 	set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 
-	set_dr_intercept(svm, INTERCEPT_DR0_READ);
-	set_dr_intercept(svm, INTERCEPT_DR1_READ);
-	set_dr_intercept(svm, INTERCEPT_DR2_READ);
-	set_dr_intercept(svm, INTERCEPT_DR3_READ);
-	set_dr_intercept(svm, INTERCEPT_DR4_READ);
-	set_dr_intercept(svm, INTERCEPT_DR5_READ);
-	set_dr_intercept(svm, INTERCEPT_DR6_READ);
-	set_dr_intercept(svm, INTERCEPT_DR7_READ);
-
-	set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
-	set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+	set_dr_intercepts(svm);
 
 	set_exception_intercept(svm, PF_VECTOR);
 	set_exception_intercept(svm, UD_VECTOR);
@@ -1684,6 +1684,21 @@ static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
 	mark_dirty(svm->vmcb, VMCB_DR);
 }
 
+static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	get_debugreg(vcpu->arch.db[0], 0);
+	get_debugreg(vcpu->arch.db[1], 1);
+	get_debugreg(vcpu->arch.db[2], 2);
+	get_debugreg(vcpu->arch.db[3], 3);
+	vcpu->arch.dr6 = svm_get_dr6(vcpu);
+	vcpu->arch.dr7 = svm->vmcb->save.dr7;
+
+	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+	set_dr_intercepts(svm);
+}
+
 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -2842,6 +2857,7 @@ static int iret_interception(struct vcpu_svm *svm)
 	clr_intercept(svm, INTERCEPT_IRET);
 	svm->vcpu.arch.hflags |= HF_IRET_MASK;
 	svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 	return 1;
 }
 
@@ -2974,6 +2990,17 @@ static int dr_interception(struct vcpu_svm *svm)
 	unsigned long val;
 	int err;
 
+	if (svm->vcpu.guest_debug == 0) {
+		/*
+		 * No more DR vmexits; force a reload of the debug registers
+		 * and reenter on this instruction.  The next vmexit will
+		 * retrieve the full state of the debug registers.
+		 */
+		clr_dr_intercepts(svm);
+		svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+		return 1;
+	}
+
 	if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
 		return emulate_on_interception(svm);
 
@@ -3649,7 +3676,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
-static int enable_irq_window(struct kvm_vcpu *vcpu)
+static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -3663,16 +3690,15 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
 		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
 	}
-	return 0;
 }
 
-static int enable_nmi_window(struct kvm_vcpu *vcpu)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
 	    == HF_NMI_MASK)
-		return 0; /* IRET will cause a vm exit */
+		return; /* IRET will cause a vm exit */
 
 	/*
 	 * Something prevents NMI from been injected. Single step over possible
@@ -3681,7 +3707,6 @@ static int enable_nmi_window(struct kvm_vcpu *vcpu)
 	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 	update_db_bp_intercept(vcpu);
-	return 0;
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4064,6 +4089,11 @@ static bool svm_invpcid_supported(void)
 	return false;
 }
 
+static bool svm_mpx_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4302,6 +4332,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.get_dr6 = svm_get_dr6,
 	.set_dr6 = svm_set_dr6,
 	.set_dr7 = svm_set_dr7,
+	.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
@@ -4345,6 +4376,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.rdtscp_supported = svm_rdtscp_supported,
 	.invpcid_supported = svm_invpcid_supported,
+	.mpx_supported = svm_mpx_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 392752834751..1320e0f8e611 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
31#include <linux/ftrace_event.h> 31#include <linux/ftrace_event.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/tboot.h> 33#include <linux/tboot.h>
34#include <linux/hrtimer.h>
34#include "kvm_cache_regs.h" 35#include "kvm_cache_regs.h"
35#include "x86.h" 36#include "x86.h"
36 37
@@ -42,6 +43,7 @@
42#include <asm/i387.h> 43#include <asm/i387.h>
43#include <asm/xcr.h> 44#include <asm/xcr.h>
44#include <asm/perf_event.h> 45#include <asm/perf_event.h>
46#include <asm/debugreg.h>
45#include <asm/kexec.h> 47#include <asm/kexec.h>
46 48
47#include "trace.h" 49#include "trace.h"
@@ -110,6 +112,8 @@ module_param(nested, bool, S_IRUGO);
110 112
111#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) 113#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
112 114
115#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
116
113/* 117/*
114 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 118 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
115 * ple_gap: upper bound on the amount of time between two successive 119 * ple_gap: upper bound on the amount of time between two successive
@@ -202,6 +206,7 @@ struct __packed vmcs12 {
202 u64 guest_pdptr1; 206 u64 guest_pdptr1;
203 u64 guest_pdptr2; 207 u64 guest_pdptr2;
204 u64 guest_pdptr3; 208 u64 guest_pdptr3;
209 u64 guest_bndcfgs;
205 u64 host_ia32_pat; 210 u64 host_ia32_pat;
206 u64 host_ia32_efer; 211 u64 host_ia32_efer;
207 u64 host_ia32_perf_global_ctrl; 212 u64 host_ia32_perf_global_ctrl;
@@ -374,6 +379,9 @@ struct nested_vmx {
374 */ 379 */
375 struct page *apic_access_page; 380 struct page *apic_access_page;
376 u64 msr_ia32_feature_control; 381 u64 msr_ia32_feature_control;
382
383 struct hrtimer preemption_timer;
384 bool preemption_timer_expired;
377}; 385};
378 386
379#define POSTED_INTR_ON 0 387#define POSTED_INTR_ON 0
@@ -441,6 +449,7 @@ struct vcpu_vmx {
441#endif 449#endif
442 int gs_ldt_reload_needed; 450 int gs_ldt_reload_needed;
443 int fs_reload_needed; 451 int fs_reload_needed;
452 u64 msr_host_bndcfgs;
444 } host_state; 453 } host_state;
445 struct { 454 struct {
446 int vm86_active; 455 int vm86_active;
@@ -533,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = {
533 GUEST_CS_LIMIT, 542 GUEST_CS_LIMIT,
534 GUEST_CS_BASE, 543 GUEST_CS_BASE,
535 GUEST_ES_BASE, 544 GUEST_ES_BASE,
545 GUEST_BNDCFGS,
536 CR0_GUEST_HOST_MASK, 546 CR0_GUEST_HOST_MASK,
537 CR0_READ_SHADOW, 547 CR0_READ_SHADOW,
538 CR4_READ_SHADOW, 548 CR4_READ_SHADOW,
@@ -588,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
588 FIELD64(GUEST_PDPTR1, guest_pdptr1), 598 FIELD64(GUEST_PDPTR1, guest_pdptr1),
589 FIELD64(GUEST_PDPTR2, guest_pdptr2), 599 FIELD64(GUEST_PDPTR2, guest_pdptr2),
590 FIELD64(GUEST_PDPTR3, guest_pdptr3), 600 FIELD64(GUEST_PDPTR3, guest_pdptr3),
601 FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
591 FIELD64(HOST_IA32_PAT, host_ia32_pat), 602 FIELD64(HOST_IA32_PAT, host_ia32_pat),
592 FIELD64(HOST_IA32_EFER, host_ia32_efer), 603 FIELD64(HOST_IA32_EFER, host_ia32_efer),
593 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), 604 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -718,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
718static u64 construct_eptp(unsigned long root_hpa); 729static u64 construct_eptp(unsigned long root_hpa);
719static void kvm_cpu_vmxon(u64 addr); 730static void kvm_cpu_vmxon(u64 addr);
720static void kvm_cpu_vmxoff(void); 731static void kvm_cpu_vmxoff(void);
732static bool vmx_mpx_supported(void);
721static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 733static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
722static void vmx_set_segment(struct kvm_vcpu *vcpu, 734static void vmx_set_segment(struct kvm_vcpu *vcpu,
723 struct kvm_segment *var, int seg); 735 struct kvm_segment *var, int seg);
@@ -728,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
728static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); 740static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
729static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 741static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
730static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 742static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
743static bool vmx_mpx_supported(void);
731 744
732static DEFINE_PER_CPU(struct vmcs *, vmxarea); 745static DEFINE_PER_CPU(struct vmcs *, vmxarea);
733static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 746static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1047,6 +1060,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
1047 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; 1060 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
1048} 1061}
1049 1062
1063static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
1064{
1065 return vmcs12->pin_based_vm_exec_control &
1066 PIN_BASED_VMX_PREEMPTION_TIMER;
1067}
1068
1050static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) 1069static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
1051{ 1070{
1052 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); 1071 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1710,6 +1729,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
1710 if (is_long_mode(&vmx->vcpu)) 1729 if (is_long_mode(&vmx->vcpu))
1711 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 1730 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1712#endif 1731#endif
1732 if (boot_cpu_has(X86_FEATURE_MPX))
1733 rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
1713 for (i = 0; i < vmx->save_nmsrs; ++i) 1734 for (i = 0; i < vmx->save_nmsrs; ++i)
1714 kvm_set_shared_msr(vmx->guest_msrs[i].index, 1735 kvm_set_shared_msr(vmx->guest_msrs[i].index,
1715 vmx->guest_msrs[i].data, 1736 vmx->guest_msrs[i].data,
@@ -1747,6 +1768,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1747#ifdef CONFIG_X86_64 1768#ifdef CONFIG_X86_64
1748 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1769 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1749#endif 1770#endif
1771 if (vmx->host_state.msr_host_bndcfgs)
1772 wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
1750 /* 1773 /*
1751 * If the FPU is not active (through the host task or 1774 * If the FPU is not active (through the host task or
1752 * the guest vcpu), then restore the cr0.TS bit. 1775 * the guest vcpu), then restore the cr0.TS bit.
@@ -2248,9 +2271,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2248 */ 2271 */
2249 nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 2272 nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2250 nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | 2273 nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
2251 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | 2274 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
2275 nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
2252 PIN_BASED_VMX_PREEMPTION_TIMER; 2276 PIN_BASED_VMX_PREEMPTION_TIMER;
2253 nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2254 2277
2255 /* 2278 /*
2256 * Exit controls 2279 * Exit controls
@@ -2265,15 +2288,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2265#ifdef CONFIG_X86_64 2288#ifdef CONFIG_X86_64
2266 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2289 VM_EXIT_HOST_ADDR_SPACE_SIZE |
2267#endif 2290#endif
2268 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | 2291 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
2292 nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2293 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
2269 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2294 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2270 if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || 2295 if (vmx_mpx_supported())
2271 !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { 2296 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
2272 nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2273 nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2274 }
2275 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2276 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
2277 2297
2278 /* entry controls */ 2298 /* entry controls */
2279 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 2299 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2287,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2287 VM_ENTRY_LOAD_IA32_PAT; 2307 VM_ENTRY_LOAD_IA32_PAT;
2288 nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | 2308 nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
2289 VM_ENTRY_LOAD_IA32_EFER); 2309 VM_ENTRY_LOAD_IA32_EFER);
2310 if (vmx_mpx_supported())
2311 nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
2290 2312
2291 /* cpu-based controls */ 2313 /* cpu-based controls */
2292 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, 2314 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2342,9 +2364,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2342 2364
2343 /* miscellaneous data */ 2365 /* miscellaneous data */
2344 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); 2366 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
2345 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | 2367 nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
2346 VMX_MISC_SAVE_EFER_LMA; 2368 nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
2347 nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT; 2369 VMX_MISC_ACTIVITY_HLT;
2348 nested_vmx_misc_high = 0; 2370 nested_vmx_misc_high = 0;
2349} 2371}
2350 2372
@@ -2479,6 +2501,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2479 case MSR_IA32_SYSENTER_ESP: 2501 case MSR_IA32_SYSENTER_ESP:
2480 data = vmcs_readl(GUEST_SYSENTER_ESP); 2502 data = vmcs_readl(GUEST_SYSENTER_ESP);
2481 break; 2503 break;
2504 case MSR_IA32_BNDCFGS:
2505 if (!vmx_mpx_supported())
2506 return 1;
2507 data = vmcs_read64(GUEST_BNDCFGS);
2508 break;
2482 case MSR_IA32_FEATURE_CONTROL: 2509 case MSR_IA32_FEATURE_CONTROL:
2483 if (!nested_vmx_allowed(vcpu)) 2510 if (!nested_vmx_allowed(vcpu))
2484 return 1; 2511 return 1;
@@ -2547,6 +2574,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2547 case MSR_IA32_SYSENTER_ESP: 2574 case MSR_IA32_SYSENTER_ESP:
2548 vmcs_writel(GUEST_SYSENTER_ESP, data); 2575 vmcs_writel(GUEST_SYSENTER_ESP, data);
2549 break; 2576 break;
2577 case MSR_IA32_BNDCFGS:
2578 if (!vmx_mpx_supported())
2579 return 1;
2580 vmcs_write64(GUEST_BNDCFGS, data);
2581 break;
2550 case MSR_IA32_TSC: 2582 case MSR_IA32_TSC:
2551 kvm_write_tsc(vcpu, msr_info); 2583 kvm_write_tsc(vcpu, msr_info);
2552 break; 2584 break;
@@ -2832,12 +2864,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2832 vmx_capability.ept, vmx_capability.vpid); 2864 vmx_capability.ept, vmx_capability.vpid);
2833 } 2865 }
2834 2866
2835 min = 0; 2867 min = VM_EXIT_SAVE_DEBUG_CONTROLS;
2836#ifdef CONFIG_X86_64 2868#ifdef CONFIG_X86_64
2837 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 2869 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2838#endif 2870#endif
2839 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | 2871 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
2840 VM_EXIT_ACK_INTR_ON_EXIT; 2872 VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
2841 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 2873 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
2842 &_vmexit_control) < 0) 2874 &_vmexit_control) < 0)
2843 return -EIO; 2875 return -EIO;
@@ -2853,8 +2885,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2853 !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) 2885 !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
2854 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; 2886 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
2855 2887
2856 min = 0; 2888 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
2857 opt = VM_ENTRY_LOAD_IA32_PAT; 2889 opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
2858 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 2890 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
2859 &_vmentry_control) < 0) 2891 &_vmentry_control) < 0)
2860 return -EIO; 2892 return -EIO;
@@ -4223,6 +4255,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
4223static u32 vmx_exec_control(struct vcpu_vmx *vmx) 4255static u32 vmx_exec_control(struct vcpu_vmx *vmx)
4224{ 4256{
4225 u32 exec_control = vmcs_config.cpu_based_exec_ctrl; 4257 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
4258
4259 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
4260 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
4261
4226 if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { 4262 if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
4227 exec_control &= ~CPU_BASED_TPR_SHADOW; 4263 exec_control &= ~CPU_BASED_TPR_SHADOW;
4228#ifdef CONFIG_X86_64 4264#ifdef CONFIG_X86_64
@@ -4496,39 +4532,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
4496 PIN_BASED_NMI_EXITING; 4532 PIN_BASED_NMI_EXITING;
4497} 4533}
4498 4534
4499static int enable_irq_window(struct kvm_vcpu *vcpu) 4535static void enable_irq_window(struct kvm_vcpu *vcpu)
4500{ 4536{
4501 u32 cpu_based_vm_exec_control; 4537 u32 cpu_based_vm_exec_control;
4502 4538
4503 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
4504 /*
4505 * We get here if vmx_interrupt_allowed() said we can't
4506 * inject to L1 now because L2 must run. The caller will have
4507 * to make L2 exit right after entry, so we can inject to L1
4508 * more promptly.
4509 */
4510 return -EBUSY;
4511
4512 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 4539 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4513 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; 4540 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
4514 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 4541 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4515 return 0;
4516} 4542}
4517 4543
4518static int enable_nmi_window(struct kvm_vcpu *vcpu) 4544static void enable_nmi_window(struct kvm_vcpu *vcpu)
4519{ 4545{
4520 u32 cpu_based_vm_exec_control; 4546 u32 cpu_based_vm_exec_control;
4521 4547
4522 if (!cpu_has_virtual_nmis()) 4548 if (!cpu_has_virtual_nmis() ||
4523 return enable_irq_window(vcpu); 4549 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
4524 4550 enable_irq_window(vcpu);
4525 if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) 4551 return;
4526 return enable_irq_window(vcpu); 4552 }
4527 4553
4528 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 4554 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4529 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; 4555 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
4530 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 4556 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4531 return 0;
4532} 4557}
4533 4558
4534static void vmx_inject_irq(struct kvm_vcpu *vcpu) 4559static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4620,22 +4645,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4620 4645
4621static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4646static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4622{ 4647{
4623 if (is_guest_mode(vcpu)) { 4648 if (to_vmx(vcpu)->nested.nested_run_pending)
4624 if (to_vmx(vcpu)->nested.nested_run_pending) 4649 return 0;
4625 return 0;
4626 if (nested_exit_on_nmi(vcpu)) {
4627 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4628 NMI_VECTOR | INTR_TYPE_NMI_INTR |
4629 INTR_INFO_VALID_MASK, 0);
4630 /*
4631 * The NMI-triggered VM exit counts as injection:
4632 * clear this one and block further NMIs.
4633 */
4634 vcpu->arch.nmi_pending = 0;
4635 vmx_set_nmi_mask(vcpu, true);
4636 return 0;
4637 }
4638 }
4639 4650
4640 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) 4651 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
4641 return 0; 4652 return 0;
@@ -4647,19 +4658,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4647 4658
4648static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4659static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4649{ 4660{
4650 if (is_guest_mode(vcpu)) { 4661 return (!to_vmx(vcpu)->nested.nested_run_pending &&
4651 if (to_vmx(vcpu)->nested.nested_run_pending) 4662 vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
4652 return 0;
4653 if (nested_exit_on_intr(vcpu)) {
4654 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
4655 0, 0);
4656 /*
4657 * fall through to normal code, but now in L1, not L2
4658 */
4659 }
4660 }
4661
4662 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
4663 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 4663 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4664 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); 4664 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
4665} 4665}
@@ -5102,6 +5102,22 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5102 } 5102 }
5103 } 5103 }
5104 5104
5105 if (vcpu->guest_debug == 0) {
5106 u32 cpu_based_vm_exec_control;
5107
5108 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5109 cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
5110 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5111
5112 /*
5113 * No more DR vmexits; force a reload of the debug registers
5114 * and reenter on this instruction. The next vmexit will
5115 * retrieve the full state of the debug registers.
5116 */
5117 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
5118 return 1;
5119 }
5120
5105 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 5121 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5106 dr = exit_qualification & DEBUG_REG_ACCESS_NUM; 5122 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5107 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5123 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
@@ -5128,6 +5144,24 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5128{ 5144{
5129} 5145}
5130 5146
5147static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5148{
5149 u32 cpu_based_vm_exec_control;
5150
5151 get_debugreg(vcpu->arch.db[0], 0);
5152 get_debugreg(vcpu->arch.db[1], 1);
5153 get_debugreg(vcpu->arch.db[2], 2);
5154 get_debugreg(vcpu->arch.db[3], 3);
5155 get_debugreg(vcpu->arch.dr6, 6);
5156 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
5157
5158 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
5159
5160 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5161 cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
5162 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5163}
5164
5131static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5165static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5132{ 5166{
5133 vmcs_writel(GUEST_DR7, val); 5167 vmcs_writel(GUEST_DR7, val);
@@ -5727,6 +5761,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
5727 */ 5761 */
5728} 5762}
5729 5763
5764static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
5765{
5766 struct vcpu_vmx *vmx =
5767 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
5768
5769 vmx->nested.preemption_timer_expired = true;
5770 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
5771 kvm_vcpu_kick(&vmx->vcpu);
5772
5773 return HRTIMER_NORESTART;
5774}
5775
5730/* 5776/*
5731 * Emulate the VMXON instruction. 5777 * Emulate the VMXON instruction.
5732 * Currently, we just remember that VMX is active, and do not save or even 5778 * Currently, we just remember that VMX is active, and do not save or even
@@ -5791,6 +5837,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
5791 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); 5837 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
5792 vmx->nested.vmcs02_num = 0; 5838 vmx->nested.vmcs02_num = 0;
5793 5839
5840 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
5841 HRTIMER_MODE_REL);
5842 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
5843
5794 vmx->nested.vmxon = true; 5844 vmx->nested.vmxon = true;
5795 5845
5796 skip_emulated_instruction(vcpu); 5846 skip_emulated_instruction(vcpu);
@@ -6767,9 +6817,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6767 * table is L0's fault. 6817 * table is L0's fault.
6768 */ 6818 */
6769 return 0; 6819 return 0;
6770 case EXIT_REASON_PREEMPTION_TIMER:
6771 return vmcs12->pin_based_vm_exec_control &
6772 PIN_BASED_VMX_PREEMPTION_TIMER;
6773 case EXIT_REASON_WBINVD: 6820 case EXIT_REASON_WBINVD:
6774 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); 6821 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
6775 case EXIT_REASON_XSETBV: 6822 case EXIT_REASON_XSETBV:
@@ -6785,27 +6832,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6785 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6832 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6786} 6833}
6787 6834
6788static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
6789{
6790 u64 delta_tsc_l1;
6791 u32 preempt_val_l1, preempt_val_l2, preempt_scale;
6792
6793 if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
6794 PIN_BASED_VMX_PREEMPTION_TIMER))
6795 return;
6796 preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
6797 MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
6798 preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
6799 delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
6800 - vcpu->arch.last_guest_tsc;
6801 preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
6802 if (preempt_val_l2 <= preempt_val_l1)
6803 preempt_val_l2 = 0;
6804 else
6805 preempt_val_l2 -= preempt_val_l1;
6806 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
6807}
6808
6809/* 6835/*
6810 * The guest has exited. See if we can fix it or if we need userspace 6836 * The guest has exited. See if we can fix it or if we need userspace
6811 * assistance. 6837 * assistance.
@@ -7052,6 +7078,12 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
7052 local_irq_enable(); 7078 local_irq_enable();
7053} 7079}
7054 7080
7081static bool vmx_mpx_supported(void)
7082{
7083 return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
7084 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
7085}
7086
7055static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 7087static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
7056{ 7088{
7057 u32 exit_intr_info; 7089 u32 exit_intr_info;
@@ -7218,8 +7250,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7218 atomic_switch_perf_msrs(vmx); 7250 atomic_switch_perf_msrs(vmx);
7219 debugctlmsr = get_debugctlmsr(); 7251 debugctlmsr = get_debugctlmsr();
7220 7252
7221 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
7222 nested_adjust_preemption_timer(vcpu);
7223 vmx->__launched = vmx->loaded_vmcs->launched; 7253 vmx->__launched = vmx->loaded_vmcs->launched;
7224 asm( 7254 asm(
7225 /* Store host registers */ 7255 /* Store host registers */
@@ -7616,6 +7646,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7616 kvm_inject_page_fault(vcpu, fault); 7646 kvm_inject_page_fault(vcpu, fault);
7617} 7647}
7618 7648
7649static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
7650{
7651 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
7652 struct vcpu_vmx *vmx = to_vmx(vcpu);
7653
7654 if (vcpu->arch.virtual_tsc_khz == 0)
7655 return;
7656
7657 /* Make sure short timeouts reliably trigger an immediate vmexit.
7658 * hrtimer_start does not guarantee this. */
7659 if (preemption_timeout <= 1) {
7660 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
7661 return;
7662 }
7663
7664 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
7665 preemption_timeout *= 1000000;
7666 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
7667 hrtimer_start(&vmx->nested.preemption_timer,
7668 ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
7669}
7670
7619/* 7671/*
7620 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 7672 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
7621 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 7673 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -7629,7 +7681,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7629{ 7681{
7630 struct vcpu_vmx *vmx = to_vmx(vcpu); 7682 struct vcpu_vmx *vmx = to_vmx(vcpu);
7631 u32 exec_control; 7683 u32 exec_control;
7632 u32 exit_control;
7633 7684
7634 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7685 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
7635 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 7686 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7687,13 +7738,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7687 7738
7688 vmcs_write64(VMCS_LINK_POINTER, -1ull); 7739 vmcs_write64(VMCS_LINK_POINTER, -1ull);
7689 7740
7690 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, 7741 exec_control = vmcs12->pin_based_vm_exec_control;
7691 (vmcs_config.pin_based_exec_ctrl | 7742 exec_control |= vmcs_config.pin_based_exec_ctrl;
7692 vmcs12->pin_based_vm_exec_control)); 7743 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
7744 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
7693 7745
7694 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7746 vmx->nested.preemption_timer_expired = false;
7695 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 7747 if (nested_cpu_has_preemption_timer(vmcs12))
7696 vmcs12->vmx_preemption_timer_value); 7748 vmx_start_preemption_timer(vcpu);
7697 7749
7698 /* 7750 /*
7699 * Whether page-faults are trapped is determined by a combination of 7751 * Whether page-faults are trapped is determined by a combination of
@@ -7721,7 +7773,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7721 enable_ept ? vmcs12->page_fault_error_code_match : 0); 7773 enable_ept ? vmcs12->page_fault_error_code_match : 0);
7722 7774
7723 if (cpu_has_secondary_exec_ctrls()) { 7775 if (cpu_has_secondary_exec_ctrls()) {
7724 u32 exec_control = vmx_secondary_exec_control(vmx); 7776 exec_control = vmx_secondary_exec_control(vmx);
7725 if (!vmx->rdtscp_enabled) 7777 if (!vmx->rdtscp_enabled)
7726 exec_control &= ~SECONDARY_EXEC_RDTSCP; 7778 exec_control &= ~SECONDARY_EXEC_RDTSCP;
7727 /* Take the following fields only from vmcs12 */ 7779 /* Take the following fields only from vmcs12 */
@@ -7808,10 +7860,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7808 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7860 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
7809 * bits are further modified by vmx_set_efer() below. 7861 * bits are further modified by vmx_set_efer() below.
7810 */ 7862 */
7811 exit_control = vmcs_config.vmexit_ctrl; 7863 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
7812 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7813 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7814 vm_exit_controls_init(vmx, exit_control);
7815 7864
7816 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7865 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7817 * emulated by vmx_set_efer(), below. 7866 * emulated by vmx_set_efer(), below.
@@ -7830,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7830 7879
7831 set_cr4_guest_host_mask(vmx); 7880 set_cr4_guest_host_mask(vmx);
7832 7881
7882 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
7883 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
7884
7833 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) 7885 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
7834 vmcs_write64(TSC_OFFSET, 7886 vmcs_write64(TSC_OFFSET,
7835 vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); 7887 vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8155,6 +8207,58 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8155 } 8207 }
8156} 8208}
8157 8209
8210static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
8211{
8212 struct vcpu_vmx *vmx = to_vmx(vcpu);
8213
8214 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
8215 vmx->nested.preemption_timer_expired) {
8216 if (vmx->nested.nested_run_pending)
8217 return -EBUSY;
8218 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
8219 return 0;
8220 }
8221
8222 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
8223 if (vmx->nested.nested_run_pending ||
8224 vcpu->arch.interrupt.pending)
8225 return -EBUSY;
8226 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
8227 NMI_VECTOR | INTR_TYPE_NMI_INTR |
8228 INTR_INFO_VALID_MASK, 0);
8229 /*
8230 * The NMI-triggered VM exit counts as injection:
8231 * clear this one and block further NMIs.
8232 */
8233 vcpu->arch.nmi_pending = 0;
8234 vmx_set_nmi_mask(vcpu, true);
8235 return 0;
8236 }
8237
8238 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
8239 nested_exit_on_intr(vcpu)) {
8240 if (vmx->nested.nested_run_pending)
8241 return -EBUSY;
8242 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
8243 }
8244
8245 return 0;
8246}
8247
8248static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
8249{
8250 ktime_t remaining =
8251 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
8252 u64 value;
8253
8254 if (ktime_to_ns(remaining) <= 0)
8255 return 0;
8256
8257 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
8258 do_div(value, 1000000);
8259 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
8260}
8261
8158/* 8262/*
8159 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits 8263 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
8160 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), 8264 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -8225,10 +8329,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8225 else 8329 else
8226 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; 8330 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
8227 8331
8228 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8332 if (nested_cpu_has_preemption_timer(vmcs12)) {
8229 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8333 if (vmcs12->vm_exit_controls &
8230 vmcs12->vmx_preemption_timer_value = 8334 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
8231 vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); 8335 vmcs12->vmx_preemption_timer_value =
8336 vmx_get_preemption_timer_value(vcpu);
8337 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
8338 }
8232 8339
8233 /* 8340 /*
8234 * In some cases (usually, nested EPT), L2 is allowed to change its 8341 * In some cases (usually, nested EPT), L2 is allowed to change its
@@ -8260,6 +8367,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8260 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8367 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
8261 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8368 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
8262 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 8369 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
8370 if (vmx_mpx_supported())
8371 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
8263 8372
8264 /* update exit information fields: */ 8373 /* update exit information fields: */
8265 8374
@@ -8369,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8369 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); 8478 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
8370 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); 8479 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
8371 8480
8481 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
8482 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
8483 vmcs_write64(GUEST_BNDCFGS, 0);
8484
8372 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { 8485 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
8373 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); 8486 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
8374 vcpu->arch.pat = vmcs12->host_ia32_pat; 8487 vcpu->arch.pat = vmcs12->host_ia32_pat;
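On the nested VM-exit side, the two hunks above save L2's BNDCFGS into vmcs12 when MPX is supported and honor VM_EXIT_CLEAR_BNDCFGS by zeroing the hardware field before resuming L1; if L1 did not set that control, L2's value deliberately propagates, as the comment says. A compact model of the exit-side handling, with the same kind of illustrative stand-ins as before:

    #include <stdint.h>
    #include <stdbool.h>

    #define EXIT_CLEAR_BNDCFGS (1u << 23)   /* assumed bit position, for illustration only */

    struct vmcs12_exit_model {
        uint32_t vm_exit_controls;
        uint64_t guest_bndcfgs;             /* snapshot of L2's value for L1 to read */
    };

    static void exit_to_l1_bndcfgs(struct vmcs12_exit_model *v12,
                                   uint64_t *hw_guest_bndcfgs, bool mpx_supported)
    {
        if (mpx_supported)
            v12->guest_bndcfgs = *hw_guest_bndcfgs;   /* expose L2's value to L1 */
        if (v12->vm_exit_controls & EXIT_CLEAR_BNDCFGS)
            *hw_guest_bndcfgs = 0;                    /* L1 asked for a clean BNDCFGS */
    }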
@@ -8495,6 +8608,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8495 nested_vmx_succeed(vcpu); 8608 nested_vmx_succeed(vcpu);
8496 if (enable_shadow_vmcs) 8609 if (enable_shadow_vmcs)
8497 vmx->nested.sync_shadow_vmcs = true; 8610 vmx->nested.sync_shadow_vmcs = true;
8611
8612 /* in case we halted in L2 */
8613 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8498} 8614}
8499 8615
8500/* 8616/*
@@ -8573,6 +8689,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
8573 .get_dr6 = vmx_get_dr6, 8689 .get_dr6 = vmx_get_dr6,
8574 .set_dr6 = vmx_set_dr6, 8690 .set_dr6 = vmx_set_dr6,
8575 .set_dr7 = vmx_set_dr7, 8691 .set_dr7 = vmx_set_dr7,
8692 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
8576 .cache_reg = vmx_cache_reg, 8693 .cache_reg = vmx_cache_reg,
8577 .get_rflags = vmx_get_rflags, 8694 .get_rflags = vmx_get_rflags,
8578 .set_rflags = vmx_set_rflags, 8695 .set_rflags = vmx_set_rflags,
@@ -8634,6 +8751,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
8634 8751
8635 .check_intercept = vmx_check_intercept, 8752 .check_intercept = vmx_check_intercept,
8636 .handle_external_intr = vmx_handle_external_intr, 8753 .handle_external_intr = vmx_handle_external_intr,
8754 .mpx_supported = vmx_mpx_supported,
8755
8756 .check_nested_events = vmx_check_nested_events,
8637}; 8757};
8638 8758
8639static int __init vmx_init(void) 8759static int __init vmx_init(void)
@@ -8721,6 +8841,8 @@ static int __init vmx_init(void)
8721 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); 8841 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
8722 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); 8842 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
8723 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); 8843 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
8844 vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
8845
8724 memcpy(vmx_msr_bitmap_legacy_x2apic, 8846 memcpy(vmx_msr_bitmap_legacy_x2apic,
8725 vmx_msr_bitmap_legacy, PAGE_SIZE); 8847 vmx_msr_bitmap_legacy, PAGE_SIZE);
8726 memcpy(vmx_msr_bitmap_longmode_x2apic, 8848 memcpy(vmx_msr_bitmap_longmode_x2apic,
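The vmx_init() change stops intercepting guest accesses to MSR_IA32_BNDCFGS by clearing the corresponding read and write bits in the VMX MSR bitmap (the true argument presumably restricts this to the long-mode bitmap, unlike the other calls in this block). As a rough illustration of how such a 4 KiB bitmap is indexed for an MSR in the low range, under the usual VMX layout, which this sketch assumes rather than documents:

    #include <stdint.h>

    /* Assumed layout of a VMX MSR bitmap page: read-low, read-high, write-low, write-high. */
    enum { READ_LOW = 0x000, READ_HIGH = 0x400, WRITE_LOW = 0x800, WRITE_HIGH = 0xc00 };

    static void clear_bit_at(uint8_t *page, unsigned base, uint32_t msr)
    {
        page[base + msr / 8] &= (uint8_t)~(1u << (msr % 8));
    }

    /* Disable both read and write intercepts for an MSR in the 0x0000-0x1fff range. */
    static void disable_intercept_low(uint8_t bitmap[4096], uint32_t msr)
    {
        clear_bit_at(bitmap, READ_LOW, msr);
        clear_bit_at(bitmap, WRITE_LOW, msr);
    }

With the bits cleared, guest RDMSR/WRMSR of BNDCFGS no longer cause a VM exit and go straight to hardware.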
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b8578432d5b..d1c55f8722c6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -595,13 +595,13 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
595 595
596int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 596int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
597{ 597{
598 u64 xcr0; 598 u64 xcr0 = xcr;
599 u64 old_xcr0 = vcpu->arch.xcr0;
599 u64 valid_bits; 600 u64 valid_bits;
600 601
601 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 602 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
602 if (index != XCR_XFEATURE_ENABLED_MASK) 603 if (index != XCR_XFEATURE_ENABLED_MASK)
603 return 1; 604 return 1;
604 xcr0 = xcr;
605 if (!(xcr0 & XSTATE_FP)) 605 if (!(xcr0 & XSTATE_FP))
606 return 1; 606 return 1;
607 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 607 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -616,8 +616,14 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
616 if (xcr0 & ~valid_bits) 616 if (xcr0 & ~valid_bits)
617 return 1; 617 return 1;
618 618
619 if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
620 return 1;
621
619 kvm_put_guest_xcr0(vcpu); 622 kvm_put_guest_xcr0(vcpu);
620 vcpu->arch.xcr0 = xcr0; 623 vcpu->arch.xcr0 = xcr0;
624
625 if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
626 kvm_update_cpuid(vcpu);
621 return 0; 627 return 0;
622} 628}
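__kvm_set_xcr() now also enforces that the two MPX state bits are enabled or disabled together, and re-runs kvm_update_cpuid() whenever bits beyond the legacy FP/SSE states change, since CPUID leaf 0xD reports the XSAVE buffer size for the currently enabled features and has to be recomputed. A pure-function sketch of the validation rules; the XS_* bit values mirror XCR0's architectural layout but are assumptions of this sketch:

    #include <stdint.h>
    #include <stdbool.h>

    #define XS_FP      (1ull << 0)
    #define XS_SSE     (1ull << 1)
    #define XS_YMM     (1ull << 2)
    #define XS_BNDREGS (1ull << 3)
    #define XS_BNDCSR  (1ull << 4)

    static bool xcr0_is_valid(uint64_t xcr0, uint64_t supported)
    {
        if (!(xcr0 & XS_FP))
            return false;                     /* x87 state can never be disabled */
        if ((xcr0 & XS_YMM) && !(xcr0 & XS_SSE))
            return false;                     /* AVX state requires SSE state */
        if (xcr0 & ~supported)
            return false;                     /* no bits the host/KVM cannot back */
        if (!(xcr0 & XS_BNDREGS) != !(xcr0 & XS_BNDCSR))
            return false;                     /* MPX bits must be set as a pair */
        return true;
    }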
623 629
@@ -753,7 +759,9 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
753 else 759 else
754 dr7 = vcpu->arch.dr7; 760 dr7 = vcpu->arch.dr7;
755 kvm_x86_ops->set_dr7(vcpu, dr7); 761 kvm_x86_ops->set_dr7(vcpu, dr7);
756 vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK); 762 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
763 if (dr7 & DR7_BP_EN_MASK)
764 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
757} 765}
758 766
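switch_db_regs is now a flag word rather than a boolean, so kvm_update_dr7() only maintains its own bit: a plain assignment, as before, would have clobbered the WONT_EXIT flag that other code sets. A tiny model of that clear-then-conditionally-set update, with illustrative flag names:

    /* Sketch: keep one flag in a multi-bit field without disturbing the others. */
    enum { DEBUGREG_BP_ENABLED = 1u << 0, DEBUGREG_WONT_EXIT = 1u << 1 };

    static unsigned update_bp_flag(unsigned flags, unsigned long dr7, unsigned long bp_en_mask)
    {
        flags &= ~DEBUGREG_BP_ENABLED;
        if (dr7 & bp_en_mask)
            flags |= DEBUGREG_BP_ENABLED;
        return flags;
    }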
759static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 767static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
@@ -879,7 +887,7 @@ static u32 msrs_to_save[] = {
879 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 887 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
880#endif 888#endif
881 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, 889 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
882 MSR_IA32_FEATURE_CONTROL 890 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
883}; 891};
884 892
885static unsigned num_msrs_to_save; 893static unsigned num_msrs_to_save;
@@ -1581,7 +1589,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1581 /* With all the info we got, fill in the values */ 1589 /* With all the info we got, fill in the values */
1582 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1590 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1583 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1591 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1584 vcpu->last_kernel_ns = kernel_ns;
1585 vcpu->last_guest_tsc = tsc_timestamp; 1592 vcpu->last_guest_tsc = tsc_timestamp;
1586 1593
1587 /* 1594 /*
@@ -1623,14 +1630,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1623 * the others. 1630 * the others.
1624 * 1631 *
1625 * So in those cases, request a kvmclock update for all vcpus. 1632 * So in those cases, request a kvmclock update for all vcpus.
1626 * The worst case for a remote vcpu to update its kvmclock 1633 * We need to rate-limit these requests though, as they can
1627 * is then bounded by maximum nohz sleep latency. 1634 * considerably slow guests that have a large number of vcpus.
1635 * The time for a remote vcpu to update its kvmclock is bound
1636 * by the delay we use to rate-limit the updates.
1628 */ 1637 */
1629 1638
1630static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) 1639#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
1640
1641static void kvmclock_update_fn(struct work_struct *work)
1631{ 1642{
1632 int i; 1643 int i;
1633 struct kvm *kvm = v->kvm; 1644 struct delayed_work *dwork = to_delayed_work(work);
1645 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1646 kvmclock_update_work);
1647 struct kvm *kvm = container_of(ka, struct kvm, arch);
1634 struct kvm_vcpu *vcpu; 1648 struct kvm_vcpu *vcpu;
1635 1649
1636 kvm_for_each_vcpu(i, vcpu, kvm) { 1650 kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1639,6 +1653,29 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1639 } 1653 }
1640} 1654}
1641 1655
1656static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1657{
1658 struct kvm *kvm = v->kvm;
1659
1660 set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
1661 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
1662 KVMCLOCK_UPDATE_DELAY);
1663}
1664
1665#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
1666
1667static void kvmclock_sync_fn(struct work_struct *work)
1668{
1669 struct delayed_work *dwork = to_delayed_work(work);
1670 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1671 kvmclock_sync_work);
1672 struct kvm *kvm = container_of(ka, struct kvm, arch);
1673
1674 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
1675 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
1676 KVMCLOCK_SYNC_PERIOD);
1677}
1678
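The two workers split kvmclock maintenance into a coalesced update and a slow periodic sync: kvm_gen_kvmclock_update() marks the requesting vcpu immediately but defers the all-vcpu fan-out by 100 ms, so a burst of requests collapses into one pass, while kvmclock_sync_fn() re-arms itself every 300 s as a safety net. The coalescing falls out of how delayed work behaves: scheduling an already-pending delayed_work is a no-op. A minimal kernel-style module sketching the pattern; all names here are made up for the example:

    #include <linux/module.h>
    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    #define UPDATE_DELAY  msecs_to_jiffies(100)
    #define SYNC_PERIOD   (300 * HZ)

    static void update_fn(struct work_struct *work);
    static void sync_fn(struct work_struct *work);
    static DECLARE_DELAYED_WORK(update_work, update_fn);
    static DECLARE_DELAYED_WORK(sync_work, sync_fn);

    /* The expensive part: runs at most once per UPDATE_DELAY, no matter how
     * many callers asked for it in the meantime. */
    static void update_fn(struct work_struct *work)
    {
        pr_info("running the batched update\n");
    }

    /* Periodic safety net: request an update and re-arm. */
    static void sync_fn(struct work_struct *work)
    {
        schedule_delayed_work(&update_work, 0);
        schedule_delayed_work(&sync_work, SYNC_PERIOD);
    }

    /* What callers use; queueing while already pending does nothing. */
    static void request_update(void)
    {
        schedule_delayed_work(&update_work, UPDATE_DELAY);
    }

    static int __init demo_init(void)
    {
        request_update();
        schedule_delayed_work(&sync_work, SYNC_PERIOD);
        return 0;
    }

    static void __exit demo_exit(void)
    {
        cancel_delayed_work_sync(&sync_work);
        cancel_delayed_work_sync(&update_work);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");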
1642static bool msr_mtrr_valid(unsigned msr) 1679static bool msr_mtrr_valid(unsigned msr)
1643{ 1680{
1644 switch (msr) { 1681 switch (msr) {
@@ -2323,9 +2360,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2323 case HV_X64_MSR_VP_INDEX: { 2360 case HV_X64_MSR_VP_INDEX: {
2324 int r; 2361 int r;
2325 struct kvm_vcpu *v; 2362 struct kvm_vcpu *v;
2326 kvm_for_each_vcpu(r, v, vcpu->kvm) 2363 kvm_for_each_vcpu(r, v, vcpu->kvm) {
2327 if (v == vcpu) 2364 if (v == vcpu) {
2328 data = r; 2365 data = r;
2366 break;
2367 }
2368 }
2329 break; 2369 break;
2330 } 2370 }
2331 case HV_X64_MSR_EOI: 2371 case HV_X64_MSR_EOI:
@@ -2617,6 +2657,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2617 case KVM_CAP_KVMCLOCK_CTRL: 2657 case KVM_CAP_KVMCLOCK_CTRL:
2618 case KVM_CAP_READONLY_MEM: 2658 case KVM_CAP_READONLY_MEM:
2619 case KVM_CAP_HYPERV_TIME: 2659 case KVM_CAP_HYPERV_TIME:
2660 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2620#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2661#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2621 case KVM_CAP_ASSIGN_DEV_IRQ: 2662 case KVM_CAP_ASSIGN_DEV_IRQ:
2622 case KVM_CAP_PCI_2_3: 2663 case KVM_CAP_PCI_2_3:
@@ -3043,9 +3084,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3043 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility 3084 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
3044 * with old userspace. 3085 * with old userspace.
3045 */ 3086 */
3046 if (xstate_bv & ~KVM_SUPPORTED_XCR0) 3087 if (xstate_bv & ~kvm_supported_xcr0())
3047 return -EINVAL;
3048 if (xstate_bv & ~host_xcr0)
3049 return -EINVAL; 3088 return -EINVAL;
3050 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3089 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3051 guest_xsave->region, vcpu->arch.guest_xstate_size); 3090 guest_xsave->region, vcpu->arch.guest_xstate_size);
@@ -3898,6 +3937,23 @@ static void kvm_init_msr_list(void)
3898 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { 3937 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3899 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) 3938 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3900 continue; 3939 continue;
3940
3941 /*
3942 * Even MSRs that are valid in the host may not be exposed
3943 * to the guests in some cases. We could work around this
3944 * in VMX with the generic MSR save/load machinery, but it
3945 * is not really worthwhile since it will really only
3946 * happen with nested virtualization.
3947 */
3948 switch (msrs_to_save[i]) {
3949 case MSR_IA32_BNDCFGS:
3950 if (!kvm_x86_ops->mpx_supported())
3951 continue;
3952 break;
3953 default:
3954 break;
3955 }
3956
3901 if (j < i) 3957 if (j < i)
3902 msrs_to_save[j] = msrs_to_save[i]; 3958 msrs_to_save[j] = msrs_to_save[i];
3903 j++; 3959 j++;
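kvm_init_msr_list() already dropped MSRs the host cannot read; the new switch additionally drops MSRs the host has but the vendor module will not expose to guests, with MSR_IA32_BNDCFGS filtered out when MPX is unsupported. The surrounding loop uses the usual in-place compaction idiom, copying survivors down over the holes; a stand-alone version of the same idiom:

    #include <stdio.h>
    #include <stdbool.h>

    /* Stand-in predicate for "should this entry be kept?" */
    static bool keep(unsigned v) { return v % 2 == 0; }

    /* Compact the array in place, preserving order; returns the new length. */
    static unsigned filter_in_place(unsigned *a, unsigned n)
    {
        unsigned i, j = 0;

        for (i = 0; i < n; i++) {
            if (!keep(a[i]))
                continue;
            if (j < i)
                a[j] = a[i];    /* only copy once a hole has opened up */
            j++;
        }
        return j;
    }

    int main(void)
    {
        unsigned msrs[] = { 2, 3, 4, 7, 8 };
        unsigned n = filter_in_place(msrs, 5);

        for (unsigned i = 0; i < n; i++)
            printf("%u ", msrs[i]);
        printf("\n");           /* prints: 2 4 8 */
        return 0;
    }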
@@ -4394,6 +4450,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4394 if (!exchanged) 4450 if (!exchanged)
4395 return X86EMUL_CMPXCHG_FAILED; 4451 return X86EMUL_CMPXCHG_FAILED;
4396 4452
4453 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
4397 kvm_mmu_pte_write(vcpu, gpa, new, bytes); 4454 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4398 4455
4399 return X86EMUL_CONTINUE; 4456 return X86EMUL_CONTINUE;
@@ -5537,9 +5594,10 @@ int kvm_arch_init(void *opaque)
5537 goto out_free_percpu; 5594 goto out_free_percpu;
5538 5595
5539 kvm_set_mmio_spte_mask(); 5596 kvm_set_mmio_spte_mask();
5540 kvm_init_msr_list();
5541 5597
5542 kvm_x86_ops = ops; 5598 kvm_x86_ops = ops;
5599 kvm_init_msr_list();
5600
5543 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 5601 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5544 PT_DIRTY_MASK, PT64_NX_MASK, 0); 5602 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5545 5603
@@ -5782,8 +5840,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5782 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 5840 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5783} 5841}
5784 5842
5785static void inject_pending_event(struct kvm_vcpu *vcpu) 5843static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5786{ 5844{
5845 int r;
5846
5787 /* try to reinject previous events if any */ 5847 /* try to reinject previous events if any */
5788 if (vcpu->arch.exception.pending) { 5848 if (vcpu->arch.exception.pending) {
5789 trace_kvm_inj_exception(vcpu->arch.exception.nr, 5849 trace_kvm_inj_exception(vcpu->arch.exception.nr,
@@ -5793,17 +5853,23 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5793 vcpu->arch.exception.has_error_code, 5853 vcpu->arch.exception.has_error_code,
5794 vcpu->arch.exception.error_code, 5854 vcpu->arch.exception.error_code,
5795 vcpu->arch.exception.reinject); 5855 vcpu->arch.exception.reinject);
5796 return; 5856 return 0;
5797 } 5857 }
5798 5858
5799 if (vcpu->arch.nmi_injected) { 5859 if (vcpu->arch.nmi_injected) {
5800 kvm_x86_ops->set_nmi(vcpu); 5860 kvm_x86_ops->set_nmi(vcpu);
5801 return; 5861 return 0;
5802 } 5862 }
5803 5863
5804 if (vcpu->arch.interrupt.pending) { 5864 if (vcpu->arch.interrupt.pending) {
5805 kvm_x86_ops->set_irq(vcpu); 5865 kvm_x86_ops->set_irq(vcpu);
5806 return; 5866 return 0;
5867 }
5868
5869 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
5870 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
5871 if (r != 0)
5872 return r;
5807 } 5873 }
5808 5874
5809 /* try to inject new event if pending */ 5875 /* try to inject new event if pending */
@@ -5820,6 +5886,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5820 kvm_x86_ops->set_irq(vcpu); 5886 kvm_x86_ops->set_irq(vcpu);
5821 } 5887 }
5822 } 5888 }
5889 return 0;
5823} 5890}
5824 5891
5825static void process_nmi(struct kvm_vcpu *vcpu) 5892static void process_nmi(struct kvm_vcpu *vcpu)
@@ -5924,15 +5991,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5924 goto out; 5991 goto out;
5925 } 5992 }
5926 5993
5927 inject_pending_event(vcpu); 5994 if (inject_pending_event(vcpu, req_int_win) != 0)
5928 5995 req_immediate_exit = true;
5929 /* enable NMI/IRQ window open exits if needed */ 5996 /* enable NMI/IRQ window open exits if needed */
5930 if (vcpu->arch.nmi_pending) 5997 else if (vcpu->arch.nmi_pending)
5931 req_immediate_exit = 5998 kvm_x86_ops->enable_nmi_window(vcpu);
5932 kvm_x86_ops->enable_nmi_window(vcpu) != 0;
5933 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) 5999 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5934 req_immediate_exit = 6000 kvm_x86_ops->enable_irq_window(vcpu);
5935 kvm_x86_ops->enable_irq_window(vcpu) != 0;
5936 6001
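inject_pending_event() now reports back to its caller: 0 when injection proceeded normally, and a non-zero value (in practice -EBUSY from check_nested_events()) when a nested L2-to-L1 exit is needed but cannot be delivered yet because another injection is still in flight. vcpu_enter_guest() then sets req_immediate_exit instead of opening an NMI/IRQ window, so the blocked nested event is re-evaluated at the earliest opportunity; note also that enable_nmi_window()/enable_irq_window() no longer feed req_immediate_exit. A condensed, self-contained model of the reworked decision:

    #include <stdbool.h>

    /* Mirrors the decision in vcpu_enter_guest(): a non-zero injection result
     * (a blocked nested event) wins over opening NMI/IRQ windows. */
    static bool decide_immediate_exit(int inject_result, bool nmi_pending,
                                      bool injectable_intr, bool req_int_win,
                                      bool *open_nmi_window, bool *open_irq_window)
    {
        *open_nmi_window = *open_irq_window = false;

        if (inject_result != 0)
            return true;                    /* force an immediate exit and retry */
        if (nmi_pending)
            *open_nmi_window = true;
        else if (injectable_intr || req_int_win)
            *open_irq_window = true;
        return false;
    }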
5937 if (kvm_lapic_enabled(vcpu)) { 6002 if (kvm_lapic_enabled(vcpu)) {
5938 /* 6003 /*
@@ -5992,12 +6057,28 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5992 set_debugreg(vcpu->arch.eff_db[1], 1); 6057 set_debugreg(vcpu->arch.eff_db[1], 1);
5993 set_debugreg(vcpu->arch.eff_db[2], 2); 6058 set_debugreg(vcpu->arch.eff_db[2], 2);
5994 set_debugreg(vcpu->arch.eff_db[3], 3); 6059 set_debugreg(vcpu->arch.eff_db[3], 3);
6060 set_debugreg(vcpu->arch.dr6, 6);
5995 } 6061 }
5996 6062
5997 trace_kvm_entry(vcpu->vcpu_id); 6063 trace_kvm_entry(vcpu->vcpu_id);
5998 kvm_x86_ops->run(vcpu); 6064 kvm_x86_ops->run(vcpu);
5999 6065
6000 /* 6066 /*
6067 * Do this here before restoring debug registers on the host. And
6068 * since we do this before handling the vmexit, a DR access vmexit
6069 * can (a) read the correct value of the debug registers, (b) set
6070 * KVM_DEBUGREG_WONT_EXIT again.
6071 */
6072 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
6073 int i;
6074
6075 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
6076 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
6077 for (i = 0; i < KVM_NR_DB_REGS; i++)
6078 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6079 }
6080
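When KVM_DEBUGREG_WONT_EXIT is set, the guest has been allowed to write the debug registers without exiting, so after kvm_x86_ops->run() the hardware values are the authoritative ones; the vendor callback reads them back into vcpu->arch.db[], and the loop then adopts them as the effective values, all before the host restores its own debug state and before the vmexit is handled, exactly as the comment explains. A small model of the read-back step:

    #include <string.h>

    #define NR_DB_REGS 4

    struct debug_state {
        unsigned long db[NR_DB_REGS];       /* guest-visible shadow values */
        unsigned long eff_db[NR_DB_REGS];   /* values actually loaded before entry */
    };

    /* After a run with un-intercepted DR writes, adopt the values the vendor
     * callback read back from hardware as the effective ones for the next entry. */
    static void adopt_dirty_debug_regs(struct debug_state *s)
    {
        memcpy(s->eff_db, s->db, sizeof(s->eff_db));
    }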
6081 /*
6001 * If the guest has used debug registers, at least dr7 6082 * If the guest has used debug registers, at least dr7
6002 * will be disabled while returning to the host. 6083 * will be disabled while returning to the host.
6003 * If we don't have active breakpoints in the host, we don't 6084 * If we don't have active breakpoints in the host, we don't
@@ -6711,6 +6792,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6711{ 6792{
6712 int r; 6793 int r;
6713 struct msr_data msr; 6794 struct msr_data msr;
6795 struct kvm *kvm = vcpu->kvm;
6714 6796
6715 r = vcpu_load(vcpu); 6797 r = vcpu_load(vcpu);
6716 if (r) 6798 if (r)
@@ -6721,6 +6803,9 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6721 kvm_write_tsc(vcpu, &msr); 6803 kvm_write_tsc(vcpu, &msr);
6722 vcpu_put(vcpu); 6804 vcpu_put(vcpu);
6723 6805
6806 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
6807 KVMCLOCK_SYNC_PERIOD);
6808
6724 return r; 6809 return r;
6725} 6810}
6726 6811
@@ -7013,6 +7098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7013 7098
7014 pvclock_update_vm_gtod_copy(kvm); 7099 pvclock_update_vm_gtod_copy(kvm);
7015 7100
7101 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
7102 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
7103
7016 return 0; 7104 return 0;
7017} 7105}
7018 7106
@@ -7050,6 +7138,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
7050 7138
7051void kvm_arch_sync_events(struct kvm *kvm) 7139void kvm_arch_sync_events(struct kvm *kvm)
7052{ 7140{
7141 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
7142 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
7053 kvm_free_all_assigned_devices(kvm); 7143 kvm_free_all_assigned_devices(kvm);
7054 kvm_free_pit(kvm); 7144 kvm_free_pit(kvm);
7055} 7145}
@@ -7248,6 +7338,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
7248 7338
7249int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 7339int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
7250{ 7340{
7341 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7342 kvm_x86_ops->check_nested_events(vcpu, false);
7343
7251 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && 7344 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7252 !vcpu->arch.apf.halted) 7345 !vcpu->arch.apf.halted)
7253 || !list_empty_careful(&vcpu->async_pf.done) 7346 || !list_empty_careful(&vcpu->async_pf.done)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8da5823bcde6..8c97bac9a895 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,9 +122,12 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
122 gva_t addr, void *val, unsigned int bytes, 122 gva_t addr, void *val, unsigned int bytes,
123 struct x86_exception *exception); 123 struct x86_exception *exception);
124 124
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
126 | XSTATE_BNDREGS | XSTATE_BNDCSR)
126extern u64 host_xcr0; 127extern u64 host_xcr0;
127 128
129extern u64 kvm_supported_xcr0(void);
130
128extern unsigned int min_timer_period_us; 131extern unsigned int min_timer_period_us;
129 132
130extern struct static_key kvm_no_apic_vcpu; 133extern struct static_key kvm_no_apic_vcpu;
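KVM_SUPPORTED_XCR0 grows the two MPX bits, but the static mask is no longer the whole story: the new kvm_supported_xcr0() declaration lets the effective set also depend on the host (host_xcr0) and on whether the vendor module reports MPX support, which is why kvm_vcpu_ioctl_x86_set_xsave() above now validates against the function rather than the macro. A plausible shape for such a helper, written as an assumption-labelled sketch rather than a copy of the kernel's version:

    #include <stdint.h>
    #include <stdbool.h>

    #define XS_FP      (1ull << 0)
    #define XS_SSE     (1ull << 1)
    #define XS_YMM     (1ull << 2)
    #define XS_BNDREGS (1ull << 3)
    #define XS_BNDCSR  (1ull << 4)

    #define SUPPORTED_XCR0 (XS_FP | XS_SSE | XS_YMM | XS_BNDREGS | XS_BNDCSR)

    /* Sketch: start from the compile-time mask, keep only what the host enables,
     * and drop the MPX pair when the vendor module cannot virtualize it. */
    static uint64_t supported_xcr0(uint64_t host_xcr0, bool mpx_supported)
    {
        uint64_t xcr0 = SUPPORTED_XCR0 & host_xcr0;

        if (!mpx_supported)
            xcr0 &= ~(XS_BNDREGS | XS_BNDCSR);
        return xcr0;
    }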