Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/cpuid.h   |  2
-rw-r--r--  arch/x86/kvm/emulate.c | 39
-rw-r--r--  arch/x86/kvm/ioapic.c  | 34
-rw-r--r--  arch/x86/kvm/lapic.c   | 12
-rw-r--r--  arch/x86/kvm/svm.c     | 11
-rw-r--r--  arch/x86/kvm/vmx.c     | 73
-rw-r--r--  arch/x86/kvm/x86.c     | 42
7 files changed, 152 insertions(+), 61 deletions(-)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3a6583..c2cea6651279 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 	[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
 	[CPUID_1_ECX] = { 1, 0, CPUID_ECX},
 	[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
-	[CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
+	[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
 	[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
 	[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
 	[CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8079d141792a..e7d04d0c8008 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 			       fxstate_size(ctxt));
 }
 
+/*
+ * FXRSTOR might restore XMM registers not provided by the guest. Fill
+ * in the host registers (via FXSAVE) instead, so they won't be modified.
+ * (preemption has to stay disabled until FXRSTOR).
+ *
+ * Use noinline to keep the stack for other functions called by callers small.
+ */
+static noinline int fxregs_fixup(struct fxregs_state *fx_state,
+				 const size_t used_size)
+{
+	struct fxregs_state fx_tmp;
+	int rc;
+
+	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
+	memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
+	       __fxstate_size(16) - used_size);
+
+	return rc;
+}
+
 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 {
 	struct fxregs_state fx_state;
@@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
+	size = fxstate_size(ctxt);
+	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
 	ctxt->ops->get_fpu(ctxt);
 
-	size = fxstate_size(ctxt);
 	if (size < __fxstate_size(16)) {
-		rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+		rc = fxregs_fixup(&fx_state, size);
 		if (rc != X86EMUL_CONTINUE)
 			goto out;
 	}
 
-	rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
-	if (rc != X86EMUL_CONTINUE)
-		goto out;
-
 	if (fx_state.mxcsr >> 16) {
 		rc = emulate_gp(ctxt, 0);
 		goto out;
@@ -5000,6 +5020,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	bool op_prefix = false;
 	bool has_seg_override = false;
 	struct opcode opcode;
+	u16 dummy;
+	struct desc_struct desc;
 
 	ctxt->memop.type = OP_NONE;
 	ctxt->memopp = NULL;
@@ -5018,6 +5040,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	switch (mode) {
 	case X86EMUL_MODE_REAL:
 	case X86EMUL_MODE_VM86:
+		def_op_bytes = def_ad_bytes = 2;
+		ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
+		if (desc.d)
+			def_op_bytes = def_ad_bytes = 4;
+		break;
 	case X86EMUL_MODE_PROT16:
 		def_op_bytes = def_ad_bytes = 2;
 		break;
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index bdff437acbcb..4e822ad363f3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
 
 	old_irr = ioapic->irr;
 	ioapic->irr |= mask;
-	if (edge)
+	if (edge) {
 		ioapic->irr_delivered &= ~mask;
-	if ((edge && old_irr == ioapic->irr) ||
-	    (!edge && entry.fields.remote_irr)) {
-		ret = 0;
-		goto out;
+		if (old_irr == ioapic->irr) {
+			ret = 0;
+			goto out;
+		}
 	}
 
 	ret = ioapic_service(ioapic, irq, line_status);
@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 		    index == RTC_GSI) {
 			if (kvm_apic_match_dest(vcpu, NULL, 0,
 				     e->fields.dest_id, e->fields.dest_mode) ||
-			    (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
-			     kvm_apic_pending_eoi(vcpu, e->fields.vector)))
+			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
 				__set_bit(e->fields.vector,
 					  ioapic_handled_vectors);
 		}
@@ -277,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
 	unsigned index;
 	bool mask_before, mask_after;
+	int old_remote_irr, old_delivery_status;
 	union kvm_ioapic_redirect_entry *e;
 
 	switch (ioapic->ioregsel) {
@@ -299,14 +299,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 			return;
 		e = &ioapic->redirtbl[index];
 		mask_before = e->fields.mask;
+		/* Preserve read-only fields */
+		old_remote_irr = e->fields.remote_irr;
+		old_delivery_status = e->fields.delivery_status;
 		if (ioapic->ioregsel & 1) {
 			e->bits &= 0xffffffff;
 			e->bits |= (u64) val << 32;
 		} else {
 			e->bits &= ~0xffffffffULL;
 			e->bits |= (u32) val;
-			e->fields.remote_irr = 0;
 		}
+		e->fields.remote_irr = old_remote_irr;
+		e->fields.delivery_status = old_delivery_status;
+
+		/*
+		 * Some OSes (Linux, Xen) assume that Remote IRR bit will
+		 * be cleared by IOAPIC hardware when the entry is configured
+		 * as edge-triggered. This behavior is used to simulate an
+		 * explicit EOI on IOAPICs that don't have the EOI register.
+		 */
+		if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
+			e->fields.remote_irr = 0;
+
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -324,7 +338,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	struct kvm_lapic_irq irqe;
 	int ret;
 
-	if (entry->fields.mask)
+	if (entry->fields.mask ||
+	    (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+	     entry->fields.remote_irr))
 		return -1;
 
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 943acbf00c69..e2c1fb8d35ce 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
 	recalculate_apic_map(apic->vcpu->kvm);
 }
 
+static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
+{
+	return ((id >> 4) << 16) | (1 << (id & 0xf));
+}
+
 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 {
-	u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+	u32 ldr = kvm_apic_calc_x2apic_ldr(id);
 
 	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 
@@ -2245,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
 {
 	if (apic_x2apic_mode(vcpu->arch.apic)) {
 		u32 *id = (u32 *)(s->regs + APIC_ID);
+		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
 
 		if (vcpu->kvm->arch.x2apic_format) {
 			if (*id != vcpu->vcpu_id)
@@ -2255,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
 			else
 				*id <<= 24;
 		}
+
+		/* In x2APIC mode, the LDR is fixed and based on the id */
+		if (set)
+			*ldr = kvm_apic_calc_x2apic_ldr(*id);
 	}
 
 	return 0;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 59e13a79c2e3..eb714f1cdf7e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *c, *h;
 	struct nested_state *g;
+	u32 h_intercept_exceptions;
 
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 
@@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 	h = &svm->nested.hsave->control;
 	g = &svm->nested;
 
+	/* No need to intercept #UD if L1 doesn't intercept it */
+	h_intercept_exceptions =
+		h->intercept_exceptions & ~(1U << UD_VECTOR);
+
 	c->intercept_cr = h->intercept_cr | g->intercept_cr;
 	c->intercept_dr = h->intercept_dr | g->intercept_dr;
-	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
+	c->intercept_exceptions =
+		h_intercept_exceptions | g->intercept_exceptions;
 	c->intercept = h->intercept | g->intercept;
 }
 
@@ -2196,7 +2202,10 @@ static int ud_interception(struct vcpu_svm *svm)
 {
 	int er;
 
+	WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
 	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
+	if (er == EMULATE_USER_EXIT)
+		return 0;
 	if (er != EMULATE_DONE)
 		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
 	return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 714a0673ec3c..4704aaf6d19e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
 	u32 eb;
 
-	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
+	eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
 	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
 	if ((vcpu->guest_debug &
 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
+	else
+		eb |= 1u << UD_VECTOR;
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -5600,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 	}
 
-	vmcs_writel(GUEST_RFLAGS, 0x02);
+	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
 	kvm_rip_write(vcpu, 0xfff0);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -5915,11 +5917,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_invalid_opcode(intr_info)) {
-		if (is_guest_mode(vcpu)) {
-			kvm_queue_exception(vcpu, UD_VECTOR);
-			return 1;
-		}
+		WARN_ON_ONCE(is_guest_mode(vcpu));
 		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+		if (er == EMULATE_USER_EXIT)
+			return 0;
 		if (er != EMULATE_DONE)
 			kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
@@ -6602,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		if (kvm_test_request(KVM_REQ_EVENT, vcpu))
 			return 1;
 
-		err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
+		err = emulate_instruction(vcpu, 0);
 
 		if (err == EMULATE_USER_EXIT) {
 			++vcpu->stat.mmio_exits;
@@ -7414,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
  */
 static void free_nested(struct vcpu_vmx *vmx)
 {
-	if (!vmx->nested.vmxon)
+	if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
 		return;
 
 	vmx->nested.vmxon = false;
+	vmx->nested.smm.vmxon = false;
 	free_vpid(vmx->nested.vpid02);
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
@@ -9800,8 +9802,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
 	cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
 	cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
-	/* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
-	cr4_fixed1_update(bit(11), ecx, bit(2));
+	cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
 
 #undef cr4_fixed1_update
 }
@@ -10875,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		return 1;
 	}
 
+	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
+	    (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+	     (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+		return 1;
+
 	return 0;
 }
 
@@ -11099,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qual;
-
-	if (kvm_event_needs_reinjection(vcpu))
-		return -EBUSY;
+	bool block_nested_events =
+	    vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
 
 	if (vcpu->arch.exception.pending &&
 		nested_vmx_check_exception(vcpu, &exit_qual)) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
 		vcpu->arch.exception.pending = false;
@@ -11114,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 
 	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
 	    vmx->nested.preemption_timer_expired) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
 		return 0;
 	}
 
 	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
@@ -11137,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 
 	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
 	    nested_exit_on_intr(vcpu)) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
 		return 0;
@@ -11324,6 +11329,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	kvm_clear_interrupt_queue(vcpu);
 }
 
+static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
+			struct vmcs12 *vmcs12)
+{
+	u32 entry_failure_code;
+
+	nested_ept_uninit_mmu_context(vcpu);
+
+	/*
+	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
+	 * couldn't have changed.
+	 */
+	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+	if (!enable_ept)
+		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+}
+
 /*
  * A part of what we need to when the nested L2 guest exits and we want to
  * run its L1 parent, is to reset L1's guest state to the host state specified
@@ -11337,7 +11360,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 				   struct vmcs12 *vmcs12)
 {
 	struct kvm_segment seg;
-	u32 entry_failure_code;
 
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -11364,17 +11386,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
 	vmx_set_cr4(vcpu, vmcs12->host_cr4);
 
-	nested_ept_uninit_mmu_context(vcpu);
-
-	/*
-	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
-	 * couldn't have changed.
-	 */
-	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
-		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
-
-	if (!enable_ept)
-		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+	load_vmcs12_mmu_host_state(vcpu, vmcs12);
 
 	if (enable_vpid) {
 		/*
@@ -11604,6 +11616,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	 * accordingly.
 	 */
 	nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+	load_vmcs12_mmu_host_state(vcpu, vmcs12);
+
 	/*
 	 * The emulated instruction was already skipped in
 	 * nested_vmx_run, but the updated RIP was never
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c85aa2e2d1..eee8e7faf1af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 static bool __read_mostly ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+static bool __read_mostly report_ignored_msrs = true;
+module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+
 unsigned int min_timer_period_us = 500;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
 
@@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
 	get_cpu();
 
-	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
-			   &hv_clock.tsc_shift,
-			   &hv_clock.tsc_to_system_mul);
-	ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+	if (__this_cpu_read(cpu_tsc_khz)) {
+		kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+				   &hv_clock.tsc_shift,
+				   &hv_clock.tsc_to_system_mul);
+		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+	} else
+		ret = ktime_get_boot_ns() + ka->kvmclock_offset;
 
 	put_cpu();
 
@@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
 	 */
 	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
 
+	if (guest_hv_clock.version & 1)
+		++guest_hv_clock.version;  /* first time write, random junk */
+
 	vcpu->hv_clock.version = guest_hv_clock.version + 1;
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
@@ -2322,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
 		 */
-		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
+		if (report_ignored_msrs)
+			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
+				msr, data);
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
 		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -2359,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				    msr, data);
 			return 1;
 		} else {
-			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
-				    msr, data);
+			if (report_ignored_msrs)
+				vcpu_unimpl(vcpu,
+					"ignored wrmsr: 0x%x data 0x%llx\n",
+					msr, data);
 			break;
 		}
 	}
@@ -2578,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					       msr_info->index);
 			return 1;
 		} else {
-			vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+			if (report_ignored_msrs)
+				vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
+					msr_info->index);
 			msr_info->data = 0;
 		}
 		break;
@@ -5430,7 +5445,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 		vcpu->run->internal.ndata = 0;
-		r = EMULATE_FAIL;
+		r = EMULATE_USER_EXIT;
 	}
 	kvm_queue_exception(vcpu, UD_VECTOR);
 
@@ -5722,6 +5737,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 					  emulation_type))
 			return EMULATE_DONE;
+		if (ctxt->have_exception && inject_emulated_exception(vcpu))
+			return EMULATE_DONE;
 		if (emulation_type & EMULTYPE_SKIP)
 			return EMULATE_FAIL;
 		return handle_emulation_failure(vcpu);
@@ -7250,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct fpu *fpu = &current->thread.fpu;
 	int r;
-	sigset_t sigsaved;
 
 	fpu__initialize(fpu);
 
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+	kvm_sigset_activate(vcpu);
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		if (kvm_run->immediate_exit) {
@@ -7298,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 out:
 	post_kvm_run_save(vcpu);
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	kvm_sigset_deactivate(vcpu);
 
 	return r;
 }