-rw-r--r--  Documentation/virtual/kvm/api.txt |  13
-rw-r--r--  arch/arm/kvm/arm.c                |   4
-rw-r--r--  arch/mips/kvm/mips.c              |   7
-rw-r--r--  arch/powerpc/kvm/powerpc.c        |   6
-rw-r--r--  arch/s390/kvm/kvm-s390.c          |   4
-rw-r--r--  arch/x86/include/asm/kvm_host.h   |   5
-rw-r--r--  arch/x86/kvm/cpuid.c              |   2
-rw-r--r--  arch/x86/kvm/lapic.c              |  64
-rw-r--r--  arch/x86/kvm/lapic.h              |   4
-rw-r--r--  arch/x86/kvm/svm.c                |  55
-rw-r--r--  arch/x86/kvm/vmx.c                | 752
-rw-r--r--  arch/x86/kvm/x86.c                | 109
-rw-r--r--  drivers/ptp/ptp_kvm.c             |   7
-rw-r--r--  include/linux/kvm_host.h          |  17
-rw-r--r--  include/uapi/linux/kvm.h          |  17
-rw-r--r--  virt/kvm/kvm_main.c               | 113
16 files changed, 572 insertions(+), 594 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e4f2cdcf78eb..069450938b79 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3389,7 +3389,18 @@ struct kvm_run {
3389Request that KVM_RUN return when it becomes possible to inject external 3389Request that KVM_RUN return when it becomes possible to inject external
3390interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. 3390interrupts into the guest. Useful in conjunction with KVM_INTERRUPT.
3391 3391
3392 __u8 padding1[7]; 3392 __u8 immediate_exit;
3393
3394This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
3395exits immediately, returning -EINTR. In the common scenario where a
3396signal is used to "kick" a VCPU out of KVM_RUN, this field can be used
3397to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability.
3398Rather than blocking the signal outside KVM_RUN, userspace can set up
3399a signal handler that sets run->immediate_exit to a non-zero value.
3400
3401This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available.
3402
3403 __u8 padding1[6];
3393 3404
3394 /* out */ 3405 /* out */
3395 __u32 exit_reason; 3406 __u32 exit_reason;
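
The api.txt text added above describes the intended userspace pattern for run->immediate_exit. A minimal sketch of that pattern follows; it is illustrative only and not part of the patch (the choice of SIGUSR1, the run/vcpu_fd names and the pthread_kill-based kick are assumptions):

#include <errno.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_run *run;             /* mmap()ed kvm_run area of this vCPU */

/* Signal handler used to "kick" the vCPU thread out of KVM_RUN. */
static void kick_handler(int sig)
{
        run->immediate_exit = 1;
}

/* Install the handler when the vCPU thread is created; another thread
 * kicks the vCPU with pthread_kill(vcpu_thread, SIGUSR1). */
static void vcpu_setup_kick(void)
{
        struct sigaction sa = { .sa_handler = kick_handler };

        sigaction(SIGUSR1, &sa, NULL);
}

/* Enter the guest once.  Returns 0 if the vCPU was kicked, 1 on a normal
 * exit, -1 on error.  The caller clears run->immediate_exit only after it
 * has serviced the kick, then calls vcpu_enter() again. */
static int vcpu_enter(int vcpu_fd)
{
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                if (errno == EINTR)
                        return 0;       /* kicked before or during KVM_RUN */
                return -1;
        }
        /* ... dispatch on run->exit_reason ... */
        return 1;
}

With this scheme a kick cannot be lost: a signal that arrives before the ioctl sets immediate_exit, which KVM_RUN polls on entry and returns -EINTR; a signal that arrives while the guest is running interrupts KVM_RUN as usual. The signal therefore never needs to be blocked outside KVM_RUN, so KVM_SET_SIGNAL_MASK is unnecessary.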
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 21c493a9e5c9..c9a2103faeb9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -206,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
206 case KVM_CAP_ARM_PSCI_0_2: 206 case KVM_CAP_ARM_PSCI_0_2:
207 case KVM_CAP_READONLY_MEM: 207 case KVM_CAP_READONLY_MEM:
208 case KVM_CAP_MP_STATE: 208 case KVM_CAP_MP_STATE:
209 case KVM_CAP_IMMEDIATE_EXIT:
209 r = 1; 210 r = 1;
210 break; 211 break;
211 case KVM_CAP_COALESCED_MMIO: 212 case KVM_CAP_COALESCED_MMIO:
@@ -604,6 +605,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
604 return ret; 605 return ret;
605 } 606 }
606 607
608 if (run->immediate_exit)
609 return -EINTR;
610
607 if (vcpu->sigset_active) 611 if (vcpu->sigset_active)
608 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 612 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
609 613
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 31ee5ee0010b..ed81e5ac1426 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -397,7 +397,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
397 397
398int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) 398int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
399{ 399{
400 int r = 0; 400 int r = -EINTR;
401 sigset_t sigsaved; 401 sigset_t sigsaved;
402 402
403 if (vcpu->sigset_active) 403 if (vcpu->sigset_active)
@@ -409,6 +409,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
409 vcpu->mmio_needed = 0; 409 vcpu->mmio_needed = 0;
410 } 410 }
411 411
412 if (run->immediate_exit)
413 goto out;
414
412 lose_fpu(1); 415 lose_fpu(1);
413 416
414 local_irq_disable(); 417 local_irq_disable();
@@ -429,6 +432,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
429 guest_exit_irqoff(); 432 guest_exit_irqoff();
430 local_irq_enable(); 433 local_irq_enable();
431 434
435out:
432 if (vcpu->sigset_active) 436 if (vcpu->sigset_active)
433 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 437 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
434 438
@@ -1021,6 +1025,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
1021 case KVM_CAP_ENABLE_CAP: 1025 case KVM_CAP_ENABLE_CAP:
1022 case KVM_CAP_READONLY_MEM: 1026 case KVM_CAP_READONLY_MEM:
1023 case KVM_CAP_SYNC_MMU: 1027 case KVM_CAP_SYNC_MMU:
1028 case KVM_CAP_IMMEDIATE_EXIT:
1024 r = 1; 1029 r = 1;
1025 break; 1030 break;
1026 case KVM_CAP_COALESCED_MMIO: 1031 case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index fcb253ba51e5..2b38d824e9e5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
511 case KVM_CAP_ONE_REG: 511 case KVM_CAP_ONE_REG:
512 case KVM_CAP_IOEVENTFD: 512 case KVM_CAP_IOEVENTFD:
513 case KVM_CAP_DEVICE_CTRL: 513 case KVM_CAP_DEVICE_CTRL:
514 case KVM_CAP_IMMEDIATE_EXIT:
514 r = 1; 515 r = 1;
515 break; 516 break;
516 case KVM_CAP_PPC_PAIRED_SINGLES: 517 case KVM_CAP_PPC_PAIRED_SINGLES:
@@ -1118,7 +1119,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
1118#endif 1119#endif
1119 } 1120 }
1120 1121
1121 r = kvmppc_vcpu_run(run, vcpu); 1122 if (run->immediate_exit)
1123 r = -EINTR;
1124 else
1125 r = kvmppc_vcpu_run(run, vcpu);
1122 1126
1123 if (vcpu->sigset_active) 1127 if (vcpu->sigset_active)
1124 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1128 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 502de74ea984..99e35fe0dea8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -370,6 +370,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
370 case KVM_CAP_S390_IRQCHIP: 370 case KVM_CAP_S390_IRQCHIP:
371 case KVM_CAP_VM_ATTRIBUTES: 371 case KVM_CAP_VM_ATTRIBUTES:
372 case KVM_CAP_MP_STATE: 372 case KVM_CAP_MP_STATE:
373 case KVM_CAP_IMMEDIATE_EXIT:
373 case KVM_CAP_S390_INJECT_IRQ: 374 case KVM_CAP_S390_INJECT_IRQ:
374 case KVM_CAP_S390_USER_SIGP: 375 case KVM_CAP_S390_USER_SIGP:
375 case KVM_CAP_S390_USER_STSI: 376 case KVM_CAP_S390_USER_STSI:
@@ -2798,6 +2799,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2798 int rc; 2799 int rc;
2799 sigset_t sigsaved; 2800 sigset_t sigsaved;
2800 2801
2802 if (kvm_run->immediate_exit)
2803 return -EINTR;
2804
2801 if (guestdbg_exit_pending(vcpu)) { 2805 if (guestdbg_exit_pending(vcpu)) {
2802 kvm_s390_prepare_debug_exit(vcpu); 2806 kvm_s390_prepare_debug_exit(vcpu);
2803 return 0; 2807 return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 417502cf42b6..74ef58c8ff53 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -55,7 +55,6 @@
55#define KVM_REQ_TRIPLE_FAULT 10 55#define KVM_REQ_TRIPLE_FAULT 10
56#define KVM_REQ_MMU_SYNC 11 56#define KVM_REQ_MMU_SYNC 11
57#define KVM_REQ_CLOCK_UPDATE 12 57#define KVM_REQ_CLOCK_UPDATE 12
58#define KVM_REQ_DEACTIVATE_FPU 13
59#define KVM_REQ_EVENT 14 58#define KVM_REQ_EVENT 14
60#define KVM_REQ_APF_HALT 15 59#define KVM_REQ_APF_HALT 15
61#define KVM_REQ_STEAL_UPDATE 16 60#define KVM_REQ_STEAL_UPDATE 16
@@ -936,8 +935,6 @@ struct kvm_x86_ops {
936 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 935 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
937 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 936 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
938 u32 (*get_pkru)(struct kvm_vcpu *vcpu); 937 u32 (*get_pkru)(struct kvm_vcpu *vcpu);
939 void (*fpu_activate)(struct kvm_vcpu *vcpu);
940 void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
941 938
942 void (*tlb_flush)(struct kvm_vcpu *vcpu); 939 void (*tlb_flush)(struct kvm_vcpu *vcpu);
943 940
@@ -969,7 +966,7 @@ struct kvm_x86_ops {
969 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); 966 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
970 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); 967 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
971 void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); 968 void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
972 void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); 969 int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
973 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 970 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
974 int (*get_tdp_level)(void); 971 int (*get_tdp_level)(void);
975 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 972 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c0e2036217ad..1d155cc56629 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -123,8 +123,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
123 if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) 123 if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
124 best->ebx = xstate_required_size(vcpu->arch.xcr0, true); 124 best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
125 125
126 kvm_x86_ops->fpu_activate(vcpu);
127
128 /* 126 /*
129 * The existing code assumes virtual address is 48-bit in the canonical 127 * The existing code assumes virtual address is 48-bit in the canonical
130 * address checks; exit if it is ever changed. 128 * address checks; exit if it is ever changed.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 33b799fd3a6e..bad6a25067bc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -341,7 +341,7 @@ static int find_highest_vector(void *bitmap)
341 vec >= 0; vec -= APIC_VECTORS_PER_REG) { 341 vec >= 0; vec -= APIC_VECTORS_PER_REG) {
342 reg = bitmap + REG_POS(vec); 342 reg = bitmap + REG_POS(vec);
343 if (*reg) 343 if (*reg)
344 return fls(*reg) - 1 + vec; 344 return __fls(*reg) + vec;
345 } 345 }
346 346
347 return -1; 347 return -1;
@@ -361,27 +361,32 @@ static u8 count_vectors(void *bitmap)
361 return count; 361 return count;
362} 362}
363 363
364void __kvm_apic_update_irr(u32 *pir, void *regs) 364int __kvm_apic_update_irr(u32 *pir, void *regs)
365{ 365{
366 u32 i, pir_val; 366 u32 i, vec;
367 u32 pir_val, irr_val;
368 int max_irr = -1;
367 369
368 for (i = 0; i <= 7; i++) { 370 for (i = vec = 0; i <= 7; i++, vec += 32) {
369 pir_val = READ_ONCE(pir[i]); 371 pir_val = READ_ONCE(pir[i]);
372 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
370 if (pir_val) { 373 if (pir_val) {
371 pir_val = xchg(&pir[i], 0); 374 irr_val |= xchg(&pir[i], 0);
372 *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; 375 *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
373 } 376 }
377 if (irr_val)
378 max_irr = __fls(irr_val) + vec;
374 } 379 }
380
381 return max_irr;
375} 382}
376EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); 383EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
377 384
378void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) 385int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
379{ 386{
380 struct kvm_lapic *apic = vcpu->arch.apic; 387 struct kvm_lapic *apic = vcpu->arch.apic;
381 388
382 __kvm_apic_update_irr(pir, apic->regs); 389 return __kvm_apic_update_irr(pir, apic->regs);
383
384 kvm_make_request(KVM_REQ_EVENT, vcpu);
385} 390}
386EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 391EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
387 392
@@ -401,8 +406,6 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
401 if (!apic->irr_pending) 406 if (!apic->irr_pending)
402 return -1; 407 return -1;
403 408
404 if (apic->vcpu->arch.apicv_active)
405 kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
406 result = apic_search_irr(apic); 409 result = apic_search_irr(apic);
407 ASSERT(result == -1 || result >= 16); 410 ASSERT(result == -1 || result >= 16);
408 411
@@ -416,9 +419,10 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
416 vcpu = apic->vcpu; 419 vcpu = apic->vcpu;
417 420
418 if (unlikely(vcpu->arch.apicv_active)) { 421 if (unlikely(vcpu->arch.apicv_active)) {
419 /* try to update RVI */ 422 /* need to update RVI */
420 apic_clear_vector(vec, apic->regs + APIC_IRR); 423 apic_clear_vector(vec, apic->regs + APIC_IRR);
421 kvm_make_request(KVM_REQ_EVENT, vcpu); 424 kvm_x86_ops->hwapic_irr_update(vcpu,
425 apic_find_highest_irr(apic));
422 } else { 426 } else {
423 apic->irr_pending = false; 427 apic->irr_pending = false;
424 apic_clear_vector(vec, apic->regs + APIC_IRR); 428 apic_clear_vector(vec, apic->regs + APIC_IRR);
@@ -508,6 +512,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
508 */ 512 */
509 return apic_find_highest_irr(vcpu->arch.apic); 513 return apic_find_highest_irr(vcpu->arch.apic);
510} 514}
515EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
511 516
512static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, 517static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
513 int vector, int level, int trig_mode, 518 int vector, int level, int trig_mode,
@@ -524,16 +529,14 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
524 529
525static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) 530static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
526{ 531{
527 532 return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val,
528 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, 533 sizeof(val));
529 sizeof(val));
530} 534}
531 535
532static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) 536static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
533{ 537{
534 538 return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val,
535 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, 539 sizeof(*val));
536 sizeof(*val));
537} 540}
538 541
539static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) 542static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
@@ -572,7 +575,11 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
572 575
573static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) 576static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
574{ 577{
575 int highest_irr = apic_find_highest_irr(apic); 578 int highest_irr;
579 if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
580 highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
581 else
582 highest_irr = apic_find_highest_irr(apic);
576 if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) 583 if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
577 return -1; 584 return -1;
578 return highest_irr; 585 return highest_irr;
@@ -2204,8 +2211,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2204 1 : count_vectors(apic->regs + APIC_ISR); 2211 1 : count_vectors(apic->regs + APIC_ISR);
2205 apic->highest_isr_cache = -1; 2212 apic->highest_isr_cache = -1;
2206 if (vcpu->arch.apicv_active) { 2213 if (vcpu->arch.apicv_active) {
2207 if (kvm_x86_ops->apicv_post_state_restore) 2214 kvm_x86_ops->apicv_post_state_restore(vcpu);
2208 kvm_x86_ops->apicv_post_state_restore(vcpu);
2209 kvm_x86_ops->hwapic_irr_update(vcpu, 2215 kvm_x86_ops->hwapic_irr_update(vcpu,
2210 apic_find_highest_irr(apic)); 2216 apic_find_highest_irr(apic));
2211 kvm_x86_ops->hwapic_isr_update(vcpu, 2217 kvm_x86_ops->hwapic_isr_update(vcpu,
@@ -2279,8 +2285,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2279 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 2285 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2280 return; 2286 return;
2281 2287
2282 if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 2288 if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data,
2283 sizeof(u32))) 2289 sizeof(u32)))
2284 return; 2290 return;
2285 2291
2286 apic_set_tpr(vcpu->arch.apic, data & 0xff); 2292 apic_set_tpr(vcpu->arch.apic, data & 0xff);
@@ -2332,14 +2338,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2332 max_isr = 0; 2338 max_isr = 0;
2333 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); 2339 data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2334 2340
2335 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, 2341 kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data,
2336 sizeof(u32)); 2342 sizeof(u32));
2337} 2343}
2338 2344
2339int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) 2345int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2340{ 2346{
2341 if (vapic_addr) { 2347 if (vapic_addr) {
2342 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, 2348 if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
2343 &vcpu->arch.apic->vapic_cache, 2349 &vcpu->arch.apic->vapic_cache,
2344 vapic_addr, sizeof(u32))) 2350 vapic_addr, sizeof(u32)))
2345 return -EINVAL; 2351 return -EINVAL;
@@ -2433,7 +2439,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
2433 vcpu->arch.pv_eoi.msr_val = data; 2439 vcpu->arch.pv_eoi.msr_val = data;
2434 if (!pv_eoi_enabled(vcpu)) 2440 if (!pv_eoi_enabled(vcpu))
2435 return 0; 2441 return 0;
2436 return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, 2442 return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data,
2437 addr, sizeof(u8)); 2443 addr, sizeof(u8));
2438} 2444}
2439 2445
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 05abd837b78a..bcbe811f3b97 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -71,8 +71,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
71bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, 71bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
72 int short_hand, unsigned int dest, int dest_mode); 72 int short_hand, unsigned int dest, int dest_mode);
73 73
74void __kvm_apic_update_irr(u32 *pir, void *regs); 74int __kvm_apic_update_irr(u32 *pir, void *regs);
75void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); 75int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
76void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); 76void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
77int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, 77int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
78 struct dest_map *dest_map); 78 struct dest_map *dest_map);
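
The lapic.c/lapic.h changes above make __kvm_apic_update_irr() return the highest vector left set in the IRR, using __fls(reg) + vec (equivalent to the old fls(reg) - 1 + vec for a non-zero register). A self-contained model of that scan, for illustration only (plain C rather than kernel code; __fls() is modeled with a compiler builtin):

#include <stdint.h>

/* Model of the IRR scan: 256 vectors live in 8 x 32-bit registers and the
 * highest set bit wins.  Returns -1 if no vector is pending. */
static int highest_pending_vector(const uint32_t irr[8])
{
        int i, max_irr = -1;

        for (i = 0; i < 8; i++)
                if (irr[i])
                        /* 31 - clz(x) is the index of the highest set bit,
                         * i.e. what __fls() returns for a non-zero word. */
                        max_irr = (31 - __builtin_clz(irr[i])) + i * 32;

        return max_irr;
}

Returning this value lets the new vmx_sync_pir_to_irr() (later in this diff) update RVI directly through vmx_hwapic_irr_update() instead of raising KVM_REQ_EVENT and rescanning the IRR a second time before vmentry.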
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..d1efe2c62b3f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -971,8 +971,8 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
971 * a particular vCPU. 971 * a particular vCPU.
972 */ 972 */
973#define SVM_VM_DATA_HASH_BITS 8 973#define SVM_VM_DATA_HASH_BITS 8
974DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); 974static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
975static spinlock_t svm_vm_data_hash_lock; 975static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
976 976
977/* Note: 977/* Note:
978 * This function is called from IOMMU driver to notify 978 * This function is called from IOMMU driver to notify
@@ -1077,8 +1077,6 @@ static __init int svm_hardware_setup(void)
1077 } else { 1077 } else {
1078 pr_info("AVIC enabled\n"); 1078 pr_info("AVIC enabled\n");
1079 1079
1080 hash_init(svm_vm_data_hash);
1081 spin_lock_init(&svm_vm_data_hash_lock);
1082 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); 1080 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1083 } 1081 }
1084 } 1082 }
@@ -1159,7 +1157,6 @@ static void init_vmcb(struct vcpu_svm *svm)
1159 struct vmcb_control_area *control = &svm->vmcb->control; 1157 struct vmcb_control_area *control = &svm->vmcb->control;
1160 struct vmcb_save_area *save = &svm->vmcb->save; 1158 struct vmcb_save_area *save = &svm->vmcb->save;
1161 1159
1162 svm->vcpu.fpu_active = 1;
1163 svm->vcpu.arch.hflags = 0; 1160 svm->vcpu.arch.hflags = 0;
1164 1161
1165 set_cr_intercept(svm, INTERCEPT_CR0_READ); 1162 set_cr_intercept(svm, INTERCEPT_CR0_READ);
@@ -1901,15 +1898,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
1901 ulong gcr0 = svm->vcpu.arch.cr0; 1898 ulong gcr0 = svm->vcpu.arch.cr0;
1902 u64 *hcr0 = &svm->vmcb->save.cr0; 1899 u64 *hcr0 = &svm->vmcb->save.cr0;
1903 1900
1904 if (!svm->vcpu.fpu_active) 1901 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1905 *hcr0 |= SVM_CR0_SELECTIVE_MASK; 1902 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1906 else
1907 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1908 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1909 1903
1910 mark_dirty(svm->vmcb, VMCB_CR); 1904 mark_dirty(svm->vmcb, VMCB_CR);
1911 1905
1912 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { 1906 if (gcr0 == *hcr0) {
1913 clr_cr_intercept(svm, INTERCEPT_CR0_READ); 1907 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1914 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1908 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1915 } else { 1909 } else {
@@ -1940,8 +1934,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1940 if (!npt_enabled) 1934 if (!npt_enabled)
1941 cr0 |= X86_CR0_PG | X86_CR0_WP; 1935 cr0 |= X86_CR0_PG | X86_CR0_WP;
1942 1936
1943 if (!vcpu->fpu_active)
1944 cr0 |= X86_CR0_TS;
1945 /* 1937 /*
1946 * re-enable caching here because the QEMU bios 1938 * re-enable caching here because the QEMU bios
1947 * does not do it - this results in some delay at 1939 * does not do it - this results in some delay at
@@ -2160,22 +2152,6 @@ static int ac_interception(struct vcpu_svm *svm)
2160 return 1; 2152 return 1;
2161} 2153}
2162 2154
2163static void svm_fpu_activate(struct kvm_vcpu *vcpu)
2164{
2165 struct vcpu_svm *svm = to_svm(vcpu);
2166
2167 clr_exception_intercept(svm, NM_VECTOR);
2168
2169 svm->vcpu.fpu_active = 1;
2170 update_cr0_intercept(svm);
2171}
2172
2173static int nm_interception(struct vcpu_svm *svm)
2174{
2175 svm_fpu_activate(&svm->vcpu);
2176 return 1;
2177}
2178
2179static bool is_erratum_383(void) 2155static bool is_erratum_383(void)
2180{ 2156{
2181 int err, i; 2157 int err, i;
@@ -2573,9 +2549,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
2573 if (!npt_enabled && svm->apf_reason == 0) 2549 if (!npt_enabled && svm->apf_reason == 0)
2574 return NESTED_EXIT_HOST; 2550 return NESTED_EXIT_HOST;
2575 break; 2551 break;
2576 case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2577 nm_interception(svm);
2578 break;
2579 default: 2552 default:
2580 break; 2553 break;
2581 } 2554 }
@@ -4020,7 +3993,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4020 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 3993 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
4021 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 3994 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
4022 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 3995 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
4023 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
4024 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 3996 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
4025 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, 3997 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
4026 [SVM_EXIT_INTR] = intr_interception, 3998 [SVM_EXIT_INTR] = intr_interception,
@@ -4359,11 +4331,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
4359 return; 4331 return;
4360} 4332}
4361 4333
4362static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
4363{
4364 return;
4365}
4366
4367static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) 4334static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
4368{ 4335{
4369 kvm_lapic_set_irr(vec, vcpu->arch.apic); 4336 kvm_lapic_set_irr(vec, vcpu->arch.apic);
@@ -5079,14 +5046,6 @@ static bool svm_has_wbinvd_exit(void)
5079 return true; 5046 return true;
5080} 5047}
5081 5048
5082static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
5083{
5084 struct vcpu_svm *svm = to_svm(vcpu);
5085
5086 set_exception_intercept(svm, NM_VECTOR);
5087 update_cr0_intercept(svm);
5088}
5089
5090#define PRE_EX(exit) { .exit_code = (exit), \ 5049#define PRE_EX(exit) { .exit_code = (exit), \
5091 .stage = X86_ICPT_PRE_EXCEPT, } 5050 .stage = X86_ICPT_PRE_EXCEPT, }
5092#define POST_EX(exit) { .exit_code = (exit), \ 5051#define POST_EX(exit) { .exit_code = (exit), \
@@ -5347,9 +5306,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
5347 5306
5348 .get_pkru = svm_get_pkru, 5307 .get_pkru = svm_get_pkru,
5349 5308
5350 .fpu_activate = svm_fpu_activate,
5351 .fpu_deactivate = svm_fpu_deactivate,
5352
5353 .tlb_flush = svm_flush_tlb, 5309 .tlb_flush = svm_flush_tlb,
5354 5310
5355 .run = svm_vcpu_run, 5311 .run = svm_vcpu_run,
@@ -5373,7 +5329,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
5373 .get_enable_apicv = svm_get_enable_apicv, 5329 .get_enable_apicv = svm_get_enable_apicv,
5374 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, 5330 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
5375 .load_eoi_exitmap = svm_load_eoi_exitmap, 5331 .load_eoi_exitmap = svm_load_eoi_exitmap,
5376 .sync_pir_to_irr = svm_sync_pir_to_irr,
5377 .hwapic_irr_update = svm_hwapic_irr_update, 5332 .hwapic_irr_update = svm_hwapic_irr_update,
5378 .hwapic_isr_update = svm_hwapic_isr_update, 5333 .hwapic_isr_update = svm_hwapic_isr_update,
5379 .apicv_post_state_restore = avic_post_state_restore, 5334 .apicv_post_state_restore = avic_post_state_restore,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3e42623090..9856b73a21ad 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1856,7 +1856,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1856 u32 eb; 1856 u32 eb;
1857 1857
1858 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | 1858 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
1859 (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); 1859 (1u << DB_VECTOR) | (1u << AC_VECTOR);
1860 if ((vcpu->guest_debug & 1860 if ((vcpu->guest_debug &
1861 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == 1861 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
1862 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) 1862 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -1865,8 +1865,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
1865 eb = ~0; 1865 eb = ~0;
1866 if (enable_ept) 1866 if (enable_ept)
1867 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 1867 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
1868 if (vcpu->fpu_active)
1869 eb &= ~(1u << NM_VECTOR);
1870 1868
1871 /* When we are running a nested L2 guest and L1 specified for it a 1869 /* When we are running a nested L2 guest and L1 specified for it a
1872 * certain exception bitmap, we must trap the same exceptions and pass 1870 * certain exception bitmap, we must trap the same exceptions and pass
@@ -2340,25 +2338,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2340 } 2338 }
2341} 2339}
2342 2340
2343static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
2344{
2345 ulong cr0;
2346
2347 if (vcpu->fpu_active)
2348 return;
2349 vcpu->fpu_active = 1;
2350 cr0 = vmcs_readl(GUEST_CR0);
2351 cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
2352 cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
2353 vmcs_writel(GUEST_CR0, cr0);
2354 update_exception_bitmap(vcpu);
2355 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
2356 if (is_guest_mode(vcpu))
2357 vcpu->arch.cr0_guest_owned_bits &=
2358 ~get_vmcs12(vcpu)->cr0_guest_host_mask;
2359 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
2360}
2361
2362static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); 2341static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2363 2342
2364/* 2343/*
@@ -2377,33 +2356,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
2377 (fields->cr4_read_shadow & fields->cr4_guest_host_mask); 2356 (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
2378} 2357}
2379 2358
2380static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
2381{
2382 /* Note that there is no vcpu->fpu_active = 0 here. The caller must
2383 * set this *before* calling this function.
2384 */
2385 vmx_decache_cr0_guest_bits(vcpu);
2386 vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
2387 update_exception_bitmap(vcpu);
2388 vcpu->arch.cr0_guest_owned_bits = 0;
2389 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
2390 if (is_guest_mode(vcpu)) {
2391 /*
2392 * L1's specified read shadow might not contain the TS bit,
2393 * so now that we turned on shadowing of this bit, we need to
2394 * set this bit of the shadow. Like in nested_vmx_run we need
2395 * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet
2396 * up-to-date here because we just decached cr0.TS (and we'll
2397 * only update vmcs12->guest_cr0 on nested exit).
2398 */
2399 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2400 vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
2401 (vcpu->arch.cr0 & X86_CR0_TS);
2402 vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
2403 } else
2404 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
2405}
2406
2407static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 2359static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2408{ 2360{
2409 unsigned long rflags, save_rflags; 2361 unsigned long rflags, save_rflags;
@@ -4232,9 +4184,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
4232 if (enable_ept) 4184 if (enable_ept)
4233 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); 4185 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
4234 4186
4235 if (!vcpu->fpu_active)
4236 hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
4237
4238 vmcs_writel(CR0_READ_SHADOW, cr0); 4187 vmcs_writel(CR0_READ_SHADOW, cr0);
4239 vmcs_writel(GUEST_CR0, hw_cr0); 4188 vmcs_writel(GUEST_CR0, hw_cr0);
4240 vcpu->arch.cr0 = cr0; 4189 vcpu->arch.cr0 = cr0;
@@ -5051,26 +5000,12 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
5051 if (pi_test_and_set_pir(vector, &vmx->pi_desc)) 5000 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
5052 return; 5001 return;
5053 5002
5054 r = pi_test_and_set_on(&vmx->pi_desc); 5003 /* If a previous notification has sent the IPI, nothing to do. */
5055 kvm_make_request(KVM_REQ_EVENT, vcpu); 5004 if (pi_test_and_set_on(&vmx->pi_desc))
5056 if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu))
5057 kvm_vcpu_kick(vcpu);
5058}
5059
5060static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
5061{
5062 struct vcpu_vmx *vmx = to_vmx(vcpu);
5063
5064 if (!pi_test_on(&vmx->pi_desc))
5065 return; 5005 return;
5066 5006
5067 pi_clear_on(&vmx->pi_desc); 5007 if (!kvm_vcpu_trigger_posted_interrupt(vcpu))
5068 /* 5008 kvm_vcpu_kick(vcpu);
5069 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
5070 * But on x86 this is just a compiler barrier anyway.
5071 */
5072 smp_mb__after_atomic();
5073 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
5074} 5009}
5075 5010
5076/* 5011/*
@@ -5335,7 +5270,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
5335 /* 22.2.1, 20.8.1 */ 5270 /* 22.2.1, 20.8.1 */
5336 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); 5271 vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
5337 5272
5338 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 5273 vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
5274 vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
5275
5339 set_cr4_guest_host_mask(vmx); 5276 set_cr4_guest_host_mask(vmx);
5340 5277
5341 if (vmx_xsaves_supported()) 5278 if (vmx_xsaves_supported())
@@ -5439,7 +5376,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5439 vmx_set_cr0(vcpu, cr0); /* enter rmode */ 5376 vmx_set_cr0(vcpu, cr0); /* enter rmode */
5440 vmx_set_cr4(vcpu, 0); 5377 vmx_set_cr4(vcpu, 0);
5441 vmx_set_efer(vcpu, 0); 5378 vmx_set_efer(vcpu, 0);
5442 vmx_fpu_activate(vcpu); 5379
5443 update_exception_bitmap(vcpu); 5380 update_exception_bitmap(vcpu);
5444 5381
5445 vpid_sync_context(vmx->vpid); 5382 vpid_sync_context(vmx->vpid);
@@ -5473,26 +5410,20 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
5473 5410
5474static void enable_irq_window(struct kvm_vcpu *vcpu) 5411static void enable_irq_window(struct kvm_vcpu *vcpu)
5475{ 5412{
5476 u32 cpu_based_vm_exec_control; 5413 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
5477 5414 CPU_BASED_VIRTUAL_INTR_PENDING);
5478 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5479 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
5480 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5481} 5415}
5482 5416
5483static void enable_nmi_window(struct kvm_vcpu *vcpu) 5417static void enable_nmi_window(struct kvm_vcpu *vcpu)
5484{ 5418{
5485 u32 cpu_based_vm_exec_control;
5486
5487 if (!cpu_has_virtual_nmis() || 5419 if (!cpu_has_virtual_nmis() ||
5488 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { 5420 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
5489 enable_irq_window(vcpu); 5421 enable_irq_window(vcpu);
5490 return; 5422 return;
5491 } 5423 }
5492 5424
5493 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 5425 vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
5494 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; 5426 CPU_BASED_VIRTUAL_NMI_PENDING);
5495 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5496} 5427}
5497 5428
5498static void vmx_inject_irq(struct kvm_vcpu *vcpu) 5429static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -5718,11 +5649,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
5718 if (is_nmi(intr_info)) 5649 if (is_nmi(intr_info))
5719 return 1; /* already handled by vmx_vcpu_run() */ 5650 return 1; /* already handled by vmx_vcpu_run() */
5720 5651
5721 if (is_no_device(intr_info)) {
5722 vmx_fpu_activate(vcpu);
5723 return 1;
5724 }
5725
5726 if (is_invalid_opcode(intr_info)) { 5652 if (is_invalid_opcode(intr_info)) {
5727 if (is_guest_mode(vcpu)) { 5653 if (is_guest_mode(vcpu)) {
5728 kvm_queue_exception(vcpu, UD_VECTOR); 5654 kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5912,22 +5838,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
5912 return kvm_set_cr4(vcpu, val); 5838 return kvm_set_cr4(vcpu, val);
5913} 5839}
5914 5840
5915/* called to set cr0 as appropriate for clts instruction exit. */
5916static void handle_clts(struct kvm_vcpu *vcpu)
5917{
5918 if (is_guest_mode(vcpu)) {
5919 /*
5920 * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS
5921 * but we did (!fpu_active). We need to keep GUEST_CR0.TS on,
5922 * just pretend it's off (also in arch.cr0 for fpu_activate).
5923 */
5924 vmcs_writel(CR0_READ_SHADOW,
5925 vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS);
5926 vcpu->arch.cr0 &= ~X86_CR0_TS;
5927 } else
5928 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
5929}
5930
5931static int handle_cr(struct kvm_vcpu *vcpu) 5841static int handle_cr(struct kvm_vcpu *vcpu)
5932{ 5842{
5933 unsigned long exit_qualification, val; 5843 unsigned long exit_qualification, val;
@@ -5973,9 +5883,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
5973 } 5883 }
5974 break; 5884 break;
5975 case 2: /* clts */ 5885 case 2: /* clts */
5976 handle_clts(vcpu); 5886 WARN_ONCE(1, "Guest should always own CR0.TS");
5887 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
5977 trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); 5888 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
5978 vmx_fpu_activate(vcpu);
5979 return kvm_skip_emulated_instruction(vcpu); 5889 return kvm_skip_emulated_instruction(vcpu);
5980 case 1: /*mov from cr*/ 5890 case 1: /*mov from cr*/
5981 switch (cr) { 5891 switch (cr) {
@@ -6151,12 +6061,8 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
6151 6061
6152static int handle_interrupt_window(struct kvm_vcpu *vcpu) 6062static int handle_interrupt_window(struct kvm_vcpu *vcpu)
6153{ 6063{
6154 u32 cpu_based_vm_exec_control; 6064 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
6155 6065 CPU_BASED_VIRTUAL_INTR_PENDING);
6156 /* clear pending irq */
6157 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
6158 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
6159 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
6160 6066
6161 kvm_make_request(KVM_REQ_EVENT, vcpu); 6067 kvm_make_request(KVM_REQ_EVENT, vcpu);
6162 6068
@@ -6382,6 +6288,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
6382 EPT_VIOLATION_EXECUTABLE)) 6288 EPT_VIOLATION_EXECUTABLE))
6383 ? PFERR_PRESENT_MASK : 0; 6289 ? PFERR_PRESENT_MASK : 0;
6384 6290
6291 vcpu->arch.gpa_available = true;
6385 vcpu->arch.exit_qualification = exit_qualification; 6292 vcpu->arch.exit_qualification = exit_qualification;
6386 6293
6387 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); 6294 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -6399,6 +6306,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
6399 } 6306 }
6400 6307
6401 ret = handle_mmio_page_fault(vcpu, gpa, true); 6308 ret = handle_mmio_page_fault(vcpu, gpa, true);
6309 vcpu->arch.gpa_available = true;
6402 if (likely(ret == RET_MMIO_PF_EMULATE)) 6310 if (likely(ret == RET_MMIO_PF_EMULATE))
6403 return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == 6311 return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
6404 EMULATE_DONE; 6312 EMULATE_DONE;
@@ -6420,12 +6328,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
6420 6328
6421static int handle_nmi_window(struct kvm_vcpu *vcpu) 6329static int handle_nmi_window(struct kvm_vcpu *vcpu)
6422{ 6330{
6423 u32 cpu_based_vm_exec_control; 6331 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
6424 6332 CPU_BASED_VIRTUAL_NMI_PENDING);
6425 /* clear pending NMI */
6426 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
6427 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
6428 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
6429 ++vcpu->stat.nmi_window_exits; 6333 ++vcpu->stat.nmi_window_exits;
6430 kvm_make_request(KVM_REQ_EVENT, vcpu); 6334 kvm_make_request(KVM_REQ_EVENT, vcpu);
6431 6335
@@ -6663,8 +6567,10 @@ static __init int hardware_setup(void)
6663 if (!cpu_has_vmx_ple()) 6567 if (!cpu_has_vmx_ple())
6664 ple_gap = 0; 6568 ple_gap = 0;
6665 6569
6666 if (!cpu_has_vmx_apicv()) 6570 if (!cpu_has_vmx_apicv()) {
6667 enable_apicv = 0; 6571 enable_apicv = 0;
6572 kvm_x86_ops->sync_pir_to_irr = NULL;
6573 }
6668 6574
6669 if (cpu_has_vmx_tsc_scaling()) { 6575 if (cpu_has_vmx_tsc_scaling()) {
6670 kvm_has_tsc_control = true; 6576 kvm_has_tsc_control = true;
@@ -7134,6 +7040,53 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
7134 return 0; 7040 return 0;
7135} 7041}
7136 7042
7043static int enter_vmx_operation(struct kvm_vcpu *vcpu)
7044{
7045 struct vcpu_vmx *vmx = to_vmx(vcpu);
7046 struct vmcs *shadow_vmcs;
7047
7048 if (cpu_has_vmx_msr_bitmap()) {
7049 vmx->nested.msr_bitmap =
7050 (unsigned long *)__get_free_page(GFP_KERNEL);
7051 if (!vmx->nested.msr_bitmap)
7052 goto out_msr_bitmap;
7053 }
7054
7055 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
7056 if (!vmx->nested.cached_vmcs12)
7057 goto out_cached_vmcs12;
7058
7059 if (enable_shadow_vmcs) {
7060 shadow_vmcs = alloc_vmcs();
7061 if (!shadow_vmcs)
7062 goto out_shadow_vmcs;
7063 /* mark vmcs as shadow */
7064 shadow_vmcs->revision_id |= (1u << 31);
7065 /* init shadow vmcs */
7066 vmcs_clear(shadow_vmcs);
7067 vmx->vmcs01.shadow_vmcs = shadow_vmcs;
7068 }
7069
7070 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
7071 vmx->nested.vmcs02_num = 0;
7072
7073 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
7074 HRTIMER_MODE_REL_PINNED);
7075 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
7076
7077 vmx->nested.vmxon = true;
7078 return 0;
7079
7080out_shadow_vmcs:
7081 kfree(vmx->nested.cached_vmcs12);
7082
7083out_cached_vmcs12:
7084 free_page((unsigned long)vmx->nested.msr_bitmap);
7085
7086out_msr_bitmap:
7087 return -ENOMEM;
7088}
7089
7137/* 7090/*
7138 * Emulate the VMXON instruction. 7091 * Emulate the VMXON instruction.
7139 * Currently, we just remember that VMX is active, and do not save or even 7092 * Currently, we just remember that VMX is active, and do not save or even
@@ -7144,9 +7097,9 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
7144 */ 7097 */
7145static int handle_vmon(struct kvm_vcpu *vcpu) 7098static int handle_vmon(struct kvm_vcpu *vcpu)
7146{ 7099{
7100 int ret;
7147 struct kvm_segment cs; 7101 struct kvm_segment cs;
7148 struct vcpu_vmx *vmx = to_vmx(vcpu); 7102 struct vcpu_vmx *vmx = to_vmx(vcpu);
7149 struct vmcs *shadow_vmcs;
7150 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED 7103 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
7151 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 7104 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7152 7105
@@ -7186,49 +7139,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
7186 7139
7187 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) 7140 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL))
7188 return 1; 7141 return 1;
7189 7142
7190 if (cpu_has_vmx_msr_bitmap()) { 7143 ret = enter_vmx_operation(vcpu);
7191 vmx->nested.msr_bitmap = 7144 if (ret)
7192 (unsigned long *)__get_free_page(GFP_KERNEL); 7145 return ret;
7193 if (!vmx->nested.msr_bitmap)
7194 goto out_msr_bitmap;
7195 }
7196
7197 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
7198 if (!vmx->nested.cached_vmcs12)
7199 goto out_cached_vmcs12;
7200
7201 if (enable_shadow_vmcs) {
7202 shadow_vmcs = alloc_vmcs();
7203 if (!shadow_vmcs)
7204 goto out_shadow_vmcs;
7205 /* mark vmcs as shadow */
7206 shadow_vmcs->revision_id |= (1u << 31);
7207 /* init shadow vmcs */
7208 vmcs_clear(shadow_vmcs);
7209 vmx->vmcs01.shadow_vmcs = shadow_vmcs;
7210 }
7211
7212 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
7213 vmx->nested.vmcs02_num = 0;
7214
7215 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
7216 HRTIMER_MODE_REL_PINNED);
7217 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
7218
7219 vmx->nested.vmxon = true;
7220 7146
7221 nested_vmx_succeed(vcpu); 7147 nested_vmx_succeed(vcpu);
7222 return kvm_skip_emulated_instruction(vcpu); 7148 return kvm_skip_emulated_instruction(vcpu);
7223
7224out_shadow_vmcs:
7225 kfree(vmx->nested.cached_vmcs12);
7226
7227out_cached_vmcs12:
7228 free_page((unsigned long)vmx->nested.msr_bitmap);
7229
7230out_msr_bitmap:
7231 return -ENOMEM;
7232} 7149}
7233 7150
7234/* 7151/*
@@ -7677,6 +7594,18 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
7677 return kvm_skip_emulated_instruction(vcpu); 7594 return kvm_skip_emulated_instruction(vcpu);
7678} 7595}
7679 7596
7597static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
7598{
7599 vmx->nested.current_vmptr = vmptr;
7600 if (enable_shadow_vmcs) {
7601 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
7602 SECONDARY_EXEC_SHADOW_VMCS);
7603 vmcs_write64(VMCS_LINK_POINTER,
7604 __pa(vmx->vmcs01.shadow_vmcs));
7605 vmx->nested.sync_shadow_vmcs = true;
7606 }
7607}
7608
7680/* Emulate the VMPTRLD instruction */ 7609/* Emulate the VMPTRLD instruction */
7681static int handle_vmptrld(struct kvm_vcpu *vcpu) 7610static int handle_vmptrld(struct kvm_vcpu *vcpu)
7682{ 7611{
@@ -7707,7 +7636,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
7707 } 7636 }
7708 7637
7709 nested_release_vmcs12(vmx); 7638 nested_release_vmcs12(vmx);
7710 vmx->nested.current_vmptr = vmptr;
7711 vmx->nested.current_vmcs12 = new_vmcs12; 7639 vmx->nested.current_vmcs12 = new_vmcs12;
7712 vmx->nested.current_vmcs12_page = page; 7640 vmx->nested.current_vmcs12_page = page;
7713 /* 7641 /*
@@ -7716,14 +7644,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
7716 */ 7644 */
7717 memcpy(vmx->nested.cached_vmcs12, 7645 memcpy(vmx->nested.cached_vmcs12,
7718 vmx->nested.current_vmcs12, VMCS12_SIZE); 7646 vmx->nested.current_vmcs12, VMCS12_SIZE);
7719 7647 set_current_vmptr(vmx, vmptr);
7720 if (enable_shadow_vmcs) {
7721 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
7722 SECONDARY_EXEC_SHADOW_VMCS);
7723 vmcs_write64(VMCS_LINK_POINTER,
7724 __pa(vmx->vmcs01.shadow_vmcs));
7725 vmx->nested.sync_shadow_vmcs = true;
7726 }
7727 } 7648 }
7728 7649
7729 nested_vmx_succeed(vcpu); 7650 nested_vmx_succeed(vcpu);
@@ -8517,6 +8438,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
8517 u32 vectoring_info = vmx->idt_vectoring_info; 8438 u32 vectoring_info = vmx->idt_vectoring_info;
8518 8439
8519 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); 8440 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
8441 vcpu->arch.gpa_available = false;
8520 8442
8521 /* 8443 /*
8522 * Flush logged GPAs PML buffer, this will make dirty_bitmap more 8444 * Flush logged GPAs PML buffer, this will make dirty_bitmap more
@@ -8735,6 +8657,27 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
8735 } 8657 }
8736} 8658}
8737 8659
8660static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
8661{
8662 struct vcpu_vmx *vmx = to_vmx(vcpu);
8663 int max_irr;
8664
8665 WARN_ON(!vcpu->arch.apicv_active);
8666 if (pi_test_on(&vmx->pi_desc)) {
8667 pi_clear_on(&vmx->pi_desc);
8668 /*
8669 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
8670 * But on x86 this is just a compiler barrier anyway.
8671 */
8672 smp_mb__after_atomic();
8673 max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
8674 } else {
8675 max_irr = kvm_lapic_find_highest_irr(vcpu);
8676 }
8677 vmx_hwapic_irr_update(vcpu, max_irr);
8678 return max_irr;
8679}
8680
8738static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 8681static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
8739{ 8682{
8740 if (!kvm_vcpu_apicv_active(vcpu)) 8683 if (!kvm_vcpu_apicv_active(vcpu))
@@ -8746,6 +8689,14 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
8746 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); 8689 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
8747} 8690}
8748 8691
8692static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
8693{
8694 struct vcpu_vmx *vmx = to_vmx(vcpu);
8695
8696 pi_clear_on(&vmx->pi_desc);
8697 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
8698}
8699
8749static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) 8700static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
8750{ 8701{
8751 u32 exit_intr_info; 8702 u32 exit_intr_info;
@@ -9591,17 +9542,16 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
9591 kvm_inject_page_fault(vcpu, fault); 9542 kvm_inject_page_fault(vcpu, fault);
9592} 9543}
9593 9544
9594static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, 9545static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
9546 struct vmcs12 *vmcs12);
9547
9548static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
9595 struct vmcs12 *vmcs12) 9549 struct vmcs12 *vmcs12)
9596{ 9550{
9597 struct vcpu_vmx *vmx = to_vmx(vcpu); 9551 struct vcpu_vmx *vmx = to_vmx(vcpu);
9598 int maxphyaddr = cpuid_maxphyaddr(vcpu); 9552 u64 hpa;
9599 9553
9600 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { 9554 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
9601 if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
9602 vmcs12->apic_access_addr >> maxphyaddr)
9603 return false;
9604
9605 /* 9555 /*
9606 * Translate L1 physical address to host physical 9556 * Translate L1 physical address to host physical
9607 * address for vmcs02. Keep the page pinned, so this 9557 * address for vmcs02. Keep the page pinned, so this
@@ -9612,59 +9562,80 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
9612 nested_release_page(vmx->nested.apic_access_page); 9562 nested_release_page(vmx->nested.apic_access_page);
9613 vmx->nested.apic_access_page = 9563 vmx->nested.apic_access_page =
9614 nested_get_page(vcpu, vmcs12->apic_access_addr); 9564 nested_get_page(vcpu, vmcs12->apic_access_addr);
9565 /*
9566 * If translation failed, no matter: This feature asks
9567 * to exit when accessing the given address, and if it
9568 * can never be accessed, this feature won't do
9569 * anything anyway.
9570 */
9571 if (vmx->nested.apic_access_page) {
9572 hpa = page_to_phys(vmx->nested.apic_access_page);
9573 vmcs_write64(APIC_ACCESS_ADDR, hpa);
9574 } else {
9575 vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
9576 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
9577 }
9578 } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
9579 cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
9580 vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
9581 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
9582 kvm_vcpu_reload_apic_access_page(vcpu);
9615 } 9583 }
9616 9584
9617 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { 9585 if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
9618 if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
9619 vmcs12->virtual_apic_page_addr >> maxphyaddr)
9620 return false;
9621
9622 if (vmx->nested.virtual_apic_page) /* shouldn't happen */ 9586 if (vmx->nested.virtual_apic_page) /* shouldn't happen */
9623 nested_release_page(vmx->nested.virtual_apic_page); 9587 nested_release_page(vmx->nested.virtual_apic_page);
9624 vmx->nested.virtual_apic_page = 9588 vmx->nested.virtual_apic_page =
9625 nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); 9589 nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
9626 9590
9627 /* 9591 /*
9628 * Failing the vm entry is _not_ what the processor does 9592 * If translation failed, VM entry will fail because
9629 * but it's basically the only possibility we have. 9593 * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull.
9630 * We could still enter the guest if CR8 load exits are 9594 * Failing the vm entry is _not_ what the processor
9631 * enabled, CR8 store exits are enabled, and virtualize APIC 9595 * does but it's basically the only possibility we
9632 * access is disabled; in this case the processor would never 9596 * have. We could still enter the guest if CR8 load
9633 * use the TPR shadow and we could simply clear the bit from 9597 * exits are enabled, CR8 store exits are enabled, and
9634 * the execution control. But such a configuration is useless, 9598 * virtualize APIC access is disabled; in this case
9635 * so let's keep the code simple. 9599 * the processor would never use the TPR shadow and we
9600 * could simply clear the bit from the execution
9601 * control. But such a configuration is useless, so
9602 * let's keep the code simple.
9636 */ 9603 */
9637 if (!vmx->nested.virtual_apic_page) 9604 if (vmx->nested.virtual_apic_page) {
9638 return false; 9605 hpa = page_to_phys(vmx->nested.virtual_apic_page);
9606 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa);
9607 }
9639 } 9608 }
9640 9609
9641 if (nested_cpu_has_posted_intr(vmcs12)) { 9610 if (nested_cpu_has_posted_intr(vmcs12)) {
9642 if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
9643 vmcs12->posted_intr_desc_addr >> maxphyaddr)
9644 return false;
9645
9646 if (vmx->nested.pi_desc_page) { /* shouldn't happen */ 9611 if (vmx->nested.pi_desc_page) { /* shouldn't happen */
9647 kunmap(vmx->nested.pi_desc_page); 9612 kunmap(vmx->nested.pi_desc_page);
9648 nested_release_page(vmx->nested.pi_desc_page); 9613 nested_release_page(vmx->nested.pi_desc_page);
9649 } 9614 }
9650 vmx->nested.pi_desc_page = 9615 vmx->nested.pi_desc_page =
9651 nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); 9616 nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
9652 if (!vmx->nested.pi_desc_page)
9653 return false;
9654
9655 vmx->nested.pi_desc = 9617 vmx->nested.pi_desc =
9656 (struct pi_desc *)kmap(vmx->nested.pi_desc_page); 9618 (struct pi_desc *)kmap(vmx->nested.pi_desc_page);
9657 if (!vmx->nested.pi_desc) { 9619 if (!vmx->nested.pi_desc) {
9658 nested_release_page_clean(vmx->nested.pi_desc_page); 9620 nested_release_page_clean(vmx->nested.pi_desc_page);
9659 return false; 9621 return;
9660 } 9622 }
9661 vmx->nested.pi_desc = 9623 vmx->nested.pi_desc =
9662 (struct pi_desc *)((void *)vmx->nested.pi_desc + 9624 (struct pi_desc *)((void *)vmx->nested.pi_desc +
9663 (unsigned long)(vmcs12->posted_intr_desc_addr & 9625 (unsigned long)(vmcs12->posted_intr_desc_addr &
9664 (PAGE_SIZE - 1))); 9626 (PAGE_SIZE - 1)));
9627 vmcs_write64(POSTED_INTR_DESC_ADDR,
9628 page_to_phys(vmx->nested.pi_desc_page) +
9629 (unsigned long)(vmcs12->posted_intr_desc_addr &
9630 (PAGE_SIZE - 1)));
9665 } 9631 }
9666 9632 if (cpu_has_vmx_msr_bitmap() &&
9667 return true; 9633 nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
9634 nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
9635 ;
9636 else
9637 vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
9638 CPU_BASED_USE_MSR_BITMAPS);
9668} 9639}
9669 9640
9670static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) 9641static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
@@ -9980,7 +9951,7 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
9980 * is assigned to entry_failure_code on failure. 9951 * is assigned to entry_failure_code on failure.
9981 */ 9952 */
9982static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, 9953static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
9983 unsigned long *entry_failure_code) 9954 u32 *entry_failure_code)
9984{ 9955{
9985 if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { 9956 if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
9986 if (!nested_cr3_valid(vcpu, cr3)) { 9957 if (!nested_cr3_valid(vcpu, cr3)) {
@@ -10020,7 +9991,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
10020 * is assigned to entry_failure_code on failure. 9991 * is assigned to entry_failure_code on failure.
10021 */ 9992 */
10022static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, 9993static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10023 unsigned long *entry_failure_code) 9994 bool from_vmentry, u32 *entry_failure_code)
10024{ 9995{
10025 struct vcpu_vmx *vmx = to_vmx(vcpu); 9996 struct vcpu_vmx *vmx = to_vmx(vcpu);
10026 u32 exec_control; 9997 u32 exec_control;
@@ -10063,21 +10034,26 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10063 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); 10034 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
10064 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); 10035 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
10065 10036
10066 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { 10037 if (from_vmentry &&
10038 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
10067 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); 10039 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
10068 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); 10040 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
10069 } else { 10041 } else {
10070 kvm_set_dr(vcpu, 7, vcpu->arch.dr7); 10042 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
10071 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); 10043 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
10072 } 10044 }
10073 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 10045 if (from_vmentry) {
10074 vmcs12->vm_entry_intr_info_field); 10046 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
10075 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 10047 vmcs12->vm_entry_intr_info_field);
10076 vmcs12->vm_entry_exception_error_code); 10048 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
10077 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 10049 vmcs12->vm_entry_exception_error_code);
10078 vmcs12->vm_entry_instruction_len); 10050 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
10079 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 10051 vmcs12->vm_entry_instruction_len);
10080 vmcs12->guest_interruptibility_info); 10052 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
10053 vmcs12->guest_interruptibility_info);
10054 } else {
10055 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
10056 }
10081 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); 10057 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
10082 vmx_set_rflags(vcpu, vmcs12->guest_rflags); 10058 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
10083 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 10059 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
@@ -10106,12 +10082,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10106 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; 10082 vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
10107 vmx->nested.pi_pending = false; 10083 vmx->nested.pi_pending = false;
10108 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); 10084 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
10109 vmcs_write64(POSTED_INTR_DESC_ADDR, 10085 } else {
10110 page_to_phys(vmx->nested.pi_desc_page) +
10111 (unsigned long)(vmcs12->posted_intr_desc_addr &
10112 (PAGE_SIZE - 1)));
10113 } else
10114 exec_control &= ~PIN_BASED_POSTED_INTR; 10086 exec_control &= ~PIN_BASED_POSTED_INTR;
10087 }
10115 10088
10116 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); 10089 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
10117 10090
@@ -10156,26 +10129,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10156 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) 10129 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
10157 exec_control |= vmcs12->secondary_vm_exec_control; 10130 exec_control |= vmcs12->secondary_vm_exec_control;
10158 10131
10159 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
10160 /*
10161 * If translation failed, no matter: This feature asks
10162 * to exit when accessing the given address, and if it
10163 * can never be accessed, this feature won't do
10164 * anything anyway.
10165 */
10166 if (!vmx->nested.apic_access_page)
10167 exec_control &=
10168 ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
10169 else
10170 vmcs_write64(APIC_ACCESS_ADDR,
10171 page_to_phys(vmx->nested.apic_access_page));
10172 } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
10173 cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
10174 exec_control |=
10175 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
10176 kvm_vcpu_reload_apic_access_page(vcpu);
10177 }
10178
10179 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { 10132 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
10180 vmcs_write64(EOI_EXIT_BITMAP0, 10133 vmcs_write64(EOI_EXIT_BITMAP0,
10181 vmcs12->eoi_exit_bitmap0); 10134 vmcs12->eoi_exit_bitmap0);
@@ -10190,6 +10143,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10190 } 10143 }
10191 10144
10192 nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; 10145 nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
10146
10147 /*
10148 * Write an illegal value to APIC_ACCESS_ADDR. Later,
10149 * nested_get_vmcs12_pages will either fix it up or
10150 * remove the VM execution control.
10151 */
10152 if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
10153 vmcs_write64(APIC_ACCESS_ADDR, -1ull);
10154
10193 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 10155 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
10194 } 10156 }
10195 10157
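
The two hunks above make prepare_vmcs02() write the -1ull sentinel into APIC_ACCESS_ADDR and defer the real mapping to nested_get_vmcs12_pages(), which enter_vmx_non_root_mode() now calls after prepare_vmcs02() (see the hunk further down). The body of nested_get_vmcs12_pages() is not part of this excerpt, so the sketch below is only a reconstruction of what the deferred fix-up plausibly looks like, based on the logic removed from prepare_vmcs02() above; the helper name is invented for illustration.

static void nested_fixup_apic_access_addr(struct kvm_vcpu *vcpu,
                                          struct vmcs12 *vmcs12)
{
        struct vcpu_vmx *vmx = to_vmx(vcpu);

        if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
                return;

        if (vmx->nested.apic_access_page)
                /* Replace the -1ull sentinel with the real physical address. */
                vmcs_write64(APIC_ACCESS_ADDR,
                             page_to_phys(vmx->nested.apic_access_page));
        else
                /*
                 * Translation failed; the address can never be accessed, so
                 * drop the execution control instead of failing the entry.
                 */
                vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}
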
@@ -10226,19 +10188,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10226 exec_control &= ~CPU_BASED_TPR_SHADOW; 10188 exec_control &= ~CPU_BASED_TPR_SHADOW;
10227 exec_control |= vmcs12->cpu_based_vm_exec_control; 10189 exec_control |= vmcs12->cpu_based_vm_exec_control;
10228 10190
10191 /*
10192 * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if
10193 * nested_get_vmcs12_pages can't fix it up, the illegal value
10194 * will result in a VM entry failure.
10195 */
10229 if (exec_control & CPU_BASED_TPR_SHADOW) { 10196 if (exec_control & CPU_BASED_TPR_SHADOW) {
10230 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 10197 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
10231 page_to_phys(vmx->nested.virtual_apic_page));
10232 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); 10198 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
10233 } 10199 }
10234 10200
10235 if (cpu_has_vmx_msr_bitmap() &&
10236 exec_control & CPU_BASED_USE_MSR_BITMAPS &&
10237 nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
10238 ; /* MSR_BITMAP will be set by following vmx_set_efer. */
10239 else
10240 exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
10241
10242 /* 10201 /*
10243 * Merging of IO bitmap not currently supported. 10202 * Merging of IO bitmap not currently supported.
10244 * Rather, exit every time. 10203 * Rather, exit every time.
@@ -10270,16 +10229,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10270 ~VM_ENTRY_IA32E_MODE) | 10229 ~VM_ENTRY_IA32E_MODE) |
10271 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); 10230 (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
10272 10231
10273 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { 10232 if (from_vmentry &&
10233 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
10274 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); 10234 vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
10275 vcpu->arch.pat = vmcs12->guest_ia32_pat; 10235 vcpu->arch.pat = vmcs12->guest_ia32_pat;
10276 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) 10236 } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
10277 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); 10237 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
10278 10238 }
10279 10239
10280 set_cr4_guest_host_mask(vmx); 10240 set_cr4_guest_host_mask(vmx);
10281 10241
10282 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) 10242 if (from_vmentry &&
10243 vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
10283 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); 10244 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
10284 10245
10285 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) 10246 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
@@ -10318,8 +10279,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10318 } 10279 }
10319 10280
10320 /* 10281 /*
10321 * This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified 10282 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
10322 * TS bit (for lazy fpu) and bits which we consider mandatory enabled. 10283 * bits which we consider mandatory enabled.
10323 * The CR0_READ_SHADOW is what L2 should have expected to read given 10284 * The CR0_READ_SHADOW is what L2 should have expected to read given
10324 * the specifications by L1; It's not enough to take 10285 * the specifications by L1; It's not enough to take
10325 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we 10286 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we
@@ -10331,7 +10292,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10331 vmx_set_cr4(vcpu, vmcs12->guest_cr4); 10292 vmx_set_cr4(vcpu, vmcs12->guest_cr4);
10332 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); 10293 vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
10333 10294
10334 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) 10295 if (from_vmentry &&
10296 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
10335 vcpu->arch.efer = vmcs12->guest_ia32_efer; 10297 vcpu->arch.efer = vmcs12->guest_ia32_efer;
10336 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) 10298 else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
10337 vcpu->arch.efer |= (EFER_LMA | EFER_LME); 10299 vcpu->arch.efer |= (EFER_LMA | EFER_LME);
@@ -10365,73 +10327,22 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10365 return 0; 10327 return 0;
10366} 10328}
10367 10329
10368/* 10330static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10369 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
10370 * for running an L2 nested guest.
10371 */
10372static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10373{ 10331{
10374 struct vmcs12 *vmcs12;
10375 struct vcpu_vmx *vmx = to_vmx(vcpu); 10332 struct vcpu_vmx *vmx = to_vmx(vcpu);
10376 int cpu;
10377 struct loaded_vmcs *vmcs02;
10378 bool ia32e;
10379 u32 msr_entry_idx;
10380 unsigned long exit_qualification;
10381
10382 if (!nested_vmx_check_permission(vcpu))
10383 return 1;
10384
10385 if (!nested_vmx_check_vmcs12(vcpu))
10386 goto out;
10387
10388 vmcs12 = get_vmcs12(vcpu);
10389
10390 if (enable_shadow_vmcs)
10391 copy_shadow_to_vmcs12(vmx);
10392
10393 /*
10394 * The nested entry process starts with enforcing various prerequisites
10395 * on vmcs12 as required by the Intel SDM, and act appropriately when
10396 * they fail: As the SDM explains, some conditions should cause the
10397 * instruction to fail, while others will cause the instruction to seem
10398 * to succeed, but return an EXIT_REASON_INVALID_STATE.
10399 * To speed up the normal (success) code path, we should avoid checking
10400 * for misconfigurations which will anyway be caught by the processor
10401 * when using the merged vmcs02.
10402 */
10403 if (vmcs12->launch_state == launch) {
10404 nested_vmx_failValid(vcpu,
10405 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
10406 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
10407 goto out;
10408 }
10409 10333
10410 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && 10334 if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
10411 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { 10335 vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
10412 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 10336 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10413 goto out;
10414 }
10415 10337
10416 if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { 10338 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
10417 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 10339 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10418 goto out;
10419 }
10420
10421 if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
10422 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
10423 goto out;
10424 }
10425 10340
10426 if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) { 10341 if (nested_vmx_check_apicv_controls(vcpu, vmcs12))
10427 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 10342 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10428 goto out;
10429 }
10430 10343
10431 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) { 10344 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
10432 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 10345 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10433 goto out;
10434 }
10435 10346
10436 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, 10347 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
10437 vmx->nested.nested_vmx_procbased_ctls_low, 10348 vmx->nested.nested_vmx_procbased_ctls_low,
@@ -10448,28 +10359,30 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10448 !vmx_control_verify(vmcs12->vm_entry_controls, 10359 !vmx_control_verify(vmcs12->vm_entry_controls,
10449 vmx->nested.nested_vmx_entry_ctls_low, 10360 vmx->nested.nested_vmx_entry_ctls_low,
10450 vmx->nested.nested_vmx_entry_ctls_high)) 10361 vmx->nested.nested_vmx_entry_ctls_high))
10451 { 10362 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10452 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
10453 goto out;
10454 }
10455 10363
10456 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || 10364 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
10457 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || 10365 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
10458 !nested_cr3_valid(vcpu, vmcs12->host_cr3)) { 10366 !nested_cr3_valid(vcpu, vmcs12->host_cr3))
10459 nested_vmx_failValid(vcpu, 10367 return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
10460 VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); 10368
10461 goto out; 10369 return 0;
10462 } 10370}
10371
10372static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10373 u32 *exit_qual)
10374{
10375 bool ia32e;
10376
10377 *exit_qual = ENTRY_FAIL_DEFAULT;
10463 10378
10464 if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || 10379 if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
10465 !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) { 10380 !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))
10466 nested_vmx_entry_failure(vcpu, vmcs12,
10467 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
10468 return 1; 10381 return 1;
10469 } 10382
10470 if (vmcs12->vmcs_link_pointer != -1ull) { 10383 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) &&
10471 nested_vmx_entry_failure(vcpu, vmcs12, 10384 vmcs12->vmcs_link_pointer != -1ull) {
10472 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); 10385 *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
10473 return 1; 10386 return 1;
10474 } 10387 }
10475 10388
@@ -10482,16 +10395,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10482 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to 10395 * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
10483 * CR0.PG) is 1. 10396 * CR0.PG) is 1.
10484 */ 10397 */
10485 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { 10398 if (to_vmx(vcpu)->nested.nested_run_pending &&
10399 (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
10486 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; 10400 ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
10487 if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || 10401 if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
10488 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || 10402 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
10489 ((vmcs12->guest_cr0 & X86_CR0_PG) && 10403 ((vmcs12->guest_cr0 & X86_CR0_PG) &&
10490 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { 10404 ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))
10491 nested_vmx_entry_failure(vcpu, vmcs12,
10492 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
10493 return 1; 10405 return 1;
10494 }
10495 } 10406 }
10496 10407
10497 /* 10408 /*
@@ -10505,28 +10416,26 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10505 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; 10416 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
10506 if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || 10417 if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
10507 ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || 10418 ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
10508 ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { 10419 ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))
10509 nested_vmx_entry_failure(vcpu, vmcs12,
10510 EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
10511 return 1; 10420 return 1;
10512 }
10513 } 10421 }
10514 10422
10515 /* 10423 return 0;
10516 * We're finally done with prerequisite checking, and can start with 10424}
10517 * the nested entry. 10425
10518 */ 10426static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
10427{
10428 struct vcpu_vmx *vmx = to_vmx(vcpu);
10429 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
10430 struct loaded_vmcs *vmcs02;
10431 int cpu;
10432 u32 msr_entry_idx;
10433 u32 exit_qual;
10519 10434
10520 vmcs02 = nested_get_current_vmcs02(vmx); 10435 vmcs02 = nested_get_current_vmcs02(vmx);
10521 if (!vmcs02) 10436 if (!vmcs02)
10522 return -ENOMEM; 10437 return -ENOMEM;
10523 10438
10524 /*
10525 * After this point, the trap flag no longer triggers a singlestep trap
10526 * on the vm entry instructions. Don't call
10527 * kvm_skip_emulated_instruction.
10528 */
10529 skip_emulated_instruction(vcpu);
10530 enter_guest_mode(vcpu); 10439 enter_guest_mode(vcpu);
10531 10440
10532 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) 10441 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -10541,14 +10450,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10541 10450
10542 vmx_segment_cache_clear(vmx); 10451 vmx_segment_cache_clear(vmx);
10543 10452
10544 if (prepare_vmcs02(vcpu, vmcs12, &exit_qualification)) { 10453 if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
10545 leave_guest_mode(vcpu); 10454 leave_guest_mode(vcpu);
10546 vmx_load_vmcs01(vcpu); 10455 vmx_load_vmcs01(vcpu);
10547 nested_vmx_entry_failure(vcpu, vmcs12, 10456 nested_vmx_entry_failure(vcpu, vmcs12,
10548 EXIT_REASON_INVALID_STATE, exit_qualification); 10457 EXIT_REASON_INVALID_STATE, exit_qual);
10549 return 1; 10458 return 1;
10550 } 10459 }
10551 10460
10461 nested_get_vmcs12_pages(vcpu, vmcs12);
10462
10552 msr_entry_idx = nested_vmx_load_msr(vcpu, 10463 msr_entry_idx = nested_vmx_load_msr(vcpu,
10553 vmcs12->vm_entry_msr_load_addr, 10464 vmcs12->vm_entry_msr_load_addr,
10554 vmcs12->vm_entry_msr_load_count); 10465 vmcs12->vm_entry_msr_load_count);
@@ -10562,17 +10473,90 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10562 10473
10563 vmcs12->launch_state = 1; 10474 vmcs12->launch_state = 1;
10564 10475
10565 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
10566 return kvm_vcpu_halt(vcpu);
10567
10568 vmx->nested.nested_run_pending = 1;
10569
10570 /* 10476 /*
10571 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point 10477 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
10572 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet 10478 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
10573 * returned as far as L1 is concerned. It will only return (and set 10479 * returned as far as L1 is concerned. It will only return (and set
10574 * the success flag) when L2 exits (see nested_vmx_vmexit()). 10480 * the success flag) when L2 exits (see nested_vmx_vmexit()).
10575 */ 10481 */
10482 return 0;
10483}
10484
10485/*
10486 * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
10487 * for running an L2 nested guest.
10488 */
10489static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
10490{
10491 struct vmcs12 *vmcs12;
10492 struct vcpu_vmx *vmx = to_vmx(vcpu);
10493 u32 exit_qual;
10494 int ret;
10495
10496 if (!nested_vmx_check_permission(vcpu))
10497 return 1;
10498
10499 if (!nested_vmx_check_vmcs12(vcpu))
10500 goto out;
10501
10502 vmcs12 = get_vmcs12(vcpu);
10503
10504 if (enable_shadow_vmcs)
10505 copy_shadow_to_vmcs12(vmx);
10506
10507 /*
10508 * The nested entry process starts with enforcing various prerequisites
10509 * on vmcs12 as required by the Intel SDM, and act appropriately when
10510 * they fail: As the SDM explains, some conditions should cause the
10511 * instruction to fail, while others will cause the instruction to seem
10512 * to succeed, but return an EXIT_REASON_INVALID_STATE.
10513 * To speed up the normal (success) code path, we should avoid checking
10514 * for misconfigurations which will anyway be caught by the processor
10515 * when using the merged vmcs02.
10516 */
10517 if (vmcs12->launch_state == launch) {
10518 nested_vmx_failValid(vcpu,
10519 launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
10520 : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
10521 goto out;
10522 }
10523
10524 ret = check_vmentry_prereqs(vcpu, vmcs12);
10525 if (ret) {
10526 nested_vmx_failValid(vcpu, ret);
10527 goto out;
10528 }
10529
10530 /*
10531 * After this point, the trap flag no longer triggers a singlestep trap
10532 * on the vm entry instructions; don't call kvm_skip_emulated_instruction.
10533 * This is not 100% correct; for performance reasons, we delegate most
10534 * of the checks on host state to the processor. If those fail,
10535 * the singlestep trap is missed.
10536 */
10537 skip_emulated_instruction(vcpu);
10538
10539 ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
10540 if (ret) {
10541 nested_vmx_entry_failure(vcpu, vmcs12,
10542 EXIT_REASON_INVALID_STATE, exit_qual);
10543 return 1;
10544 }
10545
10546 /*
10547 * We're finally done with prerequisite checking, and can start with
10548 * the nested entry.
10549 */
10550
10551 ret = enter_vmx_non_root_mode(vcpu, true);
10552 if (ret)
10553 return ret;
10554
10555 if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
10556 return kvm_vcpu_halt(vcpu);
10557
10558 vmx->nested.nested_run_pending = 1;
10559
10576 return 1; 10560 return 1;
10577 10561
10578out: 10562out:
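
Reading only the new-side lines of this and the preceding hunks, nested_vmx_run() is now split into three stages, with the from_vmentry flag (threaded through enter_vmx_non_root_mode() into prepare_vmcs02()) gating the state loads that should only happen on a real VMLAUNCH/VMRESUME: debug controls, event injection, PAT, BNDCFGS and EFER. A condensed sketch of the resulting flow, with local declarations and the permission/vmcs12 checks left out:

        /* 1) vmcs12 control-field checks: fail the VM instruction itself. */
        ret = check_vmentry_prereqs(vcpu, vmcs12);
        if (ret) {
                nested_vmx_failValid(vcpu, ret);
                goto out;
        }

        /* Past this point the trap flag no longer yields a single-step trap. */
        skip_emulated_instruction(vcpu);

        /* 2) Guest-state checks: report a VM-entry failure to L1 instead. */
        ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual);
        if (ret) {
                nested_vmx_entry_failure(vcpu, vmcs12,
                                         EXIT_REASON_INVALID_STATE, exit_qual);
                return 1;
        }

        /* 3) Switch to vmcs02 and load the L2 state. */
        ret = enter_vmx_non_root_mode(vcpu, true);
        if (ret)
                return ret;

        if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
                return kvm_vcpu_halt(vcpu);

        vmx->nested.nested_run_pending = 1;
        return 1;
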
@@ -10713,21 +10697,13 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
10713} 10697}
10714 10698
10715/* 10699/*
10716 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits 10700 * Update the guest state fields of vmcs12 to reflect changes that
10717 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), 10701 * occurred while L2 was running. (The "IA-32e mode guest" bit of the
10718 * and this function updates it to reflect the changes to the guest state while 10702 * VM-entry controls is also updated, since this is really a guest
10719 * L2 was running (and perhaps made some exits which were handled directly by L0 10703 * state bit.)
10720 * without going back to L1), and to reflect the exit reason.
10721 * Note that we do not have to copy here all VMCS fields, just those that
10722 * could have changed by the L2 guest or the exit - i.e., the guest-state and
10723 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
10724 * which already writes to vmcs12 directly.
10725 */ 10704 */
10726static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, 10705static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10727 u32 exit_reason, u32 exit_intr_info,
10728 unsigned long exit_qualification)
10729{ 10706{
10730 /* update guest state fields: */
10731 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 10707 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
10732 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); 10708 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
10733 10709
@@ -10833,6 +10809,25 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10833 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); 10809 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
10834 if (nested_cpu_has_xsaves(vmcs12)) 10810 if (nested_cpu_has_xsaves(vmcs12))
10835 vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); 10811 vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
10812}
10813
10814/*
10815 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
10816 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
10817 * and this function updates it to reflect the changes to the guest state while
10818 * L2 was running (and perhaps made some exits which were handled directly by L0
10819 * without going back to L1), and to reflect the exit reason.
10820 * Note that we do not have to copy here all VMCS fields, just those that
10821 * could have changed by the L2 guest or the exit - i.e., the guest-state and
10822 * exit-information fields only. Other fields are modified by L1 with VMWRITE,
10823 * which already writes to vmcs12 directly.
10824 */
10825static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10826 u32 exit_reason, u32 exit_intr_info,
10827 unsigned long exit_qualification)
10828{
10829 /* update guest state fields: */
10830 sync_vmcs12(vcpu, vmcs12);
10836 10831
10837 /* update exit information fields: */ 10832 /* update exit information fields: */
10838 10833
@@ -10883,7 +10878,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
10883 struct vmcs12 *vmcs12) 10878 struct vmcs12 *vmcs12)
10884{ 10879{
10885 struct kvm_segment seg; 10880 struct kvm_segment seg;
10886 unsigned long entry_failure_code; 10881 u32 entry_failure_code;
10887 10882
10888 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) 10883 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
10889 vcpu->arch.efer = vmcs12->host_ia32_efer; 10884 vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -10898,24 +10893,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
10898 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); 10893 vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
10899 /* 10894 /*
10900 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't 10895 * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
10901 * actually changed, because it depends on the current state of 10896 * actually changed, because vmx_set_cr0 refers to efer set above.
10902 * fpu_active (which may have changed). 10897 *
10903 * Note that vmx_set_cr0 refers to efer set above. 10898 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
10899 * (KVM doesn't change it);
10904 */ 10900 */
10901 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
10905 vmx_set_cr0(vcpu, vmcs12->host_cr0); 10902 vmx_set_cr0(vcpu, vmcs12->host_cr0);
10906 /*
10907 * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
10908 * to apply the same changes to L1's vmcs. We just set cr0 correctly,
10909 * but we also need to update cr0_guest_host_mask and exception_bitmap.
10910 */
10911 update_exception_bitmap(vcpu);
10912 vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0);
10913 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
10914 10903
10915 /* 10904 /* Same as above - no reason to call set_cr4_guest_host_mask(). */
10916 * Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01
10917 * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
10918 */
10919 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); 10905 vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
10920 kvm_set_cr4(vcpu, vmcs12->host_cr4); 10906 kvm_set_cr4(vcpu, vmcs12->host_cr4);
10921 10907
@@ -11544,9 +11530,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
11544 11530
11545 .get_pkru = vmx_get_pkru, 11531 .get_pkru = vmx_get_pkru,
11546 11532
11547 .fpu_activate = vmx_fpu_activate,
11548 .fpu_deactivate = vmx_fpu_deactivate,
11549
11550 .tlb_flush = vmx_flush_tlb, 11533 .tlb_flush = vmx_flush_tlb,
11551 11534
11552 .run = vmx_vcpu_run, 11535 .run = vmx_vcpu_run,
@@ -11571,6 +11554,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
11571 .get_enable_apicv = vmx_get_enable_apicv, 11554 .get_enable_apicv = vmx_get_enable_apicv,
11572 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, 11555 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
11573 .load_eoi_exitmap = vmx_load_eoi_exitmap, 11556 .load_eoi_exitmap = vmx_load_eoi_exitmap,
11557 .apicv_post_state_restore = vmx_apicv_post_state_restore,
11574 .hwapic_irr_update = vmx_hwapic_irr_update, 11558 .hwapic_irr_update = vmx_hwapic_irr_update,
11575 .hwapic_isr_update = vmx_hwapic_isr_update, 11559 .hwapic_isr_update = vmx_hwapic_isr_update,
11576 .sync_pir_to_irr = vmx_sync_pir_to_irr, 11560 .sync_pir_to_irr = vmx_sync_pir_to_irr,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2f64e5d0ae53..c48404017e4f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1811,7 +1811,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1811 struct kvm_vcpu_arch *vcpu = &v->arch; 1811 struct kvm_vcpu_arch *vcpu = &v->arch;
1812 struct pvclock_vcpu_time_info guest_hv_clock; 1812 struct pvclock_vcpu_time_info guest_hv_clock;
1813 1813
1814 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, 1814 if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time,
1815 &guest_hv_clock, sizeof(guest_hv_clock)))) 1815 &guest_hv_clock, sizeof(guest_hv_clock))))
1816 return; 1816 return;
1817 1817
@@ -1832,9 +1832,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1832 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); 1832 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
1833 1833
1834 vcpu->hv_clock.version = guest_hv_clock.version + 1; 1834 vcpu->hv_clock.version = guest_hv_clock.version + 1;
1835 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, 1835 kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
1836 &vcpu->hv_clock, 1836 &vcpu->hv_clock,
1837 sizeof(vcpu->hv_clock.version)); 1837 sizeof(vcpu->hv_clock.version));
1838 1838
1839 smp_wmb(); 1839 smp_wmb();
1840 1840
@@ -1848,16 +1848,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1848 1848
1849 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); 1849 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
1850 1850
1851 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, 1851 kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
1852 &vcpu->hv_clock, 1852 &vcpu->hv_clock,
1853 sizeof(vcpu->hv_clock)); 1853 sizeof(vcpu->hv_clock));
1854 1854
1855 smp_wmb(); 1855 smp_wmb();
1856 1856
1857 vcpu->hv_clock.version++; 1857 vcpu->hv_clock.version++;
1858 kvm_write_guest_cached(v->kvm, &vcpu->pv_time, 1858 kvm_vcpu_write_guest_cached(v, &vcpu->pv_time,
1859 &vcpu->hv_clock, 1859 &vcpu->hv_clock,
1860 sizeof(vcpu->hv_clock.version)); 1860 sizeof(vcpu->hv_clock.version));
1861} 1861}
1862 1862
1863static int kvm_guest_time_update(struct kvm_vcpu *v) 1863static int kvm_guest_time_update(struct kvm_vcpu *v)
@@ -2090,7 +2090,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2090 return 0; 2090 return 0;
2091 } 2091 }
2092 2092
2093 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, 2093 if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa,
2094 sizeof(u32))) 2094 sizeof(u32)))
2095 return 1; 2095 return 1;
2096 2096
@@ -2109,7 +2109,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
2109 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) 2109 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2110 return; 2110 return;
2111 2111
2112 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, 2112 if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime,
2113 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) 2113 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2114 return; 2114 return;
2115 2115
@@ -2120,7 +2120,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
2120 2120
2121 vcpu->arch.st.steal.version += 1; 2121 vcpu->arch.st.steal.version += 1;
2122 2122
2123 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, 2123 kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
2124 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); 2124 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2125 2125
2126 smp_wmb(); 2126 smp_wmb();
@@ -2129,14 +2129,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
2129 vcpu->arch.st.last_steal; 2129 vcpu->arch.st.last_steal;
2130 vcpu->arch.st.last_steal = current->sched_info.run_delay; 2130 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2131 2131
2132 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, 2132 kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
2133 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); 2133 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2134 2134
2135 smp_wmb(); 2135 smp_wmb();
2136 2136
2137 vcpu->arch.st.steal.version += 1; 2137 vcpu->arch.st.steal.version += 1;
2138 2138
2139 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, 2139 kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime,
2140 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); 2140 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2141} 2141}
2142 2142
@@ -2241,7 +2241,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2241 if (!(data & 1)) 2241 if (!(data & 1))
2242 break; 2242 break;
2243 2243
2244 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, 2244 if (kvm_vcpu_gfn_to_hva_cache_init(vcpu,
2245 &vcpu->arch.pv_time, data & ~1ULL, 2245 &vcpu->arch.pv_time, data & ~1ULL,
2246 sizeof(struct pvclock_vcpu_time_info))) 2246 sizeof(struct pvclock_vcpu_time_info)))
2247 vcpu->arch.pv_time_enabled = false; 2247 vcpu->arch.pv_time_enabled = false;
@@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2262 if (data & KVM_STEAL_RESERVED_MASK) 2262 if (data & KVM_STEAL_RESERVED_MASK)
2263 return 1; 2263 return 1;
2264 2264
2265 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, 2265 if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime,
2266 data & KVM_STEAL_VALID_BITS, 2266 data & KVM_STEAL_VALID_BITS,
2267 sizeof(struct kvm_steal_time))) 2267 sizeof(struct kvm_steal_time)))
2268 return 1; 2268 return 1;
@@ -2672,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2672 case KVM_CAP_DISABLE_QUIRKS: 2672 case KVM_CAP_DISABLE_QUIRKS:
2673 case KVM_CAP_SET_BOOT_CPU_ID: 2673 case KVM_CAP_SET_BOOT_CPU_ID:
2674 case KVM_CAP_SPLIT_IRQCHIP: 2674 case KVM_CAP_SPLIT_IRQCHIP:
2675 case KVM_CAP_IMMEDIATE_EXIT:
2675#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2676#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2676 case KVM_CAP_ASSIGN_DEV_IRQ: 2677 case KVM_CAP_ASSIGN_DEV_IRQ:
2677 case KVM_CAP_PCI_2_3: 2678 case KVM_CAP_PCI_2_3:
@@ -2875,7 +2876,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
2875 2876
2876 vcpu->arch.st.steal.preempted = 1; 2877 vcpu->arch.st.steal.preempted = 1;
2877 2878
2878 kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, 2879 kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime,
2879 &vcpu->arch.st.steal.preempted, 2880 &vcpu->arch.st.steal.preempted,
2880 offsetof(struct kvm_steal_time, preempted), 2881 offsetof(struct kvm_steal_time, preempted),
2881 sizeof(vcpu->arch.st.steal.preempted)); 2882 sizeof(vcpu->arch.st.steal.preempted));
@@ -2909,7 +2910,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2909static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, 2910static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2910 struct kvm_lapic_state *s) 2911 struct kvm_lapic_state *s)
2911{ 2912{
2912 if (vcpu->arch.apicv_active) 2913 if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
2913 kvm_x86_ops->sync_pir_to_irr(vcpu); 2914 kvm_x86_ops->sync_pir_to_irr(vcpu);
2914 2915
2915 return kvm_apic_get_state(vcpu, s); 2916 return kvm_apic_get_state(vcpu, s);
@@ -6659,7 +6660,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
6659 if (irqchip_split(vcpu->kvm)) 6660 if (irqchip_split(vcpu->kvm))
6660 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); 6661 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
6661 else { 6662 else {
6662 if (vcpu->arch.apicv_active) 6663 if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
6663 kvm_x86_ops->sync_pir_to_irr(vcpu); 6664 kvm_x86_ops->sync_pir_to_irr(vcpu);
6664 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); 6665 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
6665 } 6666 }
@@ -6750,10 +6751,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6750 r = 0; 6751 r = 0;
6751 goto out; 6752 goto out;
6752 } 6753 }
6753 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
6754 vcpu->fpu_active = 0;
6755 kvm_x86_ops->fpu_deactivate(vcpu);
6756 }
6757 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { 6754 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
6758 /* Page is swapped out. Do synthetic halt */ 6755 /* Page is swapped out. Do synthetic halt */
6759 vcpu->arch.apf.halted = true; 6756 vcpu->arch.apf.halted = true;
@@ -6813,20 +6810,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6813 kvm_hv_process_stimers(vcpu); 6810 kvm_hv_process_stimers(vcpu);
6814 } 6811 }
6815 6812
6816 /*
6817 * KVM_REQ_EVENT is not set when posted interrupts are set by
6818 * VT-d hardware, so we have to update RVI unconditionally.
6819 */
6820 if (kvm_lapic_enabled(vcpu)) {
6821 /*
6822 * Update architecture specific hints for APIC
6823 * virtual interrupt delivery.
6824 */
6825 if (vcpu->arch.apicv_active)
6826 kvm_x86_ops->hwapic_irr_update(vcpu,
6827 kvm_lapic_find_highest_irr(vcpu));
6828 }
6829
6830 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { 6813 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
6831 ++vcpu->stat.req_event; 6814 ++vcpu->stat.req_event;
6832 kvm_apic_accept_events(vcpu); 6815 kvm_apic_accept_events(vcpu);
@@ -6869,22 +6852,40 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6869 preempt_disable(); 6852 preempt_disable();
6870 6853
6871 kvm_x86_ops->prepare_guest_switch(vcpu); 6854 kvm_x86_ops->prepare_guest_switch(vcpu);
6872 if (vcpu->fpu_active) 6855 kvm_load_guest_fpu(vcpu);
6873 kvm_load_guest_fpu(vcpu); 6856
6857 /*
6858 * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
6859 * IPIs are then delayed after guest entry, which ensures that they
6860 * result in virtual interrupt delivery.
6861 */
6862 local_irq_disable();
6874 vcpu->mode = IN_GUEST_MODE; 6863 vcpu->mode = IN_GUEST_MODE;
6875 6864
6876 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 6865 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6877 6866
6878 /* 6867 /*
6879 * We should set ->mode before check ->requests, 6868 * 1) We should set ->mode before checking ->requests. Please see
6880 * Please see the comment in kvm_make_all_cpus_request. 6869 * the comment in kvm_make_all_cpus_request.
6881 * This also orders the write to mode from any reads 6870 *
6882 * to the page tables done while the VCPU is running. 6871 * 2) For APICv, we should set ->mode before checking PIR.ON. This
6883 * Please see the comment in kvm_flush_remote_tlbs. 6872 * pairs with the memory barrier implicit in pi_test_and_set_on
6873 * (see vmx_deliver_posted_interrupt).
6874 *
6875 * 3) This also orders the write to mode from any reads to the page
6876 * tables done while the VCPU is running. Please see the comment
6877 * in kvm_flush_remote_tlbs.
6884 */ 6878 */
6885 smp_mb__after_srcu_read_unlock(); 6879 smp_mb__after_srcu_read_unlock();
6886 6880
6887 local_irq_disable(); 6881 /*
6882 * This handles the case where a posted interrupt was
6883 * notified with kvm_vcpu_kick.
6884 */
6885 if (kvm_lapic_enabled(vcpu)) {
6886 if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
6887 kvm_x86_ops->sync_pir_to_irr(vcpu);
6888 }
6888 6889
6889 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests 6890 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
6890 || need_resched() || signal_pending(current)) { 6891 || need_resched() || signal_pending(current)) {
@@ -7023,6 +7024,9 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
7023 7024
7024static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) 7025static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
7025{ 7026{
7027 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7028 kvm_x86_ops->check_nested_events(vcpu, false);
7029
7026 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && 7030 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7027 !vcpu->arch.apf.halted); 7031 !vcpu->arch.apf.halted);
7028} 7032}
@@ -7194,7 +7198,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7194 } else 7198 } else
7195 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); 7199 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7196 7200
7197 r = vcpu_run(vcpu); 7201 if (kvm_run->immediate_exit)
7202 r = -EINTR;
7203 else
7204 r = vcpu_run(vcpu);
7198 7205
7199out: 7206out:
7200 post_kvm_run_save(vcpu); 7207 post_kvm_run_save(vcpu);
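
On x86 the run loop now checks kvm_run->immediate_exit (declared in the uapi hunk further down) and returns -EINTR without entering the guest. A minimal user-space sketch of how a VMM might use this to kick a vcpu; the vcpu fd, the mmap of the kvm_run structure and the sigaction()/pthread_kill() plumbing are assumed to exist elsewhere and are not part of this patch:

#include <errno.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_run *run;     /* mmap'ed from the vcpu fd during setup */

static void kick_handler(int sig)
{
        (void)sig;
        /* Ask the next KVM_RUN on this vcpu to bail out with -EINTR. */
        run->immediate_exit = 1;
}

static int run_vcpu_once(int vcpu_fd)
{
        run->immediate_exit = 0;
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                return errno == EINTR ? 0 : -1; /* 0: kicked, caller re-evaluates */
        /* otherwise dispatch on run->exit_reason as usual */
        return 0;
}
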
@@ -8389,9 +8396,6 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
8389 8396
8390int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 8397int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
8391{ 8398{
8392 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
8393 kvm_x86_ops->check_nested_events(vcpu, false);
8394
8395 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); 8399 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
8396} 8400}
8397 8401
@@ -8528,9 +8532,8 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8528 8532
8529static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) 8533static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
8530{ 8534{
8531 8535 return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val,
8532 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, 8536 sizeof(val));
8533 sizeof(val));
8534} 8537}
8535 8538
8536void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, 8539void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
index 0a54e8326a90..09b4df74291e 100644
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -176,12 +176,19 @@ static void __exit ptp_kvm_exit(void)
176 176
177static int __init ptp_kvm_init(void) 177static int __init ptp_kvm_init(void)
178{ 178{
179 long ret;
180
179 clock_pair_gpa = slow_virt_to_phys(&clock_pair); 181 clock_pair_gpa = slow_virt_to_phys(&clock_pair);
180 hv_clock = pvclock_pvti_cpu0_va(); 182 hv_clock = pvclock_pvti_cpu0_va();
181 183
182 if (!hv_clock) 184 if (!hv_clock)
183 return -ENODEV; 185 return -ENODEV;
184 186
187 ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,
188 KVM_CLOCK_PAIRING_WALLCLOCK);
189 if (ret == -KVM_ENOSYS || ret == -KVM_EOPNOTSUPP)
190 return -ENODEV;
191
185 kvm_ptp_clock.caps = ptp_kvm_caps; 192 kvm_ptp_clock.caps = ptp_kvm_caps;
186 193
187 kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); 194 kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cda457bcedc1..8d69d5150748 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -221,7 +221,6 @@ struct kvm_vcpu {
221 struct mutex mutex; 221 struct mutex mutex;
222 struct kvm_run *run; 222 struct kvm_run *run;
223 223
224 int fpu_active;
225 int guest_fpu_loaded, guest_xcr0_loaded; 224 int guest_fpu_loaded, guest_xcr0_loaded;
226 struct swait_queue_head wq; 225 struct swait_queue_head wq;
227 struct pid *pid; 226 struct pid *pid;
@@ -641,18 +640,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
641int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, 640int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
642 unsigned long len); 641 unsigned long len);
643int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); 642int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
644int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 643int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
645 void *data, unsigned long len); 644 void *data, unsigned long len);
646int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, 645int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
647 int offset, int len); 646 int offset, int len);
648int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, 647int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
649 unsigned long len); 648 unsigned long len);
650int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 649int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
651 void *data, unsigned long len); 650 void *data, unsigned long len);
652int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 651int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
653 void *data, int offset, unsigned long len); 652 void *data, int offset, unsigned long len);
654int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 653int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc,
655 gpa_t gpa, unsigned long len); 654 gpa_t gpa, unsigned long len);
656int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); 655int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
657int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); 656int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
658struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 657struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7964b970b9ad..f51d5082a377 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -218,7 +218,8 @@ struct kvm_hyperv_exit {
218struct kvm_run { 218struct kvm_run {
219 /* in */ 219 /* in */
220 __u8 request_interrupt_window; 220 __u8 request_interrupt_window;
221 __u8 padding1[7]; 221 __u8 immediate_exit;
222 __u8 padding1[6];
222 223
223 /* out */ 224 /* out */
224 __u32 exit_reason; 225 __u32 exit_reason;
@@ -881,6 +882,7 @@ struct kvm_ppc_resize_hpt {
881#define KVM_CAP_SPAPR_RESIZE_HPT 133 882#define KVM_CAP_SPAPR_RESIZE_HPT 133
882#define KVM_CAP_PPC_MMU_RADIX 134 883#define KVM_CAP_PPC_MMU_RADIX 134
883#define KVM_CAP_PPC_MMU_HASH_V3 135 884#define KVM_CAP_PPC_MMU_HASH_V3 135
885#define KVM_CAP_IMMEDIATE_EXIT 136
884 886
885#ifdef KVM_CAP_IRQ_ROUTING 887#ifdef KVM_CAP_IRQ_ROUTING
886 888
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b4e496..cc4d6e0dd2a2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -506,11 +506,6 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
506 if (!slots) 506 if (!slots)
507 return NULL; 507 return NULL;
508 508
509 /*
510 * Init kvm generation close to the maximum to easily test the
511 * code of handling generation number wrap-around.
512 */
513 slots->generation = -150;
514 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) 509 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
515 slots->id_to_index[i] = slots->memslots[i].id = i; 510 slots->id_to_index[i] = slots->memslots[i].id = i;
516 511
@@ -641,9 +636,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
641 636
642 r = -ENOMEM; 637 r = -ENOMEM;
643 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 638 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
644 kvm->memslots[i] = kvm_alloc_memslots(); 639 struct kvm_memslots *slots = kvm_alloc_memslots();
645 if (!kvm->memslots[i]) 640 if (!slots)
646 goto out_err_no_srcu; 641 goto out_err_no_srcu;
642 /*
643 * Generations must be different for each address space.
644 * Init kvm generation close to the maximum to easily test the
645 * code of handling generation number wrap-around.
646 */
647 slots->generation = i * 2 - 150;
648 rcu_assign_pointer(kvm->memslots[i], slots);
647 } 649 }
648 650
649 if (init_srcu_struct(&kvm->srcu)) 651 if (init_srcu_struct(&kvm->srcu))
@@ -870,8 +872,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
870 * Increment the new memslot generation a second time. This prevents 872 * Increment the new memslot generation a second time. This prevents
871 * vm exits that race with memslot updates from caching a memslot 873 * vm exits that race with memslot updates from caching a memslot
872 * generation that will (potentially) be valid forever. 874 * generation that will (potentially) be valid forever.
875 *
876 * Generations must be unique even across address spaces. We do not need
877 * a global counter for that, instead the generation space is evenly split
878 * across address spaces. For example, with two address spaces, address
879 * space 0 will use generations 0, 4, 8, ... while address space 1 will
880 * use generations 2, 6, 10, 14, ...
873 */ 881 */
874 slots->generation++; 882 slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1;
875 883
876 kvm_arch_memslots_updated(kvm, slots); 884 kvm_arch_memslots_updated(kvm, slots);
877 885
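
Spelling out the arithmetic behind the comment above: with KVM_ADDRESS_SPACE_NUM equal to 2 (x86 with SMM) and assuming the first generation bump earlier in install_new_memslots() is by 1 (that line is outside this hunk), every update advances a generation by 4, and the i * 2 - 150 initialization above keeps the two address spaces on disjoint residues, so their generations can never collide; the 0/4/8 versus 2/6/10 example in the comment simply abstracts away the -150 debugging offset. A throwaway user-space check:

#include <stdio.h>

#define KVM_ADDRESS_SPACE_NUM 2

int main(void)
{
        for (int as = 0; as < KVM_ADDRESS_SPACE_NUM; as++) {
                long gen = as * 2 - 150;        /* initial value from kvm_create_vm() */

                printf("address space %d:", as);
                for (int update = 0; update < 4; update++) {
                        gen += 1;                               /* first bump (outside this hunk) */
                        gen += KVM_ADDRESS_SPACE_NUM * 2 - 1;   /* second bump, as above */
                        printf(" %ld", gen);
                }
                printf("\n");
        }
        return 0;       /* prints -146 -142 ... versus -144 -140 ...: never equal */
}
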
@@ -1094,37 +1102,31 @@ int kvm_get_dirty_log(struct kvm *kvm,
1094{ 1102{
1095 struct kvm_memslots *slots; 1103 struct kvm_memslots *slots;
1096 struct kvm_memory_slot *memslot; 1104 struct kvm_memory_slot *memslot;
1097 int r, i, as_id, id; 1105 int i, as_id, id;
1098 unsigned long n; 1106 unsigned long n;
1099 unsigned long any = 0; 1107 unsigned long any = 0;
1100 1108
1101 r = -EINVAL;
1102 as_id = log->slot >> 16; 1109 as_id = log->slot >> 16;
1103 id = (u16)log->slot; 1110 id = (u16)log->slot;
1104 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) 1111 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1105 goto out; 1112 return -EINVAL;
1106 1113
1107 slots = __kvm_memslots(kvm, as_id); 1114 slots = __kvm_memslots(kvm, as_id);
1108 memslot = id_to_memslot(slots, id); 1115 memslot = id_to_memslot(slots, id);
1109 r = -ENOENT;
1110 if (!memslot->dirty_bitmap) 1116 if (!memslot->dirty_bitmap)
1111 goto out; 1117 return -ENOENT;
1112 1118
1113 n = kvm_dirty_bitmap_bytes(memslot); 1119 n = kvm_dirty_bitmap_bytes(memslot);
1114 1120
1115 for (i = 0; !any && i < n/sizeof(long); ++i) 1121 for (i = 0; !any && i < n/sizeof(long); ++i)
1116 any = memslot->dirty_bitmap[i]; 1122 any = memslot->dirty_bitmap[i];
1117 1123
1118 r = -EFAULT;
1119 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 1124 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
1120 goto out; 1125 return -EFAULT;
1121 1126
1122 if (any) 1127 if (any)
1123 *is_dirty = 1; 1128 *is_dirty = 1;
1124 1129 return 0;
1125 r = 0;
1126out:
1127 return r;
1128} 1130}
1129EXPORT_SYMBOL_GPL(kvm_get_dirty_log); 1131EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
1130 1132
@@ -1156,24 +1158,22 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
1156{ 1158{
1157 struct kvm_memslots *slots; 1159 struct kvm_memslots *slots;
1158 struct kvm_memory_slot *memslot; 1160 struct kvm_memory_slot *memslot;
1159 int r, i, as_id, id; 1161 int i, as_id, id;
1160 unsigned long n; 1162 unsigned long n;
1161 unsigned long *dirty_bitmap; 1163 unsigned long *dirty_bitmap;
1162 unsigned long *dirty_bitmap_buffer; 1164 unsigned long *dirty_bitmap_buffer;
1163 1165
1164 r = -EINVAL;
1165 as_id = log->slot >> 16; 1166 as_id = log->slot >> 16;
1166 id = (u16)log->slot; 1167 id = (u16)log->slot;
1167 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) 1168 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1168 goto out; 1169 return -EINVAL;
1169 1170
1170 slots = __kvm_memslots(kvm, as_id); 1171 slots = __kvm_memslots(kvm, as_id);
1171 memslot = id_to_memslot(slots, id); 1172 memslot = id_to_memslot(slots, id);
1172 1173
1173 dirty_bitmap = memslot->dirty_bitmap; 1174 dirty_bitmap = memslot->dirty_bitmap;
1174 r = -ENOENT;
1175 if (!dirty_bitmap) 1175 if (!dirty_bitmap)
1176 goto out; 1176 return -ENOENT;
1177 1177
1178 n = kvm_dirty_bitmap_bytes(memslot); 1178 n = kvm_dirty_bitmap_bytes(memslot);
1179 1179
@@ -1202,14 +1202,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
1202 } 1202 }
1203 1203
1204 spin_unlock(&kvm->mmu_lock); 1204 spin_unlock(&kvm->mmu_lock);
1205
1206 r = -EFAULT;
1207 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) 1205 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
1208 goto out; 1206 return -EFAULT;
1209 1207 return 0;
1210 r = 0;
1211out:
1212 return r;
1213} 1208}
1214EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); 1209EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
1215#endif 1210#endif
@@ -1937,10 +1932,10 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
1937} 1932}
1938EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); 1933EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
1939 1934
1940int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 1935static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
1941 gpa_t gpa, unsigned long len) 1936 struct gfn_to_hva_cache *ghc,
1937 gpa_t gpa, unsigned long len)
1942{ 1938{
1943 struct kvm_memslots *slots = kvm_memslots(kvm);
1944 int offset = offset_in_page(gpa); 1939 int offset = offset_in_page(gpa);
1945 gfn_t start_gfn = gpa >> PAGE_SHIFT; 1940 gfn_t start_gfn = gpa >> PAGE_SHIFT;
1946 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; 1941 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
@@ -1950,7 +1945,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
1950 ghc->gpa = gpa; 1945 ghc->gpa = gpa;
1951 ghc->generation = slots->generation; 1946 ghc->generation = slots->generation;
1952 ghc->len = len; 1947 ghc->len = len;
1953 ghc->memslot = gfn_to_memslot(kvm, start_gfn); 1948 ghc->memslot = __gfn_to_memslot(slots, start_gfn);
1954 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); 1949 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
1955 if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { 1950 if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
1956 ghc->hva += offset; 1951 ghc->hva += offset;
@@ -1960,7 +1955,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
1960 * verify that the entire region is valid here. 1955 * verify that the entire region is valid here.
1961 */ 1956 */
1962 while (start_gfn <= end_gfn) { 1957 while (start_gfn <= end_gfn) {
1963 ghc->memslot = gfn_to_memslot(kvm, start_gfn); 1958 ghc->memslot = __gfn_to_memslot(slots, start_gfn);
1964 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, 1959 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
1965 &nr_pages_avail); 1960 &nr_pages_avail);
1966 if (kvm_is_error_hva(ghc->hva)) 1961 if (kvm_is_error_hva(ghc->hva))
@@ -1972,22 +1967,29 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
1972 } 1967 }
1973 return 0; 1968 return 0;
1974} 1969}
1975EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
1976 1970
1977int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 1971int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
1978 void *data, int offset, unsigned long len) 1972 gpa_t gpa, unsigned long len)
1979{ 1973{
1980 struct kvm_memslots *slots = kvm_memslots(kvm); 1974 struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
1975 return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len);
1976}
1977EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init);
1978
1979int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
1980 void *data, int offset, unsigned long len)
1981{
1982 struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
1981 int r; 1983 int r;
1982 gpa_t gpa = ghc->gpa + offset; 1984 gpa_t gpa = ghc->gpa + offset;
1983 1985
1984 BUG_ON(len + offset > ghc->len); 1986 BUG_ON(len + offset > ghc->len);
1985 1987
1986 if (slots->generation != ghc->generation) 1988 if (slots->generation != ghc->generation)
1987 kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); 1989 __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
1988 1990
1989 if (unlikely(!ghc->memslot)) 1991 if (unlikely(!ghc->memslot))
1990 return kvm_write_guest(kvm, gpa, data, len); 1992 return kvm_vcpu_write_guest(vcpu, gpa, data, len);
1991 1993
1992 if (kvm_is_error_hva(ghc->hva)) 1994 if (kvm_is_error_hva(ghc->hva))
1993 return -EFAULT; 1995 return -EFAULT;
@@ -1999,28 +2001,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
1999 2001
2000 return 0; 2002 return 0;
2001} 2003}
2002EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); 2004EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached);
2003 2005
2004int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 2006int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
2005 void *data, unsigned long len) 2007 void *data, unsigned long len)
2006{ 2008{
2007 return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); 2009 return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len);
2008} 2010}
2009EXPORT_SYMBOL_GPL(kvm_write_guest_cached); 2011EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached);
2010 2012
2011int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 2013int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc,
2012 void *data, unsigned long len) 2014 void *data, unsigned long len)
2013{ 2015{
2014 struct kvm_memslots *slots = kvm_memslots(kvm); 2016 struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
2015 int r; 2017 int r;
2016 2018
2017 BUG_ON(len > ghc->len); 2019 BUG_ON(len > ghc->len);
2018 2020
2019 if (slots->generation != ghc->generation) 2021 if (slots->generation != ghc->generation)
2020 kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); 2022 __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
2021 2023
2022 if (unlikely(!ghc->memslot)) 2024 if (unlikely(!ghc->memslot))
2023 return kvm_read_guest(kvm, ghc->gpa, data, len); 2025 return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len);
2024 2026
2025 if (kvm_is_error_hva(ghc->hva)) 2027 if (kvm_is_error_hva(ghc->hva))
2026 return -EFAULT; 2028 return -EFAULT;
@@ -2031,7 +2033,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
2031 2033
2032 return 0; 2034 return 0;
2033} 2035}
2034EXPORT_SYMBOL_GPL(kvm_read_guest_cached); 2036EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached);
2035 2037
2036int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) 2038int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
2037{ 2039{
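
With this rename the cached accessors resolve memslots through the vcpu (kvm_vcpu_memslots()) instead of through struct kvm. A minimal caller sketch using the new signatures from the include/linux/kvm_host.h hunk above; the enclosing function, vcpu and gpa are assumed and not taken from this patch:

        struct gfn_to_hva_cache ghc;
        u32 val;

        /* Translate gpa once and remember the hva plus the memslot generation. */
        if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &ghc, gpa, sizeof(val)))
                return -EFAULT;

        /* Later accesses reuse the cached hva unless the generation moved on. */
        if (kvm_vcpu_read_guest_cached(vcpu, &ghc, &val, sizeof(val)))
                return -EFAULT;

        val |= 1;
        return kvm_vcpu_write_guest_cached(vcpu, &ghc, &val, sizeof(val));
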
@@ -3133,10 +3135,9 @@ static long kvm_vm_compat_ioctl(struct file *filp,
3133 struct compat_kvm_dirty_log compat_log; 3135 struct compat_kvm_dirty_log compat_log;
3134 struct kvm_dirty_log log; 3136 struct kvm_dirty_log log;
3135 3137
3136 r = -EFAULT;
3137 if (copy_from_user(&compat_log, (void __user *)arg, 3138 if (copy_from_user(&compat_log, (void __user *)arg,
3138 sizeof(compat_log))) 3139 sizeof(compat_log)))
3139 goto out; 3140 return -EFAULT;
3140 log.slot = compat_log.slot; 3141 log.slot = compat_log.slot;
3141 log.padding1 = compat_log.padding1; 3142 log.padding1 = compat_log.padding1;
3142 log.padding2 = compat_log.padding2; 3143 log.padding2 = compat_log.padding2;
@@ -3148,8 +3149,6 @@ static long kvm_vm_compat_ioctl(struct file *filp,
3148 default: 3149 default:
3149 r = kvm_vm_ioctl(filp, ioctl, arg); 3150 r = kvm_vm_ioctl(filp, ioctl, arg);
3150 } 3151 }
3151
3152out:
3153 return r; 3152 return r;
3154} 3153}
3155#endif 3154#endif