-rw-r--r--   Documentation/virtual/kvm/api.txt    13
-rw-r--r--   arch/arm/kvm/arm.c                     4
-rw-r--r--   arch/mips/kvm/mips.c                   7
-rw-r--r--   arch/powerpc/kvm/powerpc.c             6
-rw-r--r--   arch/s390/kvm/kvm-s390.c               4
-rw-r--r--   arch/x86/include/asm/kvm_host.h        5
-rw-r--r--   arch/x86/kvm/cpuid.c                   2
-rw-r--r--   arch/x86/kvm/lapic.c                  64
-rw-r--r--   arch/x86/kvm/lapic.h                   4
-rw-r--r--   arch/x86/kvm/svm.c                    55
-rw-r--r--   arch/x86/kvm/vmx.c                   752
-rw-r--r--   arch/x86/kvm/x86.c                   109
-rw-r--r--   drivers/ptp/ptp_kvm.c                  7
-rw-r--r--   include/linux/kvm_host.h              17
-rw-r--r--   include/uapi/linux/kvm.h               4
-rw-r--r--   virt/kvm/kvm_main.c                  113
16 files changed, 572 insertions, 594 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e4f2cdcf78eb..069450938b79 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
| @@ -3389,7 +3389,18 @@ struct kvm_run { | |||
| 3389 | Request that KVM_RUN return when it becomes possible to inject external | 3389 | Request that KVM_RUN return when it becomes possible to inject external |
| 3390 | interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. | 3390 | interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. |
| 3391 | 3391 | ||
| 3392 | __u8 padding1[7]; | 3392 | __u8 immediate_exit; |
| 3393 | |||
| 3394 | This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN | ||
| 3395 | exits immediately, returning -EINTR. In the common scenario where a | ||
| 3396 | signal is used to "kick" a VCPU out of KVM_RUN, this field can be used | ||
| 3397 | to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability. | ||
| 3398 | Rather than blocking the signal outside KVM_RUN, userspace can set up | ||
| 3399 | a signal handler that sets run->immediate_exit to a non-zero value. | ||
| 3400 | |||
| 3401 | This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available. | ||
| 3402 | |||
| 3403 | __u8 padding1[6]; | ||
| 3393 | 3404 | ||
| 3394 | /* out */ | 3405 | /* out */ |
| 3395 | __u32 exit_reason; | 3406 | __u32 exit_reason; |
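The usage described in the api.txt hunk above can be illustrated with a minimal userspace sketch; it is not part of the patch, and the names vcpu_fd, run, kick_vcpu and the choice of SIGUSR1 are illustrative assumptions. It assumes vcpu_fd is an open vCPU file descriptor and run points to that vCPU's mmapped struct kvm_run (mapped with the size reported by KVM_GET_VCPU_MMAP_SIZE).

    /*
     * Hypothetical sketch: kick a vCPU out of KVM_RUN by setting
     * run->immediate_exit from a signal handler, instead of blocking the
     * signal and using KVM_SET_SIGNAL_MASK.  Setup of vcpu_fd and run is
     * assumed to happen elsewhere.
     */
    #include <errno.h>
    #include <signal.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static struct kvm_run *run;         /* mmapped kvm_run of the vCPU */

    static void kick_vcpu(int sig)
    {
        run->immediate_exit = 1;        /* polled once when KVM_RUN starts */
    }

    static int run_vcpu_loop(int vcpu_fd)
    {
        signal(SIGUSR1, kick_vcpu);

        for (;;) {
            if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
                if (errno == EINTR) {
                    /* kicked or interrupted: service the request,
                     * clear the flag, then re-enter the guest */
                    run->immediate_exit = 0;
                    continue;
                }
                return -errno;
            }
            /* handle run->exit_reason (KVM_EXIT_IO, KVM_EXIT_MMIO, ...) */
        }
    }

A production VMM would typically pair immediate_exit with its own request flag so that a kick arriving just before the flag is cleared is not lost; the sketch only shows the basic mechanism the documentation describes.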
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 21c493a9e5c9..c9a2103faeb9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
| @@ -206,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 206 | case KVM_CAP_ARM_PSCI_0_2: | 206 | case KVM_CAP_ARM_PSCI_0_2: |
| 207 | case KVM_CAP_READONLY_MEM: | 207 | case KVM_CAP_READONLY_MEM: |
| 208 | case KVM_CAP_MP_STATE: | 208 | case KVM_CAP_MP_STATE: |
| 209 | case KVM_CAP_IMMEDIATE_EXIT: | ||
| 209 | r = 1; | 210 | r = 1; |
| 210 | break; | 211 | break; |
| 211 | case KVM_CAP_COALESCED_MMIO: | 212 | case KVM_CAP_COALESCED_MMIO: |
| @@ -604,6 +605,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 604 | return ret; | 605 | return ret; |
| 605 | } | 606 | } |
| 606 | 607 | ||
| 608 | if (run->immediate_exit) | ||
| 609 | return -EINTR; | ||
| 610 | |||
| 607 | if (vcpu->sigset_active) | 611 | if (vcpu->sigset_active) |
| 608 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 612 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
| 609 | 613 | ||
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 31ee5ee0010b..ed81e5ac1426 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
| @@ -397,7 +397,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
| 397 | 397 | ||
| 398 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | 398 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
| 399 | { | 399 | { |
| 400 | int r = 0; | 400 | int r = -EINTR; |
| 401 | sigset_t sigsaved; | 401 | sigset_t sigsaved; |
| 402 | 402 | ||
| 403 | if (vcpu->sigset_active) | 403 | if (vcpu->sigset_active) |
| @@ -409,6 +409,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 409 | vcpu->mmio_needed = 0; | 409 | vcpu->mmio_needed = 0; |
| 410 | } | 410 | } |
| 411 | 411 | ||
| 412 | if (run->immediate_exit) | ||
| 413 | goto out; | ||
| 414 | |||
| 412 | lose_fpu(1); | 415 | lose_fpu(1); |
| 413 | 416 | ||
| 414 | local_irq_disable(); | 417 | local_irq_disable(); |
| @@ -429,6 +432,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 429 | guest_exit_irqoff(); | 432 | guest_exit_irqoff(); |
| 430 | local_irq_enable(); | 433 | local_irq_enable(); |
| 431 | 434 | ||
| 435 | out: | ||
| 432 | if (vcpu->sigset_active) | 436 | if (vcpu->sigset_active) |
| 433 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 437 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 434 | 438 | ||
| @@ -1021,6 +1025,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 1021 | case KVM_CAP_ENABLE_CAP: | 1025 | case KVM_CAP_ENABLE_CAP: |
| 1022 | case KVM_CAP_READONLY_MEM: | 1026 | case KVM_CAP_READONLY_MEM: |
| 1023 | case KVM_CAP_SYNC_MMU: | 1027 | case KVM_CAP_SYNC_MMU: |
| 1028 | case KVM_CAP_IMMEDIATE_EXIT: | ||
| 1024 | r = 1; | 1029 | r = 1; |
| 1025 | break; | 1030 | break; |
| 1026 | case KVM_CAP_COALESCED_MMIO: | 1031 | case KVM_CAP_COALESCED_MMIO: |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index fcb253ba51e5..2b38d824e9e5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
| @@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 511 | case KVM_CAP_ONE_REG: | 511 | case KVM_CAP_ONE_REG: |
| 512 | case KVM_CAP_IOEVENTFD: | 512 | case KVM_CAP_IOEVENTFD: |
| 513 | case KVM_CAP_DEVICE_CTRL: | 513 | case KVM_CAP_DEVICE_CTRL: |
| 514 | case KVM_CAP_IMMEDIATE_EXIT: | ||
| 514 | r = 1; | 515 | r = 1; |
| 515 | break; | 516 | break; |
| 516 | case KVM_CAP_PPC_PAIRED_SINGLES: | 517 | case KVM_CAP_PPC_PAIRED_SINGLES: |
| @@ -1118,7 +1119,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 1118 | #endif | 1119 | #endif |
| 1119 | } | 1120 | } |
| 1120 | 1121 | ||
| 1121 | r = kvmppc_vcpu_run(run, vcpu); | 1122 | if (run->immediate_exit) |
| 1123 | r = -EINTR; | ||
| 1124 | else | ||
| 1125 | r = kvmppc_vcpu_run(run, vcpu); | ||
| 1122 | 1126 | ||
| 1123 | if (vcpu->sigset_active) | 1127 | if (vcpu->sigset_active) |
| 1124 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 1128 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 502de74ea984..99e35fe0dea8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
| @@ -370,6 +370,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 370 | case KVM_CAP_S390_IRQCHIP: | 370 | case KVM_CAP_S390_IRQCHIP: |
| 371 | case KVM_CAP_VM_ATTRIBUTES: | 371 | case KVM_CAP_VM_ATTRIBUTES: |
| 372 | case KVM_CAP_MP_STATE: | 372 | case KVM_CAP_MP_STATE: |
| 373 | case KVM_CAP_IMMEDIATE_EXIT: | ||
| 373 | case KVM_CAP_S390_INJECT_IRQ: | 374 | case KVM_CAP_S390_INJECT_IRQ: |
| 374 | case KVM_CAP_S390_USER_SIGP: | 375 | case KVM_CAP_S390_USER_SIGP: |
| 375 | case KVM_CAP_S390_USER_STSI: | 376 | case KVM_CAP_S390_USER_STSI: |
| @@ -2798,6 +2799,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2798 | int rc; | 2799 | int rc; |
| 2799 | sigset_t sigsaved; | 2800 | sigset_t sigsaved; |
| 2800 | 2801 | ||
| 2802 | if (kvm_run->immediate_exit) | ||
| 2803 | return -EINTR; | ||
| 2804 | |||
| 2801 | if (guestdbg_exit_pending(vcpu)) { | 2805 | if (guestdbg_exit_pending(vcpu)) { |
| 2802 | kvm_s390_prepare_debug_exit(vcpu); | 2806 | kvm_s390_prepare_debug_exit(vcpu); |
| 2803 | return 0; | 2807 | return 0; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 417502cf42b6..74ef58c8ff53 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
| @@ -55,7 +55,6 @@ | |||
| 55 | #define KVM_REQ_TRIPLE_FAULT 10 | 55 | #define KVM_REQ_TRIPLE_FAULT 10 |
| 56 | #define KVM_REQ_MMU_SYNC 11 | 56 | #define KVM_REQ_MMU_SYNC 11 |
| 57 | #define KVM_REQ_CLOCK_UPDATE 12 | 57 | #define KVM_REQ_CLOCK_UPDATE 12 |
| 58 | #define KVM_REQ_DEACTIVATE_FPU 13 | ||
| 59 | #define KVM_REQ_EVENT 14 | 58 | #define KVM_REQ_EVENT 14 |
| 60 | #define KVM_REQ_APF_HALT 15 | 59 | #define KVM_REQ_APF_HALT 15 |
| 61 | #define KVM_REQ_STEAL_UPDATE 16 | 60 | #define KVM_REQ_STEAL_UPDATE 16 |
| @@ -936,8 +935,6 @@ struct kvm_x86_ops { | |||
| 936 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 935 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
| 937 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 936 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
| 938 | u32 (*get_pkru)(struct kvm_vcpu *vcpu); | 937 | u32 (*get_pkru)(struct kvm_vcpu *vcpu); |
| 939 | void (*fpu_activate)(struct kvm_vcpu *vcpu); | ||
| 940 | void (*fpu_deactivate)(struct kvm_vcpu *vcpu); | ||
| 941 | 938 | ||
| 942 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 939 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
| 943 | 940 | ||
| @@ -969,7 +966,7 @@ struct kvm_x86_ops { | |||
| 969 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 966 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
| 970 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); | 967 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); |
| 971 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 968 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
| 972 | void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | 969 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
| 973 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 970 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
| 974 | int (*get_tdp_level)(void); | 971 | int (*get_tdp_level)(void); |
| 975 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 972 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c0e2036217ad..1d155cc56629 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
| @@ -123,8 +123,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
| 123 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) | 123 | if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) |
| 124 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 124 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
| 125 | 125 | ||
| 126 | kvm_x86_ops->fpu_activate(vcpu); | ||
| 127 | |||
| 128 | /* | 126 | /* |
| 129 | * The existing code assumes virtual address is 48-bit in the canonical | 127 | * The existing code assumes virtual address is 48-bit in the canonical |
| 130 | * address checks; exit if it is ever changed. | 128 | * address checks; exit if it is ever changed. |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 33b799fd3a6e..bad6a25067bc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
| @@ -341,7 +341,7 @@ static int find_highest_vector(void *bitmap) | |||
| 341 | vec >= 0; vec -= APIC_VECTORS_PER_REG) { | 341 | vec >= 0; vec -= APIC_VECTORS_PER_REG) { |
| 342 | reg = bitmap + REG_POS(vec); | 342 | reg = bitmap + REG_POS(vec); |
| 343 | if (*reg) | 343 | if (*reg) |
| 344 | return fls(*reg) - 1 + vec; | 344 | return __fls(*reg) + vec; |
| 345 | } | 345 | } |
| 346 | 346 | ||
| 347 | return -1; | 347 | return -1; |
| @@ -361,27 +361,32 @@ static u8 count_vectors(void *bitmap) | |||
| 361 | return count; | 361 | return count; |
| 362 | } | 362 | } |
| 363 | 363 | ||
| 364 | void __kvm_apic_update_irr(u32 *pir, void *regs) | 364 | int __kvm_apic_update_irr(u32 *pir, void *regs) |
| 365 | { | 365 | { |
| 366 | u32 i, pir_val; | 366 | u32 i, vec; |
| 367 | u32 pir_val, irr_val; | ||
| 368 | int max_irr = -1; | ||
| 367 | 369 | ||
| 368 | for (i = 0; i <= 7; i++) { | 370 | for (i = vec = 0; i <= 7; i++, vec += 32) { |
| 369 | pir_val = READ_ONCE(pir[i]); | 371 | pir_val = READ_ONCE(pir[i]); |
| 372 | irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); | ||
| 370 | if (pir_val) { | 373 | if (pir_val) { |
| 371 | pir_val = xchg(&pir[i], 0); | 374 | irr_val |= xchg(&pir[i], 0); |
| 372 | *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; | 375 | *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; |
| 373 | } | 376 | } |
| 377 | if (irr_val) | ||
| 378 | max_irr = __fls(irr_val) + vec; | ||
| 374 | } | 379 | } |
| 380 | |||
| 381 | return max_irr; | ||
| 375 | } | 382 | } |
| 376 | EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); | 383 | EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); |
| 377 | 384 | ||
| 378 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | 385 | int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) |
| 379 | { | 386 | { |
| 380 | struct kvm_lapic *apic = vcpu->arch.apic; | 387 | struct kvm_lapic *apic = vcpu->arch.apic; |
| 381 | 388 | ||
| 382 | __kvm_apic_update_irr(pir, apic->regs); | 389 | return __kvm_apic_update_irr(pir, apic->regs); |
| 383 | |||
| 384 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
| 385 | } | 390 | } |
| 386 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | 391 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); |
| 387 | 392 | ||
| @@ -401,8 +406,6 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
| 401 | if (!apic->irr_pending) | 406 | if (!apic->irr_pending) |
| 402 | return -1; | 407 | return -1; |
| 403 | 408 | ||
| 404 | if (apic->vcpu->arch.apicv_active) | ||
| 405 | kvm_x86_ops->sync_pir_to_irr(apic->vcpu); | ||
| 406 | result = apic_search_irr(apic); | 409 | result = apic_search_irr(apic); |
| 407 | ASSERT(result == -1 || result >= 16); | 410 | ASSERT(result == -1 || result >= 16); |
| 408 | 411 | ||
| @@ -416,9 +419,10 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) | |||
| 416 | vcpu = apic->vcpu; | 419 | vcpu = apic->vcpu; |
| 417 | 420 | ||
| 418 | if (unlikely(vcpu->arch.apicv_active)) { | 421 | if (unlikely(vcpu->arch.apicv_active)) { |
| 419 | /* try to update RVI */ | 422 | /* need to update RVI */ |
| 420 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 423 | apic_clear_vector(vec, apic->regs + APIC_IRR); |
| 421 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 424 | kvm_x86_ops->hwapic_irr_update(vcpu, |
| 425 | apic_find_highest_irr(apic)); | ||
| 422 | } else { | 426 | } else { |
| 423 | apic->irr_pending = false; | 427 | apic->irr_pending = false; |
| 424 | apic_clear_vector(vec, apic->regs + APIC_IRR); | 428 | apic_clear_vector(vec, apic->regs + APIC_IRR); |
| @@ -508,6 +512,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
| 508 | */ | 512 | */ |
| 509 | return apic_find_highest_irr(vcpu->arch.apic); | 513 | return apic_find_highest_irr(vcpu->arch.apic); |
| 510 | } | 514 | } |
| 515 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); | ||
| 511 | 516 | ||
| 512 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 517 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
| 513 | int vector, int level, int trig_mode, | 518 | int vector, int level, int trig_mode, |
| @@ -524,16 +529,14 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | |||
| 524 | 529 | ||
| 525 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | 530 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) |
| 526 | { | 531 | { |
| 527 | 532 | return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val, | |
| 528 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, | 533 | sizeof(val)); |
| 529 | sizeof(val)); | ||
| 530 | } | 534 | } |
| 531 | 535 | ||
| 532 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) | 536 | static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) |
| 533 | { | 537 | { |
| 534 | 538 | return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val, | |
| 535 | return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, | 539 | sizeof(*val)); |
| 536 | sizeof(*val)); | ||
| 537 | } | 540 | } |
| 538 | 541 | ||
| 539 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) | 542 | static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) |
| @@ -572,7 +575,11 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
| 572 | 575 | ||
| 573 | static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) | 576 | static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) |
| 574 | { | 577 | { |
| 575 | int highest_irr = apic_find_highest_irr(apic); | 578 | int highest_irr; |
| 579 | if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) | ||
| 580 | highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu); | ||
| 581 | else | ||
| 582 | highest_irr = apic_find_highest_irr(apic); | ||
| 576 | if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) | 583 | if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) |
| 577 | return -1; | 584 | return -1; |
| 578 | return highest_irr; | 585 | return highest_irr; |
| @@ -2204,8 +2211,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) | |||
| 2204 | 1 : count_vectors(apic->regs + APIC_ISR); | 2211 | 1 : count_vectors(apic->regs + APIC_ISR); |
| 2205 | apic->highest_isr_cache = -1; | 2212 | apic->highest_isr_cache = -1; |
| 2206 | if (vcpu->arch.apicv_active) { | 2213 | if (vcpu->arch.apicv_active) { |
| 2207 | if (kvm_x86_ops->apicv_post_state_restore) | 2214 | kvm_x86_ops->apicv_post_state_restore(vcpu); |
| 2208 | kvm_x86_ops->apicv_post_state_restore(vcpu); | ||
| 2209 | kvm_x86_ops->hwapic_irr_update(vcpu, | 2215 | kvm_x86_ops->hwapic_irr_update(vcpu, |
| 2210 | apic_find_highest_irr(apic)); | 2216 | apic_find_highest_irr(apic)); |
| 2211 | kvm_x86_ops->hwapic_isr_update(vcpu, | 2217 | kvm_x86_ops->hwapic_isr_update(vcpu, |
| @@ -2279,8 +2285,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) | |||
| 2279 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) | 2285 | if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) |
| 2280 | return; | 2286 | return; |
| 2281 | 2287 | ||
| 2282 | if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, | 2288 | if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, |
| 2283 | sizeof(u32))) | 2289 | sizeof(u32))) |
| 2284 | return; | 2290 | return; |
| 2285 | 2291 | ||
| 2286 | apic_set_tpr(vcpu->arch.apic, data & 0xff); | 2292 | apic_set_tpr(vcpu->arch.apic, data & 0xff); |
| @@ -2332,14 +2338,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) | |||
| 2332 | max_isr = 0; | 2338 | max_isr = 0; |
| 2333 | data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); | 2339 | data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); |
| 2334 | 2340 | ||
| 2335 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, | 2341 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, |
| 2336 | sizeof(u32)); | 2342 | sizeof(u32)); |
| 2337 | } | 2343 | } |
| 2338 | 2344 | ||
| 2339 | int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) | 2345 | int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) |
| 2340 | { | 2346 | { |
| 2341 | if (vapic_addr) { | 2347 | if (vapic_addr) { |
| 2342 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, | 2348 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, |
| 2343 | &vcpu->arch.apic->vapic_cache, | 2349 | &vcpu->arch.apic->vapic_cache, |
| 2344 | vapic_addr, sizeof(u32))) | 2350 | vapic_addr, sizeof(u32))) |
| 2345 | return -EINVAL; | 2351 | return -EINVAL; |
| @@ -2433,7 +2439,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | |||
| 2433 | vcpu->arch.pv_eoi.msr_val = data; | 2439 | vcpu->arch.pv_eoi.msr_val = data; |
| 2434 | if (!pv_eoi_enabled(vcpu)) | 2440 | if (!pv_eoi_enabled(vcpu)) |
| 2435 | return 0; | 2441 | return 0; |
| 2436 | return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, | 2442 | return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data, |
| 2437 | addr, sizeof(u8)); | 2443 | addr, sizeof(u8)); |
| 2438 | } | 2444 | } |
| 2439 | 2445 | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 05abd837b78a..bcbe811f3b97 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
| @@ -71,8 +71,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, | |||
| 71 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 71 | bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
| 72 | int short_hand, unsigned int dest, int dest_mode); | 72 | int short_hand, unsigned int dest, int dest_mode); |
| 73 | 73 | ||
| 74 | void __kvm_apic_update_irr(u32 *pir, void *regs); | 74 | int __kvm_apic_update_irr(u32 *pir, void *regs); |
| 75 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | 75 | int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); |
| 76 | void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); | 76 | void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); |
| 77 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, | 77 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
| 78 | struct dest_map *dest_map); | 78 | struct dest_map *dest_map); |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d0414f054bdf..d1efe2c62b3f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
| @@ -971,8 +971,8 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) | |||
| 971 | * a particular vCPU. | 971 | * a particular vCPU. |
| 972 | */ | 972 | */ |
| 973 | #define SVM_VM_DATA_HASH_BITS 8 | 973 | #define SVM_VM_DATA_HASH_BITS 8 |
| 974 | DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); | 974 | static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); |
| 975 | static spinlock_t svm_vm_data_hash_lock; | 975 | static DEFINE_SPINLOCK(svm_vm_data_hash_lock); |
| 976 | 976 | ||
| 977 | /* Note: | 977 | /* Note: |
| 978 | * This function is called from IOMMU driver to notify | 978 | * This function is called from IOMMU driver to notify |
| @@ -1077,8 +1077,6 @@ static __init int svm_hardware_setup(void) | |||
| 1077 | } else { | 1077 | } else { |
| 1078 | pr_info("AVIC enabled\n"); | 1078 | pr_info("AVIC enabled\n"); |
| 1079 | 1079 | ||
| 1080 | hash_init(svm_vm_data_hash); | ||
| 1081 | spin_lock_init(&svm_vm_data_hash_lock); | ||
| 1082 | amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); | 1080 | amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); |
| 1083 | } | 1081 | } |
| 1084 | } | 1082 | } |
| @@ -1159,7 +1157,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1159 | struct vmcb_control_area *control = &svm->vmcb->control; | 1157 | struct vmcb_control_area *control = &svm->vmcb->control; |
| 1160 | struct vmcb_save_area *save = &svm->vmcb->save; | 1158 | struct vmcb_save_area *save = &svm->vmcb->save; |
| 1161 | 1159 | ||
| 1162 | svm->vcpu.fpu_active = 1; | ||
| 1163 | svm->vcpu.arch.hflags = 0; | 1160 | svm->vcpu.arch.hflags = 0; |
| 1164 | 1161 | ||
| 1165 | set_cr_intercept(svm, INTERCEPT_CR0_READ); | 1162 | set_cr_intercept(svm, INTERCEPT_CR0_READ); |
| @@ -1901,15 +1898,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm) | |||
| 1901 | ulong gcr0 = svm->vcpu.arch.cr0; | 1898 | ulong gcr0 = svm->vcpu.arch.cr0; |
| 1902 | u64 *hcr0 = &svm->vmcb->save.cr0; | 1899 | u64 *hcr0 = &svm->vmcb->save.cr0; |
| 1903 | 1900 | ||
| 1904 | if (!svm->vcpu.fpu_active) | 1901 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) |
| 1905 | *hcr0 |= SVM_CR0_SELECTIVE_MASK; | 1902 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); |
| 1906 | else | ||
| 1907 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | ||
| 1908 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | ||
| 1909 | 1903 | ||
| 1910 | mark_dirty(svm->vmcb, VMCB_CR); | 1904 | mark_dirty(svm->vmcb, VMCB_CR); |
| 1911 | 1905 | ||
| 1912 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | 1906 | if (gcr0 == *hcr0) { |
| 1913 | clr_cr_intercept(svm, INTERCEPT_CR0_READ); | 1907 | clr_cr_intercept(svm, INTERCEPT_CR0_READ); |
| 1914 | clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); | 1908 | clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); |
| 1915 | } else { | 1909 | } else { |
| @@ -1940,8 +1934,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 1940 | if (!npt_enabled) | 1934 | if (!npt_enabled) |
| 1941 | cr0 |= X86_CR0_PG | X86_CR0_WP; | 1935 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
| 1942 | 1936 | ||
| 1943 | if (!vcpu->fpu_active) | ||
| 1944 | cr0 |= X86_CR0_TS; | ||
| 1945 | /* | 1937 | /* |
| 1946 | * re-enable caching here because the QEMU bios | 1938 | * re-enable caching here because the QEMU bios |
| 1947 | * does not do it - this results in some delay at | 1939 | * does not do it - this results in some delay at |
| @@ -2160,22 +2152,6 @@ static int ac_interception(struct vcpu_svm *svm) | |||
| 2160 | return 1; | 2152 | return 1; |
| 2161 | } | 2153 | } |
| 2162 | 2154 | ||
| 2163 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) | ||
| 2164 | { | ||
| 2165 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2166 | |||
| 2167 | clr_exception_intercept(svm, NM_VECTOR); | ||
| 2168 | |||
| 2169 | svm->vcpu.fpu_active = 1; | ||
| 2170 | update_cr0_intercept(svm); | ||
| 2171 | } | ||
| 2172 | |||
| 2173 | static int nm_interception(struct vcpu_svm *svm) | ||
| 2174 | { | ||
| 2175 | svm_fpu_activate(&svm->vcpu); | ||
| 2176 | return 1; | ||
| 2177 | } | ||
| 2178 | |||
| 2179 | static bool is_erratum_383(void) | 2155 | static bool is_erratum_383(void) |
| 2180 | { | 2156 | { |
| 2181 | int err, i; | 2157 | int err, i; |
| @@ -2573,9 +2549,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) | |||
| 2573 | if (!npt_enabled && svm->apf_reason == 0) | 2549 | if (!npt_enabled && svm->apf_reason == 0) |
| 2574 | return NESTED_EXIT_HOST; | 2550 | return NESTED_EXIT_HOST; |
| 2575 | break; | 2551 | break; |
| 2576 | case SVM_EXIT_EXCP_BASE + NM_VECTOR: | ||
| 2577 | nm_interception(svm); | ||
| 2578 | break; | ||
| 2579 | default: | 2552 | default: |
| 2580 | break; | 2553 | break; |
| 2581 | } | 2554 | } |
| @@ -4020,7 +3993,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 4020 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 3993 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
| 4021 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, | 3994 | [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, |
| 4022 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 3995 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
| 4023 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | ||
| 4024 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 3996 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
| 4025 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, | 3997 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, |
| 4026 | [SVM_EXIT_INTR] = intr_interception, | 3998 | [SVM_EXIT_INTR] = intr_interception, |
| @@ -4359,11 +4331,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | |||
| 4359 | return; | 4331 | return; |
| 4360 | } | 4332 | } |
| 4361 | 4333 | ||
| 4362 | static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
| 4363 | { | ||
| 4364 | return; | ||
| 4365 | } | ||
| 4366 | |||
| 4367 | static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) | 4334 | static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) |
| 4368 | { | 4335 | { |
| 4369 | kvm_lapic_set_irr(vec, vcpu->arch.apic); | 4336 | kvm_lapic_set_irr(vec, vcpu->arch.apic); |
| @@ -5079,14 +5046,6 @@ static bool svm_has_wbinvd_exit(void) | |||
| 5079 | return true; | 5046 | return true; |
| 5080 | } | 5047 | } |
| 5081 | 5048 | ||
| 5082 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
| 5083 | { | ||
| 5084 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 5085 | |||
| 5086 | set_exception_intercept(svm, NM_VECTOR); | ||
| 5087 | update_cr0_intercept(svm); | ||
| 5088 | } | ||
| 5089 | |||
| 5090 | #define PRE_EX(exit) { .exit_code = (exit), \ | 5049 | #define PRE_EX(exit) { .exit_code = (exit), \ |
| 5091 | .stage = X86_ICPT_PRE_EXCEPT, } | 5050 | .stage = X86_ICPT_PRE_EXCEPT, } |
| 5092 | #define POST_EX(exit) { .exit_code = (exit), \ | 5051 | #define POST_EX(exit) { .exit_code = (exit), \ |
| @@ -5347,9 +5306,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
| 5347 | 5306 | ||
| 5348 | .get_pkru = svm_get_pkru, | 5307 | .get_pkru = svm_get_pkru, |
| 5349 | 5308 | ||
| 5350 | .fpu_activate = svm_fpu_activate, | ||
| 5351 | .fpu_deactivate = svm_fpu_deactivate, | ||
| 5352 | |||
| 5353 | .tlb_flush = svm_flush_tlb, | 5309 | .tlb_flush = svm_flush_tlb, |
| 5354 | 5310 | ||
| 5355 | .run = svm_vcpu_run, | 5311 | .run = svm_vcpu_run, |
| @@ -5373,7 +5329,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
| 5373 | .get_enable_apicv = svm_get_enable_apicv, | 5329 | .get_enable_apicv = svm_get_enable_apicv, |
| 5374 | .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, | 5330 | .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, |
| 5375 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 5331 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
| 5376 | .sync_pir_to_irr = svm_sync_pir_to_irr, | ||
| 5377 | .hwapic_irr_update = svm_hwapic_irr_update, | 5332 | .hwapic_irr_update = svm_hwapic_irr_update, |
| 5378 | .hwapic_isr_update = svm_hwapic_isr_update, | 5333 | .hwapic_isr_update = svm_hwapic_isr_update, |
| 5379 | .apicv_post_state_restore = avic_post_state_restore, | 5334 | .apicv_post_state_restore = avic_post_state_restore, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3e42623090..9856b73a21ad 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
| @@ -1856,7 +1856,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 1856 | u32 eb; | 1856 | u32 eb; |
| 1857 | 1857 | ||
| 1858 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | | 1858 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
| 1859 | (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); | 1859 | (1u << DB_VECTOR) | (1u << AC_VECTOR); |
| 1860 | if ((vcpu->guest_debug & | 1860 | if ((vcpu->guest_debug & |
| 1861 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == | 1861 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
| 1862 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) | 1862 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
| @@ -1865,8 +1865,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 1865 | eb = ~0; | 1865 | eb = ~0; |
| 1866 | if (enable_ept) | 1866 | if (enable_ept) |
| 1867 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 1867 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
| 1868 | if (vcpu->fpu_active) | ||
| 1869 | eb &= ~(1u << NM_VECTOR); | ||
| 1870 | 1868 | ||
| 1871 | /* When we are running a nested L2 guest and L1 specified for it a | 1869 | /* When we are running a nested L2 guest and L1 specified for it a |
| 1872 | * certain exception bitmap, we must trap the same exceptions and pass | 1870 | * certain exception bitmap, we must trap the same exceptions and pass |
| @@ -2340,25 +2338,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 2340 | } | 2338 | } |
| 2341 | } | 2339 | } |
| 2342 | 2340 | ||
| 2343 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | ||
| 2344 | { | ||
| 2345 | ulong cr0; | ||
| 2346 | |||
| 2347 | if (vcpu->fpu_active) | ||
| 2348 | return; | ||
| 2349 | vcpu->fpu_active = 1; | ||
| 2350 | cr0 = vmcs_readl(GUEST_CR0); | ||
| 2351 | cr0 &= ~(X86_CR0_TS | X86_CR0_MP); | ||
| 2352 | cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); | ||
| 2353 | vmcs_writel(GUEST_CR0, cr0); | ||
| 2354 | update_exception_bitmap(vcpu); | ||
| 2355 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
| 2356 | if (is_guest_mode(vcpu)) | ||
| 2357 | vcpu->arch.cr0_guest_owned_bits &= | ||
| 2358 | ~get_vmcs12(vcpu)->cr0_guest_host_mask; | ||
| 2359 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
| 2360 | } | ||
| 2361 | |||
| 2362 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | 2341 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); |
| 2363 | 2342 | ||
| 2364 | /* | 2343 | /* |
| @@ -2377,33 +2356,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields) | |||
| 2377 | (fields->cr4_read_shadow & fields->cr4_guest_host_mask); | 2356 | (fields->cr4_read_shadow & fields->cr4_guest_host_mask); |
| 2378 | } | 2357 | } |
| 2379 | 2358 | ||
| 2380 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
| 2381 | { | ||
| 2382 | /* Note that there is no vcpu->fpu_active = 0 here. The caller must | ||
| 2383 | * set this *before* calling this function. | ||
| 2384 | */ | ||
| 2385 | vmx_decache_cr0_guest_bits(vcpu); | ||
| 2386 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); | ||
| 2387 | update_exception_bitmap(vcpu); | ||
| 2388 | vcpu->arch.cr0_guest_owned_bits = 0; | ||
| 2389 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
| 2390 | if (is_guest_mode(vcpu)) { | ||
| 2391 | /* | ||
| 2392 | * L1's specified read shadow might not contain the TS bit, | ||
| 2393 | * so now that we turned on shadowing of this bit, we need to | ||
| 2394 | * set this bit of the shadow. Like in nested_vmx_run we need | ||
| 2395 | * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet | ||
| 2396 | * up-to-date here because we just decached cr0.TS (and we'll | ||
| 2397 | * only update vmcs12->guest_cr0 on nested exit). | ||
| 2398 | */ | ||
| 2399 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
| 2400 | vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) | | ||
| 2401 | (vcpu->arch.cr0 & X86_CR0_TS); | ||
| 2402 | vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); | ||
| 2403 | } else | ||
| 2404 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
| 2405 | } | ||
| 2406 | |||
| 2407 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 2359 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
| 2408 | { | 2360 | { |
| 2409 | unsigned long rflags, save_rflags; | 2361 | unsigned long rflags, save_rflags; |
| @@ -4232,9 +4184,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 4232 | if (enable_ept) | 4184 | if (enable_ept) |
| 4233 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 4185 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
| 4234 | 4186 | ||
| 4235 | if (!vcpu->fpu_active) | ||
| 4236 | hw_cr0 |= X86_CR0_TS | X86_CR0_MP; | ||
| 4237 | |||
| 4238 | vmcs_writel(CR0_READ_SHADOW, cr0); | 4187 | vmcs_writel(CR0_READ_SHADOW, cr0); |
| 4239 | vmcs_writel(GUEST_CR0, hw_cr0); | 4188 | vmcs_writel(GUEST_CR0, hw_cr0); |
| 4240 | vcpu->arch.cr0 = cr0; | 4189 | vcpu->arch.cr0 = cr0; |
| @@ -5051,26 +5000,12 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) | |||
| 5051 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) | 5000 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) |
| 5052 | return; | 5001 | return; |
| 5053 | 5002 | ||
| 5054 | r = pi_test_and_set_on(&vmx->pi_desc); | 5003 | /* If a previous notification has sent the IPI, nothing to do. */ |
| 5055 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5004 | if (pi_test_and_set_on(&vmx->pi_desc)) |
| 5056 | if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu)) | ||
| 5057 | kvm_vcpu_kick(vcpu); | ||
| 5058 | } | ||
| 5059 | |||
| 5060 | static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
| 5061 | { | ||
| 5062 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 5063 | |||
| 5064 | if (!pi_test_on(&vmx->pi_desc)) | ||
| 5065 | return; | 5005 | return; |
| 5066 | 5006 | ||
| 5067 | pi_clear_on(&vmx->pi_desc); | 5007 | if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) |
| 5068 | /* | 5008 | kvm_vcpu_kick(vcpu); |
| 5069 | * IOMMU can write to PIR.ON, so the barrier matters even on UP. | ||
| 5070 | * But on x86 this is just a compiler barrier anyway. | ||
| 5071 | */ | ||
| 5072 | smp_mb__after_atomic(); | ||
| 5073 | kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
| 5074 | } | 5009 | } |
| 5075 | 5010 | ||
| 5076 | /* | 5011 | /* |
| @@ -5335,7 +5270,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 5335 | /* 22.2.1, 20.8.1 */ | 5270 | /* 22.2.1, 20.8.1 */ |
| 5336 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); | 5271 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); |
| 5337 | 5272 | ||
| 5338 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 5273 | vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; |
| 5274 | vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); | ||
| 5275 | |||
| 5339 | set_cr4_guest_host_mask(vmx); | 5276 | set_cr4_guest_host_mask(vmx); |
| 5340 | 5277 | ||
| 5341 | if (vmx_xsaves_supported()) | 5278 | if (vmx_xsaves_supported()) |
| @@ -5439,7 +5376,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
| 5439 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ | 5376 | vmx_set_cr0(vcpu, cr0); /* enter rmode */ |
| 5440 | vmx_set_cr4(vcpu, 0); | 5377 | vmx_set_cr4(vcpu, 0); |
| 5441 | vmx_set_efer(vcpu, 0); | 5378 | vmx_set_efer(vcpu, 0); |
| 5442 | vmx_fpu_activate(vcpu); | 5379 | |
| 5443 | update_exception_bitmap(vcpu); | 5380 | update_exception_bitmap(vcpu); |
| 5444 | 5381 | ||
| 5445 | vpid_sync_context(vmx->vpid); | 5382 | vpid_sync_context(vmx->vpid); |
| @@ -5473,26 +5410,20 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) | |||
| 5473 | 5410 | ||
| 5474 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 5411 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
| 5475 | { | 5412 | { |
| 5476 | u32 cpu_based_vm_exec_control; | 5413 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, |
| 5477 | 5414 | CPU_BASED_VIRTUAL_INTR_PENDING); | |
| 5478 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
| 5479 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
| 5480 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
| 5481 | } | 5415 | } |
| 5482 | 5416 | ||
| 5483 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 5417 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
| 5484 | { | 5418 | { |
| 5485 | u32 cpu_based_vm_exec_control; | ||
| 5486 | |||
| 5487 | if (!cpu_has_virtual_nmis() || | 5419 | if (!cpu_has_virtual_nmis() || |
| 5488 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | 5420 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { |
| 5489 | enable_irq_window(vcpu); | 5421 | enable_irq_window(vcpu); |
| 5490 | return; | 5422 | return; |
| 5491 | } | 5423 | } |
| 5492 | 5424 | ||
| 5493 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 5425 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, |
| 5494 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 5426 | CPU_BASED_VIRTUAL_NMI_PENDING); |
| 5495 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
| 5496 | } | 5427 | } |
| 5497 | 5428 | ||
| 5498 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 5429 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
| @@ -5718,11 +5649,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
| 5718 | if (is_nmi(intr_info)) | 5649 | if (is_nmi(intr_info)) |
| 5719 | return 1; /* already handled by vmx_vcpu_run() */ | 5650 | return 1; /* already handled by vmx_vcpu_run() */ |
| 5720 | 5651 | ||
| 5721 | if (is_no_device(intr_info)) { | ||
| 5722 | vmx_fpu_activate(vcpu); | ||
| 5723 | return 1; | ||
| 5724 | } | ||
| 5725 | |||
| 5726 | if (is_invalid_opcode(intr_info)) { | 5652 | if (is_invalid_opcode(intr_info)) { |
| 5727 | if (is_guest_mode(vcpu)) { | 5653 | if (is_guest_mode(vcpu)) { |
| 5728 | kvm_queue_exception(vcpu, UD_VECTOR); | 5654 | kvm_queue_exception(vcpu, UD_VECTOR); |
| @@ -5912,22 +5838,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) | |||
| 5912 | return kvm_set_cr4(vcpu, val); | 5838 | return kvm_set_cr4(vcpu, val); |
| 5913 | } | 5839 | } |
| 5914 | 5840 | ||
| 5915 | /* called to set cr0 as appropriate for clts instruction exit. */ | ||
| 5916 | static void handle_clts(struct kvm_vcpu *vcpu) | ||
| 5917 | { | ||
| 5918 | if (is_guest_mode(vcpu)) { | ||
| 5919 | /* | ||
| 5920 | * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS | ||
| 5921 | * but we did (!fpu_active). We need to keep GUEST_CR0.TS on, | ||
| 5922 | * just pretend it's off (also in arch.cr0 for fpu_activate). | ||
| 5923 | */ | ||
| 5924 | vmcs_writel(CR0_READ_SHADOW, | ||
| 5925 | vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS); | ||
| 5926 | vcpu->arch.cr0 &= ~X86_CR0_TS; | ||
| 5927 | } else | ||
| 5928 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | ||
| 5929 | } | ||
| 5930 | |||
| 5931 | static int handle_cr(struct kvm_vcpu *vcpu) | 5841 | static int handle_cr(struct kvm_vcpu *vcpu) |
| 5932 | { | 5842 | { |
| 5933 | unsigned long exit_qualification, val; | 5843 | unsigned long exit_qualification, val; |
| @@ -5973,9 +5883,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
| 5973 | } | 5883 | } |
| 5974 | break; | 5884 | break; |
| 5975 | case 2: /* clts */ | 5885 | case 2: /* clts */ |
| 5976 | handle_clts(vcpu); | 5886 | WARN_ONCE(1, "Guest should always own CR0.TS"); |
| 5887 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); | ||
| 5977 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); | 5888 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
| 5978 | vmx_fpu_activate(vcpu); | ||
| 5979 | return kvm_skip_emulated_instruction(vcpu); | 5889 | return kvm_skip_emulated_instruction(vcpu); |
| 5980 | case 1: /*mov from cr*/ | 5890 | case 1: /*mov from cr*/ |
| 5981 | switch (cr) { | 5891 | switch (cr) { |
| @@ -6151,12 +6061,8 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) | |||
| 6151 | 6061 | ||
| 6152 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) | 6062 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
| 6153 | { | 6063 | { |
| 6154 | u32 cpu_based_vm_exec_control; | 6064 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, |
| 6155 | 6065 | CPU_BASED_VIRTUAL_INTR_PENDING); | |
| 6156 | /* clear pending irq */ | ||
| 6157 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
| 6158 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | ||
| 6159 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
| 6160 | 6066 | ||
| 6161 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6067 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 6162 | 6068 | ||
| @@ -6382,6 +6288,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
| 6382 | EPT_VIOLATION_EXECUTABLE)) | 6288 | EPT_VIOLATION_EXECUTABLE)) |
| 6383 | ? PFERR_PRESENT_MASK : 0; | 6289 | ? PFERR_PRESENT_MASK : 0; |
| 6384 | 6290 | ||
| 6291 | vcpu->arch.gpa_available = true; | ||
| 6385 | vcpu->arch.exit_qualification = exit_qualification; | 6292 | vcpu->arch.exit_qualification = exit_qualification; |
| 6386 | 6293 | ||
| 6387 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 6294 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
| @@ -6399,6 +6306,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
| 6399 | } | 6306 | } |
| 6400 | 6307 | ||
| 6401 | ret = handle_mmio_page_fault(vcpu, gpa, true); | 6308 | ret = handle_mmio_page_fault(vcpu, gpa, true); |
| 6309 | vcpu->arch.gpa_available = true; | ||
| 6402 | if (likely(ret == RET_MMIO_PF_EMULATE)) | 6310 | if (likely(ret == RET_MMIO_PF_EMULATE)) |
| 6403 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | 6311 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == |
| 6404 | EMULATE_DONE; | 6312 | EMULATE_DONE; |
| @@ -6420,12 +6328,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
| 6420 | 6328 | ||
| 6421 | static int handle_nmi_window(struct kvm_vcpu *vcpu) | 6329 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
| 6422 | { | 6330 | { |
| 6423 | u32 cpu_based_vm_exec_control; | 6331 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, |
| 6424 | 6332 | CPU_BASED_VIRTUAL_NMI_PENDING); | |
| 6425 | /* clear pending NMI */ | ||
| 6426 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
| 6427 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; | ||
| 6428 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
| 6429 | ++vcpu->stat.nmi_window_exits; | 6333 | ++vcpu->stat.nmi_window_exits; |
| 6430 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6334 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 6431 | 6335 | ||
| @@ -6663,8 +6567,10 @@ static __init int hardware_setup(void) | |||
| 6663 | if (!cpu_has_vmx_ple()) | 6567 | if (!cpu_has_vmx_ple()) |
| 6664 | ple_gap = 0; | 6568 | ple_gap = 0; |
| 6665 | 6569 | ||
| 6666 | if (!cpu_has_vmx_apicv()) | 6570 | if (!cpu_has_vmx_apicv()) { |
| 6667 | enable_apicv = 0; | 6571 | enable_apicv = 0; |
| 6572 | kvm_x86_ops->sync_pir_to_irr = NULL; | ||
| 6573 | } | ||
| 6668 | 6574 | ||
| 6669 | if (cpu_has_vmx_tsc_scaling()) { | 6575 | if (cpu_has_vmx_tsc_scaling()) { |
| 6670 | kvm_has_tsc_control = true; | 6576 | kvm_has_tsc_control = true; |
| @@ -7134,6 +7040,53 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
| 7134 | return 0; | 7040 | return 0; |
| 7135 | } | 7041 | } |
| 7136 | 7042 | ||
| 7043 | static int enter_vmx_operation(struct kvm_vcpu *vcpu) | ||
| 7044 | { | ||
| 7045 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 7046 | struct vmcs *shadow_vmcs; | ||
| 7047 | |||
| 7048 | if (cpu_has_vmx_msr_bitmap()) { | ||
| 7049 | vmx->nested.msr_bitmap = | ||
| 7050 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
| 7051 | if (!vmx->nested.msr_bitmap) | ||
| 7052 | goto out_msr_bitmap; | ||
| 7053 | } | ||
| 7054 | |||
| 7055 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); | ||
| 7056 | if (!vmx->nested.cached_vmcs12) | ||
| 7057 | goto out_cached_vmcs12; | ||
| 7058 | |||
| 7059 | if (enable_shadow_vmcs) { | ||
| 7060 | shadow_vmcs = alloc_vmcs(); | ||
| 7061 | if (!shadow_vmcs) | ||
| 7062 | goto out_shadow_vmcs; | ||
| 7063 | /* mark vmcs as shadow */ | ||
| 7064 | shadow_vmcs->revision_id |= (1u << 31); | ||
| 7065 | /* init shadow vmcs */ | ||
| 7066 | vmcs_clear(shadow_vmcs); | ||
| 7067 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | ||
| 7068 | } | ||
| 7069 | |||
| 7070 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
| 7071 | vmx->nested.vmcs02_num = 0; | ||
| 7072 | |||
| 7073 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
| 7074 | HRTIMER_MODE_REL_PINNED); | ||
| 7075 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
| 7076 | |||
| 7077 | vmx->nested.vmxon = true; | ||
| 7078 | return 0; | ||
| 7079 | |||
| 7080 | out_shadow_vmcs: | ||
| 7081 | kfree(vmx->nested.cached_vmcs12); | ||
| 7082 | |||
| 7083 | out_cached_vmcs12: | ||
| 7084 | free_page((unsigned long)vmx->nested.msr_bitmap); | ||
| 7085 | |||
| 7086 | out_msr_bitmap: | ||
| 7087 | return -ENOMEM; | ||
| 7088 | } | ||
| 7089 | |||
| 7137 | /* | 7090 | /* |
| 7138 | * Emulate the VMXON instruction. | 7091 | * Emulate the VMXON instruction. |
| 7139 | * Currently, we just remember that VMX is active, and do not save or even | 7092 | * Currently, we just remember that VMX is active, and do not save or even |
| @@ -7144,9 +7097,9 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
| 7144 | */ | 7097 | */ |
| 7145 | static int handle_vmon(struct kvm_vcpu *vcpu) | 7098 | static int handle_vmon(struct kvm_vcpu *vcpu) |
| 7146 | { | 7099 | { |
| 7100 | int ret; | ||
| 7147 | struct kvm_segment cs; | 7101 | struct kvm_segment cs; |
| 7148 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7102 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 7149 | struct vmcs *shadow_vmcs; | ||
| 7150 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | 7103 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED |
| 7151 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | 7104 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
| 7152 | 7105 | ||
| @@ -7186,49 +7139,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
| 7186 | 7139 | ||
| 7187 | if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) | 7140 | if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) |
| 7188 | return 1; | 7141 | return 1; |
| 7189 | 7142 | ||
| 7190 | if (cpu_has_vmx_msr_bitmap()) { | 7143 | ret = enter_vmx_operation(vcpu); |
| 7191 | vmx->nested.msr_bitmap = | 7144 | if (ret) |
| 7192 | (unsigned long *)__get_free_page(GFP_KERNEL); | 7145 | return ret; |
| 7193 | if (!vmx->nested.msr_bitmap) | ||
| 7194 | goto out_msr_bitmap; | ||
| 7195 | } | ||
| 7196 | |||
| 7197 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); | ||
| 7198 | if (!vmx->nested.cached_vmcs12) | ||
| 7199 | goto out_cached_vmcs12; | ||
| 7200 | |||
| 7201 | if (enable_shadow_vmcs) { | ||
| 7202 | shadow_vmcs = alloc_vmcs(); | ||
| 7203 | if (!shadow_vmcs) | ||
| 7204 | goto out_shadow_vmcs; | ||
| 7205 | /* mark vmcs as shadow */ | ||
| 7206 | shadow_vmcs->revision_id |= (1u << 31); | ||
| 7207 | /* init shadow vmcs */ | ||
| 7208 | vmcs_clear(shadow_vmcs); | ||
| 7209 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | ||
| 7210 | } | ||
| 7211 | |||
| 7212 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
| 7213 | vmx->nested.vmcs02_num = 0; | ||
| 7214 | |||
| 7215 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
| 7216 | HRTIMER_MODE_REL_PINNED); | ||
| 7217 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
| 7218 | |||
| 7219 | vmx->nested.vmxon = true; | ||
| 7220 | 7146 | ||
| 7221 | nested_vmx_succeed(vcpu); | 7147 | nested_vmx_succeed(vcpu); |
| 7222 | return kvm_skip_emulated_instruction(vcpu); | 7148 | return kvm_skip_emulated_instruction(vcpu); |
| 7223 | |||
| 7224 | out_shadow_vmcs: | ||
| 7225 | kfree(vmx->nested.cached_vmcs12); | ||
| 7226 | |||
| 7227 | out_cached_vmcs12: | ||
| 7228 | free_page((unsigned long)vmx->nested.msr_bitmap); | ||
| 7229 | |||
| 7230 | out_msr_bitmap: | ||
| 7231 | return -ENOMEM; | ||
| 7232 | } | 7149 | } |
| 7233 | 7150 | ||
| 7234 | /* | 7151 | /* |
| @@ -7677,6 +7594,18 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
| 7677 | return kvm_skip_emulated_instruction(vcpu); | 7594 | return kvm_skip_emulated_instruction(vcpu); |
| 7678 | } | 7595 | } |
| 7679 | 7596 | ||
| 7597 | static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) | ||
| 7598 | { | ||
| 7599 | vmx->nested.current_vmptr = vmptr; | ||
| 7600 | if (enable_shadow_vmcs) { | ||
| 7601 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
| 7602 | SECONDARY_EXEC_SHADOW_VMCS); | ||
| 7603 | vmcs_write64(VMCS_LINK_POINTER, | ||
| 7604 | __pa(vmx->vmcs01.shadow_vmcs)); | ||
| 7605 | vmx->nested.sync_shadow_vmcs = true; | ||
| 7606 | } | ||
| 7607 | } | ||
| 7608 | |||
| 7680 | /* Emulate the VMPTRLD instruction */ | 7609 | /* Emulate the VMPTRLD instruction */ |
| 7681 | static int handle_vmptrld(struct kvm_vcpu *vcpu) | 7610 | static int handle_vmptrld(struct kvm_vcpu *vcpu) |
| 7682 | { | 7611 | { |
| @@ -7707,7 +7636,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
| 7707 | } | 7636 | } |
| 7708 | 7637 | ||
| 7709 | nested_release_vmcs12(vmx); | 7638 | nested_release_vmcs12(vmx); |
| 7710 | vmx->nested.current_vmptr = vmptr; | ||
| 7711 | vmx->nested.current_vmcs12 = new_vmcs12; | 7639 | vmx->nested.current_vmcs12 = new_vmcs12; |
| 7712 | vmx->nested.current_vmcs12_page = page; | 7640 | vmx->nested.current_vmcs12_page = page; |
| 7713 | /* | 7641 | /* |
| @@ -7716,14 +7644,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
| 7716 | */ | 7644 | */ |
| 7717 | memcpy(vmx->nested.cached_vmcs12, | 7645 | memcpy(vmx->nested.cached_vmcs12, |
| 7718 | vmx->nested.current_vmcs12, VMCS12_SIZE); | 7646 | vmx->nested.current_vmcs12, VMCS12_SIZE); |
| 7719 | 7647 | set_current_vmptr(vmx, vmptr); | |
| 7720 | if (enable_shadow_vmcs) { | ||
| 7721 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
| 7722 | SECONDARY_EXEC_SHADOW_VMCS); | ||
| 7723 | vmcs_write64(VMCS_LINK_POINTER, | ||
| 7724 | __pa(vmx->vmcs01.shadow_vmcs)); | ||
| 7725 | vmx->nested.sync_shadow_vmcs = true; | ||
| 7726 | } | ||
| 7727 | } | 7648 | } |
| 7728 | 7649 | ||
| 7729 | nested_vmx_succeed(vcpu); | 7650 | nested_vmx_succeed(vcpu); |
| @@ -8517,6 +8438,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 8517 | u32 vectoring_info = vmx->idt_vectoring_info; | 8438 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 8518 | 8439 | ||
| 8519 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); | 8440 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
| 8441 | vcpu->arch.gpa_available = false; | ||
| 8520 | 8442 | ||
| 8521 | /* | 8443 | /* |
| 8522 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more | 8444 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more |
| @@ -8735,6 +8657,27 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
| 8735 | } | 8657 | } |
| 8736 | } | 8658 | } |
| 8737 | 8659 | ||
| 8660 | static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
| 8661 | { | ||
| 8662 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 8663 | int max_irr; | ||
| 8664 | |||
| 8665 | WARN_ON(!vcpu->arch.apicv_active); | ||
| 8666 | if (pi_test_on(&vmx->pi_desc)) { | ||
| 8667 | pi_clear_on(&vmx->pi_desc); | ||
| 8668 | /* | ||
| 8669 | * IOMMU can write to PIR.ON, so the barrier matters even on UP. | ||
| 8670 | * But on x86 this is just a compiler barrier anyway. | ||
| 8671 | */ | ||
| 8672 | smp_mb__after_atomic(); | ||
| 8673 | max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
| 8674 | } else { | ||
| 8675 | max_irr = kvm_lapic_find_highest_irr(vcpu); | ||
| 8676 | } | ||
| 8677 | vmx_hwapic_irr_update(vcpu, max_irr); | ||
| 8678 | return max_irr; | ||
| 8679 | } | ||
| 8680 | |||
| 8738 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 8681 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
| 8739 | { | 8682 | { |
| 8740 | if (!kvm_vcpu_apicv_active(vcpu)) | 8683 | if (!kvm_vcpu_apicv_active(vcpu)) |
| @@ -8746,6 +8689,14 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | |||
| 8746 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); | 8689 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); |
| 8747 | } | 8690 | } |
| 8748 | 8691 | ||
| 8692 | static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) | ||
| 8693 | { | ||
| 8694 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 8695 | |||
| 8696 | pi_clear_on(&vmx->pi_desc); | ||
| 8697 | memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); | ||
| 8698 | } | ||
| 8699 | |||
| 8749 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 8700 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
| 8750 | { | 8701 | { |
| 8751 | u32 exit_intr_info; | 8702 | u32 exit_intr_info; |
| @@ -9591,17 +9542,16 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
| 9591 | kvm_inject_page_fault(vcpu, fault); | 9542 | kvm_inject_page_fault(vcpu, fault); |
| 9592 | } | 9543 | } |
| 9593 | 9544 | ||
| 9594 | static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | 9545 | static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
| 9546 | struct vmcs12 *vmcs12); | ||
| 9547 | |||
| 9548 | static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | ||
| 9595 | struct vmcs12 *vmcs12) | 9549 | struct vmcs12 *vmcs12) |
| 9596 | { | 9550 | { |
| 9597 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9551 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 9598 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | 9552 | u64 hpa; |
| 9599 | 9553 | ||
| 9600 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | 9554 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
| 9601 | if (!PAGE_ALIGNED(vmcs12->apic_access_addr) || | ||
| 9602 | vmcs12->apic_access_addr >> maxphyaddr) | ||
| 9603 | return false; | ||
| 9604 | |||
| 9605 | /* | 9555 | /* |
| 9606 | * Translate L1 physical address to host physical | 9556 | * Translate L1 physical address to host physical |
| 9607 | * address for vmcs02. Keep the page pinned, so this | 9557 | * address for vmcs02. Keep the page pinned, so this |
| @@ -9612,59 +9562,80 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
| 9612 | nested_release_page(vmx->nested.apic_access_page); | 9562 | nested_release_page(vmx->nested.apic_access_page); |
| 9613 | vmx->nested.apic_access_page = | 9563 | vmx->nested.apic_access_page = |
| 9614 | nested_get_page(vcpu, vmcs12->apic_access_addr); | 9564 | nested_get_page(vcpu, vmcs12->apic_access_addr); |
| 9565 | /* | ||
| 9566 | * If translation failed, no matter: This feature asks | ||
| 9567 | * to exit when accessing the given address, and if it | ||
| 9568 | * can never be accessed, this feature won't do | ||
| 9569 | * anything anyway. | ||
| 9570 | */ | ||
| 9571 | if (vmx->nested.apic_access_page) { | ||
| 9572 | hpa = page_to_phys(vmx->nested.apic_access_page); | ||
| 9573 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | ||
| 9574 | } else { | ||
| 9575 | vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, | ||
| 9576 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | ||
| 9577 | } | ||
| 9578 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && | ||
| 9579 | cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { | ||
| 9580 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | ||
| 9581 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | ||
| 9582 | kvm_vcpu_reload_apic_access_page(vcpu); | ||
| 9615 | } | 9583 | } |
| 9616 | 9584 | ||
| 9617 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 9585 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
| 9618 | if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) || | ||
| 9619 | vmcs12->virtual_apic_page_addr >> maxphyaddr) | ||
| 9620 | return false; | ||
| 9621 | |||
| 9622 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ | 9586 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ |
| 9623 | nested_release_page(vmx->nested.virtual_apic_page); | 9587 | nested_release_page(vmx->nested.virtual_apic_page); |
| 9624 | vmx->nested.virtual_apic_page = | 9588 | vmx->nested.virtual_apic_page = |
| 9625 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); | 9589 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); |
| 9626 | 9590 | ||
| 9627 | /* | 9591 | /* |
| 9628 | * Failing the vm entry is _not_ what the processor does | 9592 | * If translation failed, VM entry will fail because |
| 9629 | * but it's basically the only possibility we have. | 9593 | * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. |
| 9630 | * We could still enter the guest if CR8 load exits are | 9594 | * Failing the vm entry is _not_ what the processor |
| 9631 | * enabled, CR8 store exits are enabled, and virtualize APIC | 9595 | * does but it's basically the only possibility we |
| 9632 | * access is disabled; in this case the processor would never | 9596 | * have. We could still enter the guest if CR8 load |
| 9633 | * use the TPR shadow and we could simply clear the bit from | 9597 | * exits are enabled, CR8 store exits are enabled, and |
| 9634 | * the execution control. But such a configuration is useless, | 9598 | * virtualize APIC access is disabled; in this case |
| 9635 | * so let's keep the code simple. | 9599 | * the processor would never use the TPR shadow and we |
| 9600 | * could simply clear the bit from the execution | ||
| 9601 | * control. But such a configuration is useless, so | ||
| 9602 | * let's keep the code simple. | ||
| 9636 | */ | 9603 | */ |
| 9637 | if (!vmx->nested.virtual_apic_page) | 9604 | if (vmx->nested.virtual_apic_page) { |
| 9638 | return false; | 9605 | hpa = page_to_phys(vmx->nested.virtual_apic_page); |
| 9606 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); | ||
| 9607 | } | ||
| 9639 | } | 9608 | } |
| 9640 | 9609 | ||
| 9641 | if (nested_cpu_has_posted_intr(vmcs12)) { | 9610 | if (nested_cpu_has_posted_intr(vmcs12)) { |
| 9642 | if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) || | ||
| 9643 | vmcs12->posted_intr_desc_addr >> maxphyaddr) | ||
| 9644 | return false; | ||
| 9645 | |||
| 9646 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | 9611 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ |
| 9647 | kunmap(vmx->nested.pi_desc_page); | 9612 | kunmap(vmx->nested.pi_desc_page); |
| 9648 | nested_release_page(vmx->nested.pi_desc_page); | 9613 | nested_release_page(vmx->nested.pi_desc_page); |
| 9649 | } | 9614 | } |
| 9650 | vmx->nested.pi_desc_page = | 9615 | vmx->nested.pi_desc_page = |
| 9651 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); | 9616 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); |
| 9652 | if (!vmx->nested.pi_desc_page) | ||
| 9653 | return false; | ||
| 9654 | |||
| 9655 | vmx->nested.pi_desc = | 9617 | vmx->nested.pi_desc = |
| 9656 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); | 9618 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); |
| 9657 | if (!vmx->nested.pi_desc) { | 9619 | if (!vmx->nested.pi_desc) { |
| 9658 | nested_release_page_clean(vmx->nested.pi_desc_page); | 9620 | nested_release_page_clean(vmx->nested.pi_desc_page); |
| 9659 | return false; | 9621 | return; |
| 9660 | } | 9622 | } |
| 9661 | vmx->nested.pi_desc = | 9623 | vmx->nested.pi_desc = |
| 9662 | (struct pi_desc *)((void *)vmx->nested.pi_desc + | 9624 | (struct pi_desc *)((void *)vmx->nested.pi_desc + |
| 9663 | (unsigned long)(vmcs12->posted_intr_desc_addr & | 9625 | (unsigned long)(vmcs12->posted_intr_desc_addr & |
| 9664 | (PAGE_SIZE - 1))); | 9626 | (PAGE_SIZE - 1))); |
| 9627 | vmcs_write64(POSTED_INTR_DESC_ADDR, | ||
| 9628 | page_to_phys(vmx->nested.pi_desc_page) + | ||
| 9629 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
| 9630 | (PAGE_SIZE - 1))); | ||
| 9665 | } | 9631 | } |
| 9666 | 9632 | if (cpu_has_vmx_msr_bitmap() && | |
| 9667 | return true; | 9633 | nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) && |
| 9634 | nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) | ||
| 9635 | ; | ||
| 9636 | else | ||
| 9637 | vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, | ||
| 9638 | CPU_BASED_USE_MSR_BITMAPS); | ||
| 9668 | } | 9639 | } |
| 9669 | 9640 | ||
| 9670 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) | 9641 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) |
| @@ -9980,7 +9951,7 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) | |||
| 9980 | * is assigned to entry_failure_code on failure. | 9951 | * is assigned to entry_failure_code on failure. |
| 9981 | */ | 9952 | */ |
| 9982 | static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, | 9953 | static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, |
| 9983 | unsigned long *entry_failure_code) | 9954 | u32 *entry_failure_code) |
| 9984 | { | 9955 | { |
| 9985 | if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { | 9956 | if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { |
| 9986 | if (!nested_cr3_valid(vcpu, cr3)) { | 9957 | if (!nested_cr3_valid(vcpu, cr3)) { |
| @@ -10020,7 +9991,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | |||
| 10020 | * is assigned to entry_failure_code on failure. | 9991 | * is assigned to entry_failure_code on failure. |
| 10021 | */ | 9992 | */ |
| 10022 | static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | 9993 | static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
| 10023 | unsigned long *entry_failure_code) | 9994 | bool from_vmentry, u32 *entry_failure_code) |
| 10024 | { | 9995 | { |
| 10025 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9996 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 10026 | u32 exec_control; | 9997 | u32 exec_control; |
| @@ -10063,21 +10034,26 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10063 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); | 10034 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); |
| 10064 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); | 10035 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); |
| 10065 | 10036 | ||
| 10066 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { | 10037 | if (from_vmentry && |
| 10038 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { | ||
| 10067 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | 10039 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
| 10068 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); | 10040 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); |
| 10069 | } else { | 10041 | } else { |
| 10070 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); | 10042 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); |
| 10071 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); | 10043 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); |
| 10072 | } | 10044 | } |
| 10073 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 10045 | if (from_vmentry) { |
| 10074 | vmcs12->vm_entry_intr_info_field); | 10046 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 10075 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 10047 | vmcs12->vm_entry_intr_info_field); |
| 10076 | vmcs12->vm_entry_exception_error_code); | 10048 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, |
| 10077 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 10049 | vmcs12->vm_entry_exception_error_code); |
| 10078 | vmcs12->vm_entry_instruction_len); | 10050 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
| 10079 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 10051 | vmcs12->vm_entry_instruction_len); |
| 10080 | vmcs12->guest_interruptibility_info); | 10052 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, |
| 10053 | vmcs12->guest_interruptibility_info); | ||
| 10054 | } else { | ||
| 10055 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); | ||
| 10056 | } | ||
| 10081 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 10057 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
| 10082 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); | 10058 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
| 10083 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 10059 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
| @@ -10106,12 +10082,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10106 | vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; | 10082 | vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; |
| 10107 | vmx->nested.pi_pending = false; | 10083 | vmx->nested.pi_pending = false; |
| 10108 | vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); | 10084 | vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); |
| 10109 | vmcs_write64(POSTED_INTR_DESC_ADDR, | 10085 | } else { |
| 10110 | page_to_phys(vmx->nested.pi_desc_page) + | ||
| 10111 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
| 10112 | (PAGE_SIZE - 1))); | ||
| 10113 | } else | ||
| 10114 | exec_control &= ~PIN_BASED_POSTED_INTR; | 10086 | exec_control &= ~PIN_BASED_POSTED_INTR; |
| 10087 | } | ||
| 10115 | 10088 | ||
| 10116 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); | 10089 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); |
| 10117 | 10090 | ||
| @@ -10156,26 +10129,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10156 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) | 10129 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) |
| 10157 | exec_control |= vmcs12->secondary_vm_exec_control; | 10130 | exec_control |= vmcs12->secondary_vm_exec_control; |
| 10158 | 10131 | ||
| 10159 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { | ||
| 10160 | /* | ||
| 10161 | * If translation failed, no matter: This feature asks | ||
| 10162 | * to exit when accessing the given address, and if it | ||
| 10163 | * can never be accessed, this feature won't do | ||
| 10164 | * anything anyway. | ||
| 10165 | */ | ||
| 10166 | if (!vmx->nested.apic_access_page) | ||
| 10167 | exec_control &= | ||
| 10168 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
| 10169 | else | ||
| 10170 | vmcs_write64(APIC_ACCESS_ADDR, | ||
| 10171 | page_to_phys(vmx->nested.apic_access_page)); | ||
| 10172 | } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && | ||
| 10173 | cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { | ||
| 10174 | exec_control |= | ||
| 10175 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
| 10176 | kvm_vcpu_reload_apic_access_page(vcpu); | ||
| 10177 | } | ||
| 10178 | |||
| 10179 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { | 10132 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { |
| 10180 | vmcs_write64(EOI_EXIT_BITMAP0, | 10133 | vmcs_write64(EOI_EXIT_BITMAP0, |
| 10181 | vmcs12->eoi_exit_bitmap0); | 10134 | vmcs12->eoi_exit_bitmap0); |
| @@ -10190,6 +10143,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10190 | } | 10143 | } |
| 10191 | 10144 | ||
| 10192 | nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; | 10145 | nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; |
| 10146 | |||
| 10147 | /* | ||
| 10148 | * Write an illegal value to APIC_ACCESS_ADDR. Later, | ||
| 10149 | * nested_get_vmcs12_pages will either fix it up or | ||
| 10150 | * remove the VM execution control. | ||
| 10151 | */ | ||
| 10152 | if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) | ||
| 10153 | vmcs_write64(APIC_ACCESS_ADDR, -1ull); | ||
| 10154 | |||
| 10193 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 10155 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
| 10194 | } | 10156 | } |
| 10195 | 10157 | ||
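Taken together, the hunks above replace eager page translation with a two-phase scheme: prepare_vmcs02() only plants -1ull as a sentinel in APIC_ACCESS_ADDR (and, in the next hunk, in VIRTUAL_APIC_PAGE_ADDR), and nested_get_vmcs12_pages() later overwrites the sentinel with a real host physical address or clears the execution control when L1's page cannot be mapped. A compressed sketch of that shape, with invented names rather than KVM's:

#include <stdbool.h>
#include <stdint.h>

#define ADDR_SENTINEL (~0ull)		/* the -1ull planted in phase one */

struct vmcs_sketch {
	uint64_t apic_access_addr;	/* stand-in for the VMCS field   */
	bool virtualize_apic_accesses;	/* stand-in for the exec control */
};

/* Phase one, at vmcs02 build time: no page lookups, just the sentinel. */
static void phase_one_prepare(struct vmcs_sketch *v)
{
	if (v->virtualize_apic_accesses)
		v->apic_access_addr = ADDR_SENTINEL;
}

/* Phase two, once the vmcs12 pages have been resolved: fix up the field
 * or drop the feature if the guest page could not be mapped. */
static void phase_two_fixup(struct vmcs_sketch *v, uint64_t host_pa)
{
	if (!v->virtualize_apic_accesses)
		return;
	if (host_pa)
		v->apic_access_addr = host_pa;
	else
		v->virtualize_apic_accesses = false;
}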
| @@ -10226,19 +10188,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10226 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 10188 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
| 10227 | exec_control |= vmcs12->cpu_based_vm_exec_control; | 10189 | exec_control |= vmcs12->cpu_based_vm_exec_control; |
| 10228 | 10190 | ||
| 10191 | /* | ||
| 10192 | * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if | ||
| 10193 | * nested_get_vmcs12_pages can't fix it up, the illegal value | ||
| 10194 | * will result in a VM entry failure. | ||
| 10195 | */ | ||
| 10229 | if (exec_control & CPU_BASED_TPR_SHADOW) { | 10196 | if (exec_control & CPU_BASED_TPR_SHADOW) { |
| 10230 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 10197 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); |
| 10231 | page_to_phys(vmx->nested.virtual_apic_page)); | ||
| 10232 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); | 10198 | vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); |
| 10233 | } | 10199 | } |
| 10234 | 10200 | ||
| 10235 | if (cpu_has_vmx_msr_bitmap() && | ||
| 10236 | exec_control & CPU_BASED_USE_MSR_BITMAPS && | ||
| 10237 | nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) | ||
| 10238 | ; /* MSR_BITMAP will be set by following vmx_set_efer. */ | ||
| 10239 | else | ||
| 10240 | exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; | ||
| 10241 | |||
| 10242 | /* | 10201 | /* |
| 10243 | * Merging of IO bitmap not currently supported. | 10202 | * Merging of IO bitmap not currently supported. |
| 10244 | * Rather, exit every time. | 10203 | * Rather, exit every time. |
| @@ -10270,16 +10229,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10270 | ~VM_ENTRY_IA32E_MODE) | | 10229 | ~VM_ENTRY_IA32E_MODE) | |
| 10271 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 10230 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
| 10272 | 10231 | ||
| 10273 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { | 10232 | if (from_vmentry && |
| 10233 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { | ||
| 10274 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); | 10234 | vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); |
| 10275 | vcpu->arch.pat = vmcs12->guest_ia32_pat; | 10235 | vcpu->arch.pat = vmcs12->guest_ia32_pat; |
| 10276 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) | 10236 | } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
| 10277 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); | 10237 | vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); |
| 10278 | 10238 | } | |
| 10279 | 10239 | ||
| 10280 | set_cr4_guest_host_mask(vmx); | 10240 | set_cr4_guest_host_mask(vmx); |
| 10281 | 10241 | ||
| 10282 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) | 10242 | if (from_vmentry && |
| 10243 | vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) | ||
| 10283 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); | 10244 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); |
| 10284 | 10245 | ||
| 10285 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) | 10246 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
| @@ -10318,8 +10279,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10318 | } | 10279 | } |
| 10319 | 10280 | ||
| 10320 | /* | 10281 | /* |
| 10321 | * This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified | 10282 | * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those |
| 10322 | * TS bit (for lazy fpu) and bits which we consider mandatory enabled. | 10283 | * bits which we consider mandatory enabled. |
| 10323 | * The CR0_READ_SHADOW is what L2 should have expected to read given | 10284 | * The CR0_READ_SHADOW is what L2 should have expected to read given |
| 10324 | * the specifications by L1; It's not enough to take | 10285 | * the specifications by L1; It's not enough to take |
| 10325 | * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we | 10286 | * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we |
| @@ -10331,7 +10292,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10331 | vmx_set_cr4(vcpu, vmcs12->guest_cr4); | 10292 | vmx_set_cr4(vcpu, vmcs12->guest_cr4); |
| 10332 | vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); | 10293 | vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); |
| 10333 | 10294 | ||
| 10334 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 10295 | if (from_vmentry && |
| 10296 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) | ||
| 10335 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 10297 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
| 10336 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 10298 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
| 10337 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 10299 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
| @@ -10365,73 +10327,22 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10365 | return 0; | 10327 | return 0; |
| 10366 | } | 10328 | } |
| 10367 | 10329 | ||
| 10368 | /* | 10330 | static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
| 10369 | * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 | ||
| 10370 | * for running an L2 nested guest. | ||
| 10371 | */ | ||
| 10372 | static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
| 10373 | { | 10331 | { |
| 10374 | struct vmcs12 *vmcs12; | ||
| 10375 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 10332 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 10376 | int cpu; | ||
| 10377 | struct loaded_vmcs *vmcs02; | ||
| 10378 | bool ia32e; | ||
| 10379 | u32 msr_entry_idx; | ||
| 10380 | unsigned long exit_qualification; | ||
| 10381 | |||
| 10382 | if (!nested_vmx_check_permission(vcpu)) | ||
| 10383 | return 1; | ||
| 10384 | |||
| 10385 | if (!nested_vmx_check_vmcs12(vcpu)) | ||
| 10386 | goto out; | ||
| 10387 | |||
| 10388 | vmcs12 = get_vmcs12(vcpu); | ||
| 10389 | |||
| 10390 | if (enable_shadow_vmcs) | ||
| 10391 | copy_shadow_to_vmcs12(vmx); | ||
| 10392 | |||
| 10393 | /* | ||
| 10394 | * The nested entry process starts with enforcing various prerequisites | ||
| 10395 | * on vmcs12 as required by the Intel SDM, and act appropriately when | ||
| 10396 | * they fail: As the SDM explains, some conditions should cause the | ||
| 10397 | * instruction to fail, while others will cause the instruction to seem | ||
| 10398 | * to succeed, but return an EXIT_REASON_INVALID_STATE. | ||
| 10399 | * To speed up the normal (success) code path, we should avoid checking | ||
| 10400 | * for misconfigurations which will anyway be caught by the processor | ||
| 10401 | * when using the merged vmcs02. | ||
| 10402 | */ | ||
| 10403 | if (vmcs12->launch_state == launch) { | ||
| 10404 | nested_vmx_failValid(vcpu, | ||
| 10405 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS | ||
| 10406 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); | ||
| 10407 | goto out; | ||
| 10408 | } | ||
| 10409 | 10333 | ||
| 10410 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && | 10334 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && |
| 10411 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { | 10335 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) |
| 10412 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10336 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10413 | goto out; | ||
| 10414 | } | ||
| 10415 | 10337 | ||
| 10416 | if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { | 10338 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) |
| 10417 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10339 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10418 | goto out; | ||
| 10419 | } | ||
| 10420 | |||
| 10421 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) { | ||
| 10422 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
| 10423 | goto out; | ||
| 10424 | } | ||
| 10425 | 10340 | ||
| 10426 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) { | 10341 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) |
| 10427 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10342 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10428 | goto out; | ||
| 10429 | } | ||
| 10430 | 10343 | ||
| 10431 | if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) { | 10344 | if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) |
| 10432 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 10345 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10433 | goto out; | ||
| 10434 | } | ||
| 10435 | 10346 | ||
| 10436 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, | 10347 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, |
| 10437 | vmx->nested.nested_vmx_procbased_ctls_low, | 10348 | vmx->nested.nested_vmx_procbased_ctls_low, |
| @@ -10448,28 +10359,30 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 10448 | !vmx_control_verify(vmcs12->vm_entry_controls, | 10359 | !vmx_control_verify(vmcs12->vm_entry_controls, |
| 10449 | vmx->nested.nested_vmx_entry_ctls_low, | 10360 | vmx->nested.nested_vmx_entry_ctls_low, |
| 10450 | vmx->nested.nested_vmx_entry_ctls_high)) | 10361 | vmx->nested.nested_vmx_entry_ctls_high)) |
| 10451 | { | 10362 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10452 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
| 10453 | goto out; | ||
| 10454 | } | ||
| 10455 | 10363 | ||
| 10456 | if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || | 10364 | if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || |
| 10457 | !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || | 10365 | !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || |
| 10458 | !nested_cr3_valid(vcpu, vmcs12->host_cr3)) { | 10366 | !nested_cr3_valid(vcpu, vmcs12->host_cr3)) |
| 10459 | nested_vmx_failValid(vcpu, | 10367 | return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; |
| 10460 | VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); | 10368 | |
| 10461 | goto out; | 10369 | return 0; |
| 10462 | } | 10370 | } |
| 10371 | |||
| 10372 | static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||
| 10373 | u32 *exit_qual) | ||
| 10374 | { | ||
| 10375 | bool ia32e; | ||
| 10376 | |||
| 10377 | *exit_qual = ENTRY_FAIL_DEFAULT; | ||
| 10463 | 10378 | ||
| 10464 | if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || | 10379 | if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || |
| 10465 | !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) { | 10380 | !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) |
| 10466 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
| 10467 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
| 10468 | return 1; | 10381 | return 1; |
| 10469 | } | 10382 | |
| 10470 | if (vmcs12->vmcs_link_pointer != -1ull) { | 10383 | if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) && |
| 10471 | nested_vmx_entry_failure(vcpu, vmcs12, | 10384 | vmcs12->vmcs_link_pointer != -1ull) { |
| 10472 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); | 10385 | *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; |
| 10473 | return 1; | 10386 | return 1; |
| 10474 | } | 10387 | } |
| 10475 | 10388 | ||
| @@ -10482,16 +10395,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 10482 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to | 10395 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to |
| 10483 | * CR0.PG) is 1. | 10396 | * CR0.PG) is 1. |
| 10484 | */ | 10397 | */ |
| 10485 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { | 10398 | if (to_vmx(vcpu)->nested.nested_run_pending && |
| 10399 | (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { | ||
| 10486 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; | 10400 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; |
| 10487 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || | 10401 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || |
| 10488 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || | 10402 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || |
| 10489 | ((vmcs12->guest_cr0 & X86_CR0_PG) && | 10403 | ((vmcs12->guest_cr0 & X86_CR0_PG) && |
| 10490 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { | 10404 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) |
| 10491 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
| 10492 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
| 10493 | return 1; | 10405 | return 1; |
| 10494 | } | ||
| 10495 | } | 10406 | } |
| 10496 | 10407 | ||
| 10497 | /* | 10408 | /* |
| @@ -10505,28 +10416,26 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 10505 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | 10416 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; |
| 10506 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || | 10417 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || |
| 10507 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || | 10418 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || |
| 10508 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { | 10419 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) |
| 10509 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
| 10510 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
| 10511 | return 1; | 10420 | return 1; |
| 10512 | } | ||
| 10513 | } | 10421 | } |
| 10514 | 10422 | ||
| 10515 | /* | 10423 | return 0; |
| 10516 | * We're finally done with prerequisite checking, and can start with | 10424 | } |
| 10517 | * the nested entry. | 10425 | |
| 10518 | */ | 10426 | static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) |
| 10427 | { | ||
| 10428 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 10429 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
| 10430 | struct loaded_vmcs *vmcs02; | ||
| 10431 | int cpu; | ||
| 10432 | u32 msr_entry_idx; | ||
| 10433 | u32 exit_qual; | ||
| 10519 | 10434 | ||
| 10520 | vmcs02 = nested_get_current_vmcs02(vmx); | 10435 | vmcs02 = nested_get_current_vmcs02(vmx); |
| 10521 | if (!vmcs02) | 10436 | if (!vmcs02) |
| 10522 | return -ENOMEM; | 10437 | return -ENOMEM; |
| 10523 | 10438 | ||
| 10524 | /* | ||
| 10525 | * After this point, the trap flag no longer triggers a singlestep trap | ||
| 10526 | * on the vm entry instructions. Don't call | ||
| 10527 | * kvm_skip_emulated_instruction. | ||
| 10528 | */ | ||
| 10529 | skip_emulated_instruction(vcpu); | ||
| 10530 | enter_guest_mode(vcpu); | 10439 | enter_guest_mode(vcpu); |
| 10531 | 10440 | ||
| 10532 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | 10441 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
| @@ -10541,14 +10450,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 10541 | 10450 | ||
| 10542 | vmx_segment_cache_clear(vmx); | 10451 | vmx_segment_cache_clear(vmx); |
| 10543 | 10452 | ||
| 10544 | if (prepare_vmcs02(vcpu, vmcs12, &exit_qualification)) { | 10453 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { |
| 10545 | leave_guest_mode(vcpu); | 10454 | leave_guest_mode(vcpu); |
| 10546 | vmx_load_vmcs01(vcpu); | 10455 | vmx_load_vmcs01(vcpu); |
| 10547 | nested_vmx_entry_failure(vcpu, vmcs12, | 10456 | nested_vmx_entry_failure(vcpu, vmcs12, |
| 10548 | EXIT_REASON_INVALID_STATE, exit_qualification); | 10457 | EXIT_REASON_INVALID_STATE, exit_qual); |
| 10549 | return 1; | 10458 | return 1; |
| 10550 | } | 10459 | } |
| 10551 | 10460 | ||
| 10461 | nested_get_vmcs12_pages(vcpu, vmcs12); | ||
| 10462 | |||
| 10552 | msr_entry_idx = nested_vmx_load_msr(vcpu, | 10463 | msr_entry_idx = nested_vmx_load_msr(vcpu, |
| 10553 | vmcs12->vm_entry_msr_load_addr, | 10464 | vmcs12->vm_entry_msr_load_addr, |
| 10554 | vmcs12->vm_entry_msr_load_count); | 10465 | vmcs12->vm_entry_msr_load_count); |
| @@ -10562,17 +10473,90 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
| 10562 | 10473 | ||
| 10563 | vmcs12->launch_state = 1; | 10474 | vmcs12->launch_state = 1; |
| 10564 | 10475 | ||
| 10565 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
| 10566 | return kvm_vcpu_halt(vcpu); | ||
| 10567 | |||
| 10568 | vmx->nested.nested_run_pending = 1; | ||
| 10569 | |||
| 10570 | /* | 10476 | /* |
| 10571 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 10477 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
| 10572 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 10478 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
| 10573 | * returned as far as L1 is concerned. It will only return (and set | 10479 | * returned as far as L1 is concerned. It will only return (and set |
| 10574 | * the success flag) when L2 exits (see nested_vmx_vmexit()). | 10480 | * the success flag) when L2 exits (see nested_vmx_vmexit()). |
| 10575 | */ | 10481 | */ |
| 10482 | return 0; | ||
| 10483 | } | ||
| 10484 | |||
| 10485 | /* | ||
| 10486 | * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 | ||
| 10487 | * for running an L2 nested guest. | ||
| 10488 | */ | ||
| 10489 | static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
| 10490 | { | ||
| 10491 | struct vmcs12 *vmcs12; | ||
| 10492 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 10493 | u32 exit_qual; | ||
| 10494 | int ret; | ||
| 10495 | |||
| 10496 | if (!nested_vmx_check_permission(vcpu)) | ||
| 10497 | return 1; | ||
| 10498 | |||
| 10499 | if (!nested_vmx_check_vmcs12(vcpu)) | ||
| 10500 | goto out; | ||
| 10501 | |||
| 10502 | vmcs12 = get_vmcs12(vcpu); | ||
| 10503 | |||
| 10504 | if (enable_shadow_vmcs) | ||
| 10505 | copy_shadow_to_vmcs12(vmx); | ||
| 10506 | |||
| 10507 | /* | ||
| 10508 | * The nested entry process starts with enforcing various prerequisites | ||
| 10509 | * on vmcs12 as required by the Intel SDM, and act appropriately when | ||
| 10510 | * they fail: As the SDM explains, some conditions should cause the | ||
| 10511 | * instruction to fail, while others will cause the instruction to seem | ||
| 10512 | * to succeed, but return an EXIT_REASON_INVALID_STATE. | ||
| 10513 | * To speed up the normal (success) code path, we should avoid checking | ||
| 10514 | * for misconfigurations which will anyway be caught by the processor | ||
| 10515 | * when using the merged vmcs02. | ||
| 10516 | */ | ||
| 10517 | if (vmcs12->launch_state == launch) { | ||
| 10518 | nested_vmx_failValid(vcpu, | ||
| 10519 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS | ||
| 10520 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); | ||
| 10521 | goto out; | ||
| 10522 | } | ||
| 10523 | |||
| 10524 | ret = check_vmentry_prereqs(vcpu, vmcs12); | ||
| 10525 | if (ret) { | ||
| 10526 | nested_vmx_failValid(vcpu, ret); | ||
| 10527 | goto out; | ||
| 10528 | } | ||
| 10529 | |||
| 10530 | /* | ||
| 10531 | * After this point, the trap flag no longer triggers a singlestep trap | ||
| 10532 | * on the vm entry instructions; don't call kvm_skip_emulated_instruction. | ||
| 10533 | * This is not 100% correct; for performance reasons, we delegate most | ||
| 10534 | * of the checks on host state to the processor. If those fail, | ||
| 10535 | * the singlestep trap is missed. | ||
| 10536 | */ | ||
| 10537 | skip_emulated_instruction(vcpu); | ||
| 10538 | |||
| 10539 | ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual); | ||
| 10540 | if (ret) { | ||
| 10541 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
| 10542 | EXIT_REASON_INVALID_STATE, exit_qual); | ||
| 10543 | return 1; | ||
| 10544 | } | ||
| 10545 | |||
| 10546 | /* | ||
| 10547 | * We're finally done with prerequisite checking, and can start with | ||
| 10548 | * the nested entry. | ||
| 10549 | */ | ||
| 10550 | |||
| 10551 | ret = enter_vmx_non_root_mode(vcpu, true); | ||
| 10552 | if (ret) | ||
| 10553 | return ret; | ||
| 10554 | |||
| 10555 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
| 10556 | return kvm_vcpu_halt(vcpu); | ||
| 10557 | |||
| 10558 | vmx->nested.nested_run_pending = 1; | ||
| 10559 | |||
| 10576 | return 1; | 10560 | return 1; |
| 10577 | 10561 | ||
| 10578 | out: | 10562 | out: |
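The comment block above distinguishes two failure classes, and the restructured code keeps them on opposite sides of skip_emulated_instruction(): prerequisite violations make the VMX instruction itself fail (VMfailValid, via the out: path), while the later checks surface as a VM exit with EXIT_REASON_INVALID_STATE. A condensed sketch of the resulting flow; every *_sketch helper is a stub standing in for the patch's function of the same base name:

/* Stubs so the sketch is self-contained; each simply reports "no error". */
static int check_vmentry_prereqs_sketch(void)		{ return 0; }
static int check_vmentry_postreqs_sketch(void)		{ return 0; }
static int enter_vmx_non_root_mode_sketch(void)		{ return 0; }
static void skip_emulated_instruction_sketch(void)	{ }
static void nested_vmx_failValid_sketch(int err)	{ (void)err; }
static void nested_vmx_entry_failure_sketch(void)	{ }

static int nested_vmx_run_sketch(void)
{
	int err;

	err = check_vmentry_prereqs_sketch();
	if (err) {
		/* Class 1: the instruction fails; handled on the out:
		 * path, which still skips the instruction. */
		nested_vmx_failValid_sketch(err);
		return 1;
	}

	/* Beyond this point a single-step trap on VMLAUNCH/VMRESUME can
	 * no longer be delivered (see the comment above). */
	skip_emulated_instruction_sketch();

	if (check_vmentry_postreqs_sketch()) {
		/* Class 2: reflected to L1 as EXIT_REASON_INVALID_STATE. */
		nested_vmx_entry_failure_sketch();
		return 1;
	}

	/* Builds vmcs02, resolves the vmcs12 pages, loads the MSR list. */
	err = enter_vmx_non_root_mode_sketch();
	return err ? err : 1;
}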
| @@ -10713,21 +10697,13 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | |||
| 10713 | } | 10697 | } |
| 10714 | 10698 | ||
| 10715 | /* | 10699 | /* |
| 10716 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | 10700 | * Update the guest state fields of vmcs12 to reflect changes that |
| 10717 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | 10701 | * occurred while L2 was running. (The "IA-32e mode guest" bit of the |
| 10718 | * and this function updates it to reflect the changes to the guest state while | 10702 | * VM-entry controls is also updated, since this is really a guest |
| 10719 | * L2 was running (and perhaps made some exits which were handled directly by L0 | 10703 | * state bit.) |
| 10720 | * without going back to L1), and to reflect the exit reason. | ||
| 10721 | * Note that we do not have to copy here all VMCS fields, just those that | ||
| 10722 | * could have changed by the L2 guest or the exit - i.e., the guest-state and | ||
| 10723 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | ||
| 10724 | * which already writes to vmcs12 directly. | ||
| 10725 | */ | 10704 | */ |
| 10726 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | 10705 | static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
| 10727 | u32 exit_reason, u32 exit_intr_info, | ||
| 10728 | unsigned long exit_qualification) | ||
| 10729 | { | 10706 | { |
| 10730 | /* update guest state fields: */ | ||
| 10731 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 10707 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
| 10732 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | 10708 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); |
| 10733 | 10709 | ||
| @@ -10833,6 +10809,25 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10833 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | 10809 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); |
| 10834 | if (nested_cpu_has_xsaves(vmcs12)) | 10810 | if (nested_cpu_has_xsaves(vmcs12)) |
| 10835 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); | 10811 | vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); |
| 10812 | } | ||
| 10813 | |||
| 10814 | /* | ||
| 10815 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | ||
| 10816 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | ||
| 10817 | * and this function updates it to reflect the changes to the guest state while | ||
| 10818 | * L2 was running (and perhaps made some exits which were handled directly by L0 | ||
| 10819 | * without going back to L1), and to reflect the exit reason. | ||
| 10820 | * Note that we do not have to copy here all VMCS fields, just those that | ||
| 10821 | * could have changed by the L2 guest or the exit - i.e., the guest-state and | ||
| 10822 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | ||
| 10823 | * which already writes to vmcs12 directly. | ||
| 10824 | */ | ||
| 10825 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||
| 10826 | u32 exit_reason, u32 exit_intr_info, | ||
| 10827 | unsigned long exit_qualification) | ||
| 10828 | { | ||
| 10829 | /* update guest state fields: */ | ||
| 10830 | sync_vmcs12(vcpu, vmcs12); | ||
| 10836 | 10831 | ||
| 10837 | /* update exit information fields: */ | 10832 | /* update exit information fields: */ |
| 10838 | 10833 | ||
| @@ -10883,7 +10878,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
| 10883 | struct vmcs12 *vmcs12) | 10878 | struct vmcs12 *vmcs12) |
| 10884 | { | 10879 | { |
| 10885 | struct kvm_segment seg; | 10880 | struct kvm_segment seg; |
| 10886 | unsigned long entry_failure_code; | 10881 | u32 entry_failure_code; |
| 10887 | 10882 | ||
| 10888 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 10883 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
| 10889 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 10884 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
| @@ -10898,24 +10893,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
| 10898 | vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); | 10893 | vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); |
| 10899 | /* | 10894 | /* |
| 10900 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't | 10895 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't |
| 10901 | * actually changed, because it depends on the current state of | 10896 | * actually changed, because vmx_set_cr0 refers to efer set above. |
| 10902 | * fpu_active (which may have changed). | 10897 | * |
| 10903 | * Note that vmx_set_cr0 refers to efer set above. | 10898 | * CR0_GUEST_HOST_MASK is already set in the original vmcs01 |
| 10899 | * (KVM doesn't change it); | ||
| 10904 | */ | 10900 | */ |
| 10901 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
| 10905 | vmx_set_cr0(vcpu, vmcs12->host_cr0); | 10902 | vmx_set_cr0(vcpu, vmcs12->host_cr0); |
| 10906 | /* | ||
| 10907 | * If we did fpu_activate()/fpu_deactivate() during L2's run, we need | ||
| 10908 | * to apply the same changes to L1's vmcs. We just set cr0 correctly, | ||
| 10909 | * but we also need to update cr0_guest_host_mask and exception_bitmap. | ||
| 10910 | */ | ||
| 10911 | update_exception_bitmap(vcpu); | ||
| 10912 | vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0); | ||
| 10913 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
| 10914 | 10903 | ||
| 10915 | /* | 10904 | /* Same as above - no reason to call set_cr4_guest_host_mask(). */ |
| 10916 | * Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01 | ||
| 10917 | * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask(); | ||
| 10918 | */ | ||
| 10919 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); | 10905 | vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); |
| 10920 | kvm_set_cr4(vcpu, vmcs12->host_cr4); | 10906 | kvm_set_cr4(vcpu, vmcs12->host_cr4); |
| 10921 | 10907 | ||
| @@ -11544,9 +11530,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
| 11544 | 11530 | ||
| 11545 | .get_pkru = vmx_get_pkru, | 11531 | .get_pkru = vmx_get_pkru, |
| 11546 | 11532 | ||
| 11547 | .fpu_activate = vmx_fpu_activate, | ||
| 11548 | .fpu_deactivate = vmx_fpu_deactivate, | ||
| 11549 | |||
| 11550 | .tlb_flush = vmx_flush_tlb, | 11533 | .tlb_flush = vmx_flush_tlb, |
| 11551 | 11534 | ||
| 11552 | .run = vmx_vcpu_run, | 11535 | .run = vmx_vcpu_run, |
| @@ -11571,6 +11554,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
| 11571 | .get_enable_apicv = vmx_get_enable_apicv, | 11554 | .get_enable_apicv = vmx_get_enable_apicv, |
| 11572 | .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, | 11555 | .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, |
| 11573 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 11556 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
| 11557 | .apicv_post_state_restore = vmx_apicv_post_state_restore, | ||
| 11574 | .hwapic_irr_update = vmx_hwapic_irr_update, | 11558 | .hwapic_irr_update = vmx_hwapic_irr_update, |
| 11575 | .hwapic_isr_update = vmx_hwapic_isr_update, | 11559 | .hwapic_isr_update = vmx_hwapic_isr_update, |
| 11576 | .sync_pir_to_irr = vmx_sync_pir_to_irr, | 11560 | .sync_pir_to_irr = vmx_sync_pir_to_irr, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f64e5d0ae53..c48404017e4f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -1811,7 +1811,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | |||
| 1811 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1811 | struct kvm_vcpu_arch *vcpu = &v->arch; |
| 1812 | struct pvclock_vcpu_time_info guest_hv_clock; | 1812 | struct pvclock_vcpu_time_info guest_hv_clock; |
| 1813 | 1813 | ||
| 1814 | if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, | 1814 | if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time, |
| 1815 | &guest_hv_clock, sizeof(guest_hv_clock)))) | 1815 | &guest_hv_clock, sizeof(guest_hv_clock)))) |
| 1816 | return; | 1816 | return; |
| 1817 | 1817 | ||
| @@ -1832,9 +1832,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | |||
| 1832 | BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); | 1832 | BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); |
| 1833 | 1833 | ||
| 1834 | vcpu->hv_clock.version = guest_hv_clock.version + 1; | 1834 | vcpu->hv_clock.version = guest_hv_clock.version + 1; |
| 1835 | kvm_write_guest_cached(v->kvm, &vcpu->pv_time, | 1835 | kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, |
| 1836 | &vcpu->hv_clock, | 1836 | &vcpu->hv_clock, |
| 1837 | sizeof(vcpu->hv_clock.version)); | 1837 | sizeof(vcpu->hv_clock.version)); |
| 1838 | 1838 | ||
| 1839 | smp_wmb(); | 1839 | smp_wmb(); |
| 1840 | 1840 | ||
| @@ -1848,16 +1848,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) | |||
| 1848 | 1848 | ||
| 1849 | trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); | 1849 | trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); |
| 1850 | 1850 | ||
| 1851 | kvm_write_guest_cached(v->kvm, &vcpu->pv_time, | 1851 | kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, |
| 1852 | &vcpu->hv_clock, | 1852 | &vcpu->hv_clock, |
| 1853 | sizeof(vcpu->hv_clock)); | 1853 | sizeof(vcpu->hv_clock)); |
| 1854 | 1854 | ||
| 1855 | smp_wmb(); | 1855 | smp_wmb(); |
| 1856 | 1856 | ||
| 1857 | vcpu->hv_clock.version++; | 1857 | vcpu->hv_clock.version++; |
| 1858 | kvm_write_guest_cached(v->kvm, &vcpu->pv_time, | 1858 | kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, |
| 1859 | &vcpu->hv_clock, | 1859 | &vcpu->hv_clock, |
| 1860 | sizeof(vcpu->hv_clock.version)); | 1860 | sizeof(vcpu->hv_clock.version)); |
| 1861 | } | 1861 | } |
| 1862 | 1862 | ||
| 1863 | static int kvm_guest_time_update(struct kvm_vcpu *v) | 1863 | static int kvm_guest_time_update(struct kvm_vcpu *v) |
| @@ -2090,7 +2090,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
| 2090 | return 0; | 2090 | return 0; |
| 2091 | } | 2091 | } |
| 2092 | 2092 | ||
| 2093 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, | 2093 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa, |
| 2094 | sizeof(u32))) | 2094 | sizeof(u32))) |
| 2095 | return 1; | 2095 | return 1; |
| 2096 | 2096 | ||
| @@ -2109,7 +2109,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) | |||
| 2109 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) | 2109 | if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) |
| 2110 | return; | 2110 | return; |
| 2111 | 2111 | ||
| 2112 | if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2112 | if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime, |
| 2113 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) | 2113 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) |
| 2114 | return; | 2114 | return; |
| 2115 | 2115 | ||
| @@ -2120,7 +2120,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) | |||
| 2120 | 2120 | ||
| 2121 | vcpu->arch.st.steal.version += 1; | 2121 | vcpu->arch.st.steal.version += 1; |
| 2122 | 2122 | ||
| 2123 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2123 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, |
| 2124 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | 2124 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); |
| 2125 | 2125 | ||
| 2126 | smp_wmb(); | 2126 | smp_wmb(); |
| @@ -2129,14 +2129,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) | |||
| 2129 | vcpu->arch.st.last_steal; | 2129 | vcpu->arch.st.last_steal; |
| 2130 | vcpu->arch.st.last_steal = current->sched_info.run_delay; | 2130 | vcpu->arch.st.last_steal = current->sched_info.run_delay; |
| 2131 | 2131 | ||
| 2132 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2132 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, |
| 2133 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | 2133 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); |
| 2134 | 2134 | ||
| 2135 | smp_wmb(); | 2135 | smp_wmb(); |
| 2136 | 2136 | ||
| 2137 | vcpu->arch.st.steal.version += 1; | 2137 | vcpu->arch.st.steal.version += 1; |
| 2138 | 2138 | ||
| 2139 | kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2139 | kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, |
| 2140 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); | 2140 | &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); |
| 2141 | } | 2141 | } |
| 2142 | 2142 | ||
| @@ -2241,7 +2241,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2241 | if (!(data & 1)) | 2241 | if (!(data & 1)) |
| 2242 | break; | 2242 | break; |
| 2243 | 2243 | ||
| 2244 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, | 2244 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, |
| 2245 | &vcpu->arch.pv_time, data & ~1ULL, | 2245 | &vcpu->arch.pv_time, data & ~1ULL, |
| 2246 | sizeof(struct pvclock_vcpu_time_info))) | 2246 | sizeof(struct pvclock_vcpu_time_info))) |
| 2247 | vcpu->arch.pv_time_enabled = false; | 2247 | vcpu->arch.pv_time_enabled = false; |
| @@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2262 | if (data & KVM_STEAL_RESERVED_MASK) | 2262 | if (data & KVM_STEAL_RESERVED_MASK) |
| 2263 | return 1; | 2263 | return 1; |
| 2264 | 2264 | ||
| 2265 | if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, | 2265 | if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime, |
| 2266 | data & KVM_STEAL_VALID_BITS, | 2266 | data & KVM_STEAL_VALID_BITS, |
| 2267 | sizeof(struct kvm_steal_time))) | 2267 | sizeof(struct kvm_steal_time))) |
| 2268 | return 1; | 2268 | return 1; |
| @@ -2672,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 2672 | case KVM_CAP_DISABLE_QUIRKS: | 2672 | case KVM_CAP_DISABLE_QUIRKS: |
| 2673 | case KVM_CAP_SET_BOOT_CPU_ID: | 2673 | case KVM_CAP_SET_BOOT_CPU_ID: |
| 2674 | case KVM_CAP_SPLIT_IRQCHIP: | 2674 | case KVM_CAP_SPLIT_IRQCHIP: |
| 2675 | case KVM_CAP_IMMEDIATE_EXIT: | ||
| 2675 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2676 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
| 2676 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2677 | case KVM_CAP_ASSIGN_DEV_IRQ: |
| 2677 | case KVM_CAP_PCI_2_3: | 2678 | case KVM_CAP_PCI_2_3: |
| @@ -2875,7 +2876,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) | |||
| 2875 | 2876 | ||
| 2876 | vcpu->arch.st.steal.preempted = 1; | 2877 | vcpu->arch.st.steal.preempted = 1; |
| 2877 | 2878 | ||
| 2878 | kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, | 2879 | kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime, |
| 2879 | &vcpu->arch.st.steal.preempted, | 2880 | &vcpu->arch.st.steal.preempted, |
| 2880 | offsetof(struct kvm_steal_time, preempted), | 2881 | offsetof(struct kvm_steal_time, preempted), |
| 2881 | sizeof(vcpu->arch.st.steal.preempted)); | 2882 | sizeof(vcpu->arch.st.steal.preempted)); |
| @@ -2909,7 +2910,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 2909 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2910 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
| 2910 | struct kvm_lapic_state *s) | 2911 | struct kvm_lapic_state *s) |
| 2911 | { | 2912 | { |
| 2912 | if (vcpu->arch.apicv_active) | 2913 | if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) |
| 2913 | kvm_x86_ops->sync_pir_to_irr(vcpu); | 2914 | kvm_x86_ops->sync_pir_to_irr(vcpu); |
| 2914 | 2915 | ||
| 2915 | return kvm_apic_get_state(vcpu, s); | 2916 | return kvm_apic_get_state(vcpu, s); |
| @@ -6659,7 +6660,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
| 6659 | if (irqchip_split(vcpu->kvm)) | 6660 | if (irqchip_split(vcpu->kvm)) |
| 6660 | kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); | 6661 | kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); |
| 6661 | else { | 6662 | else { |
| 6662 | if (vcpu->arch.apicv_active) | 6663 | if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) |
| 6663 | kvm_x86_ops->sync_pir_to_irr(vcpu); | 6664 | kvm_x86_ops->sync_pir_to_irr(vcpu); |
| 6664 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); | 6665 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); |
| 6665 | } | 6666 | } |
| @@ -6750,10 +6751,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 6750 | r = 0; | 6751 | r = 0; |
| 6751 | goto out; | 6752 | goto out; |
| 6752 | } | 6753 | } |
| 6753 | if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { | ||
| 6754 | vcpu->fpu_active = 0; | ||
| 6755 | kvm_x86_ops->fpu_deactivate(vcpu); | ||
| 6756 | } | ||
| 6757 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { | 6754 | if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { |
| 6758 | /* Page is swapped out. Do synthetic halt */ | 6755 | /* Page is swapped out. Do synthetic halt */ |
| 6759 | vcpu->arch.apf.halted = true; | 6756 | vcpu->arch.apf.halted = true; |
| @@ -6813,20 +6810,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 6813 | kvm_hv_process_stimers(vcpu); | 6810 | kvm_hv_process_stimers(vcpu); |
| 6814 | } | 6811 | } |
| 6815 | 6812 | ||
| 6816 | /* | ||
| 6817 | * KVM_REQ_EVENT is not set when posted interrupts are set by | ||
| 6818 | * VT-d hardware, so we have to update RVI unconditionally. | ||
| 6819 | */ | ||
| 6820 | if (kvm_lapic_enabled(vcpu)) { | ||
| 6821 | /* | ||
| 6822 | * Update architecture specific hints for APIC | ||
| 6823 | * virtual interrupt delivery. | ||
| 6824 | */ | ||
| 6825 | if (vcpu->arch.apicv_active) | ||
| 6826 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
| 6827 | kvm_lapic_find_highest_irr(vcpu)); | ||
| 6828 | } | ||
| 6829 | |||
| 6830 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 6813 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
| 6831 | ++vcpu->stat.req_event; | 6814 | ++vcpu->stat.req_event; |
| 6832 | kvm_apic_accept_events(vcpu); | 6815 | kvm_apic_accept_events(vcpu); |
| @@ -6869,22 +6852,40 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 6869 | preempt_disable(); | 6852 | preempt_disable(); |
| 6870 | 6853 | ||
| 6871 | kvm_x86_ops->prepare_guest_switch(vcpu); | 6854 | kvm_x86_ops->prepare_guest_switch(vcpu); |
| 6872 | if (vcpu->fpu_active) | 6855 | kvm_load_guest_fpu(vcpu); |
| 6873 | kvm_load_guest_fpu(vcpu); | 6856 | |
| 6857 | /* | ||
| 6858 | * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt | ||
| 6859 | * IPIs are then delayed after guest entry, which ensures that they | ||
| 6860 | * result in virtual interrupt delivery. | ||
| 6861 | */ | ||
| 6862 | local_irq_disable(); | ||
| 6874 | vcpu->mode = IN_GUEST_MODE; | 6863 | vcpu->mode = IN_GUEST_MODE; |
| 6875 | 6864 | ||
| 6876 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | 6865 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
| 6877 | 6866 | ||
| 6878 | /* | 6867 | /* |
| 6879 | * We should set ->mode before check ->requests, | 6868 | * 1) We should set ->mode before checking ->requests. Please see |
| 6880 | * Please see the comment in kvm_make_all_cpus_request. | 6869 | * the comment in kvm_make_all_cpus_request. |
| 6881 | * This also orders the write to mode from any reads | 6870 | * |
| 6882 | * to the page tables done while the VCPU is running. | 6871 | * 2) For APICv, we should set ->mode before checking PIR.ON. This |
| 6883 | * Please see the comment in kvm_flush_remote_tlbs. | 6872 | * pairs with the memory barrier implicit in pi_test_and_set_on |
| 6873 | * (see vmx_deliver_posted_interrupt). | ||
| 6874 | * | ||
| 6875 | * 3) This also orders the write to mode from any reads to the page | ||
| 6876 | * tables done while the VCPU is running. Please see the comment | ||
| 6877 | * in kvm_flush_remote_tlbs. | ||
| 6884 | */ | 6878 | */ |
| 6885 | smp_mb__after_srcu_read_unlock(); | 6879 | smp_mb__after_srcu_read_unlock(); |
| 6886 | 6880 | ||
| 6887 | local_irq_disable(); | 6881 | /* |
| 6882 | * This handles the case where a posted interrupt was | ||
| 6883 | * notified with kvm_vcpu_kick. | ||
| 6884 | */ | ||
| 6885 | if (kvm_lapic_enabled(vcpu)) { | ||
| 6886 | if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) | ||
| 6887 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
| 6888 | } | ||
| 6888 | 6889 | ||
| 6889 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests | 6890 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests |
| 6890 | || need_resched() || signal_pending(current)) { | 6891 | || need_resched() || signal_pending(current)) { |
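The numbered comment above describes a store-then-load handshake between the vCPU thread and anyone delivering a posted interrupt to it. A small C11 sketch of point 2 of that comment (names invented, not KVM symbols): each side publishes its own flag before inspecting the other's, so at least one side always notices the other and a vector parked in the PIR cannot be stranded.

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic int  mode;	/* 0 = in the host, 1 = IN_GUEST_MODE      */
static _Atomic bool pir_on;	/* stand-in for the PI descriptor's ON bit */

/* vCPU thread, on the entry path shown above. */
static void vcpu_entry_side(void)
{
	atomic_store(&mode, 1);
	/* smp_mb__after_srcu_read_unlock() in the hunk above. */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_exchange(&pir_on, false)) {
		/* sync_pir_to_irr(): fold pending bits into the IRR
		 * before the guest is entered. */
	}
}

/* Sender delivering an interrupt to that vCPU. */
static void sender_side(void)
{
	atomic_store(&pir_on, true);
	/* Barrier implicit in pi_test_and_set_on(). */
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load(&mode) == 1) {
		/* Target is entering or in the guest: the posted-interrupt
		 * IPI makes the CPU deliver it as a virtual interrupt. */
	} else {
		/* Target is in the host: kick it; the check in
		 * vcpu_entry_side() picks the vector up on entry. */
	}
}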
| @@ -7023,6 +7024,9 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) | |||
| 7023 | 7024 | ||
| 7024 | static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) | 7025 | static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) |
| 7025 | { | 7026 | { |
| 7027 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) | ||
| 7028 | kvm_x86_ops->check_nested_events(vcpu, false); | ||
| 7029 | |||
| 7026 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 7030 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
| 7027 | !vcpu->arch.apf.halted); | 7031 | !vcpu->arch.apf.halted); |
| 7028 | } | 7032 | } |
| @@ -7194,7 +7198,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 7194 | } else | 7198 | } else |
| 7195 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); | 7199 | WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); |
| 7196 | 7200 | ||
| 7197 | r = vcpu_run(vcpu); | 7201 | if (kvm_run->immediate_exit) |
| 7202 | r = -EINTR; | ||
| 7203 | else | ||
| 7204 | r = vcpu_run(vcpu); | ||
| 7198 | 7205 | ||
| 7199 | out: | 7206 | out: |
| 7200 | post_kvm_run_save(vcpu); | 7207 | post_kvm_run_save(vcpu); |
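The immediate_exit branch above is the kernel half of the new kick protocol; below is a hedged sketch of the userspace half, assuming a VMM that already created the vCPU (vcpu_fd) and mmap()ed its struct kvm_run (run). The signal handler only flips the flag, and the next KVM_RUN returns -EINTR instead of entering the guest.

#include <errno.h>
#include <signal.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assumed to be set up elsewhere by the VMM. */
extern int vcpu_fd;		/* from KVM_CREATE_VCPU           */
extern struct kvm_run *run;	/* mmap()ed region of the vcpu fd */

static void kick_handler(int sig)
{
	(void)sig;
	run->immediate_exit = 1;	/* plain store, async-signal-safe */
}

static void install_kick_handler(int signum)
{
	struct sigaction sa = { .sa_handler = kick_handler };

	sigemptyset(&sa.sa_mask);
	sigaction(signum, &sa, NULL);
}

static int run_vcpu_once(void)
{
	run->immediate_exit = 0;	/* re-arm before every entry */

	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return errno == EINTR ? 0 : -errno;	/* 0 == kicked */

	/* dispatch on run->exit_reason here */
	return 1;
}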
| @@ -8389,9 +8396,6 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) | |||
| 8389 | 8396 | ||
| 8390 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 8397 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
| 8391 | { | 8398 | { |
| 8392 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) | ||
| 8393 | kvm_x86_ops->check_nested_events(vcpu, false); | ||
| 8394 | |||
| 8395 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); | 8399 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); |
| 8396 | } | 8400 | } |
| 8397 | 8401 | ||
| @@ -8528,9 +8532,8 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
| 8528 | 8532 | ||
| 8529 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) | 8533 | static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) |
| 8530 | { | 8534 | { |
| 8531 | 8535 | return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val, | |
| 8532 | return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, | 8536 | sizeof(val)); |
| 8533 | sizeof(val)); | ||
| 8534 | } | 8537 | } |
| 8535 | 8538 | ||
| 8536 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, | 8539 | void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, |
diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c index 0a54e8326a90..09b4df74291e 100644 --- a/drivers/ptp/ptp_kvm.c +++ b/drivers/ptp/ptp_kvm.c | |||
| @@ -176,12 +176,19 @@ static void __exit ptp_kvm_exit(void) | |||
| 176 | 176 | ||
| 177 | static int __init ptp_kvm_init(void) | 177 | static int __init ptp_kvm_init(void) |
| 178 | { | 178 | { |
| 179 | long ret; | ||
| 180 | |||
| 179 | clock_pair_gpa = slow_virt_to_phys(&clock_pair); | 181 | clock_pair_gpa = slow_virt_to_phys(&clock_pair); |
| 180 | hv_clock = pvclock_pvti_cpu0_va(); | 182 | hv_clock = pvclock_pvti_cpu0_va(); |
| 181 | 183 | ||
| 182 | if (!hv_clock) | 184 | if (!hv_clock) |
| 183 | return -ENODEV; | 185 | return -ENODEV; |
| 184 | 186 | ||
| 187 | ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, | ||
| 188 | KVM_CLOCK_PAIRING_WALLCLOCK); | ||
| 189 | if (ret == -KVM_ENOSYS || ret == -KVM_EOPNOTSUPP) | ||
| 190 | return -ENODEV; | ||
| 191 | |||
| 185 | kvm_ptp_clock.caps = ptp_kvm_caps; | 192 | kvm_ptp_clock.caps = ptp_kvm_caps; |
| 186 | 193 | ||
| 187 | kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); | 194 | kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cda457bcedc1..8d69d5150748 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -221,7 +221,6 @@ struct kvm_vcpu { | |||
| 221 | struct mutex mutex; | 221 | struct mutex mutex; |
| 222 | struct kvm_run *run; | 222 | struct kvm_run *run; |
| 223 | 223 | ||
| 224 | int fpu_active; | ||
| 225 | int guest_fpu_loaded, guest_xcr0_loaded; | 224 | int guest_fpu_loaded, guest_xcr0_loaded; |
| 226 | struct swait_queue_head wq; | 225 | struct swait_queue_head wq; |
| 227 | struct pid *pid; | 226 | struct pid *pid; |
| @@ -641,18 +640,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
| 641 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | 640 | int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, |
| 642 | unsigned long len); | 641 | unsigned long len); |
| 643 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); | 642 | int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); |
| 644 | int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 643 | int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
| 645 | void *data, unsigned long len); | 644 | void *data, unsigned long len); |
| 646 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, | 645 | int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, |
| 647 | int offset, int len); | 646 | int offset, int len); |
| 648 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, | 647 | int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, |
| 649 | unsigned long len); | 648 | unsigned long len); |
| 650 | int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 649 | int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, |
| 651 | void *data, unsigned long len); | 650 | void *data, unsigned long len); |
| 652 | int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 651 | int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, |
| 653 | void *data, int offset, unsigned long len); | 652 | void *data, int offset, unsigned long len); |
| 654 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 653 | int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, |
| 655 | gpa_t gpa, unsigned long len); | 654 | gpa_t gpa, unsigned long len); |
| 656 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); | 655 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); |
| 657 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); | 656 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); |
| 658 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); | 657 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7964b970b9ad..f51d5082a377 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
| @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { | |||
| 218 | struct kvm_run { | 218 | struct kvm_run { |
| 219 | /* in */ | 219 | /* in */ |
| 220 | __u8 request_interrupt_window; | 220 | __u8 request_interrupt_window; |
| 221 | __u8 padding1[7]; | 221 | __u8 immediate_exit; |
| 222 | __u8 padding1[6]; | ||
| 222 | 223 | ||
| 223 | /* out */ | 224 | /* out */ |
| 224 | __u32 exit_reason; | 225 | __u32 exit_reason; |
| @@ -881,6 +882,7 @@ struct kvm_ppc_resize_hpt { | |||
| 881 | #define KVM_CAP_SPAPR_RESIZE_HPT 133 | 882 | #define KVM_CAP_SPAPR_RESIZE_HPT 133 |
| 882 | #define KVM_CAP_PPC_MMU_RADIX 134 | 883 | #define KVM_CAP_PPC_MMU_RADIX 134 |
| 883 | #define KVM_CAP_PPC_MMU_HASH_V3 135 | 884 | #define KVM_CAP_PPC_MMU_HASH_V3 135 |
| 885 | #define KVM_CAP_IMMEDIATE_EXIT 136 | ||
| 884 | 886 | ||
| 885 | #ifdef KVM_CAP_IRQ_ROUTING | 887 | #ifdef KVM_CAP_IRQ_ROUTING |
| 886 | 888 | ||
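Editor's note: pairing the new run->immediate_exit field and KVM_CAP_IMMEDIATE_EXIT above with the -EINTR path added to kvm_arch_vcpu_ioctl_run() in the x86 hunk suggests a userspace pattern roughly like the sketch below. It is not part of the patch; "run" is assumed to be the usual mmap()ed struct kvm_run of the vcpu fd, KVM_CAP_IMMEDIATE_EXIT is assumed to have been checked, and a production loop would add its own request bookkeeping and memory ordering. This only shows the mechanism.

	/* Hypothetical userspace sketch, not from the patch. */
	#include <errno.h>
	#include <signal.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static struct kvm_run *run;	/* set up elsewhere via mmap() on the vcpu fd */

	static void kick_handler(int sig)
	{
		run->immediate_exit = 1;	/* next KVM_RUN returns -EINTR right away */
	}

	static int vcpu_loop(int vcpu_fd)
	{
		signal(SIGUSR1, kick_handler);	/* a kicking thread sends SIGUSR1 */

		for (;;) {
			if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
				if (errno != EINTR)
					return -1;
				/* kicked: service the request, then re-enter the guest */
				run->immediate_exit = 0;
				continue;
			}
			/* ... dispatch run->exit_reason ... */
		}
	}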
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 482612b4e496..cc4d6e0dd2a2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -506,11 +506,6 @@ static struct kvm_memslots *kvm_alloc_memslots(void) | |||
| 506 | if (!slots) | 506 | if (!slots) |
| 507 | return NULL; | 507 | return NULL; |
| 508 | 508 | ||
| 509 | /* | ||
| 510 | * Init kvm generation close to the maximum to easily test the | ||
| 511 | * code of handling generation number wrap-around. | ||
| 512 | */ | ||
| 513 | slots->generation = -150; | ||
| 514 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) | 509 | for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) |
| 515 | slots->id_to_index[i] = slots->memslots[i].id = i; | 510 | slots->id_to_index[i] = slots->memslots[i].id = i; |
| 516 | 511 | ||
| @@ -641,9 +636,16 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
| 641 | 636 | ||
| 642 | r = -ENOMEM; | 637 | r = -ENOMEM; |
| 643 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { | 638 | for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { |
| 644 | kvm->memslots[i] = kvm_alloc_memslots(); | 639 | struct kvm_memslots *slots = kvm_alloc_memslots(); |
| 645 | if (!kvm->memslots[i]) | 640 | if (!slots) |
| 646 | goto out_err_no_srcu; | 641 | goto out_err_no_srcu; |
| 642 | /* | ||
| 643 | * Generations must be different for each address space. | ||
| 644 | * Init kvm generation close to the maximum to easily test the | ||
| 645 | * code of handling generation number wrap-around. | ||
| 646 | */ | ||
| 647 | slots->generation = i * 2 - 150; | ||
| 648 | rcu_assign_pointer(kvm->memslots[i], slots); | ||
| 647 | } | 649 | } |
| 648 | 650 | ||
| 649 | if (init_srcu_struct(&kvm->srcu)) | 651 | if (init_srcu_struct(&kvm->srcu)) |
| @@ -870,8 +872,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
| 870 | * Increment the new memslot generation a second time. This prevents | 872 | * Increment the new memslot generation a second time. This prevents |
| 871 | * vm exits that race with memslot updates from caching a memslot | 873 | * vm exits that race with memslot updates from caching a memslot |
| 872 | * generation that will (potentially) be valid forever. | 874 | * generation that will (potentially) be valid forever. |
| 875 | * | ||
| 876 | * Generations must be unique even across address spaces. We do not need | ||
| 877 | * a global counter for that, instead the generation space is evenly split | ||
| 878 | * across address spaces. For example, with two address spaces, address | ||
| 879 | * space 0 will use generations 0, 4, 8, ... while address space 1 will | ||
| 880 | * use generations 2, 6, 10, 14, ... | ||
| 873 | */ | 881 | */ |
| 874 | slots->generation++; | 882 | slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1; |
| 875 | 883 | ||
| 876 | kvm_arch_memslots_updated(kvm, slots); | 884 | kvm_arch_memslots_updated(kvm, slots); |
| 877 | 885 | ||
| @@ -1094,37 +1102,31 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
| 1094 | { | 1102 | { |
| 1095 | struct kvm_memslots *slots; | 1103 | struct kvm_memslots *slots; |
| 1096 | struct kvm_memory_slot *memslot; | 1104 | struct kvm_memory_slot *memslot; |
| 1097 | int r, i, as_id, id; | 1105 | int i, as_id, id; |
| 1098 | unsigned long n; | 1106 | unsigned long n; |
| 1099 | unsigned long any = 0; | 1107 | unsigned long any = 0; |
| 1100 | 1108 | ||
| 1101 | r = -EINVAL; | ||
| 1102 | as_id = log->slot >> 16; | 1109 | as_id = log->slot >> 16; |
| 1103 | id = (u16)log->slot; | 1110 | id = (u16)log->slot; |
| 1104 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | 1111 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) |
| 1105 | goto out; | 1112 | return -EINVAL; |
| 1106 | 1113 | ||
| 1107 | slots = __kvm_memslots(kvm, as_id); | 1114 | slots = __kvm_memslots(kvm, as_id); |
| 1108 | memslot = id_to_memslot(slots, id); | 1115 | memslot = id_to_memslot(slots, id); |
| 1109 | r = -ENOENT; | ||
| 1110 | if (!memslot->dirty_bitmap) | 1116 | if (!memslot->dirty_bitmap) |
| 1111 | goto out; | 1117 | return -ENOENT; |
| 1112 | 1118 | ||
| 1113 | n = kvm_dirty_bitmap_bytes(memslot); | 1119 | n = kvm_dirty_bitmap_bytes(memslot); |
| 1114 | 1120 | ||
| 1115 | for (i = 0; !any && i < n/sizeof(long); ++i) | 1121 | for (i = 0; !any && i < n/sizeof(long); ++i) |
| 1116 | any = memslot->dirty_bitmap[i]; | 1122 | any = memslot->dirty_bitmap[i]; |
| 1117 | 1123 | ||
| 1118 | r = -EFAULT; | ||
| 1119 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) | 1124 | if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) |
| 1120 | goto out; | 1125 | return -EFAULT; |
| 1121 | 1126 | ||
| 1122 | if (any) | 1127 | if (any) |
| 1123 | *is_dirty = 1; | 1128 | *is_dirty = 1; |
| 1124 | 1129 | return 0; | |
| 1125 | r = 0; | ||
| 1126 | out: | ||
| 1127 | return r; | ||
| 1128 | } | 1130 | } |
| 1129 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log); | 1131 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log); |
| 1130 | 1132 | ||
| @@ -1156,24 +1158,22 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
| 1156 | { | 1158 | { |
| 1157 | struct kvm_memslots *slots; | 1159 | struct kvm_memslots *slots; |
| 1158 | struct kvm_memory_slot *memslot; | 1160 | struct kvm_memory_slot *memslot; |
| 1159 | int r, i, as_id, id; | 1161 | int i, as_id, id; |
| 1160 | unsigned long n; | 1162 | unsigned long n; |
| 1161 | unsigned long *dirty_bitmap; | 1163 | unsigned long *dirty_bitmap; |
| 1162 | unsigned long *dirty_bitmap_buffer; | 1164 | unsigned long *dirty_bitmap_buffer; |
| 1163 | 1165 | ||
| 1164 | r = -EINVAL; | ||
| 1165 | as_id = log->slot >> 16; | 1166 | as_id = log->slot >> 16; |
| 1166 | id = (u16)log->slot; | 1167 | id = (u16)log->slot; |
| 1167 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) | 1168 | if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) |
| 1168 | goto out; | 1169 | return -EINVAL; |
| 1169 | 1170 | ||
| 1170 | slots = __kvm_memslots(kvm, as_id); | 1171 | slots = __kvm_memslots(kvm, as_id); |
| 1171 | memslot = id_to_memslot(slots, id); | 1172 | memslot = id_to_memslot(slots, id); |
| 1172 | 1173 | ||
| 1173 | dirty_bitmap = memslot->dirty_bitmap; | 1174 | dirty_bitmap = memslot->dirty_bitmap; |
| 1174 | r = -ENOENT; | ||
| 1175 | if (!dirty_bitmap) | 1175 | if (!dirty_bitmap) |
| 1176 | goto out; | 1176 | return -ENOENT; |
| 1177 | 1177 | ||
| 1178 | n = kvm_dirty_bitmap_bytes(memslot); | 1178 | n = kvm_dirty_bitmap_bytes(memslot); |
| 1179 | 1179 | ||
| @@ -1202,14 +1202,9 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, | |||
| 1202 | } | 1202 | } |
| 1203 | 1203 | ||
| 1204 | spin_unlock(&kvm->mmu_lock); | 1204 | spin_unlock(&kvm->mmu_lock); |
| 1205 | |||
| 1206 | r = -EFAULT; | ||
| 1207 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) | 1205 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) |
| 1208 | goto out; | 1206 | return -EFAULT; |
| 1209 | 1207 | return 0; | |
| 1210 | r = 0; | ||
| 1211 | out: | ||
| 1212 | return r; | ||
| 1213 | } | 1208 | } |
| 1214 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); | 1209 | EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); |
| 1215 | #endif | 1210 | #endif |
| @@ -1937,10 +1932,10 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, | |||
| 1937 | } | 1932 | } |
| 1938 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); | 1933 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest); |
| 1939 | 1934 | ||
| 1940 | int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 1935 | static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, |
| 1941 | gpa_t gpa, unsigned long len) | 1936 | struct gfn_to_hva_cache *ghc, |
| 1937 | gpa_t gpa, unsigned long len) | ||
| 1942 | { | 1938 | { |
| 1943 | struct kvm_memslots *slots = kvm_memslots(kvm); | ||
| 1944 | int offset = offset_in_page(gpa); | 1939 | int offset = offset_in_page(gpa); |
| 1945 | gfn_t start_gfn = gpa >> PAGE_SHIFT; | 1940 | gfn_t start_gfn = gpa >> PAGE_SHIFT; |
| 1946 | gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; | 1941 | gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT; |
| @@ -1950,7 +1945,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
| 1950 | ghc->gpa = gpa; | 1945 | ghc->gpa = gpa; |
| 1951 | ghc->generation = slots->generation; | 1946 | ghc->generation = slots->generation; |
| 1952 | ghc->len = len; | 1947 | ghc->len = len; |
| 1953 | ghc->memslot = gfn_to_memslot(kvm, start_gfn); | 1948 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); |
| 1954 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); | 1949 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); |
| 1955 | if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { | 1950 | if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { |
| 1956 | ghc->hva += offset; | 1951 | ghc->hva += offset; |
| @@ -1960,7 +1955,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
| 1960 | * verify that the entire region is valid here. | 1955 | * verify that the entire region is valid here. |
| 1961 | */ | 1956 | */ |
| 1962 | while (start_gfn <= end_gfn) { | 1957 | while (start_gfn <= end_gfn) { |
| 1963 | ghc->memslot = gfn_to_memslot(kvm, start_gfn); | 1958 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); |
| 1964 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, | 1959 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, |
| 1965 | &nr_pages_avail); | 1960 | &nr_pages_avail); |
| 1966 | if (kvm_is_error_hva(ghc->hva)) | 1961 | if (kvm_is_error_hva(ghc->hva)) |
| @@ -1972,22 +1967,29 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
| 1972 | } | 1967 | } |
| 1973 | return 0; | 1968 | return 0; |
| 1974 | } | 1969 | } |
| 1975 | EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); | ||
| 1976 | 1970 | ||
| 1977 | int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 1971 | int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
| 1978 | void *data, int offset, unsigned long len) | 1972 | gpa_t gpa, unsigned long len) |
| 1979 | { | 1973 | { |
| 1980 | struct kvm_memslots *slots = kvm_memslots(kvm); | 1974 | struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); |
| 1975 | return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); | ||
| 1976 | } | ||
| 1977 | EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); | ||
| 1978 | |||
| 1979 | int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, | ||
| 1980 | void *data, int offset, unsigned long len) | ||
| 1981 | { | ||
| 1982 | struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); | ||
| 1981 | int r; | 1983 | int r; |
| 1982 | gpa_t gpa = ghc->gpa + offset; | 1984 | gpa_t gpa = ghc->gpa + offset; |
| 1983 | 1985 | ||
| 1984 | BUG_ON(len + offset > ghc->len); | 1986 | BUG_ON(len + offset > ghc->len); |
| 1985 | 1987 | ||
| 1986 | if (slots->generation != ghc->generation) | 1988 | if (slots->generation != ghc->generation) |
| 1987 | kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); | 1989 | __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); |
| 1988 | 1990 | ||
| 1989 | if (unlikely(!ghc->memslot)) | 1991 | if (unlikely(!ghc->memslot)) |
| 1990 | return kvm_write_guest(kvm, gpa, data, len); | 1992 | return kvm_vcpu_write_guest(vcpu, gpa, data, len); |
| 1991 | 1993 | ||
| 1992 | if (kvm_is_error_hva(ghc->hva)) | 1994 | if (kvm_is_error_hva(ghc->hva)) |
| 1993 | return -EFAULT; | 1995 | return -EFAULT; |
| @@ -1999,28 +2001,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
| 1999 | 2001 | ||
| 2000 | return 0; | 2002 | return 0; |
| 2001 | } | 2003 | } |
| 2002 | EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); | 2004 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); |
| 2003 | 2005 | ||
| 2004 | int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 2006 | int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
| 2005 | void *data, unsigned long len) | 2007 | void *data, unsigned long len) |
| 2006 | { | 2008 | { |
| 2007 | return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); | 2009 | return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); |
| 2008 | } | 2010 | } |
| 2009 | EXPORT_SYMBOL_GPL(kvm_write_guest_cached); | 2011 | EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); |
| 2010 | 2012 | ||
| 2011 | int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | 2013 | int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, |
| 2012 | void *data, unsigned long len) | 2014 | void *data, unsigned long len) |
| 2013 | { | 2015 | { |
| 2014 | struct kvm_memslots *slots = kvm_memslots(kvm); | 2016 | struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); |
| 2015 | int r; | 2017 | int r; |
| 2016 | 2018 | ||
| 2017 | BUG_ON(len > ghc->len); | 2019 | BUG_ON(len > ghc->len); |
| 2018 | 2020 | ||
| 2019 | if (slots->generation != ghc->generation) | 2021 | if (slots->generation != ghc->generation) |
| 2020 | kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); | 2022 | __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); |
| 2021 | 2023 | ||
| 2022 | if (unlikely(!ghc->memslot)) | 2024 | if (unlikely(!ghc->memslot)) |
| 2023 | return kvm_read_guest(kvm, ghc->gpa, data, len); | 2025 | return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); |
| 2024 | 2026 | ||
| 2025 | if (kvm_is_error_hva(ghc->hva)) | 2027 | if (kvm_is_error_hva(ghc->hva)) |
| 2026 | return -EFAULT; | 2028 | return -EFAULT; |
| @@ -2031,7 +2033,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, | |||
| 2031 | 2033 | ||
| 2032 | return 0; | 2034 | return 0; |
| 2033 | } | 2035 | } |
| 2034 | EXPORT_SYMBOL_GPL(kvm_read_guest_cached); | 2036 | EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); |
| 2035 | 2037 | ||
| 2036 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) | 2038 | int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) |
| 2037 | { | 2039 | { |
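Editor's note: for reference, a caller-side sketch of the renamed vcpu-based cached accessors, using the signatures declared in the kvm_host.h hunk above. The field and function names here (example_cache, example_init, and so on) are illustrative only; the real conversion in this series is the apf_put_user() change shown in the x86 hunk.

	#include <linux/kvm_host.h>

	static struct gfn_to_hva_cache example_cache;	/* hypothetical per-vcpu cache */

	static int example_init(struct kvm_vcpu *vcpu, gpa_t gpa)
	{
		/* Resolve and cache the gpa -> hva mapping against the vcpu's memslots. */
		return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &example_cache, gpa, sizeof(u32));
	}

	static int example_write(struct kvm_vcpu *vcpu, u32 val)
	{
		/* Re-validated internally if the memslot generation has changed. */
		return kvm_vcpu_write_guest_cached(vcpu, &example_cache, &val, sizeof(val));
	}

	static int example_read(struct kvm_vcpu *vcpu, u32 *val)
	{
		return kvm_vcpu_read_guest_cached(vcpu, &example_cache, val, sizeof(*val));
	}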
| @@ -3133,10 +3135,9 @@ static long kvm_vm_compat_ioctl(struct file *filp, | |||
| 3133 | struct compat_kvm_dirty_log compat_log; | 3135 | struct compat_kvm_dirty_log compat_log; |
| 3134 | struct kvm_dirty_log log; | 3136 | struct kvm_dirty_log log; |
| 3135 | 3137 | ||
| 3136 | r = -EFAULT; | ||
| 3137 | if (copy_from_user(&compat_log, (void __user *)arg, | 3138 | if (copy_from_user(&compat_log, (void __user *)arg, |
| 3138 | sizeof(compat_log))) | 3139 | sizeof(compat_log))) |
| 3139 | goto out; | 3140 | return -EFAULT; |
| 3140 | log.slot = compat_log.slot; | 3141 | log.slot = compat_log.slot; |
| 3141 | log.padding1 = compat_log.padding1; | 3142 | log.padding1 = compat_log.padding1; |
| 3142 | log.padding2 = compat_log.padding2; | 3143 | log.padding2 = compat_log.padding2; |
| @@ -3148,8 +3149,6 @@ static long kvm_vm_compat_ioctl(struct file *filp, | |||
| 3148 | default: | 3149 | default: |
| 3149 | r = kvm_vm_ioctl(filp, ioctl, arg); | 3150 | r = kvm_vm_ioctl(filp, ioctl, arg); |
| 3150 | } | 3151 | } |
| 3151 | |||
| 3152 | out: | ||
| 3153 | return r; | 3152 | return r; |
| 3154 | } | 3153 | } |
| 3155 | #endif | 3154 | #endif |
