author     Linus Torvalds <torvalds@linux-foundation.org>  2013-05-05 17:47:31 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-05-05 17:47:31 -0400
commit     01227a889ed56ae53aeebb9f93be9d54dd8b2de8 (patch)
tree       d5eba9359a9827e84d4112b84d48c54df5c5acde /arch/x86
parent     9e6879460c8edb0cd3c24c09b83d06541b5af0dc (diff)
parent     db6ae6158186a17165ef990bda2895ae7594b039 (diff)
Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Gleb Natapov:
"Highlights of the updates are:
general:
- new emulated device API
- legacy device assignment is now optional
- irqfd interface is more generic and can be shared between arches
x86:
- VMCS shadow support and other nested VMX improvements
- APIC virtualization and Posted Interrupt hardware support
- Optimize mmio spte zapping
ppc:
- BookE: in-kernel MPIC emulation with irqfd support
- Book3S: in-kernel XICS emulation (incomplete)
- Book3S: HV: migration fixes
- BookE: more debug support preparation
- BookE: e6500 support
ARM:
- reworking of Hyp idmaps
s390:
- ioeventfd for virtio-ccw
And many other bug fixes, cleanups and improvements"
* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
kvm: Add compat_ioctl for device control API
KVM: x86: Account for failing enable_irq_window for NMI window request
KVM: PPC: Book3S: Add API for in-kernel XICS emulation
kvm/ppc/mpic: fix missing unlock in set_base_addr()
kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
kvm/ppc/mpic: remove users
kvm/ppc/mpic: fix mmio region lists when multiple guests used
kvm/ppc/mpic: remove default routes from documentation
kvm: KVM_CAP_IOMMU only available with device assignment
ARM: KVM: iterate over all CPUs for CPU compatibility check
KVM: ARM: Fix spelling in error message
ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
KVM: ARM: Fix API documentation for ONE_REG encoding
ARM: KVM: promote vfp_host pointer to generic host cpu context
ARM: KVM: add architecture specific hook for capabilities
ARM: KVM: perform HYP initilization for hotplugged CPUs
ARM: KVM: switch to a dual-step HYP init code
ARM: KVM: rework HYP page table freeing
ARM: KVM: enforce maximum size for identity mapped code
ARM: KVM: move to a KVM provided HYP idmap
...
Diffstat (limited to 'arch/x86')
 arch/x86/include/asm/entry_arch.h     |    4
 arch/x86/include/asm/hardirq.h        |    3
 arch/x86/include/asm/hw_irq.h         |    1
 arch/x86/include/asm/irq_vectors.h    |    5
 arch/x86/include/asm/kvm_host.h       |   26
 arch/x86/include/asm/vmx.h            |   18
 arch/x86/include/uapi/asm/kvm.h       |    1
 arch/x86/include/uapi/asm/msr-index.h |    2
 arch/x86/include/uapi/asm/vmx.h       |    5
 arch/x86/kernel/entry_64.S            |    5
 arch/x86/kernel/irq.c                 |   22
 arch/x86/kernel/irqinit.c             |    4
 arch/x86/kernel/kvmclock.c            |    9
 arch/x86/kvm/Kconfig                  |   14
 arch/x86/kvm/Makefile                 |    5
 arch/x86/kvm/emulate.c                |   31
 arch/x86/kvm/i8254.c                  |    4
 arch/x86/kvm/lapic.c                  |  189
 arch/x86/kvm/lapic.h                  |   22
 arch/x86/kvm/mmu.c                    |  108
 arch/x86/kvm/mmu.h                    |   11
 arch/x86/kvm/paging_tmpl.h            |    2
 arch/x86/kvm/pmu.c                    |   14
 arch/x86/kvm/svm.c                    |   40
 arch/x86/kvm/vmx.c                    | 1077
 arch/x86/kvm/x86.c                    |  243
26 files changed, 1344 insertions(+), 521 deletions(-)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 40afa0005c69..9bd4ecac72be 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
 BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
+#ifdef CONFIG_HAVE_KVM
+BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
+#endif
+
 /*
  * every pentium local APIC has two 'local interrupts', with a
  * soft-definable vector attached to both interrupts, one of
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 81f04cee5f74..ab0ae1aa6d0a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -12,6 +12,9 @@ typedef struct {
 	unsigned int irq_spurious_count;
 	unsigned int icr_read_retry_count;
 #endif
+#ifdef CONFIG_HAVE_KVM
+	unsigned int kvm_posted_intr_ipis;
+#endif
 	unsigned int x86_platform_ipis; /* arch dependent */
 	unsigned int apic_perf_irqs;
 	unsigned int apic_irq_work_irqs;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 10a78c3d3d5a..1da97efad08a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -28,6 +28,7 @@
 /* Interrupt handlers registered during init_IRQ */
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
+extern void kvm_posted_intr_ipi(void);
 extern void error_interrupt(void);
 extern void irq_work_interrupt(void);
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index aac5fa62a86c..5702d7e3111d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,11 @@
  */
 #define X86_PLATFORM_IPI_VECTOR		0xf7
 
+/* Vector for KVM to deliver posted interrupt IPI */
+#ifdef CONFIG_HAVE_KVM
+#define POSTED_INTR_VECTOR		0xf2
+#endif
+
 /*
  * IRQ work vector:
  */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4979778cc7fb..3741c653767c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -31,7 +31,7 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 
-#define KVM_MAX_VCPUS 254
+#define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
 #define KVM_USER_MEM_SLOTS 125
 /* memory slots that are not exposed to userspace */
@@ -43,6 +43,8 @@
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
 
+#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
+
 #define CR0_RESERVED_BITS \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
 			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@@ -94,9 +96,6 @@
 
 #define ASYNC_PF_PER_VCPU 64
 
-extern raw_spinlock_t kvm_lock;
-extern struct list_head vm_list;
-
 struct kvm_vcpu;
 struct kvm;
 struct kvm_async_pf;
@@ -230,6 +229,7 @@ struct kvm_mmu_page {
 #endif
 
 	int write_flooding_count;
+	bool mmio_cached;
 };
 
 struct kvm_pio_request {
@@ -345,7 +345,6 @@ struct kvm_vcpu_arch {
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
 	int mp_state;
-	int sipi_vector;
 	u64 ia32_misc_enable_msr;
 	bool tpr_access_reporting;
 
@@ -643,7 +642,7 @@ struct kvm_x86_ops {
 	/* Create, but do not attach this VCPU */
 	struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
 	void (*vcpu_free)(struct kvm_vcpu *vcpu);
-	int (*vcpu_reset)(struct kvm_vcpu *vcpu);
+	void (*vcpu_reset)(struct kvm_vcpu *vcpu);
 
 	void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
@@ -696,14 +695,16 @@ struct kvm_x86_ops {
 	int (*nmi_allowed)(struct kvm_vcpu *vcpu);
 	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
 	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
-	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
-	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
+	int (*enable_nmi_window)(struct kvm_vcpu *vcpu);
+	int (*enable_irq_window)(struct kvm_vcpu *vcpu);
 	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
 	int (*vm_has_apicv)(struct kvm *kvm);
 	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
 	void (*hwapic_isr_update)(struct kvm *kvm, int isr);
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+	void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
@@ -730,6 +731,7 @@ struct kvm_x86_ops {
 	int (*check_intercept)(struct kvm_vcpu *vcpu,
 			       struct x86_instruction_info *info,
 			       enum x86_intercept_stage stage);
+	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_zap_mmio_sptes(struct kvm *kvm);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
@@ -797,6 +800,7 @@ enum emulation_result {
 #define EMULTYPE_TRAP_UD (1 << 1)
 #define EMULTYPE_SKIP (1 << 2)
 #define EMULTYPE_RETRY (1 << 3)
+#define EMULTYPE_NO_REEXECUTE (1 << 4)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
 }
 
 void kvm_enable_efer_bits(u64);
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
 
@@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
@@ -973,7 +979,6 @@ enum {
 * Trap the fault and ignore the instruction if that happens.
 */
 asmlinkage void kvm_spurious_fault(void);
-extern bool kvm_rebooting;
 
 #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
 	"666: " insn "\n\t" \
@@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
 void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
@@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
 bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
 void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
 void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index b6fbf860e398..f3e01a2cbaa1 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -65,11 +65,16 @@
 #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
 
 
 #define PIN_BASED_EXT_INTR_MASK 0x00000001
 #define PIN_BASED_NMI_EXITING 0x00000008
 #define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
 
 #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
@@ -81,6 +86,8 @@
 #define VM_EXIT_LOAD_IA32_EFER 0x00200000
 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
 
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
 #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002
 #define VM_ENTRY_IA32E_MODE 0x00000200
 #define VM_ENTRY_SMM 0x00000400
@@ -89,9 +96,15 @@
 #define VM_ENTRY_LOAD_IA32_PAT 0x00004000
 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000
 
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
 /* VMCS Encodings */
 enum vmcs_field {
 	VIRTUAL_PROCESSOR_ID = 0x00000000,
+	POSTED_INTR_NV = 0x00000002,
 	GUEST_ES_SELECTOR = 0x00000800,
 	GUEST_CS_SELECTOR = 0x00000802,
 	GUEST_SS_SELECTOR = 0x00000804,
@@ -126,6 +139,8 @@ enum vmcs_field {
 	VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
 	APIC_ACCESS_ADDR = 0x00002014,
 	APIC_ACCESS_ADDR_HIGH = 0x00002015,
+	POSTED_INTR_DESC_ADDR = 0x00002016,
+	POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
 	EPT_POINTER = 0x0000201a,
 	EPT_POINTER_HIGH = 0x0000201b,
 	EOI_EXIT_BITMAP0 = 0x0000201c,
@@ -136,6 +151,8 @@ enum vmcs_field {
 	EOI_EXIT_BITMAP2_HIGH = 0x00002021,
 	EOI_EXIT_BITMAP3 = 0x00002022,
 	EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+	VMREAD_BITMAP = 0x00002026,
+	VMWRITE_BITMAP = 0x00002028,
 	GUEST_PHYSICAL_ADDRESS = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
 	VMCS_LINK_POINTER = 0x00002800,
@@ -209,6 +226,7 @@ enum vmcs_field {
 	GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
 	GUEST_ACTIVITY_STATE = 0X00004826,
 	GUEST_SYSENTER_CS = 0x0000482A,
+	VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
 	HOST_IA32_SYSENTER_CS = 0x00004c00,
 	CR0_GUEST_HOST_MASK = 0x00006000,
 	CR4_GUEST_HOST_MASK = 0x00006002,
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a65ec29e6ffb..5d9a3033b3d7 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -29,7 +29,6 @@
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index b5757885d7a4..b3a4866661c5 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -528,6 +528,8 @@
 #define VMX_BASIC_MEM_TYPE_WB 6LLU
 #define VMX_BASIC_INOUT 0x0040000000000000LLU
 
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
 /* AMD-V MSRs */
 
 #define MSR_VM_CR 0xc0010114
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 2871fccfee68..d651082c7cf7 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,7 @@
 #define EXIT_REASON_EOI_INDUCED 45
 #define EXIT_REASON_EPT_VIOLATION 48
 #define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_PREEMPTION_TIMER 52
 #define EXIT_REASON_WBINVD 54
 #define EXIT_REASON_XSETBV 55
 #define EXIT_REASON_APIC_WRITE 56
@@ -110,7 +111,7 @@
 	{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD, "INVD" }, \
-	{ EXIT_REASON_INVPCID, "INVPCID" }
-
+	{ EXIT_REASON_INVPCID, "INVPCID" }, \
+	{ EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }
 
 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c1d01e6ca790..727208941030 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \
 apicinterrupt X86_PLATFORM_IPI_VECTOR \
 	x86_platform_ipi smp_x86_platform_ipi
 
+#ifdef CONFIG_HAVE_KVM
+apicinterrupt POSTED_INTR_VECTOR \
+	kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
+#endif
+
 apicinterrupt THRESHOLD_APIC_VECTOR \
 	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 84b778962c66..ac0631d8996f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -224,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+#ifdef CONFIG_HAVE_KVM
+/*
+ * Handler for POSTED_INTERRUPT_VECTOR.
+ */
+void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	ack_APIC_irq();
+
+	irq_enter();
+
+	exit_idle();
+
+	inc_irq_stat(kvm_posted_intr_ipis);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+#endif
+
 EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7dc4e459c2b3..a2a1fbc594ff 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -172,6 +172,10 @@ static void __init apic_intr_init(void)
 
 	/* IPI for X86 platform specific use */
 	alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+#ifdef CONFIG_HAVE_KVM
+	/* IPI for KVM to deliver posted interrupt */
+	alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
+#endif
 
 	/* IPI vectors for APIC spurious and error interrupts */
 	alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 0732f0089a3d..d2c381280e3c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -160,8 +160,12 @@ int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high, ret;
-	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
+	struct pvclock_vcpu_time_info *src;
+
+	if (!hv_clock)
+		return 0;
 
+	src = &hv_clock[cpu].pvti;
 	low = (int)slow_virt_to_phys(src) | 1;
 	high = ((u64)slow_virt_to_phys(src) >> 32);
 	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
@@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void)
 	struct pvclock_vcpu_time_info *vcpu_time;
 	unsigned int size;
 
+	if (!hv_clock)
+		return 0;
+
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
 	preempt_disable();
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 586f00059805..a47a3e54b964 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,14 +21,13 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	depends on HIGH_RES_TIMERS
-	# for device assignment:
-	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
 	depends on NET
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
 	select KVM_ASYNC_PF
@@ -82,6 +81,17 @@ config KVM_MMU_AUDIT
 	  This option adds a R/W kVM module parameter 'mmu_audit', which allows
 	  audit KVM MMU at runtime.
 
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 04d30401c5cb..d609e1d84048 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I.
 
 kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o eventfd.o \
-		assigned-dev.o)
-kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
+		irqchip.o)
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \
+		assigned-dev.o iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a335cc6cde72..8e517bba6a7c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -132,8 +132,9 @@
 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64 (1<<28)
 #define PageTable (1 << 29) /* instruction used to write page table */
+#define NotImpl (1 << 30) /* instruction is not implemented */
 /* Source 2 operand type */
-#define Src2Shift (30)
+#define Src2Shift (31)
 #define Src2None (OpNone << Src2Shift)
 #define Src2CL (OpCL << Src2Shift)
 #define Src2ImmByte (OpImmByte << Src2Shift)
@@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 	memset(&seg_desc, 0, sizeof seg_desc);
 
-	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
-	    || ctxt->mode == X86EMUL_MODE_REAL) {
-		/* set real mode segment descriptor */
+	if (ctxt->mode == X86EMUL_MODE_REAL) {
+		/* set real mode segment descriptor (keep limit etc. for
+		 * unreal mode) */
 		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
 		set_desc_base(&seg_desc, selector << 4);
 		goto load;
+	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
+		/* VM86 needs a clean new segment descriptor */
+		set_desc_base(&seg_desc, selector << 4);
+		set_desc_limit(&seg_desc, 0xffff);
+		seg_desc.type = 3;
+		seg_desc.p = 1;
+		seg_desc.s = 1;
+		seg_desc.dpl = 3;
+		goto load;
 	}
 
 	rpl = selector & 3;
@@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
 #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
 		      .check_perm = (_p) }
-#define N D(0)
+#define N D(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
@@ -3713,7 +3723,7 @@ static const struct opcode group5[] = {
 	I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
 	I(SrcMem | Stack, em_grp45),
 	I(SrcMemFAddr | ImplicitOps, em_grp45),
-	I(SrcMem | Stack, em_grp45), N,
+	I(SrcMem | Stack, em_grp45), D(Undefined),
 };
 
 static const struct opcode group6[] = {
@@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		break;
 	case OpMem8:
 		ctxt->memop.bytes = 1;
+		if (ctxt->memop.type == OP_REG) {
+			ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1);
+			fetch_register_operand(&ctxt->memop);
+		}
 		goto mem_common;
 	case OpMem16:
 		ctxt->memop.bytes = 2;
@@ -4373,7 +4387,7 @@ done_prefixes:
 	ctxt->intercept = opcode.intercept;
 
 	/* Unrecognised? */
-	if (ctxt->d == 0 || (ctxt->d & Undefined))
+	if (ctxt->d == 0 || (ctxt->d & NotImpl))
 		return EMULATION_FAILED;
 
 	if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
@@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	ctxt->mem_read.pos = 0;
 
-	if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) {
+	if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+			(ctxt->d & Undefined)) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index c1d30b2fc9bb..412a5aa0ef94 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work)
 	}
 	spin_unlock(&ps->inject_lock);
 	if (inject) {
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
-		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
+		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false);
+		kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false);
 
 		/*
 		 * Provides NMI watchdog support via Virtual Wire mode.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f77df1c5de6e..e1adbb4aca75 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap)
 	return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
+		apic_test_vector(vector, apic->regs + APIC_IRR);
+}
+
 static inline void apic_set_vector(int vec, void *bitmap)
 {
 	set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu,
-		struct kvm_lapic_irq *irq,
-		u64 *eoi_exit_bitmap)
-{
-	struct kvm_lapic **dst;
-	struct kvm_apic_map *map;
-	unsigned long bitmap = 1;
-	int i;
-
-	rcu_read_lock();
-	map = rcu_dereference(vcpu->kvm->arch.apic_map);
-
-	if (unlikely(!map)) {
-		__set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap);
-		goto out;
-	}
-
-	if (irq->dest_mode == 0) { /* physical mode */
-		if (irq->delivery_mode == APIC_DM_LOWEST ||
-				irq->dest_id == 0xff) {
-			__set_bit(irq->vector,
-				  (unsigned long *)eoi_exit_bitmap);
-			goto out;
-		}
-		dst = &map->phys_map[irq->dest_id & 0xff];
-	} else {
-		u32 mda = irq->dest_id << (32 - map->ldr_bits);
-
-		dst = map->logical_map[apic_cluster_id(map, mda)];
-
-		bitmap = apic_logical_id(map, mda);
-	}
-
-	for_each_set_bit(i, &bitmap, 16) {
-		if (!dst[i])
-			continue;
-		if (dst[i]->vcpu == vcpu) {
-			__set_bit(irq->vector,
-				  (unsigned long *)eoi_exit_bitmap);
-			break;
-		}
-	}
-
-out:
-	rcu_read_unlock();
-}
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -256,7 +217,7 @@ out:
 	if (old)
 		kfree_rcu(old, rcu);
 
-	kvm_ioapic_make_eoibitmap_request(kvm);
+	kvm_vcpu_request_scan_ioapic(kvm);
 }
 
 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
@@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap)
 	return count;
 }
 
+void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
+{
+	u32 i, pir_val;
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	for (i = 0; i <= 7; i++) {
+		pir_val = xchg(&pir[i], 0);
+		if (pir_val)
+			*((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val;
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
+
 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
 {
 	apic->irr_pending = true;
@@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic)
 	if (!apic->irr_pending)
 		return -1;
 
+	kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
 	result = apic_search_irr(apic);
 	ASSERT(result == -1 || result >= 16);
 
@@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 }
 
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-			     int vector, int level, int trig_mode);
+			     int vector, int level, int trig_mode,
+			     unsigned long *dest_map);
 
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
+		unsigned long *dest_map)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
-			irq->level, irq->trig_mode);
+			irq->level, irq->trig_mode, dest_map);
 }
 
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
@@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 	return result;
 }
 
+void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 }
 
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-		struct kvm_lapic_irq *irq, int *r)
+		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
 	struct kvm_apic_map *map;
 	unsigned long bitmap = 1;
@@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 	*r = -1;
 
 	if (irq->shorthand == APIC_DEST_SELF) {
-		*r = kvm_apic_set_irq(src->vcpu, irq);
+		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
 		return true;
 	}
 
@@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 			continue;
 		if (*r < 0)
 			*r = 0;
-		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 	}
 
 	ret = true;
@@ -681,7 +667,8 @@ out:
  * Return 1 if successfully added and 0 if discarded.
  */
 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
-			     int vector, int level, int trig_mode)
+			     int vector, int level, int trig_mode,
+			     unsigned long *dest_map)
 {
 	int result = 0;
 	struct kvm_vcpu *vcpu = apic->vcpu;
@@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		if (unlikely(!apic_enabled(apic)))
 			break;
 
-		if (trig_mode) {
-			apic_debug("level trig mode for vector %d", vector);
-			apic_set_vector(vector, apic->regs + APIC_TMR);
-		} else
-			apic_clear_vector(vector, apic->regs + APIC_TMR);
+		if (dest_map)
+			__set_bit(vcpu->vcpu_id, dest_map);
 
-		result = !apic_test_and_set_irr(vector, apic);
-		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
-					  trig_mode, vector, !result);
-		if (!result) {
-			if (trig_mode)
-				apic_debug("level trig mode repeatedly for "
-						"vector %d", vector);
-			break;
-		}
+		if (kvm_x86_ops->deliver_posted_interrupt) {
+			result = 1;
+			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
+		} else {
+			result = !apic_test_and_set_irr(vector, apic);
 
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		kvm_vcpu_kick(vcpu);
+			if (!result) {
+				if (trig_mode)
+					apic_debug("level trig mode repeatedly "
+						"for vector %d", vector);
+				goto out;
+			}
+
+			kvm_make_request(KVM_REQ_EVENT, vcpu);
+			kvm_vcpu_kick(vcpu);
+		}
+out:
+		trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
+				trig_mode, vector, !result);
 		break;
 
 	case APIC_DM_REMRD:
@@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_INIT:
 		if (!trig_mode || level) {
 			result = 1;
-			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+			/* assumes that there are only KVM_APIC_INIT/SIPI */
+			apic->pending_events = (1UL << KVM_APIC_INIT);
+			/* make sure pending_events is visible before sending
+			 * the request */
+			smp_wmb();
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
 		} else {
@@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_STARTUP:
 		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
 			   vcpu->vcpu_id, vector);
-		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
-			result = 1;
-			vcpu->arch.sipi_vector = vector;
-			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-			kvm_make_request(KVM_REQ_EVENT, vcpu);
-			kvm_vcpu_kick(vcpu);
-		}
+		result = 1;
+		apic->sipi_vector = vector;
+		/* make sure sipi_vector is visible for the receiver */
+		smp_wmb();
+		set_bit(KVM_APIC_SIPI, &apic->pending_events);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+		kvm_vcpu_kick(vcpu);
 		break;
 
 	case APIC_DM_EXTINT:
@@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 			trigger_mode = IOAPIC_LEVEL_TRIG;
 		else
 			trigger_mode = IOAPIC_EDGE_TRIG;
-		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
+		kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
 	}
 }
 
@@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
 		   irq.vector);
 
-	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
+	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
@@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
-		return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
+		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
+					NULL);
 	}
 	return 0;
 }
@@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	apic->highest_isr_cache = -1;
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	kvm_rtc_eoi_tracking_restore_one(vcpu);
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
@@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 					 addr, sizeof(u8));
 }
 
+void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+	unsigned int sipi_vector;
+
+	if (!kvm_vcpu_has_lapic(vcpu))
+		return;
+
+	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
+		kvm_lapic_reset(vcpu);
+		kvm_vcpu_reset(vcpu);
+		if (kvm_vcpu_is_bsp(apic->vcpu))
+			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		else
+			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+	}
+	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) &&
+	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
+		/* evaluate pending_events before reading the vector */
+		smp_rmb();
+		sipi_vector = apic->sipi_vector;
+		pr_debug("vcpu %d received sipi with vector # %x\n",
+			 vcpu->vcpu_id, sipi_vector);
+		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	}
+}
+
 void kvm_lapic_init(void)
 {
 	/* do not patch jump label more than once per second */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4e..c730ac9fe801 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -5,6 +5,9 @@ | |||
5 | 5 | ||
6 | #include <linux/kvm_host.h> | 6 | #include <linux/kvm_host.h> |
7 | 7 | ||
8 | #define KVM_APIC_INIT 0 | ||
9 | #define KVM_APIC_SIPI 1 | ||
10 | |||
8 | struct kvm_timer { | 11 | struct kvm_timer { |
9 | struct hrtimer timer; | 12 | struct hrtimer timer; |
10 | s64 period; /* unit: ns */ | 13 | s64 period; /* unit: ns */ |
@@ -32,6 +35,8 @@ struct kvm_lapic { | |||
32 | void *regs; | 35 | void *regs; |
33 | gpa_t vapic_addr; | 36 | gpa_t vapic_addr; |
34 | struct page *vapic_page; | 37 | struct page *vapic_page; |
38 | unsigned long pending_events; | ||
39 | unsigned int sipi_vector; | ||
35 | }; | 40 | }; |
36 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | 41 | int kvm_create_lapic(struct kvm_vcpu *vcpu); |
37 | void kvm_free_lapic(struct kvm_vcpu *vcpu); | 42 | void kvm_free_lapic(struct kvm_vcpu *vcpu); |
@@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); | |||
39 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); | 44 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); |
40 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); | 45 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); |
41 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | 46 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); |
47 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu); | ||
42 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | 48 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); |
43 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 49 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
44 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 50 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
@@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | |||
47 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 53 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
48 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 54 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
49 | 55 | ||
56 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); | ||
57 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | ||
50 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 58 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
51 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 59 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
52 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 60 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
61 | unsigned long *dest_map); | ||
53 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 62 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
54 | 63 | ||
55 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 64 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
56 | struct kvm_lapic_irq *irq, int *r); | 65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); |
57 | 66 | ||
58 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
59 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 68 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
@@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | |||
154 | return ldr & map->lid_mask; | 163 | return ldr & map->lid_mask; |
155 | } | 164 | } |
156 | 165 | ||
157 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | 166 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
158 | struct kvm_lapic_irq *irq, | 167 | { |
159 | u64 *eoi_bitmap); | 168 | return vcpu->arch.apic->pending_events; |
169 | } | ||
170 | |||
171 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | ||
160 | 172 | ||
161 | #endif | 173 | #endif |
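One API change visible above is the extra dest_map argument threaded through kvm_apic_set_irq() and kvm_irq_delivery_to_apic_fast(), which lets interested callers learn which vCPUs an interrupt was actually delivered to. The sketch below is a hedged, userspace approximation of passing such an optional bitmap; the names, sizes and return convention are assumptions, not the kernel's.

/*
 * Illustrative sketch of an optional destination bitmap: callers that care
 * which vCPUs received the interrupt pass a bitmap, others pass NULL.
 */
#include <stdio.h>

static int fake_apic_set_irq(int vcpu_id, int vector, unsigned long *dest_map)
{
	/* record delivery so the caller can tell which vCPUs were hit */
	if (dest_map)
		*dest_map |= 1UL << vcpu_id;
	printf("vcpu %d: inject vector %#x\n", vcpu_id, vector);
	return 1;
}

int main(void)
{
	unsigned long dest_map = 0;

	fake_apic_set_irq(3, 0xec, &dest_map);
	fake_apic_set_irq(5, 0xec, NULL);	/* caller does not track it */
	printf("dest_map = %#lx\n", dest_map);
	return 0;
}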
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 956ca358108a..004cc87b781c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | |||
199 | 199 | ||
200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) | 200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) |
201 | { | 201 | { |
202 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
203 | |||
202 | access &= ACC_WRITE_MASK | ACC_USER_MASK; | 204 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
203 | 205 | ||
206 | sp->mmio_cached = true; | ||
204 | trace_mark_mmio_spte(sptep, gfn, access); | 207 | trace_mark_mmio_spte(sptep, gfn, access); |
205 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); | 208 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); |
206 | } | 209 | } |
@@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1502 | u64 *parent_pte, int direct) | 1505 | u64 *parent_pte, int direct) |
1503 | { | 1506 | { |
1504 | struct kvm_mmu_page *sp; | 1507 | struct kvm_mmu_page *sp; |
1508 | |||
1505 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); | 1509 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); |
1506 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 1510 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1507 | if (!direct) | 1511 | if (!direct) |
@@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1644 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1648 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1645 | struct list_head *invalid_list); | 1649 | struct list_head *invalid_list); |
1646 | 1650 | ||
1647 | #define for_each_gfn_sp(kvm, sp, gfn) \ | 1651 | #define for_each_gfn_sp(_kvm, _sp, _gfn) \ |
1648 | hlist_for_each_entry(sp, \ | 1652 | hlist_for_each_entry(_sp, \ |
1649 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1653 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
1650 | if ((sp)->gfn != (gfn)) {} else | 1654 | if ((_sp)->gfn != (_gfn)) {} else |
1651 | 1655 | ||
1652 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ | 1656 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ |
1653 | hlist_for_each_entry(sp, \ | 1657 | for_each_gfn_sp(_kvm, _sp, _gfn) \ |
1654 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1658 | if ((_sp)->role.direct || (_sp)->role.invalid) {} else |
1655 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | ||
1656 | (sp)->role.invalid) {} else | ||
1657 | 1659 | ||
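for_each_gfn_indirect_valid_sp() is now built on top of for_each_gfn_sp() by appending an "if (filter) {} else" clause, a macro idiom that keeps the caller's loop body binding as a single statement. Here is a minimal standalone example of the same idiom over a plain array; everything in it is illustrative.

/*
 * Minimal sketch of the "if (filter) {} else" macro idiom used by
 * for_each_gfn_indirect_valid_sp() above: the filter is spliced in as an
 * empty then-branch so the statement that follows the macro still runs as
 * the loop body.  A plain array stands in for the kernel hash list.
 */
#include <stdio.h>

struct page { unsigned long gfn; int valid; };

#define for_each_page(_p, _arr, _n) \
	for (int _i = 0; _i < (_n) && ((_p) = &(_arr)[_i], 1); _i++)

#define for_each_valid_page_with_gfn(_p, _arr, _n, _gfn) \
	for_each_page(_p, _arr, _n) \
		if ((_p)->gfn != (_gfn) || !(_p)->valid) {} else

int main(void)
{
	struct page pages[] = {
		{ 1, 1 }, { 2, 0 }, { 2, 1 }, { 3, 1 },
	};
	struct page *p;

	for_each_valid_page_with_gfn(p, pages, 4, 2)
		printf("matched page gfn=%lu\n", p->gfn);
	return 0;
}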
1658 | /* @sp->gfn should be write-protected at the call site */ | 1660 | /* @sp->gfn should be write-protected at the call site */ |
1659 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1661 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
@@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
2089 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2091 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
2090 | struct list_head *invalid_list) | 2092 | struct list_head *invalid_list) |
2091 | { | 2093 | { |
2092 | struct kvm_mmu_page *sp; | 2094 | struct kvm_mmu_page *sp, *nsp; |
2093 | 2095 | ||
2094 | if (list_empty(invalid_list)) | 2096 | if (list_empty(invalid_list)) |
2095 | return; | 2097 | return; |
@@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
2106 | */ | 2108 | */ |
2107 | kvm_flush_remote_tlbs(kvm); | 2109 | kvm_flush_remote_tlbs(kvm); |
2108 | 2110 | ||
2109 | do { | 2111 | list_for_each_entry_safe(sp, nsp, invalid_list, link) { |
2110 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
2111 | WARN_ON(!sp->role.invalid || sp->root_count); | 2112 | WARN_ON(!sp->role.invalid || sp->root_count); |
2112 | kvm_mmu_free_page(sp); | 2113 | kvm_mmu_free_page(sp); |
2113 | } while (!list_empty(invalid_list)); | 2114 | } |
2115 | } | ||
2116 | |||
2117 | static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, | ||
2118 | struct list_head *invalid_list) | ||
2119 | { | ||
2120 | struct kvm_mmu_page *sp; | ||
2121 | |||
2122 | if (list_empty(&kvm->arch.active_mmu_pages)) | ||
2123 | return false; | ||
2124 | |||
2125 | sp = list_entry(kvm->arch.active_mmu_pages.prev, | ||
2126 | struct kvm_mmu_page, link); | ||
2127 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | ||
2128 | |||
2129 | return true; | ||
2114 | } | 2130 | } |
2115 | 2131 | ||
2116 | /* | 2132 | /* |
@@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
2120 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | 2136 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) |
2121 | { | 2137 | { |
2122 | LIST_HEAD(invalid_list); | 2138 | LIST_HEAD(invalid_list); |
2123 | /* | ||
2124 | * If we set the number of mmu pages to be smaller be than the | ||
2125 | * number of actived pages , we must to free some mmu pages before we | ||
2126 | * change the value | ||
2127 | */ | ||
2128 | 2139 | ||
2129 | spin_lock(&kvm->mmu_lock); | 2140 | spin_lock(&kvm->mmu_lock); |
2130 | 2141 | ||
2131 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { | 2142 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { |
2132 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && | 2143 | /* Need to free some mmu pages to achieve the goal. */ |
2133 | !list_empty(&kvm->arch.active_mmu_pages)) { | 2144 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) |
2134 | struct kvm_mmu_page *page; | 2145 | if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
2146 | break; | ||
2135 | 2147 | ||
2136 | page = container_of(kvm->arch.active_mmu_pages.prev, | ||
2137 | struct kvm_mmu_page, link); | ||
2138 | kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); | ||
2139 | } | ||
2140 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 2148 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
2141 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; | 2149 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; |
2142 | } | 2150 | } |
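The two hunks above factor "zap the oldest page on active_mmu_pages" into prepare_zap_oldest_mmu_page() and loop on it until n_used_mmu_pages meets the goal, committing the frees afterwards. Below is a hedged standalone sketch of that prepare/commit reclaim loop, with an ordinary singly linked list standing in for list_head; none of the names are the kernel's.

/*
 * Standalone sketch (not kernel code): pages sit on an LRU-ordered list,
 * the oldest entry is taken from the tail and moved to an invalid list,
 * and the loop stops once the used count reaches the goal or the list is
 * empty.  The frees are then committed in one pass.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct page {
	int id;
	struct page *next;	/* toward the tail (older) */
};

struct mmu {
	struct page *active_head;	/* newest first, oldest last */
	unsigned int used;
};

static bool prepare_zap_oldest(struct mmu *mmu, struct page **invalid)
{
	struct page **pp = &mmu->active_head;

	if (!*pp)
		return false;
	while ((*pp)->next)		/* walk to the tail (oldest) */
		pp = &(*pp)->next;
	(*pp)->next = *invalid;		/* move it to the invalid list */
	*invalid = *pp;
	*pp = NULL;
	mmu->used--;
	return true;
}

static void commit_zap(struct page **invalid)
{
	while (*invalid) {
		struct page *p = *invalid;
		*invalid = p->next;
		printf("freeing page %d\n", p->id);
		free(p);
	}
}

int main(void)
{
	struct mmu mmu = { NULL, 0 };
	struct page *invalid = NULL;
	unsigned int goal = 2;

	for (int i = 0; i < 5; i++) {	/* newest ends up at the head */
		struct page *p = malloc(sizeof(*p));
		p->id = i;
		p->next = mmu.active_head;
		mmu.active_head = p;
		mmu.used++;
	}

	while (mmu.used > goal)
		if (!prepare_zap_oldest(&mmu, &invalid))
			break;
	commit_zap(&invalid);
	printf("used = %u\n", mmu.used);
	return 0;
}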
@@ -2794,6 +2802,7 @@ exit: | |||
2794 | 2802 | ||
2795 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2803 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2796 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2804 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
2805 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu); | ||
2797 | 2806 | ||
2798 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | 2807 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
2799 | gfn_t gfn, bool prefault) | 2808 | gfn_t gfn, bool prefault) |
@@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
2835 | spin_lock(&vcpu->kvm->mmu_lock); | 2844 | spin_lock(&vcpu->kvm->mmu_lock); |
2836 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 2845 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
2837 | goto out_unlock; | 2846 | goto out_unlock; |
2838 | kvm_mmu_free_some_pages(vcpu); | 2847 | make_mmu_pages_available(vcpu); |
2839 | if (likely(!force_pt_level)) | 2848 | if (likely(!force_pt_level)) |
2840 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 2849 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
2841 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, | 2850 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, |
@@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
2913 | 2922 | ||
2914 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2923 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
2915 | spin_lock(&vcpu->kvm->mmu_lock); | 2924 | spin_lock(&vcpu->kvm->mmu_lock); |
2916 | kvm_mmu_free_some_pages(vcpu); | 2925 | make_mmu_pages_available(vcpu); |
2917 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, | 2926 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, |
2918 | 1, ACC_ALL, NULL); | 2927 | 1, ACC_ALL, NULL); |
2919 | ++sp->root_count; | 2928 | ++sp->root_count; |
@@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
2925 | 2934 | ||
2926 | ASSERT(!VALID_PAGE(root)); | 2935 | ASSERT(!VALID_PAGE(root)); |
2927 | spin_lock(&vcpu->kvm->mmu_lock); | 2936 | spin_lock(&vcpu->kvm->mmu_lock); |
2928 | kvm_mmu_free_some_pages(vcpu); | 2937 | make_mmu_pages_available(vcpu); |
2929 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), | 2938 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
2930 | i << 30, | 2939 | i << 30, |
2931 | PT32_ROOT_LEVEL, 1, ACC_ALL, | 2940 | PT32_ROOT_LEVEL, 1, ACC_ALL, |
@@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
2964 | ASSERT(!VALID_PAGE(root)); | 2973 | ASSERT(!VALID_PAGE(root)); |
2965 | 2974 | ||
2966 | spin_lock(&vcpu->kvm->mmu_lock); | 2975 | spin_lock(&vcpu->kvm->mmu_lock); |
2967 | kvm_mmu_free_some_pages(vcpu); | 2976 | make_mmu_pages_available(vcpu); |
2968 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, | 2977 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, |
2969 | 0, ACC_ALL, NULL); | 2978 | 0, ACC_ALL, NULL); |
2970 | root = __pa(sp->spt); | 2979 | root = __pa(sp->spt); |
@@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
2998 | return 1; | 3007 | return 1; |
2999 | } | 3008 | } |
3000 | spin_lock(&vcpu->kvm->mmu_lock); | 3009 | spin_lock(&vcpu->kvm->mmu_lock); |
3001 | kvm_mmu_free_some_pages(vcpu); | 3010 | make_mmu_pages_available(vcpu); |
3002 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 3011 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
3003 | PT32_ROOT_LEVEL, 0, | 3012 | PT32_ROOT_LEVEL, 0, |
3004 | ACC_ALL, NULL); | 3013 | ACC_ALL, NULL); |
@@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3304 | spin_lock(&vcpu->kvm->mmu_lock); | 3313 | spin_lock(&vcpu->kvm->mmu_lock); |
3305 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3314 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
3306 | goto out_unlock; | 3315 | goto out_unlock; |
3307 | kvm_mmu_free_some_pages(vcpu); | 3316 | make_mmu_pages_available(vcpu); |
3308 | if (likely(!force_pt_level)) | 3317 | if (likely(!force_pt_level)) |
3309 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3318 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
3310 | r = __direct_map(vcpu, gpa, write, map_writable, | 3319 | r = __direct_map(vcpu, gpa, write, map_writable, |
@@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
4006 | } | 4015 | } |
4007 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 4016 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
4008 | 4017 | ||
4009 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 4018 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu) |
4010 | { | 4019 | { |
4011 | LIST_HEAD(invalid_list); | 4020 | LIST_HEAD(invalid_list); |
4012 | 4021 | ||
4013 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && | 4022 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) |
4014 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 4023 | return; |
4015 | struct kvm_mmu_page *sp; | 4024 | |
4025 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { | ||
4026 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) | ||
4027 | break; | ||
4016 | 4028 | ||
4017 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | ||
4018 | struct kvm_mmu_page, link); | ||
4019 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); | ||
4020 | ++vcpu->kvm->stat.mmu_recycled; | 4029 | ++vcpu->kvm->stat.mmu_recycled; |
4021 | } | 4030 | } |
4022 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 4031 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
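make_mmu_pages_available() above adds a fast-path bail-out while free pages are still at or above KVM_MIN_FREE_MMU_PAGES and otherwise reclaims until KVM_REFILL_PAGES is reached, i.e. a low/high watermark with hysteresis. A small self-contained sketch of that pattern, with made-up constants:

/*
 * Hedged sketch of the low/high watermark pattern used above: nothing is
 * reclaimed until the free count drops below MIN_FREE, and once reclaim
 * starts it continues until REFILL is reached, so the expensive path does
 * not run on every allocation.  Constants and names are invented.
 */
#include <stdio.h>

#define MIN_FREE  5
#define REFILL   10

struct cache { unsigned int max, used; };

static unsigned int available(const struct cache *c)
{
	return c->max > c->used ? c->max - c->used : 0;
}

static unsigned int reclaim_one(struct cache *c)
{
	if (!c->used)
		return 0;
	c->used--;		/* stand-in for zapping the oldest page */
	return 1;
}

static void make_pages_available(struct cache *c)
{
	if (available(c) >= MIN_FREE)	/* fast path: plenty free */
		return;
	while (available(c) < REFILL)
		if (!reclaim_one(c))
			break;
}

int main(void)
{
	struct cache c = { .max = 32, .used = 30 };

	make_pages_available(&c);
	printf("used=%u available=%u\n", c.used, available(&c));
	return 0;
}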
@@ -4185,17 +4194,22 @@ restart: | |||
4185 | spin_unlock(&kvm->mmu_lock); | 4194 | spin_unlock(&kvm->mmu_lock); |
4186 | } | 4195 | } |
4187 | 4196 | ||
4188 | static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 4197 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) |
4189 | struct list_head *invalid_list) | ||
4190 | { | 4198 | { |
4191 | struct kvm_mmu_page *page; | 4199 | struct kvm_mmu_page *sp, *node; |
4200 | LIST_HEAD(invalid_list); | ||
4192 | 4201 | ||
4193 | if (list_empty(&kvm->arch.active_mmu_pages)) | 4202 | spin_lock(&kvm->mmu_lock); |
4194 | return; | 4203 | restart: |
4204 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { | ||
4205 | if (!sp->mmio_cached) | ||
4206 | continue; | ||
4207 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) | ||
4208 | goto restart; | ||
4209 | } | ||
4195 | 4210 | ||
4196 | page = container_of(kvm->arch.active_mmu_pages.prev, | 4211 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4197 | struct kvm_mmu_page, link); | 4212 | spin_unlock(&kvm->mmu_lock); |
4198 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | ||
4199 | } | 4213 | } |
4200 | 4214 | ||
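kvm_mmu_zap_mmio_sptes() above walks active_mmu_pages under mmu_lock and jumps back to restart whenever kvm_mmu_prepare_zap_page() reports that the list changed, since a zap can unlink more entries than the one being visited. The userspace sketch below reproduces that restart idiom on a toy list whose "zap" deliberately removes an extra node; it is illustrative only.

/*
 * Restart-the-walk idiom: when removing one element can unlink other
 * elements as a side effect, the safe choice is to restart from the head
 * after every removal.  The list and names below are made up.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	bool tagged;
	struct node *next;
};

/* Pretend zapping one node also tears down its successor, the way
 * kvm_mmu_prepare_zap_page() may unlink more than the page it is given. */
static void zap(struct node **head, struct node *victim)
{
	struct node *extra = victim->next;
	struct node **pp = head;
	struct node *dead = NULL;

	while (*pp) {
		if (*pp == victim || *pp == extra) {
			struct node *d = *pp;
			*pp = d->next;
			d->next = dead;
			dead = d;
		} else {
			pp = &(*pp)->next;
		}
	}
	while (dead) {
		struct node *d = dead;
		dead = d->next;
		printf("zapped %d\n", d->id);
		free(d);
	}
}

static void zap_all_tagged(struct node **head)
{
restart:
	for (struct node *n = *head; n; n = n->next) {
		if (!n->tagged)
			continue;
		zap(head, n);
		goto restart;	/* the iterator may now be stale */
	}
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 5; i >= 0; i--) {
		struct node *n = malloc(sizeof(*n));
		n->id = i;
		n->tagged = (i == 1 || i == 4);
		n->next = head;
		head = n;
	}
	zap_all_tagged(&head);
	for (struct node *n = head; n; n = n->next)
		printf("kept %d\n", n->id);
	return 0;
}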
4201 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4215 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
@@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
4232 | idx = srcu_read_lock(&kvm->srcu); | 4246 | idx = srcu_read_lock(&kvm->srcu); |
4233 | spin_lock(&kvm->mmu_lock); | 4247 | spin_lock(&kvm->mmu_lock); |
4234 | 4248 | ||
4235 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); | 4249 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); |
4236 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4250 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4237 | 4251 | ||
4238 | spin_unlock(&kvm->mmu_lock); | 4252 | spin_unlock(&kvm->mmu_lock); |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 69871080e866..2adcbc2cac6d 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | |||
57 | 57 | ||
58 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 58 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
59 | { | 59 | { |
60 | return kvm->arch.n_max_mmu_pages - | 60 | if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) |
61 | kvm->arch.n_used_mmu_pages; | 61 | return kvm->arch.n_max_mmu_pages - |
62 | } | 62 | kvm->arch.n_used_mmu_pages; |
63 | 63 | ||
64 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 64 | return 0; |
65 | { | ||
66 | if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES)) | ||
67 | __kvm_mmu_free_some_pages(vcpu); | ||
68 | } | 65 | } |
69 | 66 | ||
70 | static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | 67 | static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) |
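kvm_mmu_available_pages() now returns 0 instead of letting the unsigned subtraction wrap when n_used_mmu_pages exceeds n_max_mmu_pages, which can happen once the limit is lowered at runtime. A trivial standalone version of that guard:

/*
 * Sketch of the underflow guard added above: with unsigned arithmetic,
 * max - used wraps to a huge value whenever used exceeds max, so the
 * difference is only returned when it is known to be positive.
 */
#include <stdio.h>

static unsigned int available_pages(unsigned int max, unsigned int used)
{
	if (max > used)
		return max - used;
	return 0;
}

int main(void)
{
	printf("%u\n", available_pages(40, 25));	/* 15 */
	printf("%u\n", available_pages(40, 40));	/* 0  */
	printf("%u\n", available_pages(16, 40));	/* 0, not ~4 billion */
	return 0;
}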
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 105dd5bd550e..da20860b457a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
627 | goto out_unlock; | 627 | goto out_unlock; |
628 | 628 | ||
629 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 629 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
630 | kvm_mmu_free_some_pages(vcpu); | 630 | make_mmu_pages_available(vcpu); |
631 | if (!force_pt_level) | 631 | if (!force_pt_level) |
632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cfc258a6bf97..c53e797e7369 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | |||
360 | return 1; | 360 | return 1; |
361 | } | 361 | } |
362 | 362 | ||
363 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | 363 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
364 | { | 364 | { |
365 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 365 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
366 | struct kvm_pmc *pmc; | 366 | struct kvm_pmc *pmc; |
367 | u32 index = msr_info->index; | ||
368 | u64 data = msr_info->data; | ||
367 | 369 | ||
368 | switch (index) { | 370 | switch (index) { |
369 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | 371 | case MSR_CORE_PERF_FIXED_CTR_CTRL: |
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
375 | } | 377 | } |
376 | break; | 378 | break; |
377 | case MSR_CORE_PERF_GLOBAL_STATUS: | 379 | case MSR_CORE_PERF_GLOBAL_STATUS: |
380 | if (msr_info->host_initiated) { | ||
381 | pmu->global_status = data; | ||
382 | return 0; | ||
383 | } | ||
378 | break; /* RO MSR */ | 384 | break; /* RO MSR */ |
379 | case MSR_CORE_PERF_GLOBAL_CTRL: | 385 | case MSR_CORE_PERF_GLOBAL_CTRL: |
380 | if (pmu->global_ctrl == data) | 386 | if (pmu->global_ctrl == data) |
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
386 | break; | 392 | break; |
387 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | 393 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
388 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | 394 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { |
389 | pmu->global_status &= ~data; | 395 | if (!msr_info->host_initiated) |
396 | pmu->global_status &= ~data; | ||
390 | pmu->global_ovf_ctrl = data; | 397 | pmu->global_ovf_ctrl = data; |
391 | return 0; | 398 | return 0; |
392 | } | 399 | } |
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
394 | default: | 401 | default: |
395 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | 402 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || |
396 | (pmc = get_fixed_pmc(pmu, index))) { | 403 | (pmc = get_fixed_pmc(pmu, index))) { |
397 | data = (s64)(s32)data; | 404 | if (!msr_info->host_initiated) |
405 | data = (s64)(s32)data; | ||
398 | pmc->counter += data - read_pmc(pmc); | 406 | pmc->counter += data - read_pmc(pmc); |
399 | return 0; | 407 | return 0; |
400 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | 408 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { |
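The pmu.c changes route kvm_pmu_set_msr() through struct msr_data so the handler can tell guest WRMSR from a userspace state restore (host_initiated) and, for example, accept writes to otherwise read-only status bits or skip the guest-only sign extension. The sketch below imitates that split with invented MSR numbers and a simplified counter rule; it is not the kernel's logic verbatim.

/*
 * Hedged sketch of the host_initiated distinction: the same "set MSR" path
 * serves both the guest and userspace restoring saved state, and a flag
 * decides whether read-only registers may be written and whether the value
 * is reinterpreted.  The MSR numbers and struct are made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_MSR_STATUS  0x100	/* read-only from the guest's view */
#define FAKE_MSR_COUNTER 0x101

struct msr_data {
	uint32_t index;
	uint64_t data;
	bool host_initiated;
};

struct pmu_state {
	uint64_t status;
	uint64_t counter;
};

static int set_msr(struct pmu_state *pmu, const struct msr_data *msr)
{
	switch (msr->index) {
	case FAKE_MSR_STATUS:
		if (msr->host_initiated) {	/* state restore from userspace */
			pmu->status = msr->data;
			return 0;
		}
		return 1;			/* guest writes are rejected */
	case FAKE_MSR_COUNTER:
		/* guest writes are sign-extended from 32 bits, restores are not */
		pmu->counter = msr->host_initiated ?
			       msr->data : (uint64_t)(int64_t)(int32_t)msr->data;
		return 0;
	}
	return 1;
}

int main(void)
{
	struct pmu_state pmu = { 0, 0 };
	struct msr_data guest = { FAKE_MSR_COUNTER, 0xffffffff, false };
	struct msr_data restore = { FAKE_MSR_STATUS, 0x3, true };

	set_msr(&pmu, &guest);
	set_msr(&pmu, &restore);
	printf("counter=%#llx status=%#llx\n",
	       (unsigned long long)pmu.counter, (unsigned long long)pmu.status);
	return 0;
}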
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7d39d70647e3..a14a6eaf871d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1131 | init_seg(&save->gs); | 1131 | init_seg(&save->gs); |
1132 | 1132 | ||
1133 | save->cs.selector = 0xf000; | 1133 | save->cs.selector = 0xf000; |
1134 | save->cs.base = 0xffff0000; | ||
1134 | /* Executable/Readable Code Segment */ | 1135 | /* Executable/Readable Code Segment */ |
1135 | save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | | 1136 | save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | |
1136 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; | 1137 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; |
1137 | save->cs.limit = 0xffff; | 1138 | save->cs.limit = 0xffff; |
1138 | /* | ||
1139 | * cs.base should really be 0xffff0000, but vmx can't handle that, so | ||
1140 | * be consistent with it. | ||
1141 | * | ||
1142 | * Replace when we have real mode working for vmx. | ||
1143 | */ | ||
1144 | save->cs.base = 0xf0000; | ||
1145 | 1139 | ||
1146 | save->gdtr.limit = 0xffff; | 1140 | save->gdtr.limit = 0xffff; |
1147 | save->idtr.limit = 0xffff; | 1141 | save->idtr.limit = 0xffff; |
@@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1191 | enable_gif(svm); | 1185 | enable_gif(svm); |
1192 | } | 1186 | } |
1193 | 1187 | ||
1194 | static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | 1188 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu) |
1195 | { | 1189 | { |
1196 | struct vcpu_svm *svm = to_svm(vcpu); | 1190 | struct vcpu_svm *svm = to_svm(vcpu); |
1197 | u32 dummy; | 1191 | u32 dummy; |
@@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1199 | 1193 | ||
1200 | init_vmcb(svm); | 1194 | init_vmcb(svm); |
1201 | 1195 | ||
1202 | if (!kvm_vcpu_is_bsp(vcpu)) { | ||
1203 | kvm_rip_write(vcpu, 0); | ||
1204 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; | ||
1205 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; | ||
1206 | } | ||
1207 | |||
1208 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1196 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
1209 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1197 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
1210 | |||
1211 | return 0; | ||
1212 | } | 1198 | } |
1213 | 1199 | ||
1214 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | 1200 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) |
@@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
3487 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && | 3473 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && |
3488 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && | 3474 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && |
3489 | exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) | 3475 | exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) |
3490 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 3476 | printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " |
3491 | "exit_code 0x%x\n", | 3477 | "exit_code 0x%x\n", |
3492 | __func__, svm->vmcb->control.exit_int_info, | 3478 | __func__, svm->vmcb->control.exit_int_info, |
3493 | exit_code); | 3479 | exit_code); |
@@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) | |||
3591 | return; | 3577 | return; |
3592 | } | 3578 | } |
3593 | 3579 | ||
3580 | static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
3581 | { | ||
3582 | return; | ||
3583 | } | ||
3584 | |||
3594 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3585 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
3595 | { | 3586 | { |
3596 | struct vcpu_svm *svm = to_svm(vcpu); | 3587 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3641 | return ret; | 3632 | return ret; |
3642 | } | 3633 | } |
3643 | 3634 | ||
3644 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3635 | static int enable_irq_window(struct kvm_vcpu *vcpu) |
3645 | { | 3636 | { |
3646 | struct vcpu_svm *svm = to_svm(vcpu); | 3637 | struct vcpu_svm *svm = to_svm(vcpu); |
3647 | 3638 | ||
@@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
3655 | svm_set_vintr(svm); | 3646 | svm_set_vintr(svm); |
3656 | svm_inject_irq(svm, 0x0); | 3647 | svm_inject_irq(svm, 0x0); |
3657 | } | 3648 | } |
3649 | return 0; | ||
3658 | } | 3650 | } |
3659 | 3651 | ||
3660 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 3652 | static int enable_nmi_window(struct kvm_vcpu *vcpu) |
3661 | { | 3653 | { |
3662 | struct vcpu_svm *svm = to_svm(vcpu); | 3654 | struct vcpu_svm *svm = to_svm(vcpu); |
3663 | 3655 | ||
3664 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) | 3656 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) |
3665 | == HF_NMI_MASK) | 3657 | == HF_NMI_MASK) |
3666 | return; /* IRET will cause a vm exit */ | 3658 | return 0; /* IRET will cause a vm exit */ |
3667 | 3659 | ||
3668 | /* | 3660 | /* |
3669 | * Something prevents NMI from been injected. Single step over possible | 3661 | * Something prevents NMI from been injected. Single step over possible |
@@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
3672 | svm->nmi_singlestep = true; | 3664 | svm->nmi_singlestep = true; |
3673 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 3665 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
3674 | update_db_bp_intercept(vcpu); | 3666 | update_db_bp_intercept(vcpu); |
3667 | return 0; | ||
3675 | } | 3668 | } |
3676 | 3669 | ||
3677 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 3670 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -4247,6 +4240,11 @@ out: | |||
4247 | return ret; | 4240 | return ret; |
4248 | } | 4241 | } |
4249 | 4242 | ||
4243 | static void svm_handle_external_intr(struct kvm_vcpu *vcpu) | ||
4244 | { | ||
4245 | local_irq_enable(); | ||
4246 | } | ||
4247 | |||
4250 | static struct kvm_x86_ops svm_x86_ops = { | 4248 | static struct kvm_x86_ops svm_x86_ops = { |
4251 | .cpu_has_kvm_support = has_svm, | 4249 | .cpu_has_kvm_support = has_svm, |
4252 | .disabled_by_bios = is_disabled, | 4250 | .disabled_by_bios = is_disabled, |
@@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4314 | .vm_has_apicv = svm_vm_has_apicv, | 4312 | .vm_has_apicv = svm_vm_has_apicv, |
4315 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 4313 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
4316 | .hwapic_isr_update = svm_hwapic_isr_update, | 4314 | .hwapic_isr_update = svm_hwapic_isr_update, |
4315 | .sync_pir_to_irr = svm_sync_pir_to_irr, | ||
4317 | 4316 | ||
4318 | .set_tss_addr = svm_set_tss_addr, | 4317 | .set_tss_addr = svm_set_tss_addr, |
4319 | .get_tdp_level = get_npt_level, | 4318 | .get_tdp_level = get_npt_level, |
@@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4342 | .set_tdp_cr3 = set_tdp_cr3, | 4341 | .set_tdp_cr3 = set_tdp_cr3, |
4343 | 4342 | ||
4344 | .check_intercept = svm_check_intercept, | 4343 | .check_intercept = svm_check_intercept, |
4344 | .handle_external_intr = svm_handle_external_intr, | ||
4345 | }; | 4345 | }; |
4346 | 4346 | ||
4347 | static int __init svm_init(void) | 4347 | static int __init svm_init(void) |
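The SVM side gains svm_sync_pir_to_irr() and svm_handle_external_intr() and wires them into svm_x86_ops, so common x86 code can call these hooks unconditionally even though SVM has no posted-interrupt hardware. A compact illustration of that ops-table-with-no-op-callback pattern (all types and names invented, not the kernel's kvm_x86_ops):

/*
 * Common code calls through a per-vendor table of function pointers, so a
 * backend without hardware support for a feature installs a do-nothing
 * callback instead of forcing NULL checks at every call site.
 */
#include <stdio.h>

struct vcpu { int id; };

struct x86_ops {
	void (*sync_pir_to_irr)(struct vcpu *v);
	void (*handle_external_intr)(struct vcpu *v);
};

/* "svm": no posted-interrupt hardware, so the hook is a no-op */
static void svm_sync_pir_to_irr(struct vcpu *v) { (void)v; }
static void svm_handle_external_intr(struct vcpu *v)
{
	printf("vcpu %d: enable irqs, let the host take the interrupt\n", v->id);
}

static const struct x86_ops svm_ops = {
	.sync_pir_to_irr	= svm_sync_pir_to_irr,
	.handle_external_intr	= svm_handle_external_intr,
};

/* common code never has to test the pointers for NULL */
static void vcpu_run_once(const struct x86_ops *ops, struct vcpu *v)
{
	ops->sync_pir_to_irr(v);
	ops->handle_external_intr(v);
}

int main(void)
{
	struct vcpu v = { 0 };

	vcpu_run_once(&svm_ops, &v);
	return 0;
}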
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 867b81037f96..25a791ed21c8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
84 | static bool __read_mostly fasteoi = 1; | 84 | static bool __read_mostly fasteoi = 1; |
85 | module_param(fasteoi, bool, S_IRUGO); | 85 | module_param(fasteoi, bool, S_IRUGO); |
86 | 86 | ||
87 | static bool __read_mostly enable_apicv_reg_vid; | 87 | static bool __read_mostly enable_apicv = 1; |
88 | module_param(enable_apicv, bool, S_IRUGO); | ||
88 | 89 | ||
90 | static bool __read_mostly enable_shadow_vmcs = 1; | ||
91 | module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); | ||
89 | /* | 92 | /* |
90 | * If nested=1, nested virtualization is supported, i.e., guests may use | 93 | * If nested=1, nested virtualization is supported, i.e., guests may use |
91 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 94 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
@@ -298,7 +301,8 @@ struct __packed vmcs12 { | |||
298 | u32 guest_activity_state; | 301 | u32 guest_activity_state; |
299 | u32 guest_sysenter_cs; | 302 | u32 guest_sysenter_cs; |
300 | u32 host_ia32_sysenter_cs; | 303 | u32 host_ia32_sysenter_cs; |
301 | u32 padding32[8]; /* room for future expansion */ | 304 | u32 vmx_preemption_timer_value; |
305 | u32 padding32[7]; /* room for future expansion */ | ||
302 | u16 virtual_processor_id; | 306 | u16 virtual_processor_id; |
303 | u16 guest_es_selector; | 307 | u16 guest_es_selector; |
304 | u16 guest_cs_selector; | 308 | u16 guest_cs_selector; |
@@ -351,6 +355,12 @@ struct nested_vmx { | |||
351 | /* The host-usable pointer to the above */ | 355 | /* The host-usable pointer to the above */ |
352 | struct page *current_vmcs12_page; | 356 | struct page *current_vmcs12_page; |
353 | struct vmcs12 *current_vmcs12; | 357 | struct vmcs12 *current_vmcs12; |
358 | struct vmcs *current_shadow_vmcs; | ||
359 | /* | ||
360 | * Indicates if the shadow vmcs must be updated with the | ||
361 | * data hold by vmcs12 | ||
362 | */ | ||
363 | bool sync_shadow_vmcs; | ||
354 | 364 | ||
355 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | 365 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ |
356 | struct list_head vmcs02_pool; | 366 | struct list_head vmcs02_pool; |
@@ -365,6 +375,31 @@ struct nested_vmx { | |||
365 | struct page *apic_access_page; | 375 | struct page *apic_access_page; |
366 | }; | 376 | }; |
367 | 377 | ||
378 | #define POSTED_INTR_ON 0 | ||
379 | /* Posted-Interrupt Descriptor */ | ||
380 | struct pi_desc { | ||
381 | u32 pir[8]; /* Posted interrupt requested */ | ||
382 | u32 control; /* bit 0 of control is outstanding notification bit */ | ||
383 | u32 rsvd[7]; | ||
384 | } __aligned(64); | ||
385 | |||
386 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) | ||
387 | { | ||
388 | return test_and_set_bit(POSTED_INTR_ON, | ||
389 | (unsigned long *)&pi_desc->control); | ||
390 | } | ||
391 | |||
392 | static bool pi_test_and_clear_on(struct pi_desc *pi_desc) | ||
393 | { | ||
394 | return test_and_clear_bit(POSTED_INTR_ON, | ||
395 | (unsigned long *)&pi_desc->control); | ||
396 | } | ||
397 | |||
398 | static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | ||
399 | { | ||
400 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | ||
401 | } | ||
402 | |||
368 | struct vcpu_vmx { | 403 | struct vcpu_vmx { |
369 | struct kvm_vcpu vcpu; | 404 | struct kvm_vcpu vcpu; |
370 | unsigned long host_rsp; | 405 | unsigned long host_rsp; |
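The struct pi_desc added above is a 256-bit posted-interrupt request bitmap plus an "outstanding notification" control bit, manipulated with atomic test-and-set so a sender knows whether a notification IPI is already in flight. Below is a userspace approximation using C11 atomics; the layout and helpers are stand-ins, not the architecturally defined descriptor.

/*
 * Standalone approximation of the posted-interrupt descriptor: one bit per
 * vector in pir[], plus an "on" flag in control bit 0.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PI_ON_BIT 0

struct pi_desc {
	atomic_uint pir[8];	/* posted interrupt requests, vectors 0-255 */
	atomic_uint control;	/* bit 0: outstanding notification */
};

static bool pi_test_and_set_pir(struct pi_desc *pi, unsigned int vector)
{
	unsigned int old = atomic_fetch_or(&pi->pir[vector / 32],
					   1u << (vector % 32));
	return old & (1u << (vector % 32));
}

static bool pi_test_and_set_on(struct pi_desc *pi)
{
	return atomic_fetch_or(&pi->control, 1u << PI_ON_BIT) & (1u << PI_ON_BIT);
}

int main(void)
{
	struct pi_desc pi = { 0 };

	if (!pi_test_and_set_pir(&pi, 0xec) && !pi_test_and_set_on(&pi))
		printf("would send notification IPI for vector 0xec\n");
	if (pi_test_and_set_pir(&pi, 0xec))
		printf("vector 0xec already posted, nothing to do\n");
	return 0;
}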
@@ -377,6 +412,7 @@ struct vcpu_vmx { | |||
377 | struct shared_msr_entry *guest_msrs; | 412 | struct shared_msr_entry *guest_msrs; |
378 | int nmsrs; | 413 | int nmsrs; |
379 | int save_nmsrs; | 414 | int save_nmsrs; |
415 | unsigned long host_idt_base; | ||
380 | #ifdef CONFIG_X86_64 | 416 | #ifdef CONFIG_X86_64 |
381 | u64 msr_host_kernel_gs_base; | 417 | u64 msr_host_kernel_gs_base; |
382 | u64 msr_guest_kernel_gs_base; | 418 | u64 msr_guest_kernel_gs_base; |
@@ -428,6 +464,9 @@ struct vcpu_vmx { | |||
428 | 464 | ||
429 | bool rdtscp_enabled; | 465 | bool rdtscp_enabled; |
430 | 466 | ||
467 | /* Posted interrupt descriptor */ | ||
468 | struct pi_desc pi_desc; | ||
469 | |||
431 | /* Support for a guest hypervisor (nested VMX) */ | 470 | /* Support for a guest hypervisor (nested VMX) */ |
432 | struct nested_vmx nested; | 471 | struct nested_vmx nested; |
433 | }; | 472 | }; |
@@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
451 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | 490 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ |
452 | [number##_HIGH] = VMCS12_OFFSET(name)+4 | 491 | [number##_HIGH] = VMCS12_OFFSET(name)+4 |
453 | 492 | ||
493 | |||
494 | static const unsigned long shadow_read_only_fields[] = { | ||
495 | /* | ||
496 | * We do NOT shadow fields that are modified when L0 | ||
497 | * traps and emulates any vmx instruction (e.g. VMPTRLD, | ||
498 | * VMXON...) executed by L1. | ||
499 | * For example, VM_INSTRUCTION_ERROR is read | ||
500 | * by L1 if a vmx instruction fails (part of the error path). | ||
501 | * Note the code assumes this logic. If for some reason | ||
502 | * we start shadowing these fields then we need to | ||
503 | * force a shadow sync when L0 emulates vmx instructions | ||
504 | * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified | ||
505 | * by nested_vmx_failValid) | ||
506 | */ | ||
507 | VM_EXIT_REASON, | ||
508 | VM_EXIT_INTR_INFO, | ||
509 | VM_EXIT_INSTRUCTION_LEN, | ||
510 | IDT_VECTORING_INFO_FIELD, | ||
511 | IDT_VECTORING_ERROR_CODE, | ||
512 | VM_EXIT_INTR_ERROR_CODE, | ||
513 | EXIT_QUALIFICATION, | ||
514 | GUEST_LINEAR_ADDRESS, | ||
515 | GUEST_PHYSICAL_ADDRESS | ||
516 | }; | ||
517 | static const int max_shadow_read_only_fields = | ||
518 | ARRAY_SIZE(shadow_read_only_fields); | ||
519 | |||
520 | static const unsigned long shadow_read_write_fields[] = { | ||
521 | GUEST_RIP, | ||
522 | GUEST_RSP, | ||
523 | GUEST_CR0, | ||
524 | GUEST_CR3, | ||
525 | GUEST_CR4, | ||
526 | GUEST_INTERRUPTIBILITY_INFO, | ||
527 | GUEST_RFLAGS, | ||
528 | GUEST_CS_SELECTOR, | ||
529 | GUEST_CS_AR_BYTES, | ||
530 | GUEST_CS_LIMIT, | ||
531 | GUEST_CS_BASE, | ||
532 | GUEST_ES_BASE, | ||
533 | CR0_GUEST_HOST_MASK, | ||
534 | CR0_READ_SHADOW, | ||
535 | CR4_READ_SHADOW, | ||
536 | TSC_OFFSET, | ||
537 | EXCEPTION_BITMAP, | ||
538 | CPU_BASED_VM_EXEC_CONTROL, | ||
539 | VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
540 | VM_ENTRY_INTR_INFO_FIELD, | ||
541 | VM_ENTRY_INSTRUCTION_LEN, | ||
542 | VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
543 | HOST_FS_BASE, | ||
544 | HOST_GS_BASE, | ||
545 | HOST_FS_SELECTOR, | ||
546 | HOST_GS_SELECTOR | ||
547 | }; | ||
548 | static const int max_shadow_read_write_fields = | ||
549 | ARRAY_SIZE(shadow_read_write_fields); | ||
550 | |||
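The shadow_read_only_fields / shadow_read_write_fields tables above, together with their ARRAY_SIZE() counters, are the kind of const field list that a sync routine can simply iterate to copy values between the shadow VMCS and vmcs12. The following sketch shows one plausible shape of such a table-driven copy loop; the field enum, storage and helper are assumptions made for illustration.

/*
 * Table-driven copy: a fixed array of field identifiers drives the loop,
 * so adding a field to the shadow set is a one-line table edit.
 */
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum field { GUEST_RIP, GUEST_RSP, GUEST_RFLAGS, EXIT_REASON, NR_FIELDS };

static const enum field shadow_rw_fields[] = {
	GUEST_RIP, GUEST_RSP, GUEST_RFLAGS,
};

struct vmcs_image { unsigned long val[NR_FIELDS]; };

static void copy_shadow_to_vmcs12(const struct vmcs_image *shadow,
				  struct vmcs_image *vmcs12)
{
	for (unsigned int i = 0; i < ARRAY_SIZE(shadow_rw_fields); i++) {
		enum field f = shadow_rw_fields[i];
		vmcs12->val[f] = shadow->val[f];	/* vmcs_readl() stand-in */
	}
}

int main(void)
{
	struct vmcs_image shadow = { { 0x1000, 0x2000, 0x2, 0 } };
	struct vmcs_image vmcs12 = { { 0 } };

	copy_shadow_to_vmcs12(&shadow, &vmcs12);
	printf("vmcs12 GUEST_RIP = %#lx, EXIT_REASON untouched = %lu\n",
	       vmcs12.val[GUEST_RIP], vmcs12.val[EXIT_REASON]);
	return 0;
}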
454 | static const unsigned short vmcs_field_to_offset_table[] = { | 551 | static const unsigned short vmcs_field_to_offset_table[] = { |
455 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), | 552 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), |
456 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), | 553 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), |
@@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
537 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), | 634 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), |
538 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), | 635 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), |
539 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), | 636 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), |
637 | FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), | ||
540 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), | 638 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), |
541 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), | 639 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), |
542 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), | 640 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), |
@@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
624 | struct kvm_segment *var, int seg); | 722 | struct kvm_segment *var, int seg); |
625 | static bool guest_state_valid(struct kvm_vcpu *vcpu); | 723 | static bool guest_state_valid(struct kvm_vcpu *vcpu); |
626 | static u32 vmx_segment_access_rights(struct kvm_segment *var); | 724 | static u32 vmx_segment_access_rights(struct kvm_segment *var); |
725 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); | ||
726 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | ||
727 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | ||
627 | 728 | ||
628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 729 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 730 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy; | |||
640 | static unsigned long *vmx_msr_bitmap_longmode; | 741 | static unsigned long *vmx_msr_bitmap_longmode; |
641 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | 742 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; |
642 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | 743 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; |
744 | static unsigned long *vmx_vmread_bitmap; | ||
745 | static unsigned long *vmx_vmwrite_bitmap; | ||
643 | 746 | ||
644 | static bool cpu_has_load_ia32_efer; | 747 | static bool cpu_has_load_ia32_efer; |
645 | static bool cpu_has_load_perf_global_ctrl; | 748 | static bool cpu_has_load_perf_global_ctrl; |
@@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) | |||
782 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | 885 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; |
783 | } | 886 | } |
784 | 887 | ||
888 | static inline bool cpu_has_vmx_posted_intr(void) | ||
889 | { | ||
890 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | ||
891 | } | ||
892 | |||
893 | static inline bool cpu_has_vmx_apicv(void) | ||
894 | { | ||
895 | return cpu_has_vmx_apic_register_virt() && | ||
896 | cpu_has_vmx_virtual_intr_delivery() && | ||
897 | cpu_has_vmx_posted_intr(); | ||
898 | } | ||
899 | |||
785 | static inline bool cpu_has_vmx_flexpriority(void) | 900 | static inline bool cpu_has_vmx_flexpriority(void) |
786 | { | 901 | { |
787 | return cpu_has_vmx_tpr_shadow() && | 902 | return cpu_has_vmx_tpr_shadow() && |
@@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void) | |||
895 | SECONDARY_EXEC_WBINVD_EXITING; | 1010 | SECONDARY_EXEC_WBINVD_EXITING; |
896 | } | 1011 | } |
897 | 1012 | ||
1013 | static inline bool cpu_has_vmx_shadow_vmcs(void) | ||
1014 | { | ||
1015 | u64 vmx_msr; | ||
1016 | rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); | ||
1017 | /* check if the cpu supports writing r/o exit information fields */ | ||
1018 | if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) | ||
1019 | return false; | ||
1020 | |||
1021 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
1022 | SECONDARY_EXEC_SHADOW_VMCS; | ||
1023 | } | ||
1024 | |||
898 | static inline bool report_flexpriority(void) | 1025 | static inline bool report_flexpriority(void) |
899 | { | 1026 | { |
900 | return flexpriority_enabled; | 1027 | return flexpriority_enabled; |
@@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1790 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 1917 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
1791 | 1918 | ||
1792 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && | 1919 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && |
1793 | nested_pf_handled(vcpu)) | 1920 | !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) |
1794 | return; | 1921 | return; |
1795 | 1922 | ||
1796 | if (has_error_code) { | 1923 | if (has_error_code) { |
@@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; | |||
2022 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | 2149 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; |
2023 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2150 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
2024 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2151 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
2152 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | ||
2025 | static __init void nested_vmx_setup_ctls_msrs(void) | 2153 | static __init void nested_vmx_setup_ctls_msrs(void) |
2026 | { | 2154 | { |
2027 | /* | 2155 | /* |
@@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2040 | */ | 2168 | */ |
2041 | 2169 | ||
2042 | /* pin-based controls */ | 2170 | /* pin-based controls */ |
2171 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, | ||
2172 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); | ||
2043 | /* | 2173 | /* |
2044 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is | 2174 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is |
2045 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. | 2175 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. |
2046 | */ | 2176 | */ |
2047 | nested_vmx_pinbased_ctls_low = 0x16 ; | 2177 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
2048 | nested_vmx_pinbased_ctls_high = 0x16 | | 2178 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | |
2049 | PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | | 2179 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | |
2050 | PIN_BASED_VIRTUAL_NMIS; | 2180 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2181 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2051 | 2182 | ||
2052 | /* exit controls */ | 2183 | /* |
2053 | nested_vmx_exit_ctls_low = 0; | 2184 | * Exit controls |
2185 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | ||
2186 | * 17 must be 1. | ||
2187 | */ | ||
2188 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2054 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | 2189 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ |
2055 | #ifdef CONFIG_X86_64 | 2190 | #ifdef CONFIG_X86_64 |
2056 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2191 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; |
2057 | #else | 2192 | #else |
2058 | nested_vmx_exit_ctls_high = 0; | 2193 | nested_vmx_exit_ctls_high = 0; |
2059 | #endif | 2194 | #endif |
2195 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2060 | 2196 | ||
2061 | /* entry controls */ | 2197 | /* entry controls */ |
2062 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2198 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
2063 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); | 2199 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); |
2064 | nested_vmx_entry_ctls_low = 0; | 2200 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ |
2201 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2065 | nested_vmx_entry_ctls_high &= | 2202 | nested_vmx_entry_ctls_high &= |
2066 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | 2203 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; |
2204 | nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2067 | 2205 | ||
2068 | /* cpu-based controls */ | 2206 | /* cpu-based controls */ |
2069 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2207 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
@@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2080 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 2218 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
2081 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 2219 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
2082 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | | 2220 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | |
2221 | CPU_BASED_PAUSE_EXITING | | ||
2083 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2222 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
2084 | /* | 2223 | /* |
2085 | * We can allow some features even when not supported by the | 2224 | * We can allow some features even when not supported by the |
@@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2094 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); | 2233 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); |
2095 | nested_vmx_secondary_ctls_low = 0; | 2234 | nested_vmx_secondary_ctls_low = 0; |
2096 | nested_vmx_secondary_ctls_high &= | 2235 | nested_vmx_secondary_ctls_high &= |
2097 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2236 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2237 | SECONDARY_EXEC_WBINVD_EXITING; | ||
2238 | |||
2239 | /* miscellaneous data */ | ||
2240 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | ||
2241 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | ||
2242 | VMX_MISC_SAVE_EFER_LMA; | ||
2243 | nested_vmx_misc_high = 0; | ||
2098 | } | 2244 | } |
2099 | 2245 | ||
2100 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) | 2246 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) |
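nested_vmx_setup_ctls_msrs() above derives each nested control MSR as a low half (bits that must be 1) and a high half (bits allowed to be 1), and vmx_control_verify(), whose declaration closes the hunk, checks an L1-requested value against those halves. One standalone way to express that check is sketched below; the expression is my formulation and not copied from the kernel.

/*
 * Hedged sketch of a low/high control check: "low" holds bits that must be
 * 1, "high" holds the bits that are allowed to be 1.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool control_verify(uint32_t control, uint32_t low, uint32_t high)
{
	return (control & low) == low &&	/* all required bits set   */
	       (control & ~high) == 0;		/* nothing outside allowed */
}

int main(void)
{
	uint32_t low = 0x00000016;	/* bits 1, 2 and 4 must be 1, as above */
	uint32_t high = 0x000000ff;

	printf("%d\n", control_verify(0x16, low, high));	/* 1 */
	printf("%d\n", control_verify(0x12, low, high));	/* 0: bit 2 clear */
	printf("%d\n", control_verify(0x116, low, high));	/* 0: bit 8 not allowed */
	return 0;
}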
@@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2165 | nested_vmx_entry_ctls_high); | 2311 | nested_vmx_entry_ctls_high); |
2166 | break; | 2312 | break; |
2167 | case MSR_IA32_VMX_MISC: | 2313 | case MSR_IA32_VMX_MISC: |
2168 | *pdata = 0; | 2314 | *pdata = vmx_control_msr(nested_vmx_misc_low, |
2315 | nested_vmx_misc_high); | ||
2169 | break; | 2316 | break; |
2170 | /* | 2317 | /* |
2171 | * These MSRs specify bits which the guest must keep fixed (on or off) | 2318 | * These MSRs specify bits which the guest must keep fixed (on or off) |
@@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2529 | u32 _vmexit_control = 0; | 2676 | u32 _vmexit_control = 0; |
2530 | u32 _vmentry_control = 0; | 2677 | u32 _vmentry_control = 0; |
2531 | 2678 | ||
2532 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
2533 | opt = PIN_BASED_VIRTUAL_NMIS; | ||
2534 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
2535 | &_pin_based_exec_control) < 0) | ||
2536 | return -EIO; | ||
2537 | |||
2538 | min = CPU_BASED_HLT_EXITING | | 2679 | min = CPU_BASED_HLT_EXITING | |
2539 | #ifdef CONFIG_X86_64 | 2680 | #ifdef CONFIG_X86_64 |
2540 | CPU_BASED_CR8_LOAD_EXITING | | 2681 | CPU_BASED_CR8_LOAD_EXITING | |
@@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2573 | SECONDARY_EXEC_RDTSCP | | 2714 | SECONDARY_EXEC_RDTSCP | |
2574 | SECONDARY_EXEC_ENABLE_INVPCID | | 2715 | SECONDARY_EXEC_ENABLE_INVPCID | |
2575 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2716 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2576 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | 2717 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2718 | SECONDARY_EXEC_SHADOW_VMCS; | ||
2577 | if (adjust_vmx_controls(min2, opt2, | 2719 | if (adjust_vmx_controls(min2, opt2, |
2578 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2720 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2579 | &_cpu_based_2nd_exec_control) < 0) | 2721 | &_cpu_based_2nd_exec_control) < 0) |
@@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2605 | #ifdef CONFIG_X86_64 | 2747 | #ifdef CONFIG_X86_64 |
2606 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2748 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
2607 | #endif | 2749 | #endif |
2608 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; | 2750 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | |
2751 | VM_EXIT_ACK_INTR_ON_EXIT; | ||
2609 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | 2752 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, |
2610 | &_vmexit_control) < 0) | 2753 | &_vmexit_control) < 0) |
2611 | return -EIO; | 2754 | return -EIO; |
2612 | 2755 | ||
2756 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
2757 | opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; | ||
2758 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
2759 | &_pin_based_exec_control) < 0) | ||
2760 | return -EIO; | ||
2761 | |||
2762 | if (!(_cpu_based_2nd_exec_control & | ||
2763 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || | ||
2764 | !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) | ||
2765 | _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; | ||
2766 | |||
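setup_vmcs_config() above now computes the pin-based controls after the exit controls so PIN_BASED_POSTED_INTR can be dropped when virtual-interrupt delivery or VM_EXIT_ACK_INTR_ON_EXIT is unavailable. The helper it relies on follows a required/optional adjustment pattern, sketched here in simplified form (the real adjust_vmx_controls() also honours the capability MSR's required-1 half):

/*
 * "min" bits are mandatory, "opt" bits are nice to have, and the capability
 * word says which bits the hardware allows to be 1.  If a mandatory bit is
 * unsupported the whole setup fails.  Bit names are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define CTL_EXT_INTR_MASK  (1u << 0)
#define CTL_NMI_EXITING    (1u << 3)
#define CTL_VIRTUAL_NMIS   (1u << 5)
#define CTL_POSTED_INTR    (1u << 7)

static int adjust_controls(uint32_t min, uint32_t opt, uint32_t allowed1,
			   uint32_t *result)
{
	uint32_t ctl = min | opt;

	ctl &= allowed1;		/* drop everything the CPU lacks */
	if ((ctl & min) != min)		/* a mandatory bit went missing   */
		return -1;
	*result = ctl;
	return 0;
}

int main(void)
{
	uint32_t allowed1 = CTL_EXT_INTR_MASK | CTL_NMI_EXITING | CTL_VIRTUAL_NMIS;
	uint32_t pin_based;

	if (adjust_controls(CTL_EXT_INTR_MASK | CTL_NMI_EXITING,
			    CTL_VIRTUAL_NMIS | CTL_POSTED_INTR,
			    allowed1, &pin_based) < 0)
		return 1;
	/* posted interrupts silently dropped because allowed1 lacks the bit */
	printf("pin based controls = %#x\n", pin_based);
	return 0;
}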
2613 | min = 0; | 2767 | min = 0; |
2614 | opt = VM_ENTRY_LOAD_IA32_PAT; | 2768 | opt = VM_ENTRY_LOAD_IA32_PAT; |
2615 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | 2769 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2762,6 +2916,8 @@ static __init int hardware_setup(void) | |||
2762 | 2916 | ||
2763 | if (!cpu_has_vmx_vpid()) | 2917 | if (!cpu_has_vmx_vpid()) |
2764 | enable_vpid = 0; | 2918 | enable_vpid = 0; |
2919 | if (!cpu_has_vmx_shadow_vmcs()) | ||
2920 | enable_shadow_vmcs = 0; | ||
2765 | 2921 | ||
2766 | if (!cpu_has_vmx_ept() || | 2922 | if (!cpu_has_vmx_ept() || |
2767 | !cpu_has_vmx_ept_4levels()) { | 2923 | !cpu_has_vmx_ept_4levels()) { |
@@ -2788,14 +2944,16 @@ static __init int hardware_setup(void) | |||
2788 | if (!cpu_has_vmx_ple()) | 2944 | if (!cpu_has_vmx_ple()) |
2789 | ple_gap = 0; | 2945 | ple_gap = 0; |
2790 | 2946 | ||
2791 | if (!cpu_has_vmx_apic_register_virt() || | 2947 | if (!cpu_has_vmx_apicv()) |
2792 | !cpu_has_vmx_virtual_intr_delivery()) | 2948 | enable_apicv = 0; |
2793 | enable_apicv_reg_vid = 0; | ||
2794 | 2949 | ||
2795 | if (enable_apicv_reg_vid) | 2950 | if (enable_apicv) |
2796 | kvm_x86_ops->update_cr8_intercept = NULL; | 2951 | kvm_x86_ops->update_cr8_intercept = NULL; |
2797 | else | 2952 | else { |
2798 | kvm_x86_ops->hwapic_irr_update = NULL; | 2953 | kvm_x86_ops->hwapic_irr_update = NULL; |
2954 | kvm_x86_ops->deliver_posted_interrupt = NULL; | ||
2955 | kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; | ||
2956 | } | ||
2799 | 2957 | ||
2800 | if (nested) | 2958 | if (nested) |
2801 | nested_vmx_setup_ctls_msrs(); | 2959 | nested_vmx_setup_ctls_msrs(); |
@@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
2876 | vmx->cpl = 0; | 3034 | vmx->cpl = 0; |
2877 | } | 3035 | } |
2878 | 3036 | ||
2879 | static gva_t rmode_tss_base(struct kvm *kvm) | ||
2880 | { | ||
2881 | if (!kvm->arch.tss_addr) { | ||
2882 | struct kvm_memslots *slots; | ||
2883 | struct kvm_memory_slot *slot; | ||
2884 | gfn_t base_gfn; | ||
2885 | |||
2886 | slots = kvm_memslots(kvm); | ||
2887 | slot = id_to_memslot(slots, 0); | ||
2888 | base_gfn = slot->base_gfn + slot->npages - 3; | ||
2889 | |||
2890 | return base_gfn << PAGE_SHIFT; | ||
2891 | } | ||
2892 | return kvm->arch.tss_addr; | ||
2893 | } | ||
2894 | |||
2895 | static void fix_rmode_seg(int seg, struct kvm_segment *save) | 3037 | static void fix_rmode_seg(int seg, struct kvm_segment *save) |
2896 | { | 3038 | { |
2897 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3039 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2942 | 3084 | ||
2943 | /* | 3085 | /* |
2944 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 3086 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
2945 | * vcpu. Call it here with phys address pointing 16M below 4G. | 3087 | * vcpu. Warn the user that an update is overdue. |
2946 | */ | 3088 | */ |
2947 | if (!vcpu->kvm->arch.tss_addr) { | 3089 | if (!vcpu->kvm->arch.tss_addr) |
2948 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " | 3090 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " |
2949 | "called before entering vcpu\n"); | 3091 | "called before entering vcpu\n"); |
2950 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
2951 | vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); | ||
2952 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
2953 | } | ||
2954 | 3092 | ||
2955 | vmx_segment_cache_clear(vmx); | 3093 | vmx_segment_cache_clear(vmx); |
2956 | 3094 | ||
2957 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 3095 | vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); |
2958 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); | 3096 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); |
2959 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 3097 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
2960 | 3098 | ||
@@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
3214 | */ | 3352 | */ |
3215 | if (!nested_vmx_allowed(vcpu)) | 3353 | if (!nested_vmx_allowed(vcpu)) |
3216 | return 1; | 3354 | return 1; |
3217 | } else if (to_vmx(vcpu)->nested.vmxon) | 3355 | } |
3356 | if (to_vmx(vcpu)->nested.vmxon && | ||
3357 | ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) | ||
3218 | return 1; | 3358 | return 1; |
3219 | 3359 | ||
3220 | vcpu->arch.cr4 = cr4; | 3360 | vcpu->arch.cr4 = cr4; |
@@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) | |||
3550 | return true; | 3690 | return true; |
3551 | 3691 | ||
3552 | /* real mode guest state checks */ | 3692 | /* real mode guest state checks */ |
3553 | if (!is_protmode(vcpu)) { | 3693 | if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { |
3554 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 3694 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
3555 | return false; | 3695 | return false; |
3556 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 3696 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
@@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm) | |||
3599 | int r, idx, ret = 0; | 3739 | int r, idx, ret = 0; |
3600 | 3740 | ||
3601 | idx = srcu_read_lock(&kvm->srcu); | 3741 | idx = srcu_read_lock(&kvm->srcu); |
3602 | fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 3742 | fn = kvm->arch.tss_addr >> PAGE_SHIFT; |
3603 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 3743 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
3604 | if (r < 0) | 3744 | if (r < 0) |
3605 | goto out; | 3745 | goto out; |
@@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
3692 | kvm_userspace_mem.flags = 0; | 3832 | kvm_userspace_mem.flags = 0; |
3693 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; | 3833 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; |
3694 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3834 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
3695 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); | 3835 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
3696 | if (r) | 3836 | if (r) |
3697 | goto out; | 3837 | goto out; |
3698 | 3838 | ||
@@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
3722 | kvm_userspace_mem.guest_phys_addr = | 3862 | kvm_userspace_mem.guest_phys_addr = |
3723 | kvm->arch.ept_identity_map_addr; | 3863 | kvm->arch.ept_identity_map_addr; |
3724 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3864 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
3725 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); | 3865 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
3726 | if (r) | 3866 | if (r) |
3727 | goto out; | 3867 | goto out; |
3728 | 3868 | ||
@@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | |||
3869 | msr, MSR_TYPE_W); | 4009 | msr, MSR_TYPE_W); |
3870 | } | 4010 | } |
3871 | 4011 | ||
4012 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
4013 | { | ||
4014 | return enable_apicv && irqchip_in_kernel(kvm); | ||
4015 | } | ||
4016 | |||
4017 | /* | ||
4018 | * Send interrupt to vcpu via posted interrupt way. | ||
4019 | * 1. If target vcpu is running(non-root mode), send posted interrupt | ||
4020 | * notification to vcpu and hardware will sync PIR to vIRR atomically. | ||
4021 | * 2. If target vcpu isn't running(root mode), kick it to pick up the | ||
4022 | * interrupt from PIR in next vmentry. | ||
4023 | */ | ||
4024 | static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) | ||
4025 | { | ||
4026 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4027 | int r; | ||
4028 | |||
4029 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) | ||
4030 | return; | ||
4031 | |||
4032 | r = pi_test_and_set_on(&vmx->pi_desc); | ||
4033 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
4034 | #ifdef CONFIG_SMP | ||
4035 | if (!r && (vcpu->mode == IN_GUEST_MODE)) | ||
4036 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), | ||
4037 | POSTED_INTR_VECTOR); | ||
4038 | else | ||
4039 | #endif | ||
4040 | kvm_vcpu_kick(vcpu); | ||
4041 | } | ||
4042 | |||
4043 | static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
4044 | { | ||
4045 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4046 | |||
4047 | if (!pi_test_and_clear_on(&vmx->pi_desc)) | ||
4048 | return; | ||
4049 | |||
4050 | kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
4051 | } | ||
4052 | |||
4053 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) | ||
4054 | { | ||
4055 | return; | ||
4056 | } | ||
4057 | |||
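As a rough user-space model of the PIR/ON protocol the comment above describes (a sketch only: struct pi_desc, pir_test_and_set(), deliver_posted_interrupt() and the in_guest flag here are simplified stand-ins, not the kernel's definitions), the point is that the notification IPI is only useful when the ON bit flips from 0 to 1 while the target vcpu is in non-root mode; otherwise a plain kick suffices:

/* Illustrative sketch of the posted-interrupt descriptor protocol. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct pi_desc {
	atomic_ullong pir[4];	/* 256 pending-vector bits */
	atomic_int    on;	/* outstanding notification */
};

/* Returns true if the vector was already pending. */
static bool pir_test_and_set(struct pi_desc *d, int vector)
{
	unsigned long long mask = 1ULL << (vector & 63);
	return atomic_fetch_or(&d->pir[(vector >> 6) & 3], mask) & mask;
}

static void deliver_posted_interrupt(struct pi_desc *d, int vector, bool in_guest)
{
	if (pir_test_and_set(d, vector))
		return;					/* already pending: nothing more to do */
	if (!atomic_exchange(&d->on, 1) && in_guest)
		puts("send notification IPI");		/* hardware syncs PIR into vIRR */
	else
		puts("kick vcpu; PIR is picked up on the next vmentry");
}

int main(void)
{
	struct pi_desc d = { 0 };
	deliver_posted_interrupt(&d, 0x31, true);	/* first delivery: IPI path */
	deliver_posted_interrupt(&d, 0x31, true);	/* coalesced: returns early */
	return 0;
}

The second call coalesces into the already-pending vector, which is why the kernel function above returns early when pi_test_and_set_pir() reports the bit was already set.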
3872 | /* | 4058 | /* |
3873 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that | 4059 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that |
3874 | * will not change in the lifetime of the guest. | 4060 | * will not change in the lifetime of the guest. |
3875 | * Note that host-state that does change is set elsewhere. E.g., host-state | 4061 | * Note that host-state that does change is set elsewhere. E.g., host-state |
3876 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. | 4062 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. |
3877 | */ | 4063 | */ |
3878 | static void vmx_set_constant_host_state(void) | 4064 | static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) |
3879 | { | 4065 | { |
3880 | u32 low32, high32; | 4066 | u32 low32, high32; |
3881 | unsigned long tmpl; | 4067 | unsigned long tmpl; |
@@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void) | |||
3903 | 4089 | ||
3904 | native_store_idt(&dt); | 4090 | native_store_idt(&dt); |
3905 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | 4091 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
4092 | vmx->host_idt_base = dt.address; | ||
3906 | 4093 | ||
3907 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ | 4094 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ |
3908 | 4095 | ||
@@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) | |||
3928 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | 4115 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); |
3929 | } | 4116 | } |
3930 | 4117 | ||
4118 | static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | ||
4119 | { | ||
4120 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; | ||
4121 | |||
4122 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
4123 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; | ||
4124 | return pin_based_exec_ctrl; | ||
4125 | } | ||
4126 | |||
3931 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | 4127 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
3932 | { | 4128 | { |
3933 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; | 4129 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; |
@@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
3945 | return exec_control; | 4141 | return exec_control; |
3946 | } | 4142 | } |
3947 | 4143 | ||
3948 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
3949 | { | ||
3950 | return enable_apicv_reg_vid && irqchip_in_kernel(kvm); | ||
3951 | } | ||
3952 | |||
3953 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 4144 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
3954 | { | 4145 | { |
3955 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 4146 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
@@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
3971 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 4162 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
3972 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 4163 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
3973 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 4164 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
4165 | /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD | ||
4166 | (handle_vmptrld). | ||
4167 | We can NOT enable shadow_vmcs here because we don't yet have a | ||
4168 | current VMCS12. | ||
4169 | */ | ||
4170 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
3974 | return exec_control; | 4171 | return exec_control; |
3975 | } | 4172 | } |
3976 | 4173 | ||
@@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
3999 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); | 4196 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); |
4000 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); | 4197 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); |
4001 | 4198 | ||
4199 | if (enable_shadow_vmcs) { | ||
4200 | vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); | ||
4201 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); | ||
4202 | } | ||
4002 | if (cpu_has_vmx_msr_bitmap()) | 4203 | if (cpu_has_vmx_msr_bitmap()) |
4003 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); | 4204 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
4004 | 4205 | ||
4005 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 4206 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
4006 | 4207 | ||
4007 | /* Control */ | 4208 | /* Control */ |
4008 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, | 4209 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); |
4009 | vmcs_config.pin_based_exec_ctrl); | ||
4010 | 4210 | ||
4011 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 4211 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
4012 | 4212 | ||
@@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4015 | vmx_secondary_exec_control(vmx)); | 4215 | vmx_secondary_exec_control(vmx)); |
4016 | } | 4216 | } |
4017 | 4217 | ||
4018 | if (enable_apicv_reg_vid) { | 4218 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { |
4019 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | 4219 | vmcs_write64(EOI_EXIT_BITMAP0, 0); |
4020 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | 4220 | vmcs_write64(EOI_EXIT_BITMAP1, 0); |
4021 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | 4221 | vmcs_write64(EOI_EXIT_BITMAP2, 0); |
4022 | vmcs_write64(EOI_EXIT_BITMAP3, 0); | 4222 | vmcs_write64(EOI_EXIT_BITMAP3, 0); |
4023 | 4223 | ||
4024 | vmcs_write16(GUEST_INTR_STATUS, 0); | 4224 | vmcs_write16(GUEST_INTR_STATUS, 0); |
4225 | |||
4226 | vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); | ||
4227 | vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); | ||
4025 | } | 4228 | } |
4026 | 4229 | ||
4027 | if (ple_gap) { | 4230 | if (ple_gap) { |
@@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4035 | 4238 | ||
4036 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ | 4239 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ |
4037 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ | 4240 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ |
4038 | vmx_set_constant_host_state(); | 4241 | vmx_set_constant_host_state(vmx); |
4039 | #ifdef CONFIG_X86_64 | 4242 | #ifdef CONFIG_X86_64 |
4040 | rdmsrl(MSR_FS_BASE, a); | 4243 | rdmsrl(MSR_FS_BASE, a); |
4041 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ | 4244 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ |
@@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4089 | return 0; | 4292 | return 0; |
4090 | } | 4293 | } |
4091 | 4294 | ||
4092 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 4295 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
4093 | { | 4296 | { |
4094 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4297 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4095 | u64 msr; | 4298 | u64 msr; |
4096 | int ret; | ||
4097 | 4299 | ||
4098 | vmx->rmode.vm86_active = 0; | 4300 | vmx->rmode.vm86_active = 0; |
4099 | 4301 | ||
@@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4109 | vmx_segment_cache_clear(vmx); | 4311 | vmx_segment_cache_clear(vmx); |
4110 | 4312 | ||
4111 | seg_setup(VCPU_SREG_CS); | 4313 | seg_setup(VCPU_SREG_CS); |
4112 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4314 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
4113 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 4315 | vmcs_write32(GUEST_CS_BASE, 0xffff0000); |
4114 | else { | ||
4115 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | ||
4116 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | ||
4117 | } | ||
4118 | 4316 | ||
4119 | seg_setup(VCPU_SREG_DS); | 4317 | seg_setup(VCPU_SREG_DS); |
4120 | seg_setup(VCPU_SREG_ES); | 4318 | seg_setup(VCPU_SREG_ES); |
@@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4137 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | 4335 | vmcs_writel(GUEST_SYSENTER_EIP, 0); |
4138 | 4336 | ||
4139 | vmcs_writel(GUEST_RFLAGS, 0x02); | 4337 | vmcs_writel(GUEST_RFLAGS, 0x02); |
4140 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4338 | kvm_rip_write(vcpu, 0xfff0); |
4141 | kvm_rip_write(vcpu, 0xfff0); | ||
4142 | else | ||
4143 | kvm_rip_write(vcpu, 0); | ||
4144 | 4339 | ||
4145 | vmcs_writel(GUEST_GDTR_BASE, 0); | 4340 | vmcs_writel(GUEST_GDTR_BASE, 0); |
4146 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); | 4341 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); |
@@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4171 | vmcs_write64(APIC_ACCESS_ADDR, | 4366 | vmcs_write64(APIC_ACCESS_ADDR, |
4172 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); | 4367 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); |
4173 | 4368 | ||
4369 | if (vmx_vm_has_apicv(vcpu->kvm)) | ||
4370 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | ||
4371 | |||
4174 | if (vmx->vpid != 0) | 4372 | if (vmx->vpid != 0) |
4175 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 4373 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
4176 | 4374 | ||
4177 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 4375 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
4178 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
4179 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ | 4376 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
4180 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
4181 | vmx_set_cr4(&vmx->vcpu, 0); | 4377 | vmx_set_cr4(&vmx->vcpu, 0); |
4182 | vmx_set_efer(&vmx->vcpu, 0); | 4378 | vmx_set_efer(&vmx->vcpu, 0); |
4183 | vmx_fpu_activate(&vmx->vcpu); | 4379 | vmx_fpu_activate(&vmx->vcpu); |
4184 | update_exception_bitmap(&vmx->vcpu); | 4380 | update_exception_bitmap(&vmx->vcpu); |
4185 | 4381 | ||
4186 | vpid_sync_context(vmx); | 4382 | vpid_sync_context(vmx); |
4187 | |||
4188 | ret = 0; | ||
4189 | |||
4190 | return ret; | ||
4191 | } | 4383 | } |
4192 | 4384 | ||
4193 | /* | 4385 | /* |
@@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | |||
4200 | PIN_BASED_EXT_INTR_MASK; | 4392 | PIN_BASED_EXT_INTR_MASK; |
4201 | } | 4393 | } |
4202 | 4394 | ||
4203 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 4395 | static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) |
4396 | { | ||
4397 | return get_vmcs12(vcpu)->pin_based_vm_exec_control & | ||
4398 | PIN_BASED_NMI_EXITING; | ||
4399 | } | ||
4400 | |||
4401 | static int enable_irq_window(struct kvm_vcpu *vcpu) | ||
4204 | { | 4402 | { |
4205 | u32 cpu_based_vm_exec_control; | 4403 | u32 cpu_based_vm_exec_control; |
4206 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4404 | |
4405 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | ||
4207 | /* | 4406 | /* |
4208 | * We get here if vmx_interrupt_allowed() said we can't | 4407 | * We get here if vmx_interrupt_allowed() said we can't |
4209 | * inject to L1 now because L2 must run. Ask L2 to exit | 4408 | * inject to L1 now because L2 must run. The caller will have |
4210 | * right after entry, so we can inject to L1 more promptly. | 4409 | * to make L2 exit right after entry, so we can inject to L1 |
4410 | * more promptly. | ||
4211 | */ | 4411 | */ |
4212 | kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | 4412 | return -EBUSY; |
4213 | return; | ||
4214 | } | ||
4215 | 4413 | ||
4216 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4414 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4217 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 4415 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
4218 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4416 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
4417 | return 0; | ||
4219 | } | 4418 | } |
4220 | 4419 | ||
4221 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 4420 | static int enable_nmi_window(struct kvm_vcpu *vcpu) |
4222 | { | 4421 | { |
4223 | u32 cpu_based_vm_exec_control; | 4422 | u32 cpu_based_vm_exec_control; |
4224 | 4423 | ||
4225 | if (!cpu_has_virtual_nmis()) { | 4424 | if (!cpu_has_virtual_nmis()) |
4226 | enable_irq_window(vcpu); | 4425 | return enable_irq_window(vcpu); |
4227 | return; | 4426 | |
4228 | } | 4427 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) |
4428 | return enable_irq_window(vcpu); | ||
4229 | 4429 | ||
4230 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | ||
4231 | enable_irq_window(vcpu); | ||
4232 | return; | ||
4233 | } | ||
4234 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4430 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4235 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 4431 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; |
4236 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4432 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
4433 | return 0; | ||
4237 | } | 4434 | } |
4238 | 4435 | ||
4239 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 4436 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
@@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
4294 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 4491 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
4295 | } | 4492 | } |
4296 | 4493 | ||
4297 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | ||
4298 | { | ||
4299 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
4300 | return 0; | ||
4301 | |||
4302 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
4303 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | ||
4304 | | GUEST_INTR_STATE_NMI)); | ||
4305 | } | ||
4306 | |||
4307 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 4494 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
4308 | { | 4495 | { |
4309 | if (!cpu_has_virtual_nmis()) | 4496 | if (!cpu_has_virtual_nmis()) |
@@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4333 | } | 4520 | } |
4334 | } | 4521 | } |
4335 | 4522 | ||
4523 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | ||
4524 | { | ||
4525 | if (is_guest_mode(vcpu)) { | ||
4526 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4527 | |||
4528 | if (to_vmx(vcpu)->nested.nested_run_pending) | ||
4529 | return 0; | ||
4530 | if (nested_exit_on_nmi(vcpu)) { | ||
4531 | nested_vmx_vmexit(vcpu); | ||
4532 | vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; | ||
4533 | vmcs12->vm_exit_intr_info = NMI_VECTOR | | ||
4534 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; | ||
4535 | /* | ||
4536 | * The NMI-triggered VM exit counts as injection: | ||
4537 | * clear this one and block further NMIs. | ||
4538 | */ | ||
4539 | vcpu->arch.nmi_pending = 0; | ||
4540 | vmx_set_nmi_mask(vcpu, true); | ||
4541 | return 0; | ||
4542 | } | ||
4543 | } | ||
4544 | |||
4545 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
4546 | return 0; | ||
4547 | |||
4548 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
4549 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | ||
4550 | | GUEST_INTR_STATE_NMI)); | ||
4551 | } | ||
4552 | |||
4336 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4553 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4337 | { | 4554 | { |
4338 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4555 | if (is_guest_mode(vcpu)) { |
4339 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 4556 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
4340 | if (to_vmx(vcpu)->nested.nested_run_pending || | 4557 | |
4341 | (vmcs12->idt_vectoring_info_field & | 4558 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4342 | VECTORING_INFO_VALID_MASK)) | ||
4343 | return 0; | 4559 | return 0; |
4344 | nested_vmx_vmexit(vcpu); | 4560 | if (nested_exit_on_intr(vcpu)) { |
4345 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | 4561 | nested_vmx_vmexit(vcpu); |
4346 | vmcs12->vm_exit_intr_info = 0; | 4562 | vmcs12->vm_exit_reason = |
4347 | /* fall through to normal code, but now in L1, not L2 */ | 4563 | EXIT_REASON_EXTERNAL_INTERRUPT; |
4564 | vmcs12->vm_exit_intr_info = 0; | ||
4565 | /* | ||
4566 | * fall through to normal code, but now in L1, not L2 | ||
4567 | */ | ||
4568 | } | ||
4348 | } | 4569 | } |
4349 | 4570 | ||
4350 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 4571 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
@@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
4362 | .flags = 0, | 4583 | .flags = 0, |
4363 | }; | 4584 | }; |
4364 | 4585 | ||
4365 | ret = kvm_set_memory_region(kvm, &tss_mem, false); | 4586 | ret = kvm_set_memory_region(kvm, &tss_mem); |
4366 | if (ret) | 4587 | if (ret) |
4367 | return ret; | 4588 | return ret; |
4368 | kvm->arch.tss_addr = addr; | 4589 | kvm->arch.tss_addr = addr; |
@@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
4603 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ | 4824 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ |
4604 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | 4825 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) |
4605 | { | 4826 | { |
4606 | if (to_vmx(vcpu)->nested.vmxon && | ||
4607 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
4608 | return 1; | ||
4609 | |||
4610 | if (is_guest_mode(vcpu)) { | 4827 | if (is_guest_mode(vcpu)) { |
4828 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4829 | unsigned long orig_val = val; | ||
4830 | |||
4611 | /* | 4831 | /* |
4612 | * We get here when L2 changed cr0 in a way that did not change | 4832 | * We get here when L2 changed cr0 in a way that did not change |
4613 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), | 4833 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), |
4614 | * but did change L0 shadowed bits. This can currently happen | 4834 | * but did change L0 shadowed bits. So we first calculate the |
4615 | * with the TS bit: L0 may want to leave TS on (for lazy fpu | 4835 | * effective cr0 value that L1 would like to write into the |
4616 | * loading) while pretending to allow the guest to change it. | 4836 | * hardware. It consists of the L2-owned bits from the new |
4837 | * value combined with the L1-owned bits from L1's guest_cr0. | ||
4617 | */ | 4838 | */ |
4618 | if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | | 4839 | val = (val & ~vmcs12->cr0_guest_host_mask) | |
4619 | (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) | 4840 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); |
4841 | |||
4842 | /* TODO: will have to take unrestricted guest mode into | ||
4843 | * account */ | ||
4844 | if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) | ||
4620 | return 1; | 4845 | return 1; |
4621 | vmcs_writel(CR0_READ_SHADOW, val); | 4846 | |
4847 | if (kvm_set_cr0(vcpu, val)) | ||
4848 | return 1; | ||
4849 | vmcs_writel(CR0_READ_SHADOW, orig_val); | ||
4622 | return 0; | 4850 | return 0; |
4623 | } else | 4851 | } else { |
4852 | if (to_vmx(vcpu)->nested.vmxon && | ||
4853 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
4854 | return 1; | ||
4624 | return kvm_set_cr0(vcpu, val); | 4855 | return kvm_set_cr0(vcpu, val); |
4856 | } | ||
4625 | } | 4857 | } |
4626 | 4858 | ||
4627 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) | 4859 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) |
4628 | { | 4860 | { |
4629 | if (is_guest_mode(vcpu)) { | 4861 | if (is_guest_mode(vcpu)) { |
4630 | if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | | 4862 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
4631 | (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) | 4863 | unsigned long orig_val = val; |
4864 | |||
4865 | /* analogously to handle_set_cr0 */ | ||
4866 | val = (val & ~vmcs12->cr4_guest_host_mask) | | ||
4867 | (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); | ||
4868 | if (kvm_set_cr4(vcpu, val)) | ||
4632 | return 1; | 4869 | return 1; |
4633 | vmcs_writel(CR4_READ_SHADOW, val); | 4870 | vmcs_writel(CR4_READ_SHADOW, orig_val); |
4634 | return 0; | 4871 | return 0; |
4635 | } else | 4872 | } else |
4636 | return kvm_set_cr4(vcpu, val); | 4873 | return kvm_set_cr4(vcpu, val); |
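The merge of L1-owned and L2-owned bits above is plain bit arithmetic; a minimal stand-alone sketch (effective_cr0() is a made-up helper, not a kernel function) shows how bits covered by cr0_guest_host_mask come from L1's guest_cr0 while the rest come from the value L2 just wrote:

/* Sketch of the guest/host mask merge used by handle_set_cr0()/handle_set_cr4().
 * Bits set in the mask are owned by L1, so their value comes from vmcs12's
 * guest_cr0; bits clear in the mask are owned by L2, so the freshly written
 * value is used directly. */
#include <stdint.h>
#include <stdio.h>

static uint64_t effective_cr0(uint64_t new_val, uint64_t l1_guest_cr0, uint64_t mask)
{
	return (new_val & ~mask) | (l1_guest_cr0 & mask);
}

int main(void)
{
	uint64_t ts = 1ull << 3;	/* CR0.TS */

	/* L1 owns TS and currently keeps it clear; L2 tries to set it. */
	printf("%#llx\n", (unsigned long long)effective_cr0(ts, 0, ts));	/* 0x0 */
	/* L2 also flips a bit it owns (e.g. CR0.MP, bit 1): that part sticks. */
	printf("%#llx\n", (unsigned long long)effective_cr0(ts | 2, 0, ts));	/* 0x2 */
	return 0;
}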
@@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5183 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) | 5420 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) |
5184 | return 1; | 5421 | return 1; |
5185 | 5422 | ||
5186 | err = emulate_instruction(vcpu, 0); | 5423 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); |
5187 | 5424 | ||
5188 | if (err == EMULATE_DO_MMIO) { | 5425 | if (err == EMULATE_DO_MMIO) { |
5189 | ret = 0; | 5426 | ret = 0; |
@@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | |||
5259 | } | 5496 | } |
5260 | 5497 | ||
5261 | /* Create a new VMCS */ | 5498 | /* Create a new VMCS */ |
5262 | item = (struct vmcs02_list *) | 5499 | item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); |
5263 | kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
5264 | if (!item) | 5500 | if (!item) |
5265 | return NULL; | 5501 | return NULL; |
5266 | item->vmcs02.vmcs = alloc_vmcs(); | 5502 | item->vmcs02.vmcs = alloc_vmcs(); |
@@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | |||
5309 | free_loaded_vmcs(&vmx->vmcs01); | 5545 | free_loaded_vmcs(&vmx->vmcs01); |
5310 | } | 5546 | } |
5311 | 5547 | ||
5548 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
5549 | u32 vm_instruction_error); | ||
5550 | |||
5312 | /* | 5551 | /* |
5313 | * Emulate the VMXON instruction. | 5552 | * Emulate the VMXON instruction. |
5314 | * Currently, we just remember that VMX is active, and do not save or even | 5553 | * Currently, we just remember that VMX is active, and do not save or even |
@@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5321 | { | 5560 | { |
5322 | struct kvm_segment cs; | 5561 | struct kvm_segment cs; |
5323 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5562 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5563 | struct vmcs *shadow_vmcs; | ||
5324 | 5564 | ||
5325 | /* The Intel VMX Instruction Reference lists a bunch of bits that | 5565 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
5326 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | 5566 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
@@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5344 | kvm_inject_gp(vcpu, 0); | 5584 | kvm_inject_gp(vcpu, 0); |
5345 | return 1; | 5585 | return 1; |
5346 | } | 5586 | } |
5587 | if (vmx->nested.vmxon) { | ||
5588 | nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); | ||
5589 | skip_emulated_instruction(vcpu); | ||
5590 | return 1; | ||
5591 | } | ||
5592 | if (enable_shadow_vmcs) { | ||
5593 | shadow_vmcs = alloc_vmcs(); | ||
5594 | if (!shadow_vmcs) | ||
5595 | return -ENOMEM; | ||
5596 | /* mark vmcs as shadow */ | ||
5597 | shadow_vmcs->revision_id |= (1u << 31); | ||
5598 | /* init shadow vmcs */ | ||
5599 | vmcs_clear(shadow_vmcs); | ||
5600 | vmx->nested.current_shadow_vmcs = shadow_vmcs; | ||
5601 | } | ||
5347 | 5602 | ||
5348 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | 5603 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
5349 | vmx->nested.vmcs02_num = 0; | 5604 | vmx->nested.vmcs02_num = 0; |
@@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | |||
5384 | return 1; | 5639 | return 1; |
5385 | } | 5640 | } |
5386 | 5641 | ||
5642 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | ||
5643 | { | ||
5644 | u32 exec_control; | ||
5645 | if (enable_shadow_vmcs) { | ||
5646 | if (vmx->nested.current_vmcs12 != NULL) { | ||
5647 | /* copy to memory all shadowed fields in case | ||
5648 | they were modified */ | ||
5649 | copy_shadow_to_vmcs12(vmx); | ||
5650 | vmx->nested.sync_shadow_vmcs = false; | ||
5651 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
5652 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
5653 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
5654 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | ||
5655 | } | ||
5656 | } | ||
5657 | kunmap(vmx->nested.current_vmcs12_page); | ||
5658 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5659 | } | ||
5660 | |||
5387 | /* | 5661 | /* |
5388 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or | 5662 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or |
5389 | * just stops using VMX. | 5663 | * just stops using VMX. |
@@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
5394 | return; | 5668 | return; |
5395 | vmx->nested.vmxon = false; | 5669 | vmx->nested.vmxon = false; |
5396 | if (vmx->nested.current_vmptr != -1ull) { | 5670 | if (vmx->nested.current_vmptr != -1ull) { |
5397 | kunmap(vmx->nested.current_vmcs12_page); | 5671 | nested_release_vmcs12(vmx); |
5398 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5399 | vmx->nested.current_vmptr = -1ull; | 5672 | vmx->nested.current_vmptr = -1ull; |
5400 | vmx->nested.current_vmcs12 = NULL; | 5673 | vmx->nested.current_vmcs12 = NULL; |
5401 | } | 5674 | } |
5675 | if (enable_shadow_vmcs) | ||
5676 | free_vmcs(vmx->nested.current_shadow_vmcs); | ||
5402 | /* Unpin physical memory we referred to in current vmcs02 */ | 5677 | /* Unpin physical memory we referred to in current vmcs02 */ |
5403 | if (vmx->nested.apic_access_page) { | 5678 | if (vmx->nested.apic_access_page) { |
5404 | nested_release_page(vmx->nested.apic_access_page); | 5679 | nested_release_page(vmx->nested.apic_access_page); |
@@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | |||
5507 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | 5782 | X86_EFLAGS_SF | X86_EFLAGS_OF)) |
5508 | | X86_EFLAGS_ZF); | 5783 | | X86_EFLAGS_ZF); |
5509 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | 5784 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; |
5785 | /* | ||
5786 | * We don't need to force a shadow sync because | ||
5787 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5788 | */ | ||
5510 | } | 5789 | } |
5511 | 5790 | ||
5512 | /* Emulate the VMCLEAR instruction */ | 5791 | /* Emulate the VMCLEAR instruction */ |
@@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
5539 | } | 5818 | } |
5540 | 5819 | ||
5541 | if (vmptr == vmx->nested.current_vmptr) { | 5820 | if (vmptr == vmx->nested.current_vmptr) { |
5542 | kunmap(vmx->nested.current_vmcs12_page); | 5821 | nested_release_vmcs12(vmx); |
5543 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5544 | vmx->nested.current_vmptr = -1ull; | 5822 | vmx->nested.current_vmptr = -1ull; |
5545 | vmx->nested.current_vmcs12 = NULL; | 5823 | vmx->nested.current_vmcs12 = NULL; |
5546 | } | 5824 | } |
@@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, | |||
5639 | } | 5917 | } |
5640 | } | 5918 | } |
5641 | 5919 | ||
5920 | |||
5921 | static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, | ||
5922 | unsigned long field, u64 field_value){ | ||
5923 | short offset = vmcs_field_to_offset(field); | ||
5924 | char *p = ((char *) get_vmcs12(vcpu)) + offset; | ||
5925 | if (offset < 0) | ||
5926 | return false; | ||
5927 | |||
5928 | switch (vmcs_field_type(field)) { | ||
5929 | case VMCS_FIELD_TYPE_U16: | ||
5930 | *(u16 *)p = field_value; | ||
5931 | return true; | ||
5932 | case VMCS_FIELD_TYPE_U32: | ||
5933 | *(u32 *)p = field_value; | ||
5934 | return true; | ||
5935 | case VMCS_FIELD_TYPE_U64: | ||
5936 | *(u64 *)p = field_value; | ||
5937 | return true; | ||
5938 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5939 | *(natural_width *)p = field_value; | ||
5940 | return true; | ||
5941 | default: | ||
5942 | return false; /* can never happen. */ | ||
5943 | } | ||
5944 | |||
5945 | } | ||
5946 | |||
5947 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | ||
5948 | { | ||
5949 | int i; | ||
5950 | unsigned long field; | ||
5951 | u64 field_value; | ||
5952 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | ||
5953 | unsigned long *fields = (unsigned long *)shadow_read_write_fields; | ||
5954 | int num_fields = max_shadow_read_write_fields; | ||
5955 | |||
5956 | vmcs_load(shadow_vmcs); | ||
5957 | |||
5958 | for (i = 0; i < num_fields; i++) { | ||
5959 | field = fields[i]; | ||
5960 | switch (vmcs_field_type(field)) { | ||
5961 | case VMCS_FIELD_TYPE_U16: | ||
5962 | field_value = vmcs_read16(field); | ||
5963 | break; | ||
5964 | case VMCS_FIELD_TYPE_U32: | ||
5965 | field_value = vmcs_read32(field); | ||
5966 | break; | ||
5967 | case VMCS_FIELD_TYPE_U64: | ||
5968 | field_value = vmcs_read64(field); | ||
5969 | break; | ||
5970 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5971 | field_value = vmcs_readl(field); | ||
5972 | break; | ||
5973 | } | ||
5974 | vmcs12_write_any(&vmx->vcpu, field, field_value); | ||
5975 | } | ||
5976 | |||
5977 | vmcs_clear(shadow_vmcs); | ||
5978 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
5979 | } | ||
5980 | |||
5981 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | ||
5982 | { | ||
5983 | unsigned long *fields[] = { | ||
5984 | (unsigned long *)shadow_read_write_fields, | ||
5985 | (unsigned long *)shadow_read_only_fields | ||
5986 | }; | ||
5987 | int num_lists = ARRAY_SIZE(fields); | ||
5988 | int max_fields[] = { | ||
5989 | max_shadow_read_write_fields, | ||
5990 | max_shadow_read_only_fields | ||
5991 | }; | ||
5992 | int i, q; | ||
5993 | unsigned long field; | ||
5994 | u64 field_value = 0; | ||
5995 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | ||
5996 | |||
5997 | vmcs_load(shadow_vmcs); | ||
5998 | |||
5999 | for (q = 0; q < num_lists; q++) { | ||
6000 | for (i = 0; i < max_fields[q]; i++) { | ||
6001 | field = fields[q][i]; | ||
6002 | vmcs12_read_any(&vmx->vcpu, field, &field_value); | ||
6003 | |||
6004 | switch (vmcs_field_type(field)) { | ||
6005 | case VMCS_FIELD_TYPE_U16: | ||
6006 | vmcs_write16(field, (u16)field_value); | ||
6007 | break; | ||
6008 | case VMCS_FIELD_TYPE_U32: | ||
6009 | vmcs_write32(field, (u32)field_value); | ||
6010 | break; | ||
6011 | case VMCS_FIELD_TYPE_U64: | ||
6012 | vmcs_write64(field, (u64)field_value); | ||
6013 | break; | ||
6014 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
6015 | vmcs_writel(field, (long)field_value); | ||
6016 | break; | ||
6017 | } | ||
6018 | } | ||
6019 | } | ||
6020 | |||
6021 | vmcs_clear(shadow_vmcs); | ||
6022 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
6023 | } | ||
6024 | |||
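Both copy routines walk a table of field encodings and dispatch on field width before reading or writing. A compact model of that pattern (illustrative only: the field table, widths and struct layout below are invented for the example and do not use real VMCS encodings):

/* Width-dispatched field copy, modelled on copy_shadow_to_vmcs12() and
 * copy_vmcs12_to_shadow(). */
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

enum field_width { FIELD_U16, FIELD_U32, FIELD_U64 };

struct field_desc {
	enum field_width width;
	size_t offset;			/* offset into struct fake_vmcs12 */
};

struct fake_vmcs12 {
	uint16_t guest_cs_selector;
	uint32_t vm_exit_reason;
	uint64_t guest_rip;
};

static void copy_fields(struct fake_vmcs12 *dst, const struct field_desc *fields,
			const uint64_t *src_values, int n)
{
	for (int i = 0; i < n; i++) {
		char *p = (char *)dst + fields[i].offset;
		uint64_t v = src_values[i];	/* stands in for vmcs_read16/32/64() */

		switch (fields[i].width) {
		case FIELD_U16: { uint16_t x = (uint16_t)v; memcpy(p, &x, sizeof(x)); break; }
		case FIELD_U32: { uint32_t x = (uint32_t)v; memcpy(p, &x, sizeof(x)); break; }
		case FIELD_U64: memcpy(p, &v, sizeof(v)); break;
		}
	}
}

int main(void)
{
	const struct field_desc fields[] = {
		{ FIELD_U16, offsetof(struct fake_vmcs12, guest_cs_selector) },
		{ FIELD_U32, offsetof(struct fake_vmcs12, vm_exit_reason) },
		{ FIELD_U64, offsetof(struct fake_vmcs12, guest_rip) },
	};
	const uint64_t values[] = { 0x10, 48, 0xffffffff81000000ull };
	struct fake_vmcs12 v = { 0 };

	copy_fields(&v, fields, values, 3);
	printf("cs=%#x reason=%u rip=%#llx\n", v.guest_cs_selector, v.vm_exit_reason,
	       (unsigned long long)v.guest_rip);
	return 0;
}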
5642 | /* | 6025 | /* |
5643 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was | 6026 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was |
5644 | * used before) all generate the same failure when it is missing. | 6027 | * used before) all generate the same failure when it is missing. |
@@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
5703 | gva_t gva; | 6086 | gva_t gva; |
5704 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6087 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
5705 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 6088 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
5706 | char *p; | ||
5707 | short offset; | ||
5708 | /* The value to write might be 32 or 64 bits, depending on L1's long | 6089 | /* The value to write might be 32 or 64 bits, depending on L1's long |
5709 | * mode, and eventually we need to write that into a field of several | 6090 | * mode, and eventually we need to write that into a field of several |
5710 | * possible lengths. The code below first zero-extends the value to 64 | 6091 | * possible lengths. The code below first zero-extends the value to 64 |
@@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
5741 | return 1; | 6122 | return 1; |
5742 | } | 6123 | } |
5743 | 6124 | ||
5744 | offset = vmcs_field_to_offset(field); | 6125 | if (!vmcs12_write_any(vcpu, field, field_value)) { |
5745 | if (offset < 0) { | ||
5746 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | ||
5747 | skip_emulated_instruction(vcpu); | ||
5748 | return 1; | ||
5749 | } | ||
5750 | p = ((char *) get_vmcs12(vcpu)) + offset; | ||
5751 | |||
5752 | switch (vmcs_field_type(field)) { | ||
5753 | case VMCS_FIELD_TYPE_U16: | ||
5754 | *(u16 *)p = field_value; | ||
5755 | break; | ||
5756 | case VMCS_FIELD_TYPE_U32: | ||
5757 | *(u32 *)p = field_value; | ||
5758 | break; | ||
5759 | case VMCS_FIELD_TYPE_U64: | ||
5760 | *(u64 *)p = field_value; | ||
5761 | break; | ||
5762 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5763 | *(natural_width *)p = field_value; | ||
5764 | break; | ||
5765 | default: | ||
5766 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | 6126 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); |
5767 | skip_emulated_instruction(vcpu); | 6127 | skip_emulated_instruction(vcpu); |
5768 | return 1; | 6128 | return 1; |
@@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
5780 | gva_t gva; | 6140 | gva_t gva; |
5781 | gpa_t vmptr; | 6141 | gpa_t vmptr; |
5782 | struct x86_exception e; | 6142 | struct x86_exception e; |
6143 | u32 exec_control; | ||
5783 | 6144 | ||
5784 | if (!nested_vmx_check_permission(vcpu)) | 6145 | if (!nested_vmx_check_permission(vcpu)) |
5785 | return 1; | 6146 | return 1; |
@@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
5818 | skip_emulated_instruction(vcpu); | 6179 | skip_emulated_instruction(vcpu); |
5819 | return 1; | 6180 | return 1; |
5820 | } | 6181 | } |
5821 | if (vmx->nested.current_vmptr != -1ull) { | 6182 | if (vmx->nested.current_vmptr != -1ull) |
5822 | kunmap(vmx->nested.current_vmcs12_page); | 6183 | nested_release_vmcs12(vmx); |
5823 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5824 | } | ||
5825 | 6184 | ||
5826 | vmx->nested.current_vmptr = vmptr; | 6185 | vmx->nested.current_vmptr = vmptr; |
5827 | vmx->nested.current_vmcs12 = new_vmcs12; | 6186 | vmx->nested.current_vmcs12 = new_vmcs12; |
5828 | vmx->nested.current_vmcs12_page = page; | 6187 | vmx->nested.current_vmcs12_page = page; |
6188 | if (enable_shadow_vmcs) { | ||
6189 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
6190 | exec_control |= SECONDARY_EXEC_SHADOW_VMCS; | ||
6191 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
6192 | vmcs_write64(VMCS_LINK_POINTER, | ||
6193 | __pa(vmx->nested.current_shadow_vmcs)); | ||
6194 | vmx->nested.sync_shadow_vmcs = true; | ||
6195 | } | ||
5829 | } | 6196 | } |
5830 | 6197 | ||
5831 | nested_vmx_succeed(vcpu); | 6198 | nested_vmx_succeed(vcpu); |
@@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
5908 | static const int kvm_vmx_max_exit_handlers = | 6275 | static const int kvm_vmx_max_exit_handlers = |
5909 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 6276 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
5910 | 6277 | ||
6278 | static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | ||
6279 | struct vmcs12 *vmcs12) | ||
6280 | { | ||
6281 | unsigned long exit_qualification; | ||
6282 | gpa_t bitmap, last_bitmap; | ||
6283 | unsigned int port; | ||
6284 | int size; | ||
6285 | u8 b; | ||
6286 | |||
6287 | if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) | ||
6288 | return 1; | ||
6289 | |||
6290 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) | ||
6291 | return 0; | ||
6292 | |||
6293 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
6294 | |||
6295 | port = exit_qualification >> 16; | ||
6296 | size = (exit_qualification & 7) + 1; | ||
6297 | |||
6298 | last_bitmap = (gpa_t)-1; | ||
6299 | b = -1; | ||
6300 | |||
6301 | while (size > 0) { | ||
6302 | if (port < 0x8000) | ||
6303 | bitmap = vmcs12->io_bitmap_a; | ||
6304 | else if (port < 0x10000) | ||
6305 | bitmap = vmcs12->io_bitmap_b; | ||
6306 | else | ||
6307 | return 1; | ||
6308 | bitmap += (port & 0x7fff) / 8; | ||
6309 | |||
6310 | if (last_bitmap != bitmap) | ||
6311 | if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) | ||
6312 | return 1; | ||
6313 | if (b & (1 << (port & 7))) | ||
6314 | return 1; | ||
6315 | |||
6316 | port++; | ||
6317 | size--; | ||
6318 | last_bitmap = bitmap; | ||
6319 | } | ||
6320 | |||
6321 | return 0; | ||
6322 | } | ||
6323 | |||
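A stand-alone model of the bitmap walk above (illustrative only: the two 4 KiB bitmaps live in ordinary arrays rather than guest memory, io_intercepted() is a made-up name, and the unconditional-exiting shortcut at the top of the kernel function is omitted):

/* Models nested_vmx_exit_handled_io(): ports 0x0000-0x7fff are covered by
 * bitmap A, 0x8000-0xffff by bitmap B; one bit per port, and a multi-byte
 * access is intercepted if any of its ports is intercepted. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t io_bitmap_a[4096], io_bitmap_b[4096];

static bool io_intercepted(unsigned int port, int size)
{
	while (size > 0) {
		const uint8_t *bitmap;

		if (port < 0x8000)
			bitmap = io_bitmap_a;
		else if (port < 0x10000)
			bitmap = io_bitmap_b;
		else
			return true;			/* wrapped past 0xffff */
		if (bitmap[(port & 0x7fff) / 8] & (1 << (port & 7)))
			return true;
		port++;
		size--;
	}
	return false;
}

int main(void)
{
	io_bitmap_a[0x3f8 / 8] |= 1 << (0x3f8 & 7);	/* intercept port 0x3f8 */
	printf("%d %d\n", io_intercepted(0x3f8, 1), io_intercepted(0x3f9, 2));	/* 1 0 */
	return 0;
}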
5911 | /* | 6324 | /* |
5912 | * Return 1 if we should exit from L2 to L1 to handle an MSR access, | 6325 | * Return 1 if we should exit from L2 to L1 to handle an MSR access, |
5913 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed | 6326 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed |
@@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
5939 | /* Then read the msr_index'th bit from this bitmap: */ | 6352 | /* Then read the msr_index'th bit from this bitmap: */ |
5940 | if (msr_index < 1024*8) { | 6353 | if (msr_index < 1024*8) { |
5941 | unsigned char b; | 6354 | unsigned char b; |
5942 | kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); | 6355 | if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) |
6356 | return 1; | ||
5943 | return 1 & (b >> (msr_index & 7)); | 6357 | return 1 & (b >> (msr_index & 7)); |
5944 | } else | 6358 | } else |
5945 | return 1; /* let L1 handle the wrong parameter */ | 6359 | return 1; /* let L1 handle the wrong parameter */ |
@@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
6033 | */ | 6447 | */ |
6034 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | 6448 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) |
6035 | { | 6449 | { |
6036 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
6037 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 6450 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
6038 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6451 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
6039 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6452 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
6453 | u32 exit_reason = vmx->exit_reason; | ||
6040 | 6454 | ||
6041 | if (vmx->nested.nested_run_pending) | 6455 | if (vmx->nested.nested_run_pending) |
6042 | return 0; | 6456 | return 0; |
@@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6060 | case EXIT_REASON_TRIPLE_FAULT: | 6474 | case EXIT_REASON_TRIPLE_FAULT: |
6061 | return 1; | 6475 | return 1; |
6062 | case EXIT_REASON_PENDING_INTERRUPT: | 6476 | case EXIT_REASON_PENDING_INTERRUPT: |
6477 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); | ||
6063 | case EXIT_REASON_NMI_WINDOW: | 6478 | case EXIT_REASON_NMI_WINDOW: |
6064 | /* | 6479 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); |
6065 | * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit | ||
6066 | * (aka Interrupt Window Exiting) only when L1 turned it on, | ||
6067 | * so if we got a PENDING_INTERRUPT exit, this must be for L1. | ||
6068 | * Same for NMI Window Exiting. | ||
6069 | */ | ||
6070 | return 1; | ||
6071 | case EXIT_REASON_TASK_SWITCH: | 6480 | case EXIT_REASON_TASK_SWITCH: |
6072 | return 1; | 6481 | return 1; |
6073 | case EXIT_REASON_CPUID: | 6482 | case EXIT_REASON_CPUID: |
@@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6097 | case EXIT_REASON_DR_ACCESS: | 6506 | case EXIT_REASON_DR_ACCESS: |
6098 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); | 6507 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); |
6099 | case EXIT_REASON_IO_INSTRUCTION: | 6508 | case EXIT_REASON_IO_INSTRUCTION: |
6100 | /* TODO: support IO bitmaps */ | 6509 | return nested_vmx_exit_handled_io(vcpu, vmcs12); |
6101 | return 1; | ||
6102 | case EXIT_REASON_MSR_READ: | 6510 | case EXIT_REASON_MSR_READ: |
6103 | case EXIT_REASON_MSR_WRITE: | 6511 | case EXIT_REASON_MSR_WRITE: |
6104 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); | 6512 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); |
@@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6122 | case EXIT_REASON_EPT_VIOLATION: | 6530 | case EXIT_REASON_EPT_VIOLATION: |
6123 | case EXIT_REASON_EPT_MISCONFIG: | 6531 | case EXIT_REASON_EPT_MISCONFIG: |
6124 | return 0; | 6532 | return 0; |
6533 | case EXIT_REASON_PREEMPTION_TIMER: | ||
6534 | return vmcs12->pin_based_vm_exec_control & | ||
6535 | PIN_BASED_VMX_PREEMPTION_TIMER; | ||
6125 | case EXIT_REASON_WBINVD: | 6536 | case EXIT_REASON_WBINVD: |
6126 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 6537 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
6127 | case EXIT_REASON_XSETBV: | 6538 | case EXIT_REASON_XSETBV: |
@@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
6316 | 6727 | ||
6317 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 6728 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
6318 | { | 6729 | { |
6730 | if (!vmx_vm_has_apicv(vcpu->kvm)) | ||
6731 | return; | ||
6732 | |||
6319 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | 6733 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); |
6320 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); | 6734 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); |
6321 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); | 6735 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); |
@@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | |||
6346 | } | 6760 | } |
6347 | } | 6761 | } |
6348 | 6762 | ||
6763 | static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | ||
6764 | { | ||
6765 | u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
6766 | |||
6767 | /* | ||
6768 | * If an external interrupt exists, the IF bit is set in the rflags/eflags | ||
6769 | * image on the interrupt stack frame, so interrupts are re-enabled when | ||
6770 | * the interrupt handler returns. | ||
6771 | */ | ||
6772 | if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) | ||
6773 | == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { | ||
6774 | unsigned int vector; | ||
6775 | unsigned long entry; | ||
6776 | gate_desc *desc; | ||
6777 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6778 | #ifdef CONFIG_X86_64 | ||
6779 | unsigned long tmp; | ||
6780 | #endif | ||
6781 | |||
6782 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | ||
6783 | desc = (gate_desc *)vmx->host_idt_base + vector; | ||
6784 | entry = gate_offset(*desc); | ||
6785 | asm volatile( | ||
6786 | #ifdef CONFIG_X86_64 | ||
6787 | "mov %%" _ASM_SP ", %[sp]\n\t" | ||
6788 | "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" | ||
6789 | "push $%c[ss]\n\t" | ||
6790 | "push %[sp]\n\t" | ||
6791 | #endif | ||
6792 | "pushf\n\t" | ||
6793 | "orl $0x200, (%%" _ASM_SP ")\n\t" | ||
6794 | __ASM_SIZE(push) " $%c[cs]\n\t" | ||
6795 | "call *%[entry]\n\t" | ||
6796 | : | ||
6797 | #ifdef CONFIG_X86_64 | ||
6798 | [sp]"=&r"(tmp) | ||
6799 | #endif | ||
6800 | : | ||
6801 | [entry]"r"(entry), | ||
6802 | [ss]"i"(__KERNEL_DS), | ||
6803 | [cs]"i"(__KERNEL_CS) | ||
6804 | ); | ||
6805 | } else | ||
6806 | local_irq_enable(); | ||
6807 | } | ||
6808 | |||
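The decode at the top of the function above keys off the interruption-information format (bit 31 = valid, bits 10:8 = type, bits 7:0 = vector). A small sketch of just that decode (is_external_interrupt() is a made-up helper; the mask values mirror the VMX encoding):

/* Decode a VMX interruption-information field the way
 * vmx_handle_external_intr() does before calling the host handler. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_VECTOR_MASK		0x000000ffu
#define INTR_INFO_INTR_TYPE_MASK	0x00000700u
#define INTR_INFO_VALID_MASK		0x80000000u
#define INTR_TYPE_EXT_INTR		(0u << 8)

static bool is_external_interrupt(uint32_t info, unsigned int *vector)
{
	if ((info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) !=
	    (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR))
		return false;
	*vector = info & INTR_INFO_VECTOR_MASK;
	return true;
}

int main(void)
{
	unsigned int vec;
	uint32_t info = INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | 0xef;

	if (is_external_interrupt(info, &vec))
		printf("dispatch host IDT vector %#x with IF set on the saved flags\n", vec);
	else
		puts("not an external interrupt: just local_irq_enable()");
	return 0;
}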
6349 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | 6809 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
6350 | { | 6810 | { |
6351 | u32 exit_intr_info; | 6811 | u32 exit_intr_info; |
@@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
6388 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 6848 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
6389 | } | 6849 | } |
6390 | 6850 | ||
6391 | static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | 6851 | static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, |
6392 | u32 idt_vectoring_info, | 6852 | u32 idt_vectoring_info, |
6393 | int instr_len_field, | 6853 | int instr_len_field, |
6394 | int error_code_field) | 6854 | int error_code_field) |
@@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
6399 | 6859 | ||
6400 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 6860 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
6401 | 6861 | ||
6402 | vmx->vcpu.arch.nmi_injected = false; | 6862 | vcpu->arch.nmi_injected = false; |
6403 | kvm_clear_exception_queue(&vmx->vcpu); | 6863 | kvm_clear_exception_queue(vcpu); |
6404 | kvm_clear_interrupt_queue(&vmx->vcpu); | 6864 | kvm_clear_interrupt_queue(vcpu); |
6405 | 6865 | ||
6406 | if (!idtv_info_valid) | 6866 | if (!idtv_info_valid) |
6407 | return; | 6867 | return; |
6408 | 6868 | ||
6409 | kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); | 6869 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6410 | 6870 | ||
6411 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | 6871 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; |
6412 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | 6872 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; |
6413 | 6873 | ||
6414 | switch (type) { | 6874 | switch (type) { |
6415 | case INTR_TYPE_NMI_INTR: | 6875 | case INTR_TYPE_NMI_INTR: |
6416 | vmx->vcpu.arch.nmi_injected = true; | 6876 | vcpu->arch.nmi_injected = true; |
6417 | /* | 6877 | /* |
6418 | * SDM 3: 27.7.1.2 (September 2008) | 6878 | * SDM 3: 27.7.1.2 (September 2008) |
6419 | * Clear bit "block by NMI" before VM entry if a NMI | 6879 | * Clear bit "block by NMI" before VM entry if a NMI |
6420 | * delivery faulted. | 6880 | * delivery faulted. |
6421 | */ | 6881 | */ |
6422 | vmx_set_nmi_mask(&vmx->vcpu, false); | 6882 | vmx_set_nmi_mask(vcpu, false); |
6423 | break; | 6883 | break; |
6424 | case INTR_TYPE_SOFT_EXCEPTION: | 6884 | case INTR_TYPE_SOFT_EXCEPTION: |
6425 | vmx->vcpu.arch.event_exit_inst_len = | 6885 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
6426 | vmcs_read32(instr_len_field); | ||
6427 | /* fall through */ | 6886 | /* fall through */ |
6428 | case INTR_TYPE_HARD_EXCEPTION: | 6887 | case INTR_TYPE_HARD_EXCEPTION: |
6429 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 6888 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
6430 | u32 err = vmcs_read32(error_code_field); | 6889 | u32 err = vmcs_read32(error_code_field); |
6431 | kvm_queue_exception_e(&vmx->vcpu, vector, err); | 6890 | kvm_queue_exception_e(vcpu, vector, err); |
6432 | } else | 6891 | } else |
6433 | kvm_queue_exception(&vmx->vcpu, vector); | 6892 | kvm_queue_exception(vcpu, vector); |
6434 | break; | 6893 | break; |
6435 | case INTR_TYPE_SOFT_INTR: | 6894 | case INTR_TYPE_SOFT_INTR: |
6436 | vmx->vcpu.arch.event_exit_inst_len = | 6895 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
6437 | vmcs_read32(instr_len_field); | ||
6438 | /* fall through */ | 6896 | /* fall through */ |
6439 | case INTR_TYPE_EXT_INTR: | 6897 | case INTR_TYPE_EXT_INTR: |
6440 | kvm_queue_interrupt(&vmx->vcpu, vector, | 6898 | kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); |
6441 | type == INTR_TYPE_SOFT_INTR); | ||
6442 | break; | 6899 | break; |
6443 | default: | 6900 | default: |
6444 | break; | 6901 | break; |
@@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
6447 | 6904 | ||
6448 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 6905 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
6449 | { | 6906 | { |
6450 | if (is_guest_mode(&vmx->vcpu)) | 6907 | __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, |
6451 | return; | ||
6452 | __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, | ||
6453 | VM_EXIT_INSTRUCTION_LEN, | 6908 | VM_EXIT_INSTRUCTION_LEN, |
6454 | IDT_VECTORING_ERROR_CODE); | 6909 | IDT_VECTORING_ERROR_CODE); |
6455 | } | 6910 | } |
6456 | 6911 | ||
6457 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | 6912 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) |
6458 | { | 6913 | { |
6459 | if (is_guest_mode(vcpu)) | 6914 | __vmx_complete_interrupts(vcpu, |
6460 | return; | ||
6461 | __vmx_complete_interrupts(to_vmx(vcpu), | ||
6462 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), | 6915 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), |
6463 | VM_ENTRY_INSTRUCTION_LEN, | 6916 | VM_ENTRY_INSTRUCTION_LEN, |
6464 | VM_ENTRY_EXCEPTION_ERROR_CODE); | 6917 | VM_ENTRY_EXCEPTION_ERROR_CODE); |
@@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6489 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6942 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
6490 | unsigned long debugctlmsr; | 6943 | unsigned long debugctlmsr; |
6491 | 6944 | ||
6492 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { | ||
6493 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
6494 | if (vmcs12->idt_vectoring_info_field & | ||
6495 | VECTORING_INFO_VALID_MASK) { | ||
6496 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
6497 | vmcs12->idt_vectoring_info_field); | ||
6498 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
6499 | vmcs12->vm_exit_instruction_len); | ||
6500 | if (vmcs12->idt_vectoring_info_field & | ||
6501 | VECTORING_INFO_DELIVER_CODE_MASK) | ||
6502 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
6503 | vmcs12->idt_vectoring_error_code); | ||
6504 | } | ||
6505 | } | ||
6506 | |||
6507 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 6945 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
6508 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 6946 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
6509 | vmx->entry_time = ktime_get(); | 6947 | vmx->entry_time = ktime_get(); |
@@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6513 | if (vmx->emulation_required) | 6951 | if (vmx->emulation_required) |
6514 | return; | 6952 | return; |
6515 | 6953 | ||
6954 | if (vmx->nested.sync_shadow_vmcs) { | ||
6955 | copy_vmcs12_to_shadow(vmx); | ||
6956 | vmx->nested.sync_shadow_vmcs = false; | ||
6957 | } | ||
6958 | |||
6516 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6959 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
6517 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 6960 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
6518 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6961 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) |
@@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6662 | 7105 | ||
6663 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 7106 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
6664 | 7107 | ||
6665 | if (is_guest_mode(vcpu)) { | ||
6666 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
6667 | vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; | ||
6668 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | ||
6669 | vmcs12->idt_vectoring_error_code = | ||
6670 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
6671 | vmcs12->vm_exit_instruction_len = | ||
6672 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
6673 | } | ||
6674 | } | ||
6675 | |||
6676 | vmx->loaded_vmcs->launched = 1; | 7108 | vmx->loaded_vmcs->launched = 1; |
6677 | 7109 | ||
6678 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 7110 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
@@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
6734 | put_cpu(); | 7166 | put_cpu(); |
6735 | if (err) | 7167 | if (err) |
6736 | goto free_vmcs; | 7168 | goto free_vmcs; |
6737 | if (vm_need_virtualize_apic_accesses(kvm)) | 7169 | if (vm_need_virtualize_apic_accesses(kvm)) { |
6738 | err = alloc_apic_access_page(kvm); | 7170 | err = alloc_apic_access_page(kvm); |
6739 | if (err) | 7171 | if (err) |
6740 | goto free_vmcs; | 7172 | goto free_vmcs; |
7173 | } | ||
6741 | 7174 | ||
6742 | if (enable_ept) { | 7175 | if (enable_ept) { |
6743 | if (!kvm->arch.ept_identity_map_addr) | 7176 | if (!kvm->arch.ept_identity_map_addr) |
@@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
6931 | vmcs12->vm_entry_instruction_len); | 7364 | vmcs12->vm_entry_instruction_len); |
6932 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 7365 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, |
6933 | vmcs12->guest_interruptibility_info); | 7366 | vmcs12->guest_interruptibility_info); |
6934 | vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); | ||
6935 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7367 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
6936 | vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); | 7368 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
6937 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | 7369 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); |
6938 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7370 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
6939 | vmcs12->guest_pending_dbg_exceptions); | 7371 | vmcs12->guest_pending_dbg_exceptions); |
@@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
6946 | (vmcs_config.pin_based_exec_ctrl | | 7378 | (vmcs_config.pin_based_exec_ctrl | |
6947 | vmcs12->pin_based_vm_exec_control)); | 7379 | vmcs12->pin_based_vm_exec_control)); |
6948 | 7380 | ||
7381 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | ||
7382 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, | ||
7383 | vmcs12->vmx_preemption_timer_value); | ||
7384 | |||
6949 | /* | 7385 | /* |
6950 | * Whether page-faults are trapped is determined by a combination of | 7386 | * Whether page-faults are trapped is determined by a combination of |
6951 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. | 7387 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. |
@@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7016 | * Other fields are different per CPU, and will be set later when | 7452 | * Other fields are different per CPU, and will be set later when |
7017 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. | 7453 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. |
7018 | */ | 7454 | */ |
7019 | vmx_set_constant_host_state(); | 7455 | vmx_set_constant_host_state(vmx); |
7020 | 7456 | ||
7021 | /* | 7457 | /* |
7022 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before | 7458 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before |
@@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7082 | 7518 | ||
7083 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 7519 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) |
7084 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 7520 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
7085 | if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 7521 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
7086 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 7522 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
7087 | else | 7523 | else |
7088 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 7524 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
@@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7121 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7557 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7122 | int cpu; | 7558 | int cpu; |
7123 | struct loaded_vmcs *vmcs02; | 7559 | struct loaded_vmcs *vmcs02; |
7560 | bool ia32e; | ||
7124 | 7561 | ||
7125 | if (!nested_vmx_check_permission(vcpu) || | 7562 | if (!nested_vmx_check_permission(vcpu) || |
7126 | !nested_vmx_check_vmcs12(vcpu)) | 7563 | !nested_vmx_check_vmcs12(vcpu)) |
@@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7129 | skip_emulated_instruction(vcpu); | 7566 | skip_emulated_instruction(vcpu); |
7130 | vmcs12 = get_vmcs12(vcpu); | 7567 | vmcs12 = get_vmcs12(vcpu); |
7131 | 7568 | ||
7569 | if (enable_shadow_vmcs) | ||
7570 | copy_shadow_to_vmcs12(vmx); | ||
7571 | |||
7132 | /* | 7572 | /* |
7133 | * The nested entry process starts with enforcing various prerequisites | 7573 | * The nested entry process starts with enforcing various prerequisites |
7134 | * on vmcs12 as required by the Intel SDM, and acting appropriately when | 7574 | * on vmcs12 as required by the Intel SDM, and acting appropriately when |
@@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7146 | return 1; | 7586 | return 1; |
7147 | } | 7587 | } |
7148 | 7588 | ||
7589 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { | ||
7590 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
7591 | return 1; | ||
7592 | } | ||
7593 | |||
7149 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && | 7594 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && |
7150 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { | 7595 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { |
7151 | /*TODO: Also verify bits beyond physical address width are 0*/ | 7596 | /*TODO: Also verify bits beyond physical address width are 0*/ |
@@ -7204,6 +7649,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7204 | } | 7649 | } |
7205 | 7650 | ||
7206 | /* | 7651 | /* |
7652 | * If the load IA32_EFER VM-entry control is 1, the following checks | ||
7653 | * are performed on the field for the IA32_EFER MSR: | ||
7654 | * - Bits reserved in the IA32_EFER MSR must be 0. | ||
7655 | * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of | ||
7656 | * the IA-32e mode guest VM-exit control. It must also be identical | ||
7657 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to | ||
7658 | * CR0.PG) is 1. | ||
7659 | */ | ||
7660 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { | ||
7661 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; | ||
7662 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || | ||
7663 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || | ||
7664 | ((vmcs12->guest_cr0 & X86_CR0_PG) && | ||
7665 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { | ||
7666 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
7667 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
7668 | return 1; | ||
7669 | } | ||
7670 | } | ||
7671 | |||
7672 | /* | ||
7673 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the | ||
7674 | * IA32_EFER MSR must be 0 in the field for that register. In addition, | ||
7675 | * the values of the LMA and LME bits in the field must each be that of | ||
7676 | * the host address-space size VM-exit control. | ||
7677 | */ | ||
7678 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { | ||
7679 | ia32e = (vmcs12->vm_exit_controls & | ||
7680 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | ||
7681 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || | ||
7682 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || | ||
7683 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { | ||
7684 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
7685 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
7686 | return 1; | ||
7687 | } | ||
7688 | } | ||
7689 | |||
7690 | /* | ||
7207 | * We're finally done with prerequisite checking, and can start with | 7691 | * We're finally done with prerequisite checking, and can start with |
7208 | * the nested entry. | 7692 | * the nested entry. |
7209 | */ | 7693 | */ |
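The two checks added above encode the SDM consistency rules quoted in the comments. As a rough, self-contained sketch of the guest-side rule (all constants are defined locally to mirror the architectural bit positions, the reserved-bits test done by kvm_valid_efer() is omitted, and this is not the kernel code itself), the test reduces to:

#include <stdbool.h>
#include <stdint.h>

#define EFER_LME (1ULL << 8)    /* long mode enable */
#define EFER_LMA (1ULL << 10)   /* long mode active */
#define X86_CR0_PG (1UL << 31)

/* Local stand-ins for the VM-entry control bits used above. */
#define ENTRY_LOAD_IA32_EFER (1u << 15)
#define ENTRY_IA32E_MODE     (1u << 9)

/* Returns true if the guest EFER field is consistent with the entry controls. */
static bool guest_efer_consistent(uint32_t entry_controls, uint64_t guest_efer,
                                  uint64_t guest_cr0)
{
        bool ia32e;

        if (!(entry_controls & ENTRY_LOAD_IA32_EFER))
                return true;            /* check only applies when EFER is loaded */

        ia32e = (entry_controls & ENTRY_IA32E_MODE) != 0;
        if (ia32e != !!(guest_efer & EFER_LMA))
                return false;           /* LMA must match the IA-32e mode control */
        if ((guest_cr0 & X86_CR0_PG) && ia32e != !!(guest_efer & EFER_LME))
                return false;           /* with paging on, LME must match too */
        return true;
}

The host-side (VM-exit) rule in the second comment is the same shape, with the host address-space size exit control standing in for the IA-32e mode entry control.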
@@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7223 | vcpu->cpu = cpu; | 7707 | vcpu->cpu = cpu; |
7224 | put_cpu(); | 7708 | put_cpu(); |
7225 | 7709 | ||
7710 | vmx_segment_cache_clear(vmx); | ||
7711 | |||
7226 | vmcs12->launch_state = 1; | 7712 | vmcs12->launch_state = 1; |
7227 | 7713 | ||
7228 | prepare_vmcs02(vcpu, vmcs12); | 7714 | prepare_vmcs02(vcpu, vmcs12); |
@@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7273 | vcpu->arch.cr4_guest_owned_bits)); | 7759 | vcpu->arch.cr4_guest_owned_bits)); |
7274 | } | 7760 | } |
7275 | 7761 | ||
7762 | static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | ||
7763 | struct vmcs12 *vmcs12) | ||
7764 | { | ||
7765 | u32 idt_vectoring; | ||
7766 | unsigned int nr; | ||
7767 | |||
7768 | if (vcpu->arch.exception.pending) { | ||
7769 | nr = vcpu->arch.exception.nr; | ||
7770 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | ||
7771 | |||
7772 | if (kvm_exception_is_soft(nr)) { | ||
7773 | vmcs12->vm_exit_instruction_len = | ||
7774 | vcpu->arch.event_exit_inst_len; | ||
7775 | idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; | ||
7776 | } else | ||
7777 | idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; | ||
7778 | |||
7779 | if (vcpu->arch.exception.has_error_code) { | ||
7780 | idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; | ||
7781 | vmcs12->idt_vectoring_error_code = | ||
7782 | vcpu->arch.exception.error_code; | ||
7783 | } | ||
7784 | |||
7785 | vmcs12->idt_vectoring_info_field = idt_vectoring; | ||
7786 | } else if (vcpu->arch.nmi_pending) { | ||
7787 | vmcs12->idt_vectoring_info_field = | ||
7788 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; | ||
7789 | } else if (vcpu->arch.interrupt.pending) { | ||
7790 | nr = vcpu->arch.interrupt.nr; | ||
7791 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | ||
7792 | |||
7793 | if (vcpu->arch.interrupt.soft) { | ||
7794 | idt_vectoring |= INTR_TYPE_SOFT_INTR; | ||
7795 | vmcs12->vm_entry_instruction_len = | ||
7796 | vcpu->arch.event_exit_inst_len; | ||
7797 | } else | ||
7798 | idt_vectoring |= INTR_TYPE_EXT_INTR; | ||
7799 | |||
7800 | vmcs12->idt_vectoring_info_field = idt_vectoring; | ||
7801 | } | ||
7802 | } | ||
7803 | |||
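vmcs12_save_pending_event() above packs the pending event into the architectural interruption-information layout: bits 7:0 hold the vector, bits 10:8 the type, bit 11 says an error code is delivered, and bit 31 marks the word valid. A minimal stand-alone sketch of that encoding, with the masks defined locally to match the VMX layout (hypothetical helper, not a kernel function):

#include <stdbool.h>
#include <stdint.h>

#define VECTORING_VALID        (1u << 31)
#define VECTORING_DELIVER_CODE (1u << 11)
#define TYPE_EXT_INTR          (0u << 8)
#define TYPE_NMI               (2u << 8)
#define TYPE_HARD_EXCEPTION    (3u << 8)
#define TYPE_SOFT_INTR         (4u << 8)
#define TYPE_SOFT_EXCEPTION    (6u << 8)

/* Build an IDT-vectoring info word for a hardware exception. */
static uint32_t encode_hard_exception(uint8_t vector, bool has_error_code)
{
        uint32_t info = vector | TYPE_HARD_EXCEPTION | VECTORING_VALID;

        if (has_error_code)
                info |= VECTORING_DELIVER_CODE;
        return info;
}

/* Example: #PF (vector 14) with an error code encodes as 0x80000b0e. */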
7276 | /* | 7804 | /* |
7277 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | 7805 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits |
7278 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | 7806 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), |
@@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7284 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | 7812 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, |
7285 | * which already writes to vmcs12 directly. | 7813 | * which already writes to vmcs12 directly. |
7286 | */ | 7814 | */ |
7287 | void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 7815 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
7288 | { | 7816 | { |
7289 | /* update guest state fields: */ | 7817 | /* update guest state fields: */ |
7290 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 7818 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
@@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7332 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); | 7860 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); |
7333 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); | 7861 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); |
7334 | 7862 | ||
7335 | vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); | ||
7336 | vmcs12->guest_interruptibility_info = | 7863 | vmcs12->guest_interruptibility_info = |
7337 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 7864 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
7338 | vmcs12->guest_pending_dbg_exceptions = | 7865 | vmcs12->guest_pending_dbg_exceptions = |
7339 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 7866 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
7340 | 7867 | ||
7868 | vmcs12->vm_entry_controls = | ||
7869 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | ||
7870 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | ||
7871 | |||
7341 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 7872 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
7342 | * the relevant bit asks not to trap the change */ | 7873 | * the relevant bit asks not to trap the change */ |
7343 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | 7874 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
7344 | if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT) | 7875 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) |
7345 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | 7876 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); |
7346 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | 7877 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
7347 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | 7878 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
@@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7349 | 7880 | ||
7350 | /* update exit information fields: */ | 7881 | /* update exit information fields: */ |
7351 | 7882 | ||
7352 | vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); | 7883 | vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; |
7353 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 7884 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
7354 | 7885 | ||
7355 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 7886 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
7356 | vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 7887 | if ((vmcs12->vm_exit_intr_info & |
7357 | vmcs12->idt_vectoring_info_field = | 7888 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == |
7358 | vmcs_read32(IDT_VECTORING_INFO_FIELD); | 7889 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) |
7359 | vmcs12->idt_vectoring_error_code = | 7890 | vmcs12->vm_exit_intr_error_code = |
7360 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | 7891 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
7892 | vmcs12->idt_vectoring_info_field = 0; | ||
7361 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 7893 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
7362 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 7894 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
7363 | 7895 | ||
7364 | /* clear vm-entry fields which are to be cleared on exit */ | 7896 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { |
7365 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) | 7897 | /* vm_entry_intr_info_field is cleared on exit. Emulate this |
7898 | * instead of reading the real value. */ | ||
7366 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; | 7899 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; |
7900 | |||
7901 | /* | ||
7902 | * Transfer the event that L0 or L1 may have wanted to inject into | ||
7903 | * L2 to IDT_VECTORING_INFO_FIELD. | ||
7904 | */ | ||
7905 | vmcs12_save_pending_event(vcpu, vmcs12); | ||
7906 | } | ||
7907 | |||
7908 | /* | ||
7909 | * Drop what we picked up for L2 via vmx_complete_interrupts. It is | ||
7910 | * preserved above and would only end up incorrectly in L1. | ||
7911 | */ | ||
7912 | vcpu->arch.nmi_injected = false; | ||
7913 | kvm_clear_exception_queue(vcpu); | ||
7914 | kvm_clear_interrupt_queue(vcpu); | ||
7367 | } | 7915 | } |
7368 | 7916 | ||
7369 | /* | 7917 | /* |
@@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7375 | * Failures During or After Loading Guest State"). | 7923 | * Failures During or After Loading Guest State"). |
7376 | * This function should be called when the active VMCS is L1's (vmcs01). | 7924 | * This function should be called when the active VMCS is L1's (vmcs01). |
7377 | */ | 7925 | */ |
7378 | void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 7926 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
7927 | struct vmcs12 *vmcs12) | ||
7379 | { | 7928 | { |
7380 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 7929 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
7381 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 7930 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
7382 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | 7931 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) |
7383 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 7932 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
7384 | else | 7933 | else |
7385 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 7934 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
@@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7387 | 7936 | ||
7388 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); | 7937 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); |
7389 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); | 7938 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); |
7939 | vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); | ||
7390 | /* | 7940 | /* |
7391 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't | 7941 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't |
7392 | * actually changed, because it depends on the current state of | 7942 | * actually changed, because it depends on the current state of |
@@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7445 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 7995 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) |
7446 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | 7996 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, |
7447 | vmcs12->host_ia32_perf_global_ctrl); | 7997 | vmcs12->host_ia32_perf_global_ctrl); |
7998 | |||
7999 | kvm_set_dr(vcpu, 7, 0x400); | ||
8000 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
7448 | } | 8001 | } |
7449 | 8002 | ||
7450 | /* | 8003 | /* |
@@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
7458 | int cpu; | 8011 | int cpu; |
7459 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 8012 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
7460 | 8013 | ||
8014 | /* trying to cancel vmlaunch/vmresume is a bug */ | ||
8015 | WARN_ON_ONCE(vmx->nested.nested_run_pending); | ||
8016 | |||
7461 | leave_guest_mode(vcpu); | 8017 | leave_guest_mode(vcpu); |
7462 | prepare_vmcs12(vcpu, vmcs12); | 8018 | prepare_vmcs12(vcpu, vmcs12); |
7463 | 8019 | ||
@@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
7468 | vcpu->cpu = cpu; | 8024 | vcpu->cpu = cpu; |
7469 | put_cpu(); | 8025 | put_cpu(); |
7470 | 8026 | ||
8027 | vmx_segment_cache_clear(vmx); | ||
8028 | |||
7471 | /* if no vmcs02 cache requested, remove the one we used */ | 8029 | /* if no vmcs02 cache requested, remove the one we used */ |
7472 | if (VMCS02_POOL_SIZE == 0) | 8030 | if (VMCS02_POOL_SIZE == 0) |
7473 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | 8031 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); |
@@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
7496 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); | 8054 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); |
7497 | } else | 8055 | } else |
7498 | nested_vmx_succeed(vcpu); | 8056 | nested_vmx_succeed(vcpu); |
8057 | if (enable_shadow_vmcs) | ||
8058 | vmx->nested.sync_shadow_vmcs = true; | ||
7499 | } | 8059 | } |
7500 | 8060 | ||
7501 | /* | 8061 | /* |
@@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | |||
7513 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; | 8073 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; |
7514 | vmcs12->exit_qualification = qualification; | 8074 | vmcs12->exit_qualification = qualification; |
7515 | nested_vmx_succeed(vcpu); | 8075 | nested_vmx_succeed(vcpu); |
8076 | if (enable_shadow_vmcs) | ||
8077 | to_vmx(vcpu)->nested.sync_shadow_vmcs = true; | ||
7516 | } | 8078 | } |
7517 | 8079 | ||
7518 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | 8080 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, |
@@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7590 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 8152 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
7591 | .hwapic_irr_update = vmx_hwapic_irr_update, | 8153 | .hwapic_irr_update = vmx_hwapic_irr_update, |
7592 | .hwapic_isr_update = vmx_hwapic_isr_update, | 8154 | .hwapic_isr_update = vmx_hwapic_isr_update, |
8155 | .sync_pir_to_irr = vmx_sync_pir_to_irr, | ||
8156 | .deliver_posted_interrupt = vmx_deliver_posted_interrupt, | ||
7593 | 8157 | ||
7594 | .set_tss_addr = vmx_set_tss_addr, | 8158 | .set_tss_addr = vmx_set_tss_addr, |
7595 | .get_tdp_level = get_ept_level, | 8159 | .get_tdp_level = get_ept_level, |
@@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7618 | .set_tdp_cr3 = vmx_set_cr3, | 8182 | .set_tdp_cr3 = vmx_set_cr3, |
7619 | 8183 | ||
7620 | .check_intercept = vmx_check_intercept, | 8184 | .check_intercept = vmx_check_intercept, |
8185 | .handle_external_intr = vmx_handle_external_intr, | ||
7621 | }; | 8186 | }; |
7622 | 8187 | ||
7623 | static int __init vmx_init(void) | 8188 | static int __init vmx_init(void) |
@@ -7656,6 +8221,24 @@ static int __init vmx_init(void) | |||
7656 | (unsigned long *)__get_free_page(GFP_KERNEL); | 8221 | (unsigned long *)__get_free_page(GFP_KERNEL); |
7657 | if (!vmx_msr_bitmap_longmode_x2apic) | 8222 | if (!vmx_msr_bitmap_longmode_x2apic) |
7658 | goto out4; | 8223 | goto out4; |
8224 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
8225 | if (!vmx_vmread_bitmap) | ||
8226 | goto out5; | ||
8227 | |||
8228 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
8229 | if (!vmx_vmwrite_bitmap) | ||
8230 | goto out6; | ||
8231 | |||
8232 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); | ||
8233 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); | ||
8234 | /* shadowed read/write fields */ | ||
8235 | for (i = 0; i < max_shadow_read_write_fields; i++) { | ||
8236 | clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); | ||
8237 | clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); | ||
8238 | } | ||
8239 | /* shadowed read only fields */ | ||
8240 | for (i = 0; i < max_shadow_read_only_fields; i++) | ||
8241 | clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); | ||
7659 | 8242 | ||
7660 | /* | 8243 | /* |
7661 | * Allow direct access to the PC debug port (it is often used for I/O | 8244 | * Allow direct access to the PC debug port (it is often used for I/O |
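The bitmap setup above starts from all-ones (every VMREAD/VMWRITE intercepted) and clears the bit for each shadowed field encoding, so only non-shadowed fields still cause a VM exit. A rough user-space sketch of the same pattern, assuming a page-sized bitmap and an illustrative pair of field encodings rather than the kernel's full field tables:

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096
#define BITS_PER_LONG (8 * sizeof(unsigned long))

static void clear_bit_in(unsigned long *bitmap, unsigned int nr)
{
        bitmap[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
}

/* Illustrative subset of shadowed read/write field encodings. */
static const unsigned int shadow_rw_fields[] = {
        0x6820, /* GUEST_RFLAGS */
        0x681e, /* GUEST_RIP */
};

static void init_shadow_bitmaps(unsigned long *vmread_bm, unsigned long *vmwrite_bm)
{
        size_t i;

        /* Every field intercepted by default. */
        memset(vmread_bm, 0xff, PAGE_SIZE);
        memset(vmwrite_bm, 0xff, PAGE_SIZE);

        /* Shadowed read/write fields: neither VMREAD nor VMWRITE exits. */
        for (i = 0; i < sizeof(shadow_rw_fields) / sizeof(shadow_rw_fields[0]); i++) {
                clear_bit_in(vmread_bm, shadow_rw_fields[i]);
                clear_bit_in(vmwrite_bm, shadow_rw_fields[i]);
        }
}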
@@ -7674,7 +8257,7 @@ static int __init vmx_init(void) | |||
7674 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), | 8257 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
7675 | __alignof__(struct vcpu_vmx), THIS_MODULE); | 8258 | __alignof__(struct vcpu_vmx), THIS_MODULE); |
7676 | if (r) | 8259 | if (r) |
7677 | goto out3; | 8260 | goto out7; |
7678 | 8261 | ||
7679 | #ifdef CONFIG_KEXEC | 8262 | #ifdef CONFIG_KEXEC |
7680 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, | 8263 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, |
@@ -7692,7 +8275,7 @@ static int __init vmx_init(void) | |||
7692 | memcpy(vmx_msr_bitmap_longmode_x2apic, | 8275 | memcpy(vmx_msr_bitmap_longmode_x2apic, |
7693 | vmx_msr_bitmap_longmode, PAGE_SIZE); | 8276 | vmx_msr_bitmap_longmode, PAGE_SIZE); |
7694 | 8277 | ||
7695 | if (enable_apicv_reg_vid) { | 8278 | if (enable_apicv) { |
7696 | for (msr = 0x800; msr <= 0x8ff; msr++) | 8279 | for (msr = 0x800; msr <= 0x8ff; msr++) |
7697 | vmx_disable_intercept_msr_read_x2apic(msr); | 8280 | vmx_disable_intercept_msr_read_x2apic(msr); |
7698 | 8281 | ||
@@ -7722,6 +8305,12 @@ static int __init vmx_init(void) | |||
7722 | 8305 | ||
7723 | return 0; | 8306 | return 0; |
7724 | 8307 | ||
8308 | out7: | ||
8309 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
8310 | out6: | ||
8311 | free_page((unsigned long)vmx_vmread_bitmap); | ||
8312 | out5: | ||
8313 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
7725 | out4: | 8314 | out4: |
7726 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 8315 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7727 | out3: | 8316 | out3: |
@@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void) | |||
7743 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 8332 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7744 | free_page((unsigned long)vmx_io_bitmap_b); | 8333 | free_page((unsigned long)vmx_io_bitmap_b); |
7745 | free_page((unsigned long)vmx_io_bitmap_a); | 8334 | free_page((unsigned long)vmx_io_bitmap_a); |
8335 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
8336 | free_page((unsigned long)vmx_vmread_bitmap); | ||
7746 | 8337 | ||
7747 | #ifdef CONFIG_KEXEC | 8338 | #ifdef CONFIG_KEXEC |
7748 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); | 8339 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1721324c271..05a8b1a2300d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; | |||
162 | 162 | ||
163 | static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); | 163 | static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); |
164 | 164 | ||
165 | static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); | ||
166 | |||
167 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | 165 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) |
168 | { | 166 | { |
169 | int i; | 167 | int i; |
@@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
263 | } | 261 | } |
264 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 262 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
265 | 263 | ||
264 | asmlinkage void kvm_spurious_fault(void) | ||
265 | { | ||
266 | /* Fault while not rebooting. We want the trace. */ | ||
267 | BUG(); | ||
268 | } | ||
269 | EXPORT_SYMBOL_GPL(kvm_spurious_fault); | ||
270 | |||
266 | #define EXCPT_BENIGN 0 | 271 | #define EXCPT_BENIGN 0 |
267 | #define EXCPT_CONTRIBUTORY 1 | 272 | #define EXCPT_CONTRIBUTORY 1 |
268 | #define EXCPT_PF 2 | 273 | #define EXCPT_PF 2 |
@@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = { | |||
840 | MSR_IA32_MCG_CTL, | 845 | MSR_IA32_MCG_CTL, |
841 | }; | 846 | }; |
842 | 847 | ||
843 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | 848 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) |
844 | { | 849 | { |
845 | u64 old_efer = vcpu->arch.efer; | ||
846 | |||
847 | if (efer & efer_reserved_bits) | 850 | if (efer & efer_reserved_bits) |
848 | return 1; | 851 | return false; |
849 | |||
850 | if (is_paging(vcpu) | ||
851 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) | ||
852 | return 1; | ||
853 | 852 | ||
854 | if (efer & EFER_FFXSR) { | 853 | if (efer & EFER_FFXSR) { |
855 | struct kvm_cpuid_entry2 *feat; | 854 | struct kvm_cpuid_entry2 *feat; |
856 | 855 | ||
857 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 856 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
858 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) | 857 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
859 | return 1; | 858 | return false; |
860 | } | 859 | } |
861 | 860 | ||
862 | if (efer & EFER_SVME) { | 861 | if (efer & EFER_SVME) { |
@@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
864 | 863 | ||
865 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 864 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
866 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) | 865 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
867 | return 1; | 866 | return false; |
868 | } | 867 | } |
869 | 868 | ||
869 | return true; | ||
870 | } | ||
871 | EXPORT_SYMBOL_GPL(kvm_valid_efer); | ||
872 | |||
873 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | ||
874 | { | ||
875 | u64 old_efer = vcpu->arch.efer; | ||
876 | |||
877 | if (!kvm_valid_efer(vcpu, efer)) | ||
878 | return 1; | ||
879 | |||
880 | if (is_paging(vcpu) | ||
881 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) | ||
882 | return 1; | ||
883 | |||
870 | efer &= ~EFER_LMA; | 884 | efer &= ~EFER_LMA; |
871 | efer |= vcpu->arch.efer & EFER_LMA; | 885 | efer |= vcpu->arch.efer & EFER_LMA; |
872 | 886 | ||
@@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | |||
1079 | u32 thresh_lo, thresh_hi; | 1093 | u32 thresh_lo, thresh_hi; |
1080 | int use_scaling = 0; | 1094 | int use_scaling = 0; |
1081 | 1095 | ||
1096 | /* tsc_khz can be zero if TSC calibration fails */ | ||
1097 | if (this_tsc_khz == 0) | ||
1098 | return; | ||
1099 | |||
1082 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1100 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1083 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1101 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
1084 | &vcpu->arch.virtual_tsc_shift, | 1102 | &vcpu->arch.virtual_tsc_shift, |
@@ -1156,20 +1174,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1156 | ns = get_kernel_ns(); | 1174 | ns = get_kernel_ns(); |
1157 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1175 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1158 | 1176 | ||
1159 | /* n.b - signed multiplication and division required */ | 1177 | if (vcpu->arch.virtual_tsc_khz) { |
1160 | usdiff = data - kvm->arch.last_tsc_write; | 1178 | /* n.b - signed multiplication and division required */ |
1179 | usdiff = data - kvm->arch.last_tsc_write; | ||
1161 | #ifdef CONFIG_X86_64 | 1180 | #ifdef CONFIG_X86_64 |
1162 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | 1181 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; |
1163 | #else | 1182 | #else |
1164 | /* do_div() only does unsigned */ | 1183 | /* do_div() only does unsigned */ |
1165 | asm("idivl %2; xor %%edx, %%edx" | 1184 | asm("idivl %2; xor %%edx, %%edx" |
1166 | : "=A"(usdiff) | 1185 | : "=A"(usdiff) |
1167 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); | 1186 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); |
1168 | #endif | 1187 | #endif |
1169 | do_div(elapsed, 1000); | 1188 | do_div(elapsed, 1000); |
1170 | usdiff -= elapsed; | 1189 | usdiff -= elapsed; |
1171 | if (usdiff < 0) | 1190 | if (usdiff < 0) |
1172 | usdiff = -usdiff; | 1191 | usdiff = -usdiff; |
1192 | } else | ||
1193 | usdiff = USEC_PER_SEC; /* disable TSC match window below */ | ||
1173 | 1194 | ||
1174 | /* | 1195 | /* |
1175 | * Special case: TSC write with a small delta (1 second) of virtual | 1196 | * Special case: TSC write with a small delta (1 second) of virtual |
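The block above turns the difference between the written TSC value and the previous write into microseconds of guest time and subtracts the microseconds of real time that elapsed; a zero virtual_tsc_khz (failed calibration) forces a one-second mismatch so the matching window handled further down never triggers. A plain 64-bit sketch of that arithmetic (signed math, as the comment notes; helper name is illustrative):

#include <stdint.h>

#define USEC_PER_SEC 1000000LL

/*
 * Absolute mismatch, in microseconds, between a new TSC write and what the
 * previous write would predict after 'elapsed_ns' of real time.
 */
static int64_t tsc_write_mismatch_us(uint64_t data, uint64_t last_tsc_write,
                                     int64_t elapsed_ns, uint32_t virtual_tsc_khz)
{
        int64_t usdiff;

        if (!virtual_tsc_khz)
                return USEC_PER_SEC;                   /* calibration failed: never match */

        usdiff = (int64_t)(data - last_tsc_write);
        usdiff = usdiff * 1000 / virtual_tsc_khz;      /* TSC cycles -> microseconds */
        usdiff -= elapsed_ns / 1000;                   /* nanoseconds -> microseconds */
        return usdiff < 0 ? -usdiff : usdiff;
}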
@@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2034 | case MSR_P6_EVNTSEL0: | 2055 | case MSR_P6_EVNTSEL0: |
2035 | case MSR_P6_EVNTSEL1: | 2056 | case MSR_P6_EVNTSEL1: |
2036 | if (kvm_pmu_msr(vcpu, msr)) | 2057 | if (kvm_pmu_msr(vcpu, msr)) |
2037 | return kvm_pmu_set_msr(vcpu, msr, data); | 2058 | return kvm_pmu_set_msr(vcpu, msr_info); |
2038 | 2059 | ||
2039 | if (pr || data != 0) | 2060 | if (pr || data != 0) |
2040 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " | 2061 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " |
@@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2080 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 2101 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
2081 | return xen_hvm_config(vcpu, data); | 2102 | return xen_hvm_config(vcpu, data); |
2082 | if (kvm_pmu_msr(vcpu, msr)) | 2103 | if (kvm_pmu_msr(vcpu, msr)) |
2083 | return kvm_pmu_set_msr(vcpu, msr, data); | 2104 | return kvm_pmu_set_msr(vcpu, msr_info); |
2084 | if (!ignore_msrs) { | 2105 | if (!ignore_msrs) { |
2085 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 2106 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
2086 | msr, data); | 2107 | msr, data); |
@@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2479 | case KVM_CAP_USER_NMI: | 2500 | case KVM_CAP_USER_NMI: |
2480 | case KVM_CAP_REINJECT_CONTROL: | 2501 | case KVM_CAP_REINJECT_CONTROL: |
2481 | case KVM_CAP_IRQ_INJECT_STATUS: | 2502 | case KVM_CAP_IRQ_INJECT_STATUS: |
2482 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
2483 | case KVM_CAP_IRQFD: | 2503 | case KVM_CAP_IRQFD: |
2484 | case KVM_CAP_IOEVENTFD: | 2504 | case KVM_CAP_IOEVENTFD: |
2485 | case KVM_CAP_PIT2: | 2505 | case KVM_CAP_PIT2: |
@@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2497 | case KVM_CAP_XSAVE: | 2517 | case KVM_CAP_XSAVE: |
2498 | case KVM_CAP_ASYNC_PF: | 2518 | case KVM_CAP_ASYNC_PF: |
2499 | case KVM_CAP_GET_TSC_KHZ: | 2519 | case KVM_CAP_GET_TSC_KHZ: |
2500 | case KVM_CAP_PCI_2_3: | ||
2501 | case KVM_CAP_KVMCLOCK_CTRL: | 2520 | case KVM_CAP_KVMCLOCK_CTRL: |
2502 | case KVM_CAP_READONLY_MEM: | 2521 | case KVM_CAP_READONLY_MEM: |
2503 | case KVM_CAP_IRQFD_RESAMPLE: | 2522 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2523 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
2524 | case KVM_CAP_PCI_2_3: | ||
2525 | #endif | ||
2504 | r = 1; | 2526 | r = 1; |
2505 | break; | 2527 | break; |
2506 | case KVM_CAP_COALESCED_MMIO: | 2528 | case KVM_CAP_COALESCED_MMIO: |
@@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2521 | case KVM_CAP_PV_MMU: /* obsolete */ | 2543 | case KVM_CAP_PV_MMU: /* obsolete */ |
2522 | r = 0; | 2544 | r = 0; |
2523 | break; | 2545 | break; |
2546 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
2524 | case KVM_CAP_IOMMU: | 2547 | case KVM_CAP_IOMMU: |
2525 | r = iommu_present(&pci_bus_type); | 2548 | r = iommu_present(&pci_bus_type); |
2526 | break; | 2549 | break; |
2550 | #endif | ||
2527 | case KVM_CAP_MCE: | 2551 | case KVM_CAP_MCE: |
2528 | r = KVM_MAX_MCE_BANKS; | 2552 | r = KVM_MAX_MCE_BANKS; |
2529 | break; | 2553 | break; |
@@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2679 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2703 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2680 | struct kvm_lapic_state *s) | 2704 | struct kvm_lapic_state *s) |
2681 | { | 2705 | { |
2706 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
2682 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); | 2707 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); |
2683 | 2708 | ||
2684 | return 0; | 2709 | return 0; |
@@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | |||
2696 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | 2721 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, |
2697 | struct kvm_interrupt *irq) | 2722 | struct kvm_interrupt *irq) |
2698 | { | 2723 | { |
2699 | if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) | 2724 | if (irq->irq >= KVM_NR_INTERRUPTS) |
2700 | return -EINVAL; | 2725 | return -EINVAL; |
2701 | if (irqchip_in_kernel(vcpu->kvm)) | 2726 | if (irqchip_in_kernel(vcpu->kvm)) |
2702 | return -ENXIO; | 2727 | return -ENXIO; |
@@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2819 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | 2844 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); |
2820 | events->nmi.pad = 0; | 2845 | events->nmi.pad = 0; |
2821 | 2846 | ||
2822 | events->sipi_vector = vcpu->arch.sipi_vector; | 2847 | events->sipi_vector = 0; /* never valid when reporting to user space */ |
2823 | 2848 | ||
2824 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2849 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2825 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | ||
2826 | | KVM_VCPUEVENT_VALID_SHADOW); | 2850 | | KVM_VCPUEVENT_VALID_SHADOW); |
2827 | memset(&events->reserved, 0, sizeof(events->reserved)); | 2851 | memset(&events->reserved, 0, sizeof(events->reserved)); |
2828 | } | 2852 | } |
@@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2853 | vcpu->arch.nmi_pending = events->nmi.pending; | 2877 | vcpu->arch.nmi_pending = events->nmi.pending; |
2854 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | 2878 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); |
2855 | 2879 | ||
2856 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2880 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && |
2857 | vcpu->arch.sipi_vector = events->sipi_vector; | 2881 | kvm_vcpu_has_lapic(vcpu)) |
2882 | vcpu->arch.apic->sipi_vector = events->sipi_vector; | ||
2858 | 2883 | ||
2859 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 2884 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
2860 | 2885 | ||
@@ -3478,13 +3503,15 @@ out: | |||
3478 | return r; | 3503 | return r; |
3479 | } | 3504 | } |
3480 | 3505 | ||
3481 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) | 3506 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, |
3507 | bool line_status) | ||
3482 | { | 3508 | { |
3483 | if (!irqchip_in_kernel(kvm)) | 3509 | if (!irqchip_in_kernel(kvm)) |
3484 | return -ENXIO; | 3510 | return -ENXIO; |
3485 | 3511 | ||
3486 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 3512 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
3487 | irq_event->irq, irq_event->level); | 3513 | irq_event->irq, irq_event->level, |
3514 | line_status); | ||
3488 | return 0; | 3515 | return 0; |
3489 | } | 3516 | } |
3490 | 3517 | ||
@@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
4752 | } | 4779 | } |
4753 | 4780 | ||
4754 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, | 4781 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, |
4755 | bool write_fault_to_shadow_pgtable) | 4782 | bool write_fault_to_shadow_pgtable, |
4783 | int emulation_type) | ||
4756 | { | 4784 | { |
4757 | gpa_t gpa = cr2; | 4785 | gpa_t gpa = cr2; |
4758 | pfn_t pfn; | 4786 | pfn_t pfn; |
4759 | 4787 | ||
4788 | if (emulation_type & EMULTYPE_NO_REEXECUTE) | ||
4789 | return false; | ||
4790 | |||
4760 | if (!vcpu->arch.mmu.direct_map) { | 4791 | if (!vcpu->arch.mmu.direct_map) { |
4761 | /* | 4792 | /* |
4762 | * Write permission should be allowed since only | 4793 | * Write permission should be allowed since only |
@@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4899 | if (r != EMULATION_OK) { | 4930 | if (r != EMULATION_OK) { |
4900 | if (emulation_type & EMULTYPE_TRAP_UD) | 4931 | if (emulation_type & EMULTYPE_TRAP_UD) |
4901 | return EMULATE_FAIL; | 4932 | return EMULATE_FAIL; |
4902 | if (reexecute_instruction(vcpu, cr2, | 4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, |
4903 | write_fault_to_spt)) | 4934 | emulation_type)) |
4904 | return EMULATE_DONE; | 4935 | return EMULATE_DONE; |
4905 | if (emulation_type & EMULTYPE_SKIP) | 4936 | if (emulation_type & EMULTYPE_SKIP) |
4906 | return EMULATE_FAIL; | 4937 | return EMULATE_FAIL; |
@@ -4930,7 +4961,8 @@ restart: | |||
4930 | return EMULATE_DONE; | 4961 | return EMULATE_DONE; |
4931 | 4962 | ||
4932 | if (r == EMULATION_FAILED) { | 4963 | if (r == EMULATION_FAILED) { |
4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) | 4964 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, |
4965 | emulation_type)) | ||
4934 | return EMULATE_DONE; | 4966 | return EMULATE_DONE; |
4935 | 4967 | ||
4936 | return handle_emulation_failure(vcpu); | 4968 | return handle_emulation_failure(vcpu); |
@@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
5641 | #endif | 5673 | #endif |
5642 | } | 5674 | } |
5643 | 5675 | ||
5644 | static void update_eoi_exitmap(struct kvm_vcpu *vcpu) | 5676 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
5645 | { | 5677 | { |
5646 | u64 eoi_exit_bitmap[4]; | 5678 | u64 eoi_exit_bitmap[4]; |
5679 | u32 tmr[8]; | ||
5680 | |||
5681 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | ||
5682 | return; | ||
5647 | 5683 | ||
5648 | memset(eoi_exit_bitmap, 0, 32); | 5684 | memset(eoi_exit_bitmap, 0, 32); |
5685 | memset(tmr, 0, 32); | ||
5649 | 5686 | ||
5650 | kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); | 5687 | kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); |
5651 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | 5688 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); |
5689 | kvm_apic_update_tmr(vcpu, tmr); | ||
5652 | } | 5690 | } |
5653 | 5691 | ||
5654 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5692 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
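Both eoi_exit_bitmap (u64[4]) and tmr (u32[8]) are 256-bit arrays with one bit per interrupt vector, which is why each memset clears exactly 32 bytes. A tiny sketch of marking one vector in such a bitmap (hypothetical helper, vector value chosen arbitrarily):

#include <stdint.h>
#include <string.h>

/* One bit per x86 interrupt vector (0..255). */
static void mark_vector(uint64_t bitmap[4], uint8_t vector)
{
        bitmap[vector / 64] |= 1ULL << (vector % 64);
}

int main(void)
{
        uint64_t eoi_exit_bitmap[4];

        memset(eoi_exit_bitmap, 0, sizeof(eoi_exit_bitmap));    /* 32 bytes */
        mark_vector(eoi_exit_bitmap, 0xef);     /* e.g. a level-triggered vector */
        return 0;
}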
@@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5656 | int r; | 5694 | int r; |
5657 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5695 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5658 | vcpu->run->request_interrupt_window; | 5696 | vcpu->run->request_interrupt_window; |
5659 | bool req_immediate_exit = 0; | 5697 | bool req_immediate_exit = false; |
5660 | 5698 | ||
5661 | if (vcpu->requests) { | 5699 | if (vcpu->requests) { |
5662 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 5700 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
@@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5698 | record_steal_time(vcpu); | 5736 | record_steal_time(vcpu); |
5699 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 5737 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
5700 | process_nmi(vcpu); | 5738 | process_nmi(vcpu); |
5701 | req_immediate_exit = | ||
5702 | kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
5703 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | 5739 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) |
5704 | kvm_handle_pmu_event(vcpu); | 5740 | kvm_handle_pmu_event(vcpu); |
5705 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 5741 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
5706 | kvm_deliver_pmi(vcpu); | 5742 | kvm_deliver_pmi(vcpu); |
5707 | if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) | 5743 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
5708 | update_eoi_exitmap(vcpu); | 5744 | vcpu_scan_ioapic(vcpu); |
5709 | } | 5745 | } |
5710 | 5746 | ||
5711 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5747 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
5748 | kvm_apic_accept_events(vcpu); | ||
5749 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | ||
5750 | r = 1; | ||
5751 | goto out; | ||
5752 | } | ||
5753 | |||
5712 | inject_pending_event(vcpu); | 5754 | inject_pending_event(vcpu); |
5713 | 5755 | ||
5714 | /* enable NMI/IRQ window open exits if needed */ | 5756 | /* enable NMI/IRQ window open exits if needed */ |
5715 | if (vcpu->arch.nmi_pending) | 5757 | if (vcpu->arch.nmi_pending) |
5716 | kvm_x86_ops->enable_nmi_window(vcpu); | 5758 | req_immediate_exit = |
5759 | kvm_x86_ops->enable_nmi_window(vcpu) != 0; | ||
5717 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) | 5760 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
5718 | kvm_x86_ops->enable_irq_window(vcpu); | 5761 | req_immediate_exit = |
5762 | kvm_x86_ops->enable_irq_window(vcpu) != 0; | ||
5719 | 5763 | ||
5720 | if (kvm_lapic_enabled(vcpu)) { | 5764 | if (kvm_lapic_enabled(vcpu)) { |
5721 | /* | 5765 | /* |
@@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5794 | 5838 | ||
5795 | vcpu->mode = OUTSIDE_GUEST_MODE; | 5839 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5796 | smp_wmb(); | 5840 | smp_wmb(); |
5797 | local_irq_enable(); | 5841 | |
5842 | /* Interrupts are enabled by handle_external_intr() */ | ||
5843 | kvm_x86_ops->handle_external_intr(vcpu); | ||
5798 | 5844 | ||
5799 | ++vcpu->stat.exits; | 5845 | ++vcpu->stat.exits; |
5800 | 5846 | ||
@@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5843 | int r; | 5889 | int r; |
5844 | struct kvm *kvm = vcpu->kvm; | 5890 | struct kvm *kvm = vcpu->kvm; |
5845 | 5891 | ||
5846 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | ||
5847 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
5848 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | ||
5849 | kvm_lapic_reset(vcpu); | ||
5850 | r = kvm_vcpu_reset(vcpu); | ||
5851 | if (r) | ||
5852 | return r; | ||
5853 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
5854 | } | ||
5855 | |||
5856 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 5892 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
5857 | r = vapic_enter(vcpu); | 5893 | r = vapic_enter(vcpu); |
5858 | if (r) { | 5894 | if (r) { |
@@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5869 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5905 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
5870 | kvm_vcpu_block(vcpu); | 5906 | kvm_vcpu_block(vcpu); |
5871 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 5907 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
5872 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) | 5908 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { |
5873 | { | 5909 | kvm_apic_accept_events(vcpu); |
5874 | switch(vcpu->arch.mp_state) { | 5910 | switch(vcpu->arch.mp_state) { |
5875 | case KVM_MP_STATE_HALTED: | 5911 | case KVM_MP_STATE_HALTED: |
5876 | vcpu->arch.mp_state = | 5912 | vcpu->arch.mp_state = |
@@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5878 | case KVM_MP_STATE_RUNNABLE: | 5914 | case KVM_MP_STATE_RUNNABLE: |
5879 | vcpu->arch.apf.halted = false; | 5915 | vcpu->arch.apf.halted = false; |
5880 | break; | 5916 | break; |
5881 | case KVM_MP_STATE_SIPI_RECEIVED: | 5917 | case KVM_MP_STATE_INIT_RECEIVED: |
5918 | break; | ||
5882 | default: | 5919 | default: |
5883 | r = -EINTR; | 5920 | r = -EINTR; |
5884 | break; | 5921 | break; |
@@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
6013 | 6050 | ||
6014 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | 6051 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
6015 | kvm_vcpu_block(vcpu); | 6052 | kvm_vcpu_block(vcpu); |
6053 | kvm_apic_accept_events(vcpu); | ||
6016 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | 6054 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); |
6017 | r = -EAGAIN; | 6055 | r = -EAGAIN; |
6018 | goto out; | 6056 | goto out; |
@@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
6169 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 6207 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
6170 | struct kvm_mp_state *mp_state) | 6208 | struct kvm_mp_state *mp_state) |
6171 | { | 6209 | { |
6210 | kvm_apic_accept_events(vcpu); | ||
6172 | mp_state->mp_state = vcpu->arch.mp_state; | 6211 | mp_state->mp_state = vcpu->arch.mp_state; |
6173 | return 0; | 6212 | return 0; |
6174 | } | 6213 | } |
@@ -6176,7 +6215,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
6176 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 6215 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
6177 | struct kvm_mp_state *mp_state) | 6216 | struct kvm_mp_state *mp_state) |
6178 | { | 6217 | { |
6179 | vcpu->arch.mp_state = mp_state->mp_state; | 6218 | if (!kvm_vcpu_has_lapic(vcpu) && |
6219 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) | ||
6220 | return -EINVAL; | ||
6221 | |||
6222 | if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { | ||
6223 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | ||
6224 | set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); | ||
6225 | } else | ||
6226 | vcpu->arch.mp_state = mp_state->mp_state; | ||
6180 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6227 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6181 | return 0; | 6228 | return 0; |
6182 | } | 6229 | } |
@@ -6475,9 +6522,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6475 | r = vcpu_load(vcpu); | 6522 | r = vcpu_load(vcpu); |
6476 | if (r) | 6523 | if (r) |
6477 | return r; | 6524 | return r; |
6478 | r = kvm_vcpu_reset(vcpu); | 6525 | kvm_vcpu_reset(vcpu); |
6479 | if (r == 0) | 6526 | r = kvm_mmu_setup(vcpu); |
6480 | r = kvm_mmu_setup(vcpu); | ||
6481 | vcpu_put(vcpu); | 6527 | vcpu_put(vcpu); |
6482 | 6528 | ||
6483 | return r; | 6529 | return r; |
@@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
6514 | kvm_x86_ops->vcpu_free(vcpu); | 6560 | kvm_x86_ops->vcpu_free(vcpu); |
6515 | } | 6561 | } |
6516 | 6562 | ||
6517 | static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) | 6563 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu) |
6518 | { | 6564 | { |
6519 | atomic_set(&vcpu->arch.nmi_queued, 0); | 6565 | atomic_set(&vcpu->arch.nmi_queued, 0); |
6520 | vcpu->arch.nmi_pending = 0; | 6566 | vcpu->arch.nmi_pending = 0; |
@@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6541 | vcpu->arch.regs_avail = ~0; | 6587 | vcpu->arch.regs_avail = ~0; |
6542 | vcpu->arch.regs_dirty = ~0; | 6588 | vcpu->arch.regs_dirty = ~0; |
6543 | 6589 | ||
6544 | return kvm_x86_ops->vcpu_reset(vcpu); | 6590 | kvm_x86_ops->vcpu_reset(vcpu); |
6591 | } | ||
6592 | |||
6593 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) | ||
6594 | { | ||
6595 | struct kvm_segment cs; | ||
6596 | |||
6597 | kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
6598 | cs.selector = vector << 8; | ||
6599 | cs.base = vector << 12; | ||
6600 | kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); | ||
6601 | kvm_rip_write(vcpu, 0); | ||
6545 | } | 6602 | } |
6546 | 6603 | ||
6547 | int kvm_arch_hardware_enable(void *garbage) | 6604 | int kvm_arch_hardware_enable(void *garbage) |
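kvm_vcpu_deliver_sipi_vector() above follows the architectural SIPI startup convention: the 8-bit vector selects a 4 KiB-aligned real-mode entry page, so CS.selector becomes vector << 8, CS.base becomes vector << 12, and RIP starts at 0, i.e. execution begins at physical address vector * 0x1000. A quick check of the arithmetic (example vector only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint8_t vector = 0x9a;                          /* example SIPI vector */
        uint16_t cs_selector = (uint16_t)(vector << 8); /* 0x9a00 */
        uint32_t cs_base = (uint32_t)vector << 12;      /* 0x9a000 */
        uint32_t start = cs_base + 0;                   /* RIP starts at 0 */

        printf("selector=%#x base=%#x start=%#x\n", cs_selector, cs_base, start);
        return 0;
}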
@@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6706 | } | 6763 | } |
6707 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 6764 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
6708 | 6765 | ||
6709 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 6766 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { |
6767 | r = -ENOMEM; | ||
6710 | goto fail_free_mce_banks; | 6768 | goto fail_free_mce_banks; |
6769 | } | ||
6711 | 6770 | ||
6712 | r = fx_init(vcpu); | 6771 | r = fx_init(vcpu); |
6713 | if (r) | 6772 | if (r) |
@@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
6811 | 6870 | ||
6812 | void kvm_arch_destroy_vm(struct kvm *kvm) | 6871 | void kvm_arch_destroy_vm(struct kvm *kvm) |
6813 | { | 6872 | { |
6873 | if (current->mm == kvm->mm) { | ||
6874 | /* | ||
6875 | * Free memory regions allocated on behalf of userspace, | ||
6876 | * unless the memory map has changed due to process exit | ||
6877 | * or fd copying. | ||
6878 | */ | ||
6879 | struct kvm_userspace_memory_region mem; | ||
6880 | memset(&mem, 0, sizeof(mem)); | ||
6881 | mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | ||
6882 | kvm_set_memory_region(kvm, &mem); | ||
6883 | |||
6884 | mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | ||
6885 | kvm_set_memory_region(kvm, &mem); | ||
6886 | |||
6887 | mem.slot = TSS_PRIVATE_MEMSLOT; | ||
6888 | kvm_set_memory_region(kvm, &mem); | ||
6889 | } | ||
6814 | kvm_iommu_unmap_guest(kvm); | 6890 | kvm_iommu_unmap_guest(kvm); |
6815 | kfree(kvm->arch.vpic); | 6891 | kfree(kvm->arch.vpic); |
6816 | kfree(kvm->arch.vioapic); | 6892 | kfree(kvm->arch.vioapic); |
@@ -6903,24 +6979,21 @@ out_free: | |||
6903 | 6979 | ||
6904 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6980 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
6905 | struct kvm_memory_slot *memslot, | 6981 | struct kvm_memory_slot *memslot, |
6906 | struct kvm_memory_slot old, | ||
6907 | struct kvm_userspace_memory_region *mem, | 6982 | struct kvm_userspace_memory_region *mem, |
6908 | bool user_alloc) | 6983 | enum kvm_mr_change change) |
6909 | { | 6984 | { |
6910 | int npages = memslot->npages; | ||
6911 | |||
6912 | /* | 6985 | /* |
6913 | * Only private memory slots need to be mapped here since | 6986 | * Only private memory slots need to be mapped here since |
6914 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. | 6987 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. |
6915 | */ | 6988 | */ |
6916 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { | 6989 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { |
6917 | unsigned long userspace_addr; | 6990 | unsigned long userspace_addr; |
6918 | 6991 | ||
6919 | /* | 6992 | /* |
6920 | * MAP_SHARED to prevent internal slot pages from being moved | 6993 | * MAP_SHARED to prevent internal slot pages from being moved |
6921 | * by fork()/COW. | 6994 | * by fork()/COW. |
6922 | */ | 6995 | */ |
6923 | userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, | 6996 | userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, |
6924 | PROT_READ | PROT_WRITE, | 6997 | PROT_READ | PROT_WRITE, |
6925 | MAP_SHARED | MAP_ANONYMOUS, 0); | 6998 | MAP_SHARED | MAP_ANONYMOUS, 0); |
6926 | 6999 | ||
@@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
6935 | 7008 | ||
6936 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 7009 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
6937 | struct kvm_userspace_memory_region *mem, | 7010 | struct kvm_userspace_memory_region *mem, |
6938 | struct kvm_memory_slot old, | 7011 | const struct kvm_memory_slot *old, |
6939 | bool user_alloc) | 7012 | enum kvm_mr_change change) |
6940 | { | 7013 | { |
6941 | 7014 | ||
6942 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; | 7015 | int nr_mmu_pages = 0; |
6943 | 7016 | ||
6944 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { | 7017 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { |
6945 | int ret; | 7018 | int ret; |
6946 | 7019 | ||
6947 | ret = vm_munmap(old.userspace_addr, | 7020 | ret = vm_munmap(old->userspace_addr, |
6948 | old.npages * PAGE_SIZE); | 7021 | old->npages * PAGE_SIZE); |
6949 | if (ret < 0) | 7022 | if (ret < 0) |
6950 | printk(KERN_WARNING | 7023 | printk(KERN_WARNING |
6951 | "kvm_vm_ioctl_set_memory_region: " | 7024 | "kvm_vm_ioctl_set_memory_region: " |
@@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6962 | * Existing largepage mappings are destroyed here and new ones will | 7035 | * Existing largepage mappings are destroyed here and new ones will |
6963 | * not be created until the end of the logging. | 7036 | * not be created until the end of the logging. |
6964 | */ | 7037 | */ |
6965 | if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7038 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) |
6966 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7039 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
6967 | /* | 7040 | /* |
6968 | * If memory slot is created, or moved, we need to clear all | 7041 | * If memory slot is created, or moved, we need to clear all |
6969 | * mmio sptes. | 7042 | * mmio sptes. |
6970 | */ | 7043 | */ |
6971 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { | 7044 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { |
6972 | kvm_mmu_zap_all(kvm); | 7045 | kvm_mmu_zap_mmio_sptes(kvm); |
6973 | kvm_reload_remote_mmus(kvm); | 7046 | kvm_reload_remote_mmus(kvm); |
6974 | } | 7047 | } |
6975 | } | 7048 | } |
@@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
6991 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 7064 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6992 | !vcpu->arch.apf.halted) | 7065 | !vcpu->arch.apf.halted) |
6993 | || !list_empty_careful(&vcpu->async_pf.done) | 7066 | || !list_empty_careful(&vcpu->async_pf.done) |
6994 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 7067 | || kvm_apic_has_events(vcpu) |
6995 | || atomic_read(&vcpu->arch.nmi_queued) || | 7068 | || atomic_read(&vcpu->arch.nmi_queued) || |
6996 | (kvm_arch_interrupt_allowed(vcpu) && | 7069 | (kvm_arch_interrupt_allowed(vcpu) && |
6997 | kvm_cpu_has_interrupt(vcpu)); | 7070 | kvm_cpu_has_interrupt(vcpu)); |