-rw-r--r--  Documentation/virtual/kvm/api.txt |  25
-rw-r--r--  arch/ia64/kvm/lapic.h             |   6
-rw-r--r--  arch/s390/kvm/kvm-s390.c          |   8
-rw-r--r--  arch/s390/kvm/kvm-s390.h          |  25
-rw-r--r--  arch/x86/include/asm/kvm_host.h   |   6
-rw-r--r--  arch/x86/include/asm/vmx.h        |  21
-rw-r--r--  arch/x86/kvm/emulate.c            |   2
-rw-r--r--  arch/x86/kvm/irq.c                |  56
-rw-r--r--  arch/x86/kvm/lapic.c              | 140
-rw-r--r--  arch/x86/kvm/lapic.h              |  34
-rw-r--r--  arch/x86/kvm/mmu.c                |  32
-rw-r--r--  arch/x86/kvm/paging_tmpl.h        |   3
-rw-r--r--  arch/x86/kvm/svm.c                |  24
-rw-r--r--  arch/x86/kvm/vmx.c                | 336
-rw-r--r--  arch/x86/kvm/x86.c                |  25
-rw-r--r--  drivers/s390/kvm/virtio_ccw.c     |  20
-rw-r--r--  include/linux/kvm_host.h          |   3
-rw-r--r--  kernel/sched/core.c               |  25
-rw-r--r--  virt/kvm/ioapic.c                 |  39
-rw-r--r--  virt/kvm/ioapic.h                 |   4
-rw-r--r--  virt/kvm/iommu.c                  |   4
-rw-r--r--  virt/kvm/irq_comm.c               |  25
-rw-r--r--  virt/kvm/kvm_main.c               | 106
23 files changed, 803 insertions, 166 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 09905cbcbb0b..c2534c300a45 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -219,19 +219,6 @@ allocation of vcpu ids. For example, if userspace wants | |||
219 | single-threaded guest vcpus, it should make all vcpu ids be a multiple | 219 | single-threaded guest vcpus, it should make all vcpu ids be a multiple |
220 | of the number of vcpus per vcore. | 220 | of the number of vcpus per vcore. |
221 | 221 | ||
222 | On powerpc using book3s_hv mode, the vcpus are mapped onto virtual | ||
223 | threads in one or more virtual CPU cores. (This is because the | ||
224 | hardware requires all the hardware threads in a CPU core to be in the | ||
225 | same partition.) The KVM_CAP_PPC_SMT capability indicates the number | ||
226 | of vcpus per virtual core (vcore). The vcore id is obtained by | ||
227 | dividing the vcpu id by the number of vcpus per vcore. The vcpus in a | ||
228 | given vcore will always be in the same physical core as each other | ||
229 | (though that might be a different physical core from time to time). | ||
230 | Userspace can control the threading (SMT) mode of the guest by its | ||
231 | allocation of vcpu ids. For example, if userspace wants | ||
232 | single-threaded guest vcpus, it should make all vcpu ids be a multiple | ||
233 | of the number of vcpus per vcore. | ||
234 | |||
235 | For virtual cpus that have been created with S390 user controlled virtual | 222 | For virtual cpus that have been created with S390 user controlled virtual |
236 | machines, the resulting vcpu fd can be memory mapped at page offset | 223 | machines, the resulting vcpu fd can be memory mapped at page offset |
237 | KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual | 224 | KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual |
@@ -874,12 +861,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr | |||
874 | be identical. This allows large pages in the guest to be backed by large | 861 | be identical. This allows large pages in the guest to be backed by large |
875 | pages in the host. | 862 | pages in the host. |
876 | 863 | ||
877 | The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which instructs | 864 | The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and |
878 | kvm to keep track of writes to memory within the slot. See KVM_GET_DIRTY_LOG | 865 | KVM_MEM_READONLY. The former can be set to instruct KVM to keep track of |
879 | ioctl. The KVM_CAP_READONLY_MEM capability indicates the availability of the | 866 | writes to memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to |
880 | KVM_MEM_READONLY flag. When this flag is set for a memory region, KVM only | 867 | use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it, |
881 | allows read accesses. Writes will be posted to userspace as KVM_EXIT_MMIO | 868 | to make a new slot read-only. In this case, writes to this memory will be |
882 | exits. | 869 | posted to userspace as KVM_EXIT_MMIO exits. |
883 | 870 | ||
884 | When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of | 871 | When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of |
885 | the memory region are automatically reflected into the guest. For example, an | 872 | the memory region are automatically reflected into the guest. For example, an |
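The api.txt hunk above describes the two memory-slot flags. A minimal userspace sketch of registering a read-only slot, assuming an already-opened /dev/kvm fd and VM fd plus a page-aligned backing buffer (slot number and addresses are illustrative, not taken from the patch):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Illustrative only: register one read-only memory slot for a VM. */
    static int set_readonly_slot(int kvm_fd, int vm_fd, void *backing,
                                 __u64 gpa, __u64 size)
    {
            struct kvm_userspace_memory_region region = {
                    .slot            = 1,                /* arbitrary slot number */
                    .flags           = KVM_MEM_READONLY, /* guest writes become KVM_EXIT_MMIO */
                    .guest_phys_addr = gpa,
                    .memory_size     = size,
                    .userspace_addr  = (__u64)(unsigned long)backing,
            };

            /* The flag is only valid when KVM_CAP_READONLY_MEM is advertised. */
            if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0)
                    return -1;

            return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
    }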
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index c5f92a926a9a..c3e2935b6db4 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
@@ -27,4 +27,10 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | |||
27 | #define kvm_apic_present(x) (true) | 27 | #define kvm_apic_present(x) (true) |
28 | #define kvm_lapic_enabled(x) (true) | 28 | #define kvm_lapic_enabled(x) (true) |
29 | 29 | ||
30 | static inline bool kvm_apic_vid_enabled(void) | ||
31 | { | ||
32 | /* IA64 has no apicv supporting, do nothing here */ | ||
33 | return false; | ||
34 | } | ||
35 | |||
30 | #endif | 36 | #endif |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 5b01f0953900..4377d1886631 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -770,6 +770,14 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) | |||
770 | } else | 770 | } else |
771 | prefix = 0; | 771 | prefix = 0; |
772 | 772 | ||
773 | /* | ||
774 | * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy | ||
775 | * copying in vcpu load/put. Lets update our copies before we save | ||
776 | * it into the save area | ||
777 | */ | ||
778 | save_fp_regs(&vcpu->arch.guest_fpregs); | ||
779 | save_access_regs(vcpu->run->s.regs.acrs); | ||
780 | |||
773 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), | 781 | if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), |
774 | vcpu->arch.guest_fpregs.fprs, 128, prefix)) | 782 | vcpu->arch.guest_fpregs.fprs, 128, prefix)) |
775 | return -EFAULT; | 783 | return -EFAULT; |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 3e05deff21b6..4d89d64a8161 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -67,8 +67,8 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) | |||
67 | 67 | ||
68 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) | 68 | static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) |
69 | { | 69 | { |
70 | int base2 = vcpu->arch.sie_block->ipb >> 28; | 70 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
71 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | 71 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); |
72 | 72 | ||
73 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | 73 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; |
74 | } | 74 | } |
@@ -76,10 +76,10 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) | |||
76 | static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, | 76 | static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, |
77 | u64 *address1, u64 *address2) | 77 | u64 *address1, u64 *address2) |
78 | { | 78 | { |
79 | int base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; | 79 | u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; |
80 | int disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; | 80 | u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; |
81 | int base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; | 81 | u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12; |
82 | int disp2 = vcpu->arch.sie_block->ipb & 0x0fff; | 82 | u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff; |
83 | 83 | ||
84 | *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; | 84 | *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; |
85 | *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | 85 | *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; |
@@ -87,17 +87,20 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, | |||
87 | 87 | ||
88 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) | 88 | static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) |
89 | { | 89 | { |
90 | int base2 = vcpu->arch.sie_block->ipb >> 28; | 90 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
91 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + | 91 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + |
92 | ((vcpu->arch.sie_block->ipb & 0xff00) << 4); | 92 | ((vcpu->arch.sie_block->ipb & 0xff00) << 4); |
93 | /* The displacement is a 20bit _SIGNED_ value */ | ||
94 | if (disp2 & 0x80000) | ||
95 | disp2+=0xfff00000; | ||
93 | 96 | ||
94 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | 97 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; |
95 | } | 98 | } |
96 | 99 | ||
97 | static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) | 100 | static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) |
98 | { | 101 | { |
99 | int base2 = vcpu->arch.sie_block->ipb >> 28; | 102 | u32 base2 = vcpu->arch.sie_block->ipb >> 28; |
100 | int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); | 103 | u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); |
101 | 104 | ||
102 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; | 105 | return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; |
103 | } | 106 | } |
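The kvm_s390_get_base_disp_rsy() change above sign-extends the 20-bit RSY-format displacement before it is added to the base register. The step in isolation (helper name is ours, not part of the patch):

    /* Sign-extend a 20-bit RSY displacement into a 64-bit offset. */
    static long disp20_to_offset(u32 disp2)
    {
            if (disp2 & 0x80000)            /* bit 19 is the sign bit */
                    disp2 += 0xfff00000;    /* propagate it through bits 20..31 */
            return (long)(int)disp2;        /* widen to 64 bits with sign */
    }

For example, disp2 = 0xfffff now resolves to base - 1 rather than base + 0xfffff.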
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 77d56a4ba89c..635a74d22409 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -699,6 +699,11 @@ struct kvm_x86_ops { | |||
699 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 699 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
700 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 700 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
701 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 701 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
702 | int (*vm_has_apicv)(struct kvm *kvm); | ||
703 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | ||
704 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | ||
705 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | ||
706 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | ||
702 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 707 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
703 | int (*get_tdp_level)(void); | 708 | int (*get_tdp_level)(void); |
704 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 709 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
@@ -993,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva); | |||
993 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 998 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
994 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 999 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
995 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | 1000 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); |
1001 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); | ||
996 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 1002 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
997 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1003 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
998 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 1004 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index e385df97bfdc..5c9dbadd364a 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -62,10 +62,12 @@ | |||
62 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 62 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
63 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 63 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
64 | #define EXIT_REASON_APIC_ACCESS 44 | 64 | #define EXIT_REASON_APIC_ACCESS 44 |
65 | #define EXIT_REASON_EOI_INDUCED 45 | ||
65 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
66 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
67 | #define EXIT_REASON_WBINVD 54 | 68 | #define EXIT_REASON_WBINVD 54 |
68 | #define EXIT_REASON_XSETBV 55 | 69 | #define EXIT_REASON_XSETBV 55 |
70 | #define EXIT_REASON_APIC_WRITE 56 | ||
69 | #define EXIT_REASON_INVPCID 58 | 71 | #define EXIT_REASON_INVPCID 58 |
70 | 72 | ||
71 | #define VMX_EXIT_REASONS \ | 73 | #define VMX_EXIT_REASONS \ |
@@ -103,7 +105,12 @@ | |||
103 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ | 105 | { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ |
104 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ | 106 | { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ |
105 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ | 107 | { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ |
106 | { EXIT_REASON_WBINVD, "WBINVD" } | 108 | { EXIT_REASON_WBINVD, "WBINVD" }, \ |
109 | { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ | ||
110 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | ||
111 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | ||
112 | { EXIT_REASON_INVD, "INVD" }, \ | ||
113 | { EXIT_REASON_INVPCID, "INVPCID" } | ||
107 | 114 | ||
108 | #ifdef __KERNEL__ | 115 | #ifdef __KERNEL__ |
109 | 116 | ||
@@ -138,9 +145,12 @@ | |||
138 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 145 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
139 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | 146 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 |
140 | #define SECONDARY_EXEC_RDTSCP 0x00000008 | 147 | #define SECONDARY_EXEC_RDTSCP 0x00000008 |
148 | #define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 | ||
141 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 149 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
142 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 150 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
143 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 151 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
152 | #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 | ||
153 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 | ||
144 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 154 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
145 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 155 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
146 | 156 | ||
@@ -178,6 +188,7 @@ enum vmcs_field { | |||
178 | GUEST_GS_SELECTOR = 0x0000080a, | 188 | GUEST_GS_SELECTOR = 0x0000080a, |
179 | GUEST_LDTR_SELECTOR = 0x0000080c, | 189 | GUEST_LDTR_SELECTOR = 0x0000080c, |
180 | GUEST_TR_SELECTOR = 0x0000080e, | 190 | GUEST_TR_SELECTOR = 0x0000080e, |
191 | GUEST_INTR_STATUS = 0x00000810, | ||
181 | HOST_ES_SELECTOR = 0x00000c00, | 192 | HOST_ES_SELECTOR = 0x00000c00, |
182 | HOST_CS_SELECTOR = 0x00000c02, | 193 | HOST_CS_SELECTOR = 0x00000c02, |
183 | HOST_SS_SELECTOR = 0x00000c04, | 194 | HOST_SS_SELECTOR = 0x00000c04, |
@@ -205,6 +216,14 @@ enum vmcs_field { | |||
205 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 216 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
206 | EPT_POINTER = 0x0000201a, | 217 | EPT_POINTER = 0x0000201a, |
207 | EPT_POINTER_HIGH = 0x0000201b, | 218 | EPT_POINTER_HIGH = 0x0000201b, |
219 | EOI_EXIT_BITMAP0 = 0x0000201c, | ||
220 | EOI_EXIT_BITMAP0_HIGH = 0x0000201d, | ||
221 | EOI_EXIT_BITMAP1 = 0x0000201e, | ||
222 | EOI_EXIT_BITMAP1_HIGH = 0x0000201f, | ||
223 | EOI_EXIT_BITMAP2 = 0x00002020, | ||
224 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | ||
225 | EOI_EXIT_BITMAP3 = 0x00002022, | ||
226 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | ||
208 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | 227 | GUEST_PHYSICAL_ADDRESS = 0x00002400, |
209 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | 228 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, |
210 | VMCS_LINK_POINTER = 0x00002800, | 229 | VMCS_LINK_POINTER = 0x00002800, |
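GUEST_INTR_STATUS, added above, is the 16-bit VMCS field that virtual interrupt delivery uses to track the two in-flight vectors. Assuming the layout documented in the SDM (RVI in bits 7:0, SVI in bits 15:8), a decode would look like:

    /* Illustrative decode of the guest interrupt status field. */
    u16 status = vmcs_read16(GUEST_INTR_STATUS);
    u8 rvi = status & 0xff;   /* requesting virtual interrupt */
    u8 svi = status >> 8;     /* servicing virtual interrupt */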
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e99fb72cd4c5..2b11318151a4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1013,7 +1013,7 @@ static u8 test_cc(unsigned int condition, unsigned long flags) | |||
1013 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); | 1013 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); |
1014 | 1014 | ||
1015 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; | 1015 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; |
1016 | asm("pushq %[flags]; popf; call *%[fastop]" | 1016 | asm("push %[flags]; popf; call *%[fastop]" |
1017 | : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); | 1017 | : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); |
1018 | return rc; | 1018 | return rc; |
1019 | } | 1019 | } |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index b111aee815f8..484bc874688b 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -38,6 +38,38 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | |||
38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); | 38 | EXPORT_SYMBOL(kvm_cpu_has_pending_timer); |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * check if there is pending interrupt from | ||
42 | * non-APIC source without intack. | ||
43 | */ | ||
44 | static int kvm_cpu_has_extint(struct kvm_vcpu *v) | ||
45 | { | ||
46 | if (kvm_apic_accept_pic_intr(v)) | ||
47 | return pic_irqchip(v->kvm)->output; /* PIC */ | ||
48 | else | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * check if there is injectable interrupt: | ||
54 | * when virtual interrupt delivery enabled, | ||
55 | * interrupt from apic will handled by hardware, | ||
56 | * we don't need to check it here. | ||
57 | */ | ||
58 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | ||
59 | { | ||
60 | if (!irqchip_in_kernel(v->kvm)) | ||
61 | return v->arch.interrupt.pending; | ||
62 | |||
63 | if (kvm_cpu_has_extint(v)) | ||
64 | return 1; | ||
65 | |||
66 | if (kvm_apic_vid_enabled(v->kvm)) | ||
67 | return 0; | ||
68 | |||
69 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ | ||
70 | } | ||
71 | |||
72 | /* | ||
41 | * check if there is pending interrupt without | 73 | * check if there is pending interrupt without |
42 | * intack. | 74 | * intack. |
43 | */ | 75 | */ |
@@ -46,27 +78,41 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | |||
46 | if (!irqchip_in_kernel(v->kvm)) | 78 | if (!irqchip_in_kernel(v->kvm)) |
47 | return v->arch.interrupt.pending; | 79 | return v->arch.interrupt.pending; |
48 | 80 | ||
49 | if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) | 81 | if (kvm_cpu_has_extint(v)) |
50 | return pic_irqchip(v->kvm)->output; /* PIC */ | 82 | return 1; |
51 | 83 | ||
52 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ | 84 | return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ |
53 | } | 85 | } |
54 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); | 86 | EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); |
55 | 87 | ||
56 | /* | 88 | /* |
89 | * Read pending interrupt(from non-APIC source) | ||
90 | * vector and intack. | ||
91 | */ | ||
92 | static int kvm_cpu_get_extint(struct kvm_vcpu *v) | ||
93 | { | ||
94 | if (kvm_cpu_has_extint(v)) | ||
95 | return kvm_pic_read_irq(v->kvm); /* PIC */ | ||
96 | return -1; | ||
97 | } | ||
98 | |||
99 | /* | ||
57 | * Read pending interrupt vector and intack. | 100 | * Read pending interrupt vector and intack. |
58 | */ | 101 | */ |
59 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | 102 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v) |
60 | { | 103 | { |
104 | int vector; | ||
105 | |||
61 | if (!irqchip_in_kernel(v->kvm)) | 106 | if (!irqchip_in_kernel(v->kvm)) |
62 | return v->arch.interrupt.nr; | 107 | return v->arch.interrupt.nr; |
63 | 108 | ||
64 | if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) | 109 | vector = kvm_cpu_get_extint(v); |
65 | return kvm_pic_read_irq(v->kvm); /* PIC */ | 110 | |
111 | if (kvm_apic_vid_enabled(v->kvm) || vector != -1) | ||
112 | return vector; /* PIC */ | ||
66 | 113 | ||
67 | return kvm_get_apic_interrupt(v); /* APIC */ | 114 | return kvm_get_apic_interrupt(v); /* APIC */ |
68 | } | 115 | } |
69 | EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); | ||
70 | 116 | ||
71 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) | 117 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) |
72 | { | 118 | { |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9392f527f107..02b51dd4e4ad 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,31 +140,56 @@ static inline int apic_enabled(struct kvm_lapic *apic) | |||
140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ | 140 | (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ |
141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) | 141 | APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) |
142 | 142 | ||
143 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
144 | { | ||
145 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
146 | } | ||
147 | |||
148 | static inline int kvm_apic_id(struct kvm_lapic *apic) | 143 | static inline int kvm_apic_id(struct kvm_lapic *apic) |
149 | { | 144 | { |
150 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 145 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
151 | } | 146 | } |
152 | 147 | ||
153 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | 148 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, |
149 | struct kvm_lapic_irq *irq, | ||
150 | u64 *eoi_exit_bitmap) | ||
154 | { | 151 | { |
155 | u16 cid; | 152 | struct kvm_lapic **dst; |
156 | ldr >>= 32 - map->ldr_bits; | 153 | struct kvm_apic_map *map; |
157 | cid = (ldr >> map->cid_shift) & map->cid_mask; | 154 | unsigned long bitmap = 1; |
155 | int i; | ||
158 | 156 | ||
159 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | 157 | rcu_read_lock(); |
158 | map = rcu_dereference(vcpu->kvm->arch.apic_map); | ||
160 | 159 | ||
161 | return cid; | 160 | if (unlikely(!map)) { |
162 | } | 161 | __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); |
162 | goto out; | ||
163 | } | ||
163 | 164 | ||
164 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | 165 | if (irq->dest_mode == 0) { /* physical mode */ |
165 | { | 166 | if (irq->delivery_mode == APIC_DM_LOWEST || |
166 | ldr >>= (32 - map->ldr_bits); | 167 | irq->dest_id == 0xff) { |
167 | return ldr & map->lid_mask; | 168 | __set_bit(irq->vector, |
169 | (unsigned long *)eoi_exit_bitmap); | ||
170 | goto out; | ||
171 | } | ||
172 | dst = &map->phys_map[irq->dest_id & 0xff]; | ||
173 | } else { | ||
174 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | ||
175 | |||
176 | dst = map->logical_map[apic_cluster_id(map, mda)]; | ||
177 | |||
178 | bitmap = apic_logical_id(map, mda); | ||
179 | } | ||
180 | |||
181 | for_each_set_bit(i, &bitmap, 16) { | ||
182 | if (!dst[i]) | ||
183 | continue; | ||
184 | if (dst[i]->vcpu == vcpu) { | ||
185 | __set_bit(irq->vector, | ||
186 | (unsigned long *)eoi_exit_bitmap); | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | out: | ||
192 | rcu_read_unlock(); | ||
168 | } | 193 | } |
169 | 194 | ||
170 | static void recalculate_apic_map(struct kvm *kvm) | 195 | static void recalculate_apic_map(struct kvm *kvm) |
@@ -230,6 +255,8 @@ out: | |||
230 | 255 | ||
231 | if (old) | 256 | if (old) |
232 | kfree_rcu(old, rcu); | 257 | kfree_rcu(old, rcu); |
258 | |||
259 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
233 | } | 260 | } |
234 | 261 | ||
235 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | 262 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) |
@@ -345,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
345 | { | 372 | { |
346 | int result; | 373 | int result; |
347 | 374 | ||
375 | /* | ||
376 | * Note that irr_pending is just a hint. It will be always | ||
377 | * true with virtual interrupt delivery enabled. | ||
378 | */ | ||
348 | if (!apic->irr_pending) | 379 | if (!apic->irr_pending) |
349 | return -1; | 380 | return -1; |
350 | 381 | ||
@@ -461,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
461 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 492 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
462 | { | 493 | { |
463 | int result; | 494 | int result; |
495 | |||
496 | /* Note that isr_count is always 1 with vid enabled */ | ||
464 | if (!apic->isr_count) | 497 | if (!apic->isr_count) |
465 | return -1; | 498 | return -1; |
466 | if (likely(apic->highest_isr_cache != -1)) | 499 | if (likely(apic->highest_isr_cache != -1)) |
@@ -740,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) | |||
740 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; | 773 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
741 | } | 774 | } |
742 | 775 | ||
776 | static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | ||
777 | { | ||
778 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | ||
779 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | ||
780 | int trigger_mode; | ||
781 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
782 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
783 | else | ||
784 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
785 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
786 | } | ||
787 | } | ||
788 | |||
743 | static int apic_set_eoi(struct kvm_lapic *apic) | 789 | static int apic_set_eoi(struct kvm_lapic *apic) |
744 | { | 790 | { |
745 | int vector = apic_find_highest_isr(apic); | 791 | int vector = apic_find_highest_isr(apic); |
@@ -756,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic) | |||
756 | apic_clear_isr(vector, apic); | 802 | apic_clear_isr(vector, apic); |
757 | apic_update_ppr(apic); | 803 | apic_update_ppr(apic); |
758 | 804 | ||
759 | if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && | 805 | kvm_ioapic_send_eoi(apic, vector); |
760 | kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { | ||
761 | int trigger_mode; | ||
762 | if (apic_test_vector(vector, apic->regs + APIC_TMR)) | ||
763 | trigger_mode = IOAPIC_LEVEL_TRIG; | ||
764 | else | ||
765 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
766 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
767 | } | ||
768 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | 806 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); |
769 | return vector; | 807 | return vector; |
770 | } | 808 | } |
771 | 809 | ||
810 | /* | ||
811 | * this interface assumes a trap-like exit, which has already finished | ||
812 | * desired side effect including vISR and vPPR update. | ||
813 | */ | ||
814 | void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) | ||
815 | { | ||
816 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
817 | |||
818 | trace_kvm_eoi(apic, vector); | ||
819 | |||
820 | kvm_ioapic_send_eoi(apic, vector); | ||
821 | kvm_make_request(KVM_REQ_EVENT, apic->vcpu); | ||
822 | } | ||
823 | EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); | ||
824 | |||
772 | static void apic_send_ipi(struct kvm_lapic *apic) | 825 | static void apic_send_ipi(struct kvm_lapic *apic) |
773 | { | 826 | { |
774 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); | 827 | u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); |
@@ -1212,6 +1265,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) | |||
1212 | } | 1265 | } |
1213 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); | 1266 | EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); |
1214 | 1267 | ||
1268 | /* emulate APIC access in a trap manner */ | ||
1269 | void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) | ||
1270 | { | ||
1271 | u32 val = 0; | ||
1272 | |||
1273 | /* hw has done the conditional check and inst decode */ | ||
1274 | offset &= 0xff0; | ||
1275 | |||
1276 | apic_reg_read(vcpu->arch.apic, offset, 4, &val); | ||
1277 | |||
1278 | /* TODO: optimize to just emulate side effect w/o one more write */ | ||
1279 | apic_reg_write(vcpu->arch.apic, offset, val); | ||
1280 | } | ||
1281 | EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); | ||
1282 | |||
1215 | void kvm_free_lapic(struct kvm_vcpu *vcpu) | 1283 | void kvm_free_lapic(struct kvm_vcpu *vcpu) |
1216 | { | 1284 | { |
1217 | struct kvm_lapic *apic = vcpu->arch.apic; | 1285 | struct kvm_lapic *apic = vcpu->arch.apic; |
@@ -1288,6 +1356,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) | |||
1288 | 1356 | ||
1289 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | 1357 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) |
1290 | { | 1358 | { |
1359 | u64 old_value = vcpu->arch.apic_base; | ||
1291 | struct kvm_lapic *apic = vcpu->arch.apic; | 1360 | struct kvm_lapic *apic = vcpu->arch.apic; |
1292 | 1361 | ||
1293 | if (!apic) { | 1362 | if (!apic) { |
@@ -1309,11 +1378,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) | |||
1309 | value &= ~MSR_IA32_APICBASE_BSP; | 1378 | value &= ~MSR_IA32_APICBASE_BSP; |
1310 | 1379 | ||
1311 | vcpu->arch.apic_base = value; | 1380 | vcpu->arch.apic_base = value; |
1312 | if (apic_x2apic_mode(apic)) { | 1381 | if ((old_value ^ value) & X2APIC_ENABLE) { |
1313 | u32 id = kvm_apic_id(apic); | 1382 | if (value & X2APIC_ENABLE) { |
1314 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); | 1383 | u32 id = kvm_apic_id(apic); |
1315 | kvm_apic_set_ldr(apic, ldr); | 1384 | u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); |
1385 | kvm_apic_set_ldr(apic, ldr); | ||
1386 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); | ||
1387 | } else | ||
1388 | kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); | ||
1316 | } | 1389 | } |
1390 | |||
1317 | apic->base_address = apic->vcpu->arch.apic_base & | 1391 | apic->base_address = apic->vcpu->arch.apic_base & |
1318 | MSR_IA32_APICBASE_BASE; | 1392 | MSR_IA32_APICBASE_BASE; |
1319 | 1393 | ||
@@ -1359,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
1359 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); | 1433 | apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); |
1360 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 1434 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
1361 | } | 1435 | } |
1362 | apic->irr_pending = false; | 1436 | apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); |
1363 | apic->isr_count = 0; | 1437 | apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); |
1364 | apic->highest_isr_cache = -1; | 1438 | apic->highest_isr_cache = -1; |
1365 | update_divide_count(apic); | 1439 | update_divide_count(apic); |
1366 | atomic_set(&apic->lapic_timer.pending, 0); | 1440 | atomic_set(&apic->lapic_timer.pending, 0); |
@@ -1575,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
1575 | update_divide_count(apic); | 1649 | update_divide_count(apic); |
1576 | start_apic_timer(apic); | 1650 | start_apic_timer(apic); |
1577 | apic->irr_pending = true; | 1651 | apic->irr_pending = true; |
1578 | apic->isr_count = count_vectors(apic->regs + APIC_ISR); | 1652 | apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? |
1653 | 1 : count_vectors(apic->regs + APIC_ISR); | ||
1579 | apic->highest_isr_cache = -1; | 1654 | apic->highest_isr_cache = -1; |
1655 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); | ||
1580 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1656 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1581 | } | 1657 | } |
1582 | 1658 | ||
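kvm_calculate_eoi_exitmap() above decides, per routed interrupt, whether its vector must force an EOI-induced exit on this vcpu; the result is a 256-bit bitmap that is later loaded into the four EOI_EXIT_BITMAPn fields. The bit placement itself reduces to (sketch; helper name is ours):

    /* Mark one vector in a 256-bit bitmap stored as four 64-bit words. */
    static void mark_eoi_exit(u64 *eoi_exit_bitmap, u8 vector)
    {
            eoi_exit_bitmap[vector / 64] |= 1ULL << (vector % 64);
    }

For example, vector 0x93 (147) lands in word 2 (EOI_EXIT_BITMAP2), bit 19.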
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e5ebf9f3571f..1676d34ddb4e 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -64,6 +64,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | |||
64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); | 64 | u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); |
65 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); | 65 | void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); |
66 | 66 | ||
67 | void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); | ||
68 | void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); | ||
69 | |||
67 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 70 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
68 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); | 71 | void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); |
69 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | 72 | void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); |
@@ -124,4 +127,35 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | |||
124 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); | 127 | return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); |
125 | } | 128 | } |
126 | 129 | ||
130 | static inline int apic_x2apic_mode(struct kvm_lapic *apic) | ||
131 | { | ||
132 | return apic->vcpu->arch.apic_base & X2APIC_ENABLE; | ||
133 | } | ||
134 | |||
135 | static inline bool kvm_apic_vid_enabled(struct kvm *kvm) | ||
136 | { | ||
137 | return kvm_x86_ops->vm_has_apicv(kvm); | ||
138 | } | ||
139 | |||
140 | static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) | ||
141 | { | ||
142 | u16 cid; | ||
143 | ldr >>= 32 - map->ldr_bits; | ||
144 | cid = (ldr >> map->cid_shift) & map->cid_mask; | ||
145 | |||
146 | BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); | ||
147 | |||
148 | return cid; | ||
149 | } | ||
150 | |||
151 | static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | ||
152 | { | ||
153 | ldr >>= (32 - map->ldr_bits); | ||
154 | return ldr & map->lid_mask; | ||
155 | } | ||
156 | |||
157 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
158 | struct kvm_lapic_irq *irq, | ||
159 | u64 *eoi_bitmap); | ||
160 | |||
127 | #endif | 161 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9f628f7a40b2..0242a8a1b2e2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte) | |||
448 | 448 | ||
449 | static bool spte_is_locklessly_modifiable(u64 spte) | 449 | static bool spte_is_locklessly_modifiable(u64 spte) |
450 | { | 450 | { |
451 | return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); | 451 | return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == |
452 | (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); | ||
452 | } | 453 | } |
453 | 454 | ||
454 | static bool spte_has_volatile_bits(u64 spte) | 455 | static bool spte_has_volatile_bits(u64 spte) |
@@ -1460,28 +1461,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) | |||
1460 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); | 1461 | percpu_counter_add(&kvm_total_used_mmu_pages, nr); |
1461 | } | 1462 | } |
1462 | 1463 | ||
1463 | /* | 1464 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) |
1464 | * Remove the sp from shadow page cache, after call it, | ||
1465 | * we can not find this sp from the cache, and the shadow | ||
1466 | * page table is still valid. | ||
1467 | * It should be under the protection of mmu lock. | ||
1468 | */ | ||
1469 | static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp) | ||
1470 | { | 1465 | { |
1471 | ASSERT(is_empty_shadow_page(sp->spt)); | 1466 | ASSERT(is_empty_shadow_page(sp->spt)); |
1472 | hlist_del(&sp->hash_link); | 1467 | hlist_del(&sp->hash_link); |
1473 | if (!sp->role.direct) | ||
1474 | free_page((unsigned long)sp->gfns); | ||
1475 | } | ||
1476 | |||
1477 | /* | ||
1478 | * Free the shadow page table and the sp, we can do it | ||
1479 | * out of the protection of mmu lock. | ||
1480 | */ | ||
1481 | static void kvm_mmu_free_page(struct kvm_mmu_page *sp) | ||
1482 | { | ||
1483 | list_del(&sp->link); | 1468 | list_del(&sp->link); |
1484 | free_page((unsigned long)sp->spt); | 1469 | free_page((unsigned long)sp->spt); |
1470 | if (!sp->role.direct) | ||
1471 | free_page((unsigned long)sp->gfns); | ||
1485 | kmem_cache_free(mmu_page_header_cache, sp); | 1472 | kmem_cache_free(mmu_page_header_cache, sp); |
1486 | } | 1473 | } |
1487 | 1474 | ||
@@ -2125,7 +2112,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
2125 | do { | 2112 | do { |
2126 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | 2113 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); |
2127 | WARN_ON(!sp->role.invalid || sp->root_count); | 2114 | WARN_ON(!sp->role.invalid || sp->root_count); |
2128 | kvm_mmu_isolate_page(sp); | ||
2129 | kvm_mmu_free_page(sp); | 2115 | kvm_mmu_free_page(sp); |
2130 | } while (!list_empty(invalid_list)); | 2116 | } while (!list_empty(invalid_list)); |
2131 | } | 2117 | } |
@@ -2327,9 +2313,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2327 | if (s->role.level != PT_PAGE_TABLE_LEVEL) | 2313 | if (s->role.level != PT_PAGE_TABLE_LEVEL) |
2328 | return 1; | 2314 | return 1; |
2329 | 2315 | ||
2330 | if (!need_unsync && !s->unsync) { | 2316 | if (!s->unsync) |
2331 | need_unsync = true; | 2317 | need_unsync = true; |
2332 | } | ||
2333 | } | 2318 | } |
2334 | if (need_unsync) | 2319 | if (need_unsync) |
2335 | kvm_unsync_pages(vcpu, gfn); | 2320 | kvm_unsync_pages(vcpu, gfn); |
@@ -3687,6 +3672,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
3687 | else | 3672 | else |
3688 | r = paging32_init_context(vcpu, context); | 3673 | r = paging32_init_context(vcpu, context); |
3689 | 3674 | ||
3675 | vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); | ||
3690 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); | 3676 | vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); |
3691 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); | 3677 | vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); |
3692 | vcpu->arch.mmu.base_role.smep_andnot_wp | 3678 | vcpu->arch.mmu.base_role.smep_andnot_wp |
@@ -3853,7 +3839,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, | |||
3853 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 3839 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
3854 | *gpa &= ~(gpa_t)7; | 3840 | *gpa &= ~(gpa_t)7; |
3855 | *bytes = 8; | 3841 | *bytes = 8; |
3856 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); | 3842 | r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8); |
3857 | if (r) | 3843 | if (r) |
3858 | gentry = 0; | 3844 | gentry = 0; |
3859 | new = (const u8 *)&gentry; | 3845 | new = (const u8 *)&gentry; |
@@ -4007,7 +3993,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
4007 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3993 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
4008 | & mask.word) && rmap_can_add(vcpu)) | 3994 | & mask.word) && rmap_can_add(vcpu)) |
4009 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3995 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); |
4010 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3996 | if (need_remote_flush(entry, *spte)) |
4011 | remote_flush = true; | 3997 | remote_flush = true; |
4012 | ++spte; | 3998 | ++spte; |
4013 | } | 3999 | } |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ca69dcccbe31..34c5c99323f4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -409,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
409 | unsigned direct_access, access = gw->pt_access; | 409 | unsigned direct_access, access = gw->pt_access; |
410 | int top_level, emulate = 0; | 410 | int top_level, emulate = 0; |
411 | 411 | ||
412 | if (!is_present_gpte(gw->ptes[gw->level - 1])) | ||
413 | return 0; | ||
414 | |||
415 | direct_access = gw->pte_access; | 412 | direct_access = gw->pte_access; |
416 | 413 | ||
417 | top_level = vcpu->arch.mmu.root_level; | 414 | top_level = vcpu->arch.mmu.root_level; |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d29d3cd1c156..e1b1ce21bc00 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3571,6 +3571,26 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3571 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); | 3571 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
3572 | } | 3572 | } |
3573 | 3573 | ||
3574 | static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
3575 | { | ||
3576 | return; | ||
3577 | } | ||
3578 | |||
3579 | static int svm_vm_has_apicv(struct kvm *kvm) | ||
3580 | { | ||
3581 | return 0; | ||
3582 | } | ||
3583 | |||
3584 | static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
3585 | { | ||
3586 | return; | ||
3587 | } | ||
3588 | |||
3589 | static void svm_hwapic_isr_update(struct kvm *kvm, int isr) | ||
3590 | { | ||
3591 | return; | ||
3592 | } | ||
3593 | |||
3574 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3594 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
3575 | { | 3595 | { |
3576 | struct vcpu_svm *svm = to_svm(vcpu); | 3596 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -4290,6 +4310,10 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4290 | .enable_nmi_window = enable_nmi_window, | 4310 | .enable_nmi_window = enable_nmi_window, |
4291 | .enable_irq_window = enable_irq_window, | 4311 | .enable_irq_window = enable_irq_window, |
4292 | .update_cr8_intercept = update_cr8_intercept, | 4312 | .update_cr8_intercept = update_cr8_intercept, |
4313 | .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, | ||
4314 | .vm_has_apicv = svm_vm_has_apicv, | ||
4315 | .load_eoi_exitmap = svm_load_eoi_exitmap, | ||
4316 | .hwapic_isr_update = svm_hwapic_isr_update, | ||
4293 | 4317 | ||
4294 | .set_tss_addr = svm_set_tss_addr, | 4318 | .set_tss_addr = svm_set_tss_addr, |
4295 | .get_tdp_level = get_npt_level, | 4319 | .get_tdp_level = get_npt_level, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 02eeba86328d..fe9a9cfadbd6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,6 +84,9 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
84 | static bool __read_mostly fasteoi = 1; | 84 | static bool __read_mostly fasteoi = 1; |
85 | module_param(fasteoi, bool, S_IRUGO); | 85 | module_param(fasteoi, bool, S_IRUGO); |
86 | 86 | ||
87 | static bool __read_mostly enable_apicv_reg_vid = 1; | ||
88 | module_param(enable_apicv_reg_vid, bool, S_IRUGO); | ||
89 | |||
87 | /* | 90 | /* |
88 | * If nested=1, nested virtualization is supported, i.e., guests may use | 91 | * If nested=1, nested virtualization is supported, i.e., guests may use |
89 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 92 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
@@ -640,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a; | |||
640 | static unsigned long *vmx_io_bitmap_b; | 643 | static unsigned long *vmx_io_bitmap_b; |
641 | static unsigned long *vmx_msr_bitmap_legacy; | 644 | static unsigned long *vmx_msr_bitmap_legacy; |
642 | static unsigned long *vmx_msr_bitmap_longmode; | 645 | static unsigned long *vmx_msr_bitmap_longmode; |
646 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | ||
647 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | ||
643 | 648 | ||
644 | static bool cpu_has_load_ia32_efer; | 649 | static bool cpu_has_load_ia32_efer; |
645 | static bool cpu_has_load_perf_global_ctrl; | 650 | static bool cpu_has_load_perf_global_ctrl; |
@@ -764,6 +769,24 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | |||
764 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 769 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
765 | } | 770 | } |
766 | 771 | ||
772 | static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) | ||
773 | { | ||
774 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
775 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
776 | } | ||
777 | |||
778 | static inline bool cpu_has_vmx_apic_register_virt(void) | ||
779 | { | ||
780 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
781 | SECONDARY_EXEC_APIC_REGISTER_VIRT; | ||
782 | } | ||
783 | |||
784 | static inline bool cpu_has_vmx_virtual_intr_delivery(void) | ||
785 | { | ||
786 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
787 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
788 | } | ||
789 | |||
767 | static inline bool cpu_has_vmx_flexpriority(void) | 790 | static inline bool cpu_has_vmx_flexpriority(void) |
768 | { | 791 | { |
769 | return cpu_has_vmx_tpr_shadow() && | 792 | return cpu_has_vmx_tpr_shadow() && |
@@ -1821,6 +1844,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
1821 | vmx->guest_msrs[from] = tmp; | 1844 | vmx->guest_msrs[from] = tmp; |
1822 | } | 1845 | } |
1823 | 1846 | ||
1847 | static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) | ||
1848 | { | ||
1849 | unsigned long *msr_bitmap; | ||
1850 | |||
1851 | if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) { | ||
1852 | if (is_long_mode(vcpu)) | ||
1853 | msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | ||
1854 | else | ||
1855 | msr_bitmap = vmx_msr_bitmap_legacy_x2apic; | ||
1856 | } else { | ||
1857 | if (is_long_mode(vcpu)) | ||
1858 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
1859 | else | ||
1860 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
1861 | } | ||
1862 | |||
1863 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
1864 | } | ||
1865 | |||
1824 | /* | 1866 | /* |
1825 | * Set up the vmcs to automatically save and restore system | 1867 | * Set up the vmcs to automatically save and restore system |
1826 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | 1868 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
@@ -1829,7 +1871,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
1829 | static void setup_msrs(struct vcpu_vmx *vmx) | 1871 | static void setup_msrs(struct vcpu_vmx *vmx) |
1830 | { | 1872 | { |
1831 | int save_nmsrs, index; | 1873 | int save_nmsrs, index; |
1832 | unsigned long *msr_bitmap; | ||
1833 | 1874 | ||
1834 | save_nmsrs = 0; | 1875 | save_nmsrs = 0; |
1835 | #ifdef CONFIG_X86_64 | 1876 | #ifdef CONFIG_X86_64 |
@@ -1861,14 +1902,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
1861 | 1902 | ||
1862 | vmx->save_nmsrs = save_nmsrs; | 1903 | vmx->save_nmsrs = save_nmsrs; |
1863 | 1904 | ||
1864 | if (cpu_has_vmx_msr_bitmap()) { | 1905 | if (cpu_has_vmx_msr_bitmap()) |
1865 | if (is_long_mode(&vmx->vcpu)) | 1906 | vmx_set_msr_bitmap(&vmx->vcpu); |
1866 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
1867 | else | ||
1868 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
1869 | |||
1870 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
1871 | } | ||
1872 | } | 1907 | } |
1873 | 1908 | ||
1874 | /* | 1909 | /* |
@@ -2534,13 +2569,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2534 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { | 2569 | if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { |
2535 | min2 = 0; | 2570 | min2 = 0; |
2536 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2571 | opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2572 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
2537 | SECONDARY_EXEC_WBINVD_EXITING | | 2573 | SECONDARY_EXEC_WBINVD_EXITING | |
2538 | SECONDARY_EXEC_ENABLE_VPID | | 2574 | SECONDARY_EXEC_ENABLE_VPID | |
2539 | SECONDARY_EXEC_ENABLE_EPT | | 2575 | SECONDARY_EXEC_ENABLE_EPT | |
2540 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 2576 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
2541 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | | 2577 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
2542 | SECONDARY_EXEC_RDTSCP | | 2578 | SECONDARY_EXEC_RDTSCP | |
2543 | SECONDARY_EXEC_ENABLE_INVPCID; | 2579 | SECONDARY_EXEC_ENABLE_INVPCID | |
2580 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
2581 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
2544 | if (adjust_vmx_controls(min2, opt2, | 2582 | if (adjust_vmx_controls(min2, opt2, |
2545 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2583 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2546 | &_cpu_based_2nd_exec_control) < 0) | 2584 | &_cpu_based_2nd_exec_control) < 0) |
@@ -2551,6 +2589,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2551 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) | 2589 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) |
2552 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 2590 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
2553 | #endif | 2591 | #endif |
2592 | |||
2593 | if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) | ||
2594 | _cpu_based_2nd_exec_control &= ~( | ||
2595 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
2596 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | ||
2597 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
2598 | |||
2554 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | 2599 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { |
2555 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT | 2600 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT |
2556 | enabled */ | 2601 | enabled */ |
@@ -2748,6 +2793,15 @@ static __init int hardware_setup(void) | |||
2748 | if (!cpu_has_vmx_ple()) | 2793 | if (!cpu_has_vmx_ple()) |
2749 | ple_gap = 0; | 2794 | ple_gap = 0; |
2750 | 2795 | ||
2796 | if (!cpu_has_vmx_apic_register_virt() || | ||
2797 | !cpu_has_vmx_virtual_intr_delivery()) | ||
2798 | enable_apicv_reg_vid = 0; | ||
2799 | |||
2800 | if (enable_apicv_reg_vid) | ||
2801 | kvm_x86_ops->update_cr8_intercept = NULL; | ||
2802 | else | ||
2803 | kvm_x86_ops->hwapic_irr_update = NULL; | ||
2804 | |||
2751 | if (nested) | 2805 | if (nested) |
2752 | nested_vmx_setup_ctls_msrs(); | 2806 | nested_vmx_setup_ctls_msrs(); |
2753 | 2807 | ||
@@ -3173,6 +3227,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
3173 | if (!is_paging(vcpu)) { | 3227 | if (!is_paging(vcpu)) { |
3174 | hw_cr4 &= ~X86_CR4_PAE; | 3228 | hw_cr4 &= ~X86_CR4_PAE; |
3175 | hw_cr4 |= X86_CR4_PSE; | 3229 | hw_cr4 |= X86_CR4_PSE; |
3230 | /* | ||
3231 | * SMEP is disabled if CPU is in non-paging mode in | ||
3232 | * hardware. However KVM always uses paging mode to | ||
3233 | * emulate guest non-paging mode with TDP. | ||
3234 | * To emulate this behavior, SMEP needs to be manually | ||
3235 | * disabled when guest switches to non-paging mode. | ||
3236 | */ | ||
3237 | hw_cr4 &= ~X86_CR4_SMEP; | ||
3176 | } else if (!(cr4 & X86_CR4_PAE)) { | 3238 | } else if (!(cr4 & X86_CR4_PAE)) { |
3177 | hw_cr4 &= ~X86_CR4_PAE; | 3239 | hw_cr4 &= ~X86_CR4_PAE; |
3178 | } | 3240 | } |
@@ -3707,7 +3769,10 @@ static void free_vpid(struct vcpu_vmx *vmx) | |||
3707 | spin_unlock(&vmx_vpid_lock); | 3769 | spin_unlock(&vmx_vpid_lock); |
3708 | } | 3770 | } |
3709 | 3771 | ||
3710 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | 3772 | #define MSR_TYPE_R 1 |
3773 | #define MSR_TYPE_W 2 | ||
3774 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3775 | u32 msr, int type) | ||
3711 | { | 3776 | { |
3712 | int f = sizeof(unsigned long); | 3777 | int f = sizeof(unsigned long); |
3713 | 3778 | ||
@@ -3720,20 +3785,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) | |||
3720 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | 3785 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
3721 | */ | 3786 | */ |
3722 | if (msr <= 0x1fff) { | 3787 | if (msr <= 0x1fff) { |
3723 | __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ | 3788 | if (type & MSR_TYPE_R) |
3724 | __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ | 3789 | /* read-low */ |
3790 | __clear_bit(msr, msr_bitmap + 0x000 / f); | ||
3791 | |||
3792 | if (type & MSR_TYPE_W) | ||
3793 | /* write-low */ | ||
3794 | __clear_bit(msr, msr_bitmap + 0x800 / f); | ||
3795 | |||
3725 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | 3796 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
3726 | msr &= 0x1fff; | 3797 | msr &= 0x1fff; |
3727 | __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ | 3798 | if (type & MSR_TYPE_R) |
3728 | __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ | 3799 | /* read-high */ |
3800 | __clear_bit(msr, msr_bitmap + 0x400 / f); | ||
3801 | |||
3802 | if (type & MSR_TYPE_W) | ||
3803 | /* write-high */ | ||
3804 | __clear_bit(msr, msr_bitmap + 0xc00 / f); | ||
3805 | |||
3806 | } | ||
3807 | } | ||
3808 | |||
3809 | static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3810 | u32 msr, int type) | ||
3811 | { | ||
3812 | int f = sizeof(unsigned long); | ||
3813 | |||
3814 | if (!cpu_has_vmx_msr_bitmap()) | ||
3815 | return; | ||
3816 | |||
3817 | /* | ||
3818 | * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals | ||
3819 | * have the write-low and read-high bitmap offsets the wrong way round. | ||
3820 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | ||
3821 | */ | ||
3822 | if (msr <= 0x1fff) { | ||
3823 | if (type & MSR_TYPE_R) | ||
3824 | /* read-low */ | ||
3825 | __set_bit(msr, msr_bitmap + 0x000 / f); | ||
3826 | |||
3827 | if (type & MSR_TYPE_W) | ||
3828 | /* write-low */ | ||
3829 | __set_bit(msr, msr_bitmap + 0x800 / f); | ||
3830 | |||
3831 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
3832 | msr &= 0x1fff; | ||
3833 | if (type & MSR_TYPE_R) | ||
3834 | /* read-high */ | ||
3835 | __set_bit(msr, msr_bitmap + 0x400 / f); | ||
3836 | |||
3837 | if (type & MSR_TYPE_W) | ||
3838 | /* write-high */ | ||
3839 | __set_bit(msr, msr_bitmap + 0xc00 / f); | ||
3840 | |||
3729 | } | 3841 | } |
3730 | } | 3842 | } |
3731 | 3843 | ||
3732 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | 3844 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
3733 | { | 3845 | { |
3734 | if (!longmode_only) | 3846 | if (!longmode_only) |
3735 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); | 3847 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, |
3736 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); | 3848 | msr, MSR_TYPE_R | MSR_TYPE_W); |
3849 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, | ||
3850 | msr, MSR_TYPE_R | MSR_TYPE_W); | ||
3851 | } | ||
3852 | |||
3853 | static void vmx_enable_intercept_msr_read_x2apic(u32 msr) | ||
3854 | { | ||
3855 | __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3856 | msr, MSR_TYPE_R); | ||
3857 | __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3858 | msr, MSR_TYPE_R); | ||
3859 | } | ||
3860 | |||
3861 | static void vmx_disable_intercept_msr_read_x2apic(u32 msr) | ||
3862 | { | ||
3863 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3864 | msr, MSR_TYPE_R); | ||
3865 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3866 | msr, MSR_TYPE_R); | ||
3867 | } | ||
3868 | |||
3869 | static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | ||
3870 | { | ||
3871 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3872 | msr, MSR_TYPE_W); | ||
3873 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3874 | msr, MSR_TYPE_W); | ||
3737 | } | 3875 | } |
3738 | 3876 | ||
3739 | /* | 3877 | /* |
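The comment above spells out the MSR-bitmap layout: within the 4 KiB page, read-low intercepts live at offset 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00. To make the arithmetic concrete (helper is illustrative, not part of the patch):

    /* Locate the write-intercept bit for an MSR in the VMX MSR bitmap. */
    static void msr_write_bit_pos(u32 msr, u32 *byte_off, u32 *bit)
    {
            u32 base = (msr <= 0x1fff) ? 0x800 : 0xc00;  /* write-low vs write-high */

            *byte_off = base + (msr & 0x1fff) / 8;
            *bit = msr & 7;
    }

For instance, the x2APIC TPR MSR (0x808) maps to byte 0x901, bit 0, which is the kind of bit vmx_disable_intercept_msr_write_x2apic() clears so that guest TPR writes stop exiting.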
@@ -3812,6 +3950,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
3812 | return exec_control; | 3950 | return exec_control; |
3813 | } | 3951 | } |
3814 | 3952 | ||
3953 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
3954 | { | ||
3955 | return enable_apicv_reg_vid && irqchip_in_kernel(kvm); | ||
3956 | } | ||
3957 | |||
3815 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 3958 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
3816 | { | 3959 | { |
3817 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 3960 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
@@ -3829,6 +3972,10 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
3829 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 3972 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
3830 | if (!ple_gap) | 3973 | if (!ple_gap) |
3831 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 3974 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
3975 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
3976 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | ||
3977 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | ||
3978 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
3832 | return exec_control; | 3979 | return exec_control; |
3833 | } | 3980 | } |
3834 | 3981 | ||
@@ -3873,6 +4020,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
3873 | vmx_secondary_exec_control(vmx)); | 4020 | vmx_secondary_exec_control(vmx)); |
3874 | } | 4021 | } |
3875 | 4022 | ||
4023 | if (enable_apicv_reg_vid) { | ||
4024 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | ||
4025 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | ||
4026 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | ||
4027 | vmcs_write64(EOI_EXIT_BITMAP3, 0); | ||
4028 | |||
4029 | vmcs_write16(GUEST_INTR_STATUS, 0); | ||
4030 | } | ||
4031 | |||
3876 | if (ple_gap) { | 4032 | if (ple_gap) { |
3877 | vmcs_write32(PLE_GAP, ple_gap); | 4033 | vmcs_write32(PLE_GAP, ple_gap); |
3878 | vmcs_write32(PLE_WINDOW, ple_window); | 4034 | vmcs_write32(PLE_WINDOW, ple_window); |
@@ -4787,6 +4943,26 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) | |||
4787 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 4943 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
4788 | } | 4944 | } |
4789 | 4945 | ||
4946 | static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) | ||
4947 | { | ||
4948 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
4949 | int vector = exit_qualification & 0xff; | ||
4950 | |||
4951 | /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ | ||
4952 | kvm_apic_set_eoi_accelerated(vcpu, vector); | ||
4953 | return 1; | ||
4954 | } | ||
4955 | |||
4956 | static int handle_apic_write(struct kvm_vcpu *vcpu) | ||
4957 | { | ||
4958 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
4959 | u32 offset = exit_qualification & 0xfff; | ||
4960 | |||
4961 | /* APIC-write VM exit is trap-like and thus no need to adjust IP */ | ||
4962 | kvm_apic_write_nodecode(vcpu, offset); | ||
4963 | return 1; | ||
4964 | } | ||
4965 | |||
4790 | static int handle_task_switch(struct kvm_vcpu *vcpu) | 4966 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
4791 | { | 4967 | { |
4792 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4968 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -5721,6 +5897,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
5721 | [EXIT_REASON_VMON] = handle_vmon, | 5897 | [EXIT_REASON_VMON] = handle_vmon, |
5722 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 5898 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
5723 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 5899 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
5900 | [EXIT_REASON_APIC_WRITE] = handle_apic_write, | ||
5901 | [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, | ||
5724 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 5902 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
5725 | [EXIT_REASON_XSETBV] = handle_xsetbv, | 5903 | [EXIT_REASON_XSETBV] = handle_xsetbv, |
5726 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 5904 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
@@ -6070,6 +6248,85 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
6070 | vmcs_write32(TPR_THRESHOLD, irr); | 6248 | vmcs_write32(TPR_THRESHOLD, irr); |
6071 | } | 6249 | } |
6072 | 6250 | ||
6251 | static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
6252 | { | ||
6253 | u32 sec_exec_control; | ||
6254 | |||
6255 | /* | ||
6256 | * There is no point in enabling virtualize x2apic without | ||
6257 | * enabling apicv. | ||
6258 | */ | ||
6259 | if (!cpu_has_vmx_virtualize_x2apic_mode() || | ||
6260 | !vmx_vm_has_apicv(vcpu->kvm)) | ||
6261 | return; | ||
6262 | |||
6263 | if (!vm_need_tpr_shadow(vcpu->kvm)) | ||
6264 | return; | ||
6265 | |||
6266 | sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
6267 | |||
6268 | if (set) { | ||
6269 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
6270 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
6271 | } else { | ||
6272 | sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | ||
6273 | sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
6274 | } | ||
6275 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | ||
6276 | |||
6277 | vmx_set_msr_bitmap(vcpu); | ||
6278 | } | ||
6279 | |||
6280 | static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) | ||
6281 | { | ||
6282 | u16 status; | ||
6283 | u8 old; | ||
6284 | |||
6285 | if (!vmx_vm_has_apicv(kvm)) | ||
6286 | return; | ||
6287 | |||
6288 | if (isr == -1) | ||
6289 | isr = 0; | ||
6290 | |||
6291 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
6292 | old = status >> 8; | ||
6293 | if (isr != old) { | ||
6294 | status &= 0xff; | ||
6295 | status |= isr << 8; | ||
6296 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
6297 | } | ||
6298 | } | ||
6299 | |||
6300 | static void vmx_set_rvi(int vector) | ||
6301 | { | ||
6302 | u16 status; | ||
6303 | u8 old; | ||
6304 | |||
6305 | status = vmcs_read16(GUEST_INTR_STATUS); | ||
6306 | old = (u8)status & 0xff; | ||
6307 | if ((u8)vector != old) { | ||
6308 | status &= ~0xff; | ||
6309 | status |= (u8)vector; | ||
6310 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
6311 | } | ||
6312 | } | ||
6313 | |||
6314 | static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | ||
6315 | { | ||
6316 | if (max_irr == -1) | ||
6317 | return; | ||
6318 | |||
6319 | vmx_set_rvi(max_irr); | ||
6320 | } | ||
6321 | |||
6322 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | ||
6323 | { | ||
6324 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | ||
6325 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); | ||
6326 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); | ||
6327 | vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); | ||
6328 | } | ||
6329 | |||
6073 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | 6330 | static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) |
6074 | { | 6331 | { |
6075 | u32 exit_intr_info; | 6332 | u32 exit_intr_info; |
@@ -7333,6 +7590,11 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7333 | .enable_nmi_window = enable_nmi_window, | 7590 | .enable_nmi_window = enable_nmi_window, |
7334 | .enable_irq_window = enable_irq_window, | 7591 | .enable_irq_window = enable_irq_window, |
7335 | .update_cr8_intercept = update_cr8_intercept, | 7592 | .update_cr8_intercept = update_cr8_intercept, |
7593 | .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, | ||
7594 | .vm_has_apicv = vmx_vm_has_apicv, | ||
7595 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | ||
7596 | .hwapic_irr_update = vmx_hwapic_irr_update, | ||
7597 | .hwapic_isr_update = vmx_hwapic_isr_update, | ||
7336 | 7598 | ||
7337 | .set_tss_addr = vmx_set_tss_addr, | 7599 | .set_tss_addr = vmx_set_tss_addr, |
7338 | .get_tdp_level = get_ept_level, | 7600 | .get_tdp_level = get_ept_level, |
@@ -7365,7 +7627,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7365 | 7627 | ||
7366 | static int __init vmx_init(void) | 7628 | static int __init vmx_init(void) |
7367 | { | 7629 | { |
7368 | int r, i; | 7630 | int r, i, msr; |
7369 | 7631 | ||
7370 | rdmsrl_safe(MSR_EFER, &host_efer); | 7632 | rdmsrl_safe(MSR_EFER, &host_efer); |
7371 | 7633 | ||
@@ -7386,11 +7648,19 @@ static int __init vmx_init(void) | |||
7386 | if (!vmx_msr_bitmap_legacy) | 7648 | if (!vmx_msr_bitmap_legacy) |
7387 | goto out1; | 7649 | goto out1; |
7388 | 7650 | ||
7651 | vmx_msr_bitmap_legacy_x2apic = | ||
7652 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
7653 | if (!vmx_msr_bitmap_legacy_x2apic) | ||
7654 | goto out2; | ||
7389 | 7655 | ||
7390 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | 7656 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); |
7391 | if (!vmx_msr_bitmap_longmode) | 7657 | if (!vmx_msr_bitmap_longmode) |
7392 | goto out2; | 7658 | goto out3; |
7393 | 7659 | ||
7660 | vmx_msr_bitmap_longmode_x2apic = | ||
7661 | (unsigned long *)__get_free_page(GFP_KERNEL); | ||
7662 | if (!vmx_msr_bitmap_longmode_x2apic) | ||
7663 | goto out4; | ||
7394 | 7664 | ||
7395 | /* | 7665 | /* |
7396 | * Allow direct access to the PC debug port (it is often used for I/O | 7666 | * Allow direct access to the PC debug port (it is often used for I/O |
@@ -7422,6 +7692,28 @@ static int __init vmx_init(void) | |||
7422 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | 7692 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
7423 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | 7693 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
7424 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 7694 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
7695 | memcpy(vmx_msr_bitmap_legacy_x2apic, | ||
7696 | vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
7697 | memcpy(vmx_msr_bitmap_longmode_x2apic, | ||
7698 | vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
7699 | |||
7700 | if (enable_apicv_reg_vid) { | ||
7701 | for (msr = 0x800; msr <= 0x8ff; msr++) | ||
7702 | vmx_disable_intercept_msr_read_x2apic(msr); | ||
7703 | |||
7704 | /* According to the SDM, in x2apic mode the whole id reg is used. | ||
7705 | * But KVM only uses the highest eight bits, so reads need to | ||
7706 | * be intercepted. */ | ||
7707 | vmx_enable_intercept_msr_read_x2apic(0x802); | ||
7708 | /* TMCCT */ | ||
7709 | vmx_enable_intercept_msr_read_x2apic(0x839); | ||
7710 | /* TPR */ | ||
7711 | vmx_disable_intercept_msr_write_x2apic(0x808); | ||
7712 | /* EOI */ | ||
7713 | vmx_disable_intercept_msr_write_x2apic(0x80b); | ||
7714 | /* SELF-IPI */ | ||
7715 | vmx_disable_intercept_msr_write_x2apic(0x83f); | ||
7716 | } | ||
7425 | 7717 | ||
7426 | if (enable_ept) { | 7718 | if (enable_ept) { |
7427 | kvm_mmu_set_mask_ptes(0ull, | 7719 | kvm_mmu_set_mask_ptes(0ull, |
@@ -7435,8 +7727,10 @@ static int __init vmx_init(void) | |||
7435 | 7727 | ||
7436 | return 0; | 7728 | return 0; |
7437 | 7729 | ||
7438 | out3: | 7730 | out4: |
7439 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 7731 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7732 | out3: | ||
7733 | free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
7440 | out2: | 7734 | out2: |
7441 | free_page((unsigned long)vmx_msr_bitmap_legacy); | 7735 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
7442 | out1: | 7736 | out1: |
@@ -7448,6 +7742,8 @@ out: | |||
7448 | 7742 | ||
7449 | static void __exit vmx_exit(void) | 7743 | static void __exit vmx_exit(void) |
7450 | { | 7744 | { |
7745 | free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
7746 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
7451 | free_page((unsigned long)vmx_msr_bitmap_legacy); | 7747 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
7452 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 7748 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7453 | free_page((unsigned long)vmx_io_bitmap_b); | 7749 | free_page((unsigned long)vmx_io_bitmap_b); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b9f55299ed7e..373e17a0d398 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -870,8 +870,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
870 | 870 | ||
871 | kvm_x86_ops->set_efer(vcpu, efer); | 871 | kvm_x86_ops->set_efer(vcpu, efer); |
872 | 872 | ||
873 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | ||
874 | |||
875 | /* Update reserved bits */ | 873 | /* Update reserved bits */ |
876 | if ((efer ^ old_efer) & EFER_NX) | 874 | if ((efer ^ old_efer) & EFER_NX) |
877 | kvm_mmu_reset_context(vcpu); | 875 | kvm_mmu_reset_context(vcpu); |
@@ -5565,7 +5563,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
5565 | vcpu->arch.nmi_injected = true; | 5563 | vcpu->arch.nmi_injected = true; |
5566 | kvm_x86_ops->set_nmi(vcpu); | 5564 | kvm_x86_ops->set_nmi(vcpu); |
5567 | } | 5565 | } |
5568 | } else if (kvm_cpu_has_interrupt(vcpu)) { | 5566 | } else if (kvm_cpu_has_injectable_intr(vcpu)) { |
5569 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { | 5567 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { |
5570 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), | 5568 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), |
5571 | false); | 5569 | false); |
@@ -5633,6 +5631,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
5633 | #endif | 5631 | #endif |
5634 | } | 5632 | } |
5635 | 5633 | ||
5634 | static void update_eoi_exitmap(struct kvm_vcpu *vcpu) | ||
5635 | { | ||
5636 | u64 eoi_exit_bitmap[4]; | ||
5637 | |||
5638 | memset(eoi_exit_bitmap, 0, 32); | ||
5639 | |||
5640 | kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); | ||
5641 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | ||
5642 | } | ||
5643 | |||
5636 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5644 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5637 | { | 5645 | { |
5638 | int r; | 5646 | int r; |
@@ -5686,6 +5694,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5686 | kvm_handle_pmu_event(vcpu); | 5694 | kvm_handle_pmu_event(vcpu); |
5687 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 5695 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
5688 | kvm_deliver_pmi(vcpu); | 5696 | kvm_deliver_pmi(vcpu); |
5697 | if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) | ||
5698 | update_eoi_exitmap(vcpu); | ||
5689 | } | 5699 | } |
5690 | 5700 | ||
5691 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5701 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
@@ -5694,10 +5704,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5694 | /* enable NMI/IRQ window open exits if needed */ | 5704 | /* enable NMI/IRQ window open exits if needed */ |
5695 | if (vcpu->arch.nmi_pending) | 5705 | if (vcpu->arch.nmi_pending) |
5696 | kvm_x86_ops->enable_nmi_window(vcpu); | 5706 | kvm_x86_ops->enable_nmi_window(vcpu); |
5697 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | 5707 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
5698 | kvm_x86_ops->enable_irq_window(vcpu); | 5708 | kvm_x86_ops->enable_irq_window(vcpu); |
5699 | 5709 | ||
5700 | if (kvm_lapic_enabled(vcpu)) { | 5710 | if (kvm_lapic_enabled(vcpu)) { |
5711 | /* | ||
5712 | * Update architecture specific hints for APIC | ||
5713 | * virtual interrupt delivery. | ||
5714 | */ | ||
5715 | if (kvm_x86_ops->hwapic_irr_update) | ||
5716 | kvm_x86_ops->hwapic_irr_update(vcpu, | ||
5717 | kvm_lapic_find_highest_irr(vcpu)); | ||
5701 | update_cr8_intercept(vcpu); | 5718 | update_cr8_intercept(vcpu); |
5702 | kvm_lapic_sync_to_vapic(vcpu); | 5719 | kvm_lapic_sync_to_vapic(vcpu); |
5703 | } | 5720 | } |
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c index 2edd94af131c..3217dfe5cb8b 100644 --- a/drivers/s390/kvm/virtio_ccw.c +++ b/drivers/s390/kvm/virtio_ccw.c | |||
@@ -244,9 +244,9 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, | |||
244 | { | 244 | { |
245 | struct virtio_ccw_device *vcdev = to_vc_device(vdev); | 245 | struct virtio_ccw_device *vcdev = to_vc_device(vdev); |
246 | int err; | 246 | int err; |
247 | struct virtqueue *vq; | 247 | struct virtqueue *vq = NULL; |
248 | struct virtio_ccw_vq_info *info; | 248 | struct virtio_ccw_vq_info *info; |
249 | unsigned long size; | 249 | unsigned long size = 0; /* silence the compiler */ |
250 | unsigned long flags; | 250 | unsigned long flags; |
251 | 251 | ||
252 | /* Allocate queue. */ | 252 | /* Allocate queue. */ |
@@ -279,11 +279,8 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, | |||
279 | /* For now, we fail if we can't get the requested size. */ | 279 | /* For now, we fail if we can't get the requested size. */ |
280 | dev_warn(&vcdev->cdev->dev, "no vq\n"); | 280 | dev_warn(&vcdev->cdev->dev, "no vq\n"); |
281 | err = -ENOMEM; | 281 | err = -ENOMEM; |
282 | free_pages_exact(info->queue, size); | ||
283 | goto out_err; | 282 | goto out_err; |
284 | } | 283 | } |
285 | info->vq = vq; | ||
286 | vq->priv = info; | ||
287 | 284 | ||
288 | /* Register it with the host. */ | 285 | /* Register it with the host. */ |
289 | info->info_block->queue = (__u64)info->queue; | 286 | info->info_block->queue = (__u64)info->queue; |
@@ -297,12 +294,12 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, | |||
297 | err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i); | 294 | err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i); |
298 | if (err) { | 295 | if (err) { |
299 | dev_warn(&vcdev->cdev->dev, "SET_VQ failed\n"); | 296 | dev_warn(&vcdev->cdev->dev, "SET_VQ failed\n"); |
300 | free_pages_exact(info->queue, size); | ||
301 | info->vq = NULL; | ||
302 | vq->priv = NULL; | ||
303 | goto out_err; | 297 | goto out_err; |
304 | } | 298 | } |
305 | 299 | ||
300 | info->vq = vq; | ||
301 | vq->priv = info; | ||
302 | |||
306 | /* Save it to our list. */ | 303 | /* Save it to our list. */ |
307 | spin_lock_irqsave(&vcdev->lock, flags); | 304 | spin_lock_irqsave(&vcdev->lock, flags); |
308 | list_add(&info->node, &vcdev->virtqueues); | 305 | list_add(&info->node, &vcdev->virtqueues); |
@@ -311,8 +308,13 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, | |||
311 | return vq; | 308 | return vq; |
312 | 309 | ||
313 | out_err: | 310 | out_err: |
314 | if (info) | 311 | if (vq) |
312 | vring_del_virtqueue(vq); | ||
313 | if (info) { | ||
314 | if (info->queue) | ||
315 | free_pages_exact(info->queue, size); | ||
315 | kfree(info->info_block); | 316 | kfree(info->info_block); |
317 | } | ||
316 | kfree(info); | 318 | kfree(info); |
317 | return ERR_PTR(err); | 319 | return ERR_PTR(err); |
318 | } | 320 | } |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4dd7d7531e69..0350e0d5e031 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -123,6 +123,7 @@ static inline bool is_error_page(struct page *page) | |||
123 | #define KVM_REQ_MASTERCLOCK_UPDATE 19 | 123 | #define KVM_REQ_MASTERCLOCK_UPDATE 19 |
124 | #define KVM_REQ_MCLOCK_INPROGRESS 20 | 124 | #define KVM_REQ_MCLOCK_INPROGRESS 20 |
125 | #define KVM_REQ_EPR_EXIT 21 | 125 | #define KVM_REQ_EPR_EXIT 21 |
126 | #define KVM_REQ_EOIBITMAP 22 | ||
126 | 127 | ||
127 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 128 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
128 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 | 129 | #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 |
@@ -538,6 +539,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | |||
538 | void kvm_flush_remote_tlbs(struct kvm *kvm); | 539 | void kvm_flush_remote_tlbs(struct kvm *kvm); |
539 | void kvm_reload_remote_mmus(struct kvm *kvm); | 540 | void kvm_reload_remote_mmus(struct kvm *kvm); |
540 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); | 541 | void kvm_make_mclock_inprogress_request(struct kvm *kvm); |
542 | void kvm_make_update_eoibitmap_request(struct kvm *kvm); | ||
541 | 543 | ||
542 | long kvm_arch_dev_ioctl(struct file *filp, | 544 | long kvm_arch_dev_ioctl(struct file *filp, |
543 | unsigned int ioctl, unsigned long arg); | 545 | unsigned int ioctl, unsigned long arg); |
@@ -691,6 +693,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); | |||
691 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); | 693 | int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); |
692 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, | 694 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, |
693 | int irq_source_id, int level); | 695 | int irq_source_id, int level); |
696 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); | ||
694 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); | 697 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); |
695 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 698 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
696 | struct kvm_irq_ack_notifier *kian); | 699 | struct kvm_irq_ack_notifier *kian); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0533496b6228..01edad9b5d71 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -4316,7 +4316,10 @@ EXPORT_SYMBOL(yield); | |||
4316 | * It's the caller's job to ensure that the target task struct | 4316 | * It's the caller's job to ensure that the target task struct |
4317 | * can't go away on us before we can do any checks. | 4317 | * can't go away on us before we can do any checks. |
4318 | * | 4318 | * |
4319 | * Returns true if we indeed boosted the target task. | 4319 | * Returns: |
4320 | * true (>0) if we indeed boosted the target task. | ||
4321 | * false (0) if we failed to boost the target. | ||
4322 | * -ESRCH if there's no task to yield to. | ||
4320 | */ | 4323 | */ |
4321 | bool __sched yield_to(struct task_struct *p, bool preempt) | 4324 | bool __sched yield_to(struct task_struct *p, bool preempt) |
4322 | { | 4325 | { |
@@ -4330,6 +4333,15 @@ bool __sched yield_to(struct task_struct *p, bool preempt) | |||
4330 | 4333 | ||
4331 | again: | 4334 | again: |
4332 | p_rq = task_rq(p); | 4335 | p_rq = task_rq(p); |
4336 | /* | ||
4337 | * If we're the only runnable task on the rq and target rq also | ||
4338 | * has only one task, there's absolutely no point in yielding. | ||
4339 | */ | ||
4340 | if (rq->nr_running == 1 && p_rq->nr_running == 1) { | ||
4341 | yielded = -ESRCH; | ||
4342 | goto out_irq; | ||
4343 | } | ||
4344 | |||
4333 | double_rq_lock(rq, p_rq); | 4345 | double_rq_lock(rq, p_rq); |
4334 | while (task_rq(p) != p_rq) { | 4346 | while (task_rq(p) != p_rq) { |
4335 | double_rq_unlock(rq, p_rq); | 4347 | double_rq_unlock(rq, p_rq); |
@@ -4337,13 +4349,13 @@ again: | |||
4337 | } | 4349 | } |
4338 | 4350 | ||
4339 | if (!curr->sched_class->yield_to_task) | 4351 | if (!curr->sched_class->yield_to_task) |
4340 | goto out; | 4352 | goto out_unlock; |
4341 | 4353 | ||
4342 | if (curr->sched_class != p->sched_class) | 4354 | if (curr->sched_class != p->sched_class) |
4343 | goto out; | 4355 | goto out_unlock; |
4344 | 4356 | ||
4345 | if (task_running(p_rq, p) || p->state) | 4357 | if (task_running(p_rq, p) || p->state) |
4346 | goto out; | 4358 | goto out_unlock; |
4347 | 4359 | ||
4348 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); | 4360 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); |
4349 | if (yielded) { | 4361 | if (yielded) { |
@@ -4356,11 +4368,12 @@ again: | |||
4356 | resched_task(p_rq->curr); | 4368 | resched_task(p_rq->curr); |
4357 | } | 4369 | } |
4358 | 4370 | ||
4359 | out: | 4371 | out_unlock: |
4360 | double_rq_unlock(rq, p_rq); | 4372 | double_rq_unlock(rq, p_rq); |
4373 | out_irq: | ||
4361 | local_irq_restore(flags); | 4374 | local_irq_restore(flags); |
4362 | 4375 | ||
4363 | if (yielded) | 4376 | if (yielded > 0) |
4364 | schedule(); | 4377 | schedule(); |
4365 | 4378 | ||
4366 | return yielded; | 4379 | return yielded; |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index f3abbef46c42..ce82b9401958 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/hrtimer.h> | 35 | #include <linux/hrtimer.h> |
36 | #include <linux/io.h> | 36 | #include <linux/io.h> |
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/export.h> | ||
38 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
39 | #include <asm/page.h> | 40 | #include <asm/page.h> |
40 | #include <asm/current.h> | 41 | #include <asm/current.h> |
@@ -115,6 +116,42 @@ static void update_handled_vectors(struct kvm_ioapic *ioapic) | |||
115 | smp_wmb(); | 116 | smp_wmb(); |
116 | } | 117 | } |
117 | 118 | ||
119 | void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
120 | u64 *eoi_exit_bitmap) | ||
121 | { | ||
122 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; | ||
123 | union kvm_ioapic_redirect_entry *e; | ||
124 | struct kvm_lapic_irq irqe; | ||
125 | int index; | ||
126 | |||
127 | spin_lock(&ioapic->lock); | ||
128 | /* traverse the ioapic entries to set the eoi exit bitmap */ | ||
129 | for (index = 0; index < IOAPIC_NUM_PINS; index++) { | ||
130 | e = &ioapic->redirtbl[index]; | ||
131 | if (!e->fields.mask && | ||
132 | (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || | ||
133 | kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, | ||
134 | index))) { | ||
135 | irqe.dest_id = e->fields.dest_id; | ||
136 | irqe.vector = e->fields.vector; | ||
137 | irqe.dest_mode = e->fields.dest_mode; | ||
138 | irqe.delivery_mode = e->fields.delivery_mode << 8; | ||
139 | kvm_calculate_eoi_exitmap(vcpu, &irqe, eoi_exit_bitmap); | ||
140 | } | ||
141 | } | ||
142 | spin_unlock(&ioapic->lock); | ||
143 | } | ||
144 | EXPORT_SYMBOL_GPL(kvm_ioapic_calculate_eoi_exitmap); | ||
145 | |||
146 | void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm) | ||
147 | { | ||
148 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | ||
149 | |||
150 | if (!kvm_apic_vid_enabled(kvm) || !ioapic) | ||
151 | return; | ||
152 | kvm_make_update_eoibitmap_request(kvm); | ||
153 | } | ||
154 | |||
118 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | 155 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) |
119 | { | 156 | { |
120 | unsigned index; | 157 | unsigned index; |
@@ -156,6 +193,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
156 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG | 193 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG |
157 | && ioapic->irr & (1 << index)) | 194 | && ioapic->irr & (1 << index)) |
158 | ioapic_service(ioapic, index); | 195 | ioapic_service(ioapic, index); |
196 | kvm_ioapic_make_eoibitmap_request(ioapic->kvm); | ||
159 | break; | 197 | break; |
160 | } | 198 | } |
161 | } | 199 | } |
@@ -455,6 +493,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
455 | spin_lock(&ioapic->lock); | 493 | spin_lock(&ioapic->lock); |
456 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 494 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
457 | update_handled_vectors(ioapic); | 495 | update_handled_vectors(ioapic); |
496 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
458 | spin_unlock(&ioapic->lock); | 497 | spin_unlock(&ioapic->lock); |
459 | return 0; | 498 | return 0; |
460 | } | 499 | } |
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index a30abfe6ed16..0400a466c50c 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
@@ -82,5 +82,9 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
82 | struct kvm_lapic_irq *irq); | 82 | struct kvm_lapic_irq *irq); |
83 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 83 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
84 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | 84 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); |
85 | void kvm_ioapic_make_eoibitmap_request(struct kvm *kvm); | ||
86 | void kvm_ioapic_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
87 | u64 *eoi_exit_bitmap); | ||
88 | |||
85 | 89 | ||
86 | #endif | 90 | #endif |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 4a340cb23013..72a130bc448a 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
@@ -76,7 +76,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
76 | gfn = slot->base_gfn; | 76 | gfn = slot->base_gfn; |
77 | end_gfn = gfn + slot->npages; | 77 | end_gfn = gfn + slot->npages; |
78 | 78 | ||
79 | flags = IOMMU_READ | IOMMU_WRITE; | 79 | flags = IOMMU_READ; |
80 | if (!(slot->flags & KVM_MEM_READONLY)) | ||
81 | flags |= IOMMU_WRITE; | ||
80 | if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) | 82 | if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) |
81 | flags |= IOMMU_CACHE; | 83 | flags |= IOMMU_CACHE; |
82 | 84 | ||
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 656fa455e154..ff6d40e2c06d 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/export.h> | ||
25 | #include <trace/events/kvm.h> | 26 | #include <trace/events/kvm.h> |
26 | 27 | ||
27 | #include <asm/msidef.h> | 28 | #include <asm/msidef.h> |
@@ -237,6 +238,28 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) | |||
237 | return ret; | 238 | return ret; |
238 | } | 239 | } |
239 | 240 | ||
241 | bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) | ||
242 | { | ||
243 | struct kvm_irq_ack_notifier *kian; | ||
244 | struct hlist_node *n; | ||
245 | int gsi; | ||
246 | |||
247 | rcu_read_lock(); | ||
248 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; | ||
249 | if (gsi != -1) | ||
250 | hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, | ||
251 | link) | ||
252 | if (kian->gsi == gsi) { | ||
253 | rcu_read_unlock(); | ||
254 | return true; | ||
255 | } | ||
256 | |||
257 | rcu_read_unlock(); | ||
258 | |||
259 | return false; | ||
260 | } | ||
261 | EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); | ||
262 | |||
240 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 263 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) |
241 | { | 264 | { |
242 | struct kvm_irq_ack_notifier *kian; | 265 | struct kvm_irq_ack_notifier *kian; |
@@ -261,6 +284,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, | |||
261 | mutex_lock(&kvm->irq_lock); | 284 | mutex_lock(&kvm->irq_lock); |
262 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); | 285 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); |
263 | mutex_unlock(&kvm->irq_lock); | 286 | mutex_unlock(&kvm->irq_lock); |
287 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
264 | } | 288 | } |
265 | 289 | ||
266 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | 290 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, |
@@ -270,6 +294,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | |||
270 | hlist_del_init_rcu(&kian->link); | 294 | hlist_del_init_rcu(&kian->link); |
271 | mutex_unlock(&kvm->irq_lock); | 295 | mutex_unlock(&kvm->irq_lock); |
272 | synchronize_rcu(); | 296 | synchronize_rcu(); |
297 | kvm_ioapic_make_eoibitmap_request(kvm); | ||
273 | } | 298 | } |
274 | 299 | ||
275 | int kvm_request_irq_source_id(struct kvm *kvm) | 300 | int kvm_request_irq_source_id(struct kvm *kvm) |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5e709ebb7c40..2e93630b4add 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -217,6 +217,11 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm) | |||
217 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); | 217 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); |
218 | } | 218 | } |
219 | 219 | ||
220 | void kvm_make_update_eoibitmap_request(struct kvm *kvm) | ||
221 | { | ||
222 | make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); | ||
223 | } | ||
224 | |||
220 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 225 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
221 | { | 226 | { |
222 | struct page *page; | 227 | struct page *page; |
@@ -714,6 +719,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
714 | } | 719 | } |
715 | 720 | ||
716 | /* | 721 | /* |
722 | * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: | ||
723 | * - create a new memory slot | ||
724 | * - delete an existing memory slot | ||
725 | * - modify an existing memory slot | ||
726 | * -- move it in the guest physical memory space | ||
727 | * -- just change its flags | ||
728 | * | ||
729 | * Since flags can be changed by some of these operations, the following | ||
730 | * differentiation is the best we can do for __kvm_set_memory_region(): | ||
731 | */ | ||
732 | enum kvm_mr_change { | ||
733 | KVM_MR_CREATE, | ||
734 | KVM_MR_DELETE, | ||
735 | KVM_MR_MOVE, | ||
736 | KVM_MR_FLAGS_ONLY, | ||
737 | }; | ||
738 | |||
739 | /* | ||
717 | * Allocate some memory and give it an address in the guest physical address | 740 | * Allocate some memory and give it an address in the guest physical address |
718 | * space. | 741 | * space. |
719 | * | 742 | * |
@@ -731,6 +754,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
731 | struct kvm_memory_slot *slot; | 754 | struct kvm_memory_slot *slot; |
732 | struct kvm_memory_slot old, new; | 755 | struct kvm_memory_slot old, new; |
733 | struct kvm_memslots *slots = NULL, *old_memslots; | 756 | struct kvm_memslots *slots = NULL, *old_memslots; |
757 | enum kvm_mr_change change; | ||
734 | 758 | ||
735 | r = check_memory_region_flags(mem); | 759 | r = check_memory_region_flags(mem); |
736 | if (r) | 760 | if (r) |
@@ -772,17 +796,31 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
772 | new.npages = npages; | 796 | new.npages = npages; |
773 | new.flags = mem->flags; | 797 | new.flags = mem->flags; |
774 | 798 | ||
775 | /* | ||
776 | * Disallow changing a memory slot's size or changing anything about | ||
777 | * zero sized slots that doesn't involve making them non-zero. | ||
778 | */ | ||
779 | r = -EINVAL; | 799 | r = -EINVAL; |
780 | if (npages && old.npages && npages != old.npages) | 800 | if (npages) { |
781 | goto out; | 801 | if (!old.npages) |
782 | if (!npages && !old.npages) | 802 | change = KVM_MR_CREATE; |
803 | else { /* Modify an existing slot. */ | ||
804 | if ((mem->userspace_addr != old.userspace_addr) || | ||
805 | (npages != old.npages) || | ||
806 | ((new.flags ^ old.flags) & KVM_MEM_READONLY)) | ||
807 | goto out; | ||
808 | |||
809 | if (base_gfn != old.base_gfn) | ||
810 | change = KVM_MR_MOVE; | ||
811 | else if (new.flags != old.flags) | ||
812 | change = KVM_MR_FLAGS_ONLY; | ||
813 | else { /* Nothing to change. */ | ||
814 | r = 0; | ||
815 | goto out; | ||
816 | } | ||
817 | } | ||
818 | } else if (old.npages) { | ||
819 | change = KVM_MR_DELETE; | ||
820 | } else /* Modify a non-existent slot: disallowed. */ | ||
783 | goto out; | 821 | goto out; |
784 | 822 | ||
785 | if ((npages && !old.npages) || (base_gfn != old.base_gfn)) { | 823 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { |
786 | /* Check for overlaps */ | 824 | /* Check for overlaps */ |
787 | r = -EEXIST; | 825 | r = -EEXIST; |
788 | kvm_for_each_memslot(slot, kvm->memslots) { | 826 | kvm_for_each_memslot(slot, kvm->memslots) { |
@@ -800,20 +838,12 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
800 | new.dirty_bitmap = NULL; | 838 | new.dirty_bitmap = NULL; |
801 | 839 | ||
802 | r = -ENOMEM; | 840 | r = -ENOMEM; |
803 | 841 | if (change == KVM_MR_CREATE) { | |
804 | /* | ||
805 | * Allocate if a slot is being created. If modifying a slot, | ||
806 | * the userspace_addr cannot change. | ||
807 | */ | ||
808 | if (!old.npages) { | ||
809 | new.user_alloc = user_alloc; | 842 | new.user_alloc = user_alloc; |
810 | new.userspace_addr = mem->userspace_addr; | 843 | new.userspace_addr = mem->userspace_addr; |
811 | 844 | ||
812 | if (kvm_arch_create_memslot(&new, npages)) | 845 | if (kvm_arch_create_memslot(&new, npages)) |
813 | goto out_free; | 846 | goto out_free; |
814 | } else if (npages && mem->userspace_addr != old.userspace_addr) { | ||
815 | r = -EINVAL; | ||
816 | goto out_free; | ||
817 | } | 847 | } |
818 | 848 | ||
819 | /* Allocate page dirty bitmap if needed */ | 849 | /* Allocate page dirty bitmap if needed */ |
@@ -822,7 +852,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
822 | goto out_free; | 852 | goto out_free; |
823 | } | 853 | } |
824 | 854 | ||
825 | if (!npages || base_gfn != old.base_gfn) { | 855 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { |
826 | r = -ENOMEM; | 856 | r = -ENOMEM; |
827 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), | 857 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), |
828 | GFP_KERNEL); | 858 | GFP_KERNEL); |
@@ -863,15 +893,23 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
863 | goto out_free; | 893 | goto out_free; |
864 | } | 894 | } |
865 | 895 | ||
866 | /* map new memory slot into the iommu */ | 896 | /* |
867 | if (npages) { | 897 | * IOMMU mapping: New slots need to be mapped. Old slots need to be |
898 | * un-mapped and re-mapped if their base changes. Since base change | ||
899 | * unmapping is handled above with slot deletion, mapping alone is | ||
900 | * needed here. Anything else the iommu might care about for existing | ||
901 | * slots (size changes, userspace addr changes and read-only flag | ||
902 | * changes) is disallowed above, so any other attribute changes getting | ||
903 | * here can be skipped. | ||
904 | */ | ||
905 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { | ||
868 | r = kvm_iommu_map_pages(kvm, &new); | 906 | r = kvm_iommu_map_pages(kvm, &new); |
869 | if (r) | 907 | if (r) |
870 | goto out_slots; | 908 | goto out_slots; |
871 | } | 909 | } |
872 | 910 | ||
873 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 911 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
874 | if (!npages) { | 912 | if (change == KVM_MR_DELETE) { |
875 | new.dirty_bitmap = NULL; | 913 | new.dirty_bitmap = NULL; |
876 | memset(&new.arch, 0, sizeof(new.arch)); | 914 | memset(&new.arch, 0, sizeof(new.arch)); |
877 | } | 915 | } |
@@ -1669,6 +1707,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) | |||
1669 | { | 1707 | { |
1670 | struct pid *pid; | 1708 | struct pid *pid; |
1671 | struct task_struct *task = NULL; | 1709 | struct task_struct *task = NULL; |
1710 | bool ret = false; | ||
1672 | 1711 | ||
1673 | rcu_read_lock(); | 1712 | rcu_read_lock(); |
1674 | pid = rcu_dereference(target->pid); | 1713 | pid = rcu_dereference(target->pid); |
@@ -1676,17 +1715,15 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) | |||
1676 | task = get_pid_task(target->pid, PIDTYPE_PID); | 1715 | task = get_pid_task(target->pid, PIDTYPE_PID); |
1677 | rcu_read_unlock(); | 1716 | rcu_read_unlock(); |
1678 | if (!task) | 1717 | if (!task) |
1679 | return false; | 1718 | return ret; |
1680 | if (task->flags & PF_VCPU) { | 1719 | if (task->flags & PF_VCPU) { |
1681 | put_task_struct(task); | 1720 | put_task_struct(task); |
1682 | return false; | 1721 | return ret; |
1683 | } | ||
1684 | if (yield_to(task, 1)) { | ||
1685 | put_task_struct(task); | ||
1686 | return true; | ||
1687 | } | 1722 | } |
1723 | ret = yield_to(task, 1); | ||
1688 | put_task_struct(task); | 1724 | put_task_struct(task); |
1689 | return false; | 1725 | |
1726 | return ret; | ||
1690 | } | 1727 | } |
1691 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); | 1728 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); |
1692 | 1729 | ||
@@ -1727,12 +1764,14 @@ bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | |||
1727 | return eligible; | 1764 | return eligible; |
1728 | } | 1765 | } |
1729 | #endif | 1766 | #endif |
1767 | |||
1730 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 1768 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) |
1731 | { | 1769 | { |
1732 | struct kvm *kvm = me->kvm; | 1770 | struct kvm *kvm = me->kvm; |
1733 | struct kvm_vcpu *vcpu; | 1771 | struct kvm_vcpu *vcpu; |
1734 | int last_boosted_vcpu = me->kvm->last_boosted_vcpu; | 1772 | int last_boosted_vcpu = me->kvm->last_boosted_vcpu; |
1735 | int yielded = 0; | 1773 | int yielded = 0; |
1774 | int try = 3; | ||
1736 | int pass; | 1775 | int pass; |
1737 | int i; | 1776 | int i; |
1738 | 1777 | ||
@@ -1744,7 +1783,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1744 | * VCPU is holding the lock that we need and will release it. | 1783 | * VCPU is holding the lock that we need and will release it. |
1745 | * We approximate round-robin by starting at the last boosted VCPU. | 1784 | * We approximate round-robin by starting at the last boosted VCPU. |
1746 | */ | 1785 | */ |
1747 | for (pass = 0; pass < 2 && !yielded; pass++) { | 1786 | for (pass = 0; pass < 2 && !yielded && try; pass++) { |
1748 | kvm_for_each_vcpu(i, vcpu, kvm) { | 1787 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1749 | if (!pass && i <= last_boosted_vcpu) { | 1788 | if (!pass && i <= last_boosted_vcpu) { |
1750 | i = last_boosted_vcpu; | 1789 | i = last_boosted_vcpu; |
@@ -1757,10 +1796,15 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1757 | continue; | 1796 | continue; |
1758 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | 1797 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) |
1759 | continue; | 1798 | continue; |
1760 | if (kvm_vcpu_yield_to(vcpu)) { | 1799 | |
1800 | yielded = kvm_vcpu_yield_to(vcpu); | ||
1801 | if (yielded > 0) { | ||
1761 | kvm->last_boosted_vcpu = i; | 1802 | kvm->last_boosted_vcpu = i; |
1762 | yielded = 1; | ||
1763 | break; | 1803 | break; |
1804 | } else if (yielded < 0) { | ||
1805 | try--; | ||
1806 | if (!try) | ||
1807 | break; | ||
1764 | } | 1808 | } |
1765 | } | 1809 | } |
1766 | } | 1810 | } |