author     Linus Torvalds <torvalds@linux-foundation.org>  2017-09-08 18:18:36 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-09-08 18:18:36 -0400
commit     0756b7fbb696d2cb18785da9cab13ec164017f64
tree       d06242e3f35a7623e00068d7c95d06824f396df3
parent     6d6218976df142ba5594371f8dbd56650151c56f
parent     5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760
Merge tag 'kvm-4.14-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"First batch of KVM changes for 4.14
Common:
- improve heuristic for boosting preempted spinlocks by ignoring
VCPUs in user mode
ARM:
- fix for decoding external abort types from guests
- added support for migrating the active priority of interrupts when
running a GICv2 guest on a GICv3 host
- minor cleanup
PPC:
- expose storage keys to userspace
- merge kvm-ppc-fixes with a fix that missed 4.13 because of
vacations
- fixes
s390:
- merge of kvm/master to avoid conflicts with additional sthyi fixes
- wire up the no-dat enhancements in KVM
- multiple epoch facility (z14 feature)
- Configuration z/Architecture Mode
- more sthyi fixes
- gdb server range checking fix
- small code cleanups
x86:
- emulate Hyper-V TSC frequency MSRs
- add nested INVPCID
- emulate EPTP switching VMFUNC
- support Virtual GIF
- support 5 level page tables
- speedup nested VM exits by packing byte operations
- speedup MMIO by using hardware provided physical address
- a lot of fixes and cleanups, especially nested"
* tag 'kvm-4.14-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (67 commits)
KVM: arm/arm64: Support uaccess of GICC_APRn
KVM: arm/arm64: Extract GICv3 max APRn index calculation
KVM: arm/arm64: vITS: Drop its_ite->lpi field
KVM: arm/arm64: vgic: constify seq_operations and file_operations
KVM: arm/arm64: Fix guest external abort matching
KVM: PPC: Book3S HV: Fix memory leak in kvm_vm_ioctl_get_htab_fd
KVM: s390: vsie: cleanup mcck reinjection
KVM: s390: use WARN_ON_ONCE only for checking
KVM: s390: guestdbg: fix range check
KVM: PPC: Book3S HV: Report storage key support to userspace
KVM: PPC: Book3S HV: Fix case where HDEC is treated as 32-bit on POWER9
KVM: PPC: Book3S HV: Fix invalid use of register expression
KVM: PPC: Book3S HV: Fix H_REGISTER_VPA VPA size validation
KVM: PPC: Book3S HV: Fix setting of storage key in H_ENTER
KVM: PPC: e500mc: Fix a NULL dereference
KVM: PPC: e500: Fix some NULL dereferences on error
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
KVM: s390: we are always in czam mode
KVM: s390: expose no-DAT to guest and migration support
KVM: s390: sthyi: remove invalid guest write access
...
64 files changed, 1479 insertions, 768 deletions
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index b2f60ca8b60c..b3ce12643553 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -83,6 +83,11 @@ Groups:
 
 Bits for undefined preemption levels are RAZ/WI.
 
+Note that this differs from a CPU's view of the APRs on hardware in which
+a GIC without the security extensions expose group 0 and group 1 active
+priorities in separate register groups, whereas we show a combined view
+similar to GICv2's GICH_APR.
+
 For historical reasons and to provide ABI compatibility with userspace we
 export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
 field in the lower 5 bits of a word, meaning that userspace must always
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 903fc926860b..95ca68d663a4 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -176,7 +176,8 @@ Architectures: s390
 
 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
 
-Allows user space to set/get the TOD clock extension (u8).
+Allows user space to set/get the TOD clock extension (u8) (superseded by
+KVM_S390_VM_TOD_EXT).
 
 Parameters: address of a buffer in user space to store the data (u8) to
 Returns: -EFAULT if the given address is not accessible from kernel space
@@ -190,6 +191,17 @@ the POP (u64).
 Parameters: address of a buffer in user space to store the data (u64) to
 Returns: -EFAULT if the given address is not accessible from kernel space
 
+3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
+Allows user space to set/get bits 0-63 of the TOD clock register as defined in
+the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
+also allows user space to get/set it. If the guest CPU model does not support
+it, it is stored as 0 and not allowed to be set to a value != 0.
+
+Parameters: address of a buffer in user space to store the data
+            (kvm_s390_vm_tod_clock) to
+Returns: -EFAULT if the given address is not accessible from kernel space
+         -EINVAL if setting the TOD clock extension to != 0 is not supported
+
 4. GROUP: KVM_S390_VM_CRYPTO
 Architectures: s390
 
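The KVM_S390_VM_TOD_EXT attribute documented above is driven through the generic KVM device-attribute ioctls on the VM file descriptor. A minimal userspace sketch, assuming an already-created VM fd and that the attribute layout matches the struct kvm_s390_vm_tod_clock introduced later in this diff (the constant and struct are defined locally in case installed headers predate this series):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef KVM_S390_VM_TOD_EXT
#define KVM_S390_VM_TOD_EXT 2          /* value from the uapi hunk in this series */
#endif

/* Mirrors the uapi struct kvm_s390_vm_tod_clock added by this series. */
struct s390_tod_ext {
        uint8_t  epoch_idx;            /* TOD epoch extension */
        uint64_t tod;                  /* bits 0-63 of the TOD clock */
};

/* Query the extended guest TOD clock; vm_fd is an existing KVM VM descriptor. */
static int get_guest_tod_ext(int vm_fd, struct s390_tod_ext *tod)
{
        struct kvm_device_attr attr = {
                .group = KVM_S390_VM_TOD,
                .attr  = KVM_S390_VM_TOD_EXT,
                .addr  = (uint64_t)(unsigned long)tod,
        };

        return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
}

Setting the clock is symmetric via KVM_SET_DEVICE_ATTR; as the Returns section above notes, a non-zero epoch_idx is rejected with -EINVAL when the guest CPU model does not support the TOD clock extension.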
diff --git a/MAINTAINERS b/MAINTAINERS
index bf206bd9f056..722c7aec88c2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7464,18 +7464,30 @@ L: kvm@vger.kernel.org
 W: http://www.linux-kvm.org
 T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
 S: Supported
-F: Documentation/*/kvm*.txt
 F: Documentation/virtual/kvm/
-F: arch/*/kvm/
-F: arch/x86/kernel/kvm.c
-F: arch/x86/kernel/kvmclock.c
-F: arch/*/include/asm/kvm*
-F: include/linux/kvm*
+F: include/trace/events/kvm.h
+F: include/uapi/asm-generic/kvm*
 F: include/uapi/linux/kvm*
-F: virt/kvm/
+F: include/asm-generic/kvm*
+F: include/linux/kvm*
+F: include/kvm/iodev.h
+F: virt/kvm/*
 F: tools/kvm/
 
-KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
+KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Radim Krčmář <rkrcmar@redhat.com>
+L: kvm@vger.kernel.org
+W: http://www.linux-kvm.org
+T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
+S: Supported
+F: arch/x86/kvm/
+F: arch/x86/include/uapi/asm/kvm*
+F: arch/x86/include/asm/kvm*
+F: arch/x86/kernel/kvm.c
+F: arch/x86/kernel/kvmclock.c
+
+KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd)
 M: Joerg Roedel <joro@8bytes.org>
 L: kvm@vger.kernel.org
 W: http://www.linux-kvm.org/
@@ -7483,7 +7495,7 @@ S: Maintained
 F: arch/x86/include/asm/svm.h
 F: arch/x86/kvm/svm.c
 
-KERNEL VIRTUAL MACHINE (KVM) FOR ARM
+KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm)
 M: Christoffer Dall <christoffer.dall@linaro.org>
 M: Marc Zyngier <marc.zyngier@arm.com>
 L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -7497,14 +7509,16 @@ F: arch/arm/kvm/
 F: virt/kvm/arm/
 F: include/kvm/arm_*
 
-KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
+KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc)
 M: Alexander Graf <agraf@suse.com>
 L: kvm-ppc@vger.kernel.org
 W: http://www.linux-kvm.org/
 T: git git://github.com/agraf/linux-2.6.git
 S: Supported
+F: arch/powerpc/include/uapi/asm/kvm*
 F: arch/powerpc/include/asm/kvm*
 F: arch/powerpc/kvm/
+F: arch/powerpc/kernel/kvm*
 
 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
 M: Christoffer Dall <christoffer.dall@linaro.org>
@@ -7531,7 +7545,8 @@ L: linux-s390@vger.kernel.org
 W: http://www.ibm.com/developerworks/linux/linux390/
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
 S: Supported
-F: Documentation/s390/kvm.txt
+F: arch/s390/include/uapi/asm/kvm*
+F: arch/s390/include/asm/gmap.h
 F: arch/s390/include/asm/kvm*
 F: arch/s390/kvm/
 F: arch/s390/mm/gmap.c
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index ebf020b02bc8..c8781450905b 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -227,7 +227,6 @@
 
 #define HSR_DABT_S1PTW (_AC(1, UL) << 7)
 #define HSR_DABT_CM (_AC(1, UL) << 8)
-#define HSR_DABT_EA (_AC(1, UL) << 9)
 
 #define kvm_arm_exception_type \
         {0, "RESET" }, \
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9a8a45aaf19a..98089ffd91bb 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -149,11 +149,6 @@ static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu)
         return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
 }
 
-static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
-{
-        return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA;
-}
-
 static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu)
 {
         return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW;
@@ -206,6 +201,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
         return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
 
+static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu)
+{
+        switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+        case FSC_SEA:
+        case FSC_SEA_TTW0:
+        case FSC_SEA_TTW1:
+        case FSC_SEA_TTW2:
+        case FSC_SEA_TTW3:
+        case FSC_SECC:
+        case FSC_SECC_TTW0:
+        case FSC_SECC_TTW1:
+        case FSC_SECC_TTW2:
+        case FSC_SECC_TTW3:
+                return true;
+        default:
+                return false;
+        }
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 {
         return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 54442e375354..cf8bf6bf87c4 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
         if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
                 trace_kvm_wfx(*vcpu_pc(vcpu), true);
                 vcpu->stat.wfe_exit_stat++;
-                kvm_vcpu_on_spin(vcpu);
+                kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
         } else {
                 trace_kvm_wfx(*vcpu_pc(vcpu), false);
                 vcpu->stat.wfi_exit_stat++;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fe39e6841326..e5df3fce0008 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -188,11 +188,6 @@ static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
         return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
 }
 
-static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
-{
-        return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA);
-}
-
 static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
 {
         return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
@@ -240,6 +235,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
         return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+        switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+        case FSC_SEA:
+        case FSC_SEA_TTW0:
+        case FSC_SEA_TTW1:
+        case FSC_SEA_TTW2:
+        case FSC_SEA_TTW3:
+        case FSC_SECC:
+        case FSC_SECC_TTW0:
+        case FSC_SECC_TTW1:
+        case FSC_SECC_TTW2:
+        case FSC_SECC_TTW3:
+                return true;
+        default:
+                return false;
+        }
+}
+
 static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 {
         u32 esr = kvm_vcpu_get_hsr(vcpu);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 17d8a1677a0b..7debb74843a0 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
         if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
                 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
                 vcpu->stat.wfe_exit_stat++;
-                kvm_vcpu_on_spin(vcpu);
+                kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
         } else {
                 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
                 vcpu->stat.wfi_exit_stat++;
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 116786d2e8e8..c77d508b7462 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -208,29 +208,12 @@ static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu,
 static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                             const struct sys_reg_desc *r, u8 apr)
 {
-        struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu;
         u8 idx = r->Op2 & 3;
 
-        /*
-         * num_pri_bits are initialized with HW supported values.
-         * We can rely safely on num_pri_bits even if VM has not
-         * restored ICC_CTLR_EL1 before restoring APnR registers.
-         */
-        switch (vgic_v3_cpu->num_pri_bits) {
-        case 7:
-                vgic_v3_access_apr_reg(vcpu, p, apr, idx);
-                break;
-        case 6:
-                if (idx > 1)
-                        goto err;
-                vgic_v3_access_apr_reg(vcpu, p, apr, idx);
-                break;
-        default:
-                if (idx > 0)
-                        goto err;
-                vgic_v3_access_apr_reg(vcpu, p, apr, idx);
-        }
+        if (idx > vgic_v3_max_apr_idx(vcpu))
+                goto err;
 
+        vgic_v3_access_apr_reg(vcpu, p, apr, idx);
         return true;
 err:
         if (!p->is_write)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index d4b2ad18eef2..bce2a6431430 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
         return !!(vcpu->arch.pending_exceptions);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+        return false;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
         return 1;
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index f28d21c69f79..508275bb05d5 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -104,6 +104,7 @@
 #define HPTE_R_C ASM_CONST(0x0000000000000080)
 #define HPTE_R_R ASM_CONST(0x0000000000000100)
 #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
 
 #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
 #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 67075e065ef2..7c62967d672c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1941,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
         rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
         ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
         if (ret < 0) {
+                kfree(ctx);
                 kvm_put_kvm(kvm);
                 return ret;
         }
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 53766e2bc029..8f2da8bba737 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
 {
         struct kvmppc_spapr_tce_table *stt = filp->private_data;
         struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+        struct kvm *kvm = stt->kvm;
 
+        mutex_lock(&kvm->lock);
         list_del_rcu(&stt->list);
+        mutex_unlock(&kvm->lock);
 
         list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
                 WARN_ON(!kref_read(&stit->kref));
@@ -298,7 +301,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
         unsigned long npages, size;
         int ret = -ENOMEM;
         int i;
-        int fd = -1;
 
         if (!args->size)
                 return -EINVAL;
@@ -328,11 +330,6 @@
                 goto fail;
         }
 
-        ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-                                    stt, O_RDWR | O_CLOEXEC);
-        if (ret < 0)
-                goto fail;
-
         mutex_lock(&kvm->lock);
 
         /* Check this LIOBN hasn't been previously allocated */
@@ -344,17 +341,19 @@
                 }
         }
 
-        if (!ret) {
+        if (!ret)
+                ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+                                       stt, O_RDWR | O_CLOEXEC);
+
+        if (ret >= 0) {
                 list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
                 kvm_get_kvm(kvm);
         }
 
         mutex_unlock(&kvm->lock);
 
-        if (!ret)
-                return fd;
-
-        put_unused_fd(fd);
+        if (ret >= 0)
+                return ret;
 
 fail:
         for (i = 0; i < npages; i++)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ebcf97cb5c98..18e974a34fce 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 
         switch (subfunc) {
         case H_VPA_REG_VPA: /* register VPA */
-                if (len < sizeof(struct lppaca))
+                /*
+                 * The size of our lppaca is 1kB because of the way we align
+                 * it for the guest to avoid crossing a 4kB boundary. We only
+                 * use 640 bytes of the structure though, so we should accept
+                 * clients that set a size of 640.
+                 */
+                if (len < 640)
                         break;
                 vpap = &tvcpu->arch.vpa;
                 err = 0;
@@ -3336,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
         if (radix_enabled())
                 return -EINVAL;
 
+        /*
+         * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
+         * POWER7 doesn't support keys for instruction accesses,
+         * POWER8 and POWER9 do.
+         */
+        info->data_keys = 32;
+        info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
+
         info->flags = KVM_PPC_PAGE_SIZES_REAL;
         if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
                 info->flags |= KVM_PPC_1T_SEGMENTS;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fedb0139524c..4efe364f1188 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
         if (!realmode)
                 local_irq_restore(irq_flags);
 
-        ptel &= ~(HPTE_R_PP0 - psize);
+        ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
         ptel |= pa;
 
         if (pa)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2259b6cde119..663a4a861e7f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -982,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 #ifdef CONFIG_KVM_XICS
         /* We are entering the guest on that thread, push VCPU to XIVE */
         ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
-        cmpldi cr0, r10, r0
+        cmpldi cr0, r10, 0
         beq no_xive
         ld r11, VCPU_XIVE_SAVED_STATE(r4)
         li r9, TM_QW1_OS
@@ -1286,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
         bne 2f
         mfspr r3,SPRN_HDEC
-        cmpwi r3,0
+        EXTEND_HDEC(r3)
+        cmpdi r3,0
         mr r4,r9
         bge fast_guest_return
 2:
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 32fdab57d604..f9f6468f4171 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
         if (err)
                 goto free_vcpu;
 
-        if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
+        if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) {
+                err = -ENOMEM;
                 goto uninit_vcpu;
+        }
 
         err = kvmppc_e500_tlb_init(vcpu_e500);
         if (err)
                 goto uninit_id;
 
         vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
-        if (!vcpu->arch.shared)
+        if (!vcpu->arch.shared) {
+                err = -ENOMEM;
                 goto uninit_tlb;
+        }
 
         return vcpu;
 
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index f48a0c22e8f9..d0b6b5788afc 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
                 goto uninit_vcpu;
 
         vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-        if (!vcpu->arch.shared)
+        if (!vcpu->arch.shared) {
+                err = -ENOMEM;
                 goto uninit_tlb;
+        }
 
         return vcpu;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1a75c0b5f4ca..3480faaf1ef8 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
         return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+        return false;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
         return 1;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a409d5991934..51375e766e90 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -226,7 +226,9 @@ struct kvm_s390_sie_block {
 #define ECB3_RI 0x01
         __u8 ecb3; /* 0x0063 */
         __u32 scaol; /* 0x0064 */
-        __u8 reserved68[4]; /* 0x0068 */
+        __u8 reserved68; /* 0x0068 */
+        __u8 epdx; /* 0x0069 */
+        __u8 reserved6a[2]; /* 0x006a */
         __u32 todpr; /* 0x006c */
         __u8 reserved70[16]; /* 0x0070 */
         __u64 mso; /* 0x0080 */
@@ -265,6 +267,7 @@ struct kvm_s390_sie_block {
         __u64 cbrlo; /* 0x01b8 */
         __u8 reserved1c0[8]; /* 0x01c0 */
 #define ECD_HOSTREGMGMT 0x20000000
+#define ECD_MEF 0x08000000
         __u32 ecd; /* 0x01c8 */
         __u8 reserved1cc[18]; /* 0x01cc */
         __u64 pp; /* 0x01de */
@@ -739,6 +742,7 @@ struct kvm_arch{
         struct kvm_s390_cpu_model model;
         struct kvm_s390_crypto crypto;
         struct kvm_s390_vsie vsie;
+        u8 epdx;
         u64 epoch;
         struct kvm_s390_migration_state *migration_state;
         /* subset of available cpu features enabled by user space */
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index ca21b28a7b17..22b0f49e87c1 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -15,6 +15,6 @@
 #define ESSA_SET_STABLE_IF_RESIDENT 6
 #define ESSA_SET_STABLE_NODAT 7
 
-#define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT
+#define ESSA_MAX ESSA_SET_STABLE_NODAT
 
 #endif
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 69d09c39bbcd..cd7359e23d86 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW 0
 #define KVM_S390_VM_TOD_HIGH 1
+#define KVM_S390_VM_TOD_EXT 2
+
+struct kvm_s390_vm_tod_clock {
+        __u8 epoch_idx;
+        __u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index e4d36094aceb..d93a2c0474bf 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
         VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
         vcpu->stat.diagnose_44++;
-        kvm_vcpu_on_spin(vcpu);
+        kvm_vcpu_on_spin(vcpu, true);
         return 0;
 }
 
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index c2e0ddc1356e..bcbd86621d01 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b)
                 return (addr >= a) && (addr <= b);
         else
                 /* "overflowing" interval */
-                return (addr <= a) && (addr >= b);
+                return (addr >= a) || (addr <= b);
 }
 
 #define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
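The fix above matters for breakpoint ranges whose end wraps past the top of the address space: such an "overflowing" interval covers everything at or above the start plus everything at or below the wrapped end. A small standalone sketch of the corrected predicate (mirroring in_addr_range() above; the range values below are purely illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Corrected check: 'a' is the range start, 'b' the (possibly wrapped) end. */
static bool in_addr_range(uint64_t addr, uint64_t a, uint64_t b)
{
        if (a <= b)
                return addr >= a && addr <= b;        /* normal interval */
        /* "overflowing" interval that wraps around the address space */
        return addr >= a || addr <= b;
}

int main(void)
{
        /* a range starting near the top of the address space, wrapping to 0xf */
        uint64_t a = 0xfffffffffffffff0ULL, b = 0xfULL;

        assert(in_addr_range(0xfffffffffffffff8ULL, a, b));   /* above the start */
        assert(in_addr_range(0x4ULL, a, b));                  /* below the wrapped end */
        assert(!in_addr_range(0x1000ULL, a, b));              /* outside the range */
        return 0;
}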
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a619ddae610d..a832ad031cee 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
         struct kvm_s390_mchk_info *mchk;
         union mci mci;
         __u64 cr14 = 0; /* upper bits are not used */
+        int rc;
 
         mci.val = mcck_info->mcic;
         if (mci.sr)
@@ -2496,12 +2497,13 @@
         if (mci.ck) {
                 /* Inject the floating machine check */
                 inti.type = KVM_S390_MCHK;
-                WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti));
+                rc = __inject_vm(vcpu->kvm, &inti);
         } else {
                 /* Inject the machine check to specified vcpu */
                 irq.type = KVM_S390_MCHK;
-                WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+                rc = kvm_s390_inject_vcpu(vcpu, &irq);
         }
+        WARN_ON_ONCE(rc);
 }
 
 int kvm_set_routing_entry(struct kvm *kvm,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index af09d3437631..40d0a1a97889 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
         { NULL }
 };
 
+struct kvm_s390_tod_clock_ext {
+        __u8 epoch_idx;
+        __u64 tod;
+        __u8 reserved[7];
+} __packed;
+
 /* allow nested virtualization in KVM (if enabled by user space) */
 static int nested;
 module_param(nested, int, S_IRUGO);
@@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
         return 0;
 }
 
+static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+        struct kvm_s390_vm_tod_clock gtod;
+
+        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+                return -EFAULT;
+
+        if (test_kvm_facility(kvm, 139))
+                kvm_s390_set_tod_clock_ext(kvm, &gtod);
+        else if (gtod.epoch_idx == 0)
+                kvm_s390_set_tod_clock(kvm, gtod.tod);
+        else
+                return -EINVAL;
+
+        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
+                 gtod.epoch_idx, gtod.tod);
+
+        return 0;
+}
+
 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 {
         u8 gtod_high;
@@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
                 return -EINVAL;
 
         switch (attr->attr) {
+        case KVM_S390_VM_TOD_EXT:
+                ret = kvm_s390_set_tod_ext(kvm, attr);
+                break;
         case KVM_S390_VM_TOD_HIGH:
                 ret = kvm_s390_set_tod_high(kvm, attr);
                 break;
@@ -922,6 +951,43 @@
         return ret;
 }
 
+static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
+                                       struct kvm_s390_vm_tod_clock *gtod)
+{
+        struct kvm_s390_tod_clock_ext htod;
+
+        preempt_disable();
+
+        get_tod_clock_ext((char *)&htod);
+
+        gtod->tod = htod.tod + kvm->arch.epoch;
+        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
+
+        if (gtod->tod < htod.tod)
+                gtod->epoch_idx += 1;
+
+        preempt_enable();
+}
+
+static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+        struct kvm_s390_vm_tod_clock gtod;
+
+        memset(&gtod, 0, sizeof(gtod));
+
+        if (test_kvm_facility(kvm, 139))
+                kvm_s390_get_tod_clock_ext(kvm, &gtod);
+        else
+                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
+
+        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+                return -EFAULT;
+
+        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
+                 gtod.epoch_idx, gtod.tod);
+        return 0;
+}
+
 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 {
         u8 gtod_high = 0;
@@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
                 return -EINVAL;
 
         switch (attr->attr) {
+        case KVM_S390_VM_TOD_EXT:
+                ret = kvm_s390_get_tod_ext(kvm, attr);
+                break;
         case KVM_S390_VM_TOD_HIGH:
                 ret = kvm_s390_get_tod_high(kvm, attr);
                 break;
@@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
                 if (r < 0)
                         pgstev = 0;
                 /* save the value */
-                res[i++] = (pgstev >> 24) & 0x3;
+                res[i++] = (pgstev >> 24) & 0x43;
                 /*
                  * if the next bit is too far away, stop.
                  * if we reached the previous "next", find the next one
@@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
 
                 pgstev = bits[i];
                 pgstev = pgstev << 24;
-                mask &= _PGSTE_GPS_USAGE_MASK;
+                mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
         }
         srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
                S390_ARCH_FAC_LIST_SIZE_BYTE);
 
+        /* we are always in czam mode - even on pre z14 machines */
+        set_kvm_facility(kvm->arch.model.fac_mask, 138);
+        set_kvm_facility(kvm->arch.model.fac_list, 138);
+        /* we emulate STHYI in kvm */
         set_kvm_facility(kvm->arch.model.fac_mask, 74);
         set_kvm_facility(kvm->arch.model.fac_list, 74);
+        if (MACHINE_HAS_TLB_GUEST) {
+                set_kvm_facility(kvm->arch.model.fac_mask, 147);
+                set_kvm_facility(kvm->arch.model.fac_list, 147);
+        }
 
         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
@@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                 vcpu->arch.sie_block->eca |= ECA_VX;
                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
         }
+        if (test_kvm_facility(vcpu->kvm, 139))
+                vcpu->arch.sie_block->ecd |= ECD_MEF;
+
         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
                                       | SDNXC;
         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
@@ -2447,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
         return kvm_s390_vcpu_has_irq(vcpu, 0);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
+}
+
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
 {
         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
@@ -2855,6 +2940,35 @@ retry:
         return 0;
 }
 
+void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
+                                const struct kvm_s390_vm_tod_clock *gtod)
+{
+        struct kvm_vcpu *vcpu;
+        struct kvm_s390_tod_clock_ext htod;
+        int i;
+
+        mutex_lock(&kvm->lock);
+        preempt_disable();
+
+        get_tod_clock_ext((char *)&htod);
+
+        kvm->arch.epoch = gtod->tod - htod.tod;
+        kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
+
+        if (kvm->arch.epoch > gtod->tod)
+                kvm->arch.epdx -= 1;
+
+        kvm_s390_vcpu_block_all(kvm);
+        kvm_for_each_vcpu(i, vcpu, kvm) {
+                vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+                vcpu->arch.sie_block->epdx = kvm->arch.epdx;
+        }
+
+        kvm_s390_vcpu_unblock_all(kvm);
+        preempt_enable();
+        mutex_unlock(&kvm->lock);
+}
+
 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
 {
         struct kvm_vcpu *vcpu;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 6fedc8bc7a37..9f8fdd7b2311 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 int handle_sthyi(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
+void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
+                                const struct kvm_s390_vm_tod_clock *gtod);
 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 785ad028bde6..c954ac49eee4 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
                 if (pgstev & _PGSTE_GPS_ZERO)
                         res |= 1;
         }
+        if (pgstev & _PGSTE_GPS_NODAT)
+                res |= 0x20;
         vcpu->run->s.regs.gprs[r1] = res;
         /*
          * It is possible that all the normal 511 slots were full, in which case
@@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
                 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
         /* Check for invalid operation request code */
         orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
-        if (orc > ESSA_MAX)
+        /* ORCs 0-6 are always valid */
+        if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT
+                                                     : ESSA_SET_STABLE_IF_RESIDENT))
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
         if (likely(!vcpu->kvm->arch.migration_state)) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1a252f537081..9d592ef4104b 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
         return rc;
 }
 
-static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter,
+                           u64 *status_reg)
 {
-        int rc;
         unsigned int i;
         struct kvm_vcpu *v;
+        bool all_stopped = true;
 
-        switch (parameter & 0xff) {
-        case 0:
-                rc = SIGP_CC_NOT_OPERATIONAL;
-                break;
-        case 1:
-        case 2:
-                kvm_for_each_vcpu(i, v, vcpu->kvm) {
-                        v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
-                        kvm_clear_async_pf_completion_queue(v);
-                }
-
-                rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-                break;
-        default:
-                rc = -EOPNOTSUPP;
+        kvm_for_each_vcpu(i, v, vcpu->kvm) {
+                if (v == vcpu)
+                        continue;
+                if (!is_vcpu_stopped(v))
+                        all_stopped = false;
         }
-        return rc;
+
+        *status_reg &= 0xffffffff00000000UL;
+
+        /* Reject set arch order, with czam we're always in z/Arch mode. */
+        *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER :
+                                      SIGP_STATUS_INCORRECT_STATE);
+        return SIGP_CC_STATUS_STORED;
 }
 
 static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
@@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
         switch (order_code) {
         case SIGP_SET_ARCHITECTURE:
                 vcpu->stat.instruction_sigp_arch++;
-                rc = __sigp_set_arch(vcpu, parameter);
+                rc = __sigp_set_arch(vcpu, parameter,
+                                     &vcpu->run->s.regs.gprs[r1]);
                 break;
         default:
                 rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index a2e5c24f47a7..395926b8c1ed 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -436,14 +436,6 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
         if (addr & ~PAGE_MASK)
                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-        /*
-         * If the page has not yet been faulted in, we want to do that
-         * now and not after all the expensive calculations.
-         */
-        r = write_guest(vcpu, addr, reg2, &cc, 1);
-        if (r)
-                return kvm_s390_inject_prog_cond(vcpu, r);
-
         sctns = (void *)get_zeroed_page(GFP_KERNEL);
         if (!sctns)
                 return -ENOMEM;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ba8203e4d516..b18b5652e5c5 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
                 scb_s->eca |= scb_o->eca & ECA_IB;
         if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
                 scb_s->eca |= scb_o->eca & ECA_CEI;
+        /* Epoch Extension */
+        if (test_kvm_facility(vcpu->kvm, 139))
+                scb_s->ecd |= scb_o->ecd & ECD_MEF;
 
         prepare_ibc(vcpu, vsie_page);
         rc = shadow_crycb(vcpu, vsie_page);
@@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
         struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
         struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
-        struct mcck_volatile_info *mcck_info;
-        struct sie_page *sie_page;
         int rc;
 
         handle_last_fault(vcpu, vsie_page);
@@ -831,9 +832,7 @@
 
         if (rc == -EINTR) {
                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
-                sie_page = container_of(scb_s, struct sie_page, sie_block);
-                mcck_info = &sie_page->mcck_info;
-                kvm_s390_reinject_machine_check(vcpu, mcck_info);
+                kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
                 return 0;
         }
 
@@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu,
          */
         preempt_disable();
         scb_s->epoch += vcpu->kvm->arch.epoch;
+
+        if (scb_s->ecd & ECD_MEF) {
+                scb_s->epdx += vcpu->kvm->arch.epdx;
+                if (scb_s->epoch < vcpu->kvm->arch.epoch)
+                        scb_s->epdx += 1;
+        }
+
         preempt_enable();
 }
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4198a71b8fdd..ae677f814bc0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -919,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
         case ESSA_GET_STATE:
                 break;
         case ESSA_SET_STABLE:
-                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
                 pgstev |= _PGSTE_GPS_USAGE_STABLE;
                 break;
         case ESSA_SET_UNUSED:
@@ -965,6 +965,10 @@
                         pgstev |= _PGSTE_GPS_USAGE_STABLE;
                 }
                 break;
+        case ESSA_SET_STABLE_NODAT:
+                pgstev &= ~_PGSTE_GPS_USAGE_MASK;
+                pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
+                break;
         default:
                 /* we should never get here! */
                 break;
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 29d72bf8ed2b..70dd8f17d054 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -83,6 +83,7 @@ static struct facility_def facility_defs[] = {
                 78, /* enhanced-DAT 2 */
                 130, /* instruction-execution-protection */
                 131, /* enhanced-SOP 2 and side-effect */
+                139, /* multiple epoch facility */
                 146, /* msa extension 8 */
                 -1 /* END */
         }
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 42bbbf0f173d..2519c6c801c9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fde36f189836..fa2558e12024 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -219,8 +219,8 @@ struct x86_emulate_ops { | |||
219 | struct x86_instruction_info *info, | 219 | struct x86_instruction_info *info, |
220 | enum x86_intercept_stage stage); | 220 | enum x86_intercept_stage stage); |
221 | 221 | ||
222 | void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, | 222 | bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, |
223 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 223 | u32 *ecx, u32 *edx, bool check_limit); |
224 | void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); | 224 | void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); |
225 | 225 | ||
226 | unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); | 226 | unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); |
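[Editor's note] The get_cpuid callback now reports whether the requested leaf actually exists and takes a check_limit flag: internal feature probes pass false so an out-of-range leaf is not silently redirected, while the guest-visible CPUID instruction passes true to keep the usual limit fix-up (see the emulate.c call sites later in this diff). A hedged sketch of a probing caller; probe_guest_maxphyaddr is a made-up name, not part of this series:

	/* Hypothetical probe: read CPUID.80000008H without the limit fix-up,
	 * so a missing leaf is reported instead of being clamped. */
	static bool probe_guest_maxphyaddr(struct x86_emulate_ctxt *ctxt, u8 *phys_bits)
	{
		u32 eax = 0x80000008, ebx, ecx = 0, edx;

		if (!ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false))
			return false;		/* leaf not exposed to the guest */
		*phys_bits = eax & 0xff;	/* CPUID.80000008H:EAX[7:0] = MAXPHYADDR */
		return true;
	}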
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 369e41c23f07..8844eee290b2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -79,15 +79,14 @@ | |||
79 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | 79 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ |
80 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | 80 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) |
81 | 81 | ||
82 | #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL | ||
83 | #define CR3_PCID_INVD BIT_64(63) | 82 | #define CR3_PCID_INVD BIT_64(63) |
84 | #define CR4_RESERVED_BITS \ | 83 | #define CR4_RESERVED_BITS \ |
85 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 84 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
86 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 85 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
87 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | 86 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ |
88 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | 87 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ |
89 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ | 88 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ |
90 | | X86_CR4_PKE)) | 89 | | X86_CR4_SMAP | X86_CR4_PKE)) |
91 | 90 | ||
92 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 91 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
93 | 92 | ||
@@ -204,7 +203,6 @@ enum { | |||
204 | #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) | 203 | #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) |
205 | 204 | ||
206 | #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ | 205 | #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ |
207 | PFERR_USER_MASK | \ | ||
208 | PFERR_WRITE_MASK | \ | 206 | PFERR_WRITE_MASK | \ |
209 | PFERR_PRESENT_MASK) | 207 | PFERR_PRESENT_MASK) |
210 | 208 | ||
@@ -317,15 +315,17 @@ struct kvm_pio_request { | |||
317 | int size; | 315 | int size; |
318 | }; | 316 | }; |
319 | 317 | ||
318 | #define PT64_ROOT_MAX_LEVEL 5 | ||
319 | |||
320 | struct rsvd_bits_validate { | 320 | struct rsvd_bits_validate { |
321 | u64 rsvd_bits_mask[2][4]; | 321 | u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; |
322 | u64 bad_mt_xwr; | 322 | u64 bad_mt_xwr; |
323 | }; | 323 | }; |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level | 326 | * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, |
327 | * 32-bit). The kvm_mmu structure abstracts the details of the current mmu | 327 | * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the |
328 | * mode. | 328 | * current mmu mode. |
329 | */ | 329 | */ |
330 | struct kvm_mmu { | 330 | struct kvm_mmu { |
331 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); | 331 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); |
@@ -548,8 +548,8 @@ struct kvm_vcpu_arch { | |||
548 | 548 | ||
549 | struct kvm_queued_exception { | 549 | struct kvm_queued_exception { |
550 | bool pending; | 550 | bool pending; |
551 | bool injected; | ||
551 | bool has_error_code; | 552 | bool has_error_code; |
552 | bool reinject; | ||
553 | u8 nr; | 553 | u8 nr; |
554 | u32 error_code; | 554 | u32 error_code; |
555 | u8 nested_apf; | 555 | u8 nested_apf; |
@@ -687,8 +687,12 @@ struct kvm_vcpu_arch { | |||
687 | int pending_ioapic_eoi; | 687 | int pending_ioapic_eoi; |
688 | int pending_external_vector; | 688 | int pending_external_vector; |
689 | 689 | ||
690 | /* GPA available (AMD only) */ | 690 | /* GPA available */ |
691 | bool gpa_available; | 691 | bool gpa_available; |
692 | gpa_t gpa_val; | ||
693 | |||
694 | /* vCPU was preempted while running in kernel mode (CPL = 0) */ ||
695 | bool preempted_in_kernel; | ||
692 | }; | 696 | }; |
693 | 697 | ||
694 | struct kvm_lpage_info { | 698 | struct kvm_lpage_info { |
@@ -979,7 +983,7 @@ struct kvm_x86_ops { | |||
979 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 983 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
980 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | 984 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
981 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 985 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
982 | int (*get_tdp_level)(void); | 986 | int (*get_tdp_level)(struct kvm_vcpu *vcpu); |
983 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 987 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
984 | int (*get_lpage_level)(void); | 988 | int (*get_lpage_level)(void); |
985 | bool (*rdtscp_supported)(void); | 989 | bool (*rdtscp_supported)(void); |
@@ -1297,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |||
1297 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | 1301 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); |
1298 | } | 1302 | } |
1299 | 1303 | ||
1300 | static inline u64 get_canonical(u64 la) | ||
1301 | { | ||
1302 | return ((int64_t)la << 16) >> 16; | ||
1303 | } | ||
1304 | |||
1305 | static inline bool is_noncanonical_address(u64 la) | ||
1306 | { | ||
1307 | #ifdef CONFIG_X86_64 | ||
1308 | return get_canonical(la) != la; | ||
1309 | #else | ||
1310 | return false; | ||
1311 | #endif | ||
1312 | } | ||
1313 | |||
1314 | #define TSS_IOPB_BASE_OFFSET 0x66 | 1304 | #define TSS_IOPB_BASE_OFFSET 0x66 |
1315 | #define TSS_BASE_SIZE 0x68 | 1305 | #define TSS_BASE_SIZE 0x68 |
1316 | #define TSS_IOPB_SIZE (65536 / 8) | 1306 | #define TSS_IOPB_SIZE (65536 / 8) |
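[Editor's note] With 5-level paging the canonical-address width is no longer a fixed 48 bits, which is why the fixed-width helpers above are removed from kvm_host.h. Judging by the __linearize() and emul_is_noncanonical_address() hunks later in this diff, the replacements take the virtual-address width as a parameter; roughly (a sketch, assumed to land in x86.h, not the exact code):

	/* Sketch of the variable-width replacements. */
	static inline u64 get_canonical(u64 la, u8 vaddr_bits)
	{
		return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
	}

	static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
	{
	#ifdef CONFIG_X86_64
		/* 57-bit virtual addresses when CR4.LA57 is set, 48-bit otherwise. */
		u8 vaddr_bits = kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;

		return get_canonical(la, vaddr_bits) != la;
	#else
		return false;
	#endif
	}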
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 58fffe79e417..14835dd205a5 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
107 | #define V_IRQ_SHIFT 8 | 107 | #define V_IRQ_SHIFT 8 |
108 | #define V_IRQ_MASK (1 << V_IRQ_SHIFT) | 108 | #define V_IRQ_MASK (1 << V_IRQ_SHIFT) |
109 | 109 | ||
110 | #define V_GIF_SHIFT 9 | ||
111 | #define V_GIF_MASK (1 << V_GIF_SHIFT) | ||
112 | |||
110 | #define V_INTR_PRIO_SHIFT 16 | 113 | #define V_INTR_PRIO_SHIFT 16 |
111 | #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) | 114 | #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) |
112 | 115 | ||
@@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
116 | #define V_INTR_MASKING_SHIFT 24 | 119 | #define V_INTR_MASKING_SHIFT 24 |
117 | #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) | 120 | #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) |
118 | 121 | ||
122 | #define V_GIF_ENABLE_SHIFT 25 | ||
123 | #define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) | ||
124 | |||
119 | #define AVIC_ENABLE_SHIFT 31 | 125 | #define AVIC_ENABLE_SHIFT 31 |
120 | #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) | 126 | #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) |
121 | 127 | ||
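[Editor's note] The two new bit pairs give a hardware-tracked view of the global interrupt flag: V_GIF_ENABLE in int_ctl turns the feature on for a VMCB, and V_GIF then holds the guest's GIF so STGI/CLGI need not be intercepted. A rough sketch of how svm code might toggle it (an assumed helper, not the actual svm.c hunk of this series):

	/* Sketch only: track the guest's GIF in the VMCB when vGIF is enabled. */
	static inline void svm_set_vgif(struct vcpu_svm *svm, bool gif)
	{
		if (!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK))
			return;		/* vGIF not enabled, fall back to software GIF */

		if (gif)
			svm->vmcb->control.int_ctl |= V_GIF_MASK;
		else
			svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
	}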
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 35cd06f636ab..caec8417539f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -72,6 +72,7 @@ | |||
72 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 72 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
73 | #define SECONDARY_EXEC_RDRAND 0x00000800 | 73 | #define SECONDARY_EXEC_RDRAND 0x00000800 |
74 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 74 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
75 | #define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 | ||
75 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | 76 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 |
76 | #define SECONDARY_EXEC_RDSEED 0x00010000 | 77 | #define SECONDARY_EXEC_RDSEED 0x00010000 |
77 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | 78 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 |
@@ -114,6 +115,10 @@ | |||
114 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | 115 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 |
115 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 | 116 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 |
116 | 117 | ||
118 | /* VMFUNC functions */ | ||
119 | #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 | ||
120 | #define VMFUNC_EPTP_ENTRIES 512 | ||
121 | |||
117 | static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) | 122 | static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) |
118 | { | 123 | { |
119 | return vmx_basic & GENMASK_ULL(30, 0); | 124 | return vmx_basic & GENMASK_ULL(30, 0); |
@@ -187,6 +192,8 @@ enum vmcs_field { | |||
187 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 192 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
188 | POSTED_INTR_DESC_ADDR = 0x00002016, | 193 | POSTED_INTR_DESC_ADDR = 0x00002016, |
189 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, | 194 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, |
195 | VM_FUNCTION_CONTROL = 0x00002018, | ||
196 | VM_FUNCTION_CONTROL_HIGH = 0x00002019, | ||
190 | EPT_POINTER = 0x0000201a, | 197 | EPT_POINTER = 0x0000201a, |
191 | EPT_POINTER_HIGH = 0x0000201b, | 198 | EPT_POINTER_HIGH = 0x0000201b, |
192 | EOI_EXIT_BITMAP0 = 0x0000201c, | 199 | EOI_EXIT_BITMAP0 = 0x0000201c, |
@@ -197,6 +204,8 @@ enum vmcs_field { | |||
197 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | 204 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, |
198 | EOI_EXIT_BITMAP3 = 0x00002022, | 205 | EOI_EXIT_BITMAP3 = 0x00002022, |
199 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | 206 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, |
207 | EPTP_LIST_ADDRESS = 0x00002024, | ||
208 | EPTP_LIST_ADDRESS_HIGH = 0x00002025, | ||
200 | VMREAD_BITMAP = 0x00002026, | 209 | VMREAD_BITMAP = 0x00002026, |
201 | VMWRITE_BITMAP = 0x00002028, | 210 | VMWRITE_BITMAP = 0x00002028, |
202 | XSS_EXIT_BITMAP = 0x0000202C, | 211 | XSS_EXIT_BITMAP = 0x0000202C, |
@@ -444,6 +453,7 @@ enum vmcs_field { | |||
444 | 453 | ||
445 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) | 454 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) |
446 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) | 455 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) |
456 | #define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) | ||
447 | #define VMX_EPTP_UC_BIT (1ull << 8) | 457 | #define VMX_EPTP_UC_BIT (1ull << 8) |
448 | #define VMX_EPTP_WB_BIT (1ull << 14) | 458 | #define VMX_EPTP_WB_BIT (1ull << 14) |
449 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 459 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
@@ -459,12 +469,14 @@ enum vmcs_field { | |||
459 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | 469 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ |
460 | #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ | 470 | #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ |
461 | 471 | ||
462 | #define VMX_EPT_DEFAULT_GAW 3 | ||
463 | #define VMX_EPT_MAX_GAW 0x4 | ||
464 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 472 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
465 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | 473 | #define VMX_EPTP_PWL_MASK 0x38ull |
466 | #define VMX_EPT_AD_ENABLE_BIT (1ull << 6) | 474 | #define VMX_EPTP_PWL_4 0x18ull |
467 | #define VMX_EPT_DEFAULT_MT 0x6ull | 475 | #define VMX_EPTP_PWL_5 0x20ull |
476 | #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) | ||
477 | #define VMX_EPTP_MT_MASK 0x7ull | ||
478 | #define VMX_EPTP_MT_WB 0x6ull | ||
479 | #define VMX_EPTP_MT_UC 0x0ull | ||
468 | #define VMX_EPT_READABLE_MASK 0x1ull | 480 | #define VMX_EPT_READABLE_MASK 0x1ull |
469 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 481 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
470 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 482 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
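[Editor's note] The EPTP layout constants are renamed and extended so callers compose the pointer from a memory type, a page-walk length and the A/D-enable bit instead of the old GAW defines. A sketch of the composition (the real construct_eptp() lives in vmx.c; build_eptp is an illustrative name):

	/* Sketch: build an EPT pointer from the new VMX_EPTP_* constants. */
	static u64 build_eptp(u64 root_hpa, bool five_level, bool ad_bits)
	{
		u64 eptp = VMX_EPTP_MT_WB;				/* write-back memory type */

		eptp |= five_level ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;	/* page-walk length */
		if (ad_bits)
			eptp |= VMX_EPTP_AD_ENABLE_BIT;			/* accessed/dirty flags */
		return eptp | (root_hpa & PAGE_MASK);
	}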
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19adbb418443..0099e10eb045 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
126 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 126 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * The existing code assumes virtual address is 48-bit in the canonical | 129 | * The existing code assumes virtual address is 48-bit or 57-bit in the |
130 | * address checks; exit if it is ever changed. | 130 | * canonical address checks; exit if it is ever changed. |
131 | */ | 131 | */ |
132 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 132 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
133 | if (best && ((best->eax & 0xff00) >> 8) != 48 && | 133 | if (best) { |
134 | ((best->eax & 0xff00) >> 8) != 0) | 134 | int vaddr_bits = (best->eax & 0xff00) >> 8; |
135 | return -EINVAL; | 135 | |
136 | if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0) | ||
137 | return -EINVAL; | ||
138 | } | ||
136 | 139 | ||
137 | /* Update physical-address width */ | 140 | /* Update physical-address width */ |
138 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | 141 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |
142 | kvm_mmu_reset_context(vcpu); | ||
139 | 143 | ||
140 | kvm_pmu_refresh(vcpu); | 144 | kvm_pmu_refresh(vcpu); |
141 | return 0; | 145 | return 0; |
@@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
383 | 387 | ||
384 | /* cpuid 7.0.ecx*/ | 388 | /* cpuid 7.0.ecx*/ |
385 | const u32 kvm_cpuid_7_0_ecx_x86_features = | 389 | const u32 kvm_cpuid_7_0_ecx_x86_features = |
386 | F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); | 390 | F(AVX512VBMI) | F(LA57) | F(PKU) | |
391 | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); | ||
387 | 392 | ||
388 | /* cpuid 7.0.edx*/ | 393 | /* cpuid 7.0.edx*/ |
389 | const u32 kvm_cpuid_7_0_edx_x86_features = | 394 | const u32 kvm_cpuid_7_0_edx_x86_features = |
@@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | |||
853 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | 858 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); |
854 | } | 859 | } |
855 | 860 | ||
856 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | 861 | bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, |
862 | u32 *ecx, u32 *edx, bool check_limit) | ||
857 | { | 863 | { |
858 | u32 function = *eax, index = *ecx; | 864 | u32 function = *eax, index = *ecx; |
859 | struct kvm_cpuid_entry2 *best; | 865 | struct kvm_cpuid_entry2 *best; |
866 | bool entry_found = true; | ||
860 | 867 | ||
861 | best = kvm_find_cpuid_entry(vcpu, function, index); | 868 | best = kvm_find_cpuid_entry(vcpu, function, index); |
862 | 869 | ||
863 | if (!best) | 870 | if (!best) { |
871 | entry_found = false; | ||
872 | if (!check_limit) | ||
873 | goto out; | ||
874 | |||
864 | best = check_cpuid_limit(vcpu, function, index); | 875 | best = check_cpuid_limit(vcpu, function, index); |
876 | } | ||
865 | 877 | ||
878 | out: | ||
866 | if (best) { | 879 | if (best) { |
867 | *eax = best->eax; | 880 | *eax = best->eax; |
868 | *ebx = best->ebx; | 881 | *ebx = best->ebx; |
@@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | |||
870 | *edx = best->edx; | 883 | *edx = best->edx; |
871 | } else | 884 | } else |
872 | *eax = *ebx = *ecx = *edx = 0; | 885 | *eax = *ebx = *ecx = *edx = 0; |
873 | trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); | 886 | trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); |
887 | return entry_found; | ||
874 | } | 888 | } |
875 | EXPORT_SYMBOL_GPL(kvm_cpuid); | 889 | EXPORT_SYMBOL_GPL(kvm_cpuid); |
876 | 890 | ||
@@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
883 | 897 | ||
884 | eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 898 | eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
885 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 899 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
886 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); | 900 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); |
887 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); | 901 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); |
888 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); | 902 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); |
889 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); | 903 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da6728383052..1ea3c0e1e3a9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include "x86.h" | 4 | #include "x86.h" |
5 | #include <asm/cpu.h> | 5 | #include <asm/cpu.h> |
6 | #include <asm/processor.h> | ||
6 | 7 | ||
7 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); | 8 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); |
8 | bool kvm_mpx_supported(void); | 9 | bool kvm_mpx_supported(void); |
@@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
20 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | 21 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, |
21 | struct kvm_cpuid2 *cpuid, | 22 | struct kvm_cpuid2 *cpuid, |
22 | struct kvm_cpuid_entry2 __user *entries); | 23 | struct kvm_cpuid_entry2 __user *entries); |
23 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 24 | bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, |
25 | u32 *ecx, u32 *edx, bool check_limit); | ||
24 | 26 | ||
25 | int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); | 27 | int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); |
26 | 28 | ||
@@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
29 | return vcpu->arch.maxphyaddr; | 31 | return vcpu->arch.maxphyaddr; |
30 | } | 32 | } |
31 | 33 | ||
32 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | 34 | struct cpuid_reg { |
33 | { | 35 | u32 function; |
34 | struct kvm_cpuid_entry2 *best; | 36 | u32 index; |
35 | 37 | int reg; | |
36 | if (!static_cpu_has(X86_FEATURE_XSAVE)) | 38 | }; |
37 | return false; | ||
38 | |||
39 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
40 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
41 | } | ||
42 | |||
43 | static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) | ||
44 | { | ||
45 | struct kvm_cpuid_entry2 *best; | ||
46 | |||
47 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
48 | return best && (best->edx & bit(X86_FEATURE_MTRR)); | ||
49 | } | ||
50 | |||
51 | static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) | ||
52 | { | ||
53 | struct kvm_cpuid_entry2 *best; | ||
54 | |||
55 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
56 | return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST)); | ||
57 | } | ||
58 | 39 | ||
59 | static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | 40 | static const struct cpuid_reg reverse_cpuid[] = { |
60 | { | 41 | [CPUID_1_EDX] = { 1, 0, CPUID_EDX}, |
61 | struct kvm_cpuid_entry2 *best; | 42 | [CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX}, |
62 | 43 | [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, | |
63 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | 44 | [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, |
64 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | 45 | [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, |
65 | } | 46 | [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, |
47 | [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, | ||
48 | [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, | ||
49 | [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, | ||
50 | [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, | ||
51 | [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, | ||
52 | [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, | ||
53 | [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, | ||
54 | [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, | ||
55 | [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, | ||
56 | }; | ||
66 | 57 | ||
67 | static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu) | 58 | static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) |
68 | { | 59 | { |
69 | struct kvm_cpuid_entry2 *best; | 60 | unsigned x86_leaf = x86_feature / 32; |
70 | |||
71 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
72 | return best && (best->ebx & bit(X86_FEATURE_SMAP)); | ||
73 | } | ||
74 | 61 | ||
75 | static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | 62 | BUILD_BUG_ON(!__builtin_constant_p(x86_leaf)); |
76 | { | 63 | BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); |
77 | struct kvm_cpuid_entry2 *best; | 64 | BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); |
78 | 65 | ||
79 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | 66 | return reverse_cpuid[x86_leaf]; |
80 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
81 | } | 67 | } |
82 | 68 | ||
83 | static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu) | 69 | static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature) |
84 | { | 70 | { |
85 | struct kvm_cpuid_entry2 *best; | 71 | struct kvm_cpuid_entry2 *entry; |
86 | 72 | const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); | |
87 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
88 | return best && (best->ecx & bit(X86_FEATURE_PKU)); | ||
89 | } | ||
90 | 73 | ||
91 | static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) | 74 | entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index); |
92 | { | 75 | if (!entry) |
93 | struct kvm_cpuid_entry2 *best; | 76 | return NULL; |
94 | 77 | ||
95 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 78 | switch (cpuid.reg) { |
96 | return best && (best->edx & bit(X86_FEATURE_LM)); | 79 | case CPUID_EAX: |
80 | return &entry->eax; | ||
81 | case CPUID_EBX: | ||
82 | return &entry->ebx; | ||
83 | case CPUID_ECX: | ||
84 | return &entry->ecx; | ||
85 | case CPUID_EDX: | ||
86 | return &entry->edx; | ||
87 | default: | ||
88 | BUILD_BUG(); | ||
89 | return NULL; | ||
90 | } | ||
97 | } | 91 | } |
98 | 92 | ||
99 | static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | 93 | static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature) |
100 | { | 94 | { |
101 | struct kvm_cpuid_entry2 *best; | 95 | int *reg; |
102 | 96 | ||
103 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 97 | if (x86_feature == X86_FEATURE_XSAVE && |
104 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); | 98 | !static_cpu_has(X86_FEATURE_XSAVE)) |
105 | } | 99 | return false; |
106 | 100 | ||
107 | static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) | 101 | reg = guest_cpuid_get_register(vcpu, x86_feature); |
108 | { | 102 | if (!reg) |
109 | struct kvm_cpuid_entry2 *best; | 103 | return false; |
110 | 104 | ||
111 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 105 | return *reg & bit(x86_feature); |
112 | return best && (best->ecx & bit(X86_FEATURE_PCID)); | ||
113 | } | 106 | } |
114 | 107 | ||
115 | static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) | 108 | static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) |
116 | { | 109 | { |
117 | struct kvm_cpuid_entry2 *best; | 110 | int *reg; |
118 | 111 | ||
119 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 112 | reg = guest_cpuid_get_register(vcpu, x86_feature); |
120 | return best && (best->ecx & bit(X86_FEATURE_X2APIC)); | 113 | if (reg) |
114 | *reg &= ~bit(x86_feature); | ||
121 | } | 115 | } |
122 | 116 | ||
123 | static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) | 117 | static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) |
@@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) | |||
128 | return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; | 122 | return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; |
129 | } | 123 | } |
130 | 124 | ||
131 | static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) | ||
132 | { | ||
133 | struct kvm_cpuid_entry2 *best; | ||
134 | |||
135 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
136 | return best && (best->edx & bit(X86_FEATURE_GBPAGES)); | ||
137 | } | ||
138 | |||
139 | static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) | ||
140 | { | ||
141 | struct kvm_cpuid_entry2 *best; | ||
142 | |||
143 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
144 | return best && (best->ebx & bit(X86_FEATURE_RTM)); | ||
145 | } | ||
146 | |||
147 | static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) | ||
148 | { | ||
149 | struct kvm_cpuid_entry2 *best; | ||
150 | |||
151 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
152 | return best && (best->ebx & bit(X86_FEATURE_MPX)); | ||
153 | } | ||
154 | |||
155 | static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) | ||
156 | { | ||
157 | struct kvm_cpuid_entry2 *best; | ||
158 | |||
159 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
160 | return best && (best->edx & bit(X86_FEATURE_RDTSCP)); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 | ||
165 | */ | ||
166 | #define BIT_NRIPS 3 | ||
167 | |||
168 | static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) | ||
169 | { | ||
170 | struct kvm_cpuid_entry2 *best; | ||
171 | |||
172 | best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); | ||
173 | |||
174 | /* | ||
175 | * NRIPS is a scattered cpuid feature, so we can't use | ||
176 | * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit | ||
177 | * position 8, not 3). | ||
178 | */ | ||
179 | return best && (best->edx & bit(BIT_NRIPS)); | ||
180 | } | ||
181 | #undef BIT_NRIPS | ||
182 | |||
183 | static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) | 125 | static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) |
184 | { | 126 | { |
185 | struct kvm_cpuid_entry2 *best; | 127 | struct kvm_cpuid_entry2 *best; |
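[Editor's note] All of the per-feature guest_cpuid_has_*() wrappers above collapse into the table-driven guest_cpuid_has()/guest_cpuid_clear(), keyed directly by X86_FEATURE_* bits. An illustrative call site (the function name is made up; it only shows how callers are expected to use the new helpers):

	/* Illustrative only. */
	static void example_feature_checks(struct kvm_vcpu *vcpu)
	{
		if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))	/* was guest_cpuid_has_xsave(vcpu) */
			;	/* ... */

		if (!boot_cpu_has(X86_FEATURE_PKU))
			guest_cpuid_clear(vcpu, X86_FEATURE_PKU);	/* hide PKU from the guest */
	}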
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fb0055953fbc..16bf6655aa85 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include "x86.h" | 29 | #include "x86.h" |
30 | #include "tss.h" | 30 | #include "tss.h" |
31 | #include "mmu.h" | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * Operand types | 34 | * Operand types |
@@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, | |||
688 | ulong la; | 689 | ulong la; |
689 | u32 lim; | 690 | u32 lim; |
690 | u16 sel; | 691 | u16 sel; |
692 | u8 va_bits; | ||
691 | 693 | ||
692 | la = seg_base(ctxt, addr.seg) + addr.ea; | 694 | la = seg_base(ctxt, addr.seg) + addr.ea; |
693 | *max_size = 0; | 695 | *max_size = 0; |
694 | switch (mode) { | 696 | switch (mode) { |
695 | case X86EMUL_MODE_PROT64: | 697 | case X86EMUL_MODE_PROT64: |
696 | *linear = la; | 698 | *linear = la; |
697 | if (is_noncanonical_address(la)) | 699 | va_bits = ctxt_virt_addr_bits(ctxt); |
700 | if (get_canonical(la, va_bits) != la) | ||
698 | goto bad; | 701 | goto bad; |
699 | 702 | ||
700 | *max_size = min_t(u64, ~0u, (1ull << 48) - la); | 703 | *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); |
701 | if (size > *max_size) | 704 | if (size > *max_size) |
702 | goto bad; | 705 | goto bad; |
703 | break; | 706 | break; |
@@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1748 | sizeof(base3), &ctxt->exception); | 1751 | sizeof(base3), &ctxt->exception); |
1749 | if (ret != X86EMUL_CONTINUE) | 1752 | if (ret != X86EMUL_CONTINUE) |
1750 | return ret; | 1753 | return ret; |
1751 | if (is_noncanonical_address(get_desc_base(&seg_desc) | | 1754 | if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | |
1752 | ((u64)base3 << 32))) | 1755 | ((u64)base3 << 32), ctxt)) |
1753 | return emulate_gp(ctxt, 0); | 1756 | return emulate_gp(ctxt, 0); |
1754 | } | 1757 | } |
1755 | load: | 1758 | load: |
@@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) | |||
2333 | 2336 | ||
2334 | eax = 0x80000001; | 2337 | eax = 0x80000001; |
2335 | ecx = 0; | 2338 | ecx = 0; |
2336 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 2339 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
2337 | return edx & bit(X86_FEATURE_LM); | 2340 | return edx & bit(X86_FEATURE_LM); |
2338 | } | 2341 | } |
2339 | 2342 | ||
@@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | |||
2636 | u32 eax, ebx, ecx, edx; | 2639 | u32 eax, ebx, ecx, edx; |
2637 | 2640 | ||
2638 | eax = ecx = 0; | 2641 | eax = ecx = 0; |
2639 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 2642 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
2640 | return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx | 2643 | return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx |
2641 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx | 2644 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx |
2642 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; | 2645 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; |
@@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | |||
2656 | 2659 | ||
2657 | eax = 0x00000000; | 2660 | eax = 0x00000000; |
2658 | ecx = 0x00000000; | 2661 | ecx = 0x00000000; |
2659 | ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 2662 | ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
2660 | /* | 2663 | /* |
2661 | * Intel ("GenuineIntel") | 2664 | * Intel ("GenuineIntel") |
2662 | * remark: Intel CPUs only support "syscall" in 64bit | 2665 | * remark: Intel CPUs only support "syscall" in 64bit |
@@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) | |||
2840 | ss_sel = cs_sel + 8; | 2843 | ss_sel = cs_sel + 8; |
2841 | cs.d = 0; | 2844 | cs.d = 0; |
2842 | cs.l = 1; | 2845 | cs.l = 1; |
2843 | if (is_noncanonical_address(rcx) || | 2846 | if (emul_is_noncanonical_address(rcx, ctxt) || |
2844 | is_noncanonical_address(rdx)) | 2847 | emul_is_noncanonical_address(rdx, ctxt)) |
2845 | return emulate_gp(ctxt, 0); | 2848 | return emulate_gp(ctxt, 0); |
2846 | break; | 2849 | break; |
2847 | } | 2850 | } |
@@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) | |||
3551 | /* | 3554 | /* |
3552 | * Check MOVBE is set in the guest-visible CPUID leaf. | 3555 | * Check MOVBE is set in the guest-visible CPUID leaf. |
3553 | */ | 3556 | */ |
3554 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3557 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
3555 | if (!(ecx & FFL(MOVBE))) | 3558 | if (!(ecx & FFL(MOVBE))) |
3556 | return emulate_ud(ctxt); | 3559 | return emulate_ud(ctxt); |
3557 | 3560 | ||
@@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) | |||
3756 | if (rc != X86EMUL_CONTINUE) | 3759 | if (rc != X86EMUL_CONTINUE) |
3757 | return rc; | 3760 | return rc; |
3758 | if (ctxt->mode == X86EMUL_MODE_PROT64 && | 3761 | if (ctxt->mode == X86EMUL_MODE_PROT64 && |
3759 | is_noncanonical_address(desc_ptr.address)) | 3762 | emul_is_noncanonical_address(desc_ptr.address, ctxt)) |
3760 | return emulate_gp(ctxt, 0); | 3763 | return emulate_gp(ctxt, 0); |
3761 | if (lgdt) | 3764 | if (lgdt) |
3762 | ctxt->ops->set_gdt(ctxt, &desc_ptr); | 3765 | ctxt->ops->set_gdt(ctxt, &desc_ptr); |
@@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) | |||
3865 | 3868 | ||
3866 | eax = reg_read(ctxt, VCPU_REGS_RAX); | 3869 | eax = reg_read(ctxt, VCPU_REGS_RAX); |
3867 | ecx = reg_read(ctxt, VCPU_REGS_RCX); | 3870 | ecx = reg_read(ctxt, VCPU_REGS_RCX); |
3868 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3871 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); |
3869 | *reg_write(ctxt, VCPU_REGS_RAX) = eax; | 3872 | *reg_write(ctxt, VCPU_REGS_RAX) = eax; |
3870 | *reg_write(ctxt, VCPU_REGS_RBX) = ebx; | 3873 | *reg_write(ctxt, VCPU_REGS_RBX) = ebx; |
3871 | *reg_write(ctxt, VCPU_REGS_RCX) = ecx; | 3874 | *reg_write(ctxt, VCPU_REGS_RCX) = ecx; |
@@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) | |||
3924 | { | 3927 | { |
3925 | u32 eax = 1, ebx, ecx = 0, edx; | 3928 | u32 eax = 1, ebx, ecx = 0, edx; |
3926 | 3929 | ||
3927 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3930 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
3928 | if (!(edx & FFL(FXSR))) | 3931 | if (!(edx & FFL(FXSR))) |
3929 | return emulate_ud(ctxt); | 3932 | return emulate_ud(ctxt); |
3930 | 3933 | ||
@@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) | |||
4097 | u64 rsvd = 0; | 4100 | u64 rsvd = 0; |
4098 | 4101 | ||
4099 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 4102 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); |
4100 | if (efer & EFER_LMA) | 4103 | if (efer & EFER_LMA) { |
4101 | rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; | 4104 | u64 maxphyaddr; |
4105 | u32 eax = 0x80000008; | ||
4106 | |||
4107 | if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, | ||
4108 | NULL, false)) | ||
4109 | maxphyaddr = eax & 0xff; | ||
4110 | else | ||
4111 | maxphyaddr = 36; | ||
4112 | rsvd = rsvd_bits(maxphyaddr, 62); | ||
4113 | } | ||
4102 | 4114 | ||
4103 | if (new_val & rsvd) | 4115 | if (new_val & rsvd) |
4104 | return emulate_gp(ctxt, 0); | 4116 | return emulate_gp(ctxt, 0); |
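[Editor's note] check_cr_write() now derives the CR3 reserved-bit mask from the guest's MAXPHYADDR instead of the fixed CR3_L_MODE_RESERVED_BITS, which is why emulate.c starts including mmu.h for rsvd_bits(). For reference, rsvd_bits(s, e) builds a mask with bits s..e inclusive set (as defined in arch/x86/kvm/mmu.h at the time of this series):

	static inline u64 rsvd_bits(int s, int e)
	{
		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	/* Example: a guest with MAXPHYADDR == 36 gets rsvd_bits(36, 62), i.e.
	 * CR3 bits 36..62 (0x7FFFFFF000000000ULL) must be clear; bit 63 stays
	 * legal because it is the PCID no-flush bit. */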
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 337b6d2730fa..dc97f2544b6f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1160 | return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), | 1160 | return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), |
1161 | pdata); | 1161 | pdata); |
1162 | } | 1162 | } |
1163 | case HV_X64_MSR_TSC_FREQUENCY: | ||
1164 | data = (u64)vcpu->arch.virtual_tsc_khz * 1000; | ||
1165 | break; | ||
1166 | case HV_X64_MSR_APIC_FREQUENCY: | ||
1167 | data = APIC_BUS_FREQUENCY; | ||
1168 | break; | ||
1163 | default: | 1169 | default: |
1164 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1170 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
1165 | return 1; | 1171 | return 1; |
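[Editor's note] The two new read-only MSRs report the virtual TSC rate and the emulated APIC bus rate in Hz. As a worked example with an assumed guest configuration: a vCPU with virtual_tsc_khz = 2400000 reads back HV_X64_MSR_TSC_FREQUENCY = 2400000 * 1000 = 2,400,000,000 Hz, while HV_X64_MSR_APIC_FREQUENCY is the constant APIC_BUS_FREQUENCY = 1000000000 / APIC_BUS_CYCLE_NS = 1 GHz (see the lapic.h hunk below, which exports those defines).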
@@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
1268 | 1274 | ||
1269 | switch (code) { | 1275 | switch (code) { |
1270 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: | 1276 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: |
1271 | kvm_vcpu_on_spin(vcpu); | 1277 | kvm_vcpu_on_spin(vcpu, true); |
1272 | break; | 1278 | break; |
1273 | case HVCALL_POST_MESSAGE: | 1279 | case HVCALL_POST_MESSAGE: |
1274 | case HVCALL_SIGNAL_EVENT: | 1280 | case HVCALL_SIGNAL_EVENT: |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index e1e89ee4af75..9add410f195f 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS | 4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS |
5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ | 5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ |
6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
7 | | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) | 7 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) |
8 | 8 | ||
9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | 9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, |
10 | enum kvm_reg reg) | 10 | enum kvm_reg reg) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 589dcc117086..aaf10b6f5380 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -54,8 +54,6 @@ | |||
54 | #define PRIu64 "u" | 54 | #define PRIu64 "u" |
55 | #define PRIo64 "o" | 55 | #define PRIo64 "o" |
56 | 56 | ||
57 | #define APIC_BUS_CYCLE_NS 1 | ||
58 | |||
59 | /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ | 57 | /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ |
60 | #define apic_debug(fmt, arg...) | 58 | #define apic_debug(fmt, arg...) |
61 | 59 | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 29caa2c3dff9..215721e1426a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -12,6 +12,9 @@ | |||
12 | #define KVM_APIC_SHORT_MASK 0xc0000 | 12 | #define KVM_APIC_SHORT_MASK 0xc0000 |
13 | #define KVM_APIC_DEST_MASK 0x800 | 13 | #define KVM_APIC_DEST_MASK 0x800 |
14 | 14 | ||
15 | #define APIC_BUS_CYCLE_NS 1 | ||
16 | #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) | ||
17 | |||
15 | struct kvm_timer { | 18 | struct kvm_timer { |
16 | struct hrtimer timer; | 19 | struct hrtimer timer; |
17 | s64 period; /* unit: ns */ | 20 | s64 period; /* unit: ns */ |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 04d750813c9d..eca30c1eb1d9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2169,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
2169 | } | 2169 | } |
2170 | 2170 | ||
2171 | struct mmu_page_path { | 2171 | struct mmu_page_path { |
2172 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; | 2172 | struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; |
2173 | unsigned int idx[PT64_ROOT_LEVEL]; | 2173 | unsigned int idx[PT64_ROOT_MAX_LEVEL]; |
2174 | }; | 2174 | }; |
2175 | 2175 | ||
2176 | #define for_each_sp(pvec, sp, parents, i) \ | 2176 | #define for_each_sp(pvec, sp, parents, i) \ |
@@ -2385,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, | |||
2385 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; | 2385 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; |
2386 | iterator->level = vcpu->arch.mmu.shadow_root_level; | 2386 | iterator->level = vcpu->arch.mmu.shadow_root_level; |
2387 | 2387 | ||
2388 | if (iterator->level == PT64_ROOT_LEVEL && | 2388 | if (iterator->level == PT64_ROOT_4LEVEL && |
2389 | vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL && | 2389 | vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL && |
2390 | !vcpu->arch.mmu.direct_map) | 2390 | !vcpu->arch.mmu.direct_map) |
2391 | --iterator->level; | 2391 | --iterator->level; |
2392 | 2392 | ||
@@ -2610,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, | |||
2610 | 2610 | ||
2611 | sp = list_last_entry(&kvm->arch.active_mmu_pages, | 2611 | sp = list_last_entry(&kvm->arch.active_mmu_pages, |
2612 | struct kvm_mmu_page, link); | 2612 | struct kvm_mmu_page, link); |
2613 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | 2613 | return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
2614 | |||
2615 | return true; | ||
2616 | } | 2614 | } |
2617 | 2615 | ||
2618 | /* | 2616 | /* |
@@ -3262,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
3262 | 3260 | ||
3263 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 3261 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
3264 | gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); | 3262 | gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); |
3265 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu); | 3263 | static int make_mmu_pages_available(struct kvm_vcpu *vcpu); |
3266 | 3264 | ||
3267 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | 3265 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
3268 | gfn_t gfn, bool prefault) | 3266 | gfn_t gfn, bool prefault) |
@@ -3302,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
3302 | spin_lock(&vcpu->kvm->mmu_lock); | 3300 | spin_lock(&vcpu->kvm->mmu_lock); |
3303 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3301 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
3304 | goto out_unlock; | 3302 | goto out_unlock; |
3305 | make_mmu_pages_available(vcpu); | 3303 | if (make_mmu_pages_available(vcpu) < 0) |
3304 | goto out_unlock; | ||
3306 | if (likely(!force_pt_level)) | 3305 | if (likely(!force_pt_level)) |
3307 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3306 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
3308 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); | 3307 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); |
@@ -3326,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
3326 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3325 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
3327 | return; | 3326 | return; |
3328 | 3327 | ||
3329 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && | 3328 | if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL && |
3330 | (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || | 3329 | (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL || |
3331 | vcpu->arch.mmu.direct_map)) { | 3330 | vcpu->arch.mmu.direct_map)) { |
3332 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3331 | hpa_t root = vcpu->arch.mmu.root_hpa; |
3333 | 3332 | ||
@@ -3379,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
3379 | struct kvm_mmu_page *sp; | 3378 | struct kvm_mmu_page *sp; |
3380 | unsigned i; | 3379 | unsigned i; |
3381 | 3380 | ||
3382 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 3381 | if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) { |
3383 | spin_lock(&vcpu->kvm->mmu_lock); | 3382 | spin_lock(&vcpu->kvm->mmu_lock); |
3384 | make_mmu_pages_available(vcpu); | 3383 | if (make_mmu_pages_available(vcpu) < 0) { |
3385 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); | 3384 | spin_unlock(&vcpu->kvm->mmu_lock); |
3385 | return 1; | ||
3386 | } | ||
3387 | sp = kvm_mmu_get_page(vcpu, 0, 0, | ||
3388 | vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); | ||
3386 | ++sp->root_count; | 3389 | ++sp->root_count; |
3387 | spin_unlock(&vcpu->kvm->mmu_lock); | 3390 | spin_unlock(&vcpu->kvm->mmu_lock); |
3388 | vcpu->arch.mmu.root_hpa = __pa(sp->spt); | 3391 | vcpu->arch.mmu.root_hpa = __pa(sp->spt); |
@@ -3392,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
3392 | 3395 | ||
3393 | MMU_WARN_ON(VALID_PAGE(root)); | 3396 | MMU_WARN_ON(VALID_PAGE(root)); |
3394 | spin_lock(&vcpu->kvm->mmu_lock); | 3397 | spin_lock(&vcpu->kvm->mmu_lock); |
3395 | make_mmu_pages_available(vcpu); | 3398 | if (make_mmu_pages_available(vcpu) < 0) { |
3399 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
3400 | return 1; | ||
3401 | } | ||
3396 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), | 3402 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
3397 | i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); | 3403 | i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); |
3398 | root = __pa(sp->spt); | 3404 | root = __pa(sp->spt); |
@@ -3423,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3423 | * Do we shadow a long mode page table? If so we need to | 3429 | * Do we shadow a long mode page table? If so we need to |
3424 | * write-protect the guests page table root. | 3430 | * write-protect the guests page table root. |
3425 | */ | 3431 | */ |
3426 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 3432 | if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { |
3427 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3433 | hpa_t root = vcpu->arch.mmu.root_hpa; |
3428 | 3434 | ||
3429 | MMU_WARN_ON(VALID_PAGE(root)); | 3435 | MMU_WARN_ON(VALID_PAGE(root)); |
3430 | 3436 | ||
3431 | spin_lock(&vcpu->kvm->mmu_lock); | 3437 | spin_lock(&vcpu->kvm->mmu_lock); |
3432 | make_mmu_pages_available(vcpu); | 3438 | if (make_mmu_pages_available(vcpu) < 0) { |
3433 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, | 3439 | spin_unlock(&vcpu->kvm->mmu_lock); |
3434 | 0, ACC_ALL); | 3440 | return 1; |
3441 | } | ||
3442 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | ||
3443 | vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); | ||
3435 | root = __pa(sp->spt); | 3444 | root = __pa(sp->spt); |
3436 | ++sp->root_count; | 3445 | ++sp->root_count; |
3437 | spin_unlock(&vcpu->kvm->mmu_lock); | 3446 | spin_unlock(&vcpu->kvm->mmu_lock); |
@@ -3445,7 +3454,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3445 | * the shadow page table may be a PAE or a long mode page table. | 3454 | * the shadow page table may be a PAE or a long mode page table. |
3446 | */ | 3455 | */ |
3447 | pm_mask = PT_PRESENT_MASK; | 3456 | pm_mask = PT_PRESENT_MASK; |
3448 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) | 3457 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) |
3449 | pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; | 3458 | pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; |
3450 | 3459 | ||
3451 | for (i = 0; i < 4; ++i) { | 3460 | for (i = 0; i < 4; ++i) { |
@@ -3463,7 +3472,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3463 | return 1; | 3472 | return 1; |
3464 | } | 3473 | } |
3465 | spin_lock(&vcpu->kvm->mmu_lock); | 3474 | spin_lock(&vcpu->kvm->mmu_lock); |
3466 | make_mmu_pages_available(vcpu); | 3475 | if (make_mmu_pages_available(vcpu) < 0) { |
3476 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
3477 | return 1; | ||
3478 | } | ||
3467 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, | 3479 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, |
3468 | 0, ACC_ALL); | 3480 | 0, ACC_ALL); |
3469 | root = __pa(sp->spt); | 3481 | root = __pa(sp->spt); |
@@ -3478,7 +3490,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
3478 | * If we shadow a 32 bit page table with a long mode page | 3490 | * If we shadow a 32 bit page table with a long mode page |
3479 | * table we enter this path. | 3491 | * table we enter this path. |
3480 | */ | 3492 | */ |
3481 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 3493 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) { |
3482 | if (vcpu->arch.mmu.lm_root == NULL) { | 3494 | if (vcpu->arch.mmu.lm_root == NULL) { |
3483 | /* | 3495 | /* |
3484 | * The additional page necessary for this is only | 3496 | * The additional page necessary for this is only |
@@ -3523,7 +3535,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
3523 | 3535 | ||
3524 | vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); | 3536 | vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); |
3525 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 3537 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
3526 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 3538 | if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { |
3527 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3539 | hpa_t root = vcpu->arch.mmu.root_hpa; |
3528 | sp = page_header(root); | 3540 | sp = page_header(root); |
3529 | mmu_sync_children(vcpu, sp); | 3541 | mmu_sync_children(vcpu, sp); |
@@ -3588,6 +3600,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) | |||
3588 | 3600 | ||
3589 | static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3601 | static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) |
3590 | { | 3602 | { |
3603 | /* | ||
3604 | * A nested guest cannot use the MMIO cache if it is using nested | ||
3605 | * page tables, because cr2 is a nGPA while the cache stores GPAs. | ||
3606 | */ | ||
3607 | if (mmu_is_nested(vcpu)) | ||
3608 | return false; | ||
3609 | |||
3591 | if (direct) | 3610 | if (direct) |
3592 | return vcpu_match_mmio_gpa(vcpu, addr); | 3611 | return vcpu_match_mmio_gpa(vcpu, addr); |
3593 | 3612 | ||
@@ -3599,7 +3618,7 @@ static bool | |||
3599 | walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) | 3618 | walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) |
3600 | { | 3619 | { |
3601 | struct kvm_shadow_walk_iterator iterator; | 3620 | struct kvm_shadow_walk_iterator iterator; |
3602 | u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; | 3621 | u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; |
3603 | int root, leaf; | 3622 | int root, leaf; |
3604 | bool reserved = false; | 3623 | bool reserved = false; |
3605 | 3624 | ||
@@ -3640,7 +3659,23 @@ exit: | |||
3640 | return reserved; | 3659 | return reserved; |
3641 | } | 3660 | } |
3642 | 3661 | ||
3643 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3662 | /* |
3663 | * Return values of handle_mmio_page_fault: | ||
3664 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction | ||
3665 | * directly. | ||
3666 | * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page | ||
3667 | * fault path update the mmio spte. | ||
3668 | * RET_MMIO_PF_RETRY: let CPU fault again on the address. | ||
3669 | * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). | ||
3670 | */ | ||
3671 | enum { | ||
3672 | RET_MMIO_PF_EMULATE = 1, | ||
3673 | RET_MMIO_PF_INVALID = 2, | ||
3674 | RET_MMIO_PF_RETRY = 0, | ||
3675 | RET_MMIO_PF_BUG = -1 | ||
3676 | }; | ||
3677 | |||
3678 | static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) | ||
3644 | { | 3679 | { |
3645 | u64 spte; | 3680 | u64 spte; |
3646 | bool reserved; | 3681 | bool reserved; |
@@ -3872,7 +3907,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3872 | spin_lock(&vcpu->kvm->mmu_lock); | 3907 | spin_lock(&vcpu->kvm->mmu_lock); |
3873 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3908 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
3874 | goto out_unlock; | 3909 | goto out_unlock; |
3875 | make_mmu_pages_available(vcpu); | 3910 | if (make_mmu_pages_available(vcpu) < 0) |
3911 | goto out_unlock; | ||
3876 | if (likely(!force_pt_level)) | 3912 | if (likely(!force_pt_level)) |
3877 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3913 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
3878 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); | 3914 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); |
@@ -4025,7 +4061,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
4025 | rsvd_check->rsvd_bits_mask[1][0] = | 4061 | rsvd_check->rsvd_bits_mask[1][0] = |
4026 | rsvd_check->rsvd_bits_mask[0][0]; | 4062 | rsvd_check->rsvd_bits_mask[0][0]; |
4027 | break; | 4063 | break; |
4028 | case PT64_ROOT_LEVEL: | 4064 | case PT64_ROOT_5LEVEL: |
4065 | rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | | ||
4066 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | | ||
4067 | rsvd_bits(maxphyaddr, 51); | ||
4068 | rsvd_check->rsvd_bits_mask[1][4] = | ||
4069 | rsvd_check->rsvd_bits_mask[0][4]; | ||
4070 | case PT64_ROOT_4LEVEL: | ||
4029 | rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 4071 | rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
4030 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | | 4072 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | |
4031 | rsvd_bits(maxphyaddr, 51); | 4073 | rsvd_bits(maxphyaddr, 51); |
@@ -4055,7 +4097,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
4055 | { | 4097 | { |
4056 | __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, | 4098 | __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, |
4057 | cpuid_maxphyaddr(vcpu), context->root_level, | 4099 | cpuid_maxphyaddr(vcpu), context->root_level, |
4058 | context->nx, guest_cpuid_has_gbpages(vcpu), | 4100 | context->nx, |
4101 | guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), | ||
4059 | is_pse(vcpu), guest_cpuid_is_amd(vcpu)); | 4102 | is_pse(vcpu), guest_cpuid_is_amd(vcpu)); |
4060 | } | 4103 | } |
4061 | 4104 | ||
@@ -4065,6 +4108,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | |||
4065 | { | 4108 | { |
4066 | u64 bad_mt_xwr; | 4109 | u64 bad_mt_xwr; |
4067 | 4110 | ||
4111 | rsvd_check->rsvd_bits_mask[0][4] = | ||
4112 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | ||
4068 | rsvd_check->rsvd_bits_mask[0][3] = | 4113 | rsvd_check->rsvd_bits_mask[0][3] = |
4069 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | 4114 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); |
4070 | rsvd_check->rsvd_bits_mask[0][2] = | 4115 | rsvd_check->rsvd_bits_mask[0][2] = |
@@ -4074,6 +4119,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | |||
4074 | rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); | 4119 | rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); |
4075 | 4120 | ||
4076 | /* large page */ | 4121 | /* large page */ |
4122 | rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; | ||
4077 | rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; | 4123 | rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; |
4078 | rsvd_check->rsvd_bits_mask[1][2] = | 4124 | rsvd_check->rsvd_bits_mask[1][2] = |
4079 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); | 4125 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); |
@@ -4120,8 +4166,8 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
4120 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, | 4166 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, |
4121 | boot_cpu_data.x86_phys_bits, | 4167 | boot_cpu_data.x86_phys_bits, |
4122 | context->shadow_root_level, uses_nx, | 4168 | context->shadow_root_level, uses_nx, |
4123 | guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), | 4169 | guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), |
4124 | true); | 4170 | is_pse(vcpu), true); |
4125 | 4171 | ||
4126 | if (!shadow_me_mask) | 4172 | if (!shadow_me_mask) |
4127 | return; | 4173 | return; |
@@ -4185,66 +4231,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, | |||
4185 | boot_cpu_data.x86_phys_bits, execonly); | 4231 | boot_cpu_data.x86_phys_bits, execonly); |
4186 | } | 4232 | } |
4187 | 4233 | ||
4234 | #define BYTE_MASK(access) \ | ||
4235 | ((1 & (access) ? 2 : 0) | \ | ||
4236 | (2 & (access) ? 4 : 0) | \ | ||
4237 | (3 & (access) ? 8 : 0) | \ | ||
4238 | (4 & (access) ? 16 : 0) | \ | ||
4239 | (5 & (access) ? 32 : 0) | \ | ||
4240 | (6 & (access) ? 64 : 0) | \ | ||
4241 | (7 & (access) ? 128 : 0)) | ||
4242 | |||
4243 | |||
4188 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, | 4244 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, |
4189 | struct kvm_mmu *mmu, bool ept) | 4245 | struct kvm_mmu *mmu, bool ept) |
4190 | { | 4246 | { |
4191 | unsigned bit, byte, pfec; | 4247 | unsigned byte; |
4192 | u8 map; | 4248 | |
4193 | bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0; | 4249 | const u8 x = BYTE_MASK(ACC_EXEC_MASK); |
4250 | const u8 w = BYTE_MASK(ACC_WRITE_MASK); | ||
4251 | const u8 u = BYTE_MASK(ACC_USER_MASK); | ||
4252 | |||
4253 | bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0; | ||
4254 | bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0; | ||
4255 | bool cr0_wp = is_write_protection(vcpu); | ||
4194 | 4256 | ||
4195 | cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | ||
4196 | cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); | ||
4197 | for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { | 4257 | for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { |
4198 | pfec = byte << 1; | 4258 | unsigned pfec = byte << 1; |
4199 | map = 0; | 4259 | |
4200 | wf = pfec & PFERR_WRITE_MASK; | ||
4201 | uf = pfec & PFERR_USER_MASK; | ||
4202 | ff = pfec & PFERR_FETCH_MASK; | ||
4203 | /* | 4260 | /* |
4204 | * PFERR_RSVD_MASK bit is set in PFEC if the access is not | 4261 | * Each "*f" variable has a 1 bit for each UWX value |
4205 | * subject to SMAP restrictions, and cleared otherwise. The | 4262 | * that causes a fault with the given PFEC. |
4206 | * bit is only meaningful if the SMAP bit is set in CR4. | ||
4207 | */ | 4263 | */ |
4208 | smapf = !(pfec & PFERR_RSVD_MASK); | ||
4209 | for (bit = 0; bit < 8; ++bit) { | ||
4210 | x = bit & ACC_EXEC_MASK; | ||
4211 | w = bit & ACC_WRITE_MASK; | ||
4212 | u = bit & ACC_USER_MASK; | ||
4213 | |||
4214 | if (!ept) { | ||
4215 | /* Not really needed: !nx will cause pte.nx to fault */ | ||
4216 | x |= !mmu->nx; | ||
4217 | /* Allow supervisor writes if !cr0.wp */ | ||
4218 | w |= !is_write_protection(vcpu) && !uf; | ||
4219 | /* Disallow supervisor fetches of user code if cr4.smep */ | ||
4220 | x &= !(cr4_smep && u && !uf); | ||
4221 | |||
4222 | /* | ||
4223 | * SMAP:kernel-mode data accesses from user-mode | ||
4224 | * mappings should fault. A fault is considered | ||
4225 | * as a SMAP violation if all of the following | ||
4226 | * conditions are true: | ||
4227 | * - X86_CR4_SMAP is set in CR4 | ||
4228 | * - A user page is accessed | ||
4229 | * - Page fault in kernel mode | ||
4230 | * - if CPL = 3 or X86_EFLAGS_AC is clear | ||
4231 | * | ||
4232 | * Here, we cover the first three conditions. | ||
4233 | * The fourth is computed dynamically in | ||
4234 | * permission_fault() and is in smapf. | ||
4235 | * | ||
4236 | * Also, SMAP does not affect instruction | ||
4237 | * fetches, add the !ff check here to make it | ||
4238 | * clearer. | ||
4239 | */ | ||
4240 | smap = cr4_smap && u && !uf && !ff; | ||
4241 | } | ||
4242 | 4264 | ||
4243 | fault = (ff && !x) || (uf && !u) || (wf && !w) || | 4265 | /* Faults from writes to non-writable pages */ |
4244 | (smapf && smap); | 4266 | u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0; |
4245 | map |= fault << bit; | 4267 | /* Faults from user mode accesses to supervisor pages */ |
4268 | u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; | ||
4269 | /* Faults from fetches of non-executable pages*/ | ||
4270 | u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; | ||
4271 | /* Faults from kernel mode fetches of user pages */ | ||
4272 | u8 smepf = 0; | ||
4273 | /* Faults from kernel mode accesses of user pages */ | ||
4274 | u8 smapf = 0; | ||
4275 | |||
4276 | if (!ept) { | ||
4277 | /* Faults from kernel mode accesses to user pages */ | ||
4278 | u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; | ||
4279 | |||
4280 | /* Not really needed: !nx will cause pte.nx to fault */ | ||
4281 | if (!mmu->nx) | ||
4282 | ff = 0; | ||
4283 | |||
4284 | /* Allow supervisor writes if !cr0.wp */ | ||
4285 | if (!cr0_wp) | ||
4286 | wf = (pfec & PFERR_USER_MASK) ? wf : 0; | ||
4287 | |||
4288 | /* Disallow supervisor fetches of user code if cr4.smep */ | ||
4289 | if (cr4_smep) | ||
4290 | smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; | ||
4291 | |||
4292 | /* | ||
4293 | * SMAP:kernel-mode data accesses from user-mode | ||
4294 | * mappings should fault. A fault is considered | ||
4295 | * as a SMAP violation if all of the following | ||
4296 | * conditions are true: | ||
4297 | * - X86_CR4_SMAP is set in CR4 | ||
4298 | * - A user page is accessed | ||
4299 | * - The access is not a fetch | ||
4300 | * - Page fault in kernel mode | ||
4301 | * - if CPL = 3 or X86_EFLAGS_AC is clear | ||
4302 | * | ||
4303 | * Here, we cover the first three conditions. | ||
4304 | * The fourth is computed dynamically in permission_fault(); | ||
4305 | * PFERR_RSVD_MASK bit will be set in PFEC if the access is | ||
4306 | * *not* subject to SMAP restrictions. | ||
4307 | */ | ||
4308 | if (cr4_smap) | ||
4309 | smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; | ||
4246 | } | 4310 | } |
4247 | mmu->permissions[byte] = map; | 4311 | |
4312 | mmu->permissions[byte] = ff | uf | wf | smepf | smapf; | ||
4248 | } | 4313 | } |
4249 | } | 4314 | } |
4250 | 4315 | ||
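[Editor's note] For reference, the rewritten update_permission_bitmask() above can be exercised outside the kernel. The sketch below is illustrative only: it reproduces the BYTE_MASK trick and builds the EPT-style table (no CR0.WP, SMEP or SMAP handling), and the final lookup shows how such a table is meant to be consumed, with a shift by the pte access bits. Apart from the ACC_*/PFERR_* values copied from the patch, everything here is made up for the demo.

```c
/*
 * User-space sketch of the permission bitmap built by
 * update_permission_bitmask() above (EPT-like case only: no CR0.WP,
 * SMEP or SMAP handling).  Illustrative, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define ACC_EXEC_MASK   1
#define ACC_WRITE_MASK  2
#define ACC_USER_MASK   4

#define PFERR_WRITE_MASK  (1u << 1)
#define PFERR_USER_MASK   (1u << 2)
#define PFERR_FETCH_MASK  (1u << 4)

/* Bit i of the result is set iff access-combination i includes (access). */
#define BYTE_MASK(access) \
	((1 & (access) ? 2 : 0) | \
	 (2 & (access) ? 4 : 0) | \
	 (3 & (access) ? 8 : 0) | \
	 (4 & (access) ? 16 : 0) | \
	 (5 & (access) ? 32 : 0) | \
	 (6 & (access) ? 64 : 0) | \
	 (7 & (access) ? 128 : 0))

int main(void)
{
	const uint8_t x = BYTE_MASK(ACC_EXEC_MASK);
	const uint8_t w = BYTE_MASK(ACC_WRITE_MASK);
	const uint8_t u = BYTE_MASK(ACC_USER_MASK);
	uint8_t permissions[16];

	for (unsigned byte = 0; byte < 16; ++byte) {
		unsigned pfec = byte << 1;
		uint8_t wf = (pfec & PFERR_WRITE_MASK) ? (uint8_t)~w : 0; /* write to non-writable */
		uint8_t uf = (pfec & PFERR_USER_MASK)  ? (uint8_t)~u : 0; /* user access to supervisor page */
		uint8_t ff = (pfec & PFERR_FETCH_MASK) ? (uint8_t)~x : 0; /* fetch from non-executable page */

		permissions[byte] = ff | uf | wf;
	}

	/* Lookup: a user-mode write to a read-only user page must fault. */
	unsigned pfec = PFERR_WRITE_MASK | PFERR_USER_MASK;
	unsigned pte_access = ACC_USER_MASK;            /* user, not writable, not executable */
	int fault = (permissions[pfec >> 1] >> pte_access) & 1;

	printf("fault = %d\n", fault);                  /* prints 1 */
	return 0;
}
```

The point of the rewrite is visible here: the old per-byte inner loop over all eight access combinations becomes a handful of byte-wide mask operations.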
@@ -4358,7 +4423,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
4358 | static void paging64_init_context(struct kvm_vcpu *vcpu, | 4423 | static void paging64_init_context(struct kvm_vcpu *vcpu, |
4359 | struct kvm_mmu *context) | 4424 | struct kvm_mmu *context) |
4360 | { | 4425 | { |
4361 | paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); | 4426 | int root_level = is_la57_mode(vcpu) ? |
4427 | PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; | ||
4428 | |||
4429 | paging64_init_context_common(vcpu, context, root_level); | ||
4362 | } | 4430 | } |
4363 | 4431 | ||
4364 | static void paging32_init_context(struct kvm_vcpu *vcpu, | 4432 | static void paging32_init_context(struct kvm_vcpu *vcpu, |
@@ -4399,7 +4467,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
4399 | context->sync_page = nonpaging_sync_page; | 4467 | context->sync_page = nonpaging_sync_page; |
4400 | context->invlpg = nonpaging_invlpg; | 4468 | context->invlpg = nonpaging_invlpg; |
4401 | context->update_pte = nonpaging_update_pte; | 4469 | context->update_pte = nonpaging_update_pte; |
4402 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 4470 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); |
4403 | context->root_hpa = INVALID_PAGE; | 4471 | context->root_hpa = INVALID_PAGE; |
4404 | context->direct_map = true; | 4472 | context->direct_map = true; |
4405 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; | 4473 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; |
@@ -4413,7 +4481,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
4413 | context->root_level = 0; | 4481 | context->root_level = 0; |
4414 | } else if (is_long_mode(vcpu)) { | 4482 | } else if (is_long_mode(vcpu)) { |
4415 | context->nx = is_nx(vcpu); | 4483 | context->nx = is_nx(vcpu); |
4416 | context->root_level = PT64_ROOT_LEVEL; | 4484 | context->root_level = is_la57_mode(vcpu) ? |
4485 | PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; | ||
4417 | reset_rsvds_bits_mask(vcpu, context); | 4486 | reset_rsvds_bits_mask(vcpu, context); |
4418 | context->gva_to_gpa = paging64_gva_to_gpa; | 4487 | context->gva_to_gpa = paging64_gva_to_gpa; |
4419 | } else if (is_pae(vcpu)) { | 4488 | } else if (is_pae(vcpu)) { |
@@ -4470,7 +4539,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | |||
4470 | 4539 | ||
4471 | MMU_WARN_ON(VALID_PAGE(context->root_hpa)); | 4540 | MMU_WARN_ON(VALID_PAGE(context->root_hpa)); |
4472 | 4541 | ||
4473 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 4542 | context->shadow_root_level = PT64_ROOT_4LEVEL; |
4474 | 4543 | ||
4475 | context->nx = true; | 4544 | context->nx = true; |
4476 | context->ept_ad = accessed_dirty; | 4545 | context->ept_ad = accessed_dirty; |
@@ -4479,7 +4548,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | |||
4479 | context->sync_page = ept_sync_page; | 4548 | context->sync_page = ept_sync_page; |
4480 | context->invlpg = ept_invlpg; | 4549 | context->invlpg = ept_invlpg; |
4481 | context->update_pte = ept_update_pte; | 4550 | context->update_pte = ept_update_pte; |
4482 | context->root_level = context->shadow_root_level; | 4551 | context->root_level = PT64_ROOT_4LEVEL; |
4483 | context->root_hpa = INVALID_PAGE; | 4552 | context->root_hpa = INVALID_PAGE; |
4484 | context->direct_map = false; | 4553 | context->direct_map = false; |
4485 | context->base_role.ad_disabled = !accessed_dirty; | 4554 | context->base_role.ad_disabled = !accessed_dirty; |
@@ -4524,7 +4593,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
4524 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; | 4593 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; |
4525 | } else if (is_long_mode(vcpu)) { | 4594 | } else if (is_long_mode(vcpu)) { |
4526 | g_context->nx = is_nx(vcpu); | 4595 | g_context->nx = is_nx(vcpu); |
4527 | g_context->root_level = PT64_ROOT_LEVEL; | 4596 | g_context->root_level = is_la57_mode(vcpu) ? |
4597 | PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; | ||
4528 | reset_rsvds_bits_mask(vcpu, g_context); | 4598 | reset_rsvds_bits_mask(vcpu, g_context); |
4529 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | 4599 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; |
4530 | } else if (is_pae(vcpu)) { | 4600 | } else if (is_pae(vcpu)) { |
@@ -4814,12 +4884,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
4814 | } | 4884 | } |
4815 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 4885 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
4816 | 4886 | ||
4817 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu) | 4887 | static int make_mmu_pages_available(struct kvm_vcpu *vcpu) |
4818 | { | 4888 | { |
4819 | LIST_HEAD(invalid_list); | 4889 | LIST_HEAD(invalid_list); |
4820 | 4890 | ||
4821 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) | 4891 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) |
4822 | return; | 4892 | return 0; |
4823 | 4893 | ||
4824 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { | 4894 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { |
4825 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) | 4895 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) |
@@ -4828,6 +4898,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) | |||
4828 | ++vcpu->kvm->stat.mmu_recycled; | 4898 | ++vcpu->kvm->stat.mmu_recycled; |
4829 | } | 4899 | } |
4830 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 4900 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
4901 | |||
4902 | if (!kvm_mmu_available_pages(vcpu->kvm)) | ||
4903 | return -ENOSPC; | ||
4904 | return 0; | ||
4831 | } | 4905 | } |
4832 | 4906 | ||
4833 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | 4907 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, |
@@ -4835,7 +4909,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | |||
4835 | { | 4909 | { |
4836 | int r, emulation_type = EMULTYPE_RETRY; | 4910 | int r, emulation_type = EMULTYPE_RETRY; |
4837 | enum emulation_result er; | 4911 | enum emulation_result er; |
4838 | bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); | 4912 | bool direct = vcpu->arch.mmu.direct_map; |
4913 | |||
4914 | /* With shadow page tables, fault_address contains a GVA or nGPA. */ | ||
4915 | if (vcpu->arch.mmu.direct_map) { | ||
4916 | vcpu->arch.gpa_available = true; | ||
4917 | vcpu->arch.gpa_val = cr2; | ||
4918 | } | ||
4839 | 4919 | ||
4840 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 4920 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
4841 | r = handle_mmio_page_fault(vcpu, cr2, direct); | 4921 | r = handle_mmio_page_fault(vcpu, cr2, direct); |
@@ -4847,6 +4927,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | |||
4847 | return 1; | 4927 | return 1; |
4848 | if (r < 0) | 4928 | if (r < 0) |
4849 | return r; | 4929 | return r; |
4930 | /* Must be RET_MMIO_PF_INVALID. */ | ||
4850 | } | 4931 | } |
4851 | 4932 | ||
4852 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), | 4933 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), |
@@ -4862,11 +4943,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | |||
4862 | * This can occur when using nested virtualization with nested | 4943 | * This can occur when using nested virtualization with nested |
4863 | * paging in both guests. If true, we simply unprotect the page | 4944 | * paging in both guests. If true, we simply unprotect the page |
4864 | * and resume the guest. | 4945 | * and resume the guest. |
4865 | * | ||
4866 | * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used | ||
4867 | * in PFERR_NEXT_GUEST_PAGE) | ||
4868 | */ | 4946 | */ |
4869 | if (error_code == PFERR_NESTED_GUEST_PAGE) { | 4947 | if (vcpu->arch.mmu.direct_map && |
4948 | (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { | ||
4870 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); | 4949 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); |
4871 | return 1; | 4950 | return 1; |
4872 | } | 4951 | } |
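[Editor's note] The key change in the hunk above is the comparison idiom: PFERR_NESTED_GUEST_PAGE is a combination of error-code bits, and the new test only requires that all of those bits are set rather than demanding that no other bit is set; the extra direct_map condition is checked separately. A minimal sketch of the difference, using placeholder bit values rather than the real PFERR_* encoding:

```c
/* Why the masked comparison matters.  Placeholder bit values, not the
 * real PFERR_* encoding from the kernel headers. */
#include <assert.h>
#include <stdint.h>

#define PFERR_PRESENT      (1u << 0)
#define PFERR_WRITE        (1u << 1)
#define PFERR_USER         (1u << 2)
#define PFERR_GUEST_PAGE   (1u << 3)

#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE | PFERR_WRITE | PFERR_PRESENT)

int main(void)
{
	/* A nested-NPT write fault that also happens to carry the USER bit. */
	uint32_t error_code = PFERR_NESTED_GUEST_PAGE | PFERR_USER;

	/* Old test: exact equality, so the extra bit makes the case slip through. */
	assert(!(error_code == PFERR_NESTED_GUEST_PAGE));

	/* New test: all required bits present, any extra bits ignored. */
	assert((error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE);
	return 0;
}
```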
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 4b9a3ae6b725..64a2dbd2b1af 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -37,7 +37,8 @@ | |||
37 | #define PT32_DIR_PSE36_MASK \ | 37 | #define PT32_DIR_PSE36_MASK \ |
38 | (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) | 38 | (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) |
39 | 39 | ||
40 | #define PT64_ROOT_LEVEL 4 | 40 | #define PT64_ROOT_5LEVEL 5 |
41 | #define PT64_ROOT_4LEVEL 4 | ||
41 | #define PT32_ROOT_LEVEL 2 | 42 | #define PT32_ROOT_LEVEL 2 |
42 | #define PT32E_ROOT_LEVEL 3 | 43 | #define PT32E_ROOT_LEVEL 3 |
43 | 44 | ||
@@ -48,6 +49,9 @@ | |||
48 | 49 | ||
49 | static inline u64 rsvd_bits(int s, int e) | 50 | static inline u64 rsvd_bits(int s, int e) |
50 | { | 51 | { |
52 | if (e < s) | ||
53 | return 0; | ||
54 | |||
51 | return ((1ULL << (e - s + 1)) - 1) << s; | 55 | return ((1ULL << (e - s + 1)) - 1) << s; |
52 | } | 56 | } |
53 | 57 | ||
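[Editor's note] The new early return in rsvd_bits() makes empty and inverted ranges well defined. A stand-alone check of the helper exactly as shown in the hunk (the sample bit ranges are arbitrary):

```c
/* Stand-alone check of the rsvd_bits() helper shown above. */
#include <assert.h>
#include <stdint.h>

static inline uint64_t rsvd_bits(int s, int e)
{
	if (e < s)
		return 0;

	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	/* Typical use: bits 46..51 reserved when MAXPHYADDR is 46. */
	assert(rsvd_bits(46, 51) == 0x000fc00000000000ULL);

	/*
	 * Empty or inverted ranges now simply yield 0; previously an
	 * inverted range such as (53, 51) evaluated a negative shift
	 * count, which is undefined behaviour in C.
	 */
	assert(rsvd_bits(52, 51) == 0);
	assert(rsvd_bits(53, 51) == 0);
	return 0;
}
```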
@@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); | |||
56 | void | 60 | void |
57 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 61 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
58 | 62 | ||
59 | /* | ||
60 | * Return values of handle_mmio_page_fault: | ||
61 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction | ||
62 | * directly. | ||
63 | * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page | ||
64 | * fault path update the mmio spte. | ||
65 | * RET_MMIO_PF_RETRY: let CPU fault again on the address. | ||
66 | * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). | ||
67 | */ | ||
68 | enum { | ||
69 | RET_MMIO_PF_EMULATE = 1, | ||
70 | RET_MMIO_PF_INVALID = 2, | ||
71 | RET_MMIO_PF_RETRY = 0, | ||
72 | RET_MMIO_PF_BUG = -1 | ||
73 | }; | ||
74 | |||
75 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); | ||
76 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); | 63 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); |
77 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | 64 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, |
78 | bool accessed_dirty); | 65 | bool accessed_dirty); |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index dcce533d420c..d22ddbdf5e6e 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) | |||
62 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 62 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
63 | return; | 63 | return; |
64 | 64 | ||
65 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 65 | if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { |
66 | hpa_t root = vcpu->arch.mmu.root_hpa; | 66 | hpa_t root = vcpu->arch.mmu.root_hpa; |
67 | 67 | ||
68 | sp = page_header(root); | 68 | sp = page_header(root); |
69 | __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL); | 69 | __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level); |
70 | return; | 70 | return; |
71 | } | 71 | } |
72 | 72 | ||
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 0149ac59c273..e9ea2d45ae66 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c | |||
@@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) | |||
130 | * enable MTRRs and it is obviously undesirable to run the | 130 | * enable MTRRs and it is obviously undesirable to run the |
131 | * guest entirely with UC memory and we use WB. | 131 | * guest entirely with UC memory and we use WB. |
132 | */ | 132 | */ |
133 | if (guest_cpuid_has_mtrr(vcpu)) | 133 | if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR)) |
134 | return MTRR_TYPE_UNCACHABLE; | 134 | return MTRR_TYPE_UNCACHABLE; |
135 | else | 135 | else |
136 | return MTRR_TYPE_WRBACK; | 136 | return MTRR_TYPE_WRBACK; |
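[Editor's note] This hunk is part of a tree-wide conversion from one-off helpers such as guest_cpuid_has_mtrr() to a single guest_cpuid_has(vcpu, X86_FEATURE_*) lookup. The toy below sketches the idea behind such a generic lookup, mapping a feature number to a CPUID leaf/register/bit and testing a cached guest value; the table, the cached array and the vcpu-less prototype are simplifications for the sketch, not KVM's actual cpuid.h code.

```c
/*
 * Sketch of a generic feature lookup in the style of guest_cpuid_has().
 * The mapping table is a toy covering only CPUID leaf 1.
 */
#include <stdbool.h>
#include <stdint.h>

struct cpuid_reg {
	uint32_t function;   /* CPUID leaf */
	uint32_t index;      /* ECX sub-leaf */
	int      reg;        /* 0=EAX 1=EBX 2=ECX 3=EDX */
};

/* Feature numbers follow the kernel convention: word * 32 + bit. */
#define X86_FEATURE_MTRR   (0 * 32 + 12)   /* CPUID.1:EDX bit 12 */
#define X86_FEATURE_X2APIC (4 * 32 + 21)   /* CPUID.1:ECX bit 21 */

static struct cpuid_reg reverse_cpuid(unsigned feature)
{
	switch (feature / 32) {
	case 0:  return (struct cpuid_reg){ 1, 0, 3 };  /* leaf 1, EDX */
	case 4:  return (struct cpuid_reg){ 1, 0, 2 };  /* leaf 1, ECX */
	default: return (struct cpuid_reg){ 0, 0, 0 };
	}
}

/* Cached guest CPUID.1 registers, as a hypervisor would keep per vCPU. */
static uint32_t guest_cpuid_1[4] = {
	[2] = 1u << 21,		/* ECX: x2APIC */
	[3] = 1u << 12,		/* EDX: MTRR */
};

static bool guest_cpuid_has(unsigned feature)
{
	struct cpuid_reg cr = reverse_cpuid(feature);

	if (cr.function != 1)	/* the toy table only knows leaf 1 */
		return false;
	return guest_cpuid_1[cr.reg] & (1u << (feature % 32));
}

int main(void)
{
	return (guest_cpuid_has(X86_FEATURE_MTRR) &&
		guest_cpuid_has(X86_FEATURE_X2APIC)) ? 0 : 1;
}
```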
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b0454c7e4cff..86b68dc5a649 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
790 | &map_writable)) | 790 | &map_writable)) |
791 | return 0; | 791 | return 0; |
792 | 792 | ||
793 | if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr, | 793 | if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) |
794 | walker.gfn, pfn, walker.pte_access, &r)) | ||
795 | return r; | 794 | return r; |
796 | 795 | ||
797 | /* | 796 | /* |
@@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
819 | goto out_unlock; | 818 | goto out_unlock; |
820 | 819 | ||
821 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 820 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
822 | make_mmu_pages_available(vcpu); | 821 | if (make_mmu_pages_available(vcpu) < 0) |
822 | goto out_unlock; | ||
823 | if (!force_pt_level) | 823 | if (!force_pt_level) |
824 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 824 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
825 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 825 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8dbd8dbc83eb..2c1cfe68a9af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO); | |||
280 | static int vls = true; | 280 | static int vls = true; |
281 | module_param(vls, int, 0444); | 281 | module_param(vls, int, 0444); |
282 | 282 | ||
283 | /* AVIC VM ID bit masks and lock */ | 283 | /* enable/disable Virtual GIF */ |
284 | static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); | 284 | static int vgif = true; |
285 | static DEFINE_SPINLOCK(avic_vm_id_lock); | 285 | module_param(vgif, int, 0444); |
286 | 286 | ||
287 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 287 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
288 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 288 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
@@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit) | |||
479 | recalc_intercepts(svm); | 479 | recalc_intercepts(svm); |
480 | } | 480 | } |
481 | 481 | ||
482 | static inline bool vgif_enabled(struct vcpu_svm *svm) | ||
483 | { | ||
484 | return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); | ||
485 | } | ||
486 | |||
482 | static inline void enable_gif(struct vcpu_svm *svm) | 487 | static inline void enable_gif(struct vcpu_svm *svm) |
483 | { | 488 | { |
484 | svm->vcpu.arch.hflags |= HF_GIF_MASK; | 489 | if (vgif_enabled(svm)) |
490 | svm->vmcb->control.int_ctl |= V_GIF_MASK; | ||
491 | else | ||
492 | svm->vcpu.arch.hflags |= HF_GIF_MASK; | ||
485 | } | 493 | } |
486 | 494 | ||
487 | static inline void disable_gif(struct vcpu_svm *svm) | 495 | static inline void disable_gif(struct vcpu_svm *svm) |
488 | { | 496 | { |
489 | svm->vcpu.arch.hflags &= ~HF_GIF_MASK; | 497 | if (vgif_enabled(svm)) |
498 | svm->vmcb->control.int_ctl &= ~V_GIF_MASK; | ||
499 | else | ||
500 | svm->vcpu.arch.hflags &= ~HF_GIF_MASK; | ||
490 | } | 501 | } |
491 | 502 | ||
492 | static inline bool gif_set(struct vcpu_svm *svm) | 503 | static inline bool gif_set(struct vcpu_svm *svm) |
493 | { | 504 | { |
494 | return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); | 505 | if (vgif_enabled(svm)) |
506 | return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); | ||
507 | else | ||
508 | return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); | ||
495 | } | 509 | } |
496 | 510 | ||
497 | static unsigned long iopm_base; | 511 | static unsigned long iopm_base; |
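[Editor's note] The accessors above are the heart of the Virtual GIF support: when the enable bit is set in the VMCB int_ctl field, the guest's global interrupt flag is tracked by hardware in another int_ctl bit, otherwise KVM keeps tracking it in the software hflags word. A compact, runnable restatement of that dual-path pattern, with placeholder bit positions standing in for the real V_GIF_ENABLE_MASK / V_GIF_MASK values from asm/svm.h:

```c
/* Dual-path GIF tracking: hardware bit when vGIF is enabled, software
 * flag otherwise.  All bit positions below are placeholders. */
#include <stdbool.h>
#include <stdint.h>

#define V_GIF_ENABLE_MASK (1u << 25)	/* placeholder position */
#define V_GIF_MASK        (1u << 9)	/* placeholder position */
#define HF_GIF_MASK       (1u << 0)

struct toy_svm {
	uint32_t int_ctl;	/* stands in for vmcb->control.int_ctl */
	uint32_t hflags;	/* stands in for vcpu.arch.hflags */
};

static bool vgif_enabled(struct toy_svm *svm)
{
	return svm->int_ctl & V_GIF_ENABLE_MASK;
}

static void enable_gif(struct toy_svm *svm)
{
	if (vgif_enabled(svm))
		svm->int_ctl |= V_GIF_MASK;
	else
		svm->hflags |= HF_GIF_MASK;
}

static bool gif_set(struct toy_svm *svm)
{
	if (vgif_enabled(svm))
		return svm->int_ctl & V_GIF_MASK;
	return svm->hflags & HF_GIF_MASK;
}

int main(void)
{
	struct toy_svm svm = { .int_ctl = V_GIF_ENABLE_MASK };

	enable_gif(&svm);
	return gif_set(&svm) ? 0 : 1;
}
```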
@@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid) | |||
567 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); | 581 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
568 | } | 582 | } |
569 | 583 | ||
570 | static int get_npt_level(void) | 584 | static int get_npt_level(struct kvm_vcpu *vcpu) |
571 | { | 585 | { |
572 | #ifdef CONFIG_X86_64 | 586 | #ifdef CONFIG_X86_64 |
573 | return PT64_ROOT_LEVEL; | 587 | return PT64_ROOT_4LEVEL; |
574 | #else | 588 | #else |
575 | return PT32E_ROOT_LEVEL; | 589 | return PT32E_ROOT_LEVEL; |
576 | #endif | 590 | #endif |
@@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) | |||
641 | struct vcpu_svm *svm = to_svm(vcpu); | 655 | struct vcpu_svm *svm = to_svm(vcpu); |
642 | unsigned nr = vcpu->arch.exception.nr; | 656 | unsigned nr = vcpu->arch.exception.nr; |
643 | bool has_error_code = vcpu->arch.exception.has_error_code; | 657 | bool has_error_code = vcpu->arch.exception.has_error_code; |
644 | bool reinject = vcpu->arch.exception.reinject; | 658 | bool reinject = vcpu->arch.exception.injected; |
645 | u32 error_code = vcpu->arch.exception.error_code; | 659 | u32 error_code = vcpu->arch.exception.error_code; |
646 | 660 | ||
647 | /* | 661 | /* |
@@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) | |||
973 | static void disable_nmi_singlestep(struct vcpu_svm *svm) | 987 | static void disable_nmi_singlestep(struct vcpu_svm *svm) |
974 | { | 988 | { |
975 | svm->nmi_singlestep = false; | 989 | svm->nmi_singlestep = false; |
990 | |||
976 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { | 991 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { |
977 | /* Clear our flags if they were not set by the guest */ | 992 | /* Clear our flags if they were not set by the guest */ |
978 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) | 993 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) |
@@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) | |||
989 | */ | 1004 | */ |
990 | #define SVM_VM_DATA_HASH_BITS 8 | 1005 | #define SVM_VM_DATA_HASH_BITS 8 |
991 | static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); | 1006 | static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); |
1007 | static u32 next_vm_id = 0; | ||
1008 | static bool next_vm_id_wrapped = 0; | ||
992 | static DEFINE_SPINLOCK(svm_vm_data_hash_lock); | 1009 | static DEFINE_SPINLOCK(svm_vm_data_hash_lock); |
993 | 1010 | ||
994 | /* Note: | 1011 | /* Note: |
@@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void) | |||
1108 | } | 1125 | } |
1109 | } | 1126 | } |
1110 | 1127 | ||
1128 | if (vgif) { | ||
1129 | if (!boot_cpu_has(X86_FEATURE_VGIF)) | ||
1130 | vgif = false; | ||
1131 | else | ||
1132 | pr_info("Virtual GIF supported\n"); | ||
1133 | } | ||
1134 | |||
1111 | return 0; | 1135 | return 0; |
1112 | 1136 | ||
1113 | err: | 1137 | err: |
@@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1305 | svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; | 1329 | svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; |
1306 | } | 1330 | } |
1307 | 1331 | ||
1332 | if (vgif) { | ||
1333 | clr_intercept(svm, INTERCEPT_STGI); | ||
1334 | clr_intercept(svm, INTERCEPT_CLGI); | ||
1335 | svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; | ||
1336 | } | ||
1337 | |||
1308 | mark_all_dirty(svm->vmcb); | 1338 | mark_all_dirty(svm->vmcb); |
1309 | 1339 | ||
1310 | enable_gif(svm); | 1340 | enable_gif(svm); |
@@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) | |||
1387 | return 0; | 1417 | return 0; |
1388 | } | 1418 | } |
1389 | 1419 | ||
1390 | static inline int avic_get_next_vm_id(void) | ||
1391 | { | ||
1392 | int id; | ||
1393 | |||
1394 | spin_lock(&avic_vm_id_lock); | ||
1395 | |||
1396 | /* AVIC VM ID is one-based. */ | ||
1397 | id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1); | ||
1398 | if (id <= AVIC_VM_ID_MASK) | ||
1399 | __set_bit(id, avic_vm_id_bitmap); | ||
1400 | else | ||
1401 | id = -EAGAIN; | ||
1402 | |||
1403 | spin_unlock(&avic_vm_id_lock); | ||
1404 | return id; | ||
1405 | } | ||
1406 | |||
1407 | static inline int avic_free_vm_id(int id) | ||
1408 | { | ||
1409 | if (id <= 0 || id > AVIC_VM_ID_MASK) | ||
1410 | return -EINVAL; | ||
1411 | |||
1412 | spin_lock(&avic_vm_id_lock); | ||
1413 | __clear_bit(id, avic_vm_id_bitmap); | ||
1414 | spin_unlock(&avic_vm_id_lock); | ||
1415 | return 0; | ||
1416 | } | ||
1417 | |||
1418 | static void avic_vm_destroy(struct kvm *kvm) | 1420 | static void avic_vm_destroy(struct kvm *kvm) |
1419 | { | 1421 | { |
1420 | unsigned long flags; | 1422 | unsigned long flags; |
@@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm) | |||
1423 | if (!avic) | 1425 | if (!avic) |
1424 | return; | 1426 | return; |
1425 | 1427 | ||
1426 | avic_free_vm_id(vm_data->avic_vm_id); | ||
1427 | |||
1428 | if (vm_data->avic_logical_id_table_page) | 1428 | if (vm_data->avic_logical_id_table_page) |
1429 | __free_page(vm_data->avic_logical_id_table_page); | 1429 | __free_page(vm_data->avic_logical_id_table_page); |
1430 | if (vm_data->avic_physical_id_table_page) | 1430 | if (vm_data->avic_physical_id_table_page) |
@@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm) | |||
1438 | static int avic_vm_init(struct kvm *kvm) | 1438 | static int avic_vm_init(struct kvm *kvm) |
1439 | { | 1439 | { |
1440 | unsigned long flags; | 1440 | unsigned long flags; |
1441 | int vm_id, err = -ENOMEM; | 1441 | int err = -ENOMEM; |
1442 | struct kvm_arch *vm_data = &kvm->arch; | 1442 | struct kvm_arch *vm_data = &kvm->arch; |
1443 | struct page *p_page; | 1443 | struct page *p_page; |
1444 | struct page *l_page; | 1444 | struct page *l_page; |
1445 | struct kvm_arch *ka; | ||
1446 | u32 vm_id; | ||
1445 | 1447 | ||
1446 | if (!avic) | 1448 | if (!avic) |
1447 | return 0; | 1449 | return 0; |
1448 | 1450 | ||
1449 | vm_id = avic_get_next_vm_id(); | ||
1450 | if (vm_id < 0) | ||
1451 | return vm_id; | ||
1452 | vm_data->avic_vm_id = (u32)vm_id; | ||
1453 | |||
1454 | /* Allocating physical APIC ID table (4KB) */ | 1451 | /* Allocating physical APIC ID table (4KB) */ |
1455 | p_page = alloc_page(GFP_KERNEL); | 1452 | p_page = alloc_page(GFP_KERNEL); |
1456 | if (!p_page) | 1453 | if (!p_page) |
@@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm) | |||
1468 | clear_page(page_address(l_page)); | 1465 | clear_page(page_address(l_page)); |
1469 | 1466 | ||
1470 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); | 1467 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); |
1468 | again: | ||
1469 | vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; | ||
1470 | if (vm_id == 0) { /* id is 1-based, zero is not okay */ | ||
1471 | next_vm_id_wrapped = 1; | ||
1472 | goto again; | ||
1473 | } | ||
1474 | /* Is it still in use? Only possible if wrapped at least once */ | ||
1475 | if (next_vm_id_wrapped) { | ||
1476 | hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { | ||
1477 | struct kvm *k2 = container_of(ka, struct kvm, arch); | ||
1478 | struct kvm_arch *vd2 = &k2->arch; | ||
1479 | if (vd2->avic_vm_id == vm_id) | ||
1480 | goto again; | ||
1481 | } | ||
1482 | } | ||
1483 | vm_data->avic_vm_id = vm_id; | ||
1471 | hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); | 1484 | hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); |
1472 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); | 1485 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); |
1473 | 1486 | ||
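[Editor's note] The replacement allocator hands out AVIC VM IDs sequentially, skips zero because IDs are 1-based, and only scans for collisions once the counter has wrapped at least once. A stand-alone sketch of that policy, assuming a 24-bit ID space and substituting a flat array for the svm_vm_data_hash walk done under the spinlock in the patch:

```c
/* Sketch of the wrapping VM-ID allocator introduced above.  The real
 * code walks a hash table under a spinlock; this toy uses a flat array
 * and no locking. */
#include <stdbool.h>
#include <stdint.h>

#define AVIC_VM_ID_MASK  ((1u << 24) - 1)	/* assumed 24-bit ID space */
#define MAX_VMS          64

static uint32_t live_ids[MAX_VMS];
static unsigned nr_live;
static uint32_t next_vm_id;
static bool next_vm_id_wrapped;

static bool id_in_use(uint32_t id)
{
	for (unsigned i = 0; i < nr_live; i++)
		if (live_ids[i] == id)
			return true;
	return false;
}

static uint32_t alloc_vm_id(void)
{
again:
	next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
	if (next_vm_id == 0) {			/* IDs are 1-based */
		next_vm_id_wrapped = true;
		goto again;
	}
	/* Collisions are only possible once the counter has wrapped. */
	if (next_vm_id_wrapped && id_in_use(next_vm_id))
		goto again;

	live_ids[nr_live++] = next_vm_id;
	return next_vm_id;
}

int main(void)
{
	return alloc_vm_id() == 1 ? 0 : 1;	/* first ID handed out is 1 */
}
```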
@@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
1580 | } | 1593 | } |
1581 | init_vmcb(svm); | 1594 | init_vmcb(svm); |
1582 | 1595 | ||
1583 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1596 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); |
1584 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1597 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
1585 | 1598 | ||
1586 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) | 1599 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) |
@@ -2384,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | |||
2384 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; | 2397 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; |
2385 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; | 2398 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; |
2386 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; | 2399 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; |
2387 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); | 2400 | vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); |
2388 | reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); | 2401 | reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); |
2389 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 2402 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
2390 | } | 2403 | } |
@@ -3147,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm) | |||
3147 | if (nested_svm_check_permissions(svm)) | 3160 | if (nested_svm_check_permissions(svm)) |
3148 | return 1; | 3161 | return 1; |
3149 | 3162 | ||
3163 | /* | ||
3164 | * If VGIF is enabled, the STGI intercept is only added to | ||
3165 | * detect the opening of the NMI window; remove it now. | ||
3166 | */ | ||
3167 | if (vgif_enabled(svm)) | ||
3168 | clr_intercept(svm, INTERCEPT_STGI); | ||
3169 | |||
3150 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 3170 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
3151 | ret = kvm_skip_emulated_instruction(&svm->vcpu); | 3171 | ret = kvm_skip_emulated_instruction(&svm->vcpu); |
3152 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3172 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
@@ -3744,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm) | |||
3744 | 3764 | ||
3745 | static int pause_interception(struct vcpu_svm *svm) | 3765 | static int pause_interception(struct vcpu_svm *svm) |
3746 | { | 3766 | { |
3747 | kvm_vcpu_on_spin(&(svm->vcpu)); | 3767 | struct kvm_vcpu *vcpu = &svm->vcpu; |
3768 | bool in_kernel = (svm_get_cpl(vcpu) == 0); | ||
3769 | |||
3770 | kvm_vcpu_on_spin(vcpu, in_kernel); | ||
3748 | return 1; | 3771 | return 1; |
3749 | } | 3772 | } |
3750 | 3773 | ||
@@ -4228,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
4228 | 4251 | ||
4229 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); | 4252 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); |
4230 | 4253 | ||
4231 | vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); | ||
4232 | |||
4233 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) | 4254 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
4234 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 4255 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
4235 | if (npt_enabled) | 4256 | if (npt_enabled) |
@@ -4682,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
4682 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | 4703 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes |
4683 | * 1, because that's a separate STGI/VMRUN intercept. The next time we | 4704 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
4684 | * get that intercept, this function will be called again though and | 4705 | * get that intercept, this function will be called again though and |
4685 | * we'll get the vintr intercept. | 4706 | * we'll get the vintr intercept. However, if the vGIF feature is |
4707 | * enabled, the STGI interception will not occur. Enable the irq | ||
4708 | * window under the assumption that the hardware will set the GIF. | ||
4686 | */ | 4709 | */ |
4687 | if (gif_set(svm) && nested_svm_intr(svm)) { | 4710 | if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { |
4688 | svm_set_vintr(svm); | 4711 | svm_set_vintr(svm); |
4689 | svm_inject_irq(svm, 0x0); | 4712 | svm_inject_irq(svm, 0x0); |
4690 | } | 4713 | } |
@@ -4698,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
4698 | == HF_NMI_MASK) | 4721 | == HF_NMI_MASK) |
4699 | return; /* IRET will cause a vm exit */ | 4722 | return; /* IRET will cause a vm exit */ |
4700 | 4723 | ||
4701 | if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) | 4724 | if (!gif_set(svm)) { |
4725 | if (vgif_enabled(svm)) | ||
4726 | set_intercept(svm, INTERCEPT_STGI); | ||
4702 | return; /* STGI will cause a vm exit */ | 4727 | return; /* STGI will cause a vm exit */ |
4728 | } | ||
4703 | 4729 | ||
4704 | if (svm->nested.exit_required) | 4730 | if (svm->nested.exit_required) |
4705 | return; /* we're not going to run the guest yet */ | 4731 | return; /* we're not going to run the guest yet */ |
@@ -5071,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
5071 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | 5097 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) |
5072 | { | 5098 | { |
5073 | struct vcpu_svm *svm = to_svm(vcpu); | 5099 | struct vcpu_svm *svm = to_svm(vcpu); |
5074 | struct kvm_cpuid_entry2 *entry; | ||
5075 | 5100 | ||
5076 | /* Update nrips enabled cache */ | 5101 | /* Update nrips enabled cache */ |
5077 | svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); | 5102 | svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); |
5078 | 5103 | ||
5079 | if (!kvm_vcpu_apicv_active(vcpu)) | 5104 | if (!kvm_vcpu_apicv_active(vcpu)) |
5080 | return; | 5105 | return; |
5081 | 5106 | ||
5082 | entry = kvm_find_cpuid_entry(vcpu, 1, 0); | 5107 | guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); |
5083 | if (entry) | ||
5084 | entry->ecx &= ~bit(X86_FEATURE_X2APIC); | ||
5085 | } | 5108 | } |
5086 | 5109 | ||
5087 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 5110 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0a6cc6754ec5..8a202c49e2a0 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio, | |||
151 | */ | 151 | */ |
152 | TRACE_EVENT(kvm_cpuid, | 152 | TRACE_EVENT(kvm_cpuid, |
153 | TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx, | 153 | TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx, |
154 | unsigned long rcx, unsigned long rdx), | 154 | unsigned long rcx, unsigned long rdx, bool found), |
155 | TP_ARGS(function, rax, rbx, rcx, rdx), | 155 | TP_ARGS(function, rax, rbx, rcx, rdx, found), |
156 | 156 | ||
157 | TP_STRUCT__entry( | 157 | TP_STRUCT__entry( |
158 | __field( unsigned int, function ) | 158 | __field( unsigned int, function ) |
@@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid, | |||
160 | __field( unsigned long, rbx ) | 160 | __field( unsigned long, rbx ) |
161 | __field( unsigned long, rcx ) | 161 | __field( unsigned long, rcx ) |
162 | __field( unsigned long, rdx ) | 162 | __field( unsigned long, rdx ) |
163 | __field( bool, found ) | ||
163 | ), | 164 | ), |
164 | 165 | ||
165 | TP_fast_assign( | 166 | TP_fast_assign( |
@@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid, | |||
168 | __entry->rbx = rbx; | 169 | __entry->rbx = rbx; |
169 | __entry->rcx = rcx; | 170 | __entry->rcx = rcx; |
170 | __entry->rdx = rdx; | 171 | __entry->rdx = rdx; |
172 | __entry->found = found; | ||
171 | ), | 173 | ), |
172 | 174 | ||
173 | TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx", | 175 | TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s", |
174 | __entry->function, __entry->rax, | 176 | __entry->function, __entry->rax, |
175 | __entry->rbx, __entry->rcx, __entry->rdx) | 177 | __entry->rbx, __entry->rcx, __entry->rdx, |
178 | __entry->found ? "found" : "not found") | ||
176 | ); | 179 | ); |
177 | 180 | ||
178 | #define AREG(x) { APIC_##x, "APIC_" #x } | 181 | #define AREG(x) { APIC_##x, "APIC_" #x } |
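[Editor's note] With the extra `found` argument, the trace line now records whether a matching guest CPUID entry existed for the leaf the guest asked about. A trivial user-space stub showing the output format produced by the TP_printk above (the stub only imitates the tracepoint and its format string; it is not kernel code):

```c
#include <stdbool.h>
#include <stdio.h>

/* Stub standing in for the kvm_cpuid tracepoint declared above. */
static void trace_kvm_cpuid(unsigned function, unsigned long rax,
			    unsigned long rbx, unsigned long rcx,
			    unsigned long rdx, bool found)
{
	printf("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s\n",
	       function, rax, rbx, rcx, rdx, found ? "found" : "not found");
}

int main(void)
{
	/* A leaf the (empty) guest CPUID table does not contain. */
	trace_kvm_cpuid(0x40000100, 0, 0, 0, 0, false);
	return 0;
}
```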
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70b90c0810d0..4253adef9044 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -122,7 +122,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); | |||
122 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | 122 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) |
123 | #define KVM_CR4_GUEST_OWNED_BITS \ | 123 | #define KVM_CR4_GUEST_OWNED_BITS \ |
124 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 124 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
125 | | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) | 125 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) |
126 | 126 | ||
127 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | 127 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) |
128 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | 128 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) |
@@ -243,11 +243,13 @@ struct __packed vmcs12 { | |||
243 | u64 virtual_apic_page_addr; | 243 | u64 virtual_apic_page_addr; |
244 | u64 apic_access_addr; | 244 | u64 apic_access_addr; |
245 | u64 posted_intr_desc_addr; | 245 | u64 posted_intr_desc_addr; |
246 | u64 vm_function_control; | ||
246 | u64 ept_pointer; | 247 | u64 ept_pointer; |
247 | u64 eoi_exit_bitmap0; | 248 | u64 eoi_exit_bitmap0; |
248 | u64 eoi_exit_bitmap1; | 249 | u64 eoi_exit_bitmap1; |
249 | u64 eoi_exit_bitmap2; | 250 | u64 eoi_exit_bitmap2; |
250 | u64 eoi_exit_bitmap3; | 251 | u64 eoi_exit_bitmap3; |
252 | u64 eptp_list_address; | ||
251 | u64 xss_exit_bitmap; | 253 | u64 xss_exit_bitmap; |
252 | u64 guest_physical_address; | 254 | u64 guest_physical_address; |
253 | u64 vmcs_link_pointer; | 255 | u64 vmcs_link_pointer; |
@@ -481,6 +483,7 @@ struct nested_vmx { | |||
481 | u64 nested_vmx_cr4_fixed0; | 483 | u64 nested_vmx_cr4_fixed0; |
482 | u64 nested_vmx_cr4_fixed1; | 484 | u64 nested_vmx_cr4_fixed1; |
483 | u64 nested_vmx_vmcs_enum; | 485 | u64 nested_vmx_vmcs_enum; |
486 | u64 nested_vmx_vmfunc_controls; | ||
484 | }; | 487 | }; |
485 | 488 | ||
486 | #define POSTED_INTR_ON 0 | 489 | #define POSTED_INTR_ON 0 |
@@ -573,6 +576,8 @@ struct vcpu_vmx { | |||
573 | #endif | 576 | #endif |
574 | u32 vm_entry_controls_shadow; | 577 | u32 vm_entry_controls_shadow; |
575 | u32 vm_exit_controls_shadow; | 578 | u32 vm_exit_controls_shadow; |
579 | u32 secondary_exec_control; | ||
580 | |||
576 | /* | 581 | /* |
577 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | 582 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a |
578 | * non-nested (L1) guest, it always points to vmcs01. For a nested | 583 | * non-nested (L1) guest, it always points to vmcs01. For a nested |
@@ -761,11 +766,13 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
761 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), | 766 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), |
762 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), | 767 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), |
763 | FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), | 768 | FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), |
769 | FIELD64(VM_FUNCTION_CONTROL, vm_function_control), | ||
764 | FIELD64(EPT_POINTER, ept_pointer), | 770 | FIELD64(EPT_POINTER, ept_pointer), |
765 | FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), | 771 | FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), |
766 | FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), | 772 | FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), |
767 | FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), | 773 | FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), |
768 | FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), | 774 | FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), |
775 | FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), | ||
769 | FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), | 776 | FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), |
770 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), | 777 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), |
771 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), | 778 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), |
@@ -889,25 +896,6 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | |||
889 | return to_vmx(vcpu)->nested.cached_vmcs12; | 896 | return to_vmx(vcpu)->nested.cached_vmcs12; |
890 | } | 897 | } |
891 | 898 | ||
892 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) | ||
893 | { | ||
894 | struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT); | ||
895 | if (is_error_page(page)) | ||
896 | return NULL; | ||
897 | |||
898 | return page; | ||
899 | } | ||
900 | |||
901 | static void nested_release_page(struct page *page) | ||
902 | { | ||
903 | kvm_release_page_dirty(page); | ||
904 | } | ||
905 | |||
906 | static void nested_release_page_clean(struct page *page) | ||
907 | { | ||
908 | kvm_release_page_clean(page); | ||
909 | } | ||
910 | |||
911 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); | 899 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); |
912 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | 900 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); |
913 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); | 901 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); |
@@ -1212,6 +1200,16 @@ static inline bool cpu_has_vmx_ept_4levels(void) | |||
1212 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | 1200 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; |
1213 | } | 1201 | } |
1214 | 1202 | ||
1203 | static inline bool cpu_has_vmx_ept_mt_wb(void) | ||
1204 | { | ||
1205 | return vmx_capability.ept & VMX_EPTP_WB_BIT; | ||
1206 | } | ||
1207 | |||
1208 | static inline bool cpu_has_vmx_ept_5levels(void) | ||
1209 | { | ||
1210 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT; | ||
1211 | } | ||
1212 | |||
1215 | static inline bool cpu_has_vmx_ept_ad_bits(void) | 1213 | static inline bool cpu_has_vmx_ept_ad_bits(void) |
1216 | { | 1214 | { |
1217 | return vmx_capability.ept & VMX_EPT_AD_BIT; | 1215 | return vmx_capability.ept & VMX_EPT_AD_BIT; |
@@ -1317,6 +1315,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void) | |||
1317 | SECONDARY_EXEC_TSC_SCALING; | 1315 | SECONDARY_EXEC_TSC_SCALING; |
1318 | } | 1316 | } |
1319 | 1317 | ||
1318 | static inline bool cpu_has_vmx_vmfunc(void) | ||
1319 | { | ||
1320 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
1321 | SECONDARY_EXEC_ENABLE_VMFUNC; | ||
1322 | } | ||
1323 | |||
1320 | static inline bool report_flexpriority(void) | 1324 | static inline bool report_flexpriority(void) |
1321 | { | 1325 | { |
1322 | return flexpriority_enabled; | 1326 | return flexpriority_enabled; |
@@ -1357,8 +1361,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | |||
1357 | 1361 | ||
1358 | static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) | 1362 | static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) |
1359 | { | 1363 | { |
1360 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && | 1364 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); |
1361 | vmx_xsaves_supported(); | ||
1362 | } | 1365 | } |
1363 | 1366 | ||
1364 | static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) | 1367 | static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) |
@@ -1391,6 +1394,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) | |||
1391 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; | 1394 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; |
1392 | } | 1395 | } |
1393 | 1396 | ||
1397 | static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) | ||
1398 | { | ||
1399 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); | ||
1400 | } | ||
1401 | |||
1402 | static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) | ||
1403 | { | ||
1404 | return nested_cpu_has_vmfunc(vmcs12) && | ||
1405 | (vmcs12->vm_function_control & | ||
1406 | VMX_VMFUNC_EPTP_SWITCHING); | ||
1407 | } | ||
1408 | |||
1394 | static inline bool is_nmi(u32 intr_info) | 1409 | static inline bool is_nmi(u32 intr_info) |
1395 | { | 1410 | { |
1396 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1411 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
@@ -2450,15 +2465,14 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, | |||
2450 | * KVM wants to inject page-faults which it got to the guest. This function | 2465 | * KVM wants to inject page-faults which it got to the guest. This function |
2451 | * checks whether in a nested guest, we need to inject them to L1 or L2. | 2466 | * checks whether in a nested guest, we need to inject them to L1 or L2. |
2452 | */ | 2467 | */ |
2453 | static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) | 2468 | static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) |
2454 | { | 2469 | { |
2455 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 2470 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
2456 | unsigned int nr = vcpu->arch.exception.nr; | 2471 | unsigned int nr = vcpu->arch.exception.nr; |
2457 | 2472 | ||
2458 | if (nr == PF_VECTOR) { | 2473 | if (nr == PF_VECTOR) { |
2459 | if (vcpu->arch.exception.nested_apf) { | 2474 | if (vcpu->arch.exception.nested_apf) { |
2460 | nested_vmx_inject_exception_vmexit(vcpu, | 2475 | *exit_qual = vcpu->arch.apf.nested_apf_token; |
2461 | vcpu->arch.apf.nested_apf_token); | ||
2462 | return 1; | 2476 | return 1; |
2463 | } | 2477 | } |
2464 | /* | 2478 | /* |
@@ -2472,16 +2486,15 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) | |||
2472 | */ | 2486 | */ |
2473 | if (nested_vmx_is_page_fault_vmexit(vmcs12, | 2487 | if (nested_vmx_is_page_fault_vmexit(vmcs12, |
2474 | vcpu->arch.exception.error_code)) { | 2488 | vcpu->arch.exception.error_code)) { |
2475 | nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); | 2489 | *exit_qual = vcpu->arch.cr2; |
2476 | return 1; | 2490 | return 1; |
2477 | } | 2491 | } |
2478 | } else { | 2492 | } else { |
2479 | unsigned long exit_qual = 0; | ||
2480 | if (nr == DB_VECTOR) | ||
2481 | exit_qual = vcpu->arch.dr6; | ||
2482 | |||
2483 | if (vmcs12->exception_bitmap & (1u << nr)) { | 2493 | if (vmcs12->exception_bitmap & (1u << nr)) { |
2484 | nested_vmx_inject_exception_vmexit(vcpu, exit_qual); | 2494 | if (nr == DB_VECTOR) |
2495 | *exit_qual = vcpu->arch.dr6; | ||
2496 | else | ||
2497 | *exit_qual = 0; | ||
2485 | return 1; | 2498 | return 1; |
2486 | } | 2499 | } |
2487 | } | 2500 | } |
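[Editor's note] After this change nested_vmx_check_exception() only decides whether L1 intercepts the exception and reports the exit qualification through the new pointer argument; performing the nested vmexit is left to the caller. The toy program below mirrors that control-flow split; the structure fields and function names are stand-ins, and the exact caller in vmx.c is not shown in this hunk:

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins: only the control flow mirrors the patch (the check
 * fills in an exit qualification, the caller performs the injection). */
struct toy_vcpu {
	bool exception_pending;
	unsigned nr;			/* exception vector */
	unsigned long dr6;
	unsigned exception_bitmap;	/* L1's intercept bitmap */
};

#define DB_VECTOR 1

static int nested_check_exception(struct toy_vcpu *vcpu, unsigned long *exit_qual)
{
	if (!(vcpu->exception_bitmap & (1u << vcpu->nr)))
		return 0;		/* L1 does not intercept it */

	*exit_qual = (vcpu->nr == DB_VECTOR) ? vcpu->dr6 : 0;
	return 1;
}

static void inject_exception_vmexit(struct toy_vcpu *vcpu, unsigned long exit_qual)
{
	(void)vcpu;
	printf("vmexit to L1, exit qualification %#lx\n", exit_qual);
}

int main(void)
{
	struct toy_vcpu vcpu = {
		.exception_pending = true,
		.nr = DB_VECTOR,
		.dr6 = 0x4,
		.exception_bitmap = 1u << DB_VECTOR,
	};
	unsigned long exit_qual;

	if (vcpu.exception_pending && nested_check_exception(&vcpu, &exit_qual))
		inject_exception_vmexit(&vcpu, exit_qual);
	return 0;
}
```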
@@ -2494,14 +2507,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) | |||
2494 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2507 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2495 | unsigned nr = vcpu->arch.exception.nr; | 2508 | unsigned nr = vcpu->arch.exception.nr; |
2496 | bool has_error_code = vcpu->arch.exception.has_error_code; | 2509 | bool has_error_code = vcpu->arch.exception.has_error_code; |
2497 | bool reinject = vcpu->arch.exception.reinject; | ||
2498 | u32 error_code = vcpu->arch.exception.error_code; | 2510 | u32 error_code = vcpu->arch.exception.error_code; |
2499 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 2511 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
2500 | 2512 | ||
2501 | if (!reinject && is_guest_mode(vcpu) && | ||
2502 | nested_vmx_check_exception(vcpu)) | ||
2503 | return; | ||
2504 | |||
2505 | if (has_error_code) { | 2513 | if (has_error_code) { |
2506 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | 2514 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); |
2507 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; | 2515 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; |
@@ -2600,7 +2608,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
2600 | if (index >= 0) | 2608 | if (index >= 0) |
2601 | move_msr_up(vmx, index, save_nmsrs++); | 2609 | move_msr_up(vmx, index, save_nmsrs++); |
2602 | index = __find_msr_index(vmx, MSR_TSC_AUX); | 2610 | index = __find_msr_index(vmx, MSR_TSC_AUX); |
2603 | if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) | 2611 | if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) |
2604 | move_msr_up(vmx, index, save_nmsrs++); | 2612 | move_msr_up(vmx, index, save_nmsrs++); |
2605 | /* | 2613 | /* |
2606 | * MSR_STAR is only needed on long mode guests, and only | 2614 | * MSR_STAR is only needed on long mode guests, and only |
@@ -2660,12 +2668,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
2660 | } | 2668 | } |
2661 | } | 2669 | } |
2662 | 2670 | ||
2663 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | ||
2664 | { | ||
2665 | struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
2666 | return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31))); | ||
2667 | } | ||
2668 | |||
2669 | /* | 2671 | /* |
2670 | * nested_vmx_allowed() checks whether a guest should be allowed to use VMX | 2672 | * nested_vmx_allowed() checks whether a guest should be allowed to use VMX |
2671 | * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for | 2673 | * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for |
@@ -2674,7 +2676,7 @@ static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | |||
2674 | */ | 2676 | */ |
2675 | static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) | 2677 | static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) |
2676 | { | 2678 | { |
2677 | return nested && guest_cpuid_has_vmx(vcpu); | 2679 | return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); |
2678 | } | 2680 | } |
2679 | 2681 | ||
2680 | /* | 2682 | /* |
@@ -2797,21 +2799,21 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2797 | vmx->nested.nested_vmx_procbased_ctls_low &= | 2799 | vmx->nested.nested_vmx_procbased_ctls_low &= |
2798 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); | 2800 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); |
2799 | 2801 | ||
2800 | /* secondary cpu-based controls */ | 2802 | /* |
2803 | * secondary cpu-based controls. Do not include those that | ||
2804 | * depend on CPUID bits, they are added later by vmx_cpuid_update. | ||
2805 | */ | ||
2801 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | 2806 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, |
2802 | vmx->nested.nested_vmx_secondary_ctls_low, | 2807 | vmx->nested.nested_vmx_secondary_ctls_low, |
2803 | vmx->nested.nested_vmx_secondary_ctls_high); | 2808 | vmx->nested.nested_vmx_secondary_ctls_high); |
2804 | vmx->nested.nested_vmx_secondary_ctls_low = 0; | 2809 | vmx->nested.nested_vmx_secondary_ctls_low = 0; |
2805 | vmx->nested.nested_vmx_secondary_ctls_high &= | 2810 | vmx->nested.nested_vmx_secondary_ctls_high &= |
2806 | SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED | | ||
2807 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2811 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2808 | SECONDARY_EXEC_RDTSCP | | ||
2809 | SECONDARY_EXEC_DESC | | 2812 | SECONDARY_EXEC_DESC | |
2810 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 2813 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
2811 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2814 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2812 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 2815 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2813 | SECONDARY_EXEC_WBINVD_EXITING | | 2816 | SECONDARY_EXEC_WBINVD_EXITING; |
2814 | SECONDARY_EXEC_XSAVES; | ||
2815 | 2817 | ||
2816 | if (enable_ept) { | 2818 | if (enable_ept) { |
2817 | /* nested EPT: emulate EPT also to L1 */ | 2819 | /* nested EPT: emulate EPT also to L1 */ |
@@ -2834,6 +2836,17 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
2834 | } else | 2836 | } else |
2835 | vmx->nested.nested_vmx_ept_caps = 0; | 2837 | vmx->nested.nested_vmx_ept_caps = 0; |
2836 | 2838 | ||
2839 | if (cpu_has_vmx_vmfunc()) { | ||
2840 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
2841 | SECONDARY_EXEC_ENABLE_VMFUNC; | ||
2842 | /* | ||
2843 | * Advertise EPTP switching unconditionally | ||
2844 | * since we emulate it | ||
2845 | */ | ||
2846 | vmx->nested.nested_vmx_vmfunc_controls = | ||
2847 | VMX_VMFUNC_EPTP_SWITCHING; | ||
2848 | } | ||
2849 | |||
2837 | /* | 2850 | /* |
2838 | * Old versions of KVM use the single-context version without | 2851 | * Old versions of KVM use the single-context version without |
2839 | * checking for support, so declare that it is supported even | 2852 | * checking for support, so declare that it is supported even |
@@ -3203,6 +3216,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
3203 | *pdata = vmx->nested.nested_vmx_ept_caps | | 3216 | *pdata = vmx->nested.nested_vmx_ept_caps | |
3204 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); | 3217 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); |
3205 | break; | 3218 | break; |
3219 | case MSR_IA32_VMX_VMFUNC: | ||
3220 | *pdata = vmx->nested.nested_vmx_vmfunc_controls; | ||
3221 | break; | ||
3206 | default: | 3222 | default: |
3207 | return 1; | 3223 | return 1; |
3208 | } | 3224 | } |
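[Editor's note] MSR_IA32_VMX_VMFUNC is what L1 reads to learn which VM functions it may use; with EPTP switching advertised, an L1 guest can execute VMFUNC leaf 0 directly. For context, a guest-side fragment invoking it: EAX selects the function, ECX the index into the hypervisor-provided EPTP list, and the byte sequence is the VMFUNC opcode. The helper name is made up, the fragment is not part of the patch, and it will fault if run outside a suitably configured guest.

```c
/* Guest-side invocation of VMFUNC leaf 0 (EPTP switching). */
static inline void vmfunc_switch_eptp(unsigned int eptp_index)
{
	asm volatile(".byte 0x0f, 0x01, 0xd4"		/* VMFUNC */
		     : : "a" (0), "c" (eptp_index)
		     : "memory");
}
```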
@@ -3256,7 +3272,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3256 | break; | 3272 | break; |
3257 | case MSR_IA32_BNDCFGS: | 3273 | case MSR_IA32_BNDCFGS: |
3258 | if (!kvm_mpx_supported() || | 3274 | if (!kvm_mpx_supported() || |
3259 | (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) | 3275 | (!msr_info->host_initiated && |
3276 | !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) | ||
3260 | return 1; | 3277 | return 1; |
3261 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); | 3278 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); |
3262 | break; | 3279 | break; |
@@ -3280,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3280 | msr_info->data = vcpu->arch.ia32_xss; | 3297 | msr_info->data = vcpu->arch.ia32_xss; |
3281 | break; | 3298 | break; |
3282 | case MSR_TSC_AUX: | 3299 | case MSR_TSC_AUX: |
3283 | if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) | 3300 | if (!msr_info->host_initiated && |
3301 | !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) | ||
3284 | return 1; | 3302 | return 1; |
3285 | /* Otherwise falls through */ | 3303 | /* Otherwise falls through */ |
3286 | default: | 3304 | default: |
@@ -3339,9 +3357,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3339 | break; | 3357 | break; |
3340 | case MSR_IA32_BNDCFGS: | 3358 | case MSR_IA32_BNDCFGS: |
3341 | if (!kvm_mpx_supported() || | 3359 | if (!kvm_mpx_supported() || |
3342 | (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) | 3360 | (!msr_info->host_initiated && |
3361 | !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) | ||
3343 | return 1; | 3362 | return 1; |
3344 | if (is_noncanonical_address(data & PAGE_MASK) || | 3363 | if (is_noncanonical_address(data & PAGE_MASK, vcpu) || |
3345 | (data & MSR_IA32_BNDCFGS_RSVD)) | 3364 | (data & MSR_IA32_BNDCFGS_RSVD)) |
3346 | return 1; | 3365 | return 1; |
3347 | vmcs_write64(GUEST_BNDCFGS, data); | 3366 | vmcs_write64(GUEST_BNDCFGS, data); |
@@ -3402,7 +3421,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
3402 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); | 3421 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); |
3403 | break; | 3422 | break; |
3404 | case MSR_TSC_AUX: | 3423 | case MSR_TSC_AUX: |
3405 | if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) | 3424 | if (!msr_info->host_initiated && |
3425 | !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) | ||
3406 | return 1; | 3426 | return 1; |
3407 | /* Check reserved bit, higher 32 bits should be zero */ | 3427 | /* Check reserved bit, higher 32 bits should be zero */ |
3408 | if ((data >> 32) != 0) | 3428 | if ((data >> 32) != 0) |
@@ -3639,8 +3659,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
3639 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 3659 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
3640 | SECONDARY_EXEC_SHADOW_VMCS | | 3660 | SECONDARY_EXEC_SHADOW_VMCS | |
3641 | SECONDARY_EXEC_XSAVES | | 3661 | SECONDARY_EXEC_XSAVES | |
3662 | SECONDARY_EXEC_RDSEED | | ||
3663 | SECONDARY_EXEC_RDRAND | | ||
3642 | SECONDARY_EXEC_ENABLE_PML | | 3664 | SECONDARY_EXEC_ENABLE_PML | |
3643 | SECONDARY_EXEC_TSC_SCALING; | 3665 | SECONDARY_EXEC_TSC_SCALING | |
3666 | SECONDARY_EXEC_ENABLE_VMFUNC; | ||
3644 | if (adjust_vmx_controls(min2, opt2, | 3667 | if (adjust_vmx_controls(min2, opt2, |
3645 | MSR_IA32_VMX_PROCBASED_CTLS2, | 3668 | MSR_IA32_VMX_PROCBASED_CTLS2, |
3646 | &_cpu_based_2nd_exec_control) < 0) | 3669 | &_cpu_based_2nd_exec_control) < 0) |
@@ -4272,16 +4295,22 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
4272 | vmx->emulation_required = emulation_required(vcpu); | 4295 | vmx->emulation_required = emulation_required(vcpu); |
4273 | } | 4296 | } |
4274 | 4297 | ||
4298 | static int get_ept_level(struct kvm_vcpu *vcpu) | ||
4299 | { | ||
4300 | if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) | ||
4301 | return 5; | ||
4302 | return 4; | ||
4303 | } | ||
4304 | |||
4275 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) | 4305 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) |
4276 | { | 4306 | { |
4277 | u64 eptp; | 4307 | u64 eptp = VMX_EPTP_MT_WB; |
4308 | |||
4309 | eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; | ||
4278 | 4310 | ||
4279 | /* TODO write the value reading from MSR */ | ||
4280 | eptp = VMX_EPT_DEFAULT_MT | | ||
4281 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | ||
4282 | if (enable_ept_ad_bits && | 4311 | if (enable_ept_ad_bits && |
4283 | (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) | 4312 | (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) |
4284 | eptp |= VMX_EPT_AD_ENABLE_BIT; | 4313 | eptp |= VMX_EPTP_AD_ENABLE_BIT; |
4285 | eptp |= (root_hpa & PAGE_MASK); | 4314 | eptp |= (root_hpa & PAGE_MASK); |
4286 | 4315 | ||
4287 | return eptp; | 4316 | return eptp; |
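For reference, the EPTP value built here packs the memory type into bits 2:0, the page-walk length minus one into bits 5:3, the accessed/dirty enable flag into bit 6, and the 4K-aligned root table address above bit 11. A standalone sketch of that encoding, with field positions taken from the Intel SDM (the literals below are illustrative, not the kernel's VMX_EPTP_* constants):

	/* Illustrative EPTP encoding; not the kernel's definitions. */
	static u64 example_eptp(u64 root_hpa, int levels, bool enable_ad)
	{
		u64 eptp = 6;                        /* bits 2:0 = 6 -> write-back      */
		eptp |= (u64)(levels - 1) << 3;      /* bits 5:3 = page-walk length - 1 */
		if (enable_ad)
			eptp |= 1ULL << 6;           /* bit 6: accessed/dirty enable    */
		eptp |= root_hpa & ~0xfffULL;        /* root table, 4K aligned          */
		return eptp;
	}

With levels = 5 this yields the VMX_EPTP_PWL_5 encoding selected when cpuid_maxphyaddr(vcpu) > 48.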
@@ -5243,10 +5272,24 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
5243 | return exec_control; | 5272 | return exec_control; |
5244 | } | 5273 | } |
5245 | 5274 | ||
5246 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 5275 | static bool vmx_rdrand_supported(void) |
5247 | { | 5276 | { |
5277 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
5278 | SECONDARY_EXEC_RDRAND; | ||
5279 | } | ||
5280 | |||
5281 | static bool vmx_rdseed_supported(void) | ||
5282 | { | ||
5283 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
5284 | SECONDARY_EXEC_RDSEED; | ||
5285 | } | ||
5286 | |||
5287 | static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | ||
5288 | { | ||
5289 | struct kvm_vcpu *vcpu = &vmx->vcpu; | ||
5290 | |||
5248 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 5291 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
5249 | if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) | 5292 | if (!cpu_need_virtualize_apic_accesses(vcpu)) |
5250 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 5293 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
5251 | if (vmx->vpid == 0) | 5294 | if (vmx->vpid == 0) |
5252 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 5295 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
@@ -5260,7 +5303,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
5260 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 5303 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
5261 | if (!ple_gap) | 5304 | if (!ple_gap) |
5262 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 5305 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
5263 | if (!kvm_vcpu_apicv_active(&vmx->vcpu)) | 5306 | if (!kvm_vcpu_apicv_active(vcpu)) |
5264 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 5307 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
5265 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 5308 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
5266 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 5309 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
@@ -5274,7 +5317,92 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
5274 | if (!enable_pml) | 5317 | if (!enable_pml) |
5275 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | 5318 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; |
5276 | 5319 | ||
5277 | return exec_control; | 5320 | if (vmx_xsaves_supported()) { |
5321 | /* Exposing XSAVES only when XSAVE is exposed */ | ||
5322 | bool xsaves_enabled = | ||
5323 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||
5324 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); | ||
5325 | |||
5326 | if (!xsaves_enabled) | ||
5327 | exec_control &= ~SECONDARY_EXEC_XSAVES; | ||
5328 | |||
5329 | if (nested) { | ||
5330 | if (xsaves_enabled) | ||
5331 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
5332 | SECONDARY_EXEC_XSAVES; | ||
5333 | else | ||
5334 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
5335 | ~SECONDARY_EXEC_XSAVES; | ||
5336 | } | ||
5337 | } | ||
5338 | |||
5339 | if (vmx_rdtscp_supported()) { | ||
5340 | bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); | ||
5341 | if (!rdtscp_enabled) | ||
5342 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
5343 | |||
5344 | if (nested) { | ||
5345 | if (rdtscp_enabled) | ||
5346 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
5347 | SECONDARY_EXEC_RDTSCP; | ||
5348 | else | ||
5349 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
5350 | ~SECONDARY_EXEC_RDTSCP; | ||
5351 | } | ||
5352 | } | ||
5353 | |||
5354 | if (vmx_invpcid_supported()) { | ||
5355 | /* Exposing INVPCID only when PCID is exposed */ | ||
5356 | bool invpcid_enabled = | ||
5357 | guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && | ||
5358 | guest_cpuid_has(vcpu, X86_FEATURE_PCID); | ||
5359 | |||
5360 | if (!invpcid_enabled) { | ||
5361 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
5362 | guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); | ||
5363 | } | ||
5364 | |||
5365 | if (nested) { | ||
5366 | if (invpcid_enabled) | ||
5367 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
5368 | SECONDARY_EXEC_ENABLE_INVPCID; | ||
5369 | else | ||
5370 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
5371 | ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
5372 | } | ||
5373 | } | ||
5374 | |||
5375 | if (vmx_rdrand_supported()) { | ||
5376 | bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); | ||
5377 | if (rdrand_enabled) | ||
5378 | exec_control &= ~SECONDARY_EXEC_RDRAND; | ||
5379 | |||
5380 | if (nested) { | ||
5381 | if (rdrand_enabled) | ||
5382 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
5383 | SECONDARY_EXEC_RDRAND; | ||
5384 | else | ||
5385 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
5386 | ~SECONDARY_EXEC_RDRAND; | ||
5387 | } | ||
5388 | } | ||
5389 | |||
5390 | if (vmx_rdseed_supported()) { | ||
5391 | bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); | ||
5392 | if (rdseed_enabled) | ||
5393 | exec_control &= ~SECONDARY_EXEC_RDSEED; | ||
5394 | |||
5395 | if (nested) { | ||
5396 | if (rdseed_enabled) | ||
5397 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
5398 | SECONDARY_EXEC_RDSEED; | ||
5399 | else | ||
5400 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
5401 | ~SECONDARY_EXEC_RDSEED; | ||
5402 | } | ||
5403 | } | ||
5404 | |||
5405 | vmx->secondary_exec_control = exec_control; | ||
5278 | } | 5406 | } |
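Every feature block above follows the same shape: clear the execution control when the guest's CPUID does not advertise the feature, and mirror the decision into the secondary controls offered to a nested hypervisor. Note the inverted sense for RDRAND and RDSEED: those are exiting controls, so they are cleared when the guest may use the instruction and left set (so the exit can inject #UD) when it may not. A hypothetical helper capturing the nested-mirroring half of the pattern, shown only to make the repetition explicit:

	/* Hypothetical refactoring sketch; the patch open-codes this per feature. */
	static void vmx_adjust_nested_secondary_ctl(struct vcpu_vmx *vmx,
						    bool enabled, u32 ctrl)
	{
		if (enabled)
			vmx->nested.nested_vmx_secondary_ctls_high |= ctrl;
		else
			vmx->nested.nested_vmx_secondary_ctls_high &= ~ctrl;
	}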
5279 | 5407 | ||
5280 | static void ept_set_mmio_spte_mask(void) | 5408 | static void ept_set_mmio_spte_mask(void) |
@@ -5318,8 +5446,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
5318 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 5446 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
5319 | 5447 | ||
5320 | if (cpu_has_secondary_exec_ctrls()) { | 5448 | if (cpu_has_secondary_exec_ctrls()) { |
5449 | vmx_compute_secondary_exec_control(vmx); | ||
5321 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 5450 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, |
5322 | vmx_secondary_exec_control(vmx)); | 5451 | vmx->secondary_exec_control); |
5323 | } | 5452 | } |
5324 | 5453 | ||
5325 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) { | 5454 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) { |
@@ -5357,6 +5486,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
5357 | vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ | 5486 | vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ |
5358 | #endif | 5487 | #endif |
5359 | 5488 | ||
5489 | if (cpu_has_vmx_vmfunc()) | ||
5490 | vmcs_write64(VM_FUNCTION_CONTROL, 0); | ||
5491 | |||
5360 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 5492 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
5361 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 5493 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
5362 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | 5494 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); |
@@ -5835,6 +5967,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) | |||
5835 | static int handle_triple_fault(struct kvm_vcpu *vcpu) | 5967 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
5836 | { | 5968 | { |
5837 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 5969 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
5970 | vcpu->mmio_needed = 0; | ||
5838 | return 0; | 5971 | return 0; |
5839 | } | 5972 | } |
5840 | 5973 | ||
@@ -6330,7 +6463,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
6330 | { | 6463 | { |
6331 | unsigned long exit_qualification; | 6464 | unsigned long exit_qualification; |
6332 | gpa_t gpa; | 6465 | gpa_t gpa; |
6333 | u32 error_code; | 6466 | u64 error_code; |
6334 | 6467 | ||
6335 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6468 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6336 | 6469 | ||
@@ -6362,9 +6495,10 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
6362 | EPT_VIOLATION_EXECUTABLE)) | 6495 | EPT_VIOLATION_EXECUTABLE)) |
6363 | ? PFERR_PRESENT_MASK : 0; | 6496 | ? PFERR_PRESENT_MASK : 0; |
6364 | 6497 | ||
6365 | vcpu->arch.gpa_available = true; | 6498 | error_code |= (exit_qualification & 0x100) != 0 ? |
6366 | vcpu->arch.exit_qualification = exit_qualification; | 6499 | PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; |
6367 | 6500 | ||
6501 | vcpu->arch.exit_qualification = exit_qualification; | ||
6368 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 6502 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
6369 | } | 6503 | } |
6370 | 6504 | ||
@@ -6373,23 +6507,20 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
6373 | int ret; | 6507 | int ret; |
6374 | gpa_t gpa; | 6508 | gpa_t gpa; |
6375 | 6509 | ||
6510 | /* | ||
6511 | * A nested guest cannot optimize MMIO vmexits, because we have an | ||
6512 | * nGPA here instead of the required GPA. | ||
6513 | */ | ||
6376 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 6514 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
6377 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | 6515 | if (!is_guest_mode(vcpu) && |
6516 | !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | ||
6378 | trace_kvm_fast_mmio(gpa); | 6517 | trace_kvm_fast_mmio(gpa); |
6379 | return kvm_skip_emulated_instruction(vcpu); | 6518 | return kvm_skip_emulated_instruction(vcpu); |
6380 | } | 6519 | } |
6381 | 6520 | ||
6382 | ret = handle_mmio_page_fault(vcpu, gpa, true); | 6521 | ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); |
6383 | vcpu->arch.gpa_available = true; | 6522 | if (ret >= 0) |
6384 | if (likely(ret == RET_MMIO_PF_EMULATE)) | 6523 | return ret; |
6385 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | ||
6386 | EMULATE_DONE; | ||
6387 | |||
6388 | if (unlikely(ret == RET_MMIO_PF_INVALID)) | ||
6389 | return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0); | ||
6390 | |||
6391 | if (unlikely(ret == RET_MMIO_PF_RETRY)) | ||
6392 | return 1; | ||
6393 | 6524 | ||
6394 | /* It is the real ept misconfig */ | 6525 | /* It is the real ept misconfig */ |
6395 | WARN_ON(1); | 6526 | WARN_ON(1); |
@@ -6611,7 +6742,8 @@ static __init int hardware_setup(void) | |||
6611 | init_vmcs_shadow_fields(); | 6742 | init_vmcs_shadow_fields(); |
6612 | 6743 | ||
6613 | if (!cpu_has_vmx_ept() || | 6744 | if (!cpu_has_vmx_ept() || |
6614 | !cpu_has_vmx_ept_4levels()) { | 6745 | !cpu_has_vmx_ept_4levels() || |
6746 | !cpu_has_vmx_ept_mt_wb()) { | ||
6615 | enable_ept = 0; | 6747 | enable_ept = 0; |
6616 | enable_unrestricted_guest = 0; | 6748 | enable_unrestricted_guest = 0; |
6617 | enable_ept_ad_bits = 0; | 6749 | enable_ept_ad_bits = 0; |
@@ -6754,7 +6886,13 @@ static int handle_pause(struct kvm_vcpu *vcpu) | |||
6754 | if (ple_gap) | 6886 | if (ple_gap) |
6755 | grow_ple_window(vcpu); | 6887 | grow_ple_window(vcpu); |
6756 | 6888 | ||
6757 | kvm_vcpu_on_spin(vcpu); | 6889 | /* |
6890 | * Intel SDM Vol. 3, Section 25.1.3 says: the "PAUSE-loop exiting" | ||
6891 | * VM-execution control is ignored if CPL > 0. OTOH, KVM | ||
6892 | * never sets PAUSE_EXITING and only sets PLE if supported, | ||
6893 | * so the vCPU must be at CPL 0 when it gets a PAUSE exit. | ||
6894 | */ | ||
6895 | kvm_vcpu_on_spin(vcpu, true); | ||
6758 | return kvm_skip_emulated_instruction(vcpu); | 6896 | return kvm_skip_emulated_instruction(vcpu); |
6759 | } | 6897 | } |
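The new second argument to kvm_vcpu_on_spin() tells the generic spin handler that the yielding vCPU was running in kernel mode, which is what lets the directed-yield heuristic skip vCPUs that were preempted in user space. A simplified sketch of that filter; the helper name and parameters are placeholders, not the upstream code:

	/* Placeholder sketch: should this boost candidate be skipped? */
	static bool skip_boost_candidate(bool yield_to_kernel_mode,
					 bool candidate_in_kernel)
	{
		/* Bypass user-mode candidates only when the yielder ran in the kernel. */
		return yield_to_kernel_mode && !candidate_in_kernel;
	}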
6760 | 6898 | ||
@@ -6769,6 +6907,12 @@ static int handle_mwait(struct kvm_vcpu *vcpu) | |||
6769 | return handle_nop(vcpu); | 6907 | return handle_nop(vcpu); |
6770 | } | 6908 | } |
6771 | 6909 | ||
6910 | static int handle_invalid_op(struct kvm_vcpu *vcpu) | ||
6911 | { | ||
6912 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
6913 | return 1; | ||
6914 | } | ||
6915 | |||
6772 | static int handle_monitor_trap(struct kvm_vcpu *vcpu) | 6916 | static int handle_monitor_trap(struct kvm_vcpu *vcpu) |
6773 | { | 6917 | { |
6774 | return 1; | 6918 | return 1; |
@@ -6985,7 +7129,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, | |||
6985 | * non-canonical form. This is the only check on the memory | 7129 | * non-canonical form. This is the only check on the memory |
6986 | * destination for long mode! | 7130 | * destination for long mode! |
6987 | */ | 7131 | */ |
6988 | exn = is_noncanonical_address(*ret); | 7132 | exn = is_noncanonical_address(*ret, vcpu); |
6989 | } else if (is_protmode(vcpu)) { | 7133 | } else if (is_protmode(vcpu)) { |
6990 | /* Protected mode: apply checks for segment validity in the | 7134 | /* Protected mode: apply checks for segment validity in the |
6991 | * following order: | 7135 | * following order: |
@@ -7149,19 +7293,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
7149 | return kvm_skip_emulated_instruction(vcpu); | 7293 | return kvm_skip_emulated_instruction(vcpu); |
7150 | } | 7294 | } |
7151 | 7295 | ||
7152 | page = nested_get_page(vcpu, vmptr); | 7296 | page = kvm_vcpu_gpa_to_page(vcpu, vmptr); |
7153 | if (page == NULL) { | 7297 | if (is_error_page(page)) { |
7154 | nested_vmx_failInvalid(vcpu); | 7298 | nested_vmx_failInvalid(vcpu); |
7155 | return kvm_skip_emulated_instruction(vcpu); | 7299 | return kvm_skip_emulated_instruction(vcpu); |
7156 | } | 7300 | } |
7157 | if (*(u32 *)kmap(page) != VMCS12_REVISION) { | 7301 | if (*(u32 *)kmap(page) != VMCS12_REVISION) { |
7158 | kunmap(page); | 7302 | kunmap(page); |
7159 | nested_release_page_clean(page); | 7303 | kvm_release_page_clean(page); |
7160 | nested_vmx_failInvalid(vcpu); | 7304 | nested_vmx_failInvalid(vcpu); |
7161 | return kvm_skip_emulated_instruction(vcpu); | 7305 | return kvm_skip_emulated_instruction(vcpu); |
7162 | } | 7306 | } |
7163 | kunmap(page); | 7307 | kunmap(page); |
7164 | nested_release_page_clean(page); | 7308 | kvm_release_page_clean(page); |
7165 | 7309 | ||
7166 | vmx->nested.vmxon_ptr = vmptr; | 7310 | vmx->nested.vmxon_ptr = vmptr; |
7167 | ret = enter_vmx_operation(vcpu); | 7311 | ret = enter_vmx_operation(vcpu); |
@@ -7242,16 +7386,16 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
7242 | kfree(vmx->nested.cached_vmcs12); | 7386 | kfree(vmx->nested.cached_vmcs12); |
7243 | /* Unpin physical memory we referred to in current vmcs02 */ | 7387 | /* Unpin physical memory we referred to in current vmcs02 */ |
7244 | if (vmx->nested.apic_access_page) { | 7388 | if (vmx->nested.apic_access_page) { |
7245 | nested_release_page(vmx->nested.apic_access_page); | 7389 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
7246 | vmx->nested.apic_access_page = NULL; | 7390 | vmx->nested.apic_access_page = NULL; |
7247 | } | 7391 | } |
7248 | if (vmx->nested.virtual_apic_page) { | 7392 | if (vmx->nested.virtual_apic_page) { |
7249 | nested_release_page(vmx->nested.virtual_apic_page); | 7393 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); |
7250 | vmx->nested.virtual_apic_page = NULL; | 7394 | vmx->nested.virtual_apic_page = NULL; |
7251 | } | 7395 | } |
7252 | if (vmx->nested.pi_desc_page) { | 7396 | if (vmx->nested.pi_desc_page) { |
7253 | kunmap(vmx->nested.pi_desc_page); | 7397 | kunmap(vmx->nested.pi_desc_page); |
7254 | nested_release_page(vmx->nested.pi_desc_page); | 7398 | kvm_release_page_dirty(vmx->nested.pi_desc_page); |
7255 | vmx->nested.pi_desc_page = NULL; | 7399 | vmx->nested.pi_desc_page = NULL; |
7256 | vmx->nested.pi_desc = NULL; | 7400 | vmx->nested.pi_desc = NULL; |
7257 | } | 7401 | } |
@@ -7618,15 +7762,15 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7618 | if (vmx->nested.current_vmptr != vmptr) { | 7762 | if (vmx->nested.current_vmptr != vmptr) { |
7619 | struct vmcs12 *new_vmcs12; | 7763 | struct vmcs12 *new_vmcs12; |
7620 | struct page *page; | 7764 | struct page *page; |
7621 | page = nested_get_page(vcpu, vmptr); | 7765 | page = kvm_vcpu_gpa_to_page(vcpu, vmptr); |
7622 | if (page == NULL) { | 7766 | if (is_error_page(page)) { |
7623 | nested_vmx_failInvalid(vcpu); | 7767 | nested_vmx_failInvalid(vcpu); |
7624 | return kvm_skip_emulated_instruction(vcpu); | 7768 | return kvm_skip_emulated_instruction(vcpu); |
7625 | } | 7769 | } |
7626 | new_vmcs12 = kmap(page); | 7770 | new_vmcs12 = kmap(page); |
7627 | if (new_vmcs12->revision_id != VMCS12_REVISION) { | 7771 | if (new_vmcs12->revision_id != VMCS12_REVISION) { |
7628 | kunmap(page); | 7772 | kunmap(page); |
7629 | nested_release_page_clean(page); | 7773 | kvm_release_page_clean(page); |
7630 | nested_vmx_failValid(vcpu, | 7774 | nested_vmx_failValid(vcpu, |
7631 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); | 7775 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); |
7632 | return kvm_skip_emulated_instruction(vcpu); | 7776 | return kvm_skip_emulated_instruction(vcpu); |
@@ -7639,7 +7783,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
7639 | */ | 7783 | */ |
7640 | memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); | 7784 | memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); |
7641 | kunmap(page); | 7785 | kunmap(page); |
7642 | nested_release_page_clean(page); | 7786 | kvm_release_page_clean(page); |
7643 | 7787 | ||
7644 | set_current_vmptr(vmx, vmptr); | 7788 | set_current_vmptr(vmx, vmptr); |
7645 | } | 7789 | } |
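The conversions from nested_get_page()/nested_release_page*() to kvm_vcpu_gpa_to_page() plus kvm_release_page_clean()/kvm_release_page_dirty() all follow the same translate/map/use/release sequence. A compact illustration of that pattern, modeled on the revision-ID checks above (the helper name and error code are for illustration only):

	/* Illustration of the guest-page access pattern used throughout this patch. */
	static int example_read_guest_u32(struct kvm_vcpu *vcpu, gpa_t gpa, u32 *val)
	{
		struct page *page = kvm_vcpu_gpa_to_page(vcpu, gpa);

		if (is_error_page(page))
			return -EFAULT;
		*val = *(u32 *)kmap(page);   /* first dword of the guest page */
		kunmap(page);
		kvm_release_page_clean(page);
		return 0;
	}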
@@ -7790,7 +7934,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
7790 | 7934 | ||
7791 | switch (type) { | 7935 | switch (type) { |
7792 | case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: | 7936 | case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: |
7793 | if (is_noncanonical_address(operand.gla)) { | 7937 | if (is_noncanonical_address(operand.gla, vcpu)) { |
7794 | nested_vmx_failValid(vcpu, | 7938 | nested_vmx_failValid(vcpu, |
7795 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | 7939 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); |
7796 | return kvm_skip_emulated_instruction(vcpu); | 7940 | return kvm_skip_emulated_instruction(vcpu); |
@@ -7847,6 +7991,124 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) | |||
7847 | return 1; | 7991 | return 1; |
7848 | } | 7992 | } |
7849 | 7993 | ||
7994 | static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) | ||
7995 | { | ||
7996 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
7997 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
7998 | |||
7999 | /* Check for memory type validity */ | ||
8000 | switch (address & VMX_EPTP_MT_MASK) { | ||
8001 | case VMX_EPTP_MT_UC: | ||
8002 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) | ||
8003 | return false; | ||
8004 | break; | ||
8005 | case VMX_EPTP_MT_WB: | ||
8006 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) | ||
8007 | return false; | ||
8008 | break; | ||
8009 | default: | ||
8010 | return false; | ||
8011 | } | ||
8012 | |||
8013 | /* Only a page-walk length of 4 is valid. */ | ||
8014 | if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) | ||
8015 | return false; | ||
8016 | |||
8017 | /* Reserved bits should not be set */ | ||
8018 | if (address >> maxphyaddr || ((address >> 7) & 0x1f)) | ||
8019 | return false; | ||
8020 | |||
8021 | /* AD, if set, should be supported */ | ||
8022 | if (address & VMX_EPTP_AD_ENABLE_BIT) { | ||
8023 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) | ||
8024 | return false; | ||
8025 | } | ||
8026 | |||
8027 | return true; | ||
8028 | } | ||
8029 | |||
8030 | static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, | ||
8031 | struct vmcs12 *vmcs12) | ||
8032 | { | ||
8033 | u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; | ||
8034 | u64 address; | ||
8035 | bool accessed_dirty; | ||
8036 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | ||
8037 | |||
8038 | if (!nested_cpu_has_eptp_switching(vmcs12) || | ||
8039 | !nested_cpu_has_ept(vmcs12)) | ||
8040 | return 1; | ||
8041 | |||
8042 | if (index >= VMFUNC_EPTP_ENTRIES) | ||
8043 | return 1; | ||
8044 | |||
8045 | |||
8046 | if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, | ||
8047 | &address, index * 8, 8)) | ||
8048 | return 1; | ||
8049 | |||
8050 | accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT); | ||
8051 | |||
8052 | /* | ||
8053 | * If the (L2) guest does a VMFUNC to the currently | ||
8054 | * active EPT pointer, we don't have to do anything else. | ||
8055 | */ | ||
8056 | if (vmcs12->ept_pointer != address) { | ||
8057 | if (!valid_ept_address(vcpu, address)) | ||
8058 | return 1; | ||
8059 | |||
8060 | kvm_mmu_unload(vcpu); | ||
8061 | mmu->ept_ad = accessed_dirty; | ||
8062 | mmu->base_role.ad_disabled = !accessed_dirty; | ||
8063 | vmcs12->ept_pointer = address; | ||
8064 | /* | ||
8065 | * TODO: Check what the correct approach is if the | ||
8066 | * MMU reload fails. Currently, we just let the next | ||
8067 | * reload potentially fail. | ||
8068 | */ | ||
8069 | kvm_mmu_reload(vcpu); | ||
8070 | } | ||
8071 | |||
8072 | return 0; | ||
8073 | } | ||
8074 | |||
8075 | static int handle_vmfunc(struct kvm_vcpu *vcpu) | ||
8076 | { | ||
8077 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
8078 | struct vmcs12 *vmcs12; | ||
8079 | u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
8080 | |||
8081 | /* | ||
8082 | * VMFUNC is only supported for nested guests, but we always enable the | ||
8083 | * secondary control for simplicity; for non-nested mode, fake that we | ||
8084 | * didn't by injecting #UD. | ||
8085 | */ | ||
8086 | if (!is_guest_mode(vcpu)) { | ||
8087 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
8088 | return 1; | ||
8089 | } | ||
8090 | |||
8091 | vmcs12 = get_vmcs12(vcpu); | ||
8092 | if ((vmcs12->vm_function_control & (1 << function)) == 0) | ||
8093 | goto fail; | ||
8094 | |||
8095 | switch (function) { | ||
8096 | case 0: | ||
8097 | if (nested_vmx_eptp_switching(vcpu, vmcs12)) | ||
8098 | goto fail; | ||
8099 | break; | ||
8100 | default: | ||
8101 | goto fail; | ||
8102 | } | ||
8103 | return kvm_skip_emulated_instruction(vcpu); | ||
8104 | |||
8105 | fail: | ||
8106 | nested_vmx_vmexit(vcpu, vmx->exit_reason, | ||
8107 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
8108 | vmcs_readl(EXIT_QUALIFICATION)); | ||
8109 | return 1; | ||
8110 | } | ||
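From the L2 side, the emulated VM function is reached with the VMFUNC instruction: EAX selects the function (0, EPTP switching, is the only one handled above) and ECX indexes L1's EPTP list, a guest page holding up to 512 64-bit pointers. A guest-side sketch of such a call, assuming L1 has set bit 0 of VM_FUNCTION_CONTROL and populated the list; this is not part of the patch:

	/* Guest-side illustration: switch to EPTP list entry 'index'. */
	static inline void l2_vmfunc_eptp_switch(unsigned int index)
	{
		/* VMFUNC is encoded as 0f 01 d4; EAX = 0 selects EPTP switching. */
		asm volatile(".byte 0x0f, 0x01, 0xd4"
			     : : "a" (0), "c" (index) : "memory");
	}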
8111 | |||
7850 | /* | 8112 | /* |
7851 | * The exit handlers return 1 if the exit was handled fully and guest execution | 8113 | * The exit handlers return 1 if the exit was handled fully and guest execution |
7852 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 8114 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -7894,9 +8156,12 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
7894 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, | 8156 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, |
7895 | [EXIT_REASON_INVEPT] = handle_invept, | 8157 | [EXIT_REASON_INVEPT] = handle_invept, |
7896 | [EXIT_REASON_INVVPID] = handle_invvpid, | 8158 | [EXIT_REASON_INVVPID] = handle_invvpid, |
8159 | [EXIT_REASON_RDRAND] = handle_invalid_op, | ||
8160 | [EXIT_REASON_RDSEED] = handle_invalid_op, | ||
7897 | [EXIT_REASON_XSAVES] = handle_xsaves, | 8161 | [EXIT_REASON_XSAVES] = handle_xsaves, |
7898 | [EXIT_REASON_XRSTORS] = handle_xrstors, | 8162 | [EXIT_REASON_XRSTORS] = handle_xrstors, |
7899 | [EXIT_REASON_PML_FULL] = handle_pml_full, | 8163 | [EXIT_REASON_PML_FULL] = handle_pml_full, |
8164 | [EXIT_REASON_VMFUNC] = handle_vmfunc, | ||
7900 | [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, | 8165 | [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, |
7901 | }; | 8166 | }; |
7902 | 8167 | ||
@@ -8212,6 +8477,10 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) | |||
8212 | * table is L0's fault. | 8477 | * table is L0's fault. |
8213 | */ | 8478 | */ |
8214 | return false; | 8479 | return false; |
8480 | case EXIT_REASON_INVPCID: | ||
8481 | return | ||
8482 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && | ||
8483 | nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); | ||
8215 | case EXIT_REASON_WBINVD: | 8484 | case EXIT_REASON_WBINVD: |
8216 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 8485 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
8217 | case EXIT_REASON_XSETBV: | 8486 | case EXIT_REASON_XSETBV: |
@@ -8229,6 +8498,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) | |||
8229 | case EXIT_REASON_PML_FULL: | 8498 | case EXIT_REASON_PML_FULL: |
8230 | /* We emulate PML support to L1. */ | 8499 | /* We emulate PML support to L1. */ |
8231 | return false; | 8500 | return false; |
8501 | case EXIT_REASON_VMFUNC: | ||
8502 | /* VM functions are emulated through L2->L0 vmexits. */ | ||
8503 | return false; | ||
8232 | default: | 8504 | default: |
8233 | return true; | 8505 | return true; |
8234 | } | 8506 | } |
@@ -8487,7 +8759,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
8487 | u32 vectoring_info = vmx->idt_vectoring_info; | 8759 | u32 vectoring_info = vmx->idt_vectoring_info; |
8488 | 8760 | ||
8489 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); | 8761 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
8490 | vcpu->arch.gpa_available = false; | ||
8491 | 8762 | ||
8492 | /* | 8763 | /* |
8493 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more | 8764 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more |
@@ -9341,11 +9612,6 @@ static void __init vmx_check_processor_compat(void *rtn) | |||
9341 | } | 9612 | } |
9342 | } | 9613 | } |
9343 | 9614 | ||
9344 | static int get_ept_level(void) | ||
9345 | { | ||
9346 | return VMX_EPT_DEFAULT_GAW + 1; | ||
9347 | } | ||
9348 | |||
9349 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | 9615 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
9350 | { | 9616 | { |
9351 | u8 cache; | 9617 | u8 cache; |
@@ -9462,39 +9728,13 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) | |||
9462 | 9728 | ||
9463 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 9729 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
9464 | { | 9730 | { |
9465 | struct kvm_cpuid_entry2 *best; | ||
9466 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9731 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
9467 | u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); | ||
9468 | 9732 | ||
9469 | if (vmx_rdtscp_supported()) { | 9733 | if (cpu_has_secondary_exec_ctrls()) { |
9470 | bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); | 9734 | vmx_compute_secondary_exec_control(vmx); |
9471 | if (!rdtscp_enabled) | 9735 | vmcs_set_secondary_exec_control(vmx->secondary_exec_control); |
9472 | secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; | ||
9473 | |||
9474 | if (nested) { | ||
9475 | if (rdtscp_enabled) | ||
9476 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
9477 | SECONDARY_EXEC_RDTSCP; | ||
9478 | else | ||
9479 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
9480 | ~SECONDARY_EXEC_RDTSCP; | ||
9481 | } | ||
9482 | } | ||
9483 | |||
9484 | /* Exposing INVPCID only when PCID is exposed */ | ||
9485 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); | ||
9486 | if (vmx_invpcid_supported() && | ||
9487 | (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || | ||
9488 | !guest_cpuid_has_pcid(vcpu))) { | ||
9489 | secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
9490 | |||
9491 | if (best) | ||
9492 | best->ebx &= ~bit(X86_FEATURE_INVPCID); | ||
9493 | } | 9736 | } |
9494 | 9737 | ||
9495 | if (cpu_has_secondary_exec_ctrls()) | ||
9496 | vmcs_set_secondary_exec_control(secondary_exec_ctl); | ||
9497 | |||
9498 | if (nested_vmx_allowed(vcpu)) | 9738 | if (nested_vmx_allowed(vcpu)) |
9499 | to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= | 9739 | to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= |
9500 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | 9740 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
@@ -9535,7 +9775,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | |||
9535 | 9775 | ||
9536 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) | 9776 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) |
9537 | { | 9777 | { |
9538 | return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT; | 9778 | return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT; |
9539 | } | 9779 | } |
9540 | 9780 | ||
9541 | /* Callbacks for nested_ept_init_mmu_context: */ | 9781 | /* Callbacks for nested_ept_init_mmu_context: */ |
@@ -9548,18 +9788,15 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | |||
9548 | 9788 | ||
9549 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | 9789 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) |
9550 | { | 9790 | { |
9551 | bool wants_ad; | ||
9552 | |||
9553 | WARN_ON(mmu_is_nested(vcpu)); | 9791 | WARN_ON(mmu_is_nested(vcpu)); |
9554 | wants_ad = nested_ept_ad_enabled(vcpu); | 9792 | if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) |
9555 | if (wants_ad && !enable_ept_ad_bits) | ||
9556 | return 1; | 9793 | return 1; |
9557 | 9794 | ||
9558 | kvm_mmu_unload(vcpu); | 9795 | kvm_mmu_unload(vcpu); |
9559 | kvm_init_shadow_ept_mmu(vcpu, | 9796 | kvm_init_shadow_ept_mmu(vcpu, |
9560 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & | 9797 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & |
9561 | VMX_EPT_EXECUTE_ONLY_BIT, | 9798 | VMX_EPT_EXECUTE_ONLY_BIT, |
9562 | wants_ad); | 9799 | nested_ept_ad_enabled(vcpu)); |
9563 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 9800 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
9564 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | 9801 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; |
9565 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | 9802 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; |
@@ -9610,6 +9847,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
9610 | struct vmcs12 *vmcs12) | 9847 | struct vmcs12 *vmcs12) |
9611 | { | 9848 | { |
9612 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9849 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
9850 | struct page *page; | ||
9613 | u64 hpa; | 9851 | u64 hpa; |
9614 | 9852 | ||
9615 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | 9853 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
@@ -9619,17 +9857,19 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
9619 | * physical address remains valid. We keep a reference | 9857 | * physical address remains valid. We keep a reference |
9620 | * to it so we can release it later. | 9858 | * to it so we can release it later. |
9621 | */ | 9859 | */ |
9622 | if (vmx->nested.apic_access_page) /* shouldn't happen */ | 9860 | if (vmx->nested.apic_access_page) { /* shouldn't happen */ |
9623 | nested_release_page(vmx->nested.apic_access_page); | 9861 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
9624 | vmx->nested.apic_access_page = | 9862 | vmx->nested.apic_access_page = NULL; |
9625 | nested_get_page(vcpu, vmcs12->apic_access_addr); | 9863 | } |
9864 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); | ||
9626 | /* | 9865 | /* |
9627 | * If translation failed, no matter: This feature asks | 9866 | * If translation failed, no matter: This feature asks |
9628 | * to exit when accessing the given address, and if it | 9867 | * to exit when accessing the given address, and if it |
9629 | * can never be accessed, this feature won't do | 9868 | * can never be accessed, this feature won't do |
9630 | * anything anyway. | 9869 | * anything anyway. |
9631 | */ | 9870 | */ |
9632 | if (vmx->nested.apic_access_page) { | 9871 | if (!is_error_page(page)) { |
9872 | vmx->nested.apic_access_page = page; | ||
9633 | hpa = page_to_phys(vmx->nested.apic_access_page); | 9873 | hpa = page_to_phys(vmx->nested.apic_access_page); |
9634 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | 9874 | vmcs_write64(APIC_ACCESS_ADDR, hpa); |
9635 | } else { | 9875 | } else { |
@@ -9644,10 +9884,11 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
9644 | } | 9884 | } |
9645 | 9885 | ||
9646 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 9886 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
9647 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ | 9887 | if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ |
9648 | nested_release_page(vmx->nested.virtual_apic_page); | 9888 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); |
9649 | vmx->nested.virtual_apic_page = | 9889 | vmx->nested.virtual_apic_page = NULL; |
9650 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); | 9890 | } |
9891 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); | ||
9651 | 9892 | ||
9652 | /* | 9893 | /* |
9653 | * If translation failed, VM entry will fail because | 9894 | * If translation failed, VM entry will fail because |
@@ -9662,7 +9903,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
9662 | * control. But such a configuration is useless, so | 9903 | * control. But such a configuration is useless, so |
9663 | * let's keep the code simple. | 9904 | * let's keep the code simple. |
9664 | */ | 9905 | */ |
9665 | if (vmx->nested.virtual_apic_page) { | 9906 | if (!is_error_page(page)) { |
9907 | vmx->nested.virtual_apic_page = page; | ||
9666 | hpa = page_to_phys(vmx->nested.virtual_apic_page); | 9908 | hpa = page_to_phys(vmx->nested.virtual_apic_page); |
9667 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); | 9909 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); |
9668 | } | 9910 | } |
@@ -9671,16 +9913,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
9671 | if (nested_cpu_has_posted_intr(vmcs12)) { | 9913 | if (nested_cpu_has_posted_intr(vmcs12)) { |
9672 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | 9914 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ |
9673 | kunmap(vmx->nested.pi_desc_page); | 9915 | kunmap(vmx->nested.pi_desc_page); |
9674 | nested_release_page(vmx->nested.pi_desc_page); | 9916 | kvm_release_page_dirty(vmx->nested.pi_desc_page); |
9917 | vmx->nested.pi_desc_page = NULL; | ||
9675 | } | 9918 | } |
9676 | vmx->nested.pi_desc_page = | 9919 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); |
9677 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); | 9920 | if (is_error_page(page)) |
9678 | vmx->nested.pi_desc = | ||
9679 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); | ||
9680 | if (!vmx->nested.pi_desc) { | ||
9681 | nested_release_page_clean(vmx->nested.pi_desc_page); | ||
9682 | return; | 9921 | return; |
9683 | } | 9922 | vmx->nested.pi_desc_page = page; |
9923 | vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); | ||
9684 | vmx->nested.pi_desc = | 9924 | vmx->nested.pi_desc = |
9685 | (struct pi_desc *)((void *)vmx->nested.pi_desc + | 9925 | (struct pi_desc *)((void *)vmx->nested.pi_desc + |
9686 | (unsigned long)(vmcs12->posted_intr_desc_addr & | 9926 | (unsigned long)(vmcs12->posted_intr_desc_addr & |
@@ -9746,6 +9986,18 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, | |||
9746 | return 0; | 9986 | return 0; |
9747 | } | 9987 | } |
9748 | 9988 | ||
9989 | static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, | ||
9990 | struct vmcs12 *vmcs12) | ||
9991 | { | ||
9992 | if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) | ||
9993 | return 0; | ||
9994 | |||
9995 | if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) | ||
9996 | return -EINVAL; | ||
9997 | |||
9998 | return 0; | ||
9999 | } | ||
10000 | |||
9749 | /* | 10001 | /* |
9750 | * Merge L0's and L1's MSR bitmap, return false to indicate that | 10002 | * Merge L0's and L1's MSR bitmap, return false to indicate that |
9751 | * we do not use the hardware. | 10003 | * we do not use the hardware. |
@@ -9762,8 +10014,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | |||
9762 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) | 10014 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) |
9763 | return false; | 10015 | return false; |
9764 | 10016 | ||
9765 | page = nested_get_page(vcpu, vmcs12->msr_bitmap); | 10017 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); |
9766 | if (!page) | 10018 | if (is_error_page(page)) |
9767 | return false; | 10019 | return false; |
9768 | msr_bitmap_l1 = (unsigned long *)kmap(page); | 10020 | msr_bitmap_l1 = (unsigned long *)kmap(page); |
9769 | 10021 | ||
@@ -9793,7 +10045,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | |||
9793 | } | 10045 | } |
9794 | } | 10046 | } |
9795 | kunmap(page); | 10047 | kunmap(page); |
9796 | nested_release_page_clean(page); | 10048 | kvm_release_page_clean(page); |
9797 | 10049 | ||
9798 | return true; | 10050 | return true; |
9799 | } | 10051 | } |
@@ -10187,13 +10439,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10187 | enable_ept ? vmcs12->page_fault_error_code_match : 0); | 10439 | enable_ept ? vmcs12->page_fault_error_code_match : 0); |
10188 | 10440 | ||
10189 | if (cpu_has_secondary_exec_ctrls()) { | 10441 | if (cpu_has_secondary_exec_ctrls()) { |
10190 | exec_control = vmx_secondary_exec_control(vmx); | 10442 | exec_control = vmx->secondary_exec_control; |
10191 | 10443 | ||
10192 | /* Take the following fields only from vmcs12 */ | 10444 | /* Take the following fields only from vmcs12 */ |
10193 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 10445 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
10446 | SECONDARY_EXEC_ENABLE_INVPCID | | ||
10194 | SECONDARY_EXEC_RDTSCP | | 10447 | SECONDARY_EXEC_RDTSCP | |
10448 | SECONDARY_EXEC_XSAVES | | ||
10195 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 10449 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
10196 | SECONDARY_EXEC_APIC_REGISTER_VIRT); | 10450 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
10451 | SECONDARY_EXEC_ENABLE_VMFUNC); | ||
10197 | if (nested_cpu_has(vmcs12, | 10452 | if (nested_cpu_has(vmcs12, |
10198 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { | 10453 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { |
10199 | vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & | 10454 | vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & |
@@ -10201,6 +10456,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
10201 | exec_control |= vmcs12_exec_ctrl; | 10456 | exec_control |= vmcs12_exec_ctrl; |
10202 | } | 10457 | } |
10203 | 10458 | ||
10459 | /* All VMFUNCs are currently emulated through L0 vmexits. */ | ||
10460 | if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC) | ||
10461 | vmcs_write64(VM_FUNCTION_CONTROL, 0); | ||
10462 | |||
10204 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { | 10463 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { |
10205 | vmcs_write64(EOI_EXIT_BITMAP0, | 10464 | vmcs_write64(EOI_EXIT_BITMAP0, |
10206 | vmcs12->eoi_exit_bitmap0); | 10465 | vmcs12->eoi_exit_bitmap0); |
@@ -10426,6 +10685,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
10426 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) | 10685 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) |
10427 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10686 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10428 | 10687 | ||
10688 | if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) | ||
10689 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
10690 | |||
10429 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) | 10691 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) |
10430 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10692 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10431 | 10693 | ||
@@ -10453,6 +10715,18 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
10453 | vmx->nested.nested_vmx_entry_ctls_high)) | 10715 | vmx->nested.nested_vmx_entry_ctls_high)) |
10454 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10716 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10455 | 10717 | ||
10718 | if (nested_cpu_has_vmfunc(vmcs12)) { | ||
10719 | if (vmcs12->vm_function_control & | ||
10720 | ~vmx->nested.nested_vmx_vmfunc_controls) | ||
10721 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
10722 | |||
10723 | if (nested_cpu_has_eptp_switching(vmcs12)) { | ||
10724 | if (!nested_cpu_has_ept(vmcs12) || | ||
10725 | !page_address_valid(vcpu, vmcs12->eptp_list_address)) | ||
10726 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
10727 | } | ||
10728 | } | ||
10729 | |||
10456 | if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) | 10730 | if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) |
10457 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10731 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
10458 | 10732 | ||
@@ -10699,7 +10973,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
10699 | u32 idt_vectoring; | 10973 | u32 idt_vectoring; |
10700 | unsigned int nr; | 10974 | unsigned int nr; |
10701 | 10975 | ||
10702 | if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { | 10976 | if (vcpu->arch.exception.injected) { |
10703 | nr = vcpu->arch.exception.nr; | 10977 | nr = vcpu->arch.exception.nr; |
10704 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | 10978 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; |
10705 | 10979 | ||
@@ -10738,12 +11012,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
10738 | static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | 11012 | static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) |
10739 | { | 11013 | { |
10740 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 11014 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
11015 | unsigned long exit_qual; | ||
10741 | 11016 | ||
10742 | if (vcpu->arch.exception.pending || | 11017 | if (kvm_event_needs_reinjection(vcpu)) |
10743 | vcpu->arch.nmi_injected || | ||
10744 | vcpu->arch.interrupt.pending) | ||
10745 | return -EBUSY; | 11018 | return -EBUSY; |
10746 | 11019 | ||
11020 | if (vcpu->arch.exception.pending && | ||
11021 | nested_vmx_check_exception(vcpu, &exit_qual)) { | ||
11022 | if (vmx->nested.nested_run_pending) | ||
11023 | return -EBUSY; | ||
11024 | nested_vmx_inject_exception_vmexit(vcpu, exit_qual); | ||
11025 | vcpu->arch.exception.pending = false; | ||
11026 | return 0; | ||
11027 | } | ||
11028 | |||
10747 | if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && | 11029 | if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && |
10748 | vmx->nested.preemption_timer_expired) { | 11030 | vmx->nested.preemption_timer_expired) { |
10749 | if (vmx->nested.nested_run_pending) | 11031 | if (vmx->nested.nested_run_pending) |
@@ -11184,16 +11466,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
11184 | 11466 | ||
11185 | /* Unpin physical memory we referred to in vmcs02 */ | 11467 | /* Unpin physical memory we referred to in vmcs02 */ |
11186 | if (vmx->nested.apic_access_page) { | 11468 | if (vmx->nested.apic_access_page) { |
11187 | nested_release_page(vmx->nested.apic_access_page); | 11469 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
11188 | vmx->nested.apic_access_page = NULL; | 11470 | vmx->nested.apic_access_page = NULL; |
11189 | } | 11471 | } |
11190 | if (vmx->nested.virtual_apic_page) { | 11472 | if (vmx->nested.virtual_apic_page) { |
11191 | nested_release_page(vmx->nested.virtual_apic_page); | 11473 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); |
11192 | vmx->nested.virtual_apic_page = NULL; | 11474 | vmx->nested.virtual_apic_page = NULL; |
11193 | } | 11475 | } |
11194 | if (vmx->nested.pi_desc_page) { | 11476 | if (vmx->nested.pi_desc_page) { |
11195 | kunmap(vmx->nested.pi_desc_page); | 11477 | kunmap(vmx->nested.pi_desc_page); |
11196 | nested_release_page(vmx->nested.pi_desc_page); | 11478 | kvm_release_page_dirty(vmx->nested.pi_desc_page); |
11197 | vmx->nested.pi_desc_page = NULL; | 11479 | vmx->nested.pi_desc_page = NULL; |
11198 | vmx->nested.pi_desc = NULL; | 11480 | vmx->nested.pi_desc = NULL; |
11199 | } | 11481 | } |
@@ -11369,14 +11651,14 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) | |||
11369 | 11651 | ||
11370 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; | 11652 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; |
11371 | 11653 | ||
11372 | page = nested_get_page(vcpu, vmcs12->pml_address); | 11654 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); |
11373 | if (!page) | 11655 | if (is_error_page(page)) |
11374 | return 0; | 11656 | return 0; |
11375 | 11657 | ||
11376 | pml_address = kmap(page); | 11658 | pml_address = kmap(page); |
11377 | pml_address[vmcs12->guest_pml_index--] = gpa; | 11659 | pml_address[vmcs12->guest_pml_index--] = gpa; |
11378 | kunmap(page); | 11660 | kunmap(page); |
11379 | nested_release_page_clean(page); | 11661 | kvm_release_page_clean(page); |
11380 | } | 11662 | } |
11381 | 11663 | ||
11382 | return 0; | 11664 | return 0; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef5102f80497..6069af86da3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -311,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
311 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); | 311 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); |
312 | u64 new_state = msr_info->data & | 312 | u64 new_state = msr_info->data & |
313 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); | 313 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); |
314 | u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | | 314 | u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | |
315 | 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE); | 315 | (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); |
316 | 316 | ||
317 | if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE) | ||
318 | return 1; | ||
317 | if (!msr_info->host_initiated && | 319 | if (!msr_info->host_initiated && |
318 | ((msr_info->data & reserved_bits) != 0 || | 320 | ((new_state == MSR_IA32_APICBASE_ENABLE && |
319 | new_state == X2APIC_ENABLE || | ||
320 | (new_state == MSR_IA32_APICBASE_ENABLE && | ||
321 | old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || | 321 | old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || |
322 | (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && | 322 | (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && |
323 | old_state == 0))) | 323 | old_state == 0))) |
@@ -390,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
390 | 390 | ||
391 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 391 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
392 | 392 | ||
393 | if (!vcpu->arch.exception.pending) { | 393 | if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { |
394 | queue: | 394 | queue: |
395 | if (has_error && !is_protmode(vcpu)) | 395 | if (has_error && !is_protmode(vcpu)) |
396 | has_error = false; | 396 | has_error = false; |
397 | vcpu->arch.exception.pending = true; | 397 | if (reinject) { |
398 | /* | ||
399 | * On vmentry, vcpu->arch.exception.pending is only | ||
400 | * true if an event injection was blocked by | ||
401 | * nested_run_pending. In that case, however, | ||
402 | * vcpu_enter_guest requests an immediate exit, | ||
403 | * and the guest shouldn't proceed far enough to | ||
404 | * need reinjection. | ||
405 | */ | ||
406 | WARN_ON_ONCE(vcpu->arch.exception.pending); | ||
407 | vcpu->arch.exception.injected = true; | ||
408 | } else { | ||
409 | vcpu->arch.exception.pending = true; | ||
410 | vcpu->arch.exception.injected = false; | ||
411 | } | ||
398 | vcpu->arch.exception.has_error_code = has_error; | 412 | vcpu->arch.exception.has_error_code = has_error; |
399 | vcpu->arch.exception.nr = nr; | 413 | vcpu->arch.exception.nr = nr; |
400 | vcpu->arch.exception.error_code = error_code; | 414 | vcpu->arch.exception.error_code = error_code; |
401 | vcpu->arch.exception.reinject = reinject; | ||
402 | return; | 415 | return; |
403 | } | 416 | } |
404 | 417 | ||
@@ -413,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
413 | class2 = exception_class(nr); | 426 | class2 = exception_class(nr); |
414 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) | 427 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) |
415 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { | 428 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { |
416 | /* generate double fault per SDM Table 5-5 */ | 429 | /* |
430 | * Generate double fault per SDM Table 5-5. Set | ||
431 | * exception.pending = true so that the double fault | ||
432 | * can trigger a nested vmexit. | ||
433 | */ | ||
417 | vcpu->arch.exception.pending = true; | 434 | vcpu->arch.exception.pending = true; |
435 | vcpu->arch.exception.injected = false; | ||
418 | vcpu->arch.exception.has_error_code = true; | 436 | vcpu->arch.exception.has_error_code = true; |
419 | vcpu->arch.exception.nr = DF_VECTOR; | 437 | vcpu->arch.exception.nr = DF_VECTOR; |
420 | vcpu->arch.exception.error_code = 0; | 438 | vcpu->arch.exception.error_code = 0; |
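The reworked bookkeeping separates an exception that is still pending (queued, and therefore still able to be turned into a nested vmexit) from one that has already been injected into the hardware event-injection field for the next entry. A minimal model of the state involved, simplified from the hunk above rather than copied from the kernel headers:

	/* Simplified model only; the real fields live in struct kvm_vcpu_arch. */
	struct queued_exception_model {
		bool pending;       /* queued; may still become an L1 vmexit  */
		bool injected;      /* already programmed for event injection */
		bool has_error_code;
		u8   nr;            /* vector                                 */
		u32  error_code;
	};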
@@ -755,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
755 | if (cr4 & CR4_RESERVED_BITS) | 773 | if (cr4 & CR4_RESERVED_BITS) |
756 | return 1; | 774 | return 1; |
757 | 775 | ||
758 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) | 776 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) |
777 | return 1; | ||
778 | |||
779 | if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) | ||
759 | return 1; | 780 | return 1; |
760 | 781 | ||
761 | if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) | 782 | if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) |
762 | return 1; | 783 | return 1; |
763 | 784 | ||
764 | if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP)) | 785 | if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) |
765 | return 1; | 786 | return 1; |
766 | 787 | ||
767 | if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE)) | 788 | if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) |
768 | return 1; | 789 | return 1; |
769 | 790 | ||
770 | if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE)) | 791 | if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) |
771 | return 1; | 792 | return 1; |
772 | 793 | ||
773 | if (is_long_mode(vcpu)) { | 794 | if (is_long_mode(vcpu)) { |
@@ -780,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
780 | return 1; | 801 | return 1; |
781 | 802 | ||
782 | if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { | 803 | if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { |
783 | if (!guest_cpuid_has_pcid(vcpu)) | 804 | if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) |
784 | return 1; | 805 | return 1; |
785 | 806 | ||
786 | /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ | 807 | /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ |
@@ -814,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
814 | return 0; | 835 | return 0; |
815 | } | 836 | } |
816 | 837 | ||
817 | if (is_long_mode(vcpu)) { | 838 | if (is_long_mode(vcpu) && |
818 | if (cr3 & CR3_L_MODE_RESERVED_BITS) | 839 | (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62))) |
819 | return 1; | 840 | return 1; |
820 | } else if (is_pae(vcpu) && is_paging(vcpu) && | 841 | else if (is_pae(vcpu) && is_paging(vcpu) && |
821 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) | 842 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) |
822 | return 1; | 843 | return 1; |
823 | 844 | ||
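rsvd_bits(s, e) builds an inclusive mask of bits s through e, so with a guest MAXPHYADDR of 46 the new check rejects any CR3 value that has one of bits 46-62 set. A small standalone equivalent; the _example suffix marks it as an illustration rather than the kernel helper:

	/* Illustrative equivalent of rsvd_bits(): mask of bits s..e inclusive. */
	static inline u64 rsvd_bits_example(int s, int e)
	{
		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	/* rsvd_bits_example(46, 62) == 0x7fffc00000000000ULL */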
@@ -884,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) | |||
884 | { | 905 | { |
885 | u64 fixed = DR6_FIXED_1; | 906 | u64 fixed = DR6_FIXED_1; |
886 | 907 | ||
887 | if (!guest_cpuid_has_rtm(vcpu)) | 908 | if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) |
888 | fixed |= DR6_RTM; | 909 | fixed |= DR6_RTM; |
889 | return fixed; | 910 | return fixed; |
890 | } | 911 | } |
@@ -994,6 +1015,7 @@ static u32 emulated_msrs[] = { | |||
994 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 1015 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
995 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 1016 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
996 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | 1017 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, |
1018 | HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, | ||
997 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, | 1019 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, |
998 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, | 1020 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, |
999 | HV_X64_MSR_RESET, | 1021 | HV_X64_MSR_RESET, |
@@ -1022,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
1022 | if (efer & efer_reserved_bits) | 1044 | if (efer & efer_reserved_bits) |
1023 | return false; | 1045 | return false; |
1024 | 1046 | ||
1025 | if (efer & EFER_FFXSR) { | 1047 | if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) |
1026 | struct kvm_cpuid_entry2 *feat; | ||
1027 | |||
1028 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
1029 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) | ||
1030 | return false; | 1048 | return false; |
1031 | } | ||
1032 | 1049 | ||
1033 | if (efer & EFER_SVME) { | 1050 | if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) |
1034 | struct kvm_cpuid_entry2 *feat; | ||
1035 | |||
1036 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
1037 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) | ||
1038 | return false; | 1051 | return false; |
1039 | } | ||
1040 | 1052 | ||
1041 | return true; | 1053 | return true; |
1042 | } | 1054 | } |
@@ -1084,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1084 | case MSR_KERNEL_GS_BASE: | 1096 | case MSR_KERNEL_GS_BASE: |
1085 | case MSR_CSTAR: | 1097 | case MSR_CSTAR: |
1086 | case MSR_LSTAR: | 1098 | case MSR_LSTAR: |
1087 | if (is_noncanonical_address(msr->data)) | 1099 | if (is_noncanonical_address(msr->data, vcpu)) |
1088 | return 1; | 1100 | return 1; |
1089 | break; | 1101 | break; |
1090 | case MSR_IA32_SYSENTER_EIP: | 1102 | case MSR_IA32_SYSENTER_EIP: |
@@ -1101,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1101 | * value, and that something deterministic happens if the guest | 1113 | * value, and that something deterministic happens if the guest |
1102 | * invokes 64-bit SYSENTER. | 1114 | * invokes 64-bit SYSENTER. |
1103 | */ | 1115 | */ |
1104 | msr->data = get_canonical(msr->data); | 1116 | msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); |
1105 | } | 1117 | } |
1106 | return kvm_x86_ops->set_msr(vcpu, msr); | 1118 | return kvm_x86_ops->set_msr(vcpu, msr); |
1107 | } | 1119 | } |
@@ -1534,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1534 | vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; | 1546 | vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; |
1535 | vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; | 1547 | vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; |
1536 | 1548 | ||
1537 | if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated) | 1549 | if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) |
1538 | update_ia32_tsc_adjust_msr(vcpu, offset); | 1550 | update_ia32_tsc_adjust_msr(vcpu, offset); |
1551 | |||
1539 | kvm_vcpu_write_tsc_offset(vcpu, offset); | 1552 | kvm_vcpu_write_tsc_offset(vcpu, offset); |
1540 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | 1553 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); |
1541 | 1554 | ||
@@ -2185,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2185 | kvm_set_lapic_tscdeadline_msr(vcpu, data); | 2198 | kvm_set_lapic_tscdeadline_msr(vcpu, data); |
2186 | break; | 2199 | break; |
2187 | case MSR_IA32_TSC_ADJUST: | 2200 | case MSR_IA32_TSC_ADJUST: |
2188 | if (guest_cpuid_has_tsc_adjust(vcpu)) { | 2201 | if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { |
2189 | if (!msr_info->host_initiated) { | 2202 | if (!msr_info->host_initiated) { |
2190 | s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; | 2203 | s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; |
2191 | adjust_tsc_offset_guest(vcpu, adj); | 2204 | adjust_tsc_offset_guest(vcpu, adj); |
@@ -2307,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2307 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); | 2320 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); |
2308 | break; | 2321 | break; |
2309 | case MSR_AMD64_OSVW_ID_LENGTH: | 2322 | case MSR_AMD64_OSVW_ID_LENGTH: |
2310 | if (!guest_cpuid_has_osvw(vcpu)) | 2323 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
2311 | return 1; | 2324 | return 1; |
2312 | vcpu->arch.osvw.length = data; | 2325 | vcpu->arch.osvw.length = data; |
2313 | break; | 2326 | break; |
2314 | case MSR_AMD64_OSVW_STATUS: | 2327 | case MSR_AMD64_OSVW_STATUS: |
2315 | if (!guest_cpuid_has_osvw(vcpu)) | 2328 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
2316 | return 1; | 2329 | return 1; |
2317 | vcpu->arch.osvw.status = data; | 2330 | vcpu->arch.osvw.status = data; |
2318 | break; | 2331 | break; |
@@ -2537,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2537 | msr_info->data = 0xbe702111; | 2550 | msr_info->data = 0xbe702111; |
2538 | break; | 2551 | break; |
2539 | case MSR_AMD64_OSVW_ID_LENGTH: | 2552 | case MSR_AMD64_OSVW_ID_LENGTH: |
2540 | if (!guest_cpuid_has_osvw(vcpu)) | 2553 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
2541 | return 1; | 2554 | return 1; |
2542 | msr_info->data = vcpu->arch.osvw.length; | 2555 | msr_info->data = vcpu->arch.osvw.length; |
2543 | break; | 2556 | break; |
2544 | case MSR_AMD64_OSVW_STATUS: | 2557 | case MSR_AMD64_OSVW_STATUS: |
2545 | if (!guest_cpuid_has_osvw(vcpu)) | 2558 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
2546 | return 1; | 2559 | return 1; |
2547 | msr_info->data = vcpu->arch.osvw.status; | 2560 | msr_info->data = vcpu->arch.osvw.status; |
2548 | break; | 2561 | break; |
@@ -2882,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) | |||
2882 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 2895 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
2883 | { | 2896 | { |
2884 | int idx; | 2897 | int idx; |
2898 | |||
2899 | if (vcpu->preempted) | ||
2900 | vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); | ||
2901 | |||
2885 | /* | 2902 | /* |
2886 | * Disable page faults because we're in atomic context here. | 2903 | * Disable page faults because we're in atomic context here. |
2887 | * kvm_write_guest_offset_cached() would call might_fault() | 2904 | * kvm_write_guest_offset_cached() would call might_fault() |
@@ -3074,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
3074 | struct kvm_vcpu_events *events) | 3091 | struct kvm_vcpu_events *events) |
3075 | { | 3092 | { |
3076 | process_nmi(vcpu); | 3093 | process_nmi(vcpu); |
3094 | /* | ||
3095 | * FIXME: pass injected and pending separately. This is only | ||
3096 | * needed for nested virtualization, whose state cannot be | ||
3097 | * migrated yet. For now we can combine them. | ||
3098 | */ | ||
3077 | events->exception.injected = | 3099 | events->exception.injected = |
3078 | vcpu->arch.exception.pending && | 3100 | (vcpu->arch.exception.pending || |
3101 | vcpu->arch.exception.injected) && | ||
3079 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 3102 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
3080 | events->exception.nr = vcpu->arch.exception.nr; | 3103 | events->exception.nr = vcpu->arch.exception.nr; |
3081 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 3104 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
@@ -3130,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
3130 | return -EINVAL; | 3153 | return -EINVAL; |
3131 | 3154 | ||
3132 | process_nmi(vcpu); | 3155 | process_nmi(vcpu); |
3156 | vcpu->arch.exception.injected = false; | ||
3133 | vcpu->arch.exception.pending = events->exception.injected; | 3157 | vcpu->arch.exception.pending = events->exception.injected; |
3134 | vcpu->arch.exception.nr = events->exception.nr; | 3158 | vcpu->arch.exception.nr = events->exception.nr; |
3135 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 3159 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
@@ -4671,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, | |||
4671 | */ | 4695 | */ |
4672 | if (vcpu->arch.gpa_available && | 4696 | if (vcpu->arch.gpa_available && |
4673 | emulator_can_use_gpa(ctxt) && | 4697 | emulator_can_use_gpa(ctxt) && |
4674 | vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) && | 4698 | (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { |
4675 | (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) { | 4699 | gpa = vcpu->arch.gpa_val; |
4676 | gpa = exception->address; | 4700 | ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write); |
4677 | goto mmio; | 4701 | } else { |
4702 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | ||
4703 | if (ret < 0) | ||
4704 | return X86EMUL_PROPAGATE_FAULT; | ||
4678 | } | 4705 | } |
4679 | 4706 | ||
4680 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | 4707 | if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) |
4681 | |||
4682 | if (ret < 0) | ||
4683 | return X86EMUL_PROPAGATE_FAULT; | ||
4684 | |||
4685 | /* For APIC access vmexit */ | ||
4686 | if (ret) | ||
4687 | goto mmio; | ||
4688 | |||
4689 | if (ops->read_write_emulate(vcpu, gpa, val, bytes)) | ||
4690 | return X86EMUL_CONTINUE; | 4708 | return X86EMUL_CONTINUE; |
4691 | 4709 | ||
4692 | mmio: | ||
4693 | /* | 4710 | /* |
4694 | * Is this MMIO handled locally? | 4711 | * Is this MMIO handled locally? |
4695 | */ | 4712 | */ |
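The hunk above replaces the goto-based MMIO path with a fast path that reuses the guest-physical address already reported by the hardware at VM exit, falling back to a guest page-table walk only when that GPA is unusable. A minimal user-space sketch of that decision; gpa_available and gpa_val mirror the vcpu fields in the hunk, while emulation_gpa() and the walk() callback are hypothetical stand-ins, not kernel APIs:

#include <stdbool.h>
#include <stdint.h>

#define PAGE_MASK (~0xfffULL)

struct exit_info {
	bool     gpa_available;  /* did the exit supply a faulting GPA? */
	uint64_t gpa_val;        /* GPA reported by the hardware */
};

/* Return the GPA to emulate against: reuse the exit-provided GPA only when
 * its page offset matches the linear address being emulated, otherwise fall
 * back to a (stubbed) page-table walk. */
uint64_t emulation_gpa(const struct exit_info *ei, uint64_t gva,
		       uint64_t (*walk)(uint64_t gva))
{
	if (ei->gpa_available &&
	    (gva & ~PAGE_MASK) == (ei->gpa_val & ~PAGE_MASK))
		return ei->gpa_val;	/* fast path: skip the walk */
	return walk(gva);		/* slow path: translate the GVA */
}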
@@ -5227,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, | |||
5227 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | 5244 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); |
5228 | } | 5245 | } |
5229 | 5246 | ||
5230 | static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, | 5247 | static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, |
5231 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | 5248 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) |
5232 | { | 5249 | { |
5233 | kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); | 5250 | return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); |
5234 | } | 5251 | } |
5235 | 5252 | ||
5236 | static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) | 5253 | static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) |
@@ -6362,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
6362 | int r; | 6379 | int r; |
6363 | 6380 | ||
6364 | /* try to reinject previous events if any */ | 6381 | /* try to reinject previous events if any */ |
6382 | if (vcpu->arch.exception.injected) { | ||
6383 | kvm_x86_ops->queue_exception(vcpu); | ||
6384 | return 0; | ||
6385 | } | ||
6386 | |||
6387 | /* | ||
6388 | * Exceptions must be injected immediately, or the exception | ||
6389 | * frame will have the address of the NMI or interrupt handler. | ||
6390 | */ | ||
6391 | if (!vcpu->arch.exception.pending) { | ||
6392 | if (vcpu->arch.nmi_injected) { | ||
6393 | kvm_x86_ops->set_nmi(vcpu); | ||
6394 | return 0; | ||
6395 | } | ||
6396 | |||
6397 | if (vcpu->arch.interrupt.pending) { | ||
6398 | kvm_x86_ops->set_irq(vcpu); | ||
6399 | return 0; | ||
6400 | } | ||
6401 | } | ||
6402 | |||
6403 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { | ||
6404 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); | ||
6405 | if (r != 0) | ||
6406 | return r; | ||
6407 | } | ||
6408 | |||
6409 | /* try to inject new event if pending */ | ||
6365 | if (vcpu->arch.exception.pending) { | 6410 | if (vcpu->arch.exception.pending) { |
6366 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | 6411 | trace_kvm_inj_exception(vcpu->arch.exception.nr, |
6367 | vcpu->arch.exception.has_error_code, | 6412 | vcpu->arch.exception.has_error_code, |
6368 | vcpu->arch.exception.error_code); | 6413 | vcpu->arch.exception.error_code); |
6369 | 6414 | ||
6415 | vcpu->arch.exception.pending = false; | ||
6416 | vcpu->arch.exception.injected = true; | ||
6417 | |||
6370 | if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) | 6418 | if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) |
6371 | __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | | 6419 | __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | |
6372 | X86_EFLAGS_RF); | 6420 | X86_EFLAGS_RF); |
@@ -6378,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
6378 | } | 6426 | } |
6379 | 6427 | ||
6380 | kvm_x86_ops->queue_exception(vcpu); | 6428 | kvm_x86_ops->queue_exception(vcpu); |
6381 | return 0; | 6429 | } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) { |
6382 | } | ||
6383 | |||
6384 | if (vcpu->arch.nmi_injected) { | ||
6385 | kvm_x86_ops->set_nmi(vcpu); | ||
6386 | return 0; | ||
6387 | } | ||
6388 | |||
6389 | if (vcpu->arch.interrupt.pending) { | ||
6390 | kvm_x86_ops->set_irq(vcpu); | ||
6391 | return 0; | ||
6392 | } | ||
6393 | |||
6394 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { | ||
6395 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); | ||
6396 | if (r != 0) | ||
6397 | return r; | ||
6398 | } | ||
6399 | |||
6400 | /* try to inject new event if pending */ | ||
6401 | if (vcpu->arch.smi_pending && !is_smm(vcpu)) { | ||
6402 | vcpu->arch.smi_pending = false; | 6430 | vcpu->arch.smi_pending = false; |
6403 | enter_smm(vcpu); | 6431 | enter_smm(vcpu); |
6404 | } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { | 6432 | } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { |
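The two hunks above split exception state into "pending" (still to be delivered, and possibly intercepted by a nested hypervisor) and "injected" (already written into the hardware event-injection field, only needing a re-queue if the previous vmentry was aborted). A stand-alone sketch of the resulting ordering, with hypothetical names; only the pending/injected fields mirror the code above:

#include <stdbool.h>

struct vcpu_events {
	struct { bool pending, injected; } exception;
	bool nmi_injected;
	bool irq_pending;
};

enum action { REQUEUE_EXCEPTION, REINJECT_NMI, REINJECT_IRQ,
	      INJECT_EXCEPTION, NOTHING };

enum action next_event(struct vcpu_events *e)
{
	if (e->exception.injected)		/* finish an aborted injection first */
		return REQUEUE_EXCEPTION;

	/* A pending exception beats re-injecting an NMI or IRQ; otherwise the
	 * exception frame would point at the NMI/interrupt handler. */
	if (!e->exception.pending) {
		if (e->nmi_injected)
			return REINJECT_NMI;
		if (e->irq_pending)
			return REINJECT_IRQ;
	}

	if (e->exception.pending) {
		e->exception.pending = false;	/* consumed ... */
		e->exception.injected = true;	/* ... and now armed in hardware */
		return INJECT_EXCEPTION;
	}
	return NOTHING;
}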
@@ -6615,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) | |||
6615 | trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); | 6643 | trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); |
6616 | vcpu->arch.hflags |= HF_SMM_MASK; | 6644 | vcpu->arch.hflags |= HF_SMM_MASK; |
6617 | memset(buf, 0, 512); | 6645 | memset(buf, 0, 512); |
6618 | if (guest_cpuid_has_longmode(vcpu)) | 6646 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) |
6619 | enter_smm_save_state_64(vcpu, buf); | 6647 | enter_smm_save_state_64(vcpu, buf); |
6620 | else | 6648 | else |
6621 | enter_smm_save_state_32(vcpu, buf); | 6649 | enter_smm_save_state_32(vcpu, buf); |
@@ -6667,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) | |||
6667 | kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); | 6695 | kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); |
6668 | kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); | 6696 | kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); |
6669 | 6697 | ||
6670 | if (guest_cpuid_has_longmode(vcpu)) | 6698 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) |
6671 | kvm_x86_ops->set_efer(vcpu, 0); | 6699 | kvm_x86_ops->set_efer(vcpu, 0); |
6672 | 6700 | ||
6673 | kvm_update_cpuid(vcpu); | 6701 | kvm_update_cpuid(vcpu); |
@@ -6774,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6774 | } | 6802 | } |
6775 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { | 6803 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { |
6776 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 6804 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
6805 | vcpu->mmio_needed = 0; | ||
6777 | r = 0; | 6806 | r = 0; |
6778 | goto out; | 6807 | goto out; |
6779 | } | 6808 | } |
@@ -6862,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
6862 | kvm_x86_ops->enable_nmi_window(vcpu); | 6891 | kvm_x86_ops->enable_nmi_window(vcpu); |
6863 | if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) | 6892 | if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
6864 | kvm_x86_ops->enable_irq_window(vcpu); | 6893 | kvm_x86_ops->enable_irq_window(vcpu); |
6894 | WARN_ON(vcpu->arch.exception.pending); | ||
6865 | } | 6895 | } |
6866 | 6896 | ||
6867 | if (kvm_lapic_enabled(vcpu)) { | 6897 | if (kvm_lapic_enabled(vcpu)) { |
@@ -7004,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
7004 | if (vcpu->arch.apic_attention) | 7034 | if (vcpu->arch.apic_attention) |
7005 | kvm_lapic_sync_from_vapic(vcpu); | 7035 | kvm_lapic_sync_from_vapic(vcpu); |
7006 | 7036 | ||
7037 | vcpu->arch.gpa_available = false; | ||
7007 | r = kvm_x86_ops->handle_exit(vcpu); | 7038 | r = kvm_x86_ops->handle_exit(vcpu); |
7008 | return r; | 7039 | return r; |
7009 | 7040 | ||
@@ -7422,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
7422 | int pending_vec, max_bits, idx; | 7453 | int pending_vec, max_bits, idx; |
7423 | struct desc_ptr dt; | 7454 | struct desc_ptr dt; |
7424 | 7455 | ||
7425 | if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE)) | 7456 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && |
7457 | (sregs->cr4 & X86_CR4_OSXSAVE)) | ||
7458 | return -EINVAL; | ||
7459 | |||
7460 | apic_base_msr.data = sregs->apic_base; | ||
7461 | apic_base_msr.host_initiated = true; | ||
7462 | if (kvm_set_apic_base(vcpu, &apic_base_msr)) | ||
7426 | return -EINVAL; | 7463 | return -EINVAL; |
7427 | 7464 | ||
7428 | dt.size = sregs->idt.limit; | 7465 | dt.size = sregs->idt.limit; |
@@ -7441,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
7441 | 7478 | ||
7442 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; | 7479 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
7443 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 7480 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
7444 | apic_base_msr.data = sregs->apic_base; | ||
7445 | apic_base_msr.host_initiated = true; | ||
7446 | kvm_set_apic_base(vcpu, &apic_base_msr); | ||
7447 | 7481 | ||
7448 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; | 7482 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
7449 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 7483 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
@@ -7734,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
7734 | vcpu->arch.nmi_injected = false; | 7768 | vcpu->arch.nmi_injected = false; |
7735 | kvm_clear_interrupt_queue(vcpu); | 7769 | kvm_clear_interrupt_queue(vcpu); |
7736 | kvm_clear_exception_queue(vcpu); | 7770 | kvm_clear_exception_queue(vcpu); |
7771 | vcpu->arch.exception.pending = false; | ||
7737 | 7772 | ||
7738 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 7773 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
7739 | kvm_update_dr0123(vcpu); | 7774 | kvm_update_dr0123(vcpu); |
@@ -7993,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
7993 | kvm_pmu_init(vcpu); | 8028 | kvm_pmu_init(vcpu); |
7994 | 8029 | ||
7995 | vcpu->arch.pending_external_vector = -1; | 8030 | vcpu->arch.pending_external_vector = -1; |
8031 | vcpu->arch.preempted_in_kernel = false; | ||
7996 | 8032 | ||
7997 | kvm_hv_vcpu_init(vcpu); | 8033 | kvm_hv_vcpu_init(vcpu); |
7998 | 8034 | ||
@@ -8440,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
8440 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); | 8476 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); |
8441 | } | 8477 | } |
8442 | 8478 | ||
8479 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
8480 | { | ||
8481 | return vcpu->arch.preempted_in_kernel; | ||
8482 | } | ||
8483 | |||
8443 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 8484 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
8444 | { | 8485 | { |
8445 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; | 8486 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 612067074905..51e349cf5f45 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -11,7 +11,7 @@ | |||
11 | 11 | ||
12 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 12 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
13 | { | 13 | { |
14 | vcpu->arch.exception.pending = false; | 14 | vcpu->arch.exception.injected = false; |
15 | } | 15 | } |
16 | 16 | ||
17 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, | 17 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, |
@@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | |||
29 | 29 | ||
30 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) | 30 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) |
31 | { | 31 | { |
32 | return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || | 32 | return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || |
33 | vcpu->arch.nmi_injected; | 33 | vcpu->arch.nmi_injected; |
34 | } | 34 | } |
35 | 35 | ||
@@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) | |||
62 | return cs_l; | 62 | return cs_l; |
63 | } | 63 | } |
64 | 64 | ||
65 | static inline bool is_la57_mode(struct kvm_vcpu *vcpu) | ||
66 | { | ||
67 | #ifdef CONFIG_X86_64 | ||
68 | return (vcpu->arch.efer & EFER_LMA) && | ||
69 | kvm_read_cr4_bits(vcpu, X86_CR4_LA57); | ||
70 | #else | ||
71 | return 0; | ||
72 | #endif | ||
73 | } | ||
74 | |||
65 | static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) | 75 | static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) |
66 | { | 76 | { |
67 | return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; | 77 | return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; |
@@ -87,10 +97,48 @@ static inline u32 bit(int bitno) | |||
87 | return 1 << (bitno & 31); | 97 | return 1 << (bitno & 31); |
88 | } | 98 | } |
89 | 99 | ||
100 | static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) | ||
101 | { | ||
102 | return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48; | ||
103 | } | ||
104 | |||
105 | static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) | ||
106 | { | ||
107 | return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48; | ||
108 | } | ||
109 | |||
110 | static inline u64 get_canonical(u64 la, u8 vaddr_bits) | ||
111 | { | ||
112 | return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits); | ||
113 | } | ||
114 | |||
115 | static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) | ||
116 | { | ||
117 | #ifdef CONFIG_X86_64 | ||
118 | return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; | ||
119 | #else | ||
120 | return false; | ||
121 | #endif | ||
122 | } | ||
123 | |||
124 | static inline bool emul_is_noncanonical_address(u64 la, | ||
125 | struct x86_emulate_ctxt *ctxt) | ||
126 | { | ||
127 | #ifdef CONFIG_X86_64 | ||
128 | return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; | ||
129 | #else | ||
130 | return false; | ||
131 | #endif | ||
132 | } | ||
133 | |||
90 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | 134 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, |
91 | gva_t gva, gfn_t gfn, unsigned access) | 135 | gva_t gva, gfn_t gfn, unsigned access) |
92 | { | 136 | { |
93 | vcpu->arch.mmio_gva = gva & PAGE_MASK; | 137 | /* |
138 | * If this is a shadow nested page table, the "GVA" is | ||
139 | * actually a nGPA. | ||
140 | */ | ||
141 | vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; | ||
94 | vcpu->arch.access = access; | 142 | vcpu->arch.access = access; |
95 | vcpu->arch.mmio_gfn = gfn; | 143 | vcpu->arch.mmio_gfn = gfn; |
96 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; | 144 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; |
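The canonical-address helpers added in this hunk reduce to sign-extending the low vcpu_virt_addr_bits() bits of the linear address (48, or 57 when CR4.LA57 is set) and checking that nothing changed. A small stand-alone check of that rule; canon() mirrors get_canonical() above, the rest is a hypothetical test harness:

#include <stdint.h>
#include <stdio.h>

static uint64_t canon(uint64_t la, unsigned bits)
{
	/* Shift as unsigned, then arithmetic-shift back to sign-extend. */
	return (uint64_t)((int64_t)(la << (64 - bits)) >> (64 - bits));
}

int main(void)
{
	uint64_t la = 0x0000800000000000ULL;	/* bit 47 set, upper bits clear */

	/* Non-canonical with 48-bit addressing, canonical once LA57 is on. */
	printf("48-bit: %s\n", canon(la, 48) == la ? "canonical" : "non-canonical");
	printf("57-bit: %s\n", canon(la, 57) == la ? "canonical" : "non-canonical");
	return 0;
}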
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 21a6fd6c44af..6882538eda32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -720,7 +720,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); | |||
720 | bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); | 720 | bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); |
721 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 721 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
722 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); | 722 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); |
723 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); | 723 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); |
724 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | 724 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); |
725 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | 725 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); |
726 | 726 | ||
@@ -800,6 +800,7 @@ int kvm_arch_hardware_setup(void); | |||
800 | void kvm_arch_hardware_unsetup(void); | 800 | void kvm_arch_hardware_unsetup(void); |
801 | void kvm_arch_check_processor_compat(void *rtn); | 801 | void kvm_arch_check_processor_compat(void *rtn); |
802 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); | 802 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); |
803 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); | ||
803 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); | 804 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); |
804 | 805 | ||
805 | #ifndef __KVM_HAVE_ARCH_VM_ALLOC | 806 | #ifndef __KVM_HAVE_ARCH_VM_ALLOC |
@@ -985,6 +986,12 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) | |||
985 | return (hpa_t)pfn << PAGE_SHIFT; | 986 | return (hpa_t)pfn << PAGE_SHIFT; |
986 | } | 987 | } |
987 | 988 | ||
989 | static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu, | ||
990 | gpa_t gpa) | ||
991 | { | ||
992 | return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa)); | ||
993 | } | ||
994 | |||
988 | static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) | 995 | static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) |
989 | { | 996 | { |
990 | unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); | 997 | unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6cd63c18708a..838887587411 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size { | |||
711 | struct kvm_ppc_smmu_info { | 711 | struct kvm_ppc_smmu_info { |
712 | __u64 flags; | 712 | __u64 flags; |
713 | __u32 slb_size; | 713 | __u32 slb_size; |
714 | __u32 pad; | 714 | __u16 data_keys; /* # storage keys supported for data */ |
715 | __u16 instr_keys; /* # storage keys supported for instructions */ | ||
715 | struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; | 716 | struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; |
716 | }; | 717 | }; |
717 | 718 | ||
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a39a1e161e63..b9f68e4add71 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
@@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | |||
416 | && !v->arch.power_off && !v->arch.pause); | 416 | && !v->arch.power_off && !v->arch.pause); |
417 | } | 417 | } |
418 | 418 | ||
419 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
420 | { | ||
421 | return vcpu_mode_priv(vcpu); | ||
422 | } | ||
423 | |||
419 | /* Just ensure a guest exit from a particular CPU */ | 424 | /* Just ensure a guest exit from a particular CPU */ |
420 | static void exit_vm_noop(void *info) | 425 | static void exit_vm_noop(void *info) |
421 | { | 426 | { |
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 2ea21dac0b44..b36945d49986 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
@@ -1454,25 +1454,6 @@ out: | |||
1454 | kvm_set_pfn_accessed(pfn); | 1454 | kvm_set_pfn_accessed(pfn); |
1455 | } | 1455 | } |
1456 | 1456 | ||
1457 | static bool is_abort_sea(unsigned long fault_status) | ||
1458 | { | ||
1459 | switch (fault_status) { | ||
1460 | case FSC_SEA: | ||
1461 | case FSC_SEA_TTW0: | ||
1462 | case FSC_SEA_TTW1: | ||
1463 | case FSC_SEA_TTW2: | ||
1464 | case FSC_SEA_TTW3: | ||
1465 | case FSC_SECC: | ||
1466 | case FSC_SECC_TTW0: | ||
1467 | case FSC_SECC_TTW1: | ||
1468 | case FSC_SECC_TTW2: | ||
1469 | case FSC_SECC_TTW3: | ||
1470 | return true; | ||
1471 | default: | ||
1472 | return false; | ||
1473 | } | ||
1474 | } | ||
1475 | |||
1476 | /** | 1457 | /** |
1477 | * kvm_handle_guest_abort - handles all 2nd stage aborts | 1458 | * kvm_handle_guest_abort - handles all 2nd stage aborts |
1478 | * @vcpu: the VCPU pointer | 1459 | * @vcpu: the VCPU pointer |
@@ -1498,20 +1479,21 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
1498 | fault_status = kvm_vcpu_trap_get_fault_type(vcpu); | 1479 | fault_status = kvm_vcpu_trap_get_fault_type(vcpu); |
1499 | 1480 | ||
1500 | fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); | 1481 | fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); |
1482 | is_iabt = kvm_vcpu_trap_is_iabt(vcpu); | ||
1501 | 1483 | ||
1502 | /* | 1484 | /* Synchronous External Abort? */ |
1503 | * The host kernel will handle the synchronous external abort. There | 1485 | if (kvm_vcpu_dabt_isextabt(vcpu)) { |
1504 | * is no need to pass the error into the guest. | 1486 | /* |
1505 | */ | 1487 | * For RAS the host kernel may handle this abort. |
1506 | if (is_abort_sea(fault_status)) { | 1488 | * There is no need to pass the error into the guest. |
1489 | */ | ||
1507 | if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) | 1490 | if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) |
1508 | return 1; | 1491 | return 1; |
1509 | } | ||
1510 | 1492 | ||
1511 | is_iabt = kvm_vcpu_trap_is_iabt(vcpu); | 1493 | if (unlikely(!is_iabt)) { |
1512 | if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { | 1494 | kvm_inject_vabt(vcpu); |
1513 | kvm_inject_vabt(vcpu); | 1495 | return 1; |
1514 | return 1; | 1496 | } |
1515 | } | 1497 | } |
1516 | 1498 | ||
1517 | trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), | 1499 | trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), |
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 7072ab743332..10b38178cff2 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c | |||
@@ -234,7 +234,7 @@ static int vgic_debug_show(struct seq_file *s, void *v) | |||
234 | return 0; | 234 | return 0; |
235 | } | 235 | } |
236 | 236 | ||
237 | static struct seq_operations vgic_debug_seq_ops = { | 237 | static const struct seq_operations vgic_debug_seq_ops = { |
238 | .start = vgic_debug_start, | 238 | .start = vgic_debug_start, |
239 | .next = vgic_debug_next, | 239 | .next = vgic_debug_next, |
240 | .stop = vgic_debug_stop, | 240 | .stop = vgic_debug_stop, |
@@ -255,7 +255,7 @@ static int debug_open(struct inode *inode, struct file *file) | |||
255 | return ret; | 255 | return ret; |
256 | }; | 256 | }; |
257 | 257 | ||
258 | static struct file_operations vgic_debug_fops = { | 258 | static const struct file_operations vgic_debug_fops = { |
259 | .owner = THIS_MODULE, | 259 | .owner = THIS_MODULE, |
260 | .open = debug_open, | 260 | .open = debug_open, |
261 | .read = seq_read, | 261 | .read = seq_read, |
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index aa6b68db80b4..f51c1e1b3f70 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c | |||
@@ -144,7 +144,6 @@ struct its_ite { | |||
144 | 144 | ||
145 | struct vgic_irq *irq; | 145 | struct vgic_irq *irq; |
146 | struct its_collection *collection; | 146 | struct its_collection *collection; |
147 | u32 lpi; | ||
148 | u32 event_id; | 147 | u32 event_id; |
149 | }; | 148 | }; |
150 | 149 | ||
@@ -813,7 +812,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) | |||
813 | /* Must be called with its_lock mutex held */ | 812 | /* Must be called with its_lock mutex held */ |
814 | static struct its_ite *vgic_its_alloc_ite(struct its_device *device, | 813 | static struct its_ite *vgic_its_alloc_ite(struct its_device *device, |
815 | struct its_collection *collection, | 814 | struct its_collection *collection, |
816 | u32 lpi_id, u32 event_id) | 815 | u32 event_id) |
817 | { | 816 | { |
818 | struct its_ite *ite; | 817 | struct its_ite *ite; |
819 | 818 | ||
@@ -823,7 +822,6 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, | |||
823 | 822 | ||
824 | ite->event_id = event_id; | 823 | ite->event_id = event_id; |
825 | ite->collection = collection; | 824 | ite->collection = collection; |
826 | ite->lpi = lpi_id; | ||
827 | 825 | ||
828 | list_add_tail(&ite->ite_list, &device->itt_head); | 826 | list_add_tail(&ite->ite_list, &device->itt_head); |
829 | return ite; | 827 | return ite; |
@@ -873,7 +871,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, | |||
873 | new_coll = collection; | 871 | new_coll = collection; |
874 | } | 872 | } |
875 | 873 | ||
876 | ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id); | 874 | ite = vgic_its_alloc_ite(device, collection, event_id); |
877 | if (IS_ERR(ite)) { | 875 | if (IS_ERR(ite)) { |
878 | if (new_coll) | 876 | if (new_coll) |
879 | vgic_its_free_collection(its, coll_id); | 877 | vgic_its_free_collection(its, coll_id); |
@@ -1848,7 +1846,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, | |||
1848 | 1846 | ||
1849 | next_offset = compute_next_eventid_offset(&dev->itt_head, ite); | 1847 | next_offset = compute_next_eventid_offset(&dev->itt_head, ite); |
1850 | val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | | 1848 | val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | |
1851 | ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) | | 1849 | ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | |
1852 | ite->collection->collection_id; | 1850 | ite->collection->collection_id; |
1853 | val = cpu_to_le64(val); | 1851 | val = cpu_to_le64(val); |
1854 | return kvm_write_guest(kvm, gpa, &val, ite_esz); | 1852 | return kvm_write_guest(kvm, gpa, &val, ite_esz); |
@@ -1895,7 +1893,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, | |||
1895 | if (!collection) | 1893 | if (!collection) |
1896 | return -EINVAL; | 1894 | return -EINVAL; |
1897 | 1895 | ||
1898 | ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id); | 1896 | ite = vgic_its_alloc_ite(dev, collection, event_id); |
1899 | if (IS_ERR(ite)) | 1897 | if (IS_ERR(ite)) |
1900 | return PTR_ERR(ite); | 1898 | return PTR_ERR(ite); |
1901 | 1899 | ||
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 37522e65eb53..b3d4a10f09a1 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c | |||
@@ -303,6 +303,51 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, | |||
303 | vgic_set_vmcr(vcpu, &vmcr); | 303 | vgic_set_vmcr(vcpu, &vmcr); |
304 | } | 304 | } |
305 | 305 | ||
306 | static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, | ||
307 | gpa_t addr, unsigned int len) | ||
308 | { | ||
309 | int n; /* which APRn is this */ | ||
310 | |||
311 | n = (addr >> 2) & 0x3; | ||
312 | |||
313 | if (kvm_vgic_global_state.type == VGIC_V2) { | ||
314 | /* GICv2 hardware systems support max. 32 groups */ | ||
315 | if (n != 0) | ||
316 | return 0; | ||
317 | return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr; | ||
318 | } else { | ||
319 | struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
320 | |||
321 | if (n > vgic_v3_max_apr_idx(vcpu)) | ||
322 | return 0; | ||
323 | /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ | ||
324 | return vgicv3->vgic_ap1r[n]; | ||
325 | } | ||
326 | } | ||
327 | |||
328 | static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, | ||
329 | gpa_t addr, unsigned int len, | ||
330 | unsigned long val) | ||
331 | { | ||
332 | int n; /* which APRn is this */ | ||
333 | |||
334 | n = (addr >> 2) & 0x3; | ||
335 | |||
336 | if (kvm_vgic_global_state.type == VGIC_V2) { | ||
337 | /* GICv2 hardware systems support max. 32 groups */ | ||
338 | if (n != 0) | ||
339 | return; | ||
340 | vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val; | ||
341 | } else { | ||
342 | struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
343 | |||
344 | if (n > vgic_v3_max_apr_idx(vcpu)) | ||
345 | return; | ||
346 | /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ | ||
347 | vgicv3->vgic_ap1r[n] = val; | ||
348 | } | ||
349 | } | ||
350 | |||
306 | static const struct vgic_register_region vgic_v2_dist_registers[] = { | 351 | static const struct vgic_register_region vgic_v2_dist_registers[] = { |
307 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, | 352 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, |
308 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, | 353 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, |
@@ -364,7 +409,7 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = { | |||
364 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | 409 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, |
365 | VGIC_ACCESS_32bit), | 410 | VGIC_ACCESS_32bit), |
366 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, | 411 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, |
367 | vgic_mmio_read_raz, vgic_mmio_write_wi, 16, | 412 | vgic_mmio_read_apr, vgic_mmio_write_apr, 16, |
368 | VGIC_ACCESS_32bit), | 413 | VGIC_ACCESS_32bit), |
369 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, | 414 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, |
370 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | 415 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, |
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index bba7fa22a7f7..bf9ceab67c77 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h | |||
@@ -220,4 +220,20 @@ int vgic_debug_destroy(struct kvm *kvm); | |||
220 | bool lock_all_vcpus(struct kvm *kvm); | 220 | bool lock_all_vcpus(struct kvm *kvm); |
221 | void unlock_all_vcpus(struct kvm *kvm); | 221 | void unlock_all_vcpus(struct kvm *kvm); |
222 | 222 | ||
223 | static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) | ||
224 | { | ||
225 | struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; | ||
226 | |||
227 | /* | ||
228 | * num_pri_bits are initialized with HW supported values. | ||
229 | * We can rely safely on num_pri_bits even if VM has not | ||
230 | * restored ICC_CTLR_EL1 before restoring APnR registers. | ||
231 | */ | ||
232 | switch (cpu_if->num_pri_bits) { | ||
233 | case 7: return 3; | ||
234 | case 6: return 1; | ||
235 | default: return 0; | ||
236 | } | ||
237 | } | ||
238 | |||
223 | #endif | 239 | #endif |
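The switch in vgic_v3_max_apr_idx() above encodes the GICv3 rule that a CPU interface with p priority bits implements 2^(p-5) active-priority registers per group, so the highest valid APRn index is that count minus one (0 for 5 bits, 1 for 6, 3 for 7). A quick user-space check of that equivalence; both helper names are hypothetical, only the mapping comes from the hunk:

#include <assert.h>
#include <stdio.h>

/* Same mapping as the kernel switch statement above. */
int max_apr_idx_switch(int num_pri_bits)
{
	switch (num_pri_bits) {
	case 7: return 3;
	case 6: return 1;
	default: return 0;	/* 5 priority bits */
	}
}

/* Closed form: 2^(num_pri_bits - 5) APRn registers; highest index is count - 1. */
int max_apr_idx_formula(int num_pri_bits)
{
	return (1 << (num_pri_bits - 5)) - 1;
}

int main(void)
{
	for (int bits = 5; bits <= 7; bits++) {
		assert(max_apr_idx_switch(bits) == max_apr_idx_formula(bits));
		printf("%d priority bits -> APR0..APR%d\n",
		       bits, max_apr_idx_formula(bits));
	}
	return 0;
}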
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d81f6ded88e..6ed1c2021198 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -1609,7 +1609,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, | |||
1609 | struct page **pages, int nr_pages) | 1609 | struct page **pages, int nr_pages) |
1610 | { | 1610 | { |
1611 | unsigned long addr; | 1611 | unsigned long addr; |
1612 | gfn_t entry; | 1612 | gfn_t entry = 0; |
1613 | 1613 | ||
1614 | addr = gfn_to_hva_many(slot, gfn, &entry); | 1614 | addr = gfn_to_hva_many(slot, gfn, &entry); |
1615 | if (kvm_is_error_hva(addr)) | 1615 | if (kvm_is_error_hva(addr)) |
@@ -1928,6 +1928,7 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, | |||
1928 | * verify that the entire region is valid here. | 1928 | * verify that the entire region is valid here. |
1929 | */ | 1929 | */ |
1930 | while (start_gfn <= end_gfn) { | 1930 | while (start_gfn <= end_gfn) { |
1931 | nr_pages_avail = 0; | ||
1931 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); | 1932 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); |
1932 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, | 1933 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, |
1933 | &nr_pages_avail); | 1934 | &nr_pages_avail); |
@@ -2275,7 +2276,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | |||
2275 | #endif | 2276 | #endif |
2276 | } | 2277 | } |
2277 | 2278 | ||
2278 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 2279 | void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) |
2279 | { | 2280 | { |
2280 | struct kvm *kvm = me->kvm; | 2281 | struct kvm *kvm = me->kvm; |
2281 | struct kvm_vcpu *vcpu; | 2282 | struct kvm_vcpu *vcpu; |
@@ -2306,6 +2307,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
2306 | continue; | 2307 | continue; |
2307 | if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) | 2308 | if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) |
2308 | continue; | 2309 | continue; |
2310 | if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) | ||
2311 | continue; | ||
2309 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | 2312 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) |
2310 | continue; | 2313 | continue; |
2311 | 2314 | ||
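The new yield_to_kernel_mode argument above lets kvm_vcpu_on_spin() skip candidate VCPUs that were preempted while running guest user space, since those cannot be holding the kernel spinlock the yielding VCPU is waiting on. A simplified sketch of that filtering step; pick_boost_target() and its parameters are hypothetical, only preempted_in_kernel mirrors the field introduced in this series:

#include <stdbool.h>
#include <stddef.h>

struct vcpu {
	bool runnable;
	bool preempted_in_kernel;	/* recorded at preemption time, as above */
};

/* Pick the first boost candidate; when yield_to_kernel_mode is set,
 * ignore VCPUs that were preempted while in guest user mode. */
struct vcpu *pick_boost_target(struct vcpu *vcpus, size_t n,
			       size_t self, bool yield_to_kernel_mode)
{
	for (size_t i = 0; i < n; i++) {
		struct vcpu *v = &vcpus[i];

		if (i == self || !v->runnable)
			continue;
		if (yield_to_kernel_mode && !v->preempted_in_kernel)
			continue;	/* user-mode VCPU: not a lock holder */
		return v;
	}
	return NULL;
}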