| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-08 18:18:36 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-08 18:18:36 -0400 |
| commit | 0756b7fbb696d2cb18785da9cab13ec164017f64 (patch) | |
| tree | d06242e3f35a7623e00068d7c95d06824f396df3 | |
| parent | 6d6218976df142ba5594371f8dbd56650151c56f (diff) | |
| parent | 5f54c8b2d4fad95d1f8ecbe023ebe6038e6d3760 (diff) | |
Merge tag 'kvm-4.14-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"First batch of KVM changes for 4.14
Common:
- improve heuristic for boosting preempted spinlocks by ignoring
VCPUs in user mode
ARM:
- fix for decoding external abort types from guests
- added support for migrating the active priority of interrupts when
running a GICv2 guest on a GICv3 host
- minor cleanup
PPC:
- expose storage keys to userspace
- merge kvm-ppc-fixes with a fix that missed 4.13 because of
vacations
- fixes
s390:
- merge of kvm/master to avoid conflicts with additional sthyi fixes
- wire up the no-dat enhancements in KVM
- multiple epoch facility (z14 feature)
- Configuration z/Architecture Mode
- more sthyi fixes
- gdb server range checking fix
- small code cleanups
x86:
- emulate Hyper-V TSC frequency MSRs
- add nested INVPCID
- emulate EPTP switching VMFUNC
- support Virtual GIF
- support 5 level page tables
- speed up nested VM exits by packing byte operations
- speed up MMIO by using the hardware-provided physical address
- a lot of fixes and cleanups, especially in nested code"
* tag 'kvm-4.14-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (67 commits)
KVM: arm/arm64: Support uaccess of GICC_APRn
KVM: arm/arm64: Extract GICv3 max APRn index calculation
KVM: arm/arm64: vITS: Drop its_ite->lpi field
KVM: arm/arm64: vgic: constify seq_operations and file_operations
KVM: arm/arm64: Fix guest external abort matching
KVM: PPC: Book3S HV: Fix memory leak in kvm_vm_ioctl_get_htab_fd
KVM: s390: vsie: cleanup mcck reinjection
KVM: s390: use WARN_ON_ONCE only for checking
KVM: s390: guestdbg: fix range check
KVM: PPC: Book3S HV: Report storage key support to userspace
KVM: PPC: Book3S HV: Fix case where HDEC is treated as 32-bit on POWER9
KVM: PPC: Book3S HV: Fix invalid use of register expression
KVM: PPC: Book3S HV: Fix H_REGISTER_VPA VPA size validation
KVM: PPC: Book3S HV: Fix setting of storage key in H_ENTER
KVM: PPC: e500mc: Fix a NULL dereference
KVM: PPC: e500: Fix some NULL dereferences on error
KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list
KVM: s390: we are always in czam mode
KVM: s390: expose no-DAT to guest and migration support
KVM: s390: sthyi: remove invalid guest write access
...
64 files changed, 1479 insertions(+), 768 deletions(-)
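The "Common" item in the pull message above corresponds to the kvm_vcpu_on_spin() signature change visible in the arm, arm64, mips, powerpc and s390 hunks below: each architecture now reports whether a vCPU was running in kernel mode, and the generic directed-yield code skips vCPUs that were preempted in user mode, since those cannot be holding a kernel spinlock. A simplified sketch of that heuristic (illustrative only; the real code in virt/kvm/kvm_main.c also keeps round-robin and PLE-eligibility state):

void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
	struct kvm *kvm = me->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu == me || !READ_ONCE(vcpu->preempted))
			continue;
		/* user-mode vCPUs are poor boost candidates: skip them */
		if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
			continue;
		if (kvm_vcpu_yield_to(vcpu) > 0)
			break;
	}
}

The per-architecture kvm_arch_vcpu_in_kernel() stubs added below simply return false where the information is not tracked (mips, powerpc), while s390 derives it from the PSW problem-state bit and arm/arm64 pass vcpu_mode_priv() at the call site.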
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index b2f60ca8b60c..b3ce12643553 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt | |||
| @@ -83,6 +83,11 @@ Groups: | |||
| 83 | 83 | ||
| 84 | Bits for undefined preemption levels are RAZ/WI. | 84 | Bits for undefined preemption levels are RAZ/WI. |
| 85 | 85 | ||
| 86 | Note that this differs from a CPU's view of the APRs on hardware in which | ||
| 87 | a GIC without the security extensions expose group 0 and group 1 active | ||
| 88 | priorities in separate register groups, whereas we show a combined view | ||
| 89 | similar to GICv2's GICH_APR. | ||
| 90 | |||
| 86 | For historical reasons and to provide ABI compatibility with userspace we | 91 | For historical reasons and to provide ABI compatibility with userspace we |
| 87 | export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask | 92 | export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask |
| 88 | field in the lower 5 bits of a word, meaning that userspace must always | 93 | field in the lower 5 bits of a word, meaning that userspace must always |
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 903fc926860b..95ca68d663a4 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt | |||
| @@ -176,7 +176,8 @@ Architectures: s390 | |||
| 176 | 176 | ||
| 177 | 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH | 177 | 3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH |
| 178 | 178 | ||
| 179 | Allows user space to set/get the TOD clock extension (u8). | 179 | Allows user space to set/get the TOD clock extension (u8) (superseded by |
| 180 | KVM_S390_VM_TOD_EXT). | ||
| 180 | 181 | ||
| 181 | Parameters: address of a buffer in user space to store the data (u8) to | 182 | Parameters: address of a buffer in user space to store the data (u8) to |
| 182 | Returns: -EFAULT if the given address is not accessible from kernel space | 183 | Returns: -EFAULT if the given address is not accessible from kernel space |
| @@ -190,6 +191,17 @@ the POP (u64). | |||
| 190 | Parameters: address of a buffer in user space to store the data (u64) to | 191 | Parameters: address of a buffer in user space to store the data (u64) to |
| 191 | Returns: -EFAULT if the given address is not accessible from kernel space | 192 | Returns: -EFAULT if the given address is not accessible from kernel space |
| 192 | 193 | ||
| 194 | 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT | ||
| 195 | Allows user space to set/get bits 0-63 of the TOD clock register as defined in | ||
| 196 | the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it | ||
| 197 | also allows user space to get/set it. If the guest CPU model does not support | ||
| 198 | it, it is stored as 0 and not allowed to be set to a value != 0. | ||
| 199 | |||
| 200 | Parameters: address of a buffer in user space to store the data | ||
| 201 | (kvm_s390_vm_tod_clock) to | ||
| 202 | Returns: -EFAULT if the given address is not accessible from kernel space | ||
| 203 | -EINVAL if setting the TOD clock extension to != 0 is not supported | ||
| 204 | |||
| 193 | 4. GROUP: KVM_S390_VM_CRYPTO | 205 | 4. GROUP: KVM_S390_VM_CRYPTO |
| 194 | Architectures: s390 | 206 | Architectures: s390 |
| 195 | 207 | ||
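As a hedged illustration of the attribute documented above: on s390 these VM attributes are reached through the generic KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR ioctls on the VM file descriptor, and the payload layout comes from the uapi hunk further down (arch/s390/include/uapi/asm/kvm.h). A minimal userspace sketch, assuming vm_fd is an already-created VM and leaving error handling to the return codes listed above:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int set_guest_tod_ext(int vm_fd, __u8 epoch_idx, __u64 tod)
{
	struct kvm_s390_vm_tod_clock gtod = {
		.epoch_idx = epoch_idx,	/* must be 0 unless facility 139 is in the CPU model */
		.tod = tod,
	};
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_TOD,
		.attr  = KVM_S390_VM_TOD_EXT,
		.addr  = (__u64)&gtod,
	};

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}

Setting a non-zero epoch_idx only succeeds when the guest CPU model includes the multiple-epoch facility (facility 139); otherwise the handler returns -EINVAL, matching the description above.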
diff --git a/MAINTAINERS b/MAINTAINERS index bf206bd9f056..722c7aec88c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -7464,18 +7464,30 @@ L: kvm@vger.kernel.org | |||
| 7464 | W: http://www.linux-kvm.org | 7464 | W: http://www.linux-kvm.org |
| 7465 | T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git | 7465 | T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git |
| 7466 | S: Supported | 7466 | S: Supported |
| 7467 | F: Documentation/*/kvm*.txt | ||
| 7468 | F: Documentation/virtual/kvm/ | 7467 | F: Documentation/virtual/kvm/ |
| 7469 | F: arch/*/kvm/ | 7468 | F: include/trace/events/kvm.h |
| 7470 | F: arch/x86/kernel/kvm.c | 7469 | F: include/uapi/asm-generic/kvm* |
| 7471 | F: arch/x86/kernel/kvmclock.c | ||
| 7472 | F: arch/*/include/asm/kvm* | ||
| 7473 | F: include/linux/kvm* | ||
| 7474 | F: include/uapi/linux/kvm* | 7470 | F: include/uapi/linux/kvm* |
| 7475 | F: virt/kvm/ | 7471 | F: include/asm-generic/kvm* |
| 7472 | F: include/linux/kvm* | ||
| 7473 | F: include/kvm/iodev.h | ||
| 7474 | F: virt/kvm/* | ||
| 7476 | F: tools/kvm/ | 7475 | F: tools/kvm/ |
| 7477 | 7476 | ||
| 7478 | KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V | 7477 | KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) |
| 7478 | M: Paolo Bonzini <pbonzini@redhat.com> | ||
| 7479 | M: Radim Krčmář <rkrcmar@redhat.com> | ||
| 7480 | L: kvm@vger.kernel.org | ||
| 7481 | W: http://www.linux-kvm.org | ||
| 7482 | T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git | ||
| 7483 | S: Supported | ||
| 7484 | F: arch/x86/kvm/ | ||
| 7485 | F: arch/x86/include/uapi/asm/kvm* | ||
| 7486 | F: arch/x86/include/asm/kvm* | ||
| 7487 | F: arch/x86/kernel/kvm.c | ||
| 7488 | F: arch/x86/kernel/kvmclock.c | ||
| 7489 | |||
| 7490 | KERNEL VIRTUAL MACHINE FOR AMD-V (KVM/amd) | ||
| 7479 | M: Joerg Roedel <joro@8bytes.org> | 7491 | M: Joerg Roedel <joro@8bytes.org> |
| 7480 | L: kvm@vger.kernel.org | 7492 | L: kvm@vger.kernel.org |
| 7481 | W: http://www.linux-kvm.org/ | 7493 | W: http://www.linux-kvm.org/ |
| @@ -7483,7 +7495,7 @@ S: Maintained | |||
| 7483 | F: arch/x86/include/asm/svm.h | 7495 | F: arch/x86/include/asm/svm.h |
| 7484 | F: arch/x86/kvm/svm.c | 7496 | F: arch/x86/kvm/svm.c |
| 7485 | 7497 | ||
| 7486 | KERNEL VIRTUAL MACHINE (KVM) FOR ARM | 7498 | KERNEL VIRTUAL MACHINE FOR ARM (KVM/arm) |
| 7487 | M: Christoffer Dall <christoffer.dall@linaro.org> | 7499 | M: Christoffer Dall <christoffer.dall@linaro.org> |
| 7488 | M: Marc Zyngier <marc.zyngier@arm.com> | 7500 | M: Marc Zyngier <marc.zyngier@arm.com> |
| 7489 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) | 7501 | L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) |
| @@ -7497,14 +7509,16 @@ F: arch/arm/kvm/ | |||
| 7497 | F: virt/kvm/arm/ | 7509 | F: virt/kvm/arm/ |
| 7498 | F: include/kvm/arm_* | 7510 | F: include/kvm/arm_* |
| 7499 | 7511 | ||
| 7500 | KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC | 7512 | KERNEL VIRTUAL MACHINE FOR POWERPC (KVM/powerpc) |
| 7501 | M: Alexander Graf <agraf@suse.com> | 7513 | M: Alexander Graf <agraf@suse.com> |
| 7502 | L: kvm-ppc@vger.kernel.org | 7514 | L: kvm-ppc@vger.kernel.org |
| 7503 | W: http://www.linux-kvm.org/ | 7515 | W: http://www.linux-kvm.org/ |
| 7504 | T: git git://github.com/agraf/linux-2.6.git | 7516 | T: git git://github.com/agraf/linux-2.6.git |
| 7505 | S: Supported | 7517 | S: Supported |
| 7518 | F: arch/powerpc/include/uapi/asm/kvm* | ||
| 7506 | F: arch/powerpc/include/asm/kvm* | 7519 | F: arch/powerpc/include/asm/kvm* |
| 7507 | F: arch/powerpc/kvm/ | 7520 | F: arch/powerpc/kvm/ |
| 7521 | F: arch/powerpc/kernel/kvm* | ||
| 7508 | 7522 | ||
| 7509 | KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) | 7523 | KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) |
| 7510 | M: Christoffer Dall <christoffer.dall@linaro.org> | 7524 | M: Christoffer Dall <christoffer.dall@linaro.org> |
| @@ -7531,7 +7545,8 @@ L: linux-s390@vger.kernel.org | |||
| 7531 | W: http://www.ibm.com/developerworks/linux/linux390/ | 7545 | W: http://www.ibm.com/developerworks/linux/linux390/ |
| 7532 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git | 7546 | T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git |
| 7533 | S: Supported | 7547 | S: Supported |
| 7534 | F: Documentation/s390/kvm.txt | 7548 | F: arch/s390/include/uapi/asm/kvm* |
| 7549 | F: arch/s390/include/asm/gmap.h | ||
| 7535 | F: arch/s390/include/asm/kvm* | 7550 | F: arch/s390/include/asm/kvm* |
| 7536 | F: arch/s390/kvm/ | 7551 | F: arch/s390/kvm/ |
| 7537 | F: arch/s390/mm/gmap.c | 7552 | F: arch/s390/mm/gmap.c |
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index ebf020b02bc8..c8781450905b 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h | |||
| @@ -227,7 +227,6 @@ | |||
| 227 | 227 | ||
| 228 | #define HSR_DABT_S1PTW (_AC(1, UL) << 7) | 228 | #define HSR_DABT_S1PTW (_AC(1, UL) << 7) |
| 229 | #define HSR_DABT_CM (_AC(1, UL) << 8) | 229 | #define HSR_DABT_CM (_AC(1, UL) << 8) |
| 230 | #define HSR_DABT_EA (_AC(1, UL) << 9) | ||
| 231 | 230 | ||
| 232 | #define kvm_arm_exception_type \ | 231 | #define kvm_arm_exception_type \ |
| 233 | {0, "RESET" }, \ | 232 | {0, "RESET" }, \ |
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9a8a45aaf19a..98089ffd91bb 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h | |||
| @@ -149,11 +149,6 @@ static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) | |||
| 149 | return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; | 149 | return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; |
| 150 | } | 150 | } |
| 151 | 151 | ||
| 152 | static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) | ||
| 153 | { | ||
| 154 | return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_EA; | ||
| 155 | } | ||
| 156 | |||
| 157 | static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) | 152 | static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) |
| 158 | { | 153 | { |
| 159 | return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; | 154 | return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; |
| @@ -206,6 +201,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) | |||
| 206 | return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; | 201 | return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; |
| 207 | } | 202 | } |
| 208 | 203 | ||
| 204 | static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) | ||
| 205 | { | ||
| 206 | switch (kvm_vcpu_trap_get_fault_type(vcpu)) { | ||
| 207 | case FSC_SEA: | ||
| 208 | case FSC_SEA_TTW0: | ||
| 209 | case FSC_SEA_TTW1: | ||
| 210 | case FSC_SEA_TTW2: | ||
| 211 | case FSC_SEA_TTW3: | ||
| 212 | case FSC_SECC: | ||
| 213 | case FSC_SECC_TTW0: | ||
| 214 | case FSC_SECC_TTW1: | ||
| 215 | case FSC_SECC_TTW2: | ||
| 216 | case FSC_SECC_TTW3: | ||
| 217 | return true; | ||
| 218 | default: | ||
| 219 | return false; | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 209 | static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) | 223 | static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) |
| 210 | { | 224 | { |
| 211 | return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; | 225 | return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; |
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 54442e375354..cf8bf6bf87c4 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c | |||
| @@ -67,7 +67,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 67 | if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { | 67 | if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { |
| 68 | trace_kvm_wfx(*vcpu_pc(vcpu), true); | 68 | trace_kvm_wfx(*vcpu_pc(vcpu), true); |
| 69 | vcpu->stat.wfe_exit_stat++; | 69 | vcpu->stat.wfe_exit_stat++; |
| 70 | kvm_vcpu_on_spin(vcpu); | 70 | kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); |
| 71 | } else { | 71 | } else { |
| 72 | trace_kvm_wfx(*vcpu_pc(vcpu), false); | 72 | trace_kvm_wfx(*vcpu_pc(vcpu), false); |
| 73 | vcpu->stat.wfi_exit_stat++; | 73 | vcpu->stat.wfi_exit_stat++; |
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fe39e6841326..e5df3fce0008 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h | |||
| @@ -188,11 +188,6 @@ static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) | |||
| 188 | return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; | 188 | return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; |
| 189 | } | 189 | } |
| 190 | 190 | ||
| 191 | static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) | ||
| 192 | { | ||
| 193 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA); | ||
| 194 | } | ||
| 195 | |||
| 196 | static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) | 191 | static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) |
| 197 | { | 192 | { |
| 198 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); | 193 | return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); |
| @@ -240,6 +235,25 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) | |||
| 240 | return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; | 235 | return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; |
| 241 | } | 236 | } |
| 242 | 237 | ||
| 238 | static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) | ||
| 239 | { | ||
| 240 | switch (kvm_vcpu_trap_get_fault_type(vcpu)) { | ||
| 241 | case FSC_SEA: | ||
| 242 | case FSC_SEA_TTW0: | ||
| 243 | case FSC_SEA_TTW1: | ||
| 244 | case FSC_SEA_TTW2: | ||
| 245 | case FSC_SEA_TTW3: | ||
| 246 | case FSC_SECC: | ||
| 247 | case FSC_SECC_TTW0: | ||
| 248 | case FSC_SECC_TTW1: | ||
| 249 | case FSC_SECC_TTW2: | ||
| 250 | case FSC_SECC_TTW3: | ||
| 251 | return true; | ||
| 252 | default: | ||
| 253 | return false; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 243 | static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) | 257 | static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) |
| 244 | { | 258 | { |
| 245 | u32 esr = kvm_vcpu_get_hsr(vcpu); | 259 | u32 esr = kvm_vcpu_get_hsr(vcpu); |
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 17d8a1677a0b..7debb74843a0 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c | |||
| @@ -84,7 +84,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 84 | if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { | 84 | if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { |
| 85 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); | 85 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); |
| 86 | vcpu->stat.wfe_exit_stat++; | 86 | vcpu->stat.wfe_exit_stat++; |
| 87 | kvm_vcpu_on_spin(vcpu); | 87 | kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu)); |
| 88 | } else { | 88 | } else { |
| 89 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); | 89 | trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); |
| 90 | vcpu->stat.wfi_exit_stat++; | 90 | vcpu->stat.wfi_exit_stat++; |
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 116786d2e8e8..c77d508b7462 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c | |||
| @@ -208,29 +208,12 @@ static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu, | |||
| 208 | static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | 208 | static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p, |
| 209 | const struct sys_reg_desc *r, u8 apr) | 209 | const struct sys_reg_desc *r, u8 apr) |
| 210 | { | 210 | { |
| 211 | struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; | ||
| 212 | u8 idx = r->Op2 & 3; | 211 | u8 idx = r->Op2 & 3; |
| 213 | 212 | ||
| 214 | /* | 213 | if (idx > vgic_v3_max_apr_idx(vcpu)) |
| 215 | * num_pri_bits are initialized with HW supported values. | 214 | goto err; |
| 216 | * We can rely safely on num_pri_bits even if VM has not | ||
| 217 | * restored ICC_CTLR_EL1 before restoring APnR registers. | ||
| 218 | */ | ||
| 219 | switch (vgic_v3_cpu->num_pri_bits) { | ||
| 220 | case 7: | ||
| 221 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
| 222 | break; | ||
| 223 | case 6: | ||
| 224 | if (idx > 1) | ||
| 225 | goto err; | ||
| 226 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
| 227 | break; | ||
| 228 | default: | ||
| 229 | if (idx > 0) | ||
| 230 | goto err; | ||
| 231 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
| 232 | } | ||
| 233 | 215 | ||
| 216 | vgic_v3_access_apr_reg(vcpu, p, apr, idx); | ||
| 234 | return true; | 217 | return true; |
| 235 | err: | 218 | err: |
| 236 | if (!p->is_write) | 219 | if (!p->is_write) |
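The simplified access_gic_aprn() above relies on a vgic_v3_max_apr_idx() helper that is not part of this excerpt (it is added by the "Extract GICv3 max APRn index calculation" commit in the shortlog). Presumably it encodes the same num_pri_bits mapping that the deleted switch statement open-coded; a sketch consistent with that logic:

/*
 * num_pri_bits is initialized from the hardware, so it is safe to rely on
 * even before the guest has restored ICC_CTLR_EL1.
 */
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;

	switch (cpu_if->num_pri_bits) {
	case 7:
		return 3;	/* APR0..APR3 valid */
	case 6:
		return 1;	/* APR0..APR1 valid */
	default:
		return 0;	/* only APR0 valid */
	}
}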
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d4b2ad18eef2..bce2a6431430 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c | |||
| @@ -98,6 +98,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 98 | return !!(vcpu->arch.pending_exceptions); | 98 | return !!(vcpu->arch.pending_exceptions); |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
| 102 | { | ||
| 103 | return false; | ||
| 104 | } | ||
| 105 | |||
| 101 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 106 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
| 102 | { | 107 | { |
| 103 | return 1; | 108 | return 1; |
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f28d21c69f79..508275bb05d5 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h | |||
| @@ -104,6 +104,7 @@ | |||
| 104 | #define HPTE_R_C ASM_CONST(0x0000000000000080) | 104 | #define HPTE_R_C ASM_CONST(0x0000000000000080) |
| 105 | #define HPTE_R_R ASM_CONST(0x0000000000000100) | 105 | #define HPTE_R_R ASM_CONST(0x0000000000000100) |
| 106 | #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) | 106 | #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) |
| 107 | #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) | ||
| 107 | 108 | ||
| 108 | #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) | 109 | #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) |
| 109 | #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) | 110 | #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 67075e065ef2..7c62967d672c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
| @@ -1941,6 +1941,7 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) | |||
| 1941 | rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; | 1941 | rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; |
| 1942 | ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); | 1942 | ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC); |
| 1943 | if (ret < 0) { | 1943 | if (ret < 0) { |
| 1944 | kfree(ctx); | ||
| 1944 | kvm_put_kvm(kvm); | 1945 | kvm_put_kvm(kvm); |
| 1945 | return ret; | 1946 | return ret; |
| 1946 | } | 1947 | } |
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 53766e2bc029..8f2da8bba737 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
| @@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) | |||
| 265 | { | 265 | { |
| 266 | struct kvmppc_spapr_tce_table *stt = filp->private_data; | 266 | struct kvmppc_spapr_tce_table *stt = filp->private_data; |
| 267 | struct kvmppc_spapr_tce_iommu_table *stit, *tmp; | 267 | struct kvmppc_spapr_tce_iommu_table *stit, *tmp; |
| 268 | struct kvm *kvm = stt->kvm; | ||
| 268 | 269 | ||
| 270 | mutex_lock(&kvm->lock); | ||
| 269 | list_del_rcu(&stt->list); | 271 | list_del_rcu(&stt->list); |
| 272 | mutex_unlock(&kvm->lock); | ||
| 270 | 273 | ||
| 271 | list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { | 274 | list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { |
| 272 | WARN_ON(!kref_read(&stit->kref)); | 275 | WARN_ON(!kref_read(&stit->kref)); |
| @@ -298,7 +301,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 298 | unsigned long npages, size; | 301 | unsigned long npages, size; |
| 299 | int ret = -ENOMEM; | 302 | int ret = -ENOMEM; |
| 300 | int i; | 303 | int i; |
| 301 | int fd = -1; | ||
| 302 | 304 | ||
| 303 | if (!args->size) | 305 | if (!args->size) |
| 304 | return -EINVAL; | 306 | return -EINVAL; |
| @@ -328,11 +330,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 328 | goto fail; | 330 | goto fail; |
| 329 | } | 331 | } |
| 330 | 332 | ||
| 331 | ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | ||
| 332 | stt, O_RDWR | O_CLOEXEC); | ||
| 333 | if (ret < 0) | ||
| 334 | goto fail; | ||
| 335 | |||
| 336 | mutex_lock(&kvm->lock); | 333 | mutex_lock(&kvm->lock); |
| 337 | 334 | ||
| 338 | /* Check this LIOBN hasn't been previously allocated */ | 335 | /* Check this LIOBN hasn't been previously allocated */ |
| @@ -344,17 +341,19 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
| 344 | } | 341 | } |
| 345 | } | 342 | } |
| 346 | 343 | ||
| 347 | if (!ret) { | 344 | if (!ret) |
| 345 | ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, | ||
| 346 | stt, O_RDWR | O_CLOEXEC); | ||
| 347 | |||
| 348 | if (ret >= 0) { | ||
| 348 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); | 349 | list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); |
| 349 | kvm_get_kvm(kvm); | 350 | kvm_get_kvm(kvm); |
| 350 | } | 351 | } |
| 351 | 352 | ||
| 352 | mutex_unlock(&kvm->lock); | 353 | mutex_unlock(&kvm->lock); |
| 353 | 354 | ||
| 354 | if (!ret) | 355 | if (ret >= 0) |
| 355 | return fd; | 356 | return ret; |
| 356 | |||
| 357 | put_unused_fd(fd); | ||
| 358 | 357 | ||
| 359 | fail: | 358 | fail: |
| 360 | for (i = 0; i < npages; i++) | 359 | for (i = 0; i < npages; i++) |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ebcf97cb5c98..18e974a34fce 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
| @@ -485,7 +485,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
| 485 | 485 | ||
| 486 | switch (subfunc) { | 486 | switch (subfunc) { |
| 487 | case H_VPA_REG_VPA: /* register VPA */ | 487 | case H_VPA_REG_VPA: /* register VPA */ |
| 488 | if (len < sizeof(struct lppaca)) | 488 | /* |
| 489 | * The size of our lppaca is 1kB because of the way we align | ||
| 490 | * it for the guest to avoid crossing a 4kB boundary. We only | ||
| 491 | * use 640 bytes of the structure though, so we should accept | ||
| 492 | * clients that set a size of 640. | ||
| 493 | */ | ||
| 494 | if (len < 640) | ||
| 489 | break; | 495 | break; |
| 490 | vpap = &tvcpu->arch.vpa; | 496 | vpap = &tvcpu->arch.vpa; |
| 491 | err = 0; | 497 | err = 0; |
| @@ -3336,6 +3342,14 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm, | |||
| 3336 | if (radix_enabled()) | 3342 | if (radix_enabled()) |
| 3337 | return -EINVAL; | 3343 | return -EINVAL; |
| 3338 | 3344 | ||
| 3345 | /* | ||
| 3346 | * POWER7, POWER8 and POWER9 all support 32 storage keys for data. | ||
| 3347 | * POWER7 doesn't support keys for instruction accesses, | ||
| 3348 | * POWER8 and POWER9 do. | ||
| 3349 | */ | ||
| 3350 | info->data_keys = 32; | ||
| 3351 | info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0; | ||
| 3352 | |||
| 3339 | info->flags = KVM_PPC_PAGE_SIZES_REAL; | 3353 | info->flags = KVM_PPC_PAGE_SIZES_REAL; |
| 3340 | if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) | 3354 | if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) |
| 3341 | info->flags |= KVM_PPC_1T_SEGMENTS; | 3355 | info->flags |= KVM_PPC_1T_SEGMENTS; |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fedb0139524c..4efe364f1188 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
| @@ -269,7 +269,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | |||
| 269 | if (!realmode) | 269 | if (!realmode) |
| 270 | local_irq_restore(irq_flags); | 270 | local_irq_restore(irq_flags); |
| 271 | 271 | ||
| 272 | ptel &= ~(HPTE_R_PP0 - psize); | 272 | ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); |
| 273 | ptel |= pa; | 273 | ptel |= pa; |
| 274 | 274 | ||
| 275 | if (pa) | 275 | if (pa) |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2259b6cde119..663a4a861e7f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
| @@ -982,7 +982,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | |||
| 982 | #ifdef CONFIG_KVM_XICS | 982 | #ifdef CONFIG_KVM_XICS |
| 983 | /* We are entering the guest on that thread, push VCPU to XIVE */ | 983 | /* We are entering the guest on that thread, push VCPU to XIVE */ |
| 984 | ld r10, HSTATE_XIVE_TIMA_PHYS(r13) | 984 | ld r10, HSTATE_XIVE_TIMA_PHYS(r13) |
| 985 | cmpldi cr0, r10, r0 | 985 | cmpldi cr0, r10, 0 |
| 986 | beq no_xive | 986 | beq no_xive |
| 987 | ld r11, VCPU_XIVE_SAVED_STATE(r4) | 987 | ld r11, VCPU_XIVE_SAVED_STATE(r4) |
| 988 | li r9, TM_QW1_OS | 988 | li r9, TM_QW1_OS |
| @@ -1286,7 +1286,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | |||
| 1286 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER | 1286 | cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER |
| 1287 | bne 2f | 1287 | bne 2f |
| 1288 | mfspr r3,SPRN_HDEC | 1288 | mfspr r3,SPRN_HDEC |
| 1289 | cmpwi r3,0 | 1289 | EXTEND_HDEC(r3) |
| 1290 | cmpdi r3,0 | ||
| 1290 | mr r4,r9 | 1291 | mr r4,r9 |
| 1291 | bge fast_guest_return | 1292 | bge fast_guest_return |
| 1292 | 2: | 1293 | 2: |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 32fdab57d604..f9f6468f4171 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
| @@ -455,16 +455,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, | |||
| 455 | if (err) | 455 | if (err) |
| 456 | goto free_vcpu; | 456 | goto free_vcpu; |
| 457 | 457 | ||
| 458 | if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) | 458 | if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) { |
| 459 | err = -ENOMEM; | ||
| 459 | goto uninit_vcpu; | 460 | goto uninit_vcpu; |
| 461 | } | ||
| 460 | 462 | ||
| 461 | err = kvmppc_e500_tlb_init(vcpu_e500); | 463 | err = kvmppc_e500_tlb_init(vcpu_e500); |
| 462 | if (err) | 464 | if (err) |
| 463 | goto uninit_id; | 465 | goto uninit_id; |
| 464 | 466 | ||
| 465 | vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); | 467 | vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); |
| 466 | if (!vcpu->arch.shared) | 468 | if (!vcpu->arch.shared) { |
| 469 | err = -ENOMEM; | ||
| 467 | goto uninit_tlb; | 470 | goto uninit_tlb; |
| 471 | } | ||
| 468 | 472 | ||
| 469 | return vcpu; | 473 | return vcpu; |
| 470 | 474 | ||
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index f48a0c22e8f9..d0b6b5788afc 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c | |||
| @@ -331,8 +331,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm, | |||
| 331 | goto uninit_vcpu; | 331 | goto uninit_vcpu; |
| 332 | 332 | ||
| 333 | vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | 333 | vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); |
| 334 | if (!vcpu->arch.shared) | 334 | if (!vcpu->arch.shared) { |
| 335 | err = -ENOMEM; | ||
| 335 | goto uninit_tlb; | 336 | goto uninit_tlb; |
| 337 | } | ||
| 336 | 338 | ||
| 337 | return vcpu; | 339 | return vcpu; |
| 338 | 340 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1a75c0b5f4ca..3480faaf1ef8 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -58,6 +58,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | |||
| 58 | return !!(v->arch.pending_exceptions) || kvm_request_pending(v); | 58 | return !!(v->arch.pending_exceptions) || kvm_request_pending(v); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
| 62 | { | ||
| 63 | return false; | ||
| 64 | } | ||
| 65 | |||
| 61 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 66 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
| 62 | { | 67 | { |
| 63 | return 1; | 68 | return 1; |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a409d5991934..51375e766e90 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
| @@ -226,7 +226,9 @@ struct kvm_s390_sie_block { | |||
| 226 | #define ECB3_RI 0x01 | 226 | #define ECB3_RI 0x01 |
| 227 | __u8 ecb3; /* 0x0063 */ | 227 | __u8 ecb3; /* 0x0063 */ |
| 228 | __u32 scaol; /* 0x0064 */ | 228 | __u32 scaol; /* 0x0064 */ |
| 229 | __u8 reserved68[4]; /* 0x0068 */ | 229 | __u8 reserved68; /* 0x0068 */ |
| 230 | __u8 epdx; /* 0x0069 */ | ||
| 231 | __u8 reserved6a[2]; /* 0x006a */ | ||
| 230 | __u32 todpr; /* 0x006c */ | 232 | __u32 todpr; /* 0x006c */ |
| 231 | __u8 reserved70[16]; /* 0x0070 */ | 233 | __u8 reserved70[16]; /* 0x0070 */ |
| 232 | __u64 mso; /* 0x0080 */ | 234 | __u64 mso; /* 0x0080 */ |
| @@ -265,6 +267,7 @@ struct kvm_s390_sie_block { | |||
| 265 | __u64 cbrlo; /* 0x01b8 */ | 267 | __u64 cbrlo; /* 0x01b8 */ |
| 266 | __u8 reserved1c0[8]; /* 0x01c0 */ | 268 | __u8 reserved1c0[8]; /* 0x01c0 */ |
| 267 | #define ECD_HOSTREGMGMT 0x20000000 | 269 | #define ECD_HOSTREGMGMT 0x20000000 |
| 270 | #define ECD_MEF 0x08000000 | ||
| 268 | __u32 ecd; /* 0x01c8 */ | 271 | __u32 ecd; /* 0x01c8 */ |
| 269 | __u8 reserved1cc[18]; /* 0x01cc */ | 272 | __u8 reserved1cc[18]; /* 0x01cc */ |
| 270 | __u64 pp; /* 0x01de */ | 273 | __u64 pp; /* 0x01de */ |
| @@ -739,6 +742,7 @@ struct kvm_arch{ | |||
| 739 | struct kvm_s390_cpu_model model; | 742 | struct kvm_s390_cpu_model model; |
| 740 | struct kvm_s390_crypto crypto; | 743 | struct kvm_s390_crypto crypto; |
| 741 | struct kvm_s390_vsie vsie; | 744 | struct kvm_s390_vsie vsie; |
| 745 | u8 epdx; | ||
| 742 | u64 epoch; | 746 | u64 epoch; |
| 743 | struct kvm_s390_migration_state *migration_state; | 747 | struct kvm_s390_migration_state *migration_state; |
| 744 | /* subset of available cpu features enabled by user space */ | 748 | /* subset of available cpu features enabled by user space */ |
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index ca21b28a7b17..22b0f49e87c1 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h | |||
| @@ -15,6 +15,6 @@ | |||
| 15 | #define ESSA_SET_STABLE_IF_RESIDENT 6 | 15 | #define ESSA_SET_STABLE_IF_RESIDENT 6 |
| 16 | #define ESSA_SET_STABLE_NODAT 7 | 16 | #define ESSA_SET_STABLE_NODAT 7 |
| 17 | 17 | ||
| 18 | #define ESSA_MAX ESSA_SET_STABLE_IF_RESIDENT | 18 | #define ESSA_MAX ESSA_SET_STABLE_NODAT |
| 19 | 19 | ||
| 20 | #endif | 20 | #endif |
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 69d09c39bbcd..cd7359e23d86 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
| @@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req { | |||
| 88 | /* kvm attributes for KVM_S390_VM_TOD */ | 88 | /* kvm attributes for KVM_S390_VM_TOD */ |
| 89 | #define KVM_S390_VM_TOD_LOW 0 | 89 | #define KVM_S390_VM_TOD_LOW 0 |
| 90 | #define KVM_S390_VM_TOD_HIGH 1 | 90 | #define KVM_S390_VM_TOD_HIGH 1 |
| 91 | #define KVM_S390_VM_TOD_EXT 2 | ||
| 92 | |||
| 93 | struct kvm_s390_vm_tod_clock { | ||
| 94 | __u8 epoch_idx; | ||
| 95 | __u64 tod; | ||
| 96 | }; | ||
| 91 | 97 | ||
| 92 | /* kvm attributes for KVM_S390_VM_CPU_MODEL */ | 98 | /* kvm attributes for KVM_S390_VM_CPU_MODEL */ |
| 93 | /* processor related attributes are r/w */ | 99 | /* processor related attributes are r/w */ |
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index e4d36094aceb..d93a2c0474bf 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
| @@ -150,7 +150,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) | |||
| 150 | { | 150 | { |
| 151 | VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); | 151 | VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); |
| 152 | vcpu->stat.diagnose_44++; | 152 | vcpu->stat.diagnose_44++; |
| 153 | kvm_vcpu_on_spin(vcpu); | 153 | kvm_vcpu_on_spin(vcpu, true); |
| 154 | return 0; | 154 | return 0; |
| 155 | } | 155 | } |
| 156 | 156 | ||
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index c2e0ddc1356e..bcbd86621d01 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c | |||
| @@ -308,7 +308,7 @@ static inline int in_addr_range(u64 addr, u64 a, u64 b) | |||
| 308 | return (addr >= a) && (addr <= b); | 308 | return (addr >= a) && (addr <= b); |
| 309 | else | 309 | else |
| 310 | /* "overflowing" interval */ | 310 | /* "overflowing" interval */ |
| 311 | return (addr <= a) && (addr >= b); | 311 | return (addr >= a) || (addr <= b); |
| 312 | } | 312 | } |
| 313 | 313 | ||
| 314 | #define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1) | 314 | #define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1) |
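A concrete example of the "overflowing" interval fix above (the "guestdbg: fix range check" entry in the shortlog): for a range that wraps past the top of the address space (a > b), the old condition selected exactly the complement of the intended interval. A small stand-alone check with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical wrapping range: [a, ~0] plus [0, b] */
	uint64_t a = 0xfffffffffffffff0ULL, b = 0x10;
	uint64_t addr = 0xfffffffffffffff8ULL;	/* inside the wrapping range */

	printf("old: %d\n", (addr <= a) && (addr >= b));	/* prints 0 (miss) */
	printf("new: %d\n", (addr >= a) || (addr <= b));	/* prints 1 (hit)  */
	return 0;
}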
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index a619ddae610d..a832ad031cee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
| @@ -2479,6 +2479,7 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, | |||
| 2479 | struct kvm_s390_mchk_info *mchk; | 2479 | struct kvm_s390_mchk_info *mchk; |
| 2480 | union mci mci; | 2480 | union mci mci; |
| 2481 | __u64 cr14 = 0; /* upper bits are not used */ | 2481 | __u64 cr14 = 0; /* upper bits are not used */ |
| 2482 | int rc; | ||
| 2482 | 2483 | ||
| 2483 | mci.val = mcck_info->mcic; | 2484 | mci.val = mcck_info->mcic; |
| 2484 | if (mci.sr) | 2485 | if (mci.sr) |
| @@ -2496,12 +2497,13 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, | |||
| 2496 | if (mci.ck) { | 2497 | if (mci.ck) { |
| 2497 | /* Inject the floating machine check */ | 2498 | /* Inject the floating machine check */ |
| 2498 | inti.type = KVM_S390_MCHK; | 2499 | inti.type = KVM_S390_MCHK; |
| 2499 | WARN_ON_ONCE(__inject_vm(vcpu->kvm, &inti)); | 2500 | rc = __inject_vm(vcpu->kvm, &inti); |
| 2500 | } else { | 2501 | } else { |
| 2501 | /* Inject the machine check to specified vcpu */ | 2502 | /* Inject the machine check to specified vcpu */ |
| 2502 | irq.type = KVM_S390_MCHK; | 2503 | irq.type = KVM_S390_MCHK; |
| 2503 | WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq)); | 2504 | rc = kvm_s390_inject_vcpu(vcpu, &irq); |
| 2504 | } | 2505 | } |
| 2506 | WARN_ON_ONCE(rc); | ||
| 2505 | } | 2507 | } |
| 2506 | 2508 | ||
| 2507 | int kvm_set_routing_entry(struct kvm *kvm, | 2509 | int kvm_set_routing_entry(struct kvm *kvm, |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index af09d3437631..40d0a1a97889 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -130,6 +130,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 130 | { NULL } | 130 | { NULL } |
| 131 | }; | 131 | }; |
| 132 | 132 | ||
| 133 | struct kvm_s390_tod_clock_ext { | ||
| 134 | __u8 epoch_idx; | ||
| 135 | __u64 tod; | ||
| 136 | __u8 reserved[7]; | ||
| 137 | } __packed; | ||
| 138 | |||
| 133 | /* allow nested virtualization in KVM (if enabled by user space) */ | 139 | /* allow nested virtualization in KVM (if enabled by user space) */ |
| 134 | static int nested; | 140 | static int nested; |
| 135 | module_param(nested, int, S_IRUGO); | 141 | module_param(nested, int, S_IRUGO); |
| @@ -874,6 +880,26 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, | |||
| 874 | return 0; | 880 | return 0; |
| 875 | } | 881 | } |
| 876 | 882 | ||
| 883 | static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) | ||
| 884 | { | ||
| 885 | struct kvm_s390_vm_tod_clock gtod; | ||
| 886 | |||
| 887 | if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod))) | ||
| 888 | return -EFAULT; | ||
| 889 | |||
| 890 | if (test_kvm_facility(kvm, 139)) | ||
| 891 | kvm_s390_set_tod_clock_ext(kvm, &gtod); | ||
| 892 | else if (gtod.epoch_idx == 0) | ||
| 893 | kvm_s390_set_tod_clock(kvm, gtod.tod); | ||
| 894 | else | ||
| 895 | return -EINVAL; | ||
| 896 | |||
| 897 | VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", | ||
| 898 | gtod.epoch_idx, gtod.tod); | ||
| 899 | |||
| 900 | return 0; | ||
| 901 | } | ||
| 902 | |||
| 877 | static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | 903 | static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) |
| 878 | { | 904 | { |
| 879 | u8 gtod_high; | 905 | u8 gtod_high; |
| @@ -909,6 +935,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) | |||
| 909 | return -EINVAL; | 935 | return -EINVAL; |
| 910 | 936 | ||
| 911 | switch (attr->attr) { | 937 | switch (attr->attr) { |
| 938 | case KVM_S390_VM_TOD_EXT: | ||
| 939 | ret = kvm_s390_set_tod_ext(kvm, attr); | ||
| 940 | break; | ||
| 912 | case KVM_S390_VM_TOD_HIGH: | 941 | case KVM_S390_VM_TOD_HIGH: |
| 913 | ret = kvm_s390_set_tod_high(kvm, attr); | 942 | ret = kvm_s390_set_tod_high(kvm, attr); |
| 914 | break; | 943 | break; |
| @@ -922,6 +951,43 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) | |||
| 922 | return ret; | 951 | return ret; |
| 923 | } | 952 | } |
| 924 | 953 | ||
| 954 | static void kvm_s390_get_tod_clock_ext(struct kvm *kvm, | ||
| 955 | struct kvm_s390_vm_tod_clock *gtod) | ||
| 956 | { | ||
| 957 | struct kvm_s390_tod_clock_ext htod; | ||
| 958 | |||
| 959 | preempt_disable(); | ||
| 960 | |||
| 961 | get_tod_clock_ext((char *)&htod); | ||
| 962 | |||
| 963 | gtod->tod = htod.tod + kvm->arch.epoch; | ||
| 964 | gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx; | ||
| 965 | |||
| 966 | if (gtod->tod < htod.tod) | ||
| 967 | gtod->epoch_idx += 1; | ||
| 968 | |||
| 969 | preempt_enable(); | ||
| 970 | } | ||
| 971 | |||
| 972 | static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) | ||
| 973 | { | ||
| 974 | struct kvm_s390_vm_tod_clock gtod; | ||
| 975 | |||
| 976 | memset(&gtod, 0, sizeof(gtod)); | ||
| 977 | |||
| 978 | if (test_kvm_facility(kvm, 139)) | ||
| 979 | kvm_s390_get_tod_clock_ext(kvm, &gtod); | ||
| 980 | else | ||
| 981 | gtod.tod = kvm_s390_get_tod_clock_fast(kvm); | ||
| 982 | |||
| 983 | if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod))) | ||
| 984 | return -EFAULT; | ||
| 985 | |||
| 986 | VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx", | ||
| 987 | gtod.epoch_idx, gtod.tod); | ||
| 988 | return 0; | ||
| 989 | } | ||
| 990 | |||
| 925 | static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) | 991 | static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) |
| 926 | { | 992 | { |
| 927 | u8 gtod_high = 0; | 993 | u8 gtod_high = 0; |
| @@ -954,6 +1020,9 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr) | |||
| 954 | return -EINVAL; | 1020 | return -EINVAL; |
| 955 | 1021 | ||
| 956 | switch (attr->attr) { | 1022 | switch (attr->attr) { |
| 1023 | case KVM_S390_VM_TOD_EXT: | ||
| 1024 | ret = kvm_s390_get_tod_ext(kvm, attr); | ||
| 1025 | break; | ||
| 957 | case KVM_S390_VM_TOD_HIGH: | 1026 | case KVM_S390_VM_TOD_HIGH: |
| 958 | ret = kvm_s390_get_tod_high(kvm, attr); | 1027 | ret = kvm_s390_get_tod_high(kvm, attr); |
| 959 | break; | 1028 | break; |
| @@ -1505,7 +1574,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, | |||
| 1505 | if (r < 0) | 1574 | if (r < 0) |
| 1506 | pgstev = 0; | 1575 | pgstev = 0; |
| 1507 | /* save the value */ | 1576 | /* save the value */ |
| 1508 | res[i++] = (pgstev >> 24) & 0x3; | 1577 | res[i++] = (pgstev >> 24) & 0x43; |
| 1509 | /* | 1578 | /* |
| 1510 | * if the next bit is too far away, stop. | 1579 | * if the next bit is too far away, stop. |
| 1511 | * if we reached the previous "next", find the next one | 1580 | * if we reached the previous "next", find the next one |
| @@ -1583,7 +1652,7 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, | |||
| 1583 | 1652 | ||
| 1584 | pgstev = bits[i]; | 1653 | pgstev = bits[i]; |
| 1585 | pgstev = pgstev << 24; | 1654 | pgstev = pgstev << 24; |
| 1586 | mask &= _PGSTE_GPS_USAGE_MASK; | 1655 | mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT; |
| 1587 | set_pgste_bits(kvm->mm, hva, mask, pgstev); | 1656 | set_pgste_bits(kvm->mm, hva, mask, pgstev); |
| 1588 | } | 1657 | } |
| 1589 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 1658 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
| @@ -1858,8 +1927,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 1858 | memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, | 1927 | memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, |
| 1859 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 1928 | S390_ARCH_FAC_LIST_SIZE_BYTE); |
| 1860 | 1929 | ||
| 1930 | /* we are always in czam mode - even on pre z14 machines */ | ||
| 1931 | set_kvm_facility(kvm->arch.model.fac_mask, 138); | ||
| 1932 | set_kvm_facility(kvm->arch.model.fac_list, 138); | ||
| 1933 | /* we emulate STHYI in kvm */ | ||
| 1861 | set_kvm_facility(kvm->arch.model.fac_mask, 74); | 1934 | set_kvm_facility(kvm->arch.model.fac_mask, 74); |
| 1862 | set_kvm_facility(kvm->arch.model.fac_list, 74); | 1935 | set_kvm_facility(kvm->arch.model.fac_list, 74); |
| 1936 | if (MACHINE_HAS_TLB_GUEST) { | ||
| 1937 | set_kvm_facility(kvm->arch.model.fac_mask, 147); | ||
| 1938 | set_kvm_facility(kvm->arch.model.fac_list, 147); | ||
| 1939 | } | ||
| 1863 | 1940 | ||
| 1864 | kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); | 1941 | kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); |
| 1865 | kvm->arch.model.ibc = sclp.ibc & 0x0fff; | 1942 | kvm->arch.model.ibc = sclp.ibc & 0x0fff; |
| @@ -2369,6 +2446,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 2369 | vcpu->arch.sie_block->eca |= ECA_VX; | 2446 | vcpu->arch.sie_block->eca |= ECA_VX; |
| 2370 | vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; | 2447 | vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; |
| 2371 | } | 2448 | } |
| 2449 | if (test_kvm_facility(vcpu->kvm, 139)) | ||
| 2450 | vcpu->arch.sie_block->ecd |= ECD_MEF; | ||
| 2451 | |||
| 2372 | vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) | 2452 | vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) |
| 2373 | | SDNXC; | 2453 | | SDNXC; |
| 2374 | vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; | 2454 | vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; |
| @@ -2447,6 +2527,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 2447 | return kvm_s390_vcpu_has_irq(vcpu, 0); | 2527 | return kvm_s390_vcpu_has_irq(vcpu, 0); |
| 2448 | } | 2528 | } |
| 2449 | 2529 | ||
| 2530 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
| 2531 | { | ||
| 2532 | return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE); | ||
| 2533 | } | ||
| 2534 | |||
| 2450 | void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) | 2535 | void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu) |
| 2451 | { | 2536 | { |
| 2452 | atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); | 2537 | atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20); |
| @@ -2855,6 +2940,35 @@ retry: | |||
| 2855 | return 0; | 2940 | return 0; |
| 2856 | } | 2941 | } |
| 2857 | 2942 | ||
| 2943 | void kvm_s390_set_tod_clock_ext(struct kvm *kvm, | ||
| 2944 | const struct kvm_s390_vm_tod_clock *gtod) | ||
| 2945 | { | ||
| 2946 | struct kvm_vcpu *vcpu; | ||
| 2947 | struct kvm_s390_tod_clock_ext htod; | ||
| 2948 | int i; | ||
| 2949 | |||
| 2950 | mutex_lock(&kvm->lock); | ||
| 2951 | preempt_disable(); | ||
| 2952 | |||
| 2953 | get_tod_clock_ext((char *)&htod); | ||
| 2954 | |||
| 2955 | kvm->arch.epoch = gtod->tod - htod.tod; | ||
| 2956 | kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx; | ||
| 2957 | |||
| 2958 | if (kvm->arch.epoch > gtod->tod) | ||
| 2959 | kvm->arch.epdx -= 1; | ||
| 2960 | |||
| 2961 | kvm_s390_vcpu_block_all(kvm); | ||
| 2962 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
| 2963 | vcpu->arch.sie_block->epoch = kvm->arch.epoch; | ||
| 2964 | vcpu->arch.sie_block->epdx = kvm->arch.epdx; | ||
| 2965 | } | ||
| 2966 | |||
| 2967 | kvm_s390_vcpu_unblock_all(kvm); | ||
| 2968 | preempt_enable(); | ||
| 2969 | mutex_unlock(&kvm->lock); | ||
| 2970 | } | ||
| 2971 | |||
| 2858 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) | 2972 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) |
| 2859 | { | 2973 | { |
| 2860 | struct kvm_vcpu *vcpu; | 2974 | struct kvm_vcpu *vcpu; |
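The borrow handling in kvm_s390_set_tod_clock_ext() above is easier to see with hypothetical numbers: suppose the requested guest TOD is gtod->tod = 0x10 while the current host TOD is htod.tod = 0xfffffffffffffff0. The unsigned subtraction gtod->tod - htod.tod wraps to 0x20, which is larger than gtod->tod, so a borrow from the epoch index occurred and kvm->arch.epdx is decremented by one. kvm_s390_get_tod_clock_ext(), shown earlier in this file's diff, performs the mirror-image check for a carry when adding the epoch back in.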
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6fedc8bc7a37..9f8fdd7b2311 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -272,6 +272,8 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); | |||
| 272 | int handle_sthyi(struct kvm_vcpu *vcpu); | 272 | int handle_sthyi(struct kvm_vcpu *vcpu); |
| 273 | 273 | ||
| 274 | /* implemented in kvm-s390.c */ | 274 | /* implemented in kvm-s390.c */ |
| 275 | void kvm_s390_set_tod_clock_ext(struct kvm *kvm, | ||
| 276 | const struct kvm_s390_vm_tod_clock *gtod); | ||
| 275 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); | 277 | void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); |
| 276 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); | 278 | long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); |
| 277 | int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); | 279 | int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 785ad028bde6..c954ac49eee4 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
| @@ -988,6 +988,8 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc) | |||
| 988 | if (pgstev & _PGSTE_GPS_ZERO) | 988 | if (pgstev & _PGSTE_GPS_ZERO) |
| 989 | res |= 1; | 989 | res |= 1; |
| 990 | } | 990 | } |
| 991 | if (pgstev & _PGSTE_GPS_NODAT) | ||
| 992 | res |= 0x20; | ||
| 991 | vcpu->run->s.regs.gprs[r1] = res; | 993 | vcpu->run->s.regs.gprs[r1] = res; |
| 992 | /* | 994 | /* |
| 993 | * It is possible that all the normal 511 slots were full, in which case | 995 | * It is possible that all the normal 511 slots were full, in which case |
| @@ -1027,7 +1029,9 @@ static int handle_essa(struct kvm_vcpu *vcpu) | |||
| 1027 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); | 1029 | return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); |
| 1028 | /* Check for invalid operation request code */ | 1030 | /* Check for invalid operation request code */ |
| 1029 | orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; | 1031 | orc = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; |
| 1030 | if (orc > ESSA_MAX) | 1032 | /* ORCs 0-6 are always valid */ |
| 1033 | if (orc > (test_kvm_facility(vcpu->kvm, 147) ? ESSA_SET_STABLE_NODAT | ||
| 1034 | : ESSA_SET_STABLE_IF_RESIDENT)) | ||
| 1031 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 1035 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 1032 | 1036 | ||
| 1033 | if (likely(!vcpu->kvm->arch.migration_state)) { | 1037 | if (likely(!vcpu->kvm->arch.migration_state)) { |
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 1a252f537081..9d592ef4104b 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
| @@ -155,29 +155,26 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, | |||
| 155 | return rc; | 155 | return rc; |
| 156 | } | 156 | } |
| 157 | 157 | ||
| 158 | static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) | 158 | static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, |
| 159 | u64 *status_reg) | ||
| 159 | { | 160 | { |
| 160 | int rc; | ||
| 161 | unsigned int i; | 161 | unsigned int i; |
| 162 | struct kvm_vcpu *v; | 162 | struct kvm_vcpu *v; |
| 163 | bool all_stopped = true; | ||
| 163 | 164 | ||
| 164 | switch (parameter & 0xff) { | 165 | kvm_for_each_vcpu(i, v, vcpu->kvm) { |
| 165 | case 0: | 166 | if (v == vcpu) |
| 166 | rc = SIGP_CC_NOT_OPERATIONAL; | 167 | continue; |
| 167 | break; | 168 | if (!is_vcpu_stopped(v)) |
| 168 | case 1: | 169 | all_stopped = false; |
| 169 | case 2: | ||
| 170 | kvm_for_each_vcpu(i, v, vcpu->kvm) { | ||
| 171 | v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; | ||
| 172 | kvm_clear_async_pf_completion_queue(v); | ||
| 173 | } | ||
| 174 | |||
| 175 | rc = SIGP_CC_ORDER_CODE_ACCEPTED; | ||
| 176 | break; | ||
| 177 | default: | ||
| 178 | rc = -EOPNOTSUPP; | ||
| 179 | } | 170 | } |
| 180 | return rc; | 171 | |
| 172 | *status_reg &= 0xffffffff00000000UL; | ||
| 173 | |||
| 174 | /* Reject set arch order, with czam we're always in z/Arch mode. */ | ||
| 175 | *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER : | ||
| 176 | SIGP_STATUS_INCORRECT_STATE); | ||
| 177 | return SIGP_CC_STATUS_STORED; | ||
| 181 | } | 178 | } |
| 182 | 179 | ||
| 183 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, | 180 | static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, |
| @@ -446,7 +443,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) | |||
| 446 | switch (order_code) { | 443 | switch (order_code) { |
| 447 | case SIGP_SET_ARCHITECTURE: | 444 | case SIGP_SET_ARCHITECTURE: |
| 448 | vcpu->stat.instruction_sigp_arch++; | 445 | vcpu->stat.instruction_sigp_arch++; |
| 449 | rc = __sigp_set_arch(vcpu, parameter); | 446 | rc = __sigp_set_arch(vcpu, parameter, |
| 447 | &vcpu->run->s.regs.gprs[r1]); | ||
| 450 | break; | 448 | break; |
| 451 | default: | 449 | default: |
| 452 | rc = handle_sigp_dst(vcpu, order_code, cpu_addr, | 450 | rc = handle_sigp_dst(vcpu, order_code, cpu_addr, |
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index a2e5c24f47a7..395926b8c1ed 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c | |||
| @@ -436,14 +436,6 @@ int handle_sthyi(struct kvm_vcpu *vcpu) | |||
| 436 | if (addr & ~PAGE_MASK) | 436 | if (addr & ~PAGE_MASK) |
| 437 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 437 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
| 438 | 438 | ||
| 439 | /* | ||
| 440 | * If the page has not yet been faulted in, we want to do that | ||
| 441 | * now and not after all the expensive calculations. | ||
| 442 | */ | ||
| 443 | r = write_guest(vcpu, addr, reg2, &cc, 1); | ||
| 444 | if (r) | ||
| 445 | return kvm_s390_inject_prog_cond(vcpu, r); | ||
| 446 | |||
| 447 | sctns = (void *)get_zeroed_page(GFP_KERNEL); | 439 | sctns = (void *)get_zeroed_page(GFP_KERNEL); |
| 448 | if (!sctns) | 440 | if (!sctns) |
| 449 | return -ENOMEM; | 441 | return -ENOMEM; |
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ba8203e4d516..b18b5652e5c5 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c | |||
| @@ -349,6 +349,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
| 349 | scb_s->eca |= scb_o->eca & ECA_IB; | 349 | scb_s->eca |= scb_o->eca & ECA_IB; |
| 350 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) | 350 | if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) |
| 351 | scb_s->eca |= scb_o->eca & ECA_CEI; | 351 | scb_s->eca |= scb_o->eca & ECA_CEI; |
| 352 | /* Epoch Extension */ | ||
| 353 | if (test_kvm_facility(vcpu->kvm, 139)) | ||
| 354 | scb_s->ecd |= scb_o->ecd & ECD_MEF; | ||
| 352 | 355 | ||
| 353 | prepare_ibc(vcpu, vsie_page); | 356 | prepare_ibc(vcpu, vsie_page); |
| 354 | rc = shadow_crycb(vcpu, vsie_page); | 357 | rc = shadow_crycb(vcpu, vsie_page); |
| @@ -806,8 +809,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
| 806 | { | 809 | { |
| 807 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; | 810 | struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; |
| 808 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; | 811 | struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; |
| 809 | struct mcck_volatile_info *mcck_info; | ||
| 810 | struct sie_page *sie_page; | ||
| 811 | int rc; | 812 | int rc; |
| 812 | 813 | ||
| 813 | handle_last_fault(vcpu, vsie_page); | 814 | handle_last_fault(vcpu, vsie_page); |
| @@ -831,9 +832,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
| 831 | 832 | ||
| 832 | if (rc == -EINTR) { | 833 | if (rc == -EINTR) { |
| 833 | VCPU_EVENT(vcpu, 3, "%s", "machine check"); | 834 | VCPU_EVENT(vcpu, 3, "%s", "machine check"); |
| 834 | sie_page = container_of(scb_s, struct sie_page, sie_block); | 835 | kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info); |
| 835 | mcck_info = &sie_page->mcck_info; | ||
| 836 | kvm_s390_reinject_machine_check(vcpu, mcck_info); | ||
| 837 | return 0; | 836 | return 0; |
| 838 | } | 837 | } |
| 839 | 838 | ||
| @@ -919,6 +918,13 @@ static void register_shadow_scb(struct kvm_vcpu *vcpu, | |||
| 919 | */ | 918 | */ |
| 920 | preempt_disable(); | 919 | preempt_disable(); |
| 921 | scb_s->epoch += vcpu->kvm->arch.epoch; | 920 | scb_s->epoch += vcpu->kvm->arch.epoch; |
| 921 | |||
| 922 | if (scb_s->ecd & ECD_MEF) { | ||
| 923 | scb_s->epdx += vcpu->kvm->arch.epdx; | ||
| 924 | if (scb_s->epoch < vcpu->kvm->arch.epoch) | ||
| 925 | scb_s->epdx += 1; | ||
| 926 | } | ||
| 927 | |||
| 922 | preempt_enable(); | 928 | preempt_enable(); |
| 923 | } | 929 | } |
| 924 | 930 | ||
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4198a71b8fdd..ae677f814bc0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c | |||
| @@ -919,7 +919,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, | |||
| 919 | case ESSA_GET_STATE: | 919 | case ESSA_GET_STATE: |
| 920 | break; | 920 | break; |
| 921 | case ESSA_SET_STABLE: | 921 | case ESSA_SET_STABLE: |
| 922 | pgstev &= ~_PGSTE_GPS_USAGE_MASK; | 922 | pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); |
| 923 | pgstev |= _PGSTE_GPS_USAGE_STABLE; | 923 | pgstev |= _PGSTE_GPS_USAGE_STABLE; |
| 924 | break; | 924 | break; |
| 925 | case ESSA_SET_UNUSED: | 925 | case ESSA_SET_UNUSED: |
| @@ -965,6 +965,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, | |||
| 965 | pgstev |= _PGSTE_GPS_USAGE_STABLE; | 965 | pgstev |= _PGSTE_GPS_USAGE_STABLE; |
| 966 | } | 966 | } |
| 967 | break; | 967 | break; |
| 968 | case ESSA_SET_STABLE_NODAT: | ||
| 969 | pgstev &= ~_PGSTE_GPS_USAGE_MASK; | ||
| 970 | pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT; | ||
| 971 | break; | ||
| 968 | default: | 972 | default: |
| 969 | /* we should never get here! */ | 973 | /* we should never get here! */ |
| 970 | break; | 974 | break; |
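With the no-DAT enhancement, ESSA gains a SET_STABLE_NODAT order that marks a page stable with the no-DAT bit set, while a plain SET_STABLE now also clears that bit so the page becomes DAT-usable again. A small sketch of the two transitions on a pgste-style status word; the mask values are placeholders, not the real _PGSTE_* constants:

```c
/* Sketch of the two ESSA transitions above; mask values are placeholders. */
#include <stdint.h>

#define USAGE_MASK	0x0c00000000000000ULL	/* stand-in for _PGSTE_GPS_USAGE_MASK */
#define USAGE_STABLE	0x0000000000000000ULL	/* stand-in for _PGSTE_GPS_USAGE_STABLE */
#define NODAT		0x0020000000000000ULL	/* stand-in for _PGSTE_GPS_NODAT */

static uint64_t essa_set_stable(uint64_t pgstev)
{
	pgstev &= ~(USAGE_MASK | NODAT);	/* stable again implies DAT is allowed */
	return pgstev | USAGE_STABLE;
}

static uint64_t essa_set_stable_nodat(uint64_t pgstev)
{
	pgstev &= ~USAGE_MASK;
	return pgstev | USAGE_STABLE | NODAT;
}
```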
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 29d72bf8ed2b..70dd8f17d054 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c | |||
| @@ -83,6 +83,7 @@ static struct facility_def facility_defs[] = { | |||
| 83 | 78, /* enhanced-DAT 2 */ | 83 | 78, /* enhanced-DAT 2 */ |
| 84 | 130, /* instruction-execution-protection */ | 84 | 130, /* instruction-execution-protection */ |
| 85 | 131, /* enhanced-SOP 2 and side-effect */ | 85 | 131, /* enhanced-SOP 2 and side-effect */ |
| 86 | 139, /* multiple epoch facility */ | ||
| 86 | 146, /* msa extension 8 */ | 87 | 146, /* msa extension 8 */ |
| 87 | -1 /* END */ | 88 | -1 /* END */ |
| 88 | } | 89 | } |
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 42bbbf0f173d..2519c6c801c9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h | |||
| @@ -288,6 +288,7 @@ | |||
| 288 | #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ | 288 | #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ |
| 289 | #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ | 289 | #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ |
| 290 | #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ | 290 | #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ |
| 291 | #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ | ||
| 291 | 292 | ||
| 292 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ | 293 | /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ |
| 293 | #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ | 294 | #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index fde36f189836..fa2558e12024 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -219,8 +219,8 @@ struct x86_emulate_ops { | |||
| 219 | struct x86_instruction_info *info, | 219 | struct x86_instruction_info *info, |
| 220 | enum x86_intercept_stage stage); | 220 | enum x86_intercept_stage stage); |
| 221 | 221 | ||
| 222 | void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, | 222 | bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, |
| 223 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 223 | u32 *ecx, u32 *edx, bool check_limit); |
| 224 | void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); | 224 | void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); |
| 225 | 225 | ||
| 226 | unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); | 226 | unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 369e41c23f07..8844eee290b2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -79,15 +79,14 @@ | |||
| 79 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | 79 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ |
| 80 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | 80 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) |
| 81 | 81 | ||
| 82 | #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL | ||
| 83 | #define CR3_PCID_INVD BIT_64(63) | 82 | #define CR3_PCID_INVD BIT_64(63) |
| 84 | #define CR4_RESERVED_BITS \ | 83 | #define CR4_RESERVED_BITS \ |
| 85 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | 84 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ |
| 86 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | 85 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ |
| 87 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | 86 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ |
| 88 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | 87 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ |
| 89 | | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \ | 88 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ |
| 90 | | X86_CR4_PKE)) | 89 | | X86_CR4_SMAP | X86_CR4_PKE)) |
| 91 | 90 | ||
| 92 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | 91 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) |
| 93 | 92 | ||
| @@ -204,7 +203,6 @@ enum { | |||
| 204 | #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) | 203 | #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) |
| 205 | 204 | ||
| 206 | #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ | 205 | #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ |
| 207 | PFERR_USER_MASK | \ | ||
| 208 | PFERR_WRITE_MASK | \ | 206 | PFERR_WRITE_MASK | \ |
| 209 | PFERR_PRESENT_MASK) | 207 | PFERR_PRESENT_MASK) |
| 210 | 208 | ||
| @@ -317,15 +315,17 @@ struct kvm_pio_request { | |||
| 317 | int size; | 315 | int size; |
| 318 | }; | 316 | }; |
| 319 | 317 | ||
| 318 | #define PT64_ROOT_MAX_LEVEL 5 | ||
| 319 | |||
| 320 | struct rsvd_bits_validate { | 320 | struct rsvd_bits_validate { |
| 321 | u64 rsvd_bits_mask[2][4]; | 321 | u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; |
| 322 | u64 bad_mt_xwr; | 322 | u64 bad_mt_xwr; |
| 323 | }; | 323 | }; |
| 324 | 324 | ||
| 325 | /* | 325 | /* |
| 326 | * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level | 326 | * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, |
| 327 | * 32-bit). The kvm_mmu structure abstracts the details of the current mmu | 327 | * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the |
| 328 | * mode. | 328 | * current mmu mode. |
| 329 | */ | 329 | */ |
| 330 | struct kvm_mmu { | 330 | struct kvm_mmu { |
| 331 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); | 331 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); |
| @@ -548,8 +548,8 @@ struct kvm_vcpu_arch { | |||
| 548 | 548 | ||
| 549 | struct kvm_queued_exception { | 549 | struct kvm_queued_exception { |
| 550 | bool pending; | 550 | bool pending; |
| 551 | bool injected; | ||
| 551 | bool has_error_code; | 552 | bool has_error_code; |
| 552 | bool reinject; | ||
| 553 | u8 nr; | 553 | u8 nr; |
| 554 | u32 error_code; | 554 | u32 error_code; |
| 555 | u8 nested_apf; | 555 | u8 nested_apf; |
| @@ -687,8 +687,12 @@ struct kvm_vcpu_arch { | |||
| 687 | int pending_ioapic_eoi; | 687 | int pending_ioapic_eoi; |
| 688 | int pending_external_vector; | 688 | int pending_external_vector; |
| 689 | 689 | ||
| 690 | /* GPA available (AMD only) */ | 690 | /* GPA available */ |
| 691 | bool gpa_available; | 691 | bool gpa_available; |
| 692 | gpa_t gpa_val; | ||
| 693 | |||
| 694 | /* be preempted when it's in kernel-mode(cpl=0) */ | ||
| 695 | bool preempted_in_kernel; | ||
| 692 | }; | 696 | }; |
| 693 | 697 | ||
| 694 | struct kvm_lpage_info { | 698 | struct kvm_lpage_info { |
| @@ -979,7 +983,7 @@ struct kvm_x86_ops { | |||
| 979 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 983 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
| 980 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | 984 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
| 981 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 985 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
| 982 | int (*get_tdp_level)(void); | 986 | int (*get_tdp_level)(struct kvm_vcpu *vcpu); |
| 983 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 987 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
| 984 | int (*get_lpage_level)(void); | 988 | int (*get_lpage_level)(void); |
| 985 | bool (*rdtscp_supported)(void); | 989 | bool (*rdtscp_supported)(void); |
| @@ -1297,20 +1301,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |||
| 1297 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | 1301 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); |
| 1298 | } | 1302 | } |
| 1299 | 1303 | ||
| 1300 | static inline u64 get_canonical(u64 la) | ||
| 1301 | { | ||
| 1302 | return ((int64_t)la << 16) >> 16; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | static inline bool is_noncanonical_address(u64 la) | ||
| 1306 | { | ||
| 1307 | #ifdef CONFIG_X86_64 | ||
| 1308 | return get_canonical(la) != la; | ||
| 1309 | #else | ||
| 1310 | return false; | ||
| 1311 | #endif | ||
| 1312 | } | ||
| 1313 | |||
| 1314 | #define TSS_IOPB_BASE_OFFSET 0x66 | 1304 | #define TSS_IOPB_BASE_OFFSET 0x66 |
| 1315 | #define TSS_BASE_SIZE 0x68 | 1305 | #define TSS_BASE_SIZE 0x68 |
| 1316 | #define TSS_IOPB_SIZE (65536 / 8) | 1306 | #define TSS_IOPB_SIZE (65536 / 8) |
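The fixed 48-bit get_canonical()/is_noncanonical_address() helpers disappear from kvm_host.h because with LA57 the canonical check has to honour the guest's virtual-address width (48 or 57 bits); the emulate.c hunks further down use the width-parameterized form get_canonical(la, va_bits). A standalone sketch of that parameterized check, not the kernel helper itself:

```c
/*
 * Width-parameterized canonical check, matching the get_canonical(la, va_bits)
 * shape used in emulate.c below; standalone sketch, not the kernel helper.
 */
#include <stdbool.h>
#include <stdint.h>

static uint64_t canonical(uint64_t la, unsigned int va_bits)
{
	/* sign-extend from bit (va_bits - 1) upwards */
	return (uint64_t)(((int64_t)la << (64 - va_bits)) >> (64 - va_bits));
}

static bool is_noncanonical(uint64_t la, unsigned int va_bits)
{
	return canonical(la, va_bits) != la;
}
```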
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 58fffe79e417..14835dd205a5 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
| @@ -107,6 +107,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 107 | #define V_IRQ_SHIFT 8 | 107 | #define V_IRQ_SHIFT 8 |
| 108 | #define V_IRQ_MASK (1 << V_IRQ_SHIFT) | 108 | #define V_IRQ_MASK (1 << V_IRQ_SHIFT) |
| 109 | 109 | ||
| 110 | #define V_GIF_SHIFT 9 | ||
| 111 | #define V_GIF_MASK (1 << V_GIF_SHIFT) | ||
| 112 | |||
| 110 | #define V_INTR_PRIO_SHIFT 16 | 113 | #define V_INTR_PRIO_SHIFT 16 |
| 111 | #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) | 114 | #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) |
| 112 | 115 | ||
| @@ -116,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 116 | #define V_INTR_MASKING_SHIFT 24 | 119 | #define V_INTR_MASKING_SHIFT 24 |
| 117 | #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) | 120 | #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) |
| 118 | 121 | ||
| 122 | #define V_GIF_ENABLE_SHIFT 25 | ||
| 123 | #define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) | ||
| 124 | |||
| 119 | #define AVIC_ENABLE_SHIFT 31 | 125 | #define AVIC_ENABLE_SHIFT 31 |
| 120 | #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) | 126 | #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) |
| 121 | 127 | ||
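The new int_ctl bits implement Virtual GIF: V_GIF (bit 9) holds the guest's view of the global interrupt flag and V_GIF_ENABLE (bit 25) tells the CPU to take STGI/CLGI from that bit rather than exiting. A hedged sketch of how the masks apply to an int_ctl word; the helper names are made up for the example, only the bit layout comes from svm.h above:

```c
/* Illustrative use of the V_GIF bits on a VMCB int_ctl word; helper names are invented. */
#include <stdbool.h>
#include <stdint.h>

#define V_GIF_MASK		(1u << 9)
#define V_GIF_ENABLE_MASK	(1u << 25)

static void vgif_enable(uint32_t *int_ctl)
{
	*int_ctl |= V_GIF_ENABLE_MASK;		/* hardware now honours V_GIF */
}

static void vgif_set(uint32_t *int_ctl, bool gif)
{
	if (gif)
		*int_ctl |= V_GIF_MASK;
	else
		*int_ctl &= ~V_GIF_MASK;
}

static bool vgif_get(uint32_t int_ctl)
{
	return int_ctl & V_GIF_MASK;
}
```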
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 35cd06f636ab..caec8417539f 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -72,6 +72,7 @@ | |||
| 72 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 72 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
| 73 | #define SECONDARY_EXEC_RDRAND 0x00000800 | 73 | #define SECONDARY_EXEC_RDRAND 0x00000800 |
| 74 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 74 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
| 75 | #define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 | ||
| 75 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | 76 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 |
| 76 | #define SECONDARY_EXEC_RDSEED 0x00010000 | 77 | #define SECONDARY_EXEC_RDSEED 0x00010000 |
| 77 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 | 78 | #define SECONDARY_EXEC_ENABLE_PML 0x00020000 |
| @@ -114,6 +115,10 @@ | |||
| 114 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | 115 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 |
| 115 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 | 116 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 |
| 116 | 117 | ||
| 118 | /* VMFUNC functions */ | ||
| 119 | #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 | ||
| 120 | #define VMFUNC_EPTP_ENTRIES 512 | ||
| 121 | |||
| 117 | static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) | 122 | static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic) |
| 118 | { | 123 | { |
| 119 | return vmx_basic & GENMASK_ULL(30, 0); | 124 | return vmx_basic & GENMASK_ULL(30, 0); |
| @@ -187,6 +192,8 @@ enum vmcs_field { | |||
| 187 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 192 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
| 188 | POSTED_INTR_DESC_ADDR = 0x00002016, | 193 | POSTED_INTR_DESC_ADDR = 0x00002016, |
| 189 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, | 194 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, |
| 195 | VM_FUNCTION_CONTROL = 0x00002018, | ||
| 196 | VM_FUNCTION_CONTROL_HIGH = 0x00002019, | ||
| 190 | EPT_POINTER = 0x0000201a, | 197 | EPT_POINTER = 0x0000201a, |
| 191 | EPT_POINTER_HIGH = 0x0000201b, | 198 | EPT_POINTER_HIGH = 0x0000201b, |
| 192 | EOI_EXIT_BITMAP0 = 0x0000201c, | 199 | EOI_EXIT_BITMAP0 = 0x0000201c, |
| @@ -197,6 +204,8 @@ enum vmcs_field { | |||
| 197 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | 204 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, |
| 198 | EOI_EXIT_BITMAP3 = 0x00002022, | 205 | EOI_EXIT_BITMAP3 = 0x00002022, |
| 199 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | 206 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, |
| 207 | EPTP_LIST_ADDRESS = 0x00002024, | ||
| 208 | EPTP_LIST_ADDRESS_HIGH = 0x00002025, | ||
| 200 | VMREAD_BITMAP = 0x00002026, | 209 | VMREAD_BITMAP = 0x00002026, |
| 201 | VMWRITE_BITMAP = 0x00002028, | 210 | VMWRITE_BITMAP = 0x00002028, |
| 202 | XSS_EXIT_BITMAP = 0x0000202C, | 211 | XSS_EXIT_BITMAP = 0x0000202C, |
| @@ -444,6 +453,7 @@ enum vmcs_field { | |||
| 444 | 453 | ||
| 445 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) | 454 | #define VMX_EPT_EXECUTE_ONLY_BIT (1ull) |
| 446 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) | 455 | #define VMX_EPT_PAGE_WALK_4_BIT (1ull << 6) |
| 456 | #define VMX_EPT_PAGE_WALK_5_BIT (1ull << 7) | ||
| 447 | #define VMX_EPTP_UC_BIT (1ull << 8) | 457 | #define VMX_EPTP_UC_BIT (1ull << 8) |
| 448 | #define VMX_EPTP_WB_BIT (1ull << 14) | 458 | #define VMX_EPTP_WB_BIT (1ull << 14) |
| 449 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 459 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
| @@ -459,12 +469,14 @@ enum vmcs_field { | |||
| 459 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ | 469 | #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ |
| 460 | #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ | 470 | #define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */ |
| 461 | 471 | ||
| 462 | #define VMX_EPT_DEFAULT_GAW 3 | ||
| 463 | #define VMX_EPT_MAX_GAW 0x4 | ||
| 464 | #define VMX_EPT_MT_EPTE_SHIFT 3 | 472 | #define VMX_EPT_MT_EPTE_SHIFT 3 |
| 465 | #define VMX_EPT_GAW_EPTP_SHIFT 3 | 473 | #define VMX_EPTP_PWL_MASK 0x38ull |
| 466 | #define VMX_EPT_AD_ENABLE_BIT (1ull << 6) | 474 | #define VMX_EPTP_PWL_4 0x18ull |
| 467 | #define VMX_EPT_DEFAULT_MT 0x6ull | 475 | #define VMX_EPTP_PWL_5 0x20ull |
| 476 | #define VMX_EPTP_AD_ENABLE_BIT (1ull << 6) | ||
| 477 | #define VMX_EPTP_MT_MASK 0x7ull | ||
| 478 | #define VMX_EPTP_MT_WB 0x6ull | ||
| 479 | #define VMX_EPTP_MT_UC 0x0ull | ||
| 468 | #define VMX_EPT_READABLE_MASK 0x1ull | 480 | #define VMX_EPT_READABLE_MASK 0x1ull |
| 469 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 481 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
| 470 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 482 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
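Two themes in this header: the VMFUNC plumbing (a secondary execution control, a VM-function control field, and an EPTP list of up to 512 candidate pointers for EPTP switching) and the reworked EPTP format constants, where bits 2:0 hold the memory type, bits 5:3 hold the page-walk length minus one (hence PWL_4 = 0x18 and PWL_5 = 0x20), and bit 6 enables accessed/dirty flags. A sketch of composing an EPTP value from those fields; a simplified stand-in, not the kernel's own helper:

```c
/* Simplified composition of an EPT pointer from the VMX_EPTP_* fields above. */
#include <stdbool.h>
#include <stdint.h>

#define EPTP_PWL_4		0x18ull		/* 4-level walk */
#define EPTP_PWL_5		0x20ull		/* 5-level walk */
#define EPTP_MT_WB		0x6ull		/* write-back memory type */
#define EPTP_AD_ENABLE		(1ull << 6)	/* accessed/dirty flags */

static uint64_t make_eptp(uint64_t root_hpa, bool five_level, bool ad_bits)
{
	uint64_t eptp = EPTP_MT_WB;

	eptp |= five_level ? EPTP_PWL_5 : EPTP_PWL_4;
	if (ad_bits)
		eptp |= EPTP_AD_ENABLE;
	return eptp | (root_hpa & ~0xfffull);	/* 4K-aligned root of the EPT tables */
}
```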
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19adbb418443..0099e10eb045 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -126,16 +126,20 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
| 126 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); | 126 | best->ebx = xstate_required_size(vcpu->arch.xcr0, true); |
| 127 | 127 | ||
| 128 | /* | 128 | /* |
| 129 | * The existing code assumes virtual address is 48-bit in the canonical | 129 | * The existing code assumes virtual address is 48-bit or 57-bit in the |
| 130 | * address checks; exit if it is ever changed. | 130 | * canonical address checks; exit if it is ever changed. |
| 131 | */ | 131 | */ |
| 132 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | 132 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
| 133 | if (best && ((best->eax & 0xff00) >> 8) != 48 && | 133 | if (best) { |
| 134 | ((best->eax & 0xff00) >> 8) != 0) | 134 | int vaddr_bits = (best->eax & 0xff00) >> 8; |
| 135 | return -EINVAL; | 135 | |
| 136 | if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0) | ||
| 137 | return -EINVAL; | ||
| 138 | } | ||
| 136 | 139 | ||
| 137 | /* Update physical-address width */ | 140 | /* Update physical-address width */ |
| 138 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | 141 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |
| 142 | kvm_mmu_reset_context(vcpu); | ||
| 139 | 143 | ||
| 140 | kvm_pmu_refresh(vcpu); | 144 | kvm_pmu_refresh(vcpu); |
| 141 | return 0; | 145 | return 0; |
| @@ -383,7 +387,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 383 | 387 | ||
| 384 | /* cpuid 7.0.ecx*/ | 388 | /* cpuid 7.0.ecx*/ |
| 385 | const u32 kvm_cpuid_7_0_ecx_x86_features = | 389 | const u32 kvm_cpuid_7_0_ecx_x86_features = |
| 386 | F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); | 390 | F(AVX512VBMI) | F(LA57) | F(PKU) | |
| 391 | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); | ||
| 387 | 392 | ||
| 388 | /* cpuid 7.0.edx*/ | 393 | /* cpuid 7.0.edx*/ |
| 389 | const u32 kvm_cpuid_7_0_edx_x86_features = | 394 | const u32 kvm_cpuid_7_0_edx_x86_features = |
| @@ -853,16 +858,24 @@ static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu, | |||
| 853 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); | 858 | return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index); |
| 854 | } | 859 | } |
| 855 | 860 | ||
| 856 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | 861 | bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, |
| 862 | u32 *ecx, u32 *edx, bool check_limit) | ||
| 857 | { | 863 | { |
| 858 | u32 function = *eax, index = *ecx; | 864 | u32 function = *eax, index = *ecx; |
| 859 | struct kvm_cpuid_entry2 *best; | 865 | struct kvm_cpuid_entry2 *best; |
| 866 | bool entry_found = true; | ||
| 860 | 867 | ||
| 861 | best = kvm_find_cpuid_entry(vcpu, function, index); | 868 | best = kvm_find_cpuid_entry(vcpu, function, index); |
| 862 | 869 | ||
| 863 | if (!best) | 870 | if (!best) { |
| 871 | entry_found = false; | ||
| 872 | if (!check_limit) | ||
| 873 | goto out; | ||
| 874 | |||
| 864 | best = check_cpuid_limit(vcpu, function, index); | 875 | best = check_cpuid_limit(vcpu, function, index); |
| 876 | } | ||
| 865 | 877 | ||
| 878 | out: | ||
| 866 | if (best) { | 879 | if (best) { |
| 867 | *eax = best->eax; | 880 | *eax = best->eax; |
| 868 | *ebx = best->ebx; | 881 | *ebx = best->ebx; |
| @@ -870,7 +883,8 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | |||
| 870 | *edx = best->edx; | 883 | *edx = best->edx; |
| 871 | } else | 884 | } else |
| 872 | *eax = *ebx = *ecx = *edx = 0; | 885 | *eax = *ebx = *ecx = *edx = 0; |
| 873 | trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx); | 886 | trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found); |
| 887 | return entry_found; | ||
| 874 | } | 888 | } |
| 875 | EXPORT_SYMBOL_GPL(kvm_cpuid); | 889 | EXPORT_SYMBOL_GPL(kvm_cpuid); |
| 876 | 890 | ||
| @@ -883,7 +897,7 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
| 883 | 897 | ||
| 884 | eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 898 | eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 885 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 899 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 886 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); | 900 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); |
| 887 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); | 901 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); |
| 888 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); | 902 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); |
| 889 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); | 903 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); |
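kvm_update_cpuid() now also accepts a 57-bit guest virtual-address width (LA57), and kvm_cpuid() grows a check_limit parameter plus a bool return: with check_limit the lookup can still be redirected to the vendor's highest basic leaf, without it the caller learns whether the requested leaf really exists. A worked example of the CPUID 0x80000008 EAX decode used in the address-width check; the eax value is illustrative:

```c
/*
 * CPUID 0x80000008 EAX layout used above: bits 7:0 = physical address
 * bits, bits 15:8 = virtual address bits. The eax value is made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t eax = 0x00003928;			/* illustrative: 57 vaddr bits, 40 paddr bits */
	unsigned int paddr_bits = eax & 0xff;
	unsigned int vaddr_bits = (eax & 0xff00) >> 8;

	/* kvm_update_cpuid() only accepts 0, 48 or 57 virtual address bits */
	printf("paddr=%u vaddr=%u\n", paddr_bits, vaddr_bits);
	return 0;
}
```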
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index da6728383052..1ea3c0e1e3a9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include "x86.h" | 4 | #include "x86.h" |
| 5 | #include <asm/cpu.h> | 5 | #include <asm/cpu.h> |
| 6 | #include <asm/processor.h> | ||
| 6 | 7 | ||
| 7 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); | 8 | int kvm_update_cpuid(struct kvm_vcpu *vcpu); |
| 8 | bool kvm_mpx_supported(void); | 9 | bool kvm_mpx_supported(void); |
| @@ -20,7 +21,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
| 20 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, | 21 | int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, |
| 21 | struct kvm_cpuid2 *cpuid, | 22 | struct kvm_cpuid2 *cpuid, |
| 22 | struct kvm_cpuid_entry2 __user *entries); | 23 | struct kvm_cpuid_entry2 __user *entries); |
| 23 | void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); | 24 | bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, |
| 25 | u32 *ecx, u32 *edx, bool check_limit); | ||
| 24 | 26 | ||
| 25 | int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); | 27 | int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); |
| 26 | 28 | ||
| @@ -29,95 +31,87 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | |||
| 29 | return vcpu->arch.maxphyaddr; | 31 | return vcpu->arch.maxphyaddr; |
| 30 | } | 32 | } |
| 31 | 33 | ||
| 32 | static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) | 34 | struct cpuid_reg { |
| 33 | { | 35 | u32 function; |
| 34 | struct kvm_cpuid_entry2 *best; | 36 | u32 index; |
| 35 | 37 | int reg; | |
| 36 | if (!static_cpu_has(X86_FEATURE_XSAVE)) | 38 | }; |
| 37 | return false; | ||
| 38 | |||
| 39 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 40 | return best && (best->ecx & bit(X86_FEATURE_XSAVE)); | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu) | ||
| 44 | { | ||
| 45 | struct kvm_cpuid_entry2 *best; | ||
| 46 | |||
| 47 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 48 | return best && (best->edx & bit(X86_FEATURE_MTRR)); | ||
| 49 | } | ||
| 50 | |||
| 51 | static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu) | ||
| 52 | { | ||
| 53 | struct kvm_cpuid_entry2 *best; | ||
| 54 | |||
| 55 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 56 | return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST)); | ||
| 57 | } | ||
| 58 | 39 | ||
| 59 | static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu) | 40 | static const struct cpuid_reg reverse_cpuid[] = { |
| 60 | { | 41 | [CPUID_1_EDX] = { 1, 0, CPUID_EDX}, |
| 61 | struct kvm_cpuid_entry2 *best; | 42 | [CPUID_8000_0001_EDX] = {0x80000001, 0, CPUID_EDX}, |
| 62 | 43 | [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, | |
| 63 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | 44 | [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, |
| 64 | return best && (best->ebx & bit(X86_FEATURE_SMEP)); | 45 | [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, |
| 65 | } | 46 | [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, |
| 47 | [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, | ||
| 48 | [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, | ||
| 49 | [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, | ||
| 50 | [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, | ||
| 51 | [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, | ||
| 52 | [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, | ||
| 53 | [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, | ||
| 54 | [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, | ||
| 55 | [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, | ||
| 56 | }; | ||
| 66 | 57 | ||
| 67 | static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu) | 58 | static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) |
| 68 | { | 59 | { |
| 69 | struct kvm_cpuid_entry2 *best; | 60 | unsigned x86_leaf = x86_feature / 32; |
| 70 | |||
| 71 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 72 | return best && (best->ebx & bit(X86_FEATURE_SMAP)); | ||
| 73 | } | ||
| 74 | 61 | ||
| 75 | static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu) | 62 | BUILD_BUG_ON(!__builtin_constant_p(x86_leaf)); |
| 76 | { | 63 | BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); |
| 77 | struct kvm_cpuid_entry2 *best; | 64 | BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); |
| 78 | 65 | ||
| 79 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | 66 | return reverse_cpuid[x86_leaf]; |
| 80 | return best && (best->ebx & bit(X86_FEATURE_FSGSBASE)); | ||
| 81 | } | 67 | } |
| 82 | 68 | ||
| 83 | static inline bool guest_cpuid_has_pku(struct kvm_vcpu *vcpu) | 69 | static __always_inline int *guest_cpuid_get_register(struct kvm_vcpu *vcpu, unsigned x86_feature) |
| 84 | { | 70 | { |
| 85 | struct kvm_cpuid_entry2 *best; | 71 | struct kvm_cpuid_entry2 *entry; |
| 86 | 72 | const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature); | |
| 87 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 88 | return best && (best->ecx & bit(X86_FEATURE_PKU)); | ||
| 89 | } | ||
| 90 | 73 | ||
| 91 | static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu) | 74 | entry = kvm_find_cpuid_entry(vcpu, cpuid.function, cpuid.index); |
| 92 | { | 75 | if (!entry) |
| 93 | struct kvm_cpuid_entry2 *best; | 76 | return NULL; |
| 94 | 77 | ||
| 95 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 78 | switch (cpuid.reg) { |
| 96 | return best && (best->edx & bit(X86_FEATURE_LM)); | 79 | case CPUID_EAX: |
| 80 | return &entry->eax; | ||
| 81 | case CPUID_EBX: | ||
| 82 | return &entry->ebx; | ||
| 83 | case CPUID_ECX: | ||
| 84 | return &entry->ecx; | ||
| 85 | case CPUID_EDX: | ||
| 86 | return &entry->edx; | ||
| 87 | default: | ||
| 88 | BUILD_BUG(); | ||
| 89 | return NULL; | ||
| 90 | } | ||
| 97 | } | 91 | } |
| 98 | 92 | ||
| 99 | static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu) | 93 | static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu, unsigned x86_feature) |
| 100 | { | 94 | { |
| 101 | struct kvm_cpuid_entry2 *best; | 95 | int *reg; |
| 102 | 96 | ||
| 103 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 97 | if (x86_feature == X86_FEATURE_XSAVE && |
| 104 | return best && (best->ecx & bit(X86_FEATURE_OSVW)); | 98 | !static_cpu_has(X86_FEATURE_XSAVE)) |
| 105 | } | 99 | return false; |
| 106 | 100 | ||
| 107 | static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) | 101 | reg = guest_cpuid_get_register(vcpu, x86_feature); |
| 108 | { | 102 | if (!reg) |
| 109 | struct kvm_cpuid_entry2 *best; | 103 | return false; |
| 110 | 104 | ||
| 111 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 105 | return *reg & bit(x86_feature); |
| 112 | return best && (best->ecx & bit(X86_FEATURE_PCID)); | ||
| 113 | } | 106 | } |
| 114 | 107 | ||
| 115 | static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) | 108 | static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu, unsigned x86_feature) |
| 116 | { | 109 | { |
| 117 | struct kvm_cpuid_entry2 *best; | 110 | int *reg; |
| 118 | 111 | ||
| 119 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | 112 | reg = guest_cpuid_get_register(vcpu, x86_feature); |
| 120 | return best && (best->ecx & bit(X86_FEATURE_X2APIC)); | 113 | if (reg) |
| 114 | *reg &= ~bit(x86_feature); | ||
| 121 | } | 115 | } |
| 122 | 116 | ||
| 123 | static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) | 117 | static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) |
| @@ -128,58 +122,6 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) | |||
| 128 | return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; | 122 | return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; |
| 129 | } | 123 | } |
| 130 | 124 | ||
| 131 | static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) | ||
| 132 | { | ||
| 133 | struct kvm_cpuid_entry2 *best; | ||
| 134 | |||
| 135 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
| 136 | return best && (best->edx & bit(X86_FEATURE_GBPAGES)); | ||
| 137 | } | ||
| 138 | |||
| 139 | static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) | ||
| 140 | { | ||
| 141 | struct kvm_cpuid_entry2 *best; | ||
| 142 | |||
| 143 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 144 | return best && (best->ebx & bit(X86_FEATURE_RTM)); | ||
| 145 | } | ||
| 146 | |||
| 147 | static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) | ||
| 148 | { | ||
| 149 | struct kvm_cpuid_entry2 *best; | ||
| 150 | |||
| 151 | best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
| 152 | return best && (best->ebx & bit(X86_FEATURE_MPX)); | ||
| 153 | } | ||
| 154 | |||
| 155 | static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) | ||
| 156 | { | ||
| 157 | struct kvm_cpuid_entry2 *best; | ||
| 158 | |||
| 159 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
| 160 | return best && (best->edx & bit(X86_FEATURE_RDTSCP)); | ||
| 161 | } | ||
| 162 | |||
| 163 | /* | ||
| 164 | * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 | ||
| 165 | */ | ||
| 166 | #define BIT_NRIPS 3 | ||
| 167 | |||
| 168 | static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) | ||
| 169 | { | ||
| 170 | struct kvm_cpuid_entry2 *best; | ||
| 171 | |||
| 172 | best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); | ||
| 173 | |||
| 174 | /* | ||
| 175 | * NRIPS is a scattered cpuid feature, so we can't use | ||
| 176 | * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit | ||
| 177 | * position 8, not 3). | ||
| 178 | */ | ||
| 179 | return best && (best->edx & bit(BIT_NRIPS)); | ||
| 180 | } | ||
| 181 | #undef BIT_NRIPS | ||
| 182 | |||
| 183 | static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) | 125 | static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) |
| 184 | { | 126 | { |
| 185 | struct kvm_cpuid_entry2 *best; | 127 | struct kvm_cpuid_entry2 *best; |
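The per-feature guest_cpuid_has_xxx() helpers collapse into one table-driven guest_cpuid_has(): the feature word (x86_feature / 32) indexes reverse_cpuid[] to find the CPUID function, index and register the feature lives in, and bit() then tests bit x86_feature % 32 of that register. A usage sketch of the mapping, with the kernel's X86_FEATURE_* encoding only mimicked here (X86_FEATURE_MPX is word 9, bit 14 upstream):

```c
/* How a feature number decomposes into a reverse_cpuid[] word and a register bit. */
#include <stdint.h>
#include <stdio.h>

#define FEATURE(word, bit)	((word) * 32 + (bit))
#define X86_FEATURE_MPX		FEATURE(9, 14)	/* word 9 == CPUID.7.0:EBX, bit 14 */

int main(void)
{
	unsigned int f = X86_FEATURE_MPX;
	unsigned int leaf_word = f / 32;	/* index into reverse_cpuid[]          */
	uint32_t mask = 1u << (f % 32);		/* bit tested in the selected register */

	printf("word=%u mask=0x%08x\n", leaf_word, mask);	/* word=9 mask=0x00004000 */
	return 0;
}
```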
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fb0055953fbc..16bf6655aa85 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | 28 | ||
| 29 | #include "x86.h" | 29 | #include "x86.h" |
| 30 | #include "tss.h" | 30 | #include "tss.h" |
| 31 | #include "mmu.h" | ||
| 31 | 32 | ||
| 32 | /* | 33 | /* |
| 33 | * Operand types | 34 | * Operand types |
| @@ -688,16 +689,18 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, | |||
| 688 | ulong la; | 689 | ulong la; |
| 689 | u32 lim; | 690 | u32 lim; |
| 690 | u16 sel; | 691 | u16 sel; |
| 692 | u8 va_bits; | ||
| 691 | 693 | ||
| 692 | la = seg_base(ctxt, addr.seg) + addr.ea; | 694 | la = seg_base(ctxt, addr.seg) + addr.ea; |
| 693 | *max_size = 0; | 695 | *max_size = 0; |
| 694 | switch (mode) { | 696 | switch (mode) { |
| 695 | case X86EMUL_MODE_PROT64: | 697 | case X86EMUL_MODE_PROT64: |
| 696 | *linear = la; | 698 | *linear = la; |
| 697 | if (is_noncanonical_address(la)) | 699 | va_bits = ctxt_virt_addr_bits(ctxt); |
| 700 | if (get_canonical(la, va_bits) != la) | ||
| 698 | goto bad; | 701 | goto bad; |
| 699 | 702 | ||
| 700 | *max_size = min_t(u64, ~0u, (1ull << 48) - la); | 703 | *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); |
| 701 | if (size > *max_size) | 704 | if (size > *max_size) |
| 702 | goto bad; | 705 | goto bad; |
| 703 | break; | 706 | break; |
| @@ -1748,8 +1751,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 1748 | sizeof(base3), &ctxt->exception); | 1751 | sizeof(base3), &ctxt->exception); |
| 1749 | if (ret != X86EMUL_CONTINUE) | 1752 | if (ret != X86EMUL_CONTINUE) |
| 1750 | return ret; | 1753 | return ret; |
| 1751 | if (is_noncanonical_address(get_desc_base(&seg_desc) | | 1754 | if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | |
| 1752 | ((u64)base3 << 32))) | 1755 | ((u64)base3 << 32), ctxt)) |
| 1753 | return emulate_gp(ctxt, 0); | 1756 | return emulate_gp(ctxt, 0); |
| 1754 | } | 1757 | } |
| 1755 | load: | 1758 | load: |
| @@ -2333,7 +2336,7 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) | |||
| 2333 | 2336 | ||
| 2334 | eax = 0x80000001; | 2337 | eax = 0x80000001; |
| 2335 | ecx = 0; | 2338 | ecx = 0; |
| 2336 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 2339 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
| 2337 | return edx & bit(X86_FEATURE_LM); | 2340 | return edx & bit(X86_FEATURE_LM); |
| 2338 | } | 2341 | } |
| 2339 | 2342 | ||
| @@ -2636,7 +2639,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) | |||
| 2636 | u32 eax, ebx, ecx, edx; | 2639 | u32 eax, ebx, ecx, edx; |
| 2637 | 2640 | ||
| 2638 | eax = ecx = 0; | 2641 | eax = ecx = 0; |
| 2639 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 2642 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
| 2640 | return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx | 2643 | return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx |
| 2641 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx | 2644 | && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx |
| 2642 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; | 2645 | && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx; |
| @@ -2656,7 +2659,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) | |||
| 2656 | 2659 | ||
| 2657 | eax = 0x00000000; | 2660 | eax = 0x00000000; |
| 2658 | ecx = 0x00000000; | 2661 | ecx = 0x00000000; |
| 2659 | ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 2662 | ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
| 2660 | /* | 2663 | /* |
| 2661 | * Intel ("GenuineIntel") | 2664 | * Intel ("GenuineIntel") |
| 2662 | * remark: Intel CPUs only support "syscall" in 64bit | 2665 | * remark: Intel CPUs only support "syscall" in 64bit |
| @@ -2840,8 +2843,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) | |||
| 2840 | ss_sel = cs_sel + 8; | 2843 | ss_sel = cs_sel + 8; |
| 2841 | cs.d = 0; | 2844 | cs.d = 0; |
| 2842 | cs.l = 1; | 2845 | cs.l = 1; |
| 2843 | if (is_noncanonical_address(rcx) || | 2846 | if (emul_is_noncanonical_address(rcx, ctxt) || |
| 2844 | is_noncanonical_address(rdx)) | 2847 | emul_is_noncanonical_address(rdx, ctxt)) |
| 2845 | return emulate_gp(ctxt, 0); | 2848 | return emulate_gp(ctxt, 0); |
| 2846 | break; | 2849 | break; |
| 2847 | } | 2850 | } |
| @@ -3551,7 +3554,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) | |||
| 3551 | /* | 3554 | /* |
| 3552 | * Check MOVBE is set in the guest-visible CPUID leaf. | 3555 | * Check MOVBE is set in the guest-visible CPUID leaf. |
| 3553 | */ | 3556 | */ |
| 3554 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3557 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
| 3555 | if (!(ecx & FFL(MOVBE))) | 3558 | if (!(ecx & FFL(MOVBE))) |
| 3556 | return emulate_ud(ctxt); | 3559 | return emulate_ud(ctxt); |
| 3557 | 3560 | ||
| @@ -3756,7 +3759,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt) | |||
| 3756 | if (rc != X86EMUL_CONTINUE) | 3759 | if (rc != X86EMUL_CONTINUE) |
| 3757 | return rc; | 3760 | return rc; |
| 3758 | if (ctxt->mode == X86EMUL_MODE_PROT64 && | 3761 | if (ctxt->mode == X86EMUL_MODE_PROT64 && |
| 3759 | is_noncanonical_address(desc_ptr.address)) | 3762 | emul_is_noncanonical_address(desc_ptr.address, ctxt)) |
| 3760 | return emulate_gp(ctxt, 0); | 3763 | return emulate_gp(ctxt, 0); |
| 3761 | if (lgdt) | 3764 | if (lgdt) |
| 3762 | ctxt->ops->set_gdt(ctxt, &desc_ptr); | 3765 | ctxt->ops->set_gdt(ctxt, &desc_ptr); |
| @@ -3865,7 +3868,7 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) | |||
| 3865 | 3868 | ||
| 3866 | eax = reg_read(ctxt, VCPU_REGS_RAX); | 3869 | eax = reg_read(ctxt, VCPU_REGS_RAX); |
| 3867 | ecx = reg_read(ctxt, VCPU_REGS_RCX); | 3870 | ecx = reg_read(ctxt, VCPU_REGS_RCX); |
| 3868 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3871 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true); |
| 3869 | *reg_write(ctxt, VCPU_REGS_RAX) = eax; | 3872 | *reg_write(ctxt, VCPU_REGS_RAX) = eax; |
| 3870 | *reg_write(ctxt, VCPU_REGS_RBX) = ebx; | 3873 | *reg_write(ctxt, VCPU_REGS_RBX) = ebx; |
| 3871 | *reg_write(ctxt, VCPU_REGS_RCX) = ecx; | 3874 | *reg_write(ctxt, VCPU_REGS_RCX) = ecx; |
| @@ -3924,7 +3927,7 @@ static int check_fxsr(struct x86_emulate_ctxt *ctxt) | |||
| 3924 | { | 3927 | { |
| 3925 | u32 eax = 1, ebx, ecx = 0, edx; | 3928 | u32 eax = 1, ebx, ecx = 0, edx; |
| 3926 | 3929 | ||
| 3927 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | 3930 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false); |
| 3928 | if (!(edx & FFL(FXSR))) | 3931 | if (!(edx & FFL(FXSR))) |
| 3929 | return emulate_ud(ctxt); | 3932 | return emulate_ud(ctxt); |
| 3930 | 3933 | ||
| @@ -4097,8 +4100,17 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) | |||
| 4097 | u64 rsvd = 0; | 4100 | u64 rsvd = 0; |
| 4098 | 4101 | ||
| 4099 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); | 4102 | ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); |
| 4100 | if (efer & EFER_LMA) | 4103 | if (efer & EFER_LMA) { |
| 4101 | rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; | 4104 | u64 maxphyaddr; |
| 4105 | u32 eax = 0x80000008; | ||
| 4106 | |||
| 4107 | if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, | ||
| 4108 | NULL, false)) | ||
| 4109 | maxphyaddr = eax & 0xff; | ||
| 4110 | else | ||
| 4111 | maxphyaddr = 36; | ||
| 4112 | rsvd = rsvd_bits(maxphyaddr, 62); | ||
| 4113 | } | ||
| 4102 | 4114 | ||
| 4103 | if (new_val & rsvd) | 4115 | if (new_val & rsvd) |
| 4104 | return emulate_gp(ctxt, 0); | 4116 | return emulate_gp(ctxt, 0); |
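The emulator changes follow the same two threads: canonical checks now take the address width from ctxt_virt_addr_bits(), and check_cr_write() replaces the fixed CR3_L_MODE_RESERVED_BITS with a mask computed from the guest's MAXPHYADDR (CPUID 0x80000008, falling back to 36 bits when the leaf is absent, which is why get_cpuid() now reports whether it found the leaf). A worked example of that reserved-bit mask; rsvd_bits() here mirrors the shape of KVM's helper and the maxphyaddr value is illustrative:

```c
/* Worked example of the CR3 reserved-bit mask rsvd_bits(maxphyaddr, 62). */
#include <stdint.h>
#include <stdio.h>

static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;	/* contiguous bits s..e set */
}

int main(void)
{
	unsigned int maxphyaddr = 40;			/* illustrative MAXPHYADDR */
	uint64_t rsvd = rsvd_bits(maxphyaddr, 62);	/* bits 40..62 must be clear in CR3 */

	printf("rsvd = 0x%016llx\n", (unsigned long long)rsvd);
	/* 0x7fffff0000000000: everything above MAXPHYADDR except bit 63 (CR3_PCID_INVD) */
	return 0;
}
```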
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 337b6d2730fa..dc97f2544b6f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
| @@ -1160,6 +1160,12 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1160 | return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), | 1160 | return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), |
| 1161 | pdata); | 1161 | pdata); |
| 1162 | } | 1162 | } |
| 1163 | case HV_X64_MSR_TSC_FREQUENCY: | ||
| 1164 | data = (u64)vcpu->arch.virtual_tsc_khz * 1000; | ||
| 1165 | break; | ||
| 1166 | case HV_X64_MSR_APIC_FREQUENCY: | ||
| 1167 | data = APIC_BUS_FREQUENCY; | ||
| 1168 | break; | ||
| 1163 | default: | 1169 | default: |
| 1164 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1170 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
| 1165 | return 1; | 1171 | return 1; |
| @@ -1268,7 +1274,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
| 1268 | 1274 | ||
| 1269 | switch (code) { | 1275 | switch (code) { |
| 1270 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: | 1276 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: |
| 1271 | kvm_vcpu_on_spin(vcpu); | 1277 | kvm_vcpu_on_spin(vcpu, true); |
| 1272 | break; | 1278 | break; |
| 1273 | case HVCALL_POST_MESSAGE: | 1279 | case HVCALL_POST_MESSAGE: |
| 1274 | case HVCALL_SIGNAL_EVENT: | 1280 | case HVCALL_SIGNAL_EVENT: |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index e1e89ee4af75..9add410f195f 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS | 4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS |
| 5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ | 5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ |
| 6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
| 7 | | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) | 7 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) |
| 8 | 8 | ||
| 9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | 9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, |
| 10 | enum kvm_reg reg) | 10 | enum kvm_reg reg) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 589dcc117086..aaf10b6f5380 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -54,8 +54,6 @@ | |||
| 54 | #define PRIu64 "u" | 54 | #define PRIu64 "u" |
| 55 | #define PRIo64 "o" | 55 | #define PRIo64 "o" |
| 56 | 56 | ||
| 57 | #define APIC_BUS_CYCLE_NS 1 | ||
| 58 | |||
| 59 | /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ | 57 | /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ |
| 60 | #define apic_debug(fmt, arg...) | 58 | #define apic_debug(fmt, arg...) |
| 61 | 59 | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 29caa2c3dff9..215721e1426a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -12,6 +12,9 @@ | |||
| 12 | #define KVM_APIC_SHORT_MASK 0xc0000 | 12 | #define KVM_APIC_SHORT_MASK 0xc0000 |
| 13 | #define KVM_APIC_DEST_MASK 0x800 | 13 | #define KVM_APIC_DEST_MASK 0x800 |
| 14 | 14 | ||
| 15 | #define APIC_BUS_CYCLE_NS 1 | ||
| 16 | #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) | ||
| 17 | |||
| 15 | struct kvm_timer { | 18 | struct kvm_timer { |
| 16 | struct hrtimer timer; | 19 | struct hrtimer timer; |
| 17 | s64 period; /* unit: ns */ | 20 | s64 period; /* unit: ns */ |
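The two emulated Hyper-V MSRs are pure arithmetic on values KVM already tracks: HV_X64_MSR_TSC_FREQUENCY is virtual_tsc_khz scaled to Hz, and HV_X64_MSR_APIC_FREQUENCY derives from the APIC bus cycle time that moves from lapic.c into lapic.h (1 ns per cycle, i.e. 1 GHz). Spelled out, with an illustrative TSC value:

```c
/* The frequency values behind the two new Hyper-V MSRs; virtual_tsc_khz is illustrative. */
#include <stdint.h>
#include <stdio.h>

#define APIC_BUS_CYCLE_NS	1
#define APIC_BUS_FREQUENCY	(1000000000ULL / APIC_BUS_CYCLE_NS)	/* 1 GHz */

int main(void)
{
	uint64_t virtual_tsc_khz = 2496000;		/* e.g. a 2.496 GHz guest TSC */
	uint64_t tsc_hz = virtual_tsc_khz * 1000;	/* HV_X64_MSR_TSC_FREQUENCY */

	printf("tsc=%llu Hz apic=%llu Hz\n",
	       (unsigned long long)tsc_hz,
	       (unsigned long long)APIC_BUS_FREQUENCY);
	return 0;
}
```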
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 04d750813c9d..eca30c1eb1d9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -2169,8 +2169,8 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
| 2169 | } | 2169 | } |
| 2170 | 2170 | ||
| 2171 | struct mmu_page_path { | 2171 | struct mmu_page_path { |
| 2172 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL]; | 2172 | struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL]; |
| 2173 | unsigned int idx[PT64_ROOT_LEVEL]; | 2173 | unsigned int idx[PT64_ROOT_MAX_LEVEL]; |
| 2174 | }; | 2174 | }; |
| 2175 | 2175 | ||
| 2176 | #define for_each_sp(pvec, sp, parents, i) \ | 2176 | #define for_each_sp(pvec, sp, parents, i) \ |
| @@ -2385,8 +2385,8 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, | |||
| 2385 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; | 2385 | iterator->shadow_addr = vcpu->arch.mmu.root_hpa; |
| 2386 | iterator->level = vcpu->arch.mmu.shadow_root_level; | 2386 | iterator->level = vcpu->arch.mmu.shadow_root_level; |
| 2387 | 2387 | ||
| 2388 | if (iterator->level == PT64_ROOT_LEVEL && | 2388 | if (iterator->level == PT64_ROOT_4LEVEL && |
| 2389 | vcpu->arch.mmu.root_level < PT64_ROOT_LEVEL && | 2389 | vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL && |
| 2390 | !vcpu->arch.mmu.direct_map) | 2390 | !vcpu->arch.mmu.direct_map) |
| 2391 | --iterator->level; | 2391 | --iterator->level; |
| 2392 | 2392 | ||
| @@ -2610,9 +2610,7 @@ static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, | |||
| 2610 | 2610 | ||
| 2611 | sp = list_last_entry(&kvm->arch.active_mmu_pages, | 2611 | sp = list_last_entry(&kvm->arch.active_mmu_pages, |
| 2612 | struct kvm_mmu_page, link); | 2612 | struct kvm_mmu_page, link); |
| 2613 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | 2613 | return kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); |
| 2614 | |||
| 2615 | return true; | ||
| 2616 | } | 2614 | } |
| 2617 | 2615 | ||
| 2618 | /* | 2616 | /* |
| @@ -3262,7 +3260,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
| 3262 | 3260 | ||
| 3263 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 3261 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
| 3264 | gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); | 3262 | gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); |
| 3265 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu); | 3263 | static int make_mmu_pages_available(struct kvm_vcpu *vcpu); |
| 3266 | 3264 | ||
| 3267 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | 3265 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
| 3268 | gfn_t gfn, bool prefault) | 3266 | gfn_t gfn, bool prefault) |
| @@ -3302,7 +3300,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
| 3302 | spin_lock(&vcpu->kvm->mmu_lock); | 3300 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3303 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3301 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
| 3304 | goto out_unlock; | 3302 | goto out_unlock; |
| 3305 | make_mmu_pages_available(vcpu); | 3303 | if (make_mmu_pages_available(vcpu) < 0) |
| 3304 | goto out_unlock; | ||
| 3306 | if (likely(!force_pt_level)) | 3305 | if (likely(!force_pt_level)) |
| 3307 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3306 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
| 3308 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); | 3307 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); |
| @@ -3326,8 +3325,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
| 3326 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 3325 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
| 3327 | return; | 3326 | return; |
| 3328 | 3327 | ||
| 3329 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL && | 3328 | if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL && |
| 3330 | (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL || | 3329 | (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL || |
| 3331 | vcpu->arch.mmu.direct_map)) { | 3330 | vcpu->arch.mmu.direct_map)) { |
| 3332 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3331 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 3333 | 3332 | ||
| @@ -3379,10 +3378,14 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
| 3379 | struct kvm_mmu_page *sp; | 3378 | struct kvm_mmu_page *sp; |
| 3380 | unsigned i; | 3379 | unsigned i; |
| 3381 | 3380 | ||
| 3382 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 3381 | if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) { |
| 3383 | spin_lock(&vcpu->kvm->mmu_lock); | 3382 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3384 | make_mmu_pages_available(vcpu); | 3383 | if(make_mmu_pages_available(vcpu) < 0) { |
| 3385 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); | 3384 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 3385 | return 1; | ||
| 3386 | } | ||
| 3387 | sp = kvm_mmu_get_page(vcpu, 0, 0, | ||
| 3388 | vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); | ||
| 3386 | ++sp->root_count; | 3389 | ++sp->root_count; |
| 3387 | spin_unlock(&vcpu->kvm->mmu_lock); | 3390 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 3388 | vcpu->arch.mmu.root_hpa = __pa(sp->spt); | 3391 | vcpu->arch.mmu.root_hpa = __pa(sp->spt); |
| @@ -3392,7 +3395,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
| 3392 | 3395 | ||
| 3393 | MMU_WARN_ON(VALID_PAGE(root)); | 3396 | MMU_WARN_ON(VALID_PAGE(root)); |
| 3394 | spin_lock(&vcpu->kvm->mmu_lock); | 3397 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3395 | make_mmu_pages_available(vcpu); | 3398 | if (make_mmu_pages_available(vcpu) < 0) { |
| 3399 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 3400 | return 1; | ||
| 3401 | } | ||
| 3396 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), | 3402 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
| 3397 | i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); | 3403 | i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); |
| 3398 | root = __pa(sp->spt); | 3404 | root = __pa(sp->spt); |
| @@ -3423,15 +3429,18 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3423 | * Do we shadow a long mode page table? If so we need to | 3429 | * Do we shadow a long mode page table? If so we need to |
| 3424 | * write-protect the guests page table root. | 3430 | * write-protect the guests page table root. |
| 3425 | */ | 3431 | */ |
| 3426 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 3432 | if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { |
| 3427 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3433 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 3428 | 3434 | ||
| 3429 | MMU_WARN_ON(VALID_PAGE(root)); | 3435 | MMU_WARN_ON(VALID_PAGE(root)); |
| 3430 | 3436 | ||
| 3431 | spin_lock(&vcpu->kvm->mmu_lock); | 3437 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3432 | make_mmu_pages_available(vcpu); | 3438 | if (make_mmu_pages_available(vcpu) < 0) { |
| 3433 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, | 3439 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 3434 | 0, ACC_ALL); | 3440 | return 1; |
| 3441 | } | ||
| 3442 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | ||
| 3443 | vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); | ||
| 3435 | root = __pa(sp->spt); | 3444 | root = __pa(sp->spt); |
| 3436 | ++sp->root_count; | 3445 | ++sp->root_count; |
| 3437 | spin_unlock(&vcpu->kvm->mmu_lock); | 3446 | spin_unlock(&vcpu->kvm->mmu_lock); |
| @@ -3445,7 +3454,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3445 | * the shadow page table may be a PAE or a long mode page table. | 3454 | * the shadow page table may be a PAE or a long mode page table. |
| 3446 | */ | 3455 | */ |
| 3447 | pm_mask = PT_PRESENT_MASK; | 3456 | pm_mask = PT_PRESENT_MASK; |
| 3448 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) | 3457 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) |
| 3449 | pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; | 3458 | pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; |
| 3450 | 3459 | ||
| 3451 | for (i = 0; i < 4; ++i) { | 3460 | for (i = 0; i < 4; ++i) { |
| @@ -3463,7 +3472,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3463 | return 1; | 3472 | return 1; |
| 3464 | } | 3473 | } |
| 3465 | spin_lock(&vcpu->kvm->mmu_lock); | 3474 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3466 | make_mmu_pages_available(vcpu); | 3475 | if (make_mmu_pages_available(vcpu) < 0) { |
| 3476 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 3477 | return 1; | ||
| 3478 | } | ||
| 3467 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, | 3479 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, |
| 3468 | 0, ACC_ALL); | 3480 | 0, ACC_ALL); |
| 3469 | root = __pa(sp->spt); | 3481 | root = __pa(sp->spt); |
| @@ -3478,7 +3490,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
| 3478 | * If we shadow a 32 bit page table with a long mode page | 3490 | * If we shadow a 32 bit page table with a long mode page |
| 3479 | * table we enter this path. | 3491 | * table we enter this path. |
| 3480 | */ | 3492 | */ |
| 3481 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 3493 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) { |
| 3482 | if (vcpu->arch.mmu.lm_root == NULL) { | 3494 | if (vcpu->arch.mmu.lm_root == NULL) { |
| 3483 | /* | 3495 | /* |
| 3484 | * The additional page necessary for this is only | 3496 | * The additional page necessary for this is only |
| @@ -3523,7 +3535,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
| 3523 | 3535 | ||
| 3524 | vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); | 3536 | vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); |
| 3525 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); | 3537 | kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); |
| 3526 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 3538 | if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { |
| 3527 | hpa_t root = vcpu->arch.mmu.root_hpa; | 3539 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 3528 | sp = page_header(root); | 3540 | sp = page_header(root); |
| 3529 | mmu_sync_children(vcpu, sp); | 3541 | mmu_sync_children(vcpu, sp); |
| @@ -3588,6 +3600,13 @@ static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) | |||
| 3588 | 3600 | ||
| 3589 | static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3601 | static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) |
| 3590 | { | 3602 | { |
| 3603 | /* | ||
| 3604 | * A nested guest cannot use the MMIO cache if it is using nested | ||
| 3605 | * page tables, because cr2 is a nGPA while the cache stores GPAs. | ||
| 3606 | */ | ||
| 3607 | if (mmu_is_nested(vcpu)) | ||
| 3608 | return false; | ||
| 3609 | |||
| 3591 | if (direct) | 3610 | if (direct) |
| 3592 | return vcpu_match_mmio_gpa(vcpu, addr); | 3611 | return vcpu_match_mmio_gpa(vcpu, addr); |
| 3593 | 3612 | ||
| @@ -3599,7 +3618,7 @@ static bool | |||
| 3599 | walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) | 3618 | walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) |
| 3600 | { | 3619 | { |
| 3601 | struct kvm_shadow_walk_iterator iterator; | 3620 | struct kvm_shadow_walk_iterator iterator; |
| 3602 | u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; | 3621 | u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull; |
| 3603 | int root, leaf; | 3622 | int root, leaf; |
| 3604 | bool reserved = false; | 3623 | bool reserved = false; |
| 3605 | 3624 | ||
| @@ -3640,7 +3659,23 @@ exit: | |||
| 3640 | return reserved; | 3659 | return reserved; |
| 3641 | } | 3660 | } |
| 3642 | 3661 | ||
| 3643 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) | 3662 | /* |
| 3663 | * Return values of handle_mmio_page_fault: | ||
| 3664 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction | ||
| 3665 | * directly. | ||
| 3666 | * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page | ||
| 3667 | * fault path update the mmio spte. | ||
| 3668 | * RET_MMIO_PF_RETRY: let CPU fault again on the address. | ||
| 3669 | * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). | ||
| 3670 | */ | ||
| 3671 | enum { | ||
| 3672 | RET_MMIO_PF_EMULATE = 1, | ||
| 3673 | RET_MMIO_PF_INVALID = 2, | ||
| 3674 | RET_MMIO_PF_RETRY = 0, | ||
| 3675 | RET_MMIO_PF_BUG = -1 | ||
| 3676 | }; | ||
| 3677 | |||
| 3678 | static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) | ||
| 3644 | { | 3679 | { |
| 3645 | u64 spte; | 3680 | u64 spte; |
| 3646 | bool reserved; | 3681 | bool reserved; |
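handle_mmio_page_fault() becomes static and its RET_MMIO_PF_* contract is documented next to it; callers inside mmu.c are expected to emulate on EMULATE, fall back to the regular fault path on INVALID, let the CPU refault on RETRY, and treat BUG as an already-warned error. A hedged sketch of that dispatch; the surrounding function and its return codes are placeholders, not the kernel's real fault path:

```c
/* Placeholder dispatch illustrating the RET_MMIO_PF_* contract described above. */
enum {
	RET_MMIO_PF_EMULATE = 1,
	RET_MMIO_PF_INVALID = 2,
	RET_MMIO_PF_RETRY = 0,
	RET_MMIO_PF_BUG = -1
};

static int dispatch_mmio_result(int r)
{
	switch (r) {
	case RET_MMIO_PF_EMULATE:	/* real MMIO: emulate the faulting instruction */
		return 1;
	case RET_MMIO_PF_RETRY:		/* let the CPU fault again on the address */
		return 0;
	case RET_MMIO_PF_BUG:		/* a WARN was already printed */
		return -1;
	case RET_MMIO_PF_INVALID:	/* stale spte: take the regular fault path */
	default:
		return 2;		/* placeholder "keep handling" code */
	}
}
```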
| @@ -3872,7 +3907,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
| 3872 | spin_lock(&vcpu->kvm->mmu_lock); | 3907 | spin_lock(&vcpu->kvm->mmu_lock); |
| 3873 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3908 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
| 3874 | goto out_unlock; | 3909 | goto out_unlock; |
| 3875 | make_mmu_pages_available(vcpu); | 3910 | if (make_mmu_pages_available(vcpu) < 0) |
| 3911 | goto out_unlock; | ||
| 3876 | if (likely(!force_pt_level)) | 3912 | if (likely(!force_pt_level)) |
| 3877 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3913 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
| 3878 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); | 3914 | r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); |
| @@ -4025,7 +4061,13 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
| 4025 | rsvd_check->rsvd_bits_mask[1][0] = | 4061 | rsvd_check->rsvd_bits_mask[1][0] = |
| 4026 | rsvd_check->rsvd_bits_mask[0][0]; | 4062 | rsvd_check->rsvd_bits_mask[0][0]; |
| 4027 | break; | 4063 | break; |
| 4028 | case PT64_ROOT_LEVEL: | 4064 | case PT64_ROOT_5LEVEL: |
| 4065 | rsvd_check->rsvd_bits_mask[0][4] = exb_bit_rsvd | | ||
| 4066 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | | ||
| 4067 | rsvd_bits(maxphyaddr, 51); | ||
| 4068 | rsvd_check->rsvd_bits_mask[1][4] = | ||
| 4069 | rsvd_check->rsvd_bits_mask[0][4]; | ||
| 4070 | case PT64_ROOT_4LEVEL: | ||
| 4029 | rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | | 4071 | rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | |
| 4030 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | | 4072 | nonleaf_bit8_rsvd | rsvd_bits(7, 7) | |
| 4031 | rsvd_bits(maxphyaddr, 51); | 4073 | rsvd_bits(maxphyaddr, 51); |
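Five-level paging adds a [*][4] row to the reserved-bit tables: the arrays are indexed by level minus one, so index 4 describes the new topmost (5th-level) entries, and the PT64_ROOT_5LEVEL case intentionally falls through into PT64_ROOT_4LEVEL so the lower levels keep their existing masks. A simplified indexing sketch (the real masks also depend on NX, GBPAGES, PSE and MAXPHYADDR):

```c
/* Indexing sketch for the reserved-bit tables: entry [ps][level - 1]. Simplified. */
#include <stdbool.h>
#include <stdint.h>

#define PT64_ROOT_MAX_LEVEL 5

struct rsvd_sketch {
	uint64_t rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
};

static bool pte_has_rsvd_bits(const struct rsvd_sketch *rc, uint64_t pte, int level)
{
	/* row 0: non-large entries, row 1: entries with bit 7 (PS) set */
	return pte & rc->rsvd_bits_mask[(pte >> 7) & 1][level - 1];
}
```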
| @@ -4055,7 +4097,8 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, | |||
| 4055 | { | 4097 | { |
| 4056 | __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, | 4098 | __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, |
| 4057 | cpuid_maxphyaddr(vcpu), context->root_level, | 4099 | cpuid_maxphyaddr(vcpu), context->root_level, |
| 4058 | context->nx, guest_cpuid_has_gbpages(vcpu), | 4100 | context->nx, |
| 4101 | guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), | ||
| 4059 | is_pse(vcpu), guest_cpuid_is_amd(vcpu)); | 4102 | is_pse(vcpu), guest_cpuid_is_amd(vcpu)); |
| 4060 | } | 4103 | } |
| 4061 | 4104 | ||
| @@ -4065,6 +4108,8 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | |||
| 4065 | { | 4108 | { |
| 4066 | u64 bad_mt_xwr; | 4109 | u64 bad_mt_xwr; |
| 4067 | 4110 | ||
| 4111 | rsvd_check->rsvd_bits_mask[0][4] = | ||
| 4112 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | ||
| 4068 | rsvd_check->rsvd_bits_mask[0][3] = | 4113 | rsvd_check->rsvd_bits_mask[0][3] = |
| 4069 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); | 4114 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); |
| 4070 | rsvd_check->rsvd_bits_mask[0][2] = | 4115 | rsvd_check->rsvd_bits_mask[0][2] = |
| @@ -4074,6 +4119,7 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, | |||
| 4074 | rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); | 4119 | rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); |
| 4075 | 4120 | ||
| 4076 | /* large page */ | 4121 | /* large page */ |
| 4122 | rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4]; | ||
| 4077 | rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; | 4123 | rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; |
| 4078 | rsvd_check->rsvd_bits_mask[1][2] = | 4124 | rsvd_check->rsvd_bits_mask[1][2] = |
| 4079 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); | 4125 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); |
| @@ -4120,8 +4166,8 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) | |||
| 4120 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, | 4166 | __reset_rsvds_bits_mask(vcpu, shadow_zero_check, |
| 4121 | boot_cpu_data.x86_phys_bits, | 4167 | boot_cpu_data.x86_phys_bits, |
| 4122 | context->shadow_root_level, uses_nx, | 4168 | context->shadow_root_level, uses_nx, |
| 4123 | guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), | 4169 | guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), |
| 4124 | true); | 4170 | is_pse(vcpu), true); |
| 4125 | 4171 | ||
| 4126 | if (!shadow_me_mask) | 4172 | if (!shadow_me_mask) |
| 4127 | return; | 4173 | return; |
| @@ -4185,66 +4231,85 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, | |||
| 4185 | boot_cpu_data.x86_phys_bits, execonly); | 4231 | boot_cpu_data.x86_phys_bits, execonly); |
| 4186 | } | 4232 | } |
| 4187 | 4233 | ||
| 4234 | #define BYTE_MASK(access) \ | ||
| 4235 | ((1 & (access) ? 2 : 0) | \ | ||
| 4236 | (2 & (access) ? 4 : 0) | \ | ||
| 4237 | (3 & (access) ? 8 : 0) | \ | ||
| 4238 | (4 & (access) ? 16 : 0) | \ | ||
| 4239 | (5 & (access) ? 32 : 0) | \ | ||
| 4240 | (6 & (access) ? 64 : 0) | \ | ||
| 4241 | (7 & (access) ? 128 : 0)) | ||
| 4242 | |||
| 4243 | |||
| 4188 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, | 4244 | static void update_permission_bitmask(struct kvm_vcpu *vcpu, |
| 4189 | struct kvm_mmu *mmu, bool ept) | 4245 | struct kvm_mmu *mmu, bool ept) |
| 4190 | { | 4246 | { |
| 4191 | unsigned bit, byte, pfec; | 4247 | unsigned byte; |
| 4192 | u8 map; | 4248 | |
| 4193 | bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0; | 4249 | const u8 x = BYTE_MASK(ACC_EXEC_MASK); |
| 4250 | const u8 w = BYTE_MASK(ACC_WRITE_MASK); | ||
| 4251 | const u8 u = BYTE_MASK(ACC_USER_MASK); | ||
| 4252 | |||
| 4253 | bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0; | ||
| 4254 | bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0; | ||
| 4255 | bool cr0_wp = is_write_protection(vcpu); | ||
| 4194 | 4256 | ||
| 4195 | cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); | ||
| 4196 | cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); | ||
| 4197 | for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { | 4257 | for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { |
| 4198 | pfec = byte << 1; | 4258 | unsigned pfec = byte << 1; |
| 4199 | map = 0; | 4259 | |
| 4200 | wf = pfec & PFERR_WRITE_MASK; | ||
| 4201 | uf = pfec & PFERR_USER_MASK; | ||
| 4202 | ff = pfec & PFERR_FETCH_MASK; | ||
| 4203 | /* | 4260 | /* |
| 4204 | * PFERR_RSVD_MASK bit is set in PFEC if the access is not | 4261 | * Each "*f" variable has a 1 bit for each UWX value |
| 4205 | * subject to SMAP restrictions, and cleared otherwise. The | 4262 | * that causes a fault with the given PFEC. |
| 4206 | * bit is only meaningful if the SMAP bit is set in CR4. | ||
| 4207 | */ | 4263 | */ |
| 4208 | smapf = !(pfec & PFERR_RSVD_MASK); | ||
| 4209 | for (bit = 0; bit < 8; ++bit) { | ||
| 4210 | x = bit & ACC_EXEC_MASK; | ||
| 4211 | w = bit & ACC_WRITE_MASK; | ||
| 4212 | u = bit & ACC_USER_MASK; | ||
| 4213 | |||
| 4214 | if (!ept) { | ||
| 4215 | /* Not really needed: !nx will cause pte.nx to fault */ | ||
| 4216 | x |= !mmu->nx; | ||
| 4217 | /* Allow supervisor writes if !cr0.wp */ | ||
| 4218 | w |= !is_write_protection(vcpu) && !uf; | ||
| 4219 | /* Disallow supervisor fetches of user code if cr4.smep */ | ||
| 4220 | x &= !(cr4_smep && u && !uf); | ||
| 4221 | |||
| 4222 | /* | ||
| 4223 | * SMAP:kernel-mode data accesses from user-mode | ||
| 4224 | * mappings should fault. A fault is considered | ||
| 4225 | * as a SMAP violation if all of the following | ||
| 4226 | * conditions are ture: | ||
| 4227 | * - X86_CR4_SMAP is set in CR4 | ||
| 4228 | * - A user page is accessed | ||
| 4229 | * - Page fault in kernel mode | ||
| 4230 | * - if CPL = 3 or X86_EFLAGS_AC is clear | ||
| 4231 | * | ||
| 4232 | * Here, we cover the first three conditions. | ||
| 4233 | * The fourth is computed dynamically in | ||
| 4234 | * permission_fault() and is in smapf. | ||
| 4235 | * | ||
| 4236 | * Also, SMAP does not affect instruction | ||
| 4237 | * fetches, add the !ff check here to make it | ||
| 4238 | * clearer. | ||
| 4239 | */ | ||
| 4240 | smap = cr4_smap && u && !uf && !ff; | ||
| 4241 | } | ||
| 4242 | 4264 | ||
| 4243 | fault = (ff && !x) || (uf && !u) || (wf && !w) || | 4265 | /* Faults from writes to non-writable pages */ |
| 4244 | (smapf && smap); | 4266 | u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0; |
| 4245 | map |= fault << bit; | 4267 | /* Faults from user mode accesses to supervisor pages */ |
| 4268 | u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0; | ||
| 4269 | /* Faults from fetches of non-executable pages */ | ||
| 4270 | u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0; | ||
| 4271 | /* Faults from kernel mode fetches of user pages */ | ||
| 4272 | u8 smepf = 0; | ||
| 4273 | /* Faults from kernel mode accesses of user pages */ | ||
| 4274 | u8 smapf = 0; | ||
| 4275 | |||
| 4276 | if (!ept) { | ||
| 4277 | /* Faults from kernel mode accesses to user pages */ | ||
| 4278 | u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u; | ||
| 4279 | |||
| 4280 | /* Not really needed: !nx will cause pte.nx to fault */ | ||
| 4281 | if (!mmu->nx) | ||
| 4282 | ff = 0; | ||
| 4283 | |||
| 4284 | /* Allow supervisor writes if !cr0.wp */ | ||
| 4285 | if (!cr0_wp) | ||
| 4286 | wf = (pfec & PFERR_USER_MASK) ? wf : 0; | ||
| 4287 | |||
| 4288 | /* Disallow supervisor fetches of user code if cr4.smep */ | ||
| 4289 | if (cr4_smep) | ||
| 4290 | smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0; | ||
| 4291 | |||
| 4292 | /* | ||
| 4293 | * SMAP:kernel-mode data accesses from user-mode | ||
| 4294 | * mappings should fault. A fault is considered | ||
| 4295 | * as a SMAP violation if all of the following | ||
| 4296 | * conditions are true: | ||
| 4297 | * - X86_CR4_SMAP is set in CR4 | ||
| 4298 | * - A user page is accessed | ||
| 4299 | * - The access is not a fetch | ||
| 4300 | * - Page fault in kernel mode | ||
| 4301 | * - if CPL = 3 or X86_EFLAGS_AC is clear | ||
| 4302 | * | ||
| 4303 | * Here, we cover the first three conditions. | ||
| 4304 | * The fourth is computed dynamically in permission_fault(); | ||
| 4305 | * PFERR_RSVD_MASK bit will be set in PFEC if the access is | ||
| 4306 | * *not* subject to SMAP restrictions. | ||
| 4307 | */ | ||
| 4308 | if (cr4_smap) | ||
| 4309 | smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf; | ||
| 4246 | } | 4310 | } |
| 4247 | mmu->permissions[byte] = map; | 4311 | |
| 4312 | mmu->permissions[byte] = ff | uf | wf | smepf | smapf; | ||
| 4248 | } | 4313 | } |
| 4249 | } | 4314 | } |
| 4250 | 4315 | ||
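The hunk above replaces the per-bit loop in update_permission_bitmask() with byte-wide operations: BYTE_MASK(access) builds a byte whose bit N is set for every 3-bit UWX pte-access combination N that includes the given right, so ~w, ~u and ~x directly yield the combinations that fault on a write, user access or fetch. The standalone sketch below is illustrative only (not kernel code); the ACC_* values mirror KVM's definitions, everything else is a demo harness. It prints which access combinations fault for a write, which is exactly what one bit of mmu->permissions[pfec >> 1] encodes.

	#include <stdio.h>
	#include <stdint.h>

	#define ACC_EXEC_MASK  1   /* mirrors KVM's values */
	#define ACC_WRITE_MASK 2
	#define ACC_USER_MASK  4

	#define BYTE_MASK(access) \
		((1 & (access) ? 2 : 0) | \
		 (2 & (access) ? 4 : 0) | \
		 (3 & (access) ? 8 : 0) | \
		 (4 & (access) ? 16 : 0) | \
		 (5 & (access) ? 32 : 0) | \
		 (6 & (access) ? 64 : 0) | \
		 (7 & (access) ? 128 : 0))

	int main(void)
	{
		const uint8_t w = BYTE_MASK(ACC_WRITE_MASK);
		/* wf: one bit per UWX combination that faults on a write */
		uint8_t wf = (uint8_t)~w;
		unsigned acc;

		for (acc = 0; acc < 8; acc++)
			printf("pte access %u (%c%c%c): write %s\n", acc,
			       acc & ACC_USER_MASK  ? 'u' : '-',
			       acc & ACC_WRITE_MASK ? 'w' : '-',
			       acc & ACC_EXEC_MASK  ? 'x' : '-',
			       (wf >> acc) & 1 ? "faults" : "allowed");
		return 0;
	}

Because the table is precomputed this way, the lookup side (permission_fault(), outside this hunk) reduces the check to a single table read and bit test instead of re-deriving the SMEP/SMAP/CR0.WP conditions on every fault.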
| @@ -4358,7 +4423,10 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
| 4358 | static void paging64_init_context(struct kvm_vcpu *vcpu, | 4423 | static void paging64_init_context(struct kvm_vcpu *vcpu, |
| 4359 | struct kvm_mmu *context) | 4424 | struct kvm_mmu *context) |
| 4360 | { | 4425 | { |
| 4361 | paging64_init_context_common(vcpu, context, PT64_ROOT_LEVEL); | 4426 | int root_level = is_la57_mode(vcpu) ? |
| 4427 | PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; | ||
| 4428 | |||
| 4429 | paging64_init_context_common(vcpu, context, root_level); | ||
| 4362 | } | 4430 | } |
| 4363 | 4431 | ||
| 4364 | static void paging32_init_context(struct kvm_vcpu *vcpu, | 4432 | static void paging32_init_context(struct kvm_vcpu *vcpu, |
| @@ -4399,7 +4467,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
| 4399 | context->sync_page = nonpaging_sync_page; | 4467 | context->sync_page = nonpaging_sync_page; |
| 4400 | context->invlpg = nonpaging_invlpg; | 4468 | context->invlpg = nonpaging_invlpg; |
| 4401 | context->update_pte = nonpaging_update_pte; | 4469 | context->update_pte = nonpaging_update_pte; |
| 4402 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 4470 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); |
| 4403 | context->root_hpa = INVALID_PAGE; | 4471 | context->root_hpa = INVALID_PAGE; |
| 4404 | context->direct_map = true; | 4472 | context->direct_map = true; |
| 4405 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; | 4473 | context->set_cr3 = kvm_x86_ops->set_tdp_cr3; |
| @@ -4413,7 +4481,8 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
| 4413 | context->root_level = 0; | 4481 | context->root_level = 0; |
| 4414 | } else if (is_long_mode(vcpu)) { | 4482 | } else if (is_long_mode(vcpu)) { |
| 4415 | context->nx = is_nx(vcpu); | 4483 | context->nx = is_nx(vcpu); |
| 4416 | context->root_level = PT64_ROOT_LEVEL; | 4484 | context->root_level = is_la57_mode(vcpu) ? |
| 4485 | PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; | ||
| 4417 | reset_rsvds_bits_mask(vcpu, context); | 4486 | reset_rsvds_bits_mask(vcpu, context); |
| 4418 | context->gva_to_gpa = paging64_gva_to_gpa; | 4487 | context->gva_to_gpa = paging64_gva_to_gpa; |
| 4419 | } else if (is_pae(vcpu)) { | 4488 | } else if (is_pae(vcpu)) { |
| @@ -4470,7 +4539,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | |||
| 4470 | 4539 | ||
| 4471 | MMU_WARN_ON(VALID_PAGE(context->root_hpa)); | 4540 | MMU_WARN_ON(VALID_PAGE(context->root_hpa)); |
| 4472 | 4541 | ||
| 4473 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 4542 | context->shadow_root_level = PT64_ROOT_4LEVEL; |
| 4474 | 4543 | ||
| 4475 | context->nx = true; | 4544 | context->nx = true; |
| 4476 | context->ept_ad = accessed_dirty; | 4545 | context->ept_ad = accessed_dirty; |
| @@ -4479,7 +4548,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | |||
| 4479 | context->sync_page = ept_sync_page; | 4548 | context->sync_page = ept_sync_page; |
| 4480 | context->invlpg = ept_invlpg; | 4549 | context->invlpg = ept_invlpg; |
| 4481 | context->update_pte = ept_update_pte; | 4550 | context->update_pte = ept_update_pte; |
| 4482 | context->root_level = context->shadow_root_level; | 4551 | context->root_level = PT64_ROOT_4LEVEL; |
| 4483 | context->root_hpa = INVALID_PAGE; | 4552 | context->root_hpa = INVALID_PAGE; |
| 4484 | context->direct_map = false; | 4553 | context->direct_map = false; |
| 4485 | context->base_role.ad_disabled = !accessed_dirty; | 4554 | context->base_role.ad_disabled = !accessed_dirty; |
| @@ -4524,7 +4593,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
| 4524 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; | 4593 | g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested; |
| 4525 | } else if (is_long_mode(vcpu)) { | 4594 | } else if (is_long_mode(vcpu)) { |
| 4526 | g_context->nx = is_nx(vcpu); | 4595 | g_context->nx = is_nx(vcpu); |
| 4527 | g_context->root_level = PT64_ROOT_LEVEL; | 4596 | g_context->root_level = is_la57_mode(vcpu) ? |
| 4597 | PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL; | ||
| 4528 | reset_rsvds_bits_mask(vcpu, g_context); | 4598 | reset_rsvds_bits_mask(vcpu, g_context); |
| 4529 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; | 4599 | g_context->gva_to_gpa = paging64_gva_to_gpa_nested; |
| 4530 | } else if (is_pae(vcpu)) { | 4600 | } else if (is_pae(vcpu)) { |
| @@ -4814,12 +4884,12 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 4814 | } | 4884 | } |
| 4815 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 4885 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
| 4816 | 4886 | ||
| 4817 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu) | 4887 | static int make_mmu_pages_available(struct kvm_vcpu *vcpu) |
| 4818 | { | 4888 | { |
| 4819 | LIST_HEAD(invalid_list); | 4889 | LIST_HEAD(invalid_list); |
| 4820 | 4890 | ||
| 4821 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) | 4891 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) |
| 4822 | return; | 4892 | return 0; |
| 4823 | 4893 | ||
| 4824 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { | 4894 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { |
| 4825 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) | 4895 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) |
| @@ -4828,6 +4898,10 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu) | |||
| 4828 | ++vcpu->kvm->stat.mmu_recycled; | 4898 | ++vcpu->kvm->stat.mmu_recycled; |
| 4829 | } | 4899 | } |
| 4830 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 4900 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
| 4901 | |||
| 4902 | if (!kvm_mmu_available_pages(vcpu->kvm)) | ||
| 4903 | return -ENOSPC; | ||
| 4904 | return 0; | ||
| 4831 | } | 4905 | } |
| 4832 | 4906 | ||
| 4833 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | 4907 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, |
| @@ -4835,7 +4909,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | |||
| 4835 | { | 4909 | { |
| 4836 | int r, emulation_type = EMULTYPE_RETRY; | 4910 | int r, emulation_type = EMULTYPE_RETRY; |
| 4837 | enum emulation_result er; | 4911 | enum emulation_result er; |
| 4838 | bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu); | 4912 | bool direct = vcpu->arch.mmu.direct_map; |
| 4913 | |||
| 4914 | /* With shadow page tables, fault_address contains a GVA or nGPA. */ | ||
| 4915 | if (vcpu->arch.mmu.direct_map) { | ||
| 4916 | vcpu->arch.gpa_available = true; | ||
| 4917 | vcpu->arch.gpa_val = cr2; | ||
| 4918 | } | ||
| 4839 | 4919 | ||
| 4840 | if (unlikely(error_code & PFERR_RSVD_MASK)) { | 4920 | if (unlikely(error_code & PFERR_RSVD_MASK)) { |
| 4841 | r = handle_mmio_page_fault(vcpu, cr2, direct); | 4921 | r = handle_mmio_page_fault(vcpu, cr2, direct); |
| @@ -4847,6 +4927,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | |||
| 4847 | return 1; | 4927 | return 1; |
| 4848 | if (r < 0) | 4928 | if (r < 0) |
| 4849 | return r; | 4929 | return r; |
| 4930 | /* Must be RET_MMIO_PF_INVALID. */ | ||
| 4850 | } | 4931 | } |
| 4851 | 4932 | ||
| 4852 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), | 4933 | r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), |
| @@ -4862,11 +4943,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, | |||
| 4862 | * This can occur when using nested virtualization with nested | 4943 | * This can occur when using nested virtualization with nested |
| 4863 | * paging in both guests. If true, we simply unprotect the page | 4944 | * paging in both guests. If true, we simply unprotect the page |
| 4864 | * and resume the guest. | 4945 | * and resume the guest. |
| 4865 | * | ||
| 4866 | * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used | ||
| 4867 | * in PFERR_NEXT_GUEST_PAGE) | ||
| 4868 | */ | 4946 | */ |
| 4869 | if (error_code == PFERR_NESTED_GUEST_PAGE) { | 4947 | if (vcpu->arch.mmu.direct_map && |
| 4948 | (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { | ||
| 4870 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); | 4949 | kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); |
| 4871 | return 1; | 4950 | return 1; |
| 4872 | } | 4951 | } |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 4b9a3ae6b725..64a2dbd2b1af 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
| @@ -37,7 +37,8 @@ | |||
| 37 | #define PT32_DIR_PSE36_MASK \ | 37 | #define PT32_DIR_PSE36_MASK \ |
| 38 | (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) | 38 | (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT) |
| 39 | 39 | ||
| 40 | #define PT64_ROOT_LEVEL 4 | 40 | #define PT64_ROOT_5LEVEL 5 |
| 41 | #define PT64_ROOT_4LEVEL 4 | ||
| 41 | #define PT32_ROOT_LEVEL 2 | 42 | #define PT32_ROOT_LEVEL 2 |
| 42 | #define PT32E_ROOT_LEVEL 3 | 43 | #define PT32E_ROOT_LEVEL 3 |
| 43 | 44 | ||
| @@ -48,6 +49,9 @@ | |||
| 48 | 49 | ||
| 49 | static inline u64 rsvd_bits(int s, int e) | 50 | static inline u64 rsvd_bits(int s, int e) |
| 50 | { | 51 | { |
| 52 | if (e < s) | ||
| 53 | return 0; | ||
| 54 | |||
| 51 | return ((1ULL << (e - s + 1)) - 1) << s; | 55 | return ((1ULL << (e - s + 1)) - 1) << s; |
| 52 | } | 56 | } |
| 53 | 57 | ||
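The new e < s guard lets a caller such as rsvd_bits(maxphyaddr, 51) pass a low bound that already exceeds the high bound (possible once MAXPHYADDR can reach 52 with the 5-level paging support added in this series) and simply get an empty mask back, without leaning on the shift expression for that case. A minimal user-space sketch of the same helper, for illustration only:

	#include <stdio.h>
	#include <stdint.h>

	static inline uint64_t rsvd_bits(int s, int e)
	{
		if (e < s)        /* empty range: no reserved bits */
			return 0;

		return ((1ULL << (e - s + 1)) - 1) << s;
	}

	int main(void)
	{
		/* MAXPHYADDR == 46: PTE bits 46..51 are reserved */
		printf("%#llx\n", (unsigned long long)rsvd_bits(46, 51));
		/* MAXPHYADDR == 52: empty range, the guard returns 0 */
		printf("%#llx\n", (unsigned long long)rsvd_bits(52, 51));
		return 0;
	}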
| @@ -56,23 +60,6 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value); | |||
| 56 | void | 60 | void |
| 57 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | 61 | reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); |
| 58 | 62 | ||
| 59 | /* | ||
| 60 | * Return values of handle_mmio_page_fault: | ||
| 61 | * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction | ||
| 62 | * directly. | ||
| 63 | * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page | ||
| 64 | * fault path update the mmio spte. | ||
| 65 | * RET_MMIO_PF_RETRY: let CPU fault again on the address. | ||
| 66 | * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). | ||
| 67 | */ | ||
| 68 | enum { | ||
| 69 | RET_MMIO_PF_EMULATE = 1, | ||
| 70 | RET_MMIO_PF_INVALID = 2, | ||
| 71 | RET_MMIO_PF_RETRY = 0, | ||
| 72 | RET_MMIO_PF_BUG = -1 | ||
| 73 | }; | ||
| 74 | |||
| 75 | int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); | ||
| 76 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); | 63 | void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); |
| 77 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, | 64 | void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, |
| 78 | bool accessed_dirty); | 65 | bool accessed_dirty); |
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index dcce533d420c..d22ddbdf5e6e 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
| @@ -62,11 +62,11 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) | |||
| 62 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | 62 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) |
| 63 | return; | 63 | return; |
| 64 | 64 | ||
| 65 | if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { | 65 | if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { |
| 66 | hpa_t root = vcpu->arch.mmu.root_hpa; | 66 | hpa_t root = vcpu->arch.mmu.root_hpa; |
| 67 | 67 | ||
| 68 | sp = page_header(root); | 68 | sp = page_header(root); |
| 69 | __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL); | 69 | __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level); |
| 70 | return; | 70 | return; |
| 71 | } | 71 | } |
| 72 | 72 | ||
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 0149ac59c273..e9ea2d45ae66 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c | |||
| @@ -130,7 +130,7 @@ static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu) | |||
| 130 | * enable MTRRs and it is obviously undesirable to run the | 130 | * enable MTRRs and it is obviously undesirable to run the |
| 131 | * guest entirely with UC memory and we use WB. | 131 | * guest entirely with UC memory and we use WB. |
| 132 | */ | 132 | */ |
| 133 | if (guest_cpuid_has_mtrr(vcpu)) | 133 | if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR)) |
| 134 | return MTRR_TYPE_UNCACHABLE; | 134 | return MTRR_TYPE_UNCACHABLE; |
| 135 | else | 135 | else |
| 136 | return MTRR_TYPE_WRBACK; | 136 | return MTRR_TYPE_WRBACK; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b0454c7e4cff..86b68dc5a649 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -790,8 +790,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 790 | &map_writable)) | 790 | &map_writable)) |
| 791 | return 0; | 791 | return 0; |
| 792 | 792 | ||
| 793 | if (handle_abnormal_pfn(vcpu, mmu_is_nested(vcpu) ? 0 : addr, | 793 | if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r)) |
| 794 | walker.gfn, pfn, walker.pte_access, &r)) | ||
| 795 | return r; | 794 | return r; |
| 796 | 795 | ||
| 797 | /* | 796 | /* |
| @@ -819,7 +818,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
| 819 | goto out_unlock; | 818 | goto out_unlock; |
| 820 | 819 | ||
| 821 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 820 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
| 822 | make_mmu_pages_available(vcpu); | 821 | if (make_mmu_pages_available(vcpu) < 0) |
| 822 | goto out_unlock; | ||
| 823 | if (!force_pt_level) | 823 | if (!force_pt_level) |
| 824 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 824 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
| 825 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 825 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8dbd8dbc83eb..2c1cfe68a9af 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -280,9 +280,9 @@ module_param(avic, int, S_IRUGO); | |||
| 280 | static int vls = true; | 280 | static int vls = true; |
| 281 | module_param(vls, int, 0444); | 281 | module_param(vls, int, 0444); |
| 282 | 282 | ||
| 283 | /* AVIC VM ID bit masks and lock */ | 283 | /* enable/disable Virtual GIF */ |
| 284 | static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); | 284 | static int vgif = true; |
| 285 | static DEFINE_SPINLOCK(avic_vm_id_lock); | 285 | module_param(vgif, int, 0444); |
| 286 | 286 | ||
| 287 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); | 287 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
| 288 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 288 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
| @@ -479,19 +479,33 @@ static inline void clr_intercept(struct vcpu_svm *svm, int bit) | |||
| 479 | recalc_intercepts(svm); | 479 | recalc_intercepts(svm); |
| 480 | } | 480 | } |
| 481 | 481 | ||
| 482 | static inline bool vgif_enabled(struct vcpu_svm *svm) | ||
| 483 | { | ||
| 484 | return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK); | ||
| 485 | } | ||
| 486 | |||
| 482 | static inline void enable_gif(struct vcpu_svm *svm) | 487 | static inline void enable_gif(struct vcpu_svm *svm) |
| 483 | { | 488 | { |
| 484 | svm->vcpu.arch.hflags |= HF_GIF_MASK; | 489 | if (vgif_enabled(svm)) |
| 490 | svm->vmcb->control.int_ctl |= V_GIF_MASK; | ||
| 491 | else | ||
| 492 | svm->vcpu.arch.hflags |= HF_GIF_MASK; | ||
| 485 | } | 493 | } |
| 486 | 494 | ||
| 487 | static inline void disable_gif(struct vcpu_svm *svm) | 495 | static inline void disable_gif(struct vcpu_svm *svm) |
| 488 | { | 496 | { |
| 489 | svm->vcpu.arch.hflags &= ~HF_GIF_MASK; | 497 | if (vgif_enabled(svm)) |
| 498 | svm->vmcb->control.int_ctl &= ~V_GIF_MASK; | ||
| 499 | else | ||
| 500 | svm->vcpu.arch.hflags &= ~HF_GIF_MASK; | ||
| 490 | } | 501 | } |
| 491 | 502 | ||
| 492 | static inline bool gif_set(struct vcpu_svm *svm) | 503 | static inline bool gif_set(struct vcpu_svm *svm) |
| 493 | { | 504 | { |
| 494 | return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); | 505 | if (vgif_enabled(svm)) |
| 506 | return !!(svm->vmcb->control.int_ctl & V_GIF_MASK); | ||
| 507 | else | ||
| 508 | return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); | ||
| 495 | } | 509 | } |
| 496 | 510 | ||
| 497 | static unsigned long iopm_base; | 511 | static unsigned long iopm_base; |
| @@ -567,10 +581,10 @@ static inline void invlpga(unsigned long addr, u32 asid) | |||
| 567 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); | 581 | asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); |
| 568 | } | 582 | } |
| 569 | 583 | ||
| 570 | static int get_npt_level(void) | 584 | static int get_npt_level(struct kvm_vcpu *vcpu) |
| 571 | { | 585 | { |
| 572 | #ifdef CONFIG_X86_64 | 586 | #ifdef CONFIG_X86_64 |
| 573 | return PT64_ROOT_LEVEL; | 587 | return PT64_ROOT_4LEVEL; |
| 574 | #else | 588 | #else |
| 575 | return PT32E_ROOT_LEVEL; | 589 | return PT32E_ROOT_LEVEL; |
| 576 | #endif | 590 | #endif |
| @@ -641,7 +655,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) | |||
| 641 | struct vcpu_svm *svm = to_svm(vcpu); | 655 | struct vcpu_svm *svm = to_svm(vcpu); |
| 642 | unsigned nr = vcpu->arch.exception.nr; | 656 | unsigned nr = vcpu->arch.exception.nr; |
| 643 | bool has_error_code = vcpu->arch.exception.has_error_code; | 657 | bool has_error_code = vcpu->arch.exception.has_error_code; |
| 644 | bool reinject = vcpu->arch.exception.reinject; | 658 | bool reinject = vcpu->arch.exception.injected; |
| 645 | u32 error_code = vcpu->arch.exception.error_code; | 659 | u32 error_code = vcpu->arch.exception.error_code; |
| 646 | 660 | ||
| 647 | /* | 661 | /* |
| @@ -973,6 +987,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) | |||
| 973 | static void disable_nmi_singlestep(struct vcpu_svm *svm) | 987 | static void disable_nmi_singlestep(struct vcpu_svm *svm) |
| 974 | { | 988 | { |
| 975 | svm->nmi_singlestep = false; | 989 | svm->nmi_singlestep = false; |
| 990 | |||
| 976 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { | 991 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) { |
| 977 | /* Clear our flags if they were not set by the guest */ | 992 | /* Clear our flags if they were not set by the guest */ |
| 978 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) | 993 | if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF)) |
| @@ -989,6 +1004,8 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) | |||
| 989 | */ | 1004 | */ |
| 990 | #define SVM_VM_DATA_HASH_BITS 8 | 1005 | #define SVM_VM_DATA_HASH_BITS 8 |
| 991 | static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); | 1006 | static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); |
| 1007 | static u32 next_vm_id = 0; | ||
| 1008 | static bool next_vm_id_wrapped = 0; | ||
| 992 | static DEFINE_SPINLOCK(svm_vm_data_hash_lock); | 1009 | static DEFINE_SPINLOCK(svm_vm_data_hash_lock); |
| 993 | 1010 | ||
| 994 | /* Note: | 1011 | /* Note: |
| @@ -1108,6 +1125,13 @@ static __init int svm_hardware_setup(void) | |||
| 1108 | } | 1125 | } |
| 1109 | } | 1126 | } |
| 1110 | 1127 | ||
| 1128 | if (vgif) { | ||
| 1129 | if (!boot_cpu_has(X86_FEATURE_VGIF)) | ||
| 1130 | vgif = false; | ||
| 1131 | else | ||
| 1132 | pr_info("Virtual GIF supported\n"); | ||
| 1133 | } | ||
| 1134 | |||
| 1111 | return 0; | 1135 | return 0; |
| 1112 | 1136 | ||
| 1113 | err: | 1137 | err: |
| @@ -1305,6 +1329,12 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1305 | svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; | 1329 | svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; |
| 1306 | } | 1330 | } |
| 1307 | 1331 | ||
| 1332 | if (vgif) { | ||
| 1333 | clr_intercept(svm, INTERCEPT_STGI); | ||
| 1334 | clr_intercept(svm, INTERCEPT_CLGI); | ||
| 1335 | svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK; | ||
| 1336 | } | ||
| 1337 | |||
| 1308 | mark_all_dirty(svm->vmcb); | 1338 | mark_all_dirty(svm->vmcb); |
| 1309 | 1339 | ||
| 1310 | enable_gif(svm); | 1340 | enable_gif(svm); |
| @@ -1387,34 +1417,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) | |||
| 1387 | return 0; | 1417 | return 0; |
| 1388 | } | 1418 | } |
| 1389 | 1419 | ||
| 1390 | static inline int avic_get_next_vm_id(void) | ||
| 1391 | { | ||
| 1392 | int id; | ||
| 1393 | |||
| 1394 | spin_lock(&avic_vm_id_lock); | ||
| 1395 | |||
| 1396 | /* AVIC VM ID is one-based. */ | ||
| 1397 | id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1); | ||
| 1398 | if (id <= AVIC_VM_ID_MASK) | ||
| 1399 | __set_bit(id, avic_vm_id_bitmap); | ||
| 1400 | else | ||
| 1401 | id = -EAGAIN; | ||
| 1402 | |||
| 1403 | spin_unlock(&avic_vm_id_lock); | ||
| 1404 | return id; | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | static inline int avic_free_vm_id(int id) | ||
| 1408 | { | ||
| 1409 | if (id <= 0 || id > AVIC_VM_ID_MASK) | ||
| 1410 | return -EINVAL; | ||
| 1411 | |||
| 1412 | spin_lock(&avic_vm_id_lock); | ||
| 1413 | __clear_bit(id, avic_vm_id_bitmap); | ||
| 1414 | spin_unlock(&avic_vm_id_lock); | ||
| 1415 | return 0; | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | static void avic_vm_destroy(struct kvm *kvm) | 1420 | static void avic_vm_destroy(struct kvm *kvm) |
| 1419 | { | 1421 | { |
| 1420 | unsigned long flags; | 1422 | unsigned long flags; |
| @@ -1423,8 +1425,6 @@ static void avic_vm_destroy(struct kvm *kvm) | |||
| 1423 | if (!avic) | 1425 | if (!avic) |
| 1424 | return; | 1426 | return; |
| 1425 | 1427 | ||
| 1426 | avic_free_vm_id(vm_data->avic_vm_id); | ||
| 1427 | |||
| 1428 | if (vm_data->avic_logical_id_table_page) | 1428 | if (vm_data->avic_logical_id_table_page) |
| 1429 | __free_page(vm_data->avic_logical_id_table_page); | 1429 | __free_page(vm_data->avic_logical_id_table_page); |
| 1430 | if (vm_data->avic_physical_id_table_page) | 1430 | if (vm_data->avic_physical_id_table_page) |
| @@ -1438,19 +1438,16 @@ static void avic_vm_destroy(struct kvm *kvm) | |||
| 1438 | static int avic_vm_init(struct kvm *kvm) | 1438 | static int avic_vm_init(struct kvm *kvm) |
| 1439 | { | 1439 | { |
| 1440 | unsigned long flags; | 1440 | unsigned long flags; |
| 1441 | int vm_id, err = -ENOMEM; | 1441 | int err = -ENOMEM; |
| 1442 | struct kvm_arch *vm_data = &kvm->arch; | 1442 | struct kvm_arch *vm_data = &kvm->arch; |
| 1443 | struct page *p_page; | 1443 | struct page *p_page; |
| 1444 | struct page *l_page; | 1444 | struct page *l_page; |
| 1445 | struct kvm_arch *ka; | ||
| 1446 | u32 vm_id; | ||
| 1445 | 1447 | ||
| 1446 | if (!avic) | 1448 | if (!avic) |
| 1447 | return 0; | 1449 | return 0; |
| 1448 | 1450 | ||
| 1449 | vm_id = avic_get_next_vm_id(); | ||
| 1450 | if (vm_id < 0) | ||
| 1451 | return vm_id; | ||
| 1452 | vm_data->avic_vm_id = (u32)vm_id; | ||
| 1453 | |||
| 1454 | /* Allocating physical APIC ID table (4KB) */ | 1451 | /* Allocating physical APIC ID table (4KB) */ |
| 1455 | p_page = alloc_page(GFP_KERNEL); | 1452 | p_page = alloc_page(GFP_KERNEL); |
| 1456 | if (!p_page) | 1453 | if (!p_page) |
| @@ -1468,6 +1465,22 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1468 | clear_page(page_address(l_page)); | 1465 | clear_page(page_address(l_page)); |
| 1469 | 1466 | ||
| 1470 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); | 1467 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); |
| 1468 | again: | ||
| 1469 | vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK; | ||
| 1470 | if (vm_id == 0) { /* id is 1-based, zero is not okay */ | ||
| 1471 | next_vm_id_wrapped = 1; | ||
| 1472 | goto again; | ||
| 1473 | } | ||
| 1474 | /* Is it still in use? Only possible if wrapped at least once */ | ||
| 1475 | if (next_vm_id_wrapped) { | ||
| 1476 | hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { | ||
| 1477 | struct kvm *k2 = container_of(ka, struct kvm, arch); | ||
| 1478 | struct kvm_arch *vd2 = &k2->arch; | ||
| 1479 | if (vd2->avic_vm_id == vm_id) | ||
| 1480 | goto again; | ||
| 1481 | } | ||
| 1482 | } | ||
| 1483 | vm_data->avic_vm_id = vm_id; | ||
| 1471 | hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); | 1484 | hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); |
| 1472 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); | 1485 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); |
| 1473 | 1486 | ||
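The allocation scheme above drops the dedicated AVIC VM ID bitmap and its spinlock: IDs come from a counter masked to the ID width, zero is skipped because IDs are 1-based, and only after the counter has wrapped does the code walk the existing per-VM hash table to reject collisions. A toy user-space version of the same idea follows; the array-backed used[] is only a stand-in for the hash_for_each_possible() walk over svm_vm_data_hash, the mask width is made up, and (like the loop above) it assumes the ID space is never exhausted.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_VM_ID_MASK 0xff                    /* assumed width, demo only */

	static uint32_t next_vm_id;
	static bool next_vm_id_wrapped;
	static bool used[DEMO_VM_ID_MASK + 1];          /* stand-in for the hash table */

	static uint32_t alloc_vm_id(void)
	{
		uint32_t id;

	again:
		id = next_vm_id = (next_vm_id + 1) & DEMO_VM_ID_MASK;
		if (id == 0) {                          /* IDs are 1-based */
			next_vm_id_wrapped = true;
			goto again;
		}
		/* Collisions are only possible after the counter has wrapped. */
		if (next_vm_id_wrapped && used[id])
			goto again;

		used[id] = true;
		return id;
	}

	int main(void)
	{
		int i;

		for (i = 0; i < 3; i++)
			printf("vm id %u\n", alloc_vm_id());
		return 0;
	}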
| @@ -1580,7 +1593,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
| 1580 | } | 1593 | } |
| 1581 | init_vmcb(svm); | 1594 | init_vmcb(svm); |
| 1582 | 1595 | ||
| 1583 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1596 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); |
| 1584 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1597 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
| 1585 | 1598 | ||
| 1586 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) | 1599 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) |
| @@ -2384,7 +2397,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) | |||
| 2384 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; | 2397 | vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; |
| 2385 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; | 2398 | vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; |
| 2386 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; | 2399 | vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; |
| 2387 | vcpu->arch.mmu.shadow_root_level = get_npt_level(); | 2400 | vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); |
| 2388 | reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); | 2401 | reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); |
| 2389 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; | 2402 | vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; |
| 2390 | } | 2403 | } |
| @@ -3147,6 +3160,13 @@ static int stgi_interception(struct vcpu_svm *svm) | |||
| 3147 | if (nested_svm_check_permissions(svm)) | 3160 | if (nested_svm_check_permissions(svm)) |
| 3148 | return 1; | 3161 | return 1; |
| 3149 | 3162 | ||
| 3163 | /* | ||
| 3164 | * If VGIF is enabled, the STGI intercept is only added to | ||
| 3165 | * detect the opening of the NMI window; remove it now. | ||
| 3166 | */ | ||
| 3167 | if (vgif_enabled(svm)) | ||
| 3168 | clr_intercept(svm, INTERCEPT_STGI); | ||
| 3169 | |||
| 3150 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 3170 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 3151 | ret = kvm_skip_emulated_instruction(&svm->vcpu); | 3171 | ret = kvm_skip_emulated_instruction(&svm->vcpu); |
| 3152 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3172 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
| @@ -3744,7 +3764,10 @@ static int interrupt_window_interception(struct vcpu_svm *svm) | |||
| 3744 | 3764 | ||
| 3745 | static int pause_interception(struct vcpu_svm *svm) | 3765 | static int pause_interception(struct vcpu_svm *svm) |
| 3746 | { | 3766 | { |
| 3747 | kvm_vcpu_on_spin(&(svm->vcpu)); | 3767 | struct kvm_vcpu *vcpu = &svm->vcpu; |
| 3768 | bool in_kernel = (svm_get_cpl(vcpu) == 0); | ||
| 3769 | |||
| 3770 | kvm_vcpu_on_spin(vcpu, in_kernel); | ||
| 3748 | return 1; | 3771 | return 1; |
| 3749 | } | 3772 | } |
| 3750 | 3773 | ||
| @@ -4228,8 +4251,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
| 4228 | 4251 | ||
| 4229 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); | 4252 | trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); |
| 4230 | 4253 | ||
| 4231 | vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); | ||
| 4232 | |||
| 4233 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) | 4254 | if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) |
| 4234 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 4255 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
| 4235 | if (npt_enabled) | 4256 | if (npt_enabled) |
| @@ -4682,9 +4703,11 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
| 4682 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes | 4703 | * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes |
| 4683 | * 1, because that's a separate STGI/VMRUN intercept. The next time we | 4704 | * 1, because that's a separate STGI/VMRUN intercept. The next time we |
| 4684 | * get that intercept, this function will be called again though and | 4705 | * get that intercept, this function will be called again though and |
| 4685 | * we'll get the vintr intercept. | 4706 | * we'll get the vintr intercept. However, if the vGIF feature is |
| 4707 | * enabled, the STGI interception will not occur. Enable the irq | ||
| 4708 | * window under the assumption that the hardware will set the GIF. | ||
| 4686 | */ | 4709 | */ |
| 4687 | if (gif_set(svm) && nested_svm_intr(svm)) { | 4710 | if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) { |
| 4688 | svm_set_vintr(svm); | 4711 | svm_set_vintr(svm); |
| 4689 | svm_inject_irq(svm, 0x0); | 4712 | svm_inject_irq(svm, 0x0); |
| 4690 | } | 4713 | } |
| @@ -4698,8 +4721,11 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
| 4698 | == HF_NMI_MASK) | 4721 | == HF_NMI_MASK) |
| 4699 | return; /* IRET will cause a vm exit */ | 4722 | return; /* IRET will cause a vm exit */ |
| 4700 | 4723 | ||
| 4701 | if ((svm->vcpu.arch.hflags & HF_GIF_MASK) == 0) | 4724 | if (!gif_set(svm)) { |
| 4725 | if (vgif_enabled(svm)) | ||
| 4726 | set_intercept(svm, INTERCEPT_STGI); | ||
| 4702 | return; /* STGI will cause a vm exit */ | 4727 | return; /* STGI will cause a vm exit */ |
| 4728 | } | ||
| 4703 | 4729 | ||
| 4704 | if (svm->nested.exit_required) | 4730 | if (svm->nested.exit_required) |
| 4705 | return; /* we're not going to run the guest yet */ | 4731 | return; /* we're not going to run the guest yet */ |
| @@ -5071,17 +5097,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
| 5071 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | 5097 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) |
| 5072 | { | 5098 | { |
| 5073 | struct vcpu_svm *svm = to_svm(vcpu); | 5099 | struct vcpu_svm *svm = to_svm(vcpu); |
| 5074 | struct kvm_cpuid_entry2 *entry; | ||
| 5075 | 5100 | ||
| 5076 | /* Update nrips enabled cache */ | 5101 | /* Update nrips enabled cache */ |
| 5077 | svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); | 5102 | svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS); |
| 5078 | 5103 | ||
| 5079 | if (!kvm_vcpu_apicv_active(vcpu)) | 5104 | if (!kvm_vcpu_apicv_active(vcpu)) |
| 5080 | return; | 5105 | return; |
| 5081 | 5106 | ||
| 5082 | entry = kvm_find_cpuid_entry(vcpu, 1, 0); | 5107 | guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC); |
| 5083 | if (entry) | ||
| 5084 | entry->ecx &= ~bit(X86_FEATURE_X2APIC); | ||
| 5085 | } | 5108 | } |
| 5086 | 5109 | ||
| 5087 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | 5110 | static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0a6cc6754ec5..8a202c49e2a0 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
| @@ -151,8 +151,8 @@ TRACE_EVENT(kvm_fast_mmio, | |||
| 151 | */ | 151 | */ |
| 152 | TRACE_EVENT(kvm_cpuid, | 152 | TRACE_EVENT(kvm_cpuid, |
| 153 | TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx, | 153 | TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx, |
| 154 | unsigned long rcx, unsigned long rdx), | 154 | unsigned long rcx, unsigned long rdx, bool found), |
| 155 | TP_ARGS(function, rax, rbx, rcx, rdx), | 155 | TP_ARGS(function, rax, rbx, rcx, rdx, found), |
| 156 | 156 | ||
| 157 | TP_STRUCT__entry( | 157 | TP_STRUCT__entry( |
| 158 | __field( unsigned int, function ) | 158 | __field( unsigned int, function ) |
| @@ -160,6 +160,7 @@ TRACE_EVENT(kvm_cpuid, | |||
| 160 | __field( unsigned long, rbx ) | 160 | __field( unsigned long, rbx ) |
| 161 | __field( unsigned long, rcx ) | 161 | __field( unsigned long, rcx ) |
| 162 | __field( unsigned long, rdx ) | 162 | __field( unsigned long, rdx ) |
| 163 | __field( bool, found ) | ||
| 163 | ), | 164 | ), |
| 164 | 165 | ||
| 165 | TP_fast_assign( | 166 | TP_fast_assign( |
| @@ -168,11 +169,13 @@ TRACE_EVENT(kvm_cpuid, | |||
| 168 | __entry->rbx = rbx; | 169 | __entry->rbx = rbx; |
| 169 | __entry->rcx = rcx; | 170 | __entry->rcx = rcx; |
| 170 | __entry->rdx = rdx; | 171 | __entry->rdx = rdx; |
| 172 | __entry->found = found; | ||
| 171 | ), | 173 | ), |
| 172 | 174 | ||
| 173 | TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx", | 175 | TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s", |
| 174 | __entry->function, __entry->rax, | 176 | __entry->function, __entry->rax, |
| 175 | __entry->rbx, __entry->rcx, __entry->rdx) | 177 | __entry->rbx, __entry->rcx, __entry->rdx, |
| 178 | __entry->found ? "found" : "not found") | ||
| 176 | ); | 179 | ); |
| 177 | 180 | ||
| 178 | #define AREG(x) { APIC_##x, "APIC_" #x } | 181 | #define AREG(x) { APIC_##x, "APIC_" #x } |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70b90c0810d0..4253adef9044 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -122,7 +122,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); | |||
| 122 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | 122 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) |
| 123 | #define KVM_CR4_GUEST_OWNED_BITS \ | 123 | #define KVM_CR4_GUEST_OWNED_BITS \ |
| 124 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 124 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
| 125 | | X86_CR4_OSXMMEXCPT | X86_CR4_TSD) | 125 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) |
| 126 | 126 | ||
| 127 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | 127 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) |
| 128 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | 128 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) |
| @@ -243,11 +243,13 @@ struct __packed vmcs12 { | |||
| 243 | u64 virtual_apic_page_addr; | 243 | u64 virtual_apic_page_addr; |
| 244 | u64 apic_access_addr; | 244 | u64 apic_access_addr; |
| 245 | u64 posted_intr_desc_addr; | 245 | u64 posted_intr_desc_addr; |
| 246 | u64 vm_function_control; | ||
| 246 | u64 ept_pointer; | 247 | u64 ept_pointer; |
| 247 | u64 eoi_exit_bitmap0; | 248 | u64 eoi_exit_bitmap0; |
| 248 | u64 eoi_exit_bitmap1; | 249 | u64 eoi_exit_bitmap1; |
| 249 | u64 eoi_exit_bitmap2; | 250 | u64 eoi_exit_bitmap2; |
| 250 | u64 eoi_exit_bitmap3; | 251 | u64 eoi_exit_bitmap3; |
| 252 | u64 eptp_list_address; | ||
| 251 | u64 xss_exit_bitmap; | 253 | u64 xss_exit_bitmap; |
| 252 | u64 guest_physical_address; | 254 | u64 guest_physical_address; |
| 253 | u64 vmcs_link_pointer; | 255 | u64 vmcs_link_pointer; |
| @@ -481,6 +483,7 @@ struct nested_vmx { | |||
| 481 | u64 nested_vmx_cr4_fixed0; | 483 | u64 nested_vmx_cr4_fixed0; |
| 482 | u64 nested_vmx_cr4_fixed1; | 484 | u64 nested_vmx_cr4_fixed1; |
| 483 | u64 nested_vmx_vmcs_enum; | 485 | u64 nested_vmx_vmcs_enum; |
| 486 | u64 nested_vmx_vmfunc_controls; | ||
| 484 | }; | 487 | }; |
| 485 | 488 | ||
| 486 | #define POSTED_INTR_ON 0 | 489 | #define POSTED_INTR_ON 0 |
| @@ -573,6 +576,8 @@ struct vcpu_vmx { | |||
| 573 | #endif | 576 | #endif |
| 574 | u32 vm_entry_controls_shadow; | 577 | u32 vm_entry_controls_shadow; |
| 575 | u32 vm_exit_controls_shadow; | 578 | u32 vm_exit_controls_shadow; |
| 579 | u32 secondary_exec_control; | ||
| 580 | |||
| 576 | /* | 581 | /* |
| 577 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | 582 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a |
| 578 | * non-nested (L1) guest, it always points to vmcs01. For a nested | 583 | * non-nested (L1) guest, it always points to vmcs01. For a nested |
| @@ -761,11 +766,13 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
| 761 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), | 766 | FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr), |
| 762 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), | 767 | FIELD64(APIC_ACCESS_ADDR, apic_access_addr), |
| 763 | FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), | 768 | FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr), |
| 769 | FIELD64(VM_FUNCTION_CONTROL, vm_function_control), | ||
| 764 | FIELD64(EPT_POINTER, ept_pointer), | 770 | FIELD64(EPT_POINTER, ept_pointer), |
| 765 | FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), | 771 | FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0), |
| 766 | FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), | 772 | FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1), |
| 767 | FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), | 773 | FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2), |
| 768 | FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), | 774 | FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3), |
| 775 | FIELD64(EPTP_LIST_ADDRESS, eptp_list_address), | ||
| 769 | FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), | 776 | FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), |
| 770 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), | 777 | FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), |
| 771 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), | 778 | FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), |
| @@ -889,25 +896,6 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | |||
| 889 | return to_vmx(vcpu)->nested.cached_vmcs12; | 896 | return to_vmx(vcpu)->nested.cached_vmcs12; |
| 890 | } | 897 | } |
| 891 | 898 | ||
| 892 | static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) | ||
| 893 | { | ||
| 894 | struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT); | ||
| 895 | if (is_error_page(page)) | ||
| 896 | return NULL; | ||
| 897 | |||
| 898 | return page; | ||
| 899 | } | ||
| 900 | |||
| 901 | static void nested_release_page(struct page *page) | ||
| 902 | { | ||
| 903 | kvm_release_page_dirty(page); | ||
| 904 | } | ||
| 905 | |||
| 906 | static void nested_release_page_clean(struct page *page) | ||
| 907 | { | ||
| 908 | kvm_release_page_clean(page); | ||
| 909 | } | ||
| 910 | |||
| 911 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); | 899 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu); |
| 912 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | 900 | static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); |
| 913 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); | 901 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); |
| @@ -1212,6 +1200,16 @@ static inline bool cpu_has_vmx_ept_4levels(void) | |||
| 1212 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; | 1200 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT; |
| 1213 | } | 1201 | } |
| 1214 | 1202 | ||
| 1203 | static inline bool cpu_has_vmx_ept_mt_wb(void) | ||
| 1204 | { | ||
| 1205 | return vmx_capability.ept & VMX_EPTP_WB_BIT; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | static inline bool cpu_has_vmx_ept_5levels(void) | ||
| 1209 | { | ||
| 1210 | return vmx_capability.ept & VMX_EPT_PAGE_WALK_5_BIT; | ||
| 1211 | } | ||
| 1212 | |||
| 1215 | static inline bool cpu_has_vmx_ept_ad_bits(void) | 1213 | static inline bool cpu_has_vmx_ept_ad_bits(void) |
| 1216 | { | 1214 | { |
| 1217 | return vmx_capability.ept & VMX_EPT_AD_BIT; | 1215 | return vmx_capability.ept & VMX_EPT_AD_BIT; |
| @@ -1317,6 +1315,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void) | |||
| 1317 | SECONDARY_EXEC_TSC_SCALING; | 1315 | SECONDARY_EXEC_TSC_SCALING; |
| 1318 | } | 1316 | } |
| 1319 | 1317 | ||
| 1318 | static inline bool cpu_has_vmx_vmfunc(void) | ||
| 1319 | { | ||
| 1320 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 1321 | SECONDARY_EXEC_ENABLE_VMFUNC; | ||
| 1322 | } | ||
| 1323 | |||
| 1320 | static inline bool report_flexpriority(void) | 1324 | static inline bool report_flexpriority(void) |
| 1321 | { | 1325 | { |
| 1322 | return flexpriority_enabled; | 1326 | return flexpriority_enabled; |
| @@ -1357,8 +1361,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | |||
| 1357 | 1361 | ||
| 1358 | static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) | 1362 | static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12) |
| 1359 | { | 1363 | { |
| 1360 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) && | 1364 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); |
| 1361 | vmx_xsaves_supported(); | ||
| 1362 | } | 1365 | } |
| 1363 | 1366 | ||
| 1364 | static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) | 1367 | static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12) |
| @@ -1391,6 +1394,18 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12) | |||
| 1391 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; | 1394 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR; |
| 1392 | } | 1395 | } |
| 1393 | 1396 | ||
| 1397 | static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12) | ||
| 1398 | { | ||
| 1399 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC); | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) | ||
| 1403 | { | ||
| 1404 | return nested_cpu_has_vmfunc(vmcs12) && | ||
| 1405 | (vmcs12->vm_function_control & | ||
| 1406 | VMX_VMFUNC_EPTP_SWITCHING); | ||
| 1407 | } | ||
| 1408 | |||
| 1394 | static inline bool is_nmi(u32 intr_info) | 1409 | static inline bool is_nmi(u32 intr_info) |
| 1395 | { | 1410 | { |
| 1396 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1411 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
| @@ -2450,15 +2465,14 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, | |||
| 2450 | * KVM wants to inject page-faults which it got to the guest. This function | 2465 | * KVM wants to inject page-faults which it got to the guest. This function |
| 2451 | * checks whether in a nested guest, we need to inject them to L1 or L2. | 2466 | * checks whether in a nested guest, we need to inject them to L1 or L2. |
| 2452 | */ | 2467 | */ |
| 2453 | static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) | 2468 | static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual) |
| 2454 | { | 2469 | { |
| 2455 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 2470 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 2456 | unsigned int nr = vcpu->arch.exception.nr; | 2471 | unsigned int nr = vcpu->arch.exception.nr; |
| 2457 | 2472 | ||
| 2458 | if (nr == PF_VECTOR) { | 2473 | if (nr == PF_VECTOR) { |
| 2459 | if (vcpu->arch.exception.nested_apf) { | 2474 | if (vcpu->arch.exception.nested_apf) { |
| 2460 | nested_vmx_inject_exception_vmexit(vcpu, | 2475 | *exit_qual = vcpu->arch.apf.nested_apf_token; |
| 2461 | vcpu->arch.apf.nested_apf_token); | ||
| 2462 | return 1; | 2476 | return 1; |
| 2463 | } | 2477 | } |
| 2464 | /* | 2478 | /* |
| @@ -2472,16 +2486,15 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu) | |||
| 2472 | */ | 2486 | */ |
| 2473 | if (nested_vmx_is_page_fault_vmexit(vmcs12, | 2487 | if (nested_vmx_is_page_fault_vmexit(vmcs12, |
| 2474 | vcpu->arch.exception.error_code)) { | 2488 | vcpu->arch.exception.error_code)) { |
| 2475 | nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2); | 2489 | *exit_qual = vcpu->arch.cr2; |
| 2476 | return 1; | 2490 | return 1; |
| 2477 | } | 2491 | } |
| 2478 | } else { | 2492 | } else { |
| 2479 | unsigned long exit_qual = 0; | ||
| 2480 | if (nr == DB_VECTOR) | ||
| 2481 | exit_qual = vcpu->arch.dr6; | ||
| 2482 | |||
| 2483 | if (vmcs12->exception_bitmap & (1u << nr)) { | 2493 | if (vmcs12->exception_bitmap & (1u << nr)) { |
| 2484 | nested_vmx_inject_exception_vmexit(vcpu, exit_qual); | 2494 | if (nr == DB_VECTOR) |
| 2495 | *exit_qual = vcpu->arch.dr6; | ||
| 2496 | else | ||
| 2497 | *exit_qual = 0; | ||
| 2485 | return 1; | 2498 | return 1; |
| 2486 | } | 2499 | } |
| 2487 | } | 2500 | } |
| @@ -2494,14 +2507,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) | |||
| 2494 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2507 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2495 | unsigned nr = vcpu->arch.exception.nr; | 2508 | unsigned nr = vcpu->arch.exception.nr; |
| 2496 | bool has_error_code = vcpu->arch.exception.has_error_code; | 2509 | bool has_error_code = vcpu->arch.exception.has_error_code; |
| 2497 | bool reinject = vcpu->arch.exception.reinject; | ||
| 2498 | u32 error_code = vcpu->arch.exception.error_code; | 2510 | u32 error_code = vcpu->arch.exception.error_code; |
| 2499 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 2511 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
| 2500 | 2512 | ||
| 2501 | if (!reinject && is_guest_mode(vcpu) && | ||
| 2502 | nested_vmx_check_exception(vcpu)) | ||
| 2503 | return; | ||
| 2504 | |||
| 2505 | if (has_error_code) { | 2513 | if (has_error_code) { |
| 2506 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | 2514 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); |
| 2507 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; | 2515 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; |
| @@ -2600,7 +2608,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
| 2600 | if (index >= 0) | 2608 | if (index >= 0) |
| 2601 | move_msr_up(vmx, index, save_nmsrs++); | 2609 | move_msr_up(vmx, index, save_nmsrs++); |
| 2602 | index = __find_msr_index(vmx, MSR_TSC_AUX); | 2610 | index = __find_msr_index(vmx, MSR_TSC_AUX); |
| 2603 | if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) | 2611 | if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) |
| 2604 | move_msr_up(vmx, index, save_nmsrs++); | 2612 | move_msr_up(vmx, index, save_nmsrs++); |
| 2605 | /* | 2613 | /* |
| 2606 | * MSR_STAR is only needed on long mode guests, and only | 2614 | * MSR_STAR is only needed on long mode guests, and only |
| @@ -2660,12 +2668,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
| 2660 | } | 2668 | } |
| 2661 | } | 2669 | } |
| 2662 | 2670 | ||
| 2663 | static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | ||
| 2664 | { | ||
| 2665 | struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
| 2666 | return best && (best->ecx & (1 << (X86_FEATURE_VMX & 31))); | ||
| 2667 | } | ||
| 2668 | |||
| 2669 | /* | 2671 | /* |
| 2670 | * nested_vmx_allowed() checks whether a guest should be allowed to use VMX | 2672 | * nested_vmx_allowed() checks whether a guest should be allowed to use VMX |
| 2671 | * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for | 2673 | * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for |
| @@ -2674,7 +2676,7 @@ static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) | |||
| 2674 | */ | 2676 | */ |
| 2675 | static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) | 2677 | static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) |
| 2676 | { | 2678 | { |
| 2677 | return nested && guest_cpuid_has_vmx(vcpu); | 2679 | return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); |
| 2678 | } | 2680 | } |
| 2679 | 2681 | ||
| 2680 | /* | 2682 | /* |
| @@ -2797,21 +2799,21 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2797 | vmx->nested.nested_vmx_procbased_ctls_low &= | 2799 | vmx->nested.nested_vmx_procbased_ctls_low &= |
| 2798 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); | 2800 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); |
| 2799 | 2801 | ||
| 2800 | /* secondary cpu-based controls */ | 2802 | /* |
| 2803 | * secondary cpu-based controls. Do not include those that | ||
| 2804 | * depend on CPUID bits, they are added later by vmx_cpuid_update. | ||
| 2805 | */ | ||
| 2801 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | 2806 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, |
| 2802 | vmx->nested.nested_vmx_secondary_ctls_low, | 2807 | vmx->nested.nested_vmx_secondary_ctls_low, |
| 2803 | vmx->nested.nested_vmx_secondary_ctls_high); | 2808 | vmx->nested.nested_vmx_secondary_ctls_high); |
| 2804 | vmx->nested.nested_vmx_secondary_ctls_low = 0; | 2809 | vmx->nested.nested_vmx_secondary_ctls_low = 0; |
| 2805 | vmx->nested.nested_vmx_secondary_ctls_high &= | 2810 | vmx->nested.nested_vmx_secondary_ctls_high &= |
| 2806 | SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED | | ||
| 2807 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 2811 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
| 2808 | SECONDARY_EXEC_RDTSCP | | ||
| 2809 | SECONDARY_EXEC_DESC | | 2812 | SECONDARY_EXEC_DESC | |
| 2810 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 2813 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
| 2811 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2814 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
| 2812 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 2815 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
| 2813 | SECONDARY_EXEC_WBINVD_EXITING | | 2816 | SECONDARY_EXEC_WBINVD_EXITING; |
| 2814 | SECONDARY_EXEC_XSAVES; | ||
| 2815 | 2817 | ||
| 2816 | if (enable_ept) { | 2818 | if (enable_ept) { |
| 2817 | /* nested EPT: emulate EPT also to L1 */ | 2819 | /* nested EPT: emulate EPT also to L1 */ |
| @@ -2834,6 +2836,17 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2834 | } else | 2836 | } else |
| 2835 | vmx->nested.nested_vmx_ept_caps = 0; | 2837 | vmx->nested.nested_vmx_ept_caps = 0; |
| 2836 | 2838 | ||
| 2839 | if (cpu_has_vmx_vmfunc()) { | ||
| 2840 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 2841 | SECONDARY_EXEC_ENABLE_VMFUNC; | ||
| 2842 | /* | ||
| 2843 | * Advertise EPTP switching unconditionally | ||
| 2844 | * since we emulate it | ||
| 2845 | */ | ||
| 2846 | vmx->nested.nested_vmx_vmfunc_controls = | ||
| 2847 | VMX_VMFUNC_EPTP_SWITCHING; | ||
| 2848 | } | ||
| 2849 | |||
| 2837 | /* | 2850 | /* |
| 2838 | * Old versions of KVM use the single-context version without | 2851 | * Old versions of KVM use the single-context version without |
| 2839 | * checking for support, so declare that it is supported even | 2852 | * checking for support, so declare that it is supported even |
| @@ -3203,6 +3216,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
| 3203 | *pdata = vmx->nested.nested_vmx_ept_caps | | 3216 | *pdata = vmx->nested.nested_vmx_ept_caps | |
| 3204 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); | 3217 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); |
| 3205 | break; | 3218 | break; |
| 3219 | case MSR_IA32_VMX_VMFUNC: | ||
| 3220 | *pdata = vmx->nested.nested_vmx_vmfunc_controls; | ||
| 3221 | break; | ||
| 3206 | default: | 3222 | default: |
| 3207 | return 1; | 3223 | return 1; |
| 3208 | } | 3224 | } |
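The two hunks above make nested KVM advertise the VM-functions secondary control and report EPTP switching when L1 reads MSR_IA32_VMX_VMFUNC. As a rough illustration of what L1 then sees, here is a minimal userspace sketch; the constant mirrors VMX_VMFUNC_EPTP_SWITCHING (VM function 0, bit 0 of the MSR) but is defined locally rather than taken from the kernel headers.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for VMX_VMFUNC_EPTP_SWITCHING (VM function 0). */
#define EPTP_SWITCHING_BIT (1ULL << 0)

/* Decode an IA32_VMX_VMFUNC value the way an L1 hypervisor would. */
static void report_vmfunc_controls(uint64_t vmfunc_msr)
{
	if (vmfunc_msr & EPTP_SWITCHING_BIT)
		printf("VM function 0 (EPTP switching) is advertised\n");
	else
		printf("no VM functions advertised\n");
}

int main(void)
{
	/* With the change above, a nested L1 reads back exactly this value. */
	report_vmfunc_controls(EPTP_SWITCHING_BIT);
	return 0;
}
```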
| @@ -3256,7 +3272,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 3256 | break; | 3272 | break; |
| 3257 | case MSR_IA32_BNDCFGS: | 3273 | case MSR_IA32_BNDCFGS: |
| 3258 | if (!kvm_mpx_supported() || | 3274 | if (!kvm_mpx_supported() || |
| 3259 | (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) | 3275 | (!msr_info->host_initiated && |
| 3276 | !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) | ||
| 3260 | return 1; | 3277 | return 1; |
| 3261 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); | 3278 | msr_info->data = vmcs_read64(GUEST_BNDCFGS); |
| 3262 | break; | 3279 | break; |
| @@ -3280,7 +3297,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 3280 | msr_info->data = vcpu->arch.ia32_xss; | 3297 | msr_info->data = vcpu->arch.ia32_xss; |
| 3281 | break; | 3298 | break; |
| 3282 | case MSR_TSC_AUX: | 3299 | case MSR_TSC_AUX: |
| 3283 | if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) | 3300 | if (!msr_info->host_initiated && |
| 3301 | !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) | ||
| 3284 | return 1; | 3302 | return 1; |
| 3285 | /* Otherwise falls through */ | 3303 | /* Otherwise falls through */ |
| 3286 | default: | 3304 | default: |
| @@ -3339,9 +3357,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 3339 | break; | 3357 | break; |
| 3340 | case MSR_IA32_BNDCFGS: | 3358 | case MSR_IA32_BNDCFGS: |
| 3341 | if (!kvm_mpx_supported() || | 3359 | if (!kvm_mpx_supported() || |
| 3342 | (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu))) | 3360 | (!msr_info->host_initiated && |
| 3361 | !guest_cpuid_has(vcpu, X86_FEATURE_MPX))) | ||
| 3343 | return 1; | 3362 | return 1; |
| 3344 | if (is_noncanonical_address(data & PAGE_MASK) || | 3363 | if (is_noncanonical_address(data & PAGE_MASK, vcpu) || |
| 3345 | (data & MSR_IA32_BNDCFGS_RSVD)) | 3364 | (data & MSR_IA32_BNDCFGS_RSVD)) |
| 3346 | return 1; | 3365 | return 1; |
| 3347 | vmcs_write64(GUEST_BNDCFGS, data); | 3366 | vmcs_write64(GUEST_BNDCFGS, data); |
| @@ -3402,7 +3421,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 3402 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); | 3421 | clear_atomic_switch_msr(vmx, MSR_IA32_XSS); |
| 3403 | break; | 3422 | break; |
| 3404 | case MSR_TSC_AUX: | 3423 | case MSR_TSC_AUX: |
| 3405 | if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated) | 3424 | if (!msr_info->host_initiated && |
| 3425 | !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) | ||
| 3406 | return 1; | 3426 | return 1; |
| 3407 | /* Check reserved bit, higher 32 bits should be zero */ | 3427 | /* Check reserved bit, higher 32 bits should be zero */ |
| 3408 | if ((data >> 32) != 0) | 3428 | if ((data >> 32) != 0) |
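Both MSR accessors now follow the same guard: a host-initiated access (for example during save/restore) is always allowed, while a guest access faults when the corresponding CPUID feature is not exposed. A minimal sketch of that pattern, with the vcpu and msr structures reduced to illustrative stand-ins:

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the real vcpu and msr_data structures. */
struct vcpu       { bool has_rdtscp; };
struct msr_access { bool host_initiated; };

/*
 * Access check in the spirit of the MSR_TSC_AUX case above: returning 1
 * means the access is rejected and the guest receives #GP.
 */
static int check_tsc_aux_access(const struct vcpu *v, const struct msr_access *a)
{
	if (!a->host_initiated && !v->has_rdtscp)
		return 1;	/* feature hidden from the guest */
	return 0;		/* access allowed */
}

int main(void)
{
	struct vcpu v = { .has_rdtscp = false };
	struct msr_access guest = { .host_initiated = false };
	struct msr_access host  = { .host_initiated = true  };

	printf("guest access -> %d (1 means #GP)\n", check_tsc_aux_access(&v, &guest));
	printf("host access  -> %d\n", check_tsc_aux_access(&v, &host));
	return 0;
}
```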
| @@ -3639,8 +3659,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 3639 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 3659 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
| 3640 | SECONDARY_EXEC_SHADOW_VMCS | | 3660 | SECONDARY_EXEC_SHADOW_VMCS | |
| 3641 | SECONDARY_EXEC_XSAVES | | 3661 | SECONDARY_EXEC_XSAVES | |
| 3662 | SECONDARY_EXEC_RDSEED | | ||
| 3663 | SECONDARY_EXEC_RDRAND | | ||
| 3642 | SECONDARY_EXEC_ENABLE_PML | | 3664 | SECONDARY_EXEC_ENABLE_PML | |
| 3643 | SECONDARY_EXEC_TSC_SCALING; | 3665 | SECONDARY_EXEC_TSC_SCALING | |
| 3666 | SECONDARY_EXEC_ENABLE_VMFUNC; | ||
| 3644 | if (adjust_vmx_controls(min2, opt2, | 3667 | if (adjust_vmx_controls(min2, opt2, |
| 3645 | MSR_IA32_VMX_PROCBASED_CTLS2, | 3668 | MSR_IA32_VMX_PROCBASED_CTLS2, |
| 3646 | &_cpu_based_2nd_exec_control) < 0) | 3669 | &_cpu_based_2nd_exec_control) < 0) |
| @@ -4272,16 +4295,22 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 4272 | vmx->emulation_required = emulation_required(vcpu); | 4295 | vmx->emulation_required = emulation_required(vcpu); |
| 4273 | } | 4296 | } |
| 4274 | 4297 | ||
| 4298 | static int get_ept_level(struct kvm_vcpu *vcpu) | ||
| 4299 | { | ||
| 4300 | if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) | ||
| 4301 | return 5; | ||
| 4302 | return 4; | ||
| 4303 | } | ||
| 4304 | |||
| 4275 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) | 4305 | static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) |
| 4276 | { | 4306 | { |
| 4277 | u64 eptp; | 4307 | u64 eptp = VMX_EPTP_MT_WB; |
| 4308 | |||
| 4309 | eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4; | ||
| 4278 | 4310 | ||
| 4279 | /* TODO write the value reading from MSR */ | ||
| 4280 | eptp = VMX_EPT_DEFAULT_MT | | ||
| 4281 | VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; | ||
| 4282 | if (enable_ept_ad_bits && | 4311 | if (enable_ept_ad_bits && |
| 4283 | (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) | 4312 | (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu))) |
| 4284 | eptp |= VMX_EPT_AD_ENABLE_BIT; | 4313 | eptp |= VMX_EPTP_AD_ENABLE_BIT; |
| 4285 | eptp |= (root_hpa & PAGE_MASK); | 4314 | eptp |= (root_hpa & PAGE_MASK); |
| 4286 | 4315 | ||
| 4287 | return eptp; | 4316 | return eptp; |
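The new construct_eptp() assembles the EPT pointer from a write-back memory type, a page-walk level chosen by get_ept_level(), the optional accessed/dirty enable bit, and the root table address. The sketch below rebuilds the same value in plain C; the field encodings follow the Intel SDM EPTP layout (memory type in bits 2:0, walk length minus one in bits 5:3, A/D enable in bit 6, root physical address from bit 12 up), but the macro names are local stand-ins rather than the kernel's VMX_EPTP_* definitions.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative EPTP field encodings (Intel SDM EPTP format). */
#define EPTP_MT_WB	6ULL		/* write-back memory type, bits 2:0 */
#define EPTP_PWL_SHIFT	3		/* page-walk length minus 1, bits 5:3 */
#define EPTP_AD_ENABLE	(1ULL << 6)	/* enable accessed/dirty flags */
#define PAGE_MASK_4K	(~0xfffULL)

static uint64_t build_eptp(uint64_t root_hpa, int levels, int enable_ad)
{
	uint64_t eptp = EPTP_MT_WB;

	/* A 4-level walk encodes as 3, a 5-level walk as 4. */
	eptp |= (uint64_t)(levels - 1) << EPTP_PWL_SHIFT;
	if (enable_ad)
		eptp |= EPTP_AD_ENABLE;
	eptp |= root_hpa & PAGE_MASK_4K;
	return eptp;
}

int main(void)
{
	/* 5-level EPT with A/D bits, root table at 0x123456000. */
	printf("eptp = %#llx\n",
	       (unsigned long long)build_eptp(0x123456000ULL, 5, 1));
	return 0;
}
```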
| @@ -5243,10 +5272,24 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
| 5243 | return exec_control; | 5272 | return exec_control; |
| 5244 | } | 5273 | } |
| 5245 | 5274 | ||
| 5246 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 5275 | static bool vmx_rdrand_supported(void) |
| 5247 | { | 5276 | { |
| 5277 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 5278 | SECONDARY_EXEC_RDRAND; | ||
| 5279 | } | ||
| 5280 | |||
| 5281 | static bool vmx_rdseed_supported(void) | ||
| 5282 | { | ||
| 5283 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 5284 | SECONDARY_EXEC_RDSEED; | ||
| 5285 | } | ||
| 5286 | |||
| 5287 | static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | ||
| 5288 | { | ||
| 5289 | struct kvm_vcpu *vcpu = &vmx->vcpu; | ||
| 5290 | |||
| 5248 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 5291 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
| 5249 | if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) | 5292 | if (!cpu_need_virtualize_apic_accesses(vcpu)) |
| 5250 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 5293 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
| 5251 | if (vmx->vpid == 0) | 5294 | if (vmx->vpid == 0) |
| 5252 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 5295 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
| @@ -5260,7 +5303,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5260 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 5303 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 5261 | if (!ple_gap) | 5304 | if (!ple_gap) |
| 5262 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 5305 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
| 5263 | if (!kvm_vcpu_apicv_active(&vmx->vcpu)) | 5306 | if (!kvm_vcpu_apicv_active(vcpu)) |
| 5264 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 5307 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
| 5265 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 5308 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
| 5266 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 5309 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
| @@ -5274,7 +5317,92 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5274 | if (!enable_pml) | 5317 | if (!enable_pml) |
| 5275 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; | 5318 | exec_control &= ~SECONDARY_EXEC_ENABLE_PML; |
| 5276 | 5319 | ||
| 5277 | return exec_control; | 5320 | if (vmx_xsaves_supported()) { |
| 5321 | /* Exposing XSAVES only when XSAVE is exposed */ | ||
| 5322 | bool xsaves_enabled = | ||
| 5323 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | ||
| 5324 | guest_cpuid_has(vcpu, X86_FEATURE_XSAVES); | ||
| 5325 | |||
| 5326 | if (!xsaves_enabled) | ||
| 5327 | exec_control &= ~SECONDARY_EXEC_XSAVES; | ||
| 5328 | |||
| 5329 | if (nested) { | ||
| 5330 | if (xsaves_enabled) | ||
| 5331 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 5332 | SECONDARY_EXEC_XSAVES; | ||
| 5333 | else | ||
| 5334 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
| 5335 | ~SECONDARY_EXEC_XSAVES; | ||
| 5336 | } | ||
| 5337 | } | ||
| 5338 | |||
| 5339 | if (vmx_rdtscp_supported()) { | ||
| 5340 | bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP); | ||
| 5341 | if (!rdtscp_enabled) | ||
| 5342 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
| 5343 | |||
| 5344 | if (nested) { | ||
| 5345 | if (rdtscp_enabled) | ||
| 5346 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 5347 | SECONDARY_EXEC_RDTSCP; | ||
| 5348 | else | ||
| 5349 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
| 5350 | ~SECONDARY_EXEC_RDTSCP; | ||
| 5351 | } | ||
| 5352 | } | ||
| 5353 | |||
| 5354 | if (vmx_invpcid_supported()) { | ||
| 5355 | /* Exposing INVPCID only when PCID is exposed */ | ||
| 5356 | bool invpcid_enabled = | ||
| 5357 | guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) && | ||
| 5358 | guest_cpuid_has(vcpu, X86_FEATURE_PCID); | ||
| 5359 | |||
| 5360 | if (!invpcid_enabled) { | ||
| 5361 | exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
| 5362 | guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID); | ||
| 5363 | } | ||
| 5364 | |||
| 5365 | if (nested) { | ||
| 5366 | if (invpcid_enabled) | ||
| 5367 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 5368 | SECONDARY_EXEC_ENABLE_INVPCID; | ||
| 5369 | else | ||
| 5370 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
| 5371 | ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
| 5372 | } | ||
| 5373 | } | ||
| 5374 | |||
| 5375 | if (vmx_rdrand_supported()) { | ||
| 5376 | bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND); | ||
| 5377 | if (rdrand_enabled) | ||
| 5378 | exec_control &= ~SECONDARY_EXEC_RDRAND; | ||
| 5379 | |||
| 5380 | if (nested) { | ||
| 5381 | if (rdrand_enabled) | ||
| 5382 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 5383 | SECONDARY_EXEC_RDRAND; | ||
| 5384 | else | ||
| 5385 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
| 5386 | ~SECONDARY_EXEC_RDRAND; | ||
| 5387 | } | ||
| 5388 | } | ||
| 5389 | |||
| 5390 | if (vmx_rdseed_supported()) { | ||
| 5391 | bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED); | ||
| 5392 | if (rdseed_enabled) | ||
| 5393 | exec_control &= ~SECONDARY_EXEC_RDSEED; | ||
| 5394 | |||
| 5395 | if (nested) { | ||
| 5396 | if (rdseed_enabled) | ||
| 5397 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 5398 | SECONDARY_EXEC_RDSEED; | ||
| 5399 | else | ||
| 5400 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
| 5401 | ~SECONDARY_EXEC_RDSEED; | ||
| 5402 | } | ||
| 5403 | } | ||
| 5404 | |||
| 5405 | vmx->secondary_exec_control = exec_control; | ||
| 5278 | } | 5406 | } |
| 5279 | 5407 | ||
| 5280 | static void ept_set_mmio_spte_mask(void) | 5408 | static void ept_set_mmio_spte_mask(void) |
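vmx_compute_secondary_exec_control() repeats one pattern per feature: clear the execution control when guest CPUID hides the feature, and mirror the decision into the allowed-1 bits that a nested L1 may set. A minimal sketch of that pattern for XSAVES; the control bit value and structure layout are illustrative, not the kernel's.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative control bit, not the kernel's SECONDARY_EXEC_XSAVES value. */
#define EXEC_CTRL_XSAVES (1u << 20)

struct vcpu_state {
	bool guest_has_xsave;
	bool guest_has_xsaves;
	bool nested;
	unsigned int exec_control;		/* programmed into vmcs01 */
	unsigned int nested_allowed_high;	/* allowed-1 bits shown to L1 */
};

/* Enable the XSAVES control only when XSAVE and XSAVES are in guest CPUID. */
static void update_xsaves_control(struct vcpu_state *v)
{
	bool enabled = v->guest_has_xsave && v->guest_has_xsaves;

	if (!enabled)
		v->exec_control &= ~EXEC_CTRL_XSAVES;

	if (v->nested) {
		if (enabled)
			v->nested_allowed_high |= EXEC_CTRL_XSAVES;
		else
			v->nested_allowed_high &= ~EXEC_CTRL_XSAVES;
	}
}

int main(void)
{
	struct vcpu_state v = {
		.guest_has_xsave = true, .guest_has_xsaves = false,
		.nested = true, .exec_control = EXEC_CTRL_XSAVES,
	};

	update_xsaves_control(&v);
	printf("exec_control=%#x nested_allowed=%#x\n",
	       v.exec_control, v.nested_allowed_high);
	return 0;
}
```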
| @@ -5318,8 +5446,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 5318 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 5446 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
| 5319 | 5447 | ||
| 5320 | if (cpu_has_secondary_exec_ctrls()) { | 5448 | if (cpu_has_secondary_exec_ctrls()) { |
| 5449 | vmx_compute_secondary_exec_control(vmx); | ||
| 5321 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | 5450 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, |
| 5322 | vmx_secondary_exec_control(vmx)); | 5451 | vmx->secondary_exec_control); |
| 5323 | } | 5452 | } |
| 5324 | 5453 | ||
| 5325 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) { | 5454 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) { |
| @@ -5357,6 +5486,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 5357 | vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ | 5486 | vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */ |
| 5358 | #endif | 5487 | #endif |
| 5359 | 5488 | ||
| 5489 | if (cpu_has_vmx_vmfunc()) | ||
| 5490 | vmcs_write64(VM_FUNCTION_CONTROL, 0); | ||
| 5491 | |||
| 5360 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); | 5492 | vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); |
| 5361 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); | 5493 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); |
| 5362 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); | 5494 | vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host)); |
| @@ -5835,6 +5967,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu) | |||
| 5835 | static int handle_triple_fault(struct kvm_vcpu *vcpu) | 5967 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
| 5836 | { | 5968 | { |
| 5837 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 5969 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
| 5970 | vcpu->mmio_needed = 0; | ||
| 5838 | return 0; | 5971 | return 0; |
| 5839 | } | 5972 | } |
| 5840 | 5973 | ||
| @@ -6330,7 +6463,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
| 6330 | { | 6463 | { |
| 6331 | unsigned long exit_qualification; | 6464 | unsigned long exit_qualification; |
| 6332 | gpa_t gpa; | 6465 | gpa_t gpa; |
| 6333 | u32 error_code; | 6466 | u64 error_code; |
| 6334 | 6467 | ||
| 6335 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6468 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 6336 | 6469 | ||
| @@ -6362,9 +6495,10 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
| 6362 | EPT_VIOLATION_EXECUTABLE)) | 6495 | EPT_VIOLATION_EXECUTABLE)) |
| 6363 | ? PFERR_PRESENT_MASK : 0; | 6496 | ? PFERR_PRESENT_MASK : 0; |
| 6364 | 6497 | ||
| 6365 | vcpu->arch.gpa_available = true; | 6498 | error_code |= (exit_qualification & 0x100) != 0 ? |
| 6366 | vcpu->arch.exit_qualification = exit_qualification; | 6499 | PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; |
| 6367 | 6500 | ||
| 6501 | vcpu->arch.exit_qualification = exit_qualification; | ||
| 6368 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); | 6502 | return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); |
| 6369 | } | 6503 | } |
| 6370 | 6504 | ||
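handle_ept_violation() now folds the exit qualification straight into a page-fault error code, including a new bit that tells the MMU whether the fault hit the final guest-physical translation or an access to the guest's own paging structures. A sketch of that decode; the exit-qualification bit positions follow the SDM, while the PF_* error-code bits are local stand-ins (the kernel's PFERR_GUEST_* masks live in the upper 32 bits).

```c
#include <stdint.h>
#include <stdio.h>

/* EPT-violation exit-qualification bits (Intel SDM layout). */
#define EPT_ACC_READ	(1ULL << 0)
#define EPT_ACC_WRITE	(1ULL << 1)
#define EPT_ACC_FETCH	(1ULL << 2)
#define EPT_PERM_MASK	(7ULL << 3)	/* GPA was readable/writable/executable */
#define EPT_FINAL_XLAT	(1ULL << 8)	/* fault on the final translation */

/* Illustrative page-fault error-code bits (stand-ins for PFERR_*). */
#define PF_PRESENT	(1u << 0)
#define PF_WRITE	(1u << 1)
#define PF_USER		(1u << 2)
#define PF_FETCH	(1u << 4)
#define PF_GUEST_PAGE	(1u << 8)
#define PF_GUEST_FINAL	(1u << 9)

/* Turn an EPT-violation exit qualification into a page-fault error code. */
static uint64_t ept_error_code(uint64_t qual)
{
	uint64_t ec = 0;

	ec |= (qual & EPT_ACC_READ)  ? PF_USER    : 0;
	ec |= (qual & EPT_ACC_WRITE) ? PF_WRITE   : 0;
	ec |= (qual & EPT_ACC_FETCH) ? PF_FETCH   : 0;
	ec |= (qual & EPT_PERM_MASK) ? PF_PRESENT : 0;
	/* New in this series: final GPA vs. guest page-table access. */
	ec |= (qual & EPT_FINAL_XLAT) ? PF_GUEST_FINAL : PF_GUEST_PAGE;
	return ec;
}

int main(void)
{
	/* A write to a not-present final translation. */
	printf("error_code = %#llx\n",
	       (unsigned long long)ept_error_code(EPT_ACC_WRITE | EPT_FINAL_XLAT));
	return 0;
}
```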
| @@ -6373,23 +6507,20 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
| 6373 | int ret; | 6507 | int ret; |
| 6374 | gpa_t gpa; | 6508 | gpa_t gpa; |
| 6375 | 6509 | ||
| 6510 | /* | ||
| 6511 | * A nested guest cannot optimize MMIO vmexits, because we have an | ||
| 6512 | * nGPA here instead of the required GPA. | ||
| 6513 | */ | ||
| 6376 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); | 6514 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); |
| 6377 | if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | 6515 | if (!is_guest_mode(vcpu) && |
| 6516 | !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { | ||
| 6378 | trace_kvm_fast_mmio(gpa); | 6517 | trace_kvm_fast_mmio(gpa); |
| 6379 | return kvm_skip_emulated_instruction(vcpu); | 6518 | return kvm_skip_emulated_instruction(vcpu); |
| 6380 | } | 6519 | } |
| 6381 | 6520 | ||
| 6382 | ret = handle_mmio_page_fault(vcpu, gpa, true); | 6521 | ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); |
| 6383 | vcpu->arch.gpa_available = true; | 6522 | if (ret >= 0) |
| 6384 | if (likely(ret == RET_MMIO_PF_EMULATE)) | 6523 | return ret; |
| 6385 | return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == | ||
| 6386 | EMULATE_DONE; | ||
| 6387 | |||
| 6388 | if (unlikely(ret == RET_MMIO_PF_INVALID)) | ||
| 6389 | return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0); | ||
| 6390 | |||
| 6391 | if (unlikely(ret == RET_MMIO_PF_RETRY)) | ||
| 6392 | return 1; | ||
| 6393 | 6524 | ||
| 6394 | /* It is the real ept misconfig */ | 6525 | /* It is the real ept misconfig */ |
| 6395 | WARN_ON(1); | 6526 | WARN_ON(1); |
| @@ -6611,7 +6742,8 @@ static __init int hardware_setup(void) | |||
| 6611 | init_vmcs_shadow_fields(); | 6742 | init_vmcs_shadow_fields(); |
| 6612 | 6743 | ||
| 6613 | if (!cpu_has_vmx_ept() || | 6744 | if (!cpu_has_vmx_ept() || |
| 6614 | !cpu_has_vmx_ept_4levels()) { | 6745 | !cpu_has_vmx_ept_4levels() || |
| 6746 | !cpu_has_vmx_ept_mt_wb()) { | ||
| 6615 | enable_ept = 0; | 6747 | enable_ept = 0; |
| 6616 | enable_unrestricted_guest = 0; | 6748 | enable_unrestricted_guest = 0; |
| 6617 | enable_ept_ad_bits = 0; | 6749 | enable_ept_ad_bits = 0; |
| @@ -6754,7 +6886,13 @@ static int handle_pause(struct kvm_vcpu *vcpu) | |||
| 6754 | if (ple_gap) | 6886 | if (ple_gap) |
| 6755 | grow_ple_window(vcpu); | 6887 | grow_ple_window(vcpu); |
| 6756 | 6888 | ||
| 6757 | kvm_vcpu_on_spin(vcpu); | 6889 | /* |
| 6890 | * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting" | ||
| 6891 | * VM-execution control is ignored if CPL > 0. OTOH, KVM | ||
| 6892 | * never set PAUSE_EXITING and just set PLE if supported, | ||
| 6893 | * so the vcpu must be CPL=0 if it gets a PAUSE exit. | ||
| 6894 | */ | ||
| 6895 | kvm_vcpu_on_spin(vcpu, true); | ||
| 6758 | return kvm_skip_emulated_instruction(vcpu); | 6896 | return kvm_skip_emulated_instruction(vcpu); |
| 6759 | } | 6897 | } |
| 6760 | 6898 | ||
| @@ -6769,6 +6907,12 @@ static int handle_mwait(struct kvm_vcpu *vcpu) | |||
| 6769 | return handle_nop(vcpu); | 6907 | return handle_nop(vcpu); |
| 6770 | } | 6908 | } |
| 6771 | 6909 | ||
| 6910 | static int handle_invalid_op(struct kvm_vcpu *vcpu) | ||
| 6911 | { | ||
| 6912 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 6913 | return 1; | ||
| 6914 | } | ||
| 6915 | |||
| 6772 | static int handle_monitor_trap(struct kvm_vcpu *vcpu) | 6916 | static int handle_monitor_trap(struct kvm_vcpu *vcpu) |
| 6773 | { | 6917 | { |
| 6774 | return 1; | 6918 | return 1; |
| @@ -6985,7 +7129,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, | |||
| 6985 | * non-canonical form. This is the only check on the memory | 7129 | * non-canonical form. This is the only check on the memory |
| 6986 | * destination for long mode! | 7130 | * destination for long mode! |
| 6987 | */ | 7131 | */ |
| 6988 | exn = is_noncanonical_address(*ret); | 7132 | exn = is_noncanonical_address(*ret, vcpu); |
| 6989 | } else if (is_protmode(vcpu)) { | 7133 | } else if (is_protmode(vcpu)) { |
| 6990 | /* Protected mode: apply checks for segment validity in the | 7134 | /* Protected mode: apply checks for segment validity in the |
| 6991 | * following order: | 7135 | * following order: |
| @@ -7149,19 +7293,19 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
| 7149 | return kvm_skip_emulated_instruction(vcpu); | 7293 | return kvm_skip_emulated_instruction(vcpu); |
| 7150 | } | 7294 | } |
| 7151 | 7295 | ||
| 7152 | page = nested_get_page(vcpu, vmptr); | 7296 | page = kvm_vcpu_gpa_to_page(vcpu, vmptr); |
| 7153 | if (page == NULL) { | 7297 | if (is_error_page(page)) { |
| 7154 | nested_vmx_failInvalid(vcpu); | 7298 | nested_vmx_failInvalid(vcpu); |
| 7155 | return kvm_skip_emulated_instruction(vcpu); | 7299 | return kvm_skip_emulated_instruction(vcpu); |
| 7156 | } | 7300 | } |
| 7157 | if (*(u32 *)kmap(page) != VMCS12_REVISION) { | 7301 | if (*(u32 *)kmap(page) != VMCS12_REVISION) { |
| 7158 | kunmap(page); | 7302 | kunmap(page); |
| 7159 | nested_release_page_clean(page); | 7303 | kvm_release_page_clean(page); |
| 7160 | nested_vmx_failInvalid(vcpu); | 7304 | nested_vmx_failInvalid(vcpu); |
| 7161 | return kvm_skip_emulated_instruction(vcpu); | 7305 | return kvm_skip_emulated_instruction(vcpu); |
| 7162 | } | 7306 | } |
| 7163 | kunmap(page); | 7307 | kunmap(page); |
| 7164 | nested_release_page_clean(page); | 7308 | kvm_release_page_clean(page); |
| 7165 | 7309 | ||
| 7166 | vmx->nested.vmxon_ptr = vmptr; | 7310 | vmx->nested.vmxon_ptr = vmptr; |
| 7167 | ret = enter_vmx_operation(vcpu); | 7311 | ret = enter_vmx_operation(vcpu); |
| @@ -7242,16 +7386,16 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
| 7242 | kfree(vmx->nested.cached_vmcs12); | 7386 | kfree(vmx->nested.cached_vmcs12); |
| 7243 | /* Unpin physical memory we referred to in current vmcs02 */ | 7387 | /* Unpin physical memory we referred to in current vmcs02 */ |
| 7244 | if (vmx->nested.apic_access_page) { | 7388 | if (vmx->nested.apic_access_page) { |
| 7245 | nested_release_page(vmx->nested.apic_access_page); | 7389 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
| 7246 | vmx->nested.apic_access_page = NULL; | 7390 | vmx->nested.apic_access_page = NULL; |
| 7247 | } | 7391 | } |
| 7248 | if (vmx->nested.virtual_apic_page) { | 7392 | if (vmx->nested.virtual_apic_page) { |
| 7249 | nested_release_page(vmx->nested.virtual_apic_page); | 7393 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); |
| 7250 | vmx->nested.virtual_apic_page = NULL; | 7394 | vmx->nested.virtual_apic_page = NULL; |
| 7251 | } | 7395 | } |
| 7252 | if (vmx->nested.pi_desc_page) { | 7396 | if (vmx->nested.pi_desc_page) { |
| 7253 | kunmap(vmx->nested.pi_desc_page); | 7397 | kunmap(vmx->nested.pi_desc_page); |
| 7254 | nested_release_page(vmx->nested.pi_desc_page); | 7398 | kvm_release_page_dirty(vmx->nested.pi_desc_page); |
| 7255 | vmx->nested.pi_desc_page = NULL; | 7399 | vmx->nested.pi_desc_page = NULL; |
| 7256 | vmx->nested.pi_desc = NULL; | 7400 | vmx->nested.pi_desc = NULL; |
| 7257 | } | 7401 | } |
| @@ -7618,15 +7762,15 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
| 7618 | if (vmx->nested.current_vmptr != vmptr) { | 7762 | if (vmx->nested.current_vmptr != vmptr) { |
| 7619 | struct vmcs12 *new_vmcs12; | 7763 | struct vmcs12 *new_vmcs12; |
| 7620 | struct page *page; | 7764 | struct page *page; |
| 7621 | page = nested_get_page(vcpu, vmptr); | 7765 | page = kvm_vcpu_gpa_to_page(vcpu, vmptr); |
| 7622 | if (page == NULL) { | 7766 | if (is_error_page(page)) { |
| 7623 | nested_vmx_failInvalid(vcpu); | 7767 | nested_vmx_failInvalid(vcpu); |
| 7624 | return kvm_skip_emulated_instruction(vcpu); | 7768 | return kvm_skip_emulated_instruction(vcpu); |
| 7625 | } | 7769 | } |
| 7626 | new_vmcs12 = kmap(page); | 7770 | new_vmcs12 = kmap(page); |
| 7627 | if (new_vmcs12->revision_id != VMCS12_REVISION) { | 7771 | if (new_vmcs12->revision_id != VMCS12_REVISION) { |
| 7628 | kunmap(page); | 7772 | kunmap(page); |
| 7629 | nested_release_page_clean(page); | 7773 | kvm_release_page_clean(page); |
| 7630 | nested_vmx_failValid(vcpu, | 7774 | nested_vmx_failValid(vcpu, |
| 7631 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); | 7775 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); |
| 7632 | return kvm_skip_emulated_instruction(vcpu); | 7776 | return kvm_skip_emulated_instruction(vcpu); |
| @@ -7639,7 +7783,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
| 7639 | */ | 7783 | */ |
| 7640 | memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); | 7784 | memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); |
| 7641 | kunmap(page); | 7785 | kunmap(page); |
| 7642 | nested_release_page_clean(page); | 7786 | kvm_release_page_clean(page); |
| 7643 | 7787 | ||
| 7644 | set_current_vmptr(vmx, vmptr); | 7788 | set_current_vmptr(vmx, vmptr); |
| 7645 | } | 7789 | } |
| @@ -7790,7 +7934,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
| 7790 | 7934 | ||
| 7791 | switch (type) { | 7935 | switch (type) { |
| 7792 | case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: | 7936 | case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: |
| 7793 | if (is_noncanonical_address(operand.gla)) { | 7937 | if (is_noncanonical_address(operand.gla, vcpu)) { |
| 7794 | nested_vmx_failValid(vcpu, | 7938 | nested_vmx_failValid(vcpu, |
| 7795 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); | 7939 | VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); |
| 7796 | return kvm_skip_emulated_instruction(vcpu); | 7940 | return kvm_skip_emulated_instruction(vcpu); |
| @@ -7847,6 +7991,124 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) | |||
| 7847 | return 1; | 7991 | return 1; |
| 7848 | } | 7992 | } |
| 7849 | 7993 | ||
| 7994 | static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) | ||
| 7995 | { | ||
| 7996 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 7997 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
| 7998 | |||
| 7999 | /* Check for memory type validity */ | ||
| 8000 | switch (address & VMX_EPTP_MT_MASK) { | ||
| 8001 | case VMX_EPTP_MT_UC: | ||
| 8002 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) | ||
| 8003 | return false; | ||
| 8004 | break; | ||
| 8005 | case VMX_EPTP_MT_WB: | ||
| 8006 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) | ||
| 8007 | return false; | ||
| 8008 | break; | ||
| 8009 | default: | ||
| 8010 | return false; | ||
| 8011 | } | ||
| 8012 | |||
| 8013 | /* only 4 levels page-walk length are valid */ | ||
| 8014 | if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) | ||
| 8015 | return false; | ||
| 8016 | |||
| 8017 | /* Reserved bits should not be set */ | ||
| 8018 | if (address >> maxphyaddr || ((address >> 7) & 0x1f)) | ||
| 8019 | return false; | ||
| 8020 | |||
| 8021 | /* AD, if set, should be supported */ | ||
| 8022 | if (address & VMX_EPTP_AD_ENABLE_BIT) { | ||
| 8023 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) | ||
| 8024 | return false; | ||
| 8025 | } | ||
| 8026 | |||
| 8027 | return true; | ||
| 8028 | } | ||
| 8029 | |||
| 8030 | static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, | ||
| 8031 | struct vmcs12 *vmcs12) | ||
| 8032 | { | ||
| 8033 | u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; | ||
| 8034 | u64 address; | ||
| 8035 | bool accessed_dirty; | ||
| 8036 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | ||
| 8037 | |||
| 8038 | if (!nested_cpu_has_eptp_switching(vmcs12) || | ||
| 8039 | !nested_cpu_has_ept(vmcs12)) | ||
| 8040 | return 1; | ||
| 8041 | |||
| 8042 | if (index >= VMFUNC_EPTP_ENTRIES) | ||
| 8043 | return 1; | ||
| 8044 | |||
| 8045 | |||
| 8046 | if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT, | ||
| 8047 | &address, index * 8, 8)) | ||
| 8048 | return 1; | ||
| 8049 | |||
| 8050 | accessed_dirty = !!(address & VMX_EPTP_AD_ENABLE_BIT); | ||
| 8051 | |||
| 8052 | /* | ||
| 8053 | * If the (L2) guest does a vmfunc to the currently | ||
| 8054 | * active ept pointer, we don't have to do anything else | ||
| 8055 | */ | ||
| 8056 | if (vmcs12->ept_pointer != address) { | ||
| 8057 | if (!valid_ept_address(vcpu, address)) | ||
| 8058 | return 1; | ||
| 8059 | |||
| 8060 | kvm_mmu_unload(vcpu); | ||
| 8061 | mmu->ept_ad = accessed_dirty; | ||
| 8062 | mmu->base_role.ad_disabled = !accessed_dirty; | ||
| 8063 | vmcs12->ept_pointer = address; | ||
| 8064 | /* | ||
| 8065 | * TODO: Check what's the correct approach in case | ||
| 8066 | * mmu reload fails. Currently, we just let the next | ||
| 8067 | * reload potentially fail | ||
| 8068 | */ | ||
| 8069 | kvm_mmu_reload(vcpu); | ||
| 8070 | } | ||
| 8071 | |||
| 8072 | return 0; | ||
| 8073 | } | ||
| 8074 | |||
| 8075 | static int handle_vmfunc(struct kvm_vcpu *vcpu) | ||
| 8076 | { | ||
| 8077 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 8078 | struct vmcs12 *vmcs12; | ||
| 8079 | u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 8080 | |||
| 8081 | /* | ||
| 8082 | * VMFUNC is only supported for nested guests, but we always enable the | ||
| 8083 | * secondary control for simplicity; for non-nested mode, fake that we | ||
| 8084 | * didn't by injecting #UD. | ||
| 8085 | */ | ||
| 8086 | if (!is_guest_mode(vcpu)) { | ||
| 8087 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 8088 | return 1; | ||
| 8089 | } | ||
| 8090 | |||
| 8091 | vmcs12 = get_vmcs12(vcpu); | ||
| 8092 | if ((vmcs12->vm_function_control & (1 << function)) == 0) | ||
| 8093 | goto fail; | ||
| 8094 | |||
| 8095 | switch (function) { | ||
| 8096 | case 0: | ||
| 8097 | if (nested_vmx_eptp_switching(vcpu, vmcs12)) | ||
| 8098 | goto fail; | ||
| 8099 | break; | ||
| 8100 | default: | ||
| 8101 | goto fail; | ||
| 8102 | } | ||
| 8103 | return kvm_skip_emulated_instruction(vcpu); | ||
| 8104 | |||
| 8105 | fail: | ||
| 8106 | nested_vmx_vmexit(vcpu, vmx->exit_reason, | ||
| 8107 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
| 8108 | vmcs_readl(EXIT_QUALIFICATION)); | ||
| 8109 | return 1; | ||
| 8110 | } | ||
| 8111 | |||
| 7850 | /* | 8112 | /* |
| 7851 | * The exit handlers return 1 if the exit was handled fully and guest execution | 8113 | * The exit handlers return 1 if the exit was handled fully and guest execution |
| 7852 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 8114 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
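Taken together, the new handlers emulate VM function 0 for a nested L1: handle_vmfunc() checks that the function is enabled in vm_function_control, and nested_vmx_eptp_switching() reads the candidate EPTP from the guest page named by eptp_list_address, validates it, and reloads the MMU; any failure is reflected back to L1 as a vmexit. The sketch below models only the lookup and a subset of valid_ept_address()'s checks, with the guest list page reduced to an in-memory array and an assumed MAXPHYADDR of 40.

```c
#include <stdint.h>
#include <stdio.h>

#define EPTP_ENTRIES	512		/* one 4 KiB page of 8-byte EPTPs */
#define EPTP_MT_MASK	0x7ULL
#define EPTP_MT_WB	6ULL
#define EPTP_PWL_MASK	0x38ULL
#define EPTP_PWL_4	(3ULL << 3)	/* 4-level walk encodes as 3 */
#define MAXPHYADDR	40		/* assumed guest MAXPHYADDR for the demo */

/* Subset of the checks valid_ept_address() performs above. */
static int eptp_is_valid(uint64_t eptp)
{
	if ((eptp & EPTP_MT_MASK) != EPTP_MT_WB)
		return 0;			/* only WB memory type here */
	if ((eptp & EPTP_PWL_MASK) != EPTP_PWL_4)
		return 0;			/* only 4-level walks */
	if (eptp >> MAXPHYADDR)
		return 0;			/* bits above MAXPHYADDR set */
	if ((eptp >> 7) & 0x1f)
		return 0;			/* reserved bits 11:7 set */
	return 1;
}

/* Emulated EPTP switch: index into the guest's list page and validate. */
static int eptp_switch(const uint64_t *eptp_list, uint64_t index,
		       uint64_t *current_eptp)
{
	uint64_t candidate;

	if (index >= EPTP_ENTRIES)
		return -1;			/* reflected as a nested vmexit */
	candidate = eptp_list[index];
	if (candidate == *current_eptp)
		return 0;			/* already active, nothing to do */
	if (!eptp_is_valid(candidate))
		return -1;
	*current_eptp = candidate;		/* the kernel also reloads the MMU */
	return 0;
}

int main(void)
{
	uint64_t list[EPTP_ENTRIES] = {
		[1] = 0x123456000ULL | EPTP_MT_WB | EPTP_PWL_4,
	};
	uint64_t cur = 0;

	printf("switch to entry 1 -> %d, eptp now %#llx\n",
	       eptp_switch(list, 1, &cur), (unsigned long long)cur);
	return 0;
}
```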
| @@ -7894,9 +8156,12 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
| 7894 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, | 8156 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, |
| 7895 | [EXIT_REASON_INVEPT] = handle_invept, | 8157 | [EXIT_REASON_INVEPT] = handle_invept, |
| 7896 | [EXIT_REASON_INVVPID] = handle_invvpid, | 8158 | [EXIT_REASON_INVVPID] = handle_invvpid, |
| 8159 | [EXIT_REASON_RDRAND] = handle_invalid_op, | ||
| 8160 | [EXIT_REASON_RDSEED] = handle_invalid_op, | ||
| 7897 | [EXIT_REASON_XSAVES] = handle_xsaves, | 8161 | [EXIT_REASON_XSAVES] = handle_xsaves, |
| 7898 | [EXIT_REASON_XRSTORS] = handle_xrstors, | 8162 | [EXIT_REASON_XRSTORS] = handle_xrstors, |
| 7899 | [EXIT_REASON_PML_FULL] = handle_pml_full, | 8163 | [EXIT_REASON_PML_FULL] = handle_pml_full, |
| 8164 | [EXIT_REASON_VMFUNC] = handle_vmfunc, | ||
| 7900 | [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, | 8165 | [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, |
| 7901 | }; | 8166 | }; |
| 7902 | 8167 | ||
| @@ -8212,6 +8477,10 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) | |||
| 8212 | * table is L0's fault. | 8477 | * table is L0's fault. |
| 8213 | */ | 8478 | */ |
| 8214 | return false; | 8479 | return false; |
| 8480 | case EXIT_REASON_INVPCID: | ||
| 8481 | return | ||
| 8482 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && | ||
| 8483 | nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); | ||
| 8215 | case EXIT_REASON_WBINVD: | 8484 | case EXIT_REASON_WBINVD: |
| 8216 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 8485 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
| 8217 | case EXIT_REASON_XSETBV: | 8486 | case EXIT_REASON_XSETBV: |
| @@ -8229,6 +8498,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) | |||
| 8229 | case EXIT_REASON_PML_FULL: | 8498 | case EXIT_REASON_PML_FULL: |
| 8230 | /* We emulate PML support to L1. */ | 8499 | /* We emulate PML support to L1. */ |
| 8231 | return false; | 8500 | return false; |
| 8501 | case EXIT_REASON_VMFUNC: | ||
| 8502 | /* VM functions are emulated through L2->L0 vmexits. */ | ||
| 8503 | return false; | ||
| 8232 | default: | 8504 | default: |
| 8233 | return true; | 8505 | return true; |
| 8234 | } | 8506 | } |
| @@ -8487,7 +8759,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
| 8487 | u32 vectoring_info = vmx->idt_vectoring_info; | 8759 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 8488 | 8760 | ||
| 8489 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); | 8761 | trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); |
| 8490 | vcpu->arch.gpa_available = false; | ||
| 8491 | 8762 | ||
| 8492 | /* | 8763 | /* |
| 8493 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more | 8764 | * Flush logged GPAs PML buffer, this will make dirty_bitmap more |
| @@ -9341,11 +9612,6 @@ static void __init vmx_check_processor_compat(void *rtn) | |||
| 9341 | } | 9612 | } |
| 9342 | } | 9613 | } |
| 9343 | 9614 | ||
| 9344 | static int get_ept_level(void) | ||
| 9345 | { | ||
| 9346 | return VMX_EPT_DEFAULT_GAW + 1; | ||
| 9347 | } | ||
| 9348 | |||
| 9349 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | 9615 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
| 9350 | { | 9616 | { |
| 9351 | u8 cache; | 9617 | u8 cache; |
| @@ -9462,39 +9728,13 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) | |||
| 9462 | 9728 | ||
| 9463 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | 9729 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) |
| 9464 | { | 9730 | { |
| 9465 | struct kvm_cpuid_entry2 *best; | ||
| 9466 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9731 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 9467 | u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); | ||
| 9468 | 9732 | ||
| 9469 | if (vmx_rdtscp_supported()) { | 9733 | if (cpu_has_secondary_exec_ctrls()) { |
| 9470 | bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); | 9734 | vmx_compute_secondary_exec_control(vmx); |
| 9471 | if (!rdtscp_enabled) | 9735 | vmcs_set_secondary_exec_control(vmx->secondary_exec_control); |
| 9472 | secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; | ||
| 9473 | |||
| 9474 | if (nested) { | ||
| 9475 | if (rdtscp_enabled) | ||
| 9476 | vmx->nested.nested_vmx_secondary_ctls_high |= | ||
| 9477 | SECONDARY_EXEC_RDTSCP; | ||
| 9478 | else | ||
| 9479 | vmx->nested.nested_vmx_secondary_ctls_high &= | ||
| 9480 | ~SECONDARY_EXEC_RDTSCP; | ||
| 9481 | } | ||
| 9482 | } | ||
| 9483 | |||
| 9484 | /* Exposing INVPCID only when PCID is exposed */ | ||
| 9485 | best = kvm_find_cpuid_entry(vcpu, 0x7, 0); | ||
| 9486 | if (vmx_invpcid_supported() && | ||
| 9487 | (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || | ||
| 9488 | !guest_cpuid_has_pcid(vcpu))) { | ||
| 9489 | secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; | ||
| 9490 | |||
| 9491 | if (best) | ||
| 9492 | best->ebx &= ~bit(X86_FEATURE_INVPCID); | ||
| 9493 | } | 9736 | } |
| 9494 | 9737 | ||
| 9495 | if (cpu_has_secondary_exec_ctrls()) | ||
| 9496 | vmcs_set_secondary_exec_control(secondary_exec_ctl); | ||
| 9497 | |||
| 9498 | if (nested_vmx_allowed(vcpu)) | 9738 | if (nested_vmx_allowed(vcpu)) |
| 9499 | to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= | 9739 | to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= |
| 9500 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | 9740 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
| @@ -9535,7 +9775,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | |||
| 9535 | 9775 | ||
| 9536 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) | 9776 | static bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) |
| 9537 | { | 9777 | { |
| 9538 | return nested_ept_get_cr3(vcpu) & VMX_EPT_AD_ENABLE_BIT; | 9778 | return nested_ept_get_cr3(vcpu) & VMX_EPTP_AD_ENABLE_BIT; |
| 9539 | } | 9779 | } |
| 9540 | 9780 | ||
| 9541 | /* Callbacks for nested_ept_init_mmu_context: */ | 9781 | /* Callbacks for nested_ept_init_mmu_context: */ |
| @@ -9548,18 +9788,15 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) | |||
| 9548 | 9788 | ||
| 9549 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | 9789 | static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) |
| 9550 | { | 9790 | { |
| 9551 | bool wants_ad; | ||
| 9552 | |||
| 9553 | WARN_ON(mmu_is_nested(vcpu)); | 9791 | WARN_ON(mmu_is_nested(vcpu)); |
| 9554 | wants_ad = nested_ept_ad_enabled(vcpu); | 9792 | if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) |
| 9555 | if (wants_ad && !enable_ept_ad_bits) | ||
| 9556 | return 1; | 9793 | return 1; |
| 9557 | 9794 | ||
| 9558 | kvm_mmu_unload(vcpu); | 9795 | kvm_mmu_unload(vcpu); |
| 9559 | kvm_init_shadow_ept_mmu(vcpu, | 9796 | kvm_init_shadow_ept_mmu(vcpu, |
| 9560 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & | 9797 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & |
| 9561 | VMX_EPT_EXECUTE_ONLY_BIT, | 9798 | VMX_EPT_EXECUTE_ONLY_BIT, |
| 9562 | wants_ad); | 9799 | nested_ept_ad_enabled(vcpu)); |
| 9563 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 9800 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
| 9564 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; | 9801 | vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; |
| 9565 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; | 9802 | vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; |
| @@ -9610,6 +9847,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
| 9610 | struct vmcs12 *vmcs12) | 9847 | struct vmcs12 *vmcs12) |
| 9611 | { | 9848 | { |
| 9612 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9849 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 9850 | struct page *page; | ||
| 9613 | u64 hpa; | 9851 | u64 hpa; |
| 9614 | 9852 | ||
| 9615 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | 9853 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
| @@ -9619,17 +9857,19 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
| 9619 | * physical address remains valid. We keep a reference | 9857 | * physical address remains valid. We keep a reference |
| 9620 | * to it so we can release it later. | 9858 | * to it so we can release it later. |
| 9621 | */ | 9859 | */ |
| 9622 | if (vmx->nested.apic_access_page) /* shouldn't happen */ | 9860 | if (vmx->nested.apic_access_page) { /* shouldn't happen */ |
| 9623 | nested_release_page(vmx->nested.apic_access_page); | 9861 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
| 9624 | vmx->nested.apic_access_page = | 9862 | vmx->nested.apic_access_page = NULL; |
| 9625 | nested_get_page(vcpu, vmcs12->apic_access_addr); | 9863 | } |
| 9864 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr); | ||
| 9626 | /* | 9865 | /* |
| 9627 | * If translation failed, no matter: This feature asks | 9866 | * If translation failed, no matter: This feature asks |
| 9628 | * to exit when accessing the given address, and if it | 9867 | * to exit when accessing the given address, and if it |
| 9629 | * can never be accessed, this feature won't do | 9868 | * can never be accessed, this feature won't do |
| 9630 | * anything anyway. | 9869 | * anything anyway. |
| 9631 | */ | 9870 | */ |
| 9632 | if (vmx->nested.apic_access_page) { | 9871 | if (!is_error_page(page)) { |
| 9872 | vmx->nested.apic_access_page = page; | ||
| 9633 | hpa = page_to_phys(vmx->nested.apic_access_page); | 9873 | hpa = page_to_phys(vmx->nested.apic_access_page); |
| 9634 | vmcs_write64(APIC_ACCESS_ADDR, hpa); | 9874 | vmcs_write64(APIC_ACCESS_ADDR, hpa); |
| 9635 | } else { | 9875 | } else { |
| @@ -9644,10 +9884,11 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
| 9644 | } | 9884 | } |
| 9645 | 9885 | ||
| 9646 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 9886 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
| 9647 | if (vmx->nested.virtual_apic_page) /* shouldn't happen */ | 9887 | if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ |
| 9648 | nested_release_page(vmx->nested.virtual_apic_page); | 9888 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); |
| 9649 | vmx->nested.virtual_apic_page = | 9889 | vmx->nested.virtual_apic_page = NULL; |
| 9650 | nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); | 9890 | } |
| 9891 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); | ||
| 9651 | 9892 | ||
| 9652 | /* | 9893 | /* |
| 9653 | * If translation failed, VM entry will fail because | 9894 | * If translation failed, VM entry will fail because |
| @@ -9662,7 +9903,8 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
| 9662 | * control. But such a configuration is useless, so | 9903 | * control. But such a configuration is useless, so |
| 9663 | * let's keep the code simple. | 9904 | * let's keep the code simple. |
| 9664 | */ | 9905 | */ |
| 9665 | if (vmx->nested.virtual_apic_page) { | 9906 | if (!is_error_page(page)) { |
| 9907 | vmx->nested.virtual_apic_page = page; | ||
| 9666 | hpa = page_to_phys(vmx->nested.virtual_apic_page); | 9908 | hpa = page_to_phys(vmx->nested.virtual_apic_page); |
| 9667 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); | 9909 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); |
| 9668 | } | 9910 | } |
| @@ -9671,16 +9913,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, | |||
| 9671 | if (nested_cpu_has_posted_intr(vmcs12)) { | 9913 | if (nested_cpu_has_posted_intr(vmcs12)) { |
| 9672 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | 9914 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ |
| 9673 | kunmap(vmx->nested.pi_desc_page); | 9915 | kunmap(vmx->nested.pi_desc_page); |
| 9674 | nested_release_page(vmx->nested.pi_desc_page); | 9916 | kvm_release_page_dirty(vmx->nested.pi_desc_page); |
| 9917 | vmx->nested.pi_desc_page = NULL; | ||
| 9675 | } | 9918 | } |
| 9676 | vmx->nested.pi_desc_page = | 9919 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); |
| 9677 | nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); | 9920 | if (is_error_page(page)) |
| 9678 | vmx->nested.pi_desc = | ||
| 9679 | (struct pi_desc *)kmap(vmx->nested.pi_desc_page); | ||
| 9680 | if (!vmx->nested.pi_desc) { | ||
| 9681 | nested_release_page_clean(vmx->nested.pi_desc_page); | ||
| 9682 | return; | 9921 | return; |
| 9683 | } | 9922 | vmx->nested.pi_desc_page = page; |
| 9923 | vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); | ||
| 9684 | vmx->nested.pi_desc = | 9924 | vmx->nested.pi_desc = |
| 9685 | (struct pi_desc *)((void *)vmx->nested.pi_desc + | 9925 | (struct pi_desc *)((void *)vmx->nested.pi_desc + |
| 9686 | (unsigned long)(vmcs12->posted_intr_desc_addr & | 9926 | (unsigned long)(vmcs12->posted_intr_desc_addr & |
| @@ -9746,6 +9986,18 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, | |||
| 9746 | return 0; | 9986 | return 0; |
| 9747 | } | 9987 | } |
| 9748 | 9988 | ||
| 9989 | static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, | ||
| 9990 | struct vmcs12 *vmcs12) | ||
| 9991 | { | ||
| 9992 | if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) | ||
| 9993 | return 0; | ||
| 9994 | |||
| 9995 | if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) | ||
| 9996 | return -EINVAL; | ||
| 9997 | |||
| 9998 | return 0; | ||
| 9999 | } | ||
| 10000 | |||
| 9749 | /* | 10001 | /* |
| 9750 | * Merge L0's and L1's MSR bitmap, return false to indicate that | 10002 | * Merge L0's and L1's MSR bitmap, return false to indicate that |
| 9751 | * we do not use the hardware. | 10003 | * we do not use the hardware. |
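The new TPR-shadow check rejects a vmcs12 whose virtual-APIC address fails page_address_valid(), that is, the guest-physical address must be page aligned and must fit within the guest's reported physical-address width. A minimal sketch of that predicate, with cpuid_maxphyaddr() reduced to a plain parameter:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

/*
 * In the spirit of page_address_valid(): a guest-physical address taken
 * from a VMCS field must be 4 KiB aligned and must not set bits above
 * the guest's MAXPHYADDR.
 */
static bool page_address_valid(uint64_t gpa, int maxphyaddr)
{
	if (gpa & (PAGE_SIZE - 1))
		return false;
	if (maxphyaddr < 64 && (gpa >> maxphyaddr))
		return false;
	return true;
}

int main(void)
{
	printf("%d %d %d\n",
	       page_address_valid(0x1000, 40),		/* aligned and in range */
	       page_address_valid(0x1008, 40),		/* misaligned */
	       page_address_valid(1ULL << 45, 40));	/* above MAXPHYADDR */
	return 0;
}
```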
| @@ -9762,8 +10014,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | |||
| 9762 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) | 10014 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) |
| 9763 | return false; | 10015 | return false; |
| 9764 | 10016 | ||
| 9765 | page = nested_get_page(vcpu, vmcs12->msr_bitmap); | 10017 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); |
| 9766 | if (!page) | 10018 | if (is_error_page(page)) |
| 9767 | return false; | 10019 | return false; |
| 9768 | msr_bitmap_l1 = (unsigned long *)kmap(page); | 10020 | msr_bitmap_l1 = (unsigned long *)kmap(page); |
| 9769 | 10021 | ||
| @@ -9793,7 +10045,7 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | |||
| 9793 | } | 10045 | } |
| 9794 | } | 10046 | } |
| 9795 | kunmap(page); | 10047 | kunmap(page); |
| 9796 | nested_release_page_clean(page); | 10048 | kvm_release_page_clean(page); |
| 9797 | 10049 | ||
| 9798 | return true; | 10050 | return true; |
| 9799 | } | 10051 | } |
| @@ -10187,13 +10439,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10187 | enable_ept ? vmcs12->page_fault_error_code_match : 0); | 10439 | enable_ept ? vmcs12->page_fault_error_code_match : 0); |
| 10188 | 10440 | ||
| 10189 | if (cpu_has_secondary_exec_ctrls()) { | 10441 | if (cpu_has_secondary_exec_ctrls()) { |
| 10190 | exec_control = vmx_secondary_exec_control(vmx); | 10442 | exec_control = vmx->secondary_exec_control; |
| 10191 | 10443 | ||
| 10192 | /* Take the following fields only from vmcs12 */ | 10444 | /* Take the following fields only from vmcs12 */ |
| 10193 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 10445 | exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
| 10446 | SECONDARY_EXEC_ENABLE_INVPCID | | ||
| 10194 | SECONDARY_EXEC_RDTSCP | | 10447 | SECONDARY_EXEC_RDTSCP | |
| 10448 | SECONDARY_EXEC_XSAVES | | ||
| 10195 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | | 10449 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
| 10196 | SECONDARY_EXEC_APIC_REGISTER_VIRT); | 10450 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
| 10451 | SECONDARY_EXEC_ENABLE_VMFUNC); | ||
| 10197 | if (nested_cpu_has(vmcs12, | 10452 | if (nested_cpu_has(vmcs12, |
| 10198 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { | 10453 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) { |
| 10199 | vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & | 10454 | vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control & |
| @@ -10201,6 +10456,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10201 | exec_control |= vmcs12_exec_ctrl; | 10456 | exec_control |= vmcs12_exec_ctrl; |
| 10202 | } | 10457 | } |
| 10203 | 10458 | ||
| 10459 | /* All VMFUNCs are currently emulated through L0 vmexits. */ | ||
| 10460 | if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC) | ||
| 10461 | vmcs_write64(VM_FUNCTION_CONTROL, 0); | ||
| 10462 | |||
| 10204 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { | 10463 | if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { |
| 10205 | vmcs_write64(EOI_EXIT_BITMAP0, | 10464 | vmcs_write64(EOI_EXIT_BITMAP0, |
| 10206 | vmcs12->eoi_exit_bitmap0); | 10465 | vmcs12->eoi_exit_bitmap0); |
| @@ -10426,6 +10685,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 10426 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) | 10685 | if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) |
| 10427 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10686 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10428 | 10687 | ||
| 10688 | if (nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12)) | ||
| 10689 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
| 10690 | |||
| 10429 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) | 10691 | if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) |
| 10430 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10692 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10431 | 10693 | ||
| @@ -10453,6 +10715,18 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 10453 | vmx->nested.nested_vmx_entry_ctls_high)) | 10715 | vmx->nested.nested_vmx_entry_ctls_high)) |
| 10454 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10716 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10455 | 10717 | ||
| 10718 | if (nested_cpu_has_vmfunc(vmcs12)) { | ||
| 10719 | if (vmcs12->vm_function_control & | ||
| 10720 | ~vmx->nested.nested_vmx_vmfunc_controls) | ||
| 10721 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
| 10722 | |||
| 10723 | if (nested_cpu_has_eptp_switching(vmcs12)) { | ||
| 10724 | if (!nested_cpu_has_ept(vmcs12) || | ||
| 10725 | !page_address_valid(vcpu, vmcs12->eptp_list_address)) | ||
| 10726 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
| 10727 | } | ||
| 10728 | } | ||
| 10729 | |||
| 10456 | if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) | 10730 | if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) |
| 10457 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 10731 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10458 | 10732 | ||
| @@ -10699,7 +10973,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
| 10699 | u32 idt_vectoring; | 10973 | u32 idt_vectoring; |
| 10700 | unsigned int nr; | 10974 | unsigned int nr; |
| 10701 | 10975 | ||
| 10702 | if (vcpu->arch.exception.pending && vcpu->arch.exception.reinject) { | 10976 | if (vcpu->arch.exception.injected) { |
| 10703 | nr = vcpu->arch.exception.nr; | 10977 | nr = vcpu->arch.exception.nr; |
| 10704 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | 10978 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; |
| 10705 | 10979 | ||
| @@ -10738,12 +11012,20 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
| 10738 | static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | 11012 | static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) |
| 10739 | { | 11013 | { |
| 10740 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 11014 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 11015 | unsigned long exit_qual; | ||
| 10741 | 11016 | ||
| 10742 | if (vcpu->arch.exception.pending || | 11017 | if (kvm_event_needs_reinjection(vcpu)) |
| 10743 | vcpu->arch.nmi_injected || | ||
| 10744 | vcpu->arch.interrupt.pending) | ||
| 10745 | return -EBUSY; | 11018 | return -EBUSY; |
| 10746 | 11019 | ||
| 11020 | if (vcpu->arch.exception.pending && | ||
| 11021 | nested_vmx_check_exception(vcpu, &exit_qual)) { | ||
| 11022 | if (vmx->nested.nested_run_pending) | ||
| 11023 | return -EBUSY; | ||
| 11024 | nested_vmx_inject_exception_vmexit(vcpu, exit_qual); | ||
| 11025 | vcpu->arch.exception.pending = false; | ||
| 11026 | return 0; | ||
| 11027 | } | ||
| 11028 | |||
| 10747 | if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && | 11029 | if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && |
| 10748 | vmx->nested.preemption_timer_expired) { | 11030 | vmx->nested.preemption_timer_expired) { |
| 10749 | if (vmx->nested.nested_run_pending) | 11031 | if (vmx->nested.nested_run_pending) |
| @@ -11184,16 +11466,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
| 11184 | 11466 | ||
| 11185 | /* Unpin physical memory we referred to in vmcs02 */ | 11467 | /* Unpin physical memory we referred to in vmcs02 */ |
| 11186 | if (vmx->nested.apic_access_page) { | 11468 | if (vmx->nested.apic_access_page) { |
| 11187 | nested_release_page(vmx->nested.apic_access_page); | 11469 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
| 11188 | vmx->nested.apic_access_page = NULL; | 11470 | vmx->nested.apic_access_page = NULL; |
| 11189 | } | 11471 | } |
| 11190 | if (vmx->nested.virtual_apic_page) { | 11472 | if (vmx->nested.virtual_apic_page) { |
| 11191 | nested_release_page(vmx->nested.virtual_apic_page); | 11473 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); |
| 11192 | vmx->nested.virtual_apic_page = NULL; | 11474 | vmx->nested.virtual_apic_page = NULL; |
| 11193 | } | 11475 | } |
| 11194 | if (vmx->nested.pi_desc_page) { | 11476 | if (vmx->nested.pi_desc_page) { |
| 11195 | kunmap(vmx->nested.pi_desc_page); | 11477 | kunmap(vmx->nested.pi_desc_page); |
| 11196 | nested_release_page(vmx->nested.pi_desc_page); | 11478 | kvm_release_page_dirty(vmx->nested.pi_desc_page); |
| 11197 | vmx->nested.pi_desc_page = NULL; | 11479 | vmx->nested.pi_desc_page = NULL; |
| 11198 | vmx->nested.pi_desc = NULL; | 11480 | vmx->nested.pi_desc = NULL; |
| 11199 | } | 11481 | } |
| @@ -11369,14 +11651,14 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) | |||
| 11369 | 11651 | ||
| 11370 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; | 11652 | gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; |
| 11371 | 11653 | ||
| 11372 | page = nested_get_page(vcpu, vmcs12->pml_address); | 11654 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address); |
| 11373 | if (!page) | 11655 | if (is_error_page(page)) |
| 11374 | return 0; | 11656 | return 0; |
| 11375 | 11657 | ||
| 11376 | pml_address = kmap(page); | 11658 | pml_address = kmap(page); |
| 11377 | pml_address[vmcs12->guest_pml_index--] = gpa; | 11659 | pml_address[vmcs12->guest_pml_index--] = gpa; |
| 11378 | kunmap(page); | 11660 | kunmap(page); |
| 11379 | nested_release_page_clean(page); | 11661 | kvm_release_page_clean(page); |
| 11380 | } | 11662 | } |
| 11381 | 11663 | ||
| 11382 | return 0; | 11664 | return 0; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef5102f80497..6069af86da3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -311,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 311 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); | 311 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); |
| 312 | u64 new_state = msr_info->data & | 312 | u64 new_state = msr_info->data & |
| 313 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); | 313 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); |
| 314 | u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | | 314 | u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | |
| 315 | 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE); | 315 | (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); |
| 316 | 316 | ||
| 317 | if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE) | ||
| 318 | return 1; | ||
| 317 | if (!msr_info->host_initiated && | 319 | if (!msr_info->host_initiated && |
| 318 | ((msr_info->data & reserved_bits) != 0 || | 320 | ((new_state == MSR_IA32_APICBASE_ENABLE && |
| 319 | new_state == X2APIC_ENABLE || | ||
| 320 | (new_state == MSR_IA32_APICBASE_ENABLE && | ||
| 321 | old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || | 321 | old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || |
| 322 | (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && | 322 | (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && |
| 323 | old_state == 0))) | 323 | old_state == 0))) |
| @@ -390,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 390 | 390 | ||
| 391 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 391 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 392 | 392 | ||
| 393 | if (!vcpu->arch.exception.pending) { | 393 | if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) { |
| 394 | queue: | 394 | queue: |
| 395 | if (has_error && !is_protmode(vcpu)) | 395 | if (has_error && !is_protmode(vcpu)) |
| 396 | has_error = false; | 396 | has_error = false; |
| 397 | vcpu->arch.exception.pending = true; | 397 | if (reinject) { |
| 398 | /* | ||
| 399 | * On vmentry, vcpu->arch.exception.pending is only | ||
| 400 | * true if an event injection was blocked by | ||
| 401 | * nested_run_pending. In that case, however, | ||
| 402 | * vcpu_enter_guest requests an immediate exit, | ||
| 403 | * and the guest shouldn't proceed far enough to | ||
| 404 | * need reinjection. | ||
| 405 | */ | ||
| 406 | WARN_ON_ONCE(vcpu->arch.exception.pending); | ||
| 407 | vcpu->arch.exception.injected = true; | ||
| 408 | } else { | ||
| 409 | vcpu->arch.exception.pending = true; | ||
| 410 | vcpu->arch.exception.injected = false; | ||
| 411 | } | ||
| 398 | vcpu->arch.exception.has_error_code = has_error; | 412 | vcpu->arch.exception.has_error_code = has_error; |
| 399 | vcpu->arch.exception.nr = nr; | 413 | vcpu->arch.exception.nr = nr; |
| 400 | vcpu->arch.exception.error_code = error_code; | 414 | vcpu->arch.exception.error_code = error_code; |
| 401 | vcpu->arch.exception.reinject = reinject; | ||
| 402 | return; | 415 | return; |
| 403 | } | 416 | } |
| 404 | 417 | ||
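The hunk above splits the old exception.reinject flag into two states: pending, for an exception KVM has decided to deliver but has not yet put into the VMCS (so a nested hypervisor may still intercept it), and injected, for one that was already injected and only needs to be re-injected after an interrupted vmentry. The sketch below is a loose illustration of that split; the kernel additionally warns if pending is still set on the re-injection path rather than silently clearing it.

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative mirror of the pending/injected split introduced above. */
struct exception_state {
	bool pending;	/* decided on; nested-event checks may still see it */
	bool injected;	/* already in the VMCS; only needs re-injection */
	int  nr;
};

static void queue_exception(struct exception_state *e, int nr, bool reinject)
{
	if (reinject) {
		/* Re-delivery after a vmexit that interrupted event delivery. */
		e->injected = true;
		e->pending = false;
	} else {
		/* Newly raised: keep it pending until injection is decided. */
		e->pending = true;
		e->injected = false;
	}
	e->nr = nr;
}

int main(void)
{
	struct exception_state e = { 0 };

	queue_exception(&e, 14, false);		/* freshly raised #PF */
	printf("pending=%d injected=%d nr=%d\n", e.pending, e.injected, e.nr);

	queue_exception(&e, 14, true);		/* re-injection path */
	printf("pending=%d injected=%d nr=%d\n", e.pending, e.injected, e.nr);
	return 0;
}
```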
| @@ -413,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | |||
| 413 | class2 = exception_class(nr); | 426 | class2 = exception_class(nr); |
| 414 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) | 427 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) |
| 415 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { | 428 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { |
| 416 | /* generate double fault per SDM Table 5-5 */ | 429 | /* |
| 430 | * Generate double fault per SDM Table 5-5. Set | ||
| 431 | * exception.pending = true so that the double fault | ||
| 432 | * can trigger a nested vmexit. | ||
| 433 | */ | ||
| 417 | vcpu->arch.exception.pending = true; | 434 | vcpu->arch.exception.pending = true; |
| 435 | vcpu->arch.exception.injected = false; | ||
| 418 | vcpu->arch.exception.has_error_code = true; | 436 | vcpu->arch.exception.has_error_code = true; |
| 419 | vcpu->arch.exception.nr = DF_VECTOR; | 437 | vcpu->arch.exception.nr = DF_VECTOR; |
| 420 | vcpu->arch.exception.error_code = 0; | 438 | vcpu->arch.exception.error_code = 0; |
| @@ -755,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 755 | if (cr4 & CR4_RESERVED_BITS) | 773 | if (cr4 & CR4_RESERVED_BITS) |
| 756 | return 1; | 774 | return 1; |
| 757 | 775 | ||
| 758 | if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) | 776 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) |
| 777 | return 1; | ||
| 778 | |||
| 779 | if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP)) | ||
| 759 | return 1; | 780 | return 1; |
| 760 | 781 | ||
| 761 | if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) | 782 | if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP)) |
| 762 | return 1; | 783 | return 1; |
| 763 | 784 | ||
| 764 | if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP)) | 785 | if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE)) |
| 765 | return 1; | 786 | return 1; |
| 766 | 787 | ||
| 767 | if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE)) | 788 | if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE)) |
| 768 | return 1; | 789 | return 1; |
| 769 | 790 | ||
| 770 | if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE)) | 791 | if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57)) |
| 771 | return 1; | 792 | return 1; |
| 772 | 793 | ||
| 773 | if (is_long_mode(vcpu)) { | 794 | if (is_long_mode(vcpu)) { |
| @@ -780,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 780 | return 1; | 801 | return 1; |
| 781 | 802 | ||
| 782 | if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { | 803 | if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) { |
| 783 | if (!guest_cpuid_has_pcid(vcpu)) | 804 | if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID)) |
| 784 | return 1; | 805 | return 1; |
| 785 | 806 | ||
| 786 | /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ | 807 | /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */ |
| @@ -814,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 814 | return 0; | 835 | return 0; |
| 815 | } | 836 | } |
| 816 | 837 | ||
| 817 | if (is_long_mode(vcpu)) { | 838 | if (is_long_mode(vcpu) && |
| 818 | if (cr3 & CR3_L_MODE_RESERVED_BITS) | 839 | (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62))) |
| 819 | return 1; | 840 | return 1; |
| 820 | } else if (is_pae(vcpu) && is_paging(vcpu) && | 841 | else if (is_pae(vcpu) && is_paging(vcpu) && |
| 821 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) | 842 | !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) |
| 822 | return 1; | 843 | return 1; |
| 823 | 844 | ||
| @@ -884,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) | |||
| 884 | { | 905 | { |
| 885 | u64 fixed = DR6_FIXED_1; | 906 | u64 fixed = DR6_FIXED_1; |
| 886 | 907 | ||
| 887 | if (!guest_cpuid_has_rtm(vcpu)) | 908 | if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) |
| 888 | fixed |= DR6_RTM; | 909 | fixed |= DR6_RTM; |
| 889 | return fixed; | 910 | return fixed; |
| 890 | } | 911 | } |
| @@ -994,6 +1015,7 @@ static u32 emulated_msrs[] = { | |||
| 994 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 1015 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
| 995 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 1016 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
| 996 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | 1017 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, |
| 1018 | HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY, | ||
| 997 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, | 1019 | HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, |
| 998 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, | 1020 | HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, |
| 999 | HV_X64_MSR_RESET, | 1021 | HV_X64_MSR_RESET, |
| @@ -1022,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 1022 | if (efer & efer_reserved_bits) | 1044 | if (efer & efer_reserved_bits) |
| 1023 | return false; | 1045 | return false; |
| 1024 | 1046 | ||
| 1025 | if (efer & EFER_FFXSR) { | 1047 | if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT)) |
| 1026 | struct kvm_cpuid_entry2 *feat; | ||
| 1027 | |||
| 1028 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
| 1029 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) | ||
| 1030 | return false; | 1048 | return false; |
| 1031 | } | ||
| 1032 | 1049 | ||
| 1033 | if (efer & EFER_SVME) { | 1050 | if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM)) |
| 1034 | struct kvm_cpuid_entry2 *feat; | ||
| 1035 | |||
| 1036 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
| 1037 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) | ||
| 1038 | return false; | 1051 | return false; |
| 1039 | } | ||
| 1040 | 1052 | ||
| 1041 | return true; | 1053 | return true; |
| 1042 | } | 1054 | } |
| @@ -1084,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
| 1084 | case MSR_KERNEL_GS_BASE: | 1096 | case MSR_KERNEL_GS_BASE: |
| 1085 | case MSR_CSTAR: | 1097 | case MSR_CSTAR: |
| 1086 | case MSR_LSTAR: | 1098 | case MSR_LSTAR: |
| 1087 | if (is_noncanonical_address(msr->data)) | 1099 | if (is_noncanonical_address(msr->data, vcpu)) |
| 1088 | return 1; | 1100 | return 1; |
| 1089 | break; | 1101 | break; |
| 1090 | case MSR_IA32_SYSENTER_EIP: | 1102 | case MSR_IA32_SYSENTER_EIP: |
| @@ -1101,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
| 1101 | * value, and that something deterministic happens if the guest | 1113 | * value, and that something deterministic happens if the guest |
| 1102 | * invokes 64-bit SYSENTER. | 1114 | * invokes 64-bit SYSENTER. |
| 1103 | */ | 1115 | */ |
| 1104 | msr->data = get_canonical(msr->data); | 1116 | msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu)); |
| 1105 | } | 1117 | } |
| 1106 | return kvm_x86_ops->set_msr(vcpu, msr); | 1118 | return kvm_x86_ops->set_msr(vcpu, msr); |
| 1107 | } | 1119 | } |
| @@ -1534,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
| 1534 | vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; | 1546 | vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec; |
| 1535 | vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; | 1547 | vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; |
| 1536 | 1548 | ||
| 1537 | if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated) | 1549 | if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) |
| 1538 | update_ia32_tsc_adjust_msr(vcpu, offset); | 1550 | update_ia32_tsc_adjust_msr(vcpu, offset); |
| 1551 | |||
| 1539 | kvm_vcpu_write_tsc_offset(vcpu, offset); | 1552 | kvm_vcpu_write_tsc_offset(vcpu, offset); |
| 1540 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | 1553 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); |
| 1541 | 1554 | ||
| @@ -2185,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2185 | kvm_set_lapic_tscdeadline_msr(vcpu, data); | 2198 | kvm_set_lapic_tscdeadline_msr(vcpu, data); |
| 2186 | break; | 2199 | break; |
| 2187 | case MSR_IA32_TSC_ADJUST: | 2200 | case MSR_IA32_TSC_ADJUST: |
| 2188 | if (guest_cpuid_has_tsc_adjust(vcpu)) { | 2201 | if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) { |
| 2189 | if (!msr_info->host_initiated) { | 2202 | if (!msr_info->host_initiated) { |
| 2190 | s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; | 2203 | s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr; |
| 2191 | adjust_tsc_offset_guest(vcpu, adj); | 2204 | adjust_tsc_offset_guest(vcpu, adj); |
| @@ -2307,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2307 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); | 2320 | vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); |
| 2308 | break; | 2321 | break; |
| 2309 | case MSR_AMD64_OSVW_ID_LENGTH: | 2322 | case MSR_AMD64_OSVW_ID_LENGTH: |
| 2310 | if (!guest_cpuid_has_osvw(vcpu)) | 2323 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
| 2311 | return 1; | 2324 | return 1; |
| 2312 | vcpu->arch.osvw.length = data; | 2325 | vcpu->arch.osvw.length = data; |
| 2313 | break; | 2326 | break; |
| 2314 | case MSR_AMD64_OSVW_STATUS: | 2327 | case MSR_AMD64_OSVW_STATUS: |
| 2315 | if (!guest_cpuid_has_osvw(vcpu)) | 2328 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
| 2316 | return 1; | 2329 | return 1; |
| 2317 | vcpu->arch.osvw.status = data; | 2330 | vcpu->arch.osvw.status = data; |
| 2318 | break; | 2331 | break; |
| @@ -2537,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2537 | msr_info->data = 0xbe702111; | 2550 | msr_info->data = 0xbe702111; |
| 2538 | break; | 2551 | break; |
| 2539 | case MSR_AMD64_OSVW_ID_LENGTH: | 2552 | case MSR_AMD64_OSVW_ID_LENGTH: |
| 2540 | if (!guest_cpuid_has_osvw(vcpu)) | 2553 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
| 2541 | return 1; | 2554 | return 1; |
| 2542 | msr_info->data = vcpu->arch.osvw.length; | 2555 | msr_info->data = vcpu->arch.osvw.length; |
| 2543 | break; | 2556 | break; |
| 2544 | case MSR_AMD64_OSVW_STATUS: | 2557 | case MSR_AMD64_OSVW_STATUS: |
| 2545 | if (!guest_cpuid_has_osvw(vcpu)) | 2558 | if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) |
| 2546 | return 1; | 2559 | return 1; |
| 2547 | msr_info->data = vcpu->arch.osvw.status; | 2560 | msr_info->data = vcpu->arch.osvw.status; |
| 2548 | break; | 2561 | break; |
| @@ -2882,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) | |||
| 2882 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 2895 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
| 2883 | { | 2896 | { |
| 2884 | int idx; | 2897 | int idx; |
| 2898 | |||
| 2899 | if (vcpu->preempted) | ||
| 2900 | vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu); | ||
| 2901 | |||
| 2885 | /* | 2902 | /* |
| 2886 | * Disable page faults because we're in atomic context here. | 2903 | * Disable page faults because we're in atomic context here. |
| 2887 | * kvm_write_guest_offset_cached() would call might_fault() | 2904 | * kvm_write_guest_offset_cached() would call might_fault() |
| @@ -3074,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 3074 | struct kvm_vcpu_events *events) | 3091 | struct kvm_vcpu_events *events) |
| 3075 | { | 3092 | { |
| 3076 | process_nmi(vcpu); | 3093 | process_nmi(vcpu); |
| 3094 | /* | ||
| 3095 | * FIXME: pass injected and pending separately. This is only | ||
| 3096 | * needed for nested virtualization, whose state cannot be | ||
| 3097 | * migrated yet. For now we can combine them. | ||
| 3098 | */ | ||
| 3077 | events->exception.injected = | 3099 | events->exception.injected = |
| 3078 | vcpu->arch.exception.pending && | 3100 | (vcpu->arch.exception.pending || |
| 3101 | vcpu->arch.exception.injected) && | ||
| 3079 | !kvm_exception_is_soft(vcpu->arch.exception.nr); | 3102 | !kvm_exception_is_soft(vcpu->arch.exception.nr); |
| 3080 | events->exception.nr = vcpu->arch.exception.nr; | 3103 | events->exception.nr = vcpu->arch.exception.nr; |
| 3081 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | 3104 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; |
| @@ -3130,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 3130 | return -EINVAL; | 3153 | return -EINVAL; |
| 3131 | 3154 | ||
| 3132 | process_nmi(vcpu); | 3155 | process_nmi(vcpu); |
| 3156 | vcpu->arch.exception.injected = false; | ||
| 3133 | vcpu->arch.exception.pending = events->exception.injected; | 3157 | vcpu->arch.exception.pending = events->exception.injected; |
| 3134 | vcpu->arch.exception.nr = events->exception.nr; | 3158 | vcpu->arch.exception.nr = events->exception.nr; |
| 3135 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 3159 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
| @@ -4671,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, | |||
| 4671 | */ | 4695 | */ |
| 4672 | if (vcpu->arch.gpa_available && | 4696 | if (vcpu->arch.gpa_available && |
| 4673 | emulator_can_use_gpa(ctxt) && | 4697 | emulator_can_use_gpa(ctxt) && |
| 4674 | vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) && | 4698 | (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) { |
| 4675 | (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) { | 4699 | gpa = vcpu->arch.gpa_val; |
| 4676 | gpa = exception->address; | 4700 | ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write); |
| 4677 | goto mmio; | 4701 | } else { |
| 4702 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | ||
| 4703 | if (ret < 0) | ||
| 4704 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4678 | } | 4705 | } |
| 4679 | 4706 | ||
| 4680 | ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); | 4707 | if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes)) |
| 4681 | |||
| 4682 | if (ret < 0) | ||
| 4683 | return X86EMUL_PROPAGATE_FAULT; | ||
| 4684 | |||
| 4685 | /* For APIC access vmexit */ | ||
| 4686 | if (ret) | ||
| 4687 | goto mmio; | ||
| 4688 | |||
| 4689 | if (ops->read_write_emulate(vcpu, gpa, val, bytes)) | ||
| 4690 | return X86EMUL_CONTINUE; | 4708 | return X86EMUL_CONTINUE; |
| 4691 | 4709 | ||
| 4692 | mmio: | ||
| 4693 | /* | 4710 | /* |
| 4694 | * Is this MMIO handled locally? | 4711 | * Is this MMIO handled locally? |
| 4695 | */ | 4712 | */ |
| @@ -5227,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt, | |||
| 5227 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); | 5244 | return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage); |
| 5228 | } | 5245 | } |
| 5229 | 5246 | ||
| 5230 | static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, | 5247 | static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, |
| 5231 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) | 5248 | u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit) |
| 5232 | { | 5249 | { |
| 5233 | kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); | 5250 | return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit); |
| 5234 | } | 5251 | } |
| 5235 | 5252 | ||
| 5236 | static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) | 5253 | static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) |
| @@ -6362,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
| 6362 | int r; | 6379 | int r; |
| 6363 | 6380 | ||
| 6364 | /* try to reinject previous events if any */ | 6381 | /* try to reinject previous events if any */ |
| 6382 | if (vcpu->arch.exception.injected) { | ||
| 6383 | kvm_x86_ops->queue_exception(vcpu); | ||
| 6384 | return 0; | ||
| 6385 | } | ||
| 6386 | |||
| 6387 | /* | ||
| 6388 | * Exceptions must be injected immediately, or the exception | ||
| 6389 | * frame will have the address of the NMI or interrupt handler. | ||
| 6390 | */ | ||
| 6391 | if (!vcpu->arch.exception.pending) { | ||
| 6392 | if (vcpu->arch.nmi_injected) { | ||
| 6393 | kvm_x86_ops->set_nmi(vcpu); | ||
| 6394 | return 0; | ||
| 6395 | } | ||
| 6396 | |||
| 6397 | if (vcpu->arch.interrupt.pending) { | ||
| 6398 | kvm_x86_ops->set_irq(vcpu); | ||
| 6399 | return 0; | ||
| 6400 | } | ||
| 6401 | } | ||
| 6402 | |||
| 6403 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { | ||
| 6404 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); | ||
| 6405 | if (r != 0) | ||
| 6406 | return r; | ||
| 6407 | } | ||
| 6408 | |||
| 6409 | /* try to inject new event if pending */ | ||
| 6365 | if (vcpu->arch.exception.pending) { | 6410 | if (vcpu->arch.exception.pending) { |
| 6366 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | 6411 | trace_kvm_inj_exception(vcpu->arch.exception.nr, |
| 6367 | vcpu->arch.exception.has_error_code, | 6412 | vcpu->arch.exception.has_error_code, |
| 6368 | vcpu->arch.exception.error_code); | 6413 | vcpu->arch.exception.error_code); |
| 6369 | 6414 | ||
| 6415 | vcpu->arch.exception.pending = false; | ||
| 6416 | vcpu->arch.exception.injected = true; | ||
| 6417 | |||
| 6370 | if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) | 6418 | if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) |
| 6371 | __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | | 6419 | __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | |
| 6372 | X86_EFLAGS_RF); | 6420 | X86_EFLAGS_RF); |
| @@ -6378,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
| 6378 | } | 6426 | } |
| 6379 | 6427 | ||
| 6380 | kvm_x86_ops->queue_exception(vcpu); | 6428 | kvm_x86_ops->queue_exception(vcpu); |
| 6381 | return 0; | 6429 | } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) { |
| 6382 | } | ||
| 6383 | |||
| 6384 | if (vcpu->arch.nmi_injected) { | ||
| 6385 | kvm_x86_ops->set_nmi(vcpu); | ||
| 6386 | return 0; | ||
| 6387 | } | ||
| 6388 | |||
| 6389 | if (vcpu->arch.interrupt.pending) { | ||
| 6390 | kvm_x86_ops->set_irq(vcpu); | ||
| 6391 | return 0; | ||
| 6392 | } | ||
| 6393 | |||
| 6394 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { | ||
| 6395 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); | ||
| 6396 | if (r != 0) | ||
| 6397 | return r; | ||
| 6398 | } | ||
| 6399 | |||
| 6400 | /* try to inject new event if pending */ | ||
| 6401 | if (vcpu->arch.smi_pending && !is_smm(vcpu)) { | ||
| 6402 | vcpu->arch.smi_pending = false; | 6430 | vcpu->arch.smi_pending = false; |
| 6403 | enter_smm(vcpu); | 6431 | enter_smm(vcpu); |
| 6404 | } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { | 6432 | } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { |
| @@ -6615,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) | |||
| 6615 | trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); | 6643 | trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); |
| 6616 | vcpu->arch.hflags |= HF_SMM_MASK; | 6644 | vcpu->arch.hflags |= HF_SMM_MASK; |
| 6617 | memset(buf, 0, 512); | 6645 | memset(buf, 0, 512); |
| 6618 | if (guest_cpuid_has_longmode(vcpu)) | 6646 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) |
| 6619 | enter_smm_save_state_64(vcpu, buf); | 6647 | enter_smm_save_state_64(vcpu, buf); |
| 6620 | else | 6648 | else |
| 6621 | enter_smm_save_state_32(vcpu, buf); | 6649 | enter_smm_save_state_32(vcpu, buf); |
| @@ -6667,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) | |||
| 6667 | kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); | 6695 | kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); |
| 6668 | kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); | 6696 | kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); |
| 6669 | 6697 | ||
| 6670 | if (guest_cpuid_has_longmode(vcpu)) | 6698 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) |
| 6671 | kvm_x86_ops->set_efer(vcpu, 0); | 6699 | kvm_x86_ops->set_efer(vcpu, 0); |
| 6672 | 6700 | ||
| 6673 | kvm_update_cpuid(vcpu); | 6701 | kvm_update_cpuid(vcpu); |
| @@ -6774,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 6774 | } | 6802 | } |
| 6775 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { | 6803 | if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { |
| 6776 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; | 6804 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
| 6805 | vcpu->mmio_needed = 0; | ||
| 6777 | r = 0; | 6806 | r = 0; |
| 6778 | goto out; | 6807 | goto out; |
| 6779 | } | 6808 | } |
| @@ -6862,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 6862 | kvm_x86_ops->enable_nmi_window(vcpu); | 6891 | kvm_x86_ops->enable_nmi_window(vcpu); |
| 6863 | if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) | 6892 | if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
| 6864 | kvm_x86_ops->enable_irq_window(vcpu); | 6893 | kvm_x86_ops->enable_irq_window(vcpu); |
| 6894 | WARN_ON(vcpu->arch.exception.pending); | ||
| 6865 | } | 6895 | } |
| 6866 | 6896 | ||
| 6867 | if (kvm_lapic_enabled(vcpu)) { | 6897 | if (kvm_lapic_enabled(vcpu)) { |
| @@ -7004,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 7004 | if (vcpu->arch.apic_attention) | 7034 | if (vcpu->arch.apic_attention) |
| 7005 | kvm_lapic_sync_from_vapic(vcpu); | 7035 | kvm_lapic_sync_from_vapic(vcpu); |
| 7006 | 7036 | ||
| 7037 | vcpu->arch.gpa_available = false; | ||
| 7007 | r = kvm_x86_ops->handle_exit(vcpu); | 7038 | r = kvm_x86_ops->handle_exit(vcpu); |
| 7008 | return r; | 7039 | return r; |
| 7009 | 7040 | ||
| @@ -7422,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 7422 | int pending_vec, max_bits, idx; | 7453 | int pending_vec, max_bits, idx; |
| 7423 | struct desc_ptr dt; | 7454 | struct desc_ptr dt; |
| 7424 | 7455 | ||
| 7425 | if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE)) | 7456 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && |
| 7457 | (sregs->cr4 & X86_CR4_OSXSAVE)) | ||
| 7458 | return -EINVAL; | ||
| 7459 | |||
| 7460 | apic_base_msr.data = sregs->apic_base; | ||
| 7461 | apic_base_msr.host_initiated = true; | ||
| 7462 | if (kvm_set_apic_base(vcpu, &apic_base_msr)) | ||
| 7426 | return -EINVAL; | 7463 | return -EINVAL; |
| 7427 | 7464 | ||
| 7428 | dt.size = sregs->idt.limit; | 7465 | dt.size = sregs->idt.limit; |
| @@ -7441,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 7441 | 7478 | ||
| 7442 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; | 7479 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
| 7443 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 7480 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
| 7444 | apic_base_msr.data = sregs->apic_base; | ||
| 7445 | apic_base_msr.host_initiated = true; | ||
| 7446 | kvm_set_apic_base(vcpu, &apic_base_msr); | ||
| 7447 | 7481 | ||
| 7448 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; | 7482 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
| 7449 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 7483 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
| @@ -7734,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
| 7734 | vcpu->arch.nmi_injected = false; | 7768 | vcpu->arch.nmi_injected = false; |
| 7735 | kvm_clear_interrupt_queue(vcpu); | 7769 | kvm_clear_interrupt_queue(vcpu); |
| 7736 | kvm_clear_exception_queue(vcpu); | 7770 | kvm_clear_exception_queue(vcpu); |
| 7771 | vcpu->arch.exception.pending = false; | ||
| 7737 | 7772 | ||
| 7738 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 7773 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
| 7739 | kvm_update_dr0123(vcpu); | 7774 | kvm_update_dr0123(vcpu); |
| @@ -7993,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 7993 | kvm_pmu_init(vcpu); | 8028 | kvm_pmu_init(vcpu); |
| 7994 | 8029 | ||
| 7995 | vcpu->arch.pending_external_vector = -1; | 8030 | vcpu->arch.pending_external_vector = -1; |
| 8031 | vcpu->arch.preempted_in_kernel = false; | ||
| 7996 | 8032 | ||
| 7997 | kvm_hv_vcpu_init(vcpu); | 8033 | kvm_hv_vcpu_init(vcpu); |
| 7998 | 8034 | ||
| @@ -8440,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 8440 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); | 8476 | return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); |
| 8441 | } | 8477 | } |
| 8442 | 8478 | ||
| 8479 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
| 8480 | { | ||
| 8481 | return vcpu->arch.preempted_in_kernel; | ||
| 8482 | } | ||
| 8483 | |||
| 8443 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 8484 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
| 8444 | { | 8485 | { |
| 8445 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; | 8486 | return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; |
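Note on the MSR_IA32_APICBASE hunk at the top of this file: after the rework, x2APIC mode can never be set without the xAPIC enable bit, and a guest-initiated write may not jump straight from disabled to x2APIC or drop from x2APIC back to plain xAPIC; only host-initiated (migration) writes bypass the transition rule. A minimal standalone sketch of that rule follows, with the reserved-bits check left out; the bit positions, helper name and main() harness are illustrative, not kernel code.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative values; the real definitions live in the kernel headers. */
	#define APICBASE_ENABLE  (1ULL << 11)   /* MSR_IA32_APICBASE_ENABLE */
	#define X2APIC_ENABLE    (1ULL << 10)

	/*
	 * Sketch of the transition check in the hunk above: x2APIC cannot be
	 * set without the xAPIC enable bit, and a guest may not go straight
	 * from disabled to x2APIC or from x2APIC back to plain xAPIC.
	 */
	static bool apic_base_transition_ok(uint64_t old_val, uint64_t new_val,
					    bool host_initiated)
	{
		uint64_t old_state = old_val & (APICBASE_ENABLE | X2APIC_ENABLE);
		uint64_t new_state = new_val & (APICBASE_ENABLE | X2APIC_ENABLE);

		if (new_state == X2APIC_ENABLE)
			return false;
		if (host_initiated)
			return true;
		if (new_state == APICBASE_ENABLE &&
		    old_state == (APICBASE_ENABLE | X2APIC_ENABLE))
			return false;
		if (new_state == (APICBASE_ENABLE | X2APIC_ENABLE) && old_state == 0)
			return false;
		return true;
	}

	int main(void)
	{
		/* xAPIC enable from disabled: allowed (prints 1). */
		printf("%d\n", apic_base_transition_ok(0, APICBASE_ENABLE, false));
		/* disabled straight to x2APIC: refused for the guest (prints 0). */
		printf("%d\n", apic_base_transition_ok(0, APICBASE_ENABLE | X2APIC_ENABLE,
						       false));
		return 0;
	}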
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 612067074905..51e349cf5f45 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 12 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
| 13 | { | 13 | { |
| 14 | vcpu->arch.exception.pending = false; | 14 | vcpu->arch.exception.injected = false; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, | 17 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, |
| @@ -29,7 +29,7 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | |||
| 29 | 29 | ||
| 30 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) | 30 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) |
| 31 | { | 31 | { |
| 32 | return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || | 32 | return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || |
| 33 | vcpu->arch.nmi_injected; | 33 | vcpu->arch.nmi_injected; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| @@ -62,6 +62,16 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) | |||
| 62 | return cs_l; | 62 | return cs_l; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | static inline bool is_la57_mode(struct kvm_vcpu *vcpu) | ||
| 66 | { | ||
| 67 | #ifdef CONFIG_X86_64 | ||
| 68 | return (vcpu->arch.efer & EFER_LMA) && | ||
| 69 | kvm_read_cr4_bits(vcpu, X86_CR4_LA57); | ||
| 70 | #else | ||
| 71 | return 0; | ||
| 72 | #endif | ||
| 73 | } | ||
| 74 | |||
| 65 | static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) | 75 | static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) |
| 66 | { | 76 | { |
| 67 | return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; | 77 | return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; |
| @@ -87,10 +97,48 @@ static inline u32 bit(int bitno) | |||
| 87 | return 1 << (bitno & 31); | 97 | return 1 << (bitno & 31); |
| 88 | } | 98 | } |
| 89 | 99 | ||
| 100 | static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) | ||
| 101 | { | ||
| 102 | return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48; | ||
| 103 | } | ||
| 104 | |||
| 105 | static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) | ||
| 106 | { | ||
| 107 | return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48; | ||
| 108 | } | ||
| 109 | |||
| 110 | static inline u64 get_canonical(u64 la, u8 vaddr_bits) | ||
| 111 | { | ||
| 112 | return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits); | ||
| 113 | } | ||
| 114 | |||
| 115 | static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) | ||
| 116 | { | ||
| 117 | #ifdef CONFIG_X86_64 | ||
| 118 | return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; | ||
| 119 | #else | ||
| 120 | return false; | ||
| 121 | #endif | ||
| 122 | } | ||
| 123 | |||
| 124 | static inline bool emul_is_noncanonical_address(u64 la, | ||
| 125 | struct x86_emulate_ctxt *ctxt) | ||
| 126 | { | ||
| 127 | #ifdef CONFIG_X86_64 | ||
| 128 | return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; | ||
| 129 | #else | ||
| 130 | return false; | ||
| 131 | #endif | ||
| 132 | } | ||
| 133 | |||
| 90 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, | 134 | static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, |
| 91 | gva_t gva, gfn_t gfn, unsigned access) | 135 | gva_t gva, gfn_t gfn, unsigned access) |
| 92 | { | 136 | { |
| 93 | vcpu->arch.mmio_gva = gva & PAGE_MASK; | 137 | /* |
| 138 | * If this is a shadow nested page table, the "GVA" is | ||
| 139 | * actually a nGPA. | ||
| 140 | */ | ||
| 141 | vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK; | ||
| 94 | vcpu->arch.access = access; | 142 | vcpu->arch.access = access; |
| 95 | vcpu->arch.mmio_gfn = gfn; | 143 | vcpu->arch.mmio_gfn = gfn; |
| 96 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; | 144 | vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; |
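The canonical-address helpers added here reduce to sign-extending the linear address from bit 47 (or bit 56 once CR4.LA57 is set) and comparing the result with the original value. A self-contained sketch of the same arithmetic outside the kernel; the function names are illustrative, and the shift is done on the unsigned value first to keep the standalone code well defined.

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* Same arithmetic as get_canonical()/is_noncanonical_address() above:
	 * sign-extend from the top implemented virtual-address bit. */
	static uint64_t canonical(uint64_t la, unsigned int vaddr_bits)
	{
		return (uint64_t)((int64_t)(la << (64 - vaddr_bits)) >> (64 - vaddr_bits));
	}

	static bool is_noncanonical(uint64_t la, bool la57_enabled)
	{
		unsigned int bits = la57_enabled ? 57 : 48;

		return canonical(la, bits) != la;
	}

	int main(void)
	{
		/* 2^47 is non-canonical with 48-bit virtual addresses ... */
		assert(is_noncanonical(0x0000800000000000ULL, false));
		/* ... but canonical once LA57 extends them to 57 bits. */
		assert(!is_noncanonical(0x0000800000000000ULL, true));
		return 0;
	}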
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 21a6fd6c44af..6882538eda32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -720,7 +720,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); | |||
| 720 | bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); | 720 | bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); |
| 721 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 721 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
| 722 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); | 722 | int kvm_vcpu_yield_to(struct kvm_vcpu *target); |
| 723 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); | 723 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); |
| 724 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | 724 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); |
| 725 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | 725 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); |
| 726 | 726 | ||
| @@ -800,6 +800,7 @@ int kvm_arch_hardware_setup(void); | |||
| 800 | void kvm_arch_hardware_unsetup(void); | 800 | void kvm_arch_hardware_unsetup(void); |
| 801 | void kvm_arch_check_processor_compat(void *rtn); | 801 | void kvm_arch_check_processor_compat(void *rtn); |
| 802 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); | 802 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); |
| 803 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); | ||
| 803 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); | 804 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); |
| 804 | 805 | ||
| 805 | #ifndef __KVM_HAVE_ARCH_VM_ALLOC | 806 | #ifndef __KVM_HAVE_ARCH_VM_ALLOC |
| @@ -985,6 +986,12 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) | |||
| 985 | return (hpa_t)pfn << PAGE_SHIFT; | 986 | return (hpa_t)pfn << PAGE_SHIFT; |
| 986 | } | 987 | } |
| 987 | 988 | ||
| 989 | static inline struct page *kvm_vcpu_gpa_to_page(struct kvm_vcpu *vcpu, | ||
| 990 | gpa_t gpa) | ||
| 991 | { | ||
| 992 | return kvm_vcpu_gfn_to_page(vcpu, gpa_to_gfn(gpa)); | ||
| 993 | } | ||
| 994 | |||
| 988 | static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) | 995 | static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) |
| 989 | { | 996 | { |
| 990 | unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); | 997 | unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); |
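The new kvm_vcpu_gpa_to_page() helper is only a convenience wrapper: it strips the page offset from the guest physical address and hands the resulting frame number to the existing kvm_vcpu_gfn_to_page() lookup. A trivial sketch of the gpa-to-gfn step, assuming 4 KiB pages; the standalone typedefs and PAGE_SHIFT value are placeholders for the kernel's own definitions.

	#include <assert.h>
	#include <stdint.h>

	#define PAGE_SHIFT 12	/* illustrative; 4 KiB pages */

	typedef uint64_t gpa_t;
	typedef uint64_t gfn_t;

	/* Drop the in-page offset to obtain the guest frame number. */
	static gfn_t gpa_to_gfn(gpa_t gpa)
	{
		return gpa >> PAGE_SHIFT;
	}

	int main(void)
	{
		assert(gpa_to_gfn(0x12345678) == 0x12345);
		return 0;
	}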
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6cd63c18708a..838887587411 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
| @@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size { | |||
| 711 | struct kvm_ppc_smmu_info { | 711 | struct kvm_ppc_smmu_info { |
| 712 | __u64 flags; | 712 | __u64 flags; |
| 713 | __u32 slb_size; | 713 | __u32 slb_size; |
| 714 | __u32 pad; | 714 | __u16 data_keys; /* # storage keys supported for data */ |
| 715 | __u16 instr_keys; /* # storage keys supported for instructions */ | ||
| 715 | struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; | 716 | struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; |
| 716 | }; | 717 | }; |
| 717 | 718 | ||
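Because the two __u16 key counts replace the old __u32 pad, struct kvm_ppc_smmu_info keeps its size and existing userspace keeps working; newer userspace can read the counts after the usual KVM_PPC_GET_SMMU_INFO call. A hedged sketch follows: vm_fd is assumed to be an already-created Book3S VM file descriptor, error handling is minimal, and this is an illustration rather than VMM code.

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Query the SMMU description and report the storage-key counts.
	 * A kernel that predates the new fields leaves them as zero. */
	static void print_storage_keys(int vm_fd)
	{
		struct kvm_ppc_smmu_info info;

		if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) < 0) {
			perror("KVM_PPC_GET_SMMU_INFO");
			return;
		}
		printf("data keys: %u, instruction keys: %u\n",
		       info.data_keys, info.instr_keys);
	}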
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a39a1e161e63..b9f68e4add71 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
| @@ -416,6 +416,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | |||
| 416 | && !v->arch.power_off && !v->arch.pause); | 416 | && !v->arch.power_off && !v->arch.pause); |
| 417 | } | 417 | } |
| 418 | 418 | ||
| 419 | bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) | ||
| 420 | { | ||
| 421 | return vcpu_mode_priv(vcpu); | ||
| 422 | } | ||
| 423 | |||
| 419 | /* Just ensure a guest exit from a particular CPU */ | 424 | /* Just ensure a guest exit from a particular CPU */ |
| 420 | static void exit_vm_noop(void *info) | 425 | static void exit_vm_noop(void *info) |
| 421 | { | 426 | { |
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 2ea21dac0b44..b36945d49986 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
| @@ -1454,25 +1454,6 @@ out: | |||
| 1454 | kvm_set_pfn_accessed(pfn); | 1454 | kvm_set_pfn_accessed(pfn); |
| 1455 | } | 1455 | } |
| 1456 | 1456 | ||
| 1457 | static bool is_abort_sea(unsigned long fault_status) | ||
| 1458 | { | ||
| 1459 | switch (fault_status) { | ||
| 1460 | case FSC_SEA: | ||
| 1461 | case FSC_SEA_TTW0: | ||
| 1462 | case FSC_SEA_TTW1: | ||
| 1463 | case FSC_SEA_TTW2: | ||
| 1464 | case FSC_SEA_TTW3: | ||
| 1465 | case FSC_SECC: | ||
| 1466 | case FSC_SECC_TTW0: | ||
| 1467 | case FSC_SECC_TTW1: | ||
| 1468 | case FSC_SECC_TTW2: | ||
| 1469 | case FSC_SECC_TTW3: | ||
| 1470 | return true; | ||
| 1471 | default: | ||
| 1472 | return false; | ||
| 1473 | } | ||
| 1474 | } | ||
| 1475 | |||
| 1476 | /** | 1457 | /** |
| 1477 | * kvm_handle_guest_abort - handles all 2nd stage aborts | 1458 | * kvm_handle_guest_abort - handles all 2nd stage aborts |
| 1478 | * @vcpu: the VCPU pointer | 1459 | * @vcpu: the VCPU pointer |
| @@ -1498,20 +1479,21 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 1498 | fault_status = kvm_vcpu_trap_get_fault_type(vcpu); | 1479 | fault_status = kvm_vcpu_trap_get_fault_type(vcpu); |
| 1499 | 1480 | ||
| 1500 | fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); | 1481 | fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); |
| 1482 | is_iabt = kvm_vcpu_trap_is_iabt(vcpu); | ||
| 1501 | 1483 | ||
| 1502 | /* | 1484 | /* Synchronous External Abort? */ |
| 1503 | * The host kernel will handle the synchronous external abort. There | 1485 | if (kvm_vcpu_dabt_isextabt(vcpu)) { |
| 1504 | * is no need to pass the error into the guest. | 1486 | /* |
| 1505 | */ | 1487 | * For RAS the host kernel may handle this abort. |
| 1506 | if (is_abort_sea(fault_status)) { | 1488 | * There is no need to pass the error into the guest. |
| 1489 | */ | ||
| 1507 | if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) | 1490 | if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) |
| 1508 | return 1; | 1491 | return 1; |
| 1509 | } | ||
| 1510 | 1492 | ||
| 1511 | is_iabt = kvm_vcpu_trap_is_iabt(vcpu); | 1493 | if (unlikely(!is_iabt)) { |
| 1512 | if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { | 1494 | kvm_inject_vabt(vcpu); |
| 1513 | kvm_inject_vabt(vcpu); | 1495 | return 1; |
| 1514 | return 1; | 1496 | } |
| 1515 | } | 1497 | } |
| 1516 | 1498 | ||
| 1517 | trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), | 1499 | trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), |
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 7072ab743332..10b38178cff2 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c | |||
| @@ -234,7 +234,7 @@ static int vgic_debug_show(struct seq_file *s, void *v) | |||
| 234 | return 0; | 234 | return 0; |
| 235 | } | 235 | } |
| 236 | 236 | ||
| 237 | static struct seq_operations vgic_debug_seq_ops = { | 237 | static const struct seq_operations vgic_debug_seq_ops = { |
| 238 | .start = vgic_debug_start, | 238 | .start = vgic_debug_start, |
| 239 | .next = vgic_debug_next, | 239 | .next = vgic_debug_next, |
| 240 | .stop = vgic_debug_stop, | 240 | .stop = vgic_debug_stop, |
| @@ -255,7 +255,7 @@ static int debug_open(struct inode *inode, struct file *file) | |||
| 255 | return ret; | 255 | return ret; |
| 256 | }; | 256 | }; |
| 257 | 257 | ||
| 258 | static struct file_operations vgic_debug_fops = { | 258 | static const struct file_operations vgic_debug_fops = { |
| 259 | .owner = THIS_MODULE, | 259 | .owner = THIS_MODULE, |
| 260 | .open = debug_open, | 260 | .open = debug_open, |
| 261 | .read = seq_read, | 261 | .read = seq_read, |
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index aa6b68db80b4..f51c1e1b3f70 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c | |||
| @@ -144,7 +144,6 @@ struct its_ite { | |||
| 144 | 144 | ||
| 145 | struct vgic_irq *irq; | 145 | struct vgic_irq *irq; |
| 146 | struct its_collection *collection; | 146 | struct its_collection *collection; |
| 147 | u32 lpi; | ||
| 148 | u32 event_id; | 147 | u32 event_id; |
| 149 | }; | 148 | }; |
| 150 | 149 | ||
| @@ -813,7 +812,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) | |||
| 813 | /* Must be called with its_lock mutex held */ | 812 | /* Must be called with its_lock mutex held */ |
| 814 | static struct its_ite *vgic_its_alloc_ite(struct its_device *device, | 813 | static struct its_ite *vgic_its_alloc_ite(struct its_device *device, |
| 815 | struct its_collection *collection, | 814 | struct its_collection *collection, |
| 816 | u32 lpi_id, u32 event_id) | 815 | u32 event_id) |
| 817 | { | 816 | { |
| 818 | struct its_ite *ite; | 817 | struct its_ite *ite; |
| 819 | 818 | ||
| @@ -823,7 +822,6 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, | |||
| 823 | 822 | ||
| 824 | ite->event_id = event_id; | 823 | ite->event_id = event_id; |
| 825 | ite->collection = collection; | 824 | ite->collection = collection; |
| 826 | ite->lpi = lpi_id; | ||
| 827 | 825 | ||
| 828 | list_add_tail(&ite->ite_list, &device->itt_head); | 826 | list_add_tail(&ite->ite_list, &device->itt_head); |
| 829 | return ite; | 827 | return ite; |
| @@ -873,7 +871,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, | |||
| 873 | new_coll = collection; | 871 | new_coll = collection; |
| 874 | } | 872 | } |
| 875 | 873 | ||
| 876 | ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id); | 874 | ite = vgic_its_alloc_ite(device, collection, event_id); |
| 877 | if (IS_ERR(ite)) { | 875 | if (IS_ERR(ite)) { |
| 878 | if (new_coll) | 876 | if (new_coll) |
| 879 | vgic_its_free_collection(its, coll_id); | 877 | vgic_its_free_collection(its, coll_id); |
| @@ -1848,7 +1846,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, | |||
| 1848 | 1846 | ||
| 1849 | next_offset = compute_next_eventid_offset(&dev->itt_head, ite); | 1847 | next_offset = compute_next_eventid_offset(&dev->itt_head, ite); |
| 1850 | val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | | 1848 | val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | |
| 1851 | ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) | | 1849 | ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | |
| 1852 | ite->collection->collection_id; | 1850 | ite->collection->collection_id; |
| 1853 | val = cpu_to_le64(val); | 1851 | val = cpu_to_le64(val); |
| 1854 | return kvm_write_guest(kvm, gpa, &val, ite_esz); | 1852 | return kvm_write_guest(kvm, gpa, &val, ite_esz); |
| @@ -1895,7 +1893,7 @@ static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, | |||
| 1895 | if (!collection) | 1893 | if (!collection) |
| 1896 | return -EINVAL; | 1894 | return -EINVAL; |
| 1897 | 1895 | ||
| 1898 | ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id); | 1896 | ite = vgic_its_alloc_ite(dev, collection, event_id); |
| 1899 | if (IS_ERR(ite)) | 1897 | if (IS_ERR(ite)) |
| 1900 | return PTR_ERR(ite); | 1898 | return PTR_ERR(ite); |
| 1901 | 1899 | ||
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 37522e65eb53..b3d4a10f09a1 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c | |||
| @@ -303,6 +303,51 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, | |||
| 303 | vgic_set_vmcr(vcpu, &vmcr); | 303 | vgic_set_vmcr(vcpu, &vmcr); |
| 304 | } | 304 | } |
| 305 | 305 | ||
| 306 | static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, | ||
| 307 | gpa_t addr, unsigned int len) | ||
| 308 | { | ||
| 309 | int n; /* which APRn is this */ | ||
| 310 | |||
| 311 | n = (addr >> 2) & 0x3; | ||
| 312 | |||
| 313 | if (kvm_vgic_global_state.type == VGIC_V2) { | ||
| 314 | /* GICv2 hardware systems support max. 32 groups */ | ||
| 315 | if (n != 0) | ||
| 316 | return 0; | ||
| 317 | return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr; | ||
| 318 | } else { | ||
| 319 | struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
| 320 | |||
| 321 | if (n > vgic_v3_max_apr_idx(vcpu)) | ||
| 322 | return 0; | ||
| 323 | /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ | ||
| 324 | return vgicv3->vgic_ap1r[n]; | ||
| 325 | } | ||
| 326 | } | ||
| 327 | |||
| 328 | static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, | ||
| 329 | gpa_t addr, unsigned int len, | ||
| 330 | unsigned long val) | ||
| 331 | { | ||
| 332 | int n; /* which APRn is this */ | ||
| 333 | |||
| 334 | n = (addr >> 2) & 0x3; | ||
| 335 | |||
| 336 | if (kvm_vgic_global_state.type == VGIC_V2) { | ||
| 337 | /* GICv2 hardware systems support max. 32 groups */ | ||
| 338 | if (n != 0) | ||
| 339 | return; | ||
| 340 | vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val; | ||
| 341 | } else { | ||
| 342 | struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; | ||
| 343 | |||
| 344 | if (n > vgic_v3_max_apr_idx(vcpu)) | ||
| 345 | return; | ||
| 346 | /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ | ||
| 347 | vgicv3->vgic_ap1r[n] = val; | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 306 | static const struct vgic_register_region vgic_v2_dist_registers[] = { | 351 | static const struct vgic_register_region vgic_v2_dist_registers[] = { |
| 307 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, | 352 | REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL, |
| 308 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, | 353 | vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, |
| @@ -364,7 +409,7 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = { | |||
| 364 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | 409 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, |
| 365 | VGIC_ACCESS_32bit), | 410 | VGIC_ACCESS_32bit), |
| 366 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, | 411 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, |
| 367 | vgic_mmio_read_raz, vgic_mmio_write_wi, 16, | 412 | vgic_mmio_read_apr, vgic_mmio_write_apr, 16, |
| 368 | VGIC_ACCESS_32bit), | 413 | VGIC_ACCESS_32bit), |
| 369 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, | 414 | REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, |
| 370 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, | 415 | vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, |
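In the new vgic_mmio_read_apr()/vgic_mmio_write_apr() handlers, the APRn index is derived from bits [3:2] of the access address, i.e. the word offset within the 16-byte GIC_CPU_ACTIVEPRIO window; on a GICv2 host only index 0 is backed by hardware state. A tiny sketch of that index calculation; apr_index() is an illustration of the expression used by the handlers, not a kernel function.

	#include <assert.h>
	#include <stdint.h>

	/* Bits [3:2] of the access address select one of the four 32-bit
	 * GICC_APRn registers. */
	static unsigned int apr_index(uint64_t addr)
	{
		return (addr >> 2) & 0x3;
	}

	int main(void)
	{
		assert(apr_index(0x0) == 0);	/* GICC_APR0 */
		assert(apr_index(0x4) == 1);	/* GICC_APR1 */
		assert(apr_index(0xc) == 3);	/* GICC_APR3 */
		return 0;
	}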
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index bba7fa22a7f7..bf9ceab67c77 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h | |||
| @@ -220,4 +220,20 @@ int vgic_debug_destroy(struct kvm *kvm); | |||
| 220 | bool lock_all_vcpus(struct kvm *kvm); | 220 | bool lock_all_vcpus(struct kvm *kvm); |
| 221 | void unlock_all_vcpus(struct kvm *kvm); | 221 | void unlock_all_vcpus(struct kvm *kvm); |
| 222 | 222 | ||
| 223 | static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) | ||
| 224 | { | ||
| 225 | struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; | ||
| 226 | |||
| 227 | /* | ||
| 228 | * num_pri_bits are initialized with HW supported values. | ||
| 229 | * We can rely safely on num_pri_bits even if VM has not | ||
| 230 | * restored ICC_CTLR_EL1 before restoring APnR registers. | ||
| 231 | */ | ||
| 232 | switch (cpu_if->num_pri_bits) { | ||
| 233 | case 7: return 3; | ||
| 234 | case 6: return 1; | ||
| 235 | default: return 0; | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 223 | #endif | 239 | #endif |
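The switch in vgic_v3_max_apr_idx() follows from the GICv3 architecture: each 32-bit active-priority register covers 32 priority levels, so a CPU interface with num_pri_bits priority bits needs 2^num_pri_bits / 32 such registers, giving a highest index of 3 for 7 bits, 1 for 6 bits and 0 for 5 bits. A short sketch that reproduces the mapping; max_apr_idx() here is an illustrative reformulation, not the kernel helper.

	#include <assert.h>

	/* Highest APRn index = (2^num_pri_bits / 32) - 1, matching the
	 * switch statement in vgic_v3_max_apr_idx() above. */
	static int max_apr_idx(int num_pri_bits)
	{
		return ((1 << num_pri_bits) / 32) - 1;
	}

	int main(void)
	{
		assert(max_apr_idx(7) == 3);
		assert(max_apr_idx(6) == 1);
		assert(max_apr_idx(5) == 0);
		return 0;
	}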
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d81f6ded88e..6ed1c2021198 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -1609,7 +1609,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, | |||
| 1609 | struct page **pages, int nr_pages) | 1609 | struct page **pages, int nr_pages) |
| 1610 | { | 1610 | { |
| 1611 | unsigned long addr; | 1611 | unsigned long addr; |
| 1612 | gfn_t entry; | 1612 | gfn_t entry = 0; |
| 1613 | 1613 | ||
| 1614 | addr = gfn_to_hva_many(slot, gfn, &entry); | 1614 | addr = gfn_to_hva_many(slot, gfn, &entry); |
| 1615 | if (kvm_is_error_hva(addr)) | 1615 | if (kvm_is_error_hva(addr)) |
| @@ -1928,6 +1928,7 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, | |||
| 1928 | * verify that the entire region is valid here. | 1928 | * verify that the entire region is valid here. |
| 1929 | */ | 1929 | */ |
| 1930 | while (start_gfn <= end_gfn) { | 1930 | while (start_gfn <= end_gfn) { |
| 1931 | nr_pages_avail = 0; | ||
| 1931 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); | 1932 | ghc->memslot = __gfn_to_memslot(slots, start_gfn); |
| 1932 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, | 1933 | ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, |
| 1933 | &nr_pages_avail); | 1934 | &nr_pages_avail); |
| @@ -2275,7 +2276,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | |||
| 2275 | #endif | 2276 | #endif |
| 2276 | } | 2277 | } |
| 2277 | 2278 | ||
| 2278 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 2279 | void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) |
| 2279 | { | 2280 | { |
| 2280 | struct kvm *kvm = me->kvm; | 2281 | struct kvm *kvm = me->kvm; |
| 2281 | struct kvm_vcpu *vcpu; | 2282 | struct kvm_vcpu *vcpu; |
| @@ -2306,6 +2307,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 2306 | continue; | 2307 | continue; |
| 2307 | if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) | 2308 | if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) |
| 2308 | continue; | 2309 | continue; |
| 2310 | if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) | ||
| 2311 | continue; | ||
| 2309 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | 2312 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) |
| 2310 | continue; | 2313 | continue; |
| 2311 | 2314 | ||
