150 files changed, 11900 insertions, 1982 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9a3edf7e901a..11fc28ecdb6d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
| @@ -1907,6 +1907,9 @@ | |||
| 1907 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. | 1907 | kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. |
| 1908 | Default is 0 (don't ignore, but inject #GP) | 1908 | Default is 0 (don't ignore, but inject #GP) |
| 1909 | 1909 | ||
| 1910 | kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface. | ||
| 1911 | Default is false (don't support). | ||
| 1912 | |||
| 1910 | kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit | 1913 | kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit |
| 1911 | KVM MMU at runtime. | 1914 | KVM MMU at runtime. |
| 1912 | Default is 0 (off) | 1915 | Default is 0 (off) |
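Editor's note (not part of the hunk above): enable_vmware_backdoor is documented as a KVM module parameter, so the usual ways to turn it on are the kernel command line or a modprobe options file; the value shown is only an illustration of the documented boolean.

	kvm.enable_vmware_backdoor=1               # kernel command line
	options kvm enable_vmware_backdoor=1       # /etc/modprobe.d/kvm.conf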
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 671bc0639262..c5dab30d3389 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt | |||
| @@ -86,9 +86,12 @@ Translation table lookup with 64KB pages: | |||
| 86 | +-------------------------------------------------> [63] TTBR0/1 | 86 | +-------------------------------------------------> [63] TTBR0/1 |
| 87 | 87 | ||
| 88 | 88 | ||
| 89 | When using KVM without the Virtualization Host Extensions, the hypervisor | 89 | When using KVM without the Virtualization Host Extensions, the |
| 90 | maps kernel pages in EL2 at a fixed offset from the kernel VA. See the | 90 | hypervisor maps kernel pages in EL2 at a fixed (and potentially |
| 91 | kern_hyp_va macro for more details. | 91 | random) offset from the linear mapping. See the kern_hyp_va macro and |
| 92 | kvm_update_va_mask function for more details. MMIO devices such as | ||
| 93 | GICv2 gets mapped next to the HYP idmap page, as do vectors when | ||
| 94 | ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs. | ||
| 92 | 95 | ||
| 93 | When using KVM with the Virtualization Host Extensions, no additional | 96 | When using KVM with the Virtualization Host Extensions, no additional |
| 94 | mappings are created, since the host kernel runs directly in EL2. | 97 | mappings are created, since the host kernel runs directly in EL2. |
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX index 3da73aabff5a..3492458a4ae8 100644 --- a/Documentation/virtual/kvm/00-INDEX +++ b/Documentation/virtual/kvm/00-INDEX | |||
| @@ -1,7 +1,12 @@ | |||
| 1 | 00-INDEX | 1 | 00-INDEX |
| 2 | - this file. | 2 | - this file. |
| 3 | amd-memory-encryption.rst | ||
| 4 | - notes on AMD Secure Encrypted Virtualization feature and SEV firmware | ||
| 5 | command description | ||
| 3 | api.txt | 6 | api.txt |
| 4 | - KVM userspace API. | 7 | - KVM userspace API. |
| 8 | arm | ||
| 9 | - internal ABI between the kernel and HYP (for arm/arm64) | ||
| 5 | cpuid.txt | 10 | cpuid.txt |
| 6 | - KVM-specific cpuid leaves (x86). | 11 | - KVM-specific cpuid leaves (x86). |
| 7 | devices/ | 12 | devices/ |
| @@ -26,6 +31,5 @@ s390-diag.txt | |||
| 26 | - Diagnose hypercall description (for IBM S/390) | 31 | - Diagnose hypercall description (for IBM S/390) |
| 27 | timekeeping.txt | 32 | timekeeping.txt |
| 28 | - timekeeping virtualization for x86-based architectures. | 33 | - timekeeping virtualization for x86-based architectures. |
| 29 | amd-memory-encryption.txt | 34 | vcpu-requests.rst |
| 30 | - notes on AMD Secure Encrypted Virtualization feature and SEV firmware | 35 | - internal VCPU request API |
| 31 | command description | ||
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d6b3ff51a14f..1c7958b57fe9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
| @@ -3480,7 +3480,7 @@ encrypted VMs. | |||
| 3480 | 3480 | ||
| 3481 | Currently, this ioctl is used for issuing Secure Encrypted Virtualization | 3481 | Currently, this ioctl is used for issuing Secure Encrypted Virtualization |
| 3482 | (SEV) commands on AMD Processors. The SEV commands are defined in | 3482 | (SEV) commands on AMD Processors. The SEV commands are defined in |
| 3483 | Documentation/virtual/kvm/amd-memory-encryption.txt. | 3483 | Documentation/virtual/kvm/amd-memory-encryption.rst. |
| 3484 | 3484 | ||
| 3485 | 4.111 KVM_MEMORY_ENCRYPT_REG_REGION | 3485 | 4.111 KVM_MEMORY_ENCRYPT_REG_REGION |
| 3486 | 3486 | ||
| @@ -3516,6 +3516,38 @@ Returns: 0 on success; -1 on error | |||
| 3516 | This ioctl can be used to unregister the guest memory region registered | 3516 | This ioctl can be used to unregister the guest memory region registered |
| 3517 | with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above. | 3517 | with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above. |
| 3518 | 3518 | ||
| 3519 | 4.113 KVM_HYPERV_EVENTFD | ||
| 3520 | |||
| 3521 | Capability: KVM_CAP_HYPERV_EVENTFD | ||
| 3522 | Architectures: x86 | ||
| 3523 | Type: vm ioctl | ||
| 3524 | Parameters: struct kvm_hyperv_eventfd (in) | ||
| 3525 | |||
| 3526 | This ioctl (un)registers an eventfd to receive notifications from the guest on | ||
| 3527 | the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without | ||
| 3528 | causing a user exit. SIGNAL_EVENT hypercall with non-zero event flag number | ||
| 3529 | (bits 24-31) still triggers a KVM_EXIT_HYPERV_HCALL user exit. | ||
| 3530 | |||
| 3531 | struct kvm_hyperv_eventfd { | ||
| 3532 | __u32 conn_id; | ||
| 3533 | __s32 fd; | ||
| 3534 | __u32 flags; | ||
| 3535 | __u32 padding[3]; | ||
| 3536 | }; | ||
| 3537 | |||
| 3538 | The conn_id field should fit within 24 bits: | ||
| 3539 | |||
| 3540 | #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff | ||
| 3541 | |||
| 3542 | The acceptable values for the flags field are: | ||
| 3543 | |||
| 3544 | #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) | ||
| 3545 | |||
| 3546 | Returns: 0 on success, | ||
| 3547 | -EINVAL if conn_id or flags is outside the allowed range | ||
| 3548 | -ENOENT on deassign if the conn_id isn't registered | ||
| 3549 | -EEXIST on assign if the conn_id is already registered | ||
| 3550 | |||
| 3519 | 3551 | ||
| 3520 | 5. The kvm_run structure | 3552 | 5. The kvm_run structure |
| 3521 | ------------------------ | 3553 | ------------------------ |
| @@ -3873,7 +3905,7 @@ in userspace. | |||
| 3873 | __u64 kvm_dirty_regs; | 3905 | __u64 kvm_dirty_regs; |
| 3874 | union { | 3906 | union { |
| 3875 | struct kvm_sync_regs regs; | 3907 | struct kvm_sync_regs regs; |
| 3876 | char padding[1024]; | 3908 | char padding[SYNC_REGS_SIZE_BYTES]; |
| 3877 | } s; | 3909 | } s; |
| 3878 | 3910 | ||
| 3879 | If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access | 3911 | If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access |
| @@ -4078,6 +4110,46 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be | |||
| 4078 | accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from | 4110 | accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from |
| 4079 | the guest. | 4111 | the guest. |
| 4080 | 4112 | ||
| 4113 | 6.74 KVM_CAP_SYNC_REGS | ||
| 4114 | Architectures: s390, x86 | ||
| 4115 | Target: s390: always enabled, x86: vcpu | ||
| 4116 | Parameters: none | ||
| 4117 | Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register | ||
| 4118 | sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h). | ||
| 4119 | |||
| 4120 | As described above in the kvm_sync_regs struct info in section 5 (kvm_run): | ||
| 4121 | KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers | ||
| 4122 | without having to call SET/GET_*REGS". This reduces overhead by eliminating | ||
| 4123 | repeated ioctl calls for setting and/or getting register values. This is | ||
| 4124 | particularly important when userspace is making synchronous guest state | ||
| 4125 | modifications, e.g. when emulating and/or intercepting instructions in | ||
| 4126 | userspace. | ||
| 4127 | |||
| 4128 | For s390 specifics, please refer to the source code. | ||
| 4129 | |||
| 4130 | For x86: | ||
| 4131 | - the register sets to be copied out to kvm_run are selectable | ||
| 4132 | by userspace (rather that all sets being copied out for every exit). | ||
| 4133 | - vcpu_events are available in addition to regs and sregs. | ||
| 4134 | |||
| 4135 | For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to | ||
| 4136 | function as an input bit-array field set by userspace to indicate the | ||
| 4137 | specific register sets to be copied out on the next exit. | ||
| 4138 | |||
| 4139 | To indicate when userspace has modified values that should be copied into | ||
| 4140 | the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set. | ||
| 4141 | This is done using the same bitflags as for the 'kvm_valid_regs' field. | ||
| 4142 | If the dirty bit is not set, then the register set values will not be copied | ||
| 4143 | into the vCPU even if they've been modified. | ||
| 4144 | |||
| 4145 | Unused bitfields in the bitarrays must be set to zero. | ||
| 4146 | |||
| 4147 | struct kvm_sync_regs { | ||
| 4148 | struct kvm_regs regs; | ||
| 4149 | struct kvm_sregs sregs; | ||
| 4150 | struct kvm_vcpu_events events; | ||
| 4151 | }; | ||
| 4152 | |||
| 4081 | 7. Capabilities that can be enabled on VMs | 4153 | 7. Capabilities that can be enabled on VMs |
| 4082 | ------------------------------------------ | 4154 | ------------------------------------------ |
| 4083 | 4155 | ||
| @@ -4286,6 +4358,26 @@ enables QEMU to build error log and branch to guest kernel registered | |||
| 4286 | machine check handling routine. Without this capability KVM will | 4358 | machine check handling routine. Without this capability KVM will |
| 4287 | branch to guests' 0x200 interrupt vector. | 4359 | branch to guests' 0x200 interrupt vector. |
| 4288 | 4360 | ||
| 4361 | 7.13 KVM_CAP_X86_DISABLE_EXITS | ||
| 4362 | |||
| 4363 | Architectures: x86 | ||
| 4364 | Parameters: args[0] defines which exits are disabled | ||
| 4365 | Returns: 0 on success, -EINVAL when args[0] contains invalid exits | ||
| 4366 | |||
| 4367 | Valid bits in args[0] are | ||
| 4368 | |||
| 4369 | #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) | ||
| 4370 | #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) | ||
| 4371 | |||
| 4372 | Enabling this capability on a VM provides userspace with a way to no | ||
| 4373 | longer intercept some instructions for improved latency in some | ||
| 4374 | workloads, and is suggested when vCPUs are associated to dedicated | ||
| 4375 | physical CPUs. More bits can be added in the future; userspace can | ||
| 4376 | just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable | ||
| 4377 | all such vmexits. | ||
| 4378 | |||
| 4379 | Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits. | ||
| 4380 | |||
| 4289 | 8. Other capabilities. | 4381 | 8. Other capabilities. |
| 4290 | ---------------------- | 4382 | ---------------------- |
| 4291 | 4383 | ||
| @@ -4398,15 +4490,6 @@ reserved. | |||
| 4398 | Both registers and addresses are 64-bits wide. | 4490 | Both registers and addresses are 64-bits wide. |
| 4399 | It will be possible to run 64-bit or 32-bit guest code. | 4491 | It will be possible to run 64-bit or 32-bit guest code. |
| 4400 | 4492 | ||
| 4401 | 8.8 KVM_CAP_X86_GUEST_MWAIT | ||
| 4402 | |||
| 4403 | Architectures: x86 | ||
| 4404 | |||
| 4405 | This capability indicates that guest using memory monotoring instructions | ||
| 4406 | (MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit. As such time | ||
| 4407 | spent while virtual CPU is halted in this way will then be accounted for as | ||
| 4408 | guest running time on the host (as opposed to e.g. HLT). | ||
| 4409 | |||
| 4410 | 8.9 KVM_CAP_ARM_USER_IRQ | 4493 | 8.9 KVM_CAP_ARM_USER_IRQ |
| 4411 | 4494 | ||
| 4412 | Architectures: arm, arm64 | 4495 | Architectures: arm, arm64 |
| @@ -4483,3 +4566,33 @@ Parameters: none | |||
| 4483 | This capability indicates if the flic device will be able to get/set the | 4566 | This capability indicates if the flic device will be able to get/set the |
| 4484 | AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows | 4567 | AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows |
| 4485 | to discover this without having to create a flic device. | 4568 | to discover this without having to create a flic device. |
| 4569 | |||
| 4570 | 8.14 KVM_CAP_S390_PSW | ||
| 4571 | |||
| 4572 | Architectures: s390 | ||
| 4573 | |||
| 4574 | This capability indicates that the PSW is exposed via the kvm_run structure. | ||
| 4575 | |||
| 4576 | 8.15 KVM_CAP_S390_GMAP | ||
| 4577 | |||
| 4578 | Architectures: s390 | ||
| 4579 | |||
| 4580 | This capability indicates that the user space memory used as guest mapping can | ||
| 4581 | be anywhere in the user memory address space, as long as the memory slots are | ||
| 4582 | aligned and sized to a segment (1MB) boundary. | ||
| 4583 | |||
| 4584 | 8.16 KVM_CAP_S390_COW | ||
| 4585 | |||
| 4586 | Architectures: s390 | ||
| 4587 | |||
| 4588 | This capability indicates that the user space memory used as guest mapping can | ||
| 4589 | use copy-on-write semantics as well as dirty pages tracking via read-only page | ||
| 4590 | tables. | ||
| 4591 | |||
| 4592 | 8.17 KVM_CAP_S390_BPB | ||
| 4593 | |||
| 4594 | Architectures: s390 | ||
| 4595 | |||
| 4596 | This capability indicates that kvm will implement the interfaces to handle | ||
| 4597 | reset, migration and nested KVM for branch prediction blocking. The stfle | ||
| 4598 | facility 82 should not be provided to the guest without this capability. | ||
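Editor's illustration (not part of the patch): two of the api.txt additions above lend themselves to short userspace sketches. First, probing and enabling KVM_CAP_X86_DISABLE_EXITS on a VM fd, passing back exactly the bits the kernel advertises, as the 7.13 text suggests; vm_fd setup and error handling are assumed.

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int disable_mwait_hlt_exits(int vm_fd)
	{
		struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_DISABLE_EXITS };
		int allowed = ioctl(vm_fd, KVM_CHECK_EXTENSION,
				    KVM_CAP_X86_DISABLE_EXITS);

		if (allowed <= 0)
			return -1;	/* capability not offered by this kernel */

		/* hand back the advertised bits, per the 7.13 text */
		cap.args[0] = allowed & (KVM_X86_DISABLE_EXITS_MWAIT |
					 KVM_X86_DISABLE_EXITS_HLT);
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

Second, a fragment sketching the x86 KVM_CAP_SYNC_REGS flow from section 6.74: request a register set on exit, modify it in the mmapped kvm_run area, and mark it dirty so KVM loads it back on the next KVM_RUN. KVM_SYNC_X86_REGS is assumed to be the x86 bitflag from arch/x86/include/uapi/asm/kvm.h; vcpu_fd, the kvm_run mmap (run) and insn_len come from surrounding code.

	run->kvm_valid_regs = KVM_SYNC_X86_REGS;	/* copy regs out on next exit */
	ioctl(vcpu_fd, KVM_RUN, 0);
	/* ... handle the exit, e.g. emulate an instruction ... */
	run->s.regs.regs.rip += insn_len;		/* tweak guest state in place */
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;	/* have KVM load it back */
	ioctl(vcpu_fd, KVM_RUN, 0);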
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 87a7506f31c2..d4f33eb805dd 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt | |||
| @@ -23,8 +23,8 @@ This function queries the presence of KVM cpuid leafs. | |||
| 23 | 23 | ||
| 24 | 24 | ||
| 25 | function: define KVM_CPUID_FEATURES (0x40000001) | 25 | function: define KVM_CPUID_FEATURES (0x40000001) |
| 26 | returns : ebx, ecx, edx = 0 | 26 | returns : ebx, ecx |
| 27 | eax = and OR'ed group of (1 << flag), where each flags is: | 27 | eax = an OR'ed group of (1 << flag), where each flags is: |
| 28 | 28 | ||
| 29 | 29 | ||
| 30 | flag || value || meaning | 30 | flag || value || meaning |
| @@ -66,3 +66,14 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | |||
| 66 | || || per-cpu warps are expected in | 66 | || || per-cpu warps are expected in |
| 67 | || || kvmclock. | 67 | || || kvmclock. |
| 68 | ------------------------------------------------------------------------------ | 68 | ------------------------------------------------------------------------------ |
| 69 | |||
| 70 | edx = an OR'ed group of (1 << flag), where each flags is: | ||
| 71 | |||
| 72 | |||
| 73 | flag || value || meaning | ||
| 74 | ================================================================================== | ||
| 75 | KVM_HINTS_DEDICATED || 0 || guest checks this feature bit to | ||
| 76 | || || determine if there is vCPU pinning | ||
| 77 | || || and there is no vCPU over-commitment, | ||
| 78 | || || allowing optimizations | ||
| 79 | ---------------------------------------------------------------------------------- | ||
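Editor's illustration (not from the patch): a guest could test the new EDX hint word like this, querying the KVM_CPUID_FEATURES leaf directly with the GCC/clang __cpuid() helper; it assumes the KVM signature has already been found at the 0x40000000 base leaf.

	#include <stdbool.h>
	#include <cpuid.h>

	#define KVM_CPUID_FEATURES	0x40000001
	#define KVM_HINTS_DEDICATED	0	/* bit number from the table above */

	static bool kvm_hints_dedicated(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* Hypervisor leaves sit above the basic CPUID range, query directly. */
		__cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);
		return edx & (1u << KVM_HINTS_DEDICATED);
	}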
diff --git a/MAINTAINERS b/MAINTAINERS index 4c3c17e1e163..6d296bdce328 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -6516,7 +6516,7 @@ S: Maintained | |||
| 6516 | F: Documentation/networking/netvsc.txt | 6516 | F: Documentation/networking/netvsc.txt |
| 6517 | F: arch/x86/include/asm/mshyperv.h | 6517 | F: arch/x86/include/asm/mshyperv.h |
| 6518 | F: arch/x86/include/asm/trace/hyperv.h | 6518 | F: arch/x86/include/asm/trace/hyperv.h |
| 6519 | F: arch/x86/include/uapi/asm/hyperv.h | 6519 | F: arch/x86/include/asm/hyperv-tlfs.h |
| 6520 | F: arch/x86/kernel/cpu/mshyperv.c | 6520 | F: arch/x86/kernel/cpu/mshyperv.c |
| 6521 | F: arch/x86/hyperv | 6521 | F: arch/x86/hyperv |
| 6522 | F: drivers/hid/hid-hyperv.c | 6522 | F: drivers/hid/hid-hyperv.c |
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 36dd2962a42d..5a953ecb0d78 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h | |||
| @@ -70,7 +70,10 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); | |||
| 70 | 70 | ||
| 71 | extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); | 71 | extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); |
| 72 | 72 | ||
| 73 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | 73 | /* no VHE on 32-bit :( */ |
| 74 | static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; } | ||
| 75 | |||
| 76 | extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); | ||
| 74 | 77 | ||
| 75 | extern void __init_stage2_translation(void); | 78 | extern void __init_stage2_translation(void); |
| 76 | 79 | ||
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9003bd19cb70..6493bd479ddc 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h | |||
| @@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num) | |||
| 41 | return vcpu_reg(vcpu, reg_num); | 41 | return vcpu_reg(vcpu, reg_num); |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); | 44 | unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu); |
| 45 | |||
| 46 | static inline unsigned long vpcu_read_spsr(struct kvm_vcpu *vcpu) | ||
| 47 | { | ||
| 48 | return *__vcpu_spsr(vcpu); | ||
| 49 | } | ||
| 50 | |||
| 51 | static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) | ||
| 52 | { | ||
| 53 | *__vcpu_spsr(vcpu) = v; | ||
| 54 | } | ||
| 45 | 55 | ||
| 46 | static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, | 56 | static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, |
| 47 | u8 reg_num) | 57 | u8 reg_num) |
| @@ -92,14 +102,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) | |||
| 92 | vcpu->arch.hcr = HCR_GUEST_MASK; | 102 | vcpu->arch.hcr = HCR_GUEST_MASK; |
| 93 | } | 103 | } |
| 94 | 104 | ||
| 95 | static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu) | 105 | static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu) |
| 96 | { | ||
| 97 | return vcpu->arch.hcr; | ||
| 98 | } | ||
| 99 | |||
| 100 | static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) | ||
| 101 | { | 106 | { |
| 102 | vcpu->arch.hcr = hcr; | 107 | return (unsigned long *)&vcpu->arch.hcr; |
| 103 | } | 108 | } |
| 104 | 109 | ||
| 105 | static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) | 110 | static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) |
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 248b930563e5..c6a749568dd6 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h | |||
| @@ -155,9 +155,6 @@ struct kvm_vcpu_arch { | |||
| 155 | /* HYP trapping configuration */ | 155 | /* HYP trapping configuration */ |
| 156 | u32 hcr; | 156 | u32 hcr; |
| 157 | 157 | ||
| 158 | /* Interrupt related fields */ | ||
| 159 | u32 irq_lines; /* IRQ and FIQ levels */ | ||
| 160 | |||
| 161 | /* Exception Information */ | 158 | /* Exception Information */ |
| 162 | struct kvm_vcpu_fault_info fault; | 159 | struct kvm_vcpu_fault_info fault; |
| 163 | 160 | ||
| @@ -315,4 +312,7 @@ static inline bool kvm_arm_harden_branch_predictor(void) | |||
| 315 | return false; | 312 | return false; |
| 316 | } | 313 | } |
| 317 | 314 | ||
| 315 | static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {} | ||
| 316 | static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} | ||
| 317 | |||
| 318 | #endif /* __ARM_KVM_HOST_H__ */ | 318 | #endif /* __ARM_KVM_HOST_H__ */ |
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 1ab8329e9ff7..e93a0cac9add 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h | |||
| @@ -110,6 +110,10 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt); | |||
| 110 | 110 | ||
| 111 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); | 111 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); |
| 112 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); | 112 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); |
| 113 | void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); | ||
| 114 | void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); | ||
| 115 | void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); | ||
| 116 | void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); | ||
| 113 | 117 | ||
| 114 | asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp); | 118 | asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp); |
| 115 | asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp); | 119 | asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp); |
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index de1b919404e4..707a1f06dc5d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h | |||
| @@ -28,6 +28,13 @@ | |||
| 28 | */ | 28 | */ |
| 29 | #define kern_hyp_va(kva) (kva) | 29 | #define kern_hyp_va(kva) (kva) |
| 30 | 30 | ||
| 31 | /* Contrary to arm64, there is no need to generate a PC-relative address */ | ||
| 32 | #define hyp_symbol_addr(s) \ | ||
| 33 | ({ \ | ||
| 34 | typeof(s) *addr = &(s); \ | ||
| 35 | addr; \ | ||
| 36 | }) | ||
| 37 | |||
| 31 | /* | 38 | /* |
| 32 | * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. | 39 | * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. |
| 33 | */ | 40 | */ |
| @@ -42,8 +49,15 @@ | |||
| 42 | #include <asm/pgalloc.h> | 49 | #include <asm/pgalloc.h> |
| 43 | #include <asm/stage2_pgtable.h> | 50 | #include <asm/stage2_pgtable.h> |
| 44 | 51 | ||
| 52 | /* Ensure compatibility with arm64 */ | ||
| 53 | #define VA_BITS 32 | ||
| 54 | |||
| 45 | int create_hyp_mappings(void *from, void *to, pgprot_t prot); | 55 | int create_hyp_mappings(void *from, void *to, pgprot_t prot); |
| 46 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t); | 56 | int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, |
| 57 | void __iomem **kaddr, | ||
| 58 | void __iomem **haddr); | ||
| 59 | int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | ||
| 60 | void **haddr); | ||
| 47 | void free_hyp_pgds(void); | 61 | void free_hyp_pgds(void); |
| 48 | 62 | ||
| 49 | void stage2_unmap_vm(struct kvm *kvm); | 63 | void stage2_unmap_vm(struct kvm *kvm); |
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 6edd177bb1c7..2ba95d6fe852 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h | |||
| @@ -135,6 +135,15 @@ struct kvm_arch_memory_slot { | |||
| 135 | #define KVM_REG_ARM_CRM_SHIFT 7 | 135 | #define KVM_REG_ARM_CRM_SHIFT 7 |
| 136 | #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 | 136 | #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 |
| 137 | #define KVM_REG_ARM_32_CRN_SHIFT 11 | 137 | #define KVM_REG_ARM_32_CRN_SHIFT 11 |
| 138 | /* | ||
| 139 | * For KVM currently all guest registers are nonsecure, but we reserve a bit | ||
| 140 | * in the encoding to distinguish secure from nonsecure for AArch32 system | ||
| 141 | * registers that are banked by security. This is 1 for the secure banked | ||
| 142 | * register, and 0 for the nonsecure banked register or if the register is | ||
| 143 | * not banked by security. | ||
| 144 | */ | ||
| 145 | #define KVM_REG_ARM_SECURE_MASK 0x0000000010000000 | ||
| 146 | #define KVM_REG_ARM_SECURE_SHIFT 28 | ||
| 138 | 147 | ||
| 139 | #define ARM_CP15_REG_SHIFT_MASK(x,n) \ | 148 | #define ARM_CP15_REG_SHIFT_MASK(x,n) \ |
| 140 | (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) | 149 | (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) |
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 6d1d2e26dfe5..3a02e76699a6 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c | |||
| @@ -270,6 +270,60 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu, | |||
| 270 | return true; | 270 | return true; |
| 271 | } | 271 | } |
| 272 | 272 | ||
| 273 | static bool access_cntp_tval(struct kvm_vcpu *vcpu, | ||
| 274 | const struct coproc_params *p, | ||
| 275 | const struct coproc_reg *r) | ||
| 276 | { | ||
| 277 | u64 now = kvm_phys_timer_read(); | ||
| 278 | u64 val; | ||
| 279 | |||
| 280 | if (p->is_write) { | ||
| 281 | val = *vcpu_reg(vcpu, p->Rt1); | ||
| 282 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now); | ||
| 283 | } else { | ||
| 284 | val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | ||
| 285 | *vcpu_reg(vcpu, p->Rt1) = val - now; | ||
| 286 | } | ||
| 287 | |||
| 288 | return true; | ||
| 289 | } | ||
| 290 | |||
| 291 | static bool access_cntp_ctl(struct kvm_vcpu *vcpu, | ||
| 292 | const struct coproc_params *p, | ||
| 293 | const struct coproc_reg *r) | ||
| 294 | { | ||
| 295 | u32 val; | ||
| 296 | |||
| 297 | if (p->is_write) { | ||
| 298 | val = *vcpu_reg(vcpu, p->Rt1); | ||
| 299 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val); | ||
| 300 | } else { | ||
| 301 | val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); | ||
| 302 | *vcpu_reg(vcpu, p->Rt1) = val; | ||
| 303 | } | ||
| 304 | |||
| 305 | return true; | ||
| 306 | } | ||
| 307 | |||
| 308 | static bool access_cntp_cval(struct kvm_vcpu *vcpu, | ||
| 309 | const struct coproc_params *p, | ||
| 310 | const struct coproc_reg *r) | ||
| 311 | { | ||
| 312 | u64 val; | ||
| 313 | |||
| 314 | if (p->is_write) { | ||
| 315 | val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; | ||
| 316 | val |= *vcpu_reg(vcpu, p->Rt1); | ||
| 317 | kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val); | ||
| 318 | } else { | ||
| 319 | val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); | ||
| 320 | *vcpu_reg(vcpu, p->Rt1) = val; | ||
| 321 | *vcpu_reg(vcpu, p->Rt2) = val >> 32; | ||
| 322 | } | ||
| 323 | |||
| 324 | return true; | ||
| 325 | } | ||
| 326 | |||
| 273 | /* | 327 | /* |
| 274 | * We could trap ID_DFR0 and tell the guest we don't support performance | 328 | * We could trap ID_DFR0 and tell the guest we don't support performance |
| 275 | * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was | 329 | * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was |
| @@ -423,10 +477,17 @@ static const struct coproc_reg cp15_regs[] = { | |||
| 423 | { CRn(13), CRm( 0), Op1( 0), Op2( 4), is32, | 477 | { CRn(13), CRm( 0), Op1( 0), Op2( 4), is32, |
| 424 | NULL, reset_unknown, c13_TID_PRIV }, | 478 | NULL, reset_unknown, c13_TID_PRIV }, |
| 425 | 479 | ||
| 480 | /* CNTP */ | ||
| 481 | { CRm64(14), Op1( 2), is64, access_cntp_cval}, | ||
| 482 | |||
| 426 | /* CNTKCTL: swapped by interrupt.S. */ | 483 | /* CNTKCTL: swapped by interrupt.S. */ |
| 427 | { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, | 484 | { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, |
| 428 | NULL, reset_val, c14_CNTKCTL, 0x00000000 }, | 485 | NULL, reset_val, c14_CNTKCTL, 0x00000000 }, |
| 429 | 486 | ||
| 487 | /* CNTP */ | ||
| 488 | { CRn(14), CRm( 2), Op1( 0), Op2( 0), is32, access_cntp_tval }, | ||
| 489 | { CRn(14), CRm( 2), Op1( 0), Op2( 1), is32, access_cntp_ctl }, | ||
| 490 | |||
| 430 | /* The Configuration Base Address Register. */ | 491 | /* The Configuration Base Address Register. */ |
| 431 | { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, | 492 | { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, |
| 432 | }; | 493 | }; |
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index cdff963f133a..9046b53d87c1 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c | |||
| @@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num) | |||
| 142 | /* | 142 | /* |
| 143 | * Return the SPSR for the current mode of the virtual CPU. | 143 | * Return the SPSR for the current mode of the virtual CPU. |
| 144 | */ | 144 | */ |
| 145 | unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) | 145 | unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu) |
| 146 | { | 146 | { |
| 147 | unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; | 147 | unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK; |
| 148 | switch (mode) { | 148 | switch (mode) { |
| @@ -174,5 +174,5 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) | |||
| 174 | */ | 174 | */ |
| 175 | void kvm_inject_vabt(struct kvm_vcpu *vcpu) | 175 | void kvm_inject_vabt(struct kvm_vcpu *vcpu) |
| 176 | { | 176 | { |
| 177 | vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA); | 177 | *vcpu_hcr(vcpu) |= HCR_VA; |
| 178 | } | 178 | } |
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 63d6b404d88e..7fc0638f263a 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile | |||
| @@ -9,7 +9,6 @@ KVM=../../../../virt/kvm | |||
| 9 | 9 | ||
| 10 | CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) | 10 | CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) |
| 11 | 11 | ||
| 12 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o | ||
| 13 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o | 12 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o |
| 14 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o | 13 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o |
| 15 | 14 | ||
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index ae45ae96aac2..acf1c37fa49c 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c | |||
| @@ -44,7 +44,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host) | |||
| 44 | isb(); | 44 | isb(); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR); | 47 | write_sysreg(vcpu->arch.hcr, HCR); |
| 48 | /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ | 48 | /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ |
| 49 | write_sysreg(HSTR_T(15), HSTR); | 49 | write_sysreg(HSTR_T(15), HSTR); |
| 50 | write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); | 50 | write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR); |
| @@ -90,18 +90,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) | |||
| 90 | 90 | ||
| 91 | static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) | 91 | static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) |
| 92 | { | 92 | { |
| 93 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) | 93 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { |
| 94 | __vgic_v3_save_state(vcpu); | 94 | __vgic_v3_save_state(vcpu); |
| 95 | else | 95 | __vgic_v3_deactivate_traps(vcpu); |
| 96 | __vgic_v2_save_state(vcpu); | 96 | } |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) | 99 | static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) |
| 100 | { | 100 | { |
| 101 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) | 101 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { |
| 102 | __vgic_v3_activate_traps(vcpu); | ||
| 102 | __vgic_v3_restore_state(vcpu); | 103 | __vgic_v3_restore_state(vcpu); |
| 103 | else | 104 | } |
| 104 | __vgic_v2_restore_state(vcpu); | ||
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) | 107 | static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) |
| @@ -154,7 +154,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) | |||
| 154 | return true; | 154 | return true; |
| 155 | } | 155 | } |
| 156 | 156 | ||
| 157 | int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) | 157 | int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) |
| 158 | { | 158 | { |
| 159 | struct kvm_cpu_context *host_ctxt; | 159 | struct kvm_cpu_context *host_ctxt; |
| 160 | struct kvm_cpu_context *guest_ctxt; | 160 | struct kvm_cpu_context *guest_ctxt; |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 177be0d1d090..eb2cf4938f6d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig | |||
| @@ -922,6 +922,22 @@ config HARDEN_BRANCH_PREDICTOR | |||
| 922 | 922 | ||
| 923 | If unsure, say Y. | 923 | If unsure, say Y. |
| 924 | 924 | ||
| 925 | config HARDEN_EL2_VECTORS | ||
| 926 | bool "Harden EL2 vector mapping against system register leak" if EXPERT | ||
| 927 | default y | ||
| 928 | help | ||
| 929 | Speculation attacks against some high-performance processors can | ||
| 930 | be used to leak privileged information such as the vector base | ||
| 931 | register, resulting in a potential defeat of the EL2 layout | ||
| 932 | randomization. | ||
| 933 | |||
| 934 | This config option will map the vectors to a fixed location, | ||
| 935 | independent of the EL2 code mapping, so that revealing VBAR_EL2 | ||
| 936 | to an attacker does not give away any extra information. This | ||
| 937 | only gets enabled on affected CPUs. | ||
| 938 | |||
| 939 | If unsure, say Y. | ||
| 940 | |||
| 925 | menuconfig ARMV8_DEPRECATED | 941 | menuconfig ARMV8_DEPRECATED |
| 926 | bool "Emulate deprecated/obsolete ARMv8 instructions" | 942 | bool "Emulate deprecated/obsolete ARMv8 instructions" |
| 927 | depends on COMPAT | 943 | depends on COMPAT |
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 669028172fd6..a91933b1e2e6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <asm/cpucaps.h> | 5 | #include <asm/cpucaps.h> |
| 6 | #include <asm/insn.h> | 6 | #include <asm/insn.h> |
| 7 | 7 | ||
| 8 | #define ARM64_CB_PATCH ARM64_NCAPS | ||
| 9 | |||
| 8 | #ifndef __ASSEMBLY__ | 10 | #ifndef __ASSEMBLY__ |
| 9 | 11 | ||
| 10 | #include <linux/init.h> | 12 | #include <linux/init.h> |
| @@ -22,12 +24,19 @@ struct alt_instr { | |||
| 22 | u8 alt_len; /* size of new instruction(s), <= orig_len */ | 24 | u8 alt_len; /* size of new instruction(s), <= orig_len */ |
| 23 | }; | 25 | }; |
| 24 | 26 | ||
| 27 | typedef void (*alternative_cb_t)(struct alt_instr *alt, | ||
| 28 | __le32 *origptr, __le32 *updptr, int nr_inst); | ||
| 29 | |||
| 25 | void __init apply_alternatives_all(void); | 30 | void __init apply_alternatives_all(void); |
| 26 | void apply_alternatives(void *start, size_t length); | 31 | void apply_alternatives(void *start, size_t length); |
| 27 | 32 | ||
| 28 | #define ALTINSTR_ENTRY(feature) \ | 33 | #define ALTINSTR_ENTRY(feature,cb) \ |
| 29 | " .word 661b - .\n" /* label */ \ | 34 | " .word 661b - .\n" /* label */ \ |
| 35 | " .if " __stringify(cb) " == 0\n" \ | ||
| 30 | " .word 663f - .\n" /* new instruction */ \ | 36 | " .word 663f - .\n" /* new instruction */ \ |
| 37 | " .else\n" \ | ||
| 38 | " .word " __stringify(cb) "- .\n" /* callback */ \ | ||
| 39 | " .endif\n" \ | ||
| 31 | " .hword " __stringify(feature) "\n" /* feature bit */ \ | 40 | " .hword " __stringify(feature) "\n" /* feature bit */ \ |
| 32 | " .byte 662b-661b\n" /* source len */ \ | 41 | " .byte 662b-661b\n" /* source len */ \ |
| 33 | " .byte 664f-663f\n" /* replacement len */ | 42 | " .byte 664f-663f\n" /* replacement len */ |
| @@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length); | |||
| 45 | * but most assemblers die if insn1 or insn2 have a .inst. This should | 54 | * but most assemblers die if insn1 or insn2 have a .inst. This should |
| 46 | * be fixed in a binutils release posterior to 2.25.51.0.2 (anything | 55 | * be fixed in a binutils release posterior to 2.25.51.0.2 (anything |
| 47 | * containing commit 4e4d08cf7399b606 or c1baaddf8861). | 56 | * containing commit 4e4d08cf7399b606 or c1baaddf8861). |
| 57 | * | ||
| 58 | * Alternatives with callbacks do not generate replacement instructions. | ||
| 48 | */ | 59 | */ |
| 49 | #define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ | 60 | #define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ |
| 50 | ".if "__stringify(cfg_enabled)" == 1\n" \ | 61 | ".if "__stringify(cfg_enabled)" == 1\n" \ |
| 51 | "661:\n\t" \ | 62 | "661:\n\t" \ |
| 52 | oldinstr "\n" \ | 63 | oldinstr "\n" \ |
| 53 | "662:\n" \ | 64 | "662:\n" \ |
| 54 | ".pushsection .altinstructions,\"a\"\n" \ | 65 | ".pushsection .altinstructions,\"a\"\n" \ |
| 55 | ALTINSTR_ENTRY(feature) \ | 66 | ALTINSTR_ENTRY(feature,cb) \ |
| 56 | ".popsection\n" \ | 67 | ".popsection\n" \ |
| 68 | " .if " __stringify(cb) " == 0\n" \ | ||
| 57 | ".pushsection .altinstr_replacement, \"a\"\n" \ | 69 | ".pushsection .altinstr_replacement, \"a\"\n" \ |
| 58 | "663:\n\t" \ | 70 | "663:\n\t" \ |
| 59 | newinstr "\n" \ | 71 | newinstr "\n" \ |
| @@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length); | |||
| 61 | ".popsection\n\t" \ | 73 | ".popsection\n\t" \ |
| 62 | ".org . - (664b-663b) + (662b-661b)\n\t" \ | 74 | ".org . - (664b-663b) + (662b-661b)\n\t" \ |
| 63 | ".org . - (662b-661b) + (664b-663b)\n" \ | 75 | ".org . - (662b-661b) + (664b-663b)\n" \ |
| 76 | ".else\n\t" \ | ||
| 77 | "663:\n\t" \ | ||
| 78 | "664:\n\t" \ | ||
| 79 | ".endif\n" \ | ||
| 64 | ".endif\n" | 80 | ".endif\n" |
| 65 | 81 | ||
| 66 | #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ | 82 | #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ |
| 67 | __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) | 83 | __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) |
| 68 | 84 | ||
| 85 | #define ALTERNATIVE_CB(oldinstr, cb) \ | ||
| 86 | __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) | ||
| 69 | #else | 87 | #else |
| 70 | 88 | ||
| 71 | #include <asm/assembler.h> | 89 | #include <asm/assembler.h> |
| @@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length); | |||
| 132 | 661: | 150 | 661: |
| 133 | .endm | 151 | .endm |
| 134 | 152 | ||
| 153 | .macro alternative_cb cb | ||
| 154 | .set .Lasm_alt_mode, 0 | ||
| 155 | .pushsection .altinstructions, "a" | ||
| 156 | altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 | ||
| 157 | .popsection | ||
| 158 | 661: | ||
| 159 | .endm | ||
| 160 | |||
| 135 | /* | 161 | /* |
| 136 | * Provide the other half of the alternative code sequence. | 162 | * Provide the other half of the alternative code sequence. |
| 137 | */ | 163 | */ |
| @@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length); | |||
| 158 | .endm | 184 | .endm |
| 159 | 185 | ||
| 160 | /* | 186 | /* |
| 187 | * Callback-based alternative epilogue | ||
| 188 | */ | ||
| 189 | .macro alternative_cb_end | ||
| 190 | 662: | ||
| 191 | .endm | ||
| 192 | |||
| 193 | /* | ||
| 161 | * Provides a trivial alternative or default sequence consisting solely | 194 | * Provides a trivial alternative or default sequence consisting solely |
| 162 | * of NOPs. The number of NOPs is chosen automatically to match the | 195 | * of NOPs. The number of NOPs is chosen automatically to match the |
| 163 | * previous case. | 196 | * previous case. |
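Editor's sketch (simplified, not lifted from this series): a callback-based alternative pairs an inline placeholder instruction with a patch-time function that writes the real instructions through updptr, much like the kvm_update_va_mask user mentioned in the memory.txt hunk earlier. The callback name and the generated instruction here are illustrative only.

	#include <linux/kernel.h>
	#include <asm/alternative.h>
	#include <asm/insn.h>

	/* Runs once at patch time; nr_inst covers the placeholder sequence. */
	void example_va_patch_cb(struct alt_instr *alt, __le32 *origptr,
				 __le32 *updptr, int nr_inst)
	{
		int i;

		for (i = 0; i < nr_inst; i++)
			updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
	}

	/* Call site: the build-time instruction is only a stand-in. */
	static inline unsigned long example_masked(unsigned long v)
	{
		asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n", example_va_patch_cb)
			     : "+r" (v));
		return v;
	}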
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 21bb624e0a7a..a311880feb0f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h | |||
| @@ -32,7 +32,7 @@ | |||
| 32 | #define ARM64_HAS_VIRT_HOST_EXTN 11 | 32 | #define ARM64_HAS_VIRT_HOST_EXTN 11 |
| 33 | #define ARM64_WORKAROUND_CAVIUM_27456 12 | 33 | #define ARM64_WORKAROUND_CAVIUM_27456 12 |
| 34 | #define ARM64_HAS_32BIT_EL0 13 | 34 | #define ARM64_HAS_32BIT_EL0 13 |
| 35 | #define ARM64_HYP_OFFSET_LOW 14 | 35 | #define ARM64_HARDEN_EL2_VECTORS 14 |
| 36 | #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 | 36 | #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 |
| 37 | #define ARM64_HAS_NO_FPSIMD 16 | 37 | #define ARM64_HAS_NO_FPSIMD 16 |
| 38 | #define ARM64_WORKAROUND_REPEAT_TLBI 17 | 38 | #define ARM64_WORKAROUND_REPEAT_TLBI 17 |
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 4214c38d016b..f62c56b1793f 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h | |||
| @@ -70,6 +70,7 @@ enum aarch64_insn_imm_type { | |||
| 70 | AARCH64_INSN_IMM_6, | 70 | AARCH64_INSN_IMM_6, |
| 71 | AARCH64_INSN_IMM_S, | 71 | AARCH64_INSN_IMM_S, |
| 72 | AARCH64_INSN_IMM_R, | 72 | AARCH64_INSN_IMM_R, |
| 73 | AARCH64_INSN_IMM_N, | ||
| 73 | AARCH64_INSN_IMM_MAX | 74 | AARCH64_INSN_IMM_MAX |
| 74 | }; | 75 | }; |
| 75 | 76 | ||
| @@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000) | |||
| 314 | __AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000) | 315 | __AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000) |
| 315 | __AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000) | 316 | __AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000) |
| 316 | __AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000) | 317 | __AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000) |
| 318 | __AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000) | ||
| 319 | __AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000) | ||
| 320 | __AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000) | ||
| 321 | __AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000) | ||
| 322 | __AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000) | ||
| 317 | __AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000) | 323 | __AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000) |
| 318 | __AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000) | 324 | __AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000) |
| 319 | __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) | 325 | __AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000) |
| @@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, | |||
| 423 | int shift, | 429 | int shift, |
| 424 | enum aarch64_insn_variant variant, | 430 | enum aarch64_insn_variant variant, |
| 425 | enum aarch64_insn_logic_type type); | 431 | enum aarch64_insn_logic_type type); |
| 432 | u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, | ||
| 433 | enum aarch64_insn_variant variant, | ||
| 434 | enum aarch64_insn_register Rn, | ||
| 435 | enum aarch64_insn_register Rd, | ||
| 436 | u64 imm); | ||
| 437 | u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, | ||
| 438 | enum aarch64_insn_register Rm, | ||
| 439 | enum aarch64_insn_register Rn, | ||
| 440 | enum aarch64_insn_register Rd, | ||
| 441 | u8 lsb); | ||
| 426 | u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, | 442 | u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, |
| 427 | enum aarch64_insn_prfm_type type, | 443 | enum aarch64_insn_prfm_type type, |
| 428 | enum aarch64_insn_prfm_target target, | 444 | enum aarch64_insn_prfm_target target, |
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index b0c84171e6a3..6dd285e979c9 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | /* Hyp Configuration Register (HCR) bits */ | 25 | /* Hyp Configuration Register (HCR) bits */ |
| 26 | #define HCR_TEA (UL(1) << 37) | 26 | #define HCR_TEA (UL(1) << 37) |
| 27 | #define HCR_TERR (UL(1) << 36) | 27 | #define HCR_TERR (UL(1) << 36) |
| 28 | #define HCR_TLOR (UL(1) << 35) | ||
| 28 | #define HCR_E2H (UL(1) << 34) | 29 | #define HCR_E2H (UL(1) << 34) |
| 29 | #define HCR_ID (UL(1) << 33) | 30 | #define HCR_ID (UL(1) << 33) |
| 30 | #define HCR_CD (UL(1) << 32) | 31 | #define HCR_CD (UL(1) << 32) |
| @@ -64,6 +65,7 @@ | |||
| 64 | 65 | ||
| 65 | /* | 66 | /* |
| 66 | * The bits we set in HCR: | 67 | * The bits we set in HCR: |
| 68 | * TLOR: Trap LORegion register accesses | ||
| 67 | * RW: 64bit by default, can be overridden for 32bit VMs | 69 | * RW: 64bit by default, can be overridden for 32bit VMs |
| 68 | * TAC: Trap ACTLR | 70 | * TAC: Trap ACTLR |
| 69 | * TSC: Trap SMC | 71 | * TSC: Trap SMC |
| @@ -81,9 +83,9 @@ | |||
| 81 | */ | 83 | */ |
| 82 | #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ | 84 | #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ |
| 83 | HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ | 85 | HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ |
| 84 | HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) | 86 | HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ |
| 87 | HCR_FMO | HCR_IMO) | ||
| 85 | #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) | 88 | #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) |
| 86 | #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) | ||
| 87 | #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) | 89 | #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) |
| 88 | 90 | ||
| 89 | /* TCR_EL2 Registers bits */ | 91 | /* TCR_EL2 Registers bits */ |
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 24961b732e65..d53d40704416 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 | 33 | #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 |
| 34 | #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) | 34 | #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) |
| 35 | 35 | ||
| 36 | /* Translate a kernel address of @sym into its equivalent linear mapping */ | ||
| 36 | #define kvm_ksym_ref(sym) \ | 37 | #define kvm_ksym_ref(sym) \ |
| 37 | ({ \ | 38 | ({ \ |
| 38 | void *val = &sym; \ | 39 | void *val = &sym; \ |
| @@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); | |||
| 57 | 58 | ||
| 58 | extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); | 59 | extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); |
| 59 | 60 | ||
| 60 | extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); | 61 | extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); |
| 62 | |||
| 63 | extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); | ||
| 61 | 64 | ||
| 62 | extern u64 __vgic_v3_get_ich_vtr_el2(void); | 65 | extern u64 __vgic_v3_get_ich_vtr_el2(void); |
| 63 | extern u64 __vgic_v3_read_vmcr(void); | 66 | extern u64 __vgic_v3_read_vmcr(void); |
| @@ -70,6 +73,20 @@ extern u32 __init_stage2_translation(void); | |||
| 70 | 73 | ||
| 71 | extern void __qcom_hyp_sanitize_btac_predictors(void); | 74 | extern void __qcom_hyp_sanitize_btac_predictors(void); |
| 72 | 75 | ||
| 76 | #else /* __ASSEMBLY__ */ | ||
| 77 | |||
| 78 | .macro get_host_ctxt reg, tmp | ||
| 79 | adr_l \reg, kvm_host_cpu_state | ||
| 80 | mrs \tmp, tpidr_el2 | ||
| 81 | add \reg, \reg, \tmp | ||
| 82 | .endm | ||
| 83 | |||
| 84 | .macro get_vcpu_ptr vcpu, ctxt | ||
| 85 | get_host_ctxt \ctxt, \vcpu | ||
| 86 | ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] | ||
| 87 | kern_hyp_va \vcpu | ||
| 88 | .endm | ||
| 89 | |||
| 73 | #endif | 90 | #endif |
| 74 | 91 | ||
| 75 | #endif /* __ARM_KVM_ASM_H__ */ | 92 | #endif /* __ARM_KVM_ASM_H__ */ |
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 413dc82b1e89..23b33e8ea03a 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h | |||
| @@ -26,13 +26,15 @@ | |||
| 26 | 26 | ||
| 27 | #include <asm/esr.h> | 27 | #include <asm/esr.h> |
| 28 | #include <asm/kvm_arm.h> | 28 | #include <asm/kvm_arm.h> |
| 29 | #include <asm/kvm_hyp.h> | ||
| 29 | #include <asm/kvm_mmio.h> | 30 | #include <asm/kvm_mmio.h> |
| 30 | #include <asm/ptrace.h> | 31 | #include <asm/ptrace.h> |
| 31 | #include <asm/cputype.h> | 32 | #include <asm/cputype.h> |
| 32 | #include <asm/virt.h> | 33 | #include <asm/virt.h> |
| 33 | 34 | ||
| 34 | unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); | 35 | unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); |
| 35 | unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); | 36 | unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); |
| 37 | void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); | ||
| 36 | 38 | ||
| 37 | bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); | 39 | bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); |
| 38 | void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); | 40 | void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); |
| @@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu); | |||
| 45 | void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); | 47 | void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); |
| 46 | void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); | 48 | void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); |
| 47 | 49 | ||
| 50 | static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) | ||
| 51 | { | ||
| 52 | return !(vcpu->arch.hcr_el2 & HCR_RW); | ||
| 53 | } | ||
| 54 | |||
| 48 | static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) | 55 | static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) |
| 49 | { | 56 | { |
| 50 | vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; | 57 | vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS; |
| @@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) | |||
| 59 | 66 | ||
| 60 | if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) | 67 | if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) |
| 61 | vcpu->arch.hcr_el2 &= ~HCR_RW; | 68 | vcpu->arch.hcr_el2 &= ~HCR_RW; |
| 62 | } | ||
| 63 | 69 | ||
| 64 | static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) | 70 | /* |
| 65 | { | 71 | * TID3: trap feature register accesses that we virtualise. |
| 66 | return vcpu->arch.hcr_el2; | 72 | * For now this is conditional, since no AArch32 feature regs |
| 73 | * are currently virtualised. | ||
| 74 | */ | ||
| 75 | if (!vcpu_el1_is_32bit(vcpu)) | ||
| 76 | vcpu->arch.hcr_el2 |= HCR_TID3; | ||
| 67 | } | 77 | } |
| 68 | 78 | ||
| 69 | static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) | 79 | static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) |
| 70 | { | 80 | { |
| 71 | vcpu->arch.hcr_el2 = hcr; | 81 | return (unsigned long *)&vcpu->arch.hcr_el2; |
| 72 | } | 82 | } |
| 73 | 83 | ||
| 74 | static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) | 84 | static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr) |
| @@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) | |||
| 81 | return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; | 91 | return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; |
| 82 | } | 92 | } |
| 83 | 93 | ||
| 84 | static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu) | 94 | static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) |
| 85 | { | 95 | { |
| 86 | return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; | 96 | return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1; |
| 87 | } | 97 | } |
| 88 | 98 | ||
| 99 | static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) | ||
| 100 | { | ||
| 101 | if (vcpu->arch.sysregs_loaded_on_cpu) | ||
| 102 | return read_sysreg_el1(elr); | ||
| 103 | else | ||
| 104 | return *__vcpu_elr_el1(vcpu); | ||
| 105 | } | ||
| 106 | |||
| 107 | static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) | ||
| 108 | { | ||
| 109 | if (vcpu->arch.sysregs_loaded_on_cpu) | ||
| 110 | write_sysreg_el1(v, elr); | ||
| 111 | else | ||
| 112 | *__vcpu_elr_el1(vcpu) = v; | ||
| 113 | } | ||
| 114 | |||
| 89 | static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) | 115 | static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) |
| 90 | { | 116 | { |
| 91 | return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; | 117 | return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate; |
| @@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, | |||
| 135 | vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; | 161 | vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; |
| 136 | } | 162 | } |
| 137 | 163 | ||
| 138 | /* Get vcpu SPSR for current mode */ | 164 | static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) |
| 139 | static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) | ||
| 140 | { | 165 | { |
| 141 | if (vcpu_mode_is_32bit(vcpu)) | 166 | if (vcpu_mode_is_32bit(vcpu)) |
| 142 | return vcpu_spsr32(vcpu); | 167 | return vcpu_read_spsr32(vcpu); |
| 143 | 168 | ||
| 144 | return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; | 169 | if (vcpu->arch.sysregs_loaded_on_cpu) |
| 170 | return read_sysreg_el1(spsr); | ||
| 171 | else | ||
| 172 | return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; | ||
| 173 | } | ||
| 174 | |||
| 175 | static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) | ||
| 176 | { | ||
| 177 | if (vcpu_mode_is_32bit(vcpu)) { | ||
| 178 | vcpu_write_spsr32(vcpu, v); | ||
| 179 | return; | ||
| 180 | } | ||
| 181 | |||
| 182 | if (vcpu->arch.sysregs_loaded_on_cpu) | ||
| 183 | write_sysreg_el1(v, spsr); | ||
| 184 | else | ||
| 185 | vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; | ||
| 145 | } | 186 | } |
| 146 | 187 | ||
| 147 | static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) | 188 | static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) |
| @@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) | |||
| 282 | 323 | ||
| 283 | static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) | 324 | static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) |
| 284 | { | 325 | { |
| 285 | return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; | 326 | return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; |
| 286 | } | 327 | } |
| 287 | 328 | ||
| 288 | static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) | 329 | static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) |
| 289 | { | 330 | { |
| 290 | if (vcpu_mode_is_32bit(vcpu)) | 331 | if (vcpu_mode_is_32bit(vcpu)) { |
| 291 | *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; | 332 | *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT; |
| 292 | else | 333 | } else { |
| 293 | vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25); | 334 | u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); |
| 335 | sctlr |= (1 << 25); | ||
| 336 | vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr); | ||
| 337 | } | ||
| 294 | } | 338 | } |
| 295 | 339 | ||
| 296 | static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) | 340 | static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) |
| @@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) | |||
| 298 | if (vcpu_mode_is_32bit(vcpu)) | 342 | if (vcpu_mode_is_32bit(vcpu)) |
| 299 | return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); | 343 | return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT); |
| 300 | 344 | ||
| 301 | return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); | 345 | return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); |
| 302 | } | 346 | } |
| 303 | 347 | ||
| 304 | static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, | 348 | static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, |
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 596f8e414a4c..ab46bc70add6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h | |||
| @@ -272,9 +272,6 @@ struct kvm_vcpu_arch { | |||
| 272 | /* IO related fields */ | 272 | /* IO related fields */ |
| 273 | struct kvm_decode mmio_decode; | 273 | struct kvm_decode mmio_decode; |
| 274 | 274 | ||
| 275 | /* Interrupt related fields */ | ||
| 276 | u64 irq_lines; /* IRQ and FIQ levels */ | ||
| 277 | |||
| 278 | /* Cache some mmu pages needed inside spinlock regions */ | 275 | /* Cache some mmu pages needed inside spinlock regions */ |
| 279 | struct kvm_mmu_memory_cache mmu_page_cache; | 276 | struct kvm_mmu_memory_cache mmu_page_cache; |
| 280 | 277 | ||
| @@ -287,10 +284,25 @@ struct kvm_vcpu_arch { | |||
| 287 | 284 | ||
| 288 | /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ | 285 | /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ |
| 289 | u64 vsesr_el2; | 286 | u64 vsesr_el2; |
| 287 | |||
| 288 | /* True when deferrable sysregs are loaded on the physical CPU, | ||
| 289 | * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ | ||
| 290 | bool sysregs_loaded_on_cpu; | ||
| 290 | }; | 291 | }; |
| 291 | 292 | ||
| 292 | #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) | 293 | #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) |
| 293 | #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) | 294 | |
| 295 | /* | ||
| 296 | * Only use __vcpu_sys_reg if you know you want the memory backed version of a | ||
| 297 | * register, and not the one most recently accessed by a running VCPU. For | ||
| 298 | * example, for userspace access or for system registers that are never context | ||
| 299 | * switched, but only emulated. | ||
| 300 | */ | ||
| 301 | #define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) | ||
| 302 | |||
| 303 | u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg); | ||
| 304 | void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); | ||
| 305 | |||
| 294 | /* | 306 | /* |
| 295 | * CP14 and CP15 live in the same array, as they are backed by the | 307 | * CP14 and CP15 live in the same array, as they are backed by the |
| 296 | * same system registers. | 308 | * same system registers. |
| @@ -298,14 +310,6 @@ struct kvm_vcpu_arch { | |||
| 298 | #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) | 310 | #define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) |
| 299 | #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) | 311 | #define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) |
| 300 | 312 | ||
| 301 | #ifdef CONFIG_CPU_BIG_ENDIAN | ||
| 302 | #define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r)) | ||
| 303 | #define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1) | ||
| 304 | #else | ||
| 305 | #define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1) | ||
| 306 | #define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r)) | ||
| 307 | #endif | ||
| 308 | |||
| 309 | struct kvm_vm_stat { | 313 | struct kvm_vm_stat { |
| 310 | ulong remote_tlb_flush; | 314 | ulong remote_tlb_flush; |
| 311 | }; | 315 | }; |
| @@ -358,10 +362,15 @@ int kvm_perf_teardown(void); | |||
| 358 | 362 | ||
| 359 | struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); | 363 | struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); |
| 360 | 364 | ||
| 365 | void __kvm_set_tpidr_el2(u64 tpidr_el2); | ||
| 366 | DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); | ||
| 367 | |||
| 361 | static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, | 368 | static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, |
| 362 | unsigned long hyp_stack_ptr, | 369 | unsigned long hyp_stack_ptr, |
| 363 | unsigned long vector_ptr) | 370 | unsigned long vector_ptr) |
| 364 | { | 371 | { |
| 372 | u64 tpidr_el2; | ||
| 373 | |||
| 365 | /* | 374 | /* |
| 366 | * Call initialization code, and switch to the full blown HYP code. | 375 | * Call initialization code, and switch to the full blown HYP code. |
| 367 | * If the cpucaps haven't been finalized yet, something has gone very | 376 | * If the cpucaps haven't been finalized yet, something has gone very |
| @@ -370,6 +379,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, | |||
| 370 | */ | 379 | */ |
| 371 | BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); | 380 | BUG_ON(!static_branch_likely(&arm64_const_caps_ready)); |
| 372 | __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); | 381 | __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); |
| 382 | |||
| 383 | /* | ||
| 384 | * Calculate the raw per-cpu offset without a translation from the | ||
| 385 | * kernel's mapping to the linear mapping, and store it in tpidr_el2 | ||
| 386 | * so that we can use adr_l to access per-cpu variables in EL2. | ||
| 387 | */ | ||
| 388 | tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) | ||
| 389 | - (u64)kvm_ksym_ref(kvm_host_cpu_state); | ||
| 390 | |||
| 391 | kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); | ||
| 373 | } | 392 | } |
| 374 | 393 | ||
| 375 | static inline void kvm_arch_hardware_unsetup(void) {} | 394 | static inline void kvm_arch_hardware_unsetup(void) {} |
| @@ -416,6 +435,13 @@ static inline void kvm_arm_vhe_guest_enter(void) | |||
| 416 | static inline void kvm_arm_vhe_guest_exit(void) | 435 | static inline void kvm_arm_vhe_guest_exit(void) |
| 417 | { | 436 | { |
| 418 | local_daif_restore(DAIF_PROCCTX_NOIRQ); | 437 | local_daif_restore(DAIF_PROCCTX_NOIRQ); |
| 438 | |||
| 439 | /* | ||
| 440 | * When we exit from the guest we change a number of CPU configuration | ||
| 441 | * parameters, such as traps. Make sure these changes take effect | ||
| 442 | * before running the host or additional guests. | ||
| 443 | */ | ||
| 444 | isb(); | ||
| 419 | } | 445 | } |
| 420 | 446 | ||
| 421 | static inline bool kvm_arm_harden_branch_predictor(void) | 447 | static inline bool kvm_arm_harden_branch_predictor(void) |
| @@ -423,4 +449,7 @@ static inline bool kvm_arm_harden_branch_predictor(void) | |||
| 423 | return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); | 449 | return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); |
| 424 | } | 450 | } |
| 425 | 451 | ||
| 452 | void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); | ||
| 453 | void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); | ||
| 454 | |||
| 426 | #endif /* __ARM64_KVM_HOST_H__ */ | 455 | #endif /* __ARM64_KVM_HOST_H__ */ |
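The split between __vcpu_sys_reg() and the vcpu_read_sys_reg()/vcpu_write_sys_reg() accessors matters once sysregs_loaded_on_cpu is set: the in-memory copy may be stale while the vcpu's registers live on the physical CPU. Below is a simplified, hedged model of how the read accessor can dispatch; the real implementation lives in arch/arm64/kvm/sys_regs.c, outside this hunk, and covers many more registers.

    #include <asm/kvm_host.h>
    #include <asm/kvm_hyp.h>

    /*
     * Hedged, simplified model only: illustrates the dispatch between the
     * register currently loaded on the CPU and the memory-backed copy.
     */
    static u64 example_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
    {
            if (vcpu->arch.sysregs_loaded_on_cpu) {
                    switch (reg) {
                    case SCTLR_EL1: return read_sysreg_el1(sctlr);
                    case TTBR0_EL1: return read_sysreg_el1(ttbr0);
                    /* ... other deferrable registers ... */
                    }
            }

            /* Otherwise the memory-backed copy is authoritative. */
            return __vcpu_sys_reg(vcpu, reg);
    }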
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index f26f9cd70c72..384c34397619 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h | |||
| @@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void) \ | |||
| 120 | return val; \ | 120 | return val; \ |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | void __vgic_v2_save_state(struct kvm_vcpu *vcpu); | ||
| 124 | void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); | ||
| 125 | int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); | 123 | int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); |
| 126 | 124 | ||
| 127 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); | 125 | void __vgic_v3_save_state(struct kvm_vcpu *vcpu); |
| 128 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); | 126 | void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); |
| 127 | void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); | ||
| 128 | void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); | ||
| 129 | void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); | ||
| 130 | void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); | ||
| 129 | int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); | 131 | int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); |
| 130 | 132 | ||
| 131 | void __timer_enable_traps(struct kvm_vcpu *vcpu); | 133 | void __timer_enable_traps(struct kvm_vcpu *vcpu); |
| 132 | void __timer_disable_traps(struct kvm_vcpu *vcpu); | 134 | void __timer_disable_traps(struct kvm_vcpu *vcpu); |
| 133 | 135 | ||
| 134 | void __sysreg_save_host_state(struct kvm_cpu_context *ctxt); | 136 | void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt); |
| 135 | void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt); | 137 | void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt); |
| 136 | void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt); | 138 | void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt); |
| 137 | void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt); | 139 | void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt); |
| 140 | void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt); | ||
| 141 | void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); | ||
| 138 | void __sysreg32_save_state(struct kvm_vcpu *vcpu); | 142 | void __sysreg32_save_state(struct kvm_vcpu *vcpu); |
| 139 | void __sysreg32_restore_state(struct kvm_vcpu *vcpu); | 143 | void __sysreg32_restore_state(struct kvm_vcpu *vcpu); |
| 140 | 144 | ||
| 141 | void __debug_save_state(struct kvm_vcpu *vcpu, | 145 | void __debug_switch_to_guest(struct kvm_vcpu *vcpu); |
| 142 | struct kvm_guest_debug_arch *dbg, | 146 | void __debug_switch_to_host(struct kvm_vcpu *vcpu); |
| 143 | struct kvm_cpu_context *ctxt); | ||
| 144 | void __debug_restore_state(struct kvm_vcpu *vcpu, | ||
| 145 | struct kvm_guest_debug_arch *dbg, | ||
| 146 | struct kvm_cpu_context *ctxt); | ||
| 147 | void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); | ||
| 148 | void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); | ||
| 149 | 147 | ||
| 150 | void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); | 148 | void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); |
| 151 | void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); | 149 | void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); |
| 152 | bool __fpsimd_enabled(void); | 150 | bool __fpsimd_enabled(void); |
| 153 | 151 | ||
| 152 | void activate_traps_vhe_load(struct kvm_vcpu *vcpu); | ||
| 153 | void deactivate_traps_vhe_put(void); | ||
| 154 | |||
| 154 | u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); | 155 | u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); |
| 155 | void __noreturn __hyp_do_panic(unsigned long, ...); | 156 | void __noreturn __hyp_do_panic(unsigned long, ...); |
| 156 | 157 | ||
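With the sysreg save/restore helpers split into VHE and non-VHE variants, a VHE world switch can keep most EL1 state loaded across exits. The sketch below is purely illustrative and hedged: one plausible call ordering using the prototypes declared above. The actual sequencing is implemented in arch/arm64/kvm/hyp/switch.c and differs in detail (traps, VGIC, timers).

    #include <asm/kvm_host.h>
    #include <asm/kvm_hyp.h>

    /* Illustrative ordering only, not the real VHE run loop. */
    static u64 example_vhe_switch(struct kvm_vcpu *vcpu)
    {
            struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
            struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
            u64 exit_code;

            sysreg_save_host_state_vhe(host_ctxt);
            sysreg_restore_guest_state_vhe(guest_ctxt);
            __debug_switch_to_guest(vcpu);

            exit_code = __guest_enter(vcpu, host_ctxt);

            sysreg_save_guest_state_vhe(guest_ctxt);
            sysreg_restore_host_state_vhe(host_ctxt);
            __debug_switch_to_host(vcpu);

            return exit_code;
    }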
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7faed6e48b46..082110993647 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h | |||
| @@ -69,9 +69,6 @@ | |||
| 69 | * mappings, and none of this applies in that case. | 69 | * mappings, and none of this applies in that case. |
| 70 | */ | 70 | */ |
| 71 | 71 | ||
| 72 | #define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1) | ||
| 73 | #define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1) | ||
| 74 | |||
| 75 | #ifdef __ASSEMBLY__ | 72 | #ifdef __ASSEMBLY__ |
| 76 | 73 | ||
| 77 | #include <asm/alternative.h> | 74 | #include <asm/alternative.h> |
| @@ -81,28 +78,19 @@ | |||
| 81 | * Convert a kernel VA into a HYP VA. | 78 | * Convert a kernel VA into a HYP VA. |
| 82 | * reg: VA to be converted. | 79 | * reg: VA to be converted. |
| 83 | * | 80 | * |
| 84 | * This generates the following sequences: | 81 | * The actual code generation takes place in kvm_update_va_mask, and |
| 85 | * - High mask: | 82 | * the instructions below are only there to reserve the space and |
| 86 | * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK | 83 | * perform the register allocation (kvm_update_va_mask uses the |
| 87 | * nop | 84 | * specific registers encoded in the instructions). |
| 88 | * - Low mask: | ||
| 89 | * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK | ||
| 90 | * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK | ||
| 91 | * - VHE: | ||
| 92 | * nop | ||
| 93 | * nop | ||
| 94 | * | ||
| 95 | * The "low mask" version works because the mask is a strict subset of | ||
| 96 | * the "high mask", hence performing the first mask for nothing. | ||
| 97 | * Should be completely invisible on any viable CPU. | ||
| 98 | */ | 85 | */ |
| 99 | .macro kern_hyp_va reg | 86 | .macro kern_hyp_va reg |
| 100 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | 87 | alternative_cb kvm_update_va_mask |
| 101 | and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK | 88 | and \reg, \reg, #1 /* mask with va_mask */ |
| 102 | alternative_else_nop_endif | 89 | ror \reg, \reg, #1 /* rotate to the first tag bit */ |
| 103 | alternative_if ARM64_HYP_OFFSET_LOW | 90 | add \reg, \reg, #0 /* insert the low 12 bits of the tag */ |
| 104 | and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK | 91 | add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */ |
| 105 | alternative_else_nop_endif | 92 | ror \reg, \reg, #63 /* rotate back */ |
| 93 | alternative_cb_end | ||
| 106 | .endm | 94 | .endm |
| 107 | 95 | ||
| 108 | #else | 96 | #else |
| @@ -113,24 +101,44 @@ alternative_else_nop_endif | |||
| 113 | #include <asm/mmu_context.h> | 101 | #include <asm/mmu_context.h> |
| 114 | #include <asm/pgtable.h> | 102 | #include <asm/pgtable.h> |
| 115 | 103 | ||
| 104 | void kvm_update_va_mask(struct alt_instr *alt, | ||
| 105 | __le32 *origptr, __le32 *updptr, int nr_inst); | ||
| 106 | |||
| 116 | static inline unsigned long __kern_hyp_va(unsigned long v) | 107 | static inline unsigned long __kern_hyp_va(unsigned long v) |
| 117 | { | 108 | { |
| 118 | asm volatile(ALTERNATIVE("and %0, %0, %1", | 109 | asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n" |
| 119 | "nop", | 110 | "ror %0, %0, #1\n" |
| 120 | ARM64_HAS_VIRT_HOST_EXTN) | 111 | "add %0, %0, #0\n" |
| 121 | : "+r" (v) | 112 | "add %0, %0, #0, lsl 12\n" |
| 122 | : "i" (HYP_PAGE_OFFSET_HIGH_MASK)); | 113 | "ror %0, %0, #63\n", |
| 123 | asm volatile(ALTERNATIVE("nop", | 114 | kvm_update_va_mask) |
| 124 | "and %0, %0, %1", | 115 | : "+r" (v)); |
| 125 | ARM64_HYP_OFFSET_LOW) | ||
| 126 | : "+r" (v) | ||
| 127 | : "i" (HYP_PAGE_OFFSET_LOW_MASK)); | ||
| 128 | return v; | 116 | return v; |
| 129 | } | 117 | } |
| 130 | 118 | ||
| 131 | #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) | 119 | #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) |
| 132 | 120 | ||
| 133 | /* | 121 | /* |
| 122 | * Obtain the PC-relative address of a kernel symbol | ||
| 123 | * s: symbol | ||
| 124 | * | ||
| 125 | * The goal of this macro is to return a symbol's address based on a | ||
| 126 | * PC-relative computation, as opposed to loading the VA from a | ||
| 127 | * constant pool or something similar. This works well for HYP, as an | ||
| 128 | * absolute VA is guaranteed to be wrong. Only use this if trying to | ||
| 129 | * obtain the address of a symbol (i.e. not something you obtained by | ||
| 130 | * following a pointer). | ||
| 131 | */ | ||
| 132 | #define hyp_symbol_addr(s) \ | ||
| 133 | ({ \ | ||
| 134 | typeof(s) *addr; \ | ||
| 135 | asm("adrp %0, %1\n" \ | ||
| 136 | "add %0, %0, :lo12:%1\n" \ | ||
| 137 | : "=r" (addr) : "S" (&s)); \ | ||
| 138 | addr; \ | ||
| 139 | }) | ||
| 140 | |||
| 141 | /* | ||
| 134 | * We currently only support a 40bit IPA. | 142 | * We currently only support a 40bit IPA. |
| 135 | */ | 143 | */ |
| 136 | #define KVM_PHYS_SHIFT (40) | 144 | #define KVM_PHYS_SHIFT (40) |
| @@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v) | |||
| 140 | #include <asm/stage2_pgtable.h> | 148 | #include <asm/stage2_pgtable.h> |
| 141 | 149 | ||
| 142 | int create_hyp_mappings(void *from, void *to, pgprot_t prot); | 150 | int create_hyp_mappings(void *from, void *to, pgprot_t prot); |
| 143 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t); | 151 | int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, |
| 152 | void __iomem **kaddr, | ||
| 153 | void __iomem **haddr); | ||
| 154 | int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | ||
| 155 | void **haddr); | ||
| 144 | void free_hyp_pgds(void); | 156 | void free_hyp_pgds(void); |
| 145 | 157 | ||
| 146 | void stage2_unmap_vm(struct kvm *kvm); | 158 | void stage2_unmap_vm(struct kvm *kvm); |
| @@ -249,7 +261,7 @@ struct kvm; | |||
| 249 | 261 | ||
| 250 | static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) | 262 | static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) |
| 251 | { | 263 | { |
| 252 | return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; | 264 | return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; |
| 253 | } | 265 | } |
| 254 | 266 | ||
| 255 | static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) | 267 | static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) |
| @@ -348,36 +360,95 @@ static inline unsigned int kvm_get_vmid_bits(void) | |||
| 348 | return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; | 360 | return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; |
| 349 | } | 361 | } |
| 350 | 362 | ||
| 351 | #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | 363 | #ifdef CONFIG_KVM_INDIRECT_VECTORS |
| 364 | /* | ||
| 365 | * EL2 vectors can be mapped and rerouted in a number of ways, | ||
| 366 | * depending on the kernel configuration and CPU present: | ||
| 367 | * | ||
| 368 | * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the | ||
| 369 | * hardening sequence is placed in one of the vector slots, which is | ||
| 370 | * executed before jumping to the real vectors. | ||
| 371 | * | ||
| 372 | * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the | ||
| 373 | * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the | ||
| 374 | * hardening sequence is mapped next to the idmap page, and executed | ||
| 375 | * before jumping to the real vectors. | ||
| 376 | * | ||
| 377 | * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an | ||
| 378 | * empty slot is selected, mapped next to the idmap page, and | ||
| 379 | * executed before jumping to the real vectors. | ||
| 380 | * | ||
| 381 | * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with | ||
| 382 | * VHE, as we don't have hypervisor-specific mappings. If the system | ||
| 383 | * is VHE and yet selects this capability, it will be ignored. | ||
| 384 | */ | ||
| 352 | #include <asm/mmu.h> | 385 | #include <asm/mmu.h> |
| 353 | 386 | ||
| 387 | extern void *__kvm_bp_vect_base; | ||
| 388 | extern int __kvm_harden_el2_vector_slot; | ||
| 389 | |||
| 354 | static inline void *kvm_get_hyp_vector(void) | 390 | static inline void *kvm_get_hyp_vector(void) |
| 355 | { | 391 | { |
| 356 | struct bp_hardening_data *data = arm64_get_bp_hardening_data(); | 392 | struct bp_hardening_data *data = arm64_get_bp_hardening_data(); |
| 357 | void *vect = kvm_ksym_ref(__kvm_hyp_vector); | 393 | void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); |
| 394 | int slot = -1; | ||
| 358 | 395 | ||
| 359 | if (data->fn) { | 396 | if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { |
| 360 | vect = __bp_harden_hyp_vecs_start + | 397 | vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start)); |
| 361 | data->hyp_vectors_slot * SZ_2K; | 398 | slot = data->hyp_vectors_slot; |
| 399 | } | ||
| 362 | 400 | ||
| 363 | if (!has_vhe()) | 401 | if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { |
| 364 | vect = lm_alias(vect); | 402 | vect = __kvm_bp_vect_base; |
| 403 | if (slot == -1) | ||
| 404 | slot = __kvm_harden_el2_vector_slot; | ||
| 365 | } | 405 | } |
| 366 | 406 | ||
| 407 | if (slot != -1) | ||
| 408 | vect += slot * SZ_2K; | ||
| 409 | |||
| 367 | return vect; | 410 | return vect; |
| 368 | } | 411 | } |
| 369 | 412 | ||
| 413 | /* This is only called on a !VHE system */ | ||
| 370 | static inline int kvm_map_vectors(void) | 414 | static inline int kvm_map_vectors(void) |
| 371 | { | 415 | { |
| 372 | return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start), | 416 | /* |
| 373 | kvm_ksym_ref(__bp_harden_hyp_vecs_end), | 417 | * HBP = ARM64_HARDEN_BRANCH_PREDICTOR |
| 374 | PAGE_HYP_EXEC); | 418 | * HEL2 = ARM64_HARDEN_EL2_VECTORS |
| 375 | } | 419 | * |
| 420 | * !HBP + !HEL2 -> use direct vectors | ||
| 421 | * HBP + !HEL2 -> use hardened vectors in place | ||
| 422 | * !HBP + HEL2 -> allocate one vector slot and use exec mapping | ||
| 423 | * HBP + HEL2 -> use hardened vectors and use exec mapping | ||
| 424 | */ | ||
| 425 | if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { | ||
| 426 | __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start); | ||
| 427 | __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); | ||
| 428 | } | ||
| 429 | |||
| 430 | if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { | ||
| 431 | phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start); | ||
| 432 | unsigned long size = (__bp_harden_hyp_vecs_end - | ||
| 433 | __bp_harden_hyp_vecs_start); | ||
| 434 | |||
| 435 | /* | ||
| 436 | * Always allocate a spare vector slot, as we don't | ||
| 437 | * know yet which CPUs have a BP hardening slot that | ||
| 438 | * we can reuse. | ||
| 439 | */ | ||
| 440 | __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); | ||
| 441 | BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); | ||
| 442 | return create_hyp_exec_mappings(vect_pa, size, | ||
| 443 | &__kvm_bp_vect_base); | ||
| 444 | } | ||
| 376 | 445 | ||
| 446 | return 0; | ||
| 447 | } | ||
| 377 | #else | 448 | #else |
| 378 | static inline void *kvm_get_hyp_vector(void) | 449 | static inline void *kvm_get_hyp_vector(void) |
| 379 | { | 450 | { |
| 380 | return kvm_ksym_ref(__kvm_hyp_vector); | 451 | return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); |
| 381 | } | 452 | } |
| 382 | 453 | ||
| 383 | static inline int kvm_map_vectors(void) | 454 | static inline int kvm_map_vectors(void) |
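With the static HYP_PAGE_OFFSET masks gone, kern_hyp_va is a five-instruction sequence whose immediates are filled in at boot. The following is a hedged C model of what the patched sequence computes; va_mask, tag_val and tag_lsb are assumptions standing in for the values produced by kvm_update_va_mask() in va_layout.c (added by this series but not part of this hunk), and tag_val is taken to be the random tag already shifted down so that its bit 0 corresponds to bit tag_lsb of the final VA.

    #include <linux/bits.h>
    #include <linux/types.h>

    /* Rotate right, tolerating shift values of 0 and 64 in this model. */
    static u64 ror64(u64 v, unsigned int shift)
    {
            shift &= 63;
            if (!shift)
                    return v;
            return (v >> shift) | (v << (64 - shift));
    }

    /* Hedged model of the patched kern_hyp_va sequence. */
    static u64 model_kern_hyp_va(u64 va, u64 va_mask, u64 tag_val, int tag_lsb)
    {
            u64 v;

            v = va & va_mask;                  /* and reg, reg, #va_mask           */
            v = ror64(v, tag_lsb);             /* rotate the tag position to bit 0 */
            v += tag_val & GENMASK(11, 0);     /* add the low 12 bits of the tag   */
            v += tag_val & GENMASK(23, 12);    /* add the top 12 bits (lsl #12)    */
            return ror64(v, 64 - tag_lsb);     /* rotate back                      */
    }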
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a050d4f3615d..dd320df0d026 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h | |||
| @@ -21,6 +21,8 @@ | |||
| 21 | #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) | 21 | #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) |
| 22 | #define TTBR_ASID_MASK (UL(0xffff) << 48) | 22 | #define TTBR_ASID_MASK (UL(0xffff) << 48) |
| 23 | 23 | ||
| 24 | #define BP_HARDEN_EL2_SLOTS 4 | ||
| 25 | |||
| 24 | #ifndef __ASSEMBLY__ | 26 | #ifndef __ASSEMBLY__ |
| 25 | 27 | ||
| 26 | typedef struct { | 28 | typedef struct { |
| @@ -49,9 +51,13 @@ struct bp_hardening_data { | |||
| 49 | bp_hardening_cb_t fn; | 51 | bp_hardening_cb_t fn; |
| 50 | }; | 52 | }; |
| 51 | 53 | ||
| 52 | #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | 54 | #if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ |
| 55 | defined(CONFIG_HARDEN_EL2_VECTORS)) | ||
| 53 | extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; | 56 | extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; |
| 57 | extern atomic_t arm64_el2_vector_last_slot; | ||
| 58 | #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */ | ||
| 54 | 59 | ||
| 60 | #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | ||
| 55 | DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); | 61 | DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); |
| 56 | 62 | ||
| 57 | static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) | 63 | static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) |
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e7b9f154e476..6171178075dc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h | |||
| @@ -288,6 +288,12 @@ | |||
| 288 | #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) | 288 | #define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0) |
| 289 | #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) | 289 | #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) |
| 290 | 290 | ||
| 291 | #define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0) | ||
| 292 | #define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) | ||
| 293 | #define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) | ||
| 294 | #define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) | ||
| 295 | #define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) | ||
| 296 | |||
| 291 | #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) | 297 | #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) |
| 292 | #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) | 298 | #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) |
| 293 | 299 | ||
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 6a4bd80c75bd..9b55a3f24be7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile | |||
| @@ -55,9 +55,7 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o | |||
| 55 | arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 55 | arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
| 56 | arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o | 56 | arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o |
| 57 | 57 | ||
| 58 | ifeq ($(CONFIG_KVM),y) | 58 | arm64-obj-$(CONFIG_KVM_INDIRECT_VECTORS)+= bpi.o |
| 59 | arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o | ||
| 60 | endif | ||
| 61 | 59 | ||
| 62 | obj-y += $(arm64-obj-y) vdso/ probes/ | 60 | obj-y += $(arm64-obj-y) vdso/ probes/ |
| 63 | obj-m += $(arm64-obj-m) | 61 | obj-m += $(arm64-obj-m) |
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 414288a558c8..5c4bce4ac381 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c | |||
| @@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp | |||
| 107 | return insn; | 107 | return insn; |
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | static void patch_alternative(struct alt_instr *alt, | ||
| 111 | __le32 *origptr, __le32 *updptr, int nr_inst) | ||
| 112 | { | ||
| 113 | __le32 *replptr; | ||
| 114 | int i; | ||
| 115 | |||
| 116 | replptr = ALT_REPL_PTR(alt); | ||
| 117 | for (i = 0; i < nr_inst; i++) { | ||
| 118 | u32 insn; | ||
| 119 | |||
| 120 | insn = get_alt_insn(alt, origptr + i, replptr + i); | ||
| 121 | updptr[i] = cpu_to_le32(insn); | ||
| 122 | } | ||
| 123 | } | ||
| 124 | |||
| 110 | static void __apply_alternatives(void *alt_region, bool use_linear_alias) | 125 | static void __apply_alternatives(void *alt_region, bool use_linear_alias) |
| 111 | { | 126 | { |
| 112 | struct alt_instr *alt; | 127 | struct alt_instr *alt; |
| 113 | struct alt_region *region = alt_region; | 128 | struct alt_region *region = alt_region; |
| 114 | __le32 *origptr, *replptr, *updptr; | 129 | __le32 *origptr, *updptr; |
| 130 | alternative_cb_t alt_cb; | ||
| 115 | 131 | ||
| 116 | for (alt = region->begin; alt < region->end; alt++) { | 132 | for (alt = region->begin; alt < region->end; alt++) { |
| 117 | u32 insn; | 133 | int nr_inst; |
| 118 | int i, nr_inst; | ||
| 119 | 134 | ||
| 120 | if (!cpus_have_cap(alt->cpufeature)) | 135 | /* Use ARM64_CB_PATCH as an unconditional patch */ |
| 136 | if (alt->cpufeature < ARM64_CB_PATCH && | ||
| 137 | !cpus_have_cap(alt->cpufeature)) | ||
| 121 | continue; | 138 | continue; |
| 122 | 139 | ||
| 123 | BUG_ON(alt->alt_len != alt->orig_len); | 140 | if (alt->cpufeature == ARM64_CB_PATCH) |
| 141 | BUG_ON(alt->alt_len != 0); | ||
| 142 | else | ||
| 143 | BUG_ON(alt->alt_len != alt->orig_len); | ||
| 124 | 144 | ||
| 125 | pr_info_once("patching kernel code\n"); | 145 | pr_info_once("patching kernel code\n"); |
| 126 | 146 | ||
| 127 | origptr = ALT_ORIG_PTR(alt); | 147 | origptr = ALT_ORIG_PTR(alt); |
| 128 | replptr = ALT_REPL_PTR(alt); | ||
| 129 | updptr = use_linear_alias ? lm_alias(origptr) : origptr; | 148 | updptr = use_linear_alias ? lm_alias(origptr) : origptr; |
| 130 | nr_inst = alt->alt_len / sizeof(insn); | 149 | nr_inst = alt->orig_len / AARCH64_INSN_SIZE; |
| 131 | 150 | ||
| 132 | for (i = 0; i < nr_inst; i++) { | 151 | if (alt->cpufeature < ARM64_CB_PATCH) |
| 133 | insn = get_alt_insn(alt, origptr + i, replptr + i); | 152 | alt_cb = patch_alternative; |
| 134 | updptr[i] = cpu_to_le32(insn); | 153 | else |
| 135 | } | 154 | alt_cb = ALT_REPL_PTR(alt); |
| 155 | |||
| 156 | alt_cb(alt, origptr, updptr, nr_inst); | ||
| 136 | 157 | ||
| 137 | flush_icache_range((uintptr_t)origptr, | 158 | flush_icache_range((uintptr_t)origptr, |
| 138 | (uintptr_t)(origptr + nr_inst)); | 159 | (uintptr_t)(origptr + nr_inst)); |
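An ARM64_CB_PATCH entry always fires and hands the original instructions to a callback instead of copying a fixed replacement sequence. Below is a hedged example of such a callback, following the alternative_cb_t signature used above; the body is illustrative only and simply overwrites the region with NOPs.

    #include <asm/alternative.h>
    #include <asm/insn.h>

    /* Illustrative alternative_cb callback: NOP out every patched instruction. */
    static void example_patch_nops(struct alt_instr *alt,
                                   __le32 *origptr, __le32 *updptr, int nr_inst)
    {
            int i;

            for (i = 0; i < nr_inst; i++)
                    updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
    }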
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 1303e04110cd..78e1b0a70aaf 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c | |||
| @@ -138,6 +138,7 @@ int main(void) | |||
| 138 | DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); | 138 | DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); |
| 139 | DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); | 139 | DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); |
| 140 | DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); | 140 | DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); |
| 141 | DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); | ||
| 141 | #endif | 142 | #endif |
| 142 | #ifdef CONFIG_CPU_PM | 143 | #ifdef CONFIG_CPU_PM |
| 143 | DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); | 144 | DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); |
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index e5de33513b5d..bb0b67722e86 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S | |||
| @@ -19,42 +19,61 @@ | |||
| 19 | #include <linux/linkage.h> | 19 | #include <linux/linkage.h> |
| 20 | #include <linux/arm-smccc.h> | 20 | #include <linux/arm-smccc.h> |
| 21 | 21 | ||
| 22 | .macro ventry target | 22 | #include <asm/alternative.h> |
| 23 | .rept 31 | 23 | #include <asm/mmu.h> |
| 24 | |||
| 25 | .macro hyp_ventry | ||
| 26 | .align 7 | ||
| 27 | 1: .rept 27 | ||
| 24 | nop | 28 | nop |
| 25 | .endr | 29 | .endr |
| 26 | b \target | 30 | /* |
| 31 | * The default sequence is to directly branch to the KVM vectors, | ||
| 32 | * using the computed offset. This applies for VHE as well as | ||
| 33 | * !ARM64_HARDEN_EL2_VECTORS. | ||
| 34 | * | ||
| 35 | * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced | ||
| 36 | * with: | ||
| 37 | * | ||
| 38 | * stp x0, x1, [sp, #-16]! | ||
| 39 | * movz x0, #(addr & 0xffff) | ||
| 40 | * movk x0, #((addr >> 16) & 0xffff), lsl #16 | ||
| 41 | * movk x0, #((addr >> 32) & 0xffff), lsl #32 | ||
| 42 | * br x0 | ||
| 43 | * | ||
| 44 | * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4. | ||
| 45 | * See kvm_patch_vector_branch for details. | ||
| 46 | */ | ||
| 47 | alternative_cb kvm_patch_vector_branch | ||
| 48 | b __kvm_hyp_vector + (1b - 0b) | ||
| 49 | nop | ||
| 50 | nop | ||
| 51 | nop | ||
| 52 | nop | ||
| 53 | alternative_cb_end | ||
| 27 | .endm | 54 | .endm |
| 28 | 55 | ||
| 29 | .macro vectors target | 56 | .macro generate_vectors |
| 30 | ventry \target + 0x000 | 57 | 0: |
| 31 | ventry \target + 0x080 | 58 | .rept 16 |
| 32 | ventry \target + 0x100 | 59 | hyp_ventry |
| 33 | ventry \target + 0x180 | 60 | .endr |
| 34 | 61 | .org 0b + SZ_2K // Safety measure | |
| 35 | ventry \target + 0x200 | 62 | .endm |
| 36 | ventry \target + 0x280 | ||
| 37 | ventry \target + 0x300 | ||
| 38 | ventry \target + 0x380 | ||
| 39 | 63 | ||
| 40 | ventry \target + 0x400 | ||
| 41 | ventry \target + 0x480 | ||
| 42 | ventry \target + 0x500 | ||
| 43 | ventry \target + 0x580 | ||
| 44 | 64 | ||
| 45 | ventry \target + 0x600 | 65 | .text |
| 46 | ventry \target + 0x680 | 66 | .pushsection .hyp.text, "ax" |
| 47 | ventry \target + 0x700 | ||
| 48 | ventry \target + 0x780 | ||
| 49 | .endm | ||
| 50 | 67 | ||
| 51 | .align 11 | 68 | .align 11 |
| 52 | ENTRY(__bp_harden_hyp_vecs_start) | 69 | ENTRY(__bp_harden_hyp_vecs_start) |
| 53 | .rept 4 | 70 | .rept BP_HARDEN_EL2_SLOTS |
| 54 | vectors __kvm_hyp_vector | 71 | generate_vectors |
| 55 | .endr | 72 | .endr |
| 56 | ENTRY(__bp_harden_hyp_vecs_end) | 73 | ENTRY(__bp_harden_hyp_vecs_end) |
| 57 | 74 | ||
| 75 | .popsection | ||
| 76 | |||
| 58 | ENTRY(__qcom_hyp_sanitize_link_stack_start) | 77 | ENTRY(__qcom_hyp_sanitize_link_stack_start) |
| 59 | stp x29, x30, [sp, #-16]! | 78 | stp x29, x30, [sp, #-16]! |
| 60 | .rept 16 | 79 | .rept 16 |
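Each hyp_ventry slot reserves room for the replacement sequence described in the comment above. As a hedged model, the three movz/movk immediates simply decompose the branch target into 16-bit chunks; the actual instructions are generated by kvm_patch_vector_branch() in va_layout.c, which is not part of this hunk.

    #include <linux/types.h>

    /* Hedged model: split the target address into the movz/movk immediates. */
    static void model_vector_branch_imms(u64 addr, u16 imm[3])
    {
            /* addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4 */
            imm[0] = addr & 0xffff;             /* movz x0, #imm[0]           */
            imm[1] = (addr >> 16) & 0xffff;     /* movk x0, #imm[1], lsl #16  */
            imm[2] = (addr >> 32) & 0xffff;     /* movk x0, #imm[2], lsl #32  */
    }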
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2df792771053..9262ec57f5ab 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c | |||
| @@ -78,6 +78,8 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) | |||
| 78 | config_sctlr_el1(SCTLR_EL1_UCT, 0); | 78 | config_sctlr_el1(SCTLR_EL1_UCT, 0); |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); | ||
| 82 | |||
| 81 | #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR | 83 | #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR |
| 82 | #include <asm/mmu_context.h> | 84 | #include <asm/mmu_context.h> |
| 83 | #include <asm/cacheflush.h> | 85 | #include <asm/cacheflush.h> |
| @@ -108,7 +110,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, | |||
| 108 | const char *hyp_vecs_start, | 110 | const char *hyp_vecs_start, |
| 109 | const char *hyp_vecs_end) | 111 | const char *hyp_vecs_end) |
| 110 | { | 112 | { |
| 111 | static int last_slot = -1; | ||
| 112 | static DEFINE_SPINLOCK(bp_lock); | 113 | static DEFINE_SPINLOCK(bp_lock); |
| 113 | int cpu, slot = -1; | 114 | int cpu, slot = -1; |
| 114 | 115 | ||
| @@ -121,10 +122,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, | |||
| 121 | } | 122 | } |
| 122 | 123 | ||
| 123 | if (slot == -1) { | 124 | if (slot == -1) { |
| 124 | last_slot++; | 125 | slot = atomic_inc_return(&arm64_el2_vector_last_slot); |
| 125 | BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) | 126 | BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); |
| 126 | / SZ_2K) <= last_slot); | ||
| 127 | slot = last_slot; | ||
| 128 | __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); | 127 | __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); |
| 129 | } | 128 | } |
| 130 | 129 | ||
| @@ -348,6 +347,10 @@ static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = { | |||
| 348 | 347 | ||
| 349 | #endif | 348 | #endif |
| 350 | 349 | ||
| 350 | #ifndef ERRATA_MIDR_ALL_VERSIONS | ||
| 351 | #define ERRATA_MIDR_ALL_VERSIONS(x) MIDR_ALL_VERSIONS(x) | ||
| 352 | #endif | ||
| 353 | |||
| 351 | const struct arm64_cpu_capabilities arm64_errata[] = { | 354 | const struct arm64_cpu_capabilities arm64_errata[] = { |
| 352 | #if defined(CONFIG_ARM64_ERRATUM_826319) || \ | 355 | #if defined(CONFIG_ARM64_ERRATUM_826319) || \ |
| 353 | defined(CONFIG_ARM64_ERRATUM_827319) || \ | 356 | defined(CONFIG_ARM64_ERRATUM_827319) || \ |
| @@ -501,6 +504,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = { | |||
| 501 | ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), | 504 | ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), |
| 502 | }, | 505 | }, |
| 503 | #endif | 506 | #endif |
| 507 | #ifdef CONFIG_HARDEN_EL2_VECTORS | ||
| 508 | { | ||
| 509 | .desc = "Cortex-A57 EL2 vector hardening", | ||
| 510 | .capability = ARM64_HARDEN_EL2_VECTORS, | ||
| 511 | ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), | ||
| 512 | }, | ||
| 513 | { | ||
| 514 | .desc = "Cortex-A72 EL2 vector hardening", | ||
| 515 | .capability = ARM64_HARDEN_EL2_VECTORS, | ||
| 516 | ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), | ||
| 517 | }, | ||
| 518 | #endif | ||
| 504 | { | 519 | { |
| 505 | } | 520 | } |
| 506 | }; | 521 | }; |
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 96b15d7b10a8..536d572e5596 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c | |||
| @@ -838,19 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ | |||
| 838 | MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); | 838 | MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); |
| 839 | } | 839 | } |
| 840 | 840 | ||
| 841 | static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, | ||
| 842 | int __unused) | ||
| 843 | { | ||
| 844 | phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); | ||
| 845 | |||
| 846 | /* | ||
| 847 | * Activate the lower HYP offset only if: | ||
| 848 | * - the idmap doesn't clash with it, | ||
| 849 | * - the kernel is not running at EL2. | ||
| 850 | */ | ||
| 851 | return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); | ||
| 852 | } | ||
| 853 | |||
| 854 | static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) | 841 | static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) |
| 855 | { | 842 | { |
| 856 | u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); | 843 | u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); |
| @@ -1121,12 +1108,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { | |||
| 1121 | .field_pos = ID_AA64PFR0_EL0_SHIFT, | 1108 | .field_pos = ID_AA64PFR0_EL0_SHIFT, |
| 1122 | .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, | 1109 | .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, |
| 1123 | }, | 1110 | }, |
| 1124 | { | ||
| 1125 | .desc = "Reduced HYP mapping offset", | ||
| 1126 | .capability = ARM64_HYP_OFFSET_LOW, | ||
| 1127 | .type = ARM64_CPUCAP_SYSTEM_FEATURE, | ||
| 1128 | .matches = hyp_offset_low, | ||
| 1129 | }, | ||
| 1130 | #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 | 1111 | #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 |
| 1131 | { | 1112 | { |
| 1132 | .desc = "Kernel page table isolation (KPTI)", | 1113 | .desc = "Kernel page table isolation (KPTI)", |
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2b6b8b24e5ab..b0853069702f 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S | |||
| @@ -577,6 +577,13 @@ set_hcr: | |||
| 577 | 7: | 577 | 7: |
| 578 | msr mdcr_el2, x3 // Configure debug traps | 578 | msr mdcr_el2, x3 // Configure debug traps |
| 579 | 579 | ||
| 580 | /* LORegions */ | ||
| 581 | mrs x1, id_aa64mmfr1_el1 | ||
| 582 | ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 | ||
| 583 | cbz x0, 1f | ||
| 584 | msr_s SYS_LORC_EL1, xzr | ||
| 585 | 1: | ||
| 586 | |||
| 580 | /* Stage-2 translation */ | 587 | /* Stage-2 translation */ |
| 581 | msr vttbr_el2, xzr | 588 | msr vttbr_el2, xzr |
| 582 | 589 | ||
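The new head.S block disables LORegions before entering the kernel proper. A hedged C rendering of the same check, for readability: if ID_AA64MMFR1_EL1.LO is non-zero, LORegions are implemented and LORC_EL1 is cleared so no limited ordering region is left enabled.

    #include <asm/sysreg.h>

    /* Hedged C equivalent of the assembly sequence above. */
    static void example_disable_lor(void)
    {
            u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);

            if ((mmfr1 >> ID_AA64MMFR1_LOR_SHIFT) & 0xf)
                    write_sysreg_s(0, SYS_LORC_EL1);
    }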
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 2718a77da165..816d03c4c913 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | #define AARCH64_INSN_SF_BIT BIT(31) | 36 | #define AARCH64_INSN_SF_BIT BIT(31) |
| 37 | #define AARCH64_INSN_N_BIT BIT(22) | 37 | #define AARCH64_INSN_N_BIT BIT(22) |
| 38 | #define AARCH64_INSN_LSL_12 BIT(22) | ||
| 38 | 39 | ||
| 39 | static int aarch64_insn_encoding_class[] = { | 40 | static int aarch64_insn_encoding_class[] = { |
| 40 | AARCH64_INSN_CLS_UNKNOWN, | 41 | AARCH64_INSN_CLS_UNKNOWN, |
| @@ -343,6 +344,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, | |||
| 343 | mask = BIT(6) - 1; | 344 | mask = BIT(6) - 1; |
| 344 | shift = 16; | 345 | shift = 16; |
| 345 | break; | 346 | break; |
| 347 | case AARCH64_INSN_IMM_N: | ||
| 348 | mask = 1; | ||
| 349 | shift = 22; | ||
| 350 | break; | ||
| 346 | default: | 351 | default: |
| 347 | return -EINVAL; | 352 | return -EINVAL; |
| 348 | } | 353 | } |
| @@ -899,9 +904,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, | |||
| 899 | return AARCH64_BREAK_FAULT; | 904 | return AARCH64_BREAK_FAULT; |
| 900 | } | 905 | } |
| 901 | 906 | ||
| 907 | /* We can't encode more than a 24bit value (12bit + 12bit shift) */ | ||
| 908 | if (imm & ~(BIT(24) - 1)) | ||
| 909 | goto out; | ||
| 910 | |||
| 911 | /* If we have something in the top 12 bits... */ | ||
| 902 | if (imm & ~(SZ_4K - 1)) { | 912 | if (imm & ~(SZ_4K - 1)) { |
| 903 | pr_err("%s: invalid immediate encoding %d\n", __func__, imm); | 913 | /* ... and in the low 12 bits -> error */ |
| 904 | return AARCH64_BREAK_FAULT; | 914 | if (imm & (SZ_4K - 1)) |
| 915 | goto out; | ||
| 916 | |||
| 917 | imm >>= 12; | ||
| 918 | insn |= AARCH64_INSN_LSL_12; | ||
| 905 | } | 919 | } |
| 906 | 920 | ||
| 907 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); | 921 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); |
| @@ -909,6 +923,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, | |||
| 909 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src); | 923 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src); |
| 910 | 924 | ||
| 911 | return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); | 925 | return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); |
| 926 | |||
| 927 | out: | ||
| 928 | pr_err("%s: invalid immediate encoding %d\n", __func__, imm); | ||
| 929 | return AARCH64_BREAK_FAULT; | ||
| 912 | } | 930 | } |
| 913 | 931 | ||
| 914 | u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, | 932 | u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, |
| @@ -1481,3 +1499,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { | |||
| 1481 | __check_hi, __check_ls, __check_ge, __check_lt, | 1499 | __check_hi, __check_ls, __check_ge, __check_lt, |
| 1482 | __check_gt, __check_le, __check_al, __check_al | 1500 | __check_gt, __check_le, __check_al, __check_al |
| 1483 | }; | 1501 | }; |
| 1502 | |||
| 1503 | static bool range_of_ones(u64 val) | ||
| 1504 | { | ||
| 1505 | /* Doesn't handle full ones or full zeroes */ | ||
| 1506 | u64 sval = val >> __ffs64(val); | ||
| 1507 | |||
| 1508 | /* One of Sean Eron Anderson's bithack tricks */ | ||
| 1509 | return ((sval + 1) & (sval)) == 0; | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | static u32 aarch64_encode_immediate(u64 imm, | ||
| 1513 | enum aarch64_insn_variant variant, | ||
| 1514 | u32 insn) | ||
| 1515 | { | ||
| 1516 | unsigned int immr, imms, n, ones, ror, esz, tmp; | ||
| 1517 | u64 mask = ~0UL; | ||
| 1518 | |||
| 1519 | /* Can't encode full zeroes or full ones */ | ||
| 1520 | if (!imm || !~imm) | ||
| 1521 | return AARCH64_BREAK_FAULT; | ||
| 1522 | |||
| 1523 | switch (variant) { | ||
| 1524 | case AARCH64_INSN_VARIANT_32BIT: | ||
| 1525 | if (upper_32_bits(imm)) | ||
| 1526 | return AARCH64_BREAK_FAULT; | ||
| 1527 | esz = 32; | ||
| 1528 | break; | ||
| 1529 | case AARCH64_INSN_VARIANT_64BIT: | ||
| 1530 | insn |= AARCH64_INSN_SF_BIT; | ||
| 1531 | esz = 64; | ||
| 1532 | break; | ||
| 1533 | default: | ||
| 1534 | pr_err("%s: unknown variant encoding %d\n", __func__, variant); | ||
| 1535 | return AARCH64_BREAK_FAULT; | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | /* | ||
| 1539 | * Inverse of Replicate(). Try to spot a repeating pattern | ||
| 1540 | * with a pow2 stride. | ||
| 1541 | */ | ||
| 1542 | for (tmp = esz / 2; tmp >= 2; tmp /= 2) { | ||
| 1543 | u64 emask = BIT(tmp) - 1; | ||
| 1544 | |||
| 1545 | if ((imm & emask) != ((imm >> tmp) & emask)) | ||
| 1546 | break; | ||
| 1547 | |||
| 1548 | esz = tmp; | ||
| 1549 | mask = emask; | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | /* N is only set if we're encoding a 64bit value */ | ||
| 1553 | n = esz == 64; | ||
| 1554 | |||
| 1555 | /* Trim imm to the element size */ | ||
| 1556 | imm &= mask; | ||
| 1557 | |||
| 1558 | /* That's how many ones we need to encode */ | ||
| 1559 | ones = hweight64(imm); | ||
| 1560 | |||
| 1561 | /* | ||
| 1562 | * imms is set to (ones - 1), prefixed with a string of ones | ||
| 1563 | * and a zero if they fit. Cap it to 6 bits. | ||
| 1564 | */ | ||
| 1565 | imms = ones - 1; | ||
| 1566 | imms |= 0xf << ffs(esz); | ||
| 1567 | imms &= BIT(6) - 1; | ||
| 1568 | |||
| 1569 | /* Compute the rotation */ | ||
| 1570 | if (range_of_ones(imm)) { | ||
| 1571 | /* | ||
| 1572 | * Pattern: 0..01..10..0 | ||
| 1573 | * | ||
| 1574 | * Compute how many rotate we need to align it right | ||
| 1575 | */ | ||
| 1576 | ror = __ffs64(imm); | ||
| 1577 | } else { | ||
| 1578 | /* | ||
| 1579 | * Pattern: 0..01..10..01..1 | ||
| 1580 | * | ||
| 1581 | * Fill the unused top bits with ones, and check if | ||
| 1582 | * the result is a valid immediate (all ones with a | ||
| 1583 | * contiguous range of zeroes). | ||
| 1584 | */ | ||
| 1585 | imm |= ~mask; | ||
| 1586 | if (!range_of_ones(~imm)) | ||
| 1587 | return AARCH64_BREAK_FAULT; | ||
| 1588 | |||
| 1589 | /* | ||
| 1590 | * Compute the rotation to get a continuous set of | ||
| 1591 | * ones, with the first bit set at position 0 | ||
| 1592 | */ | ||
| 1593 | ror = fls(~imm); | ||
| 1594 | } | ||
| 1595 | |||
| 1596 | /* | ||
| 1597 | * immr is the number of bits we need to rotate back to the | ||
| 1598 | * original set of ones. Note that this is relative to the | ||
| 1599 | * element size... | ||
| 1600 | */ | ||
| 1601 | immr = (esz - ror) % esz; | ||
| 1602 | |||
| 1603 | insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n); | ||
| 1604 | insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr); | ||
| 1605 | return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms); | ||
| 1606 | } | ||
| 1607 | |||
| 1608 | u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, | ||
| 1609 | enum aarch64_insn_variant variant, | ||
| 1610 | enum aarch64_insn_register Rn, | ||
| 1611 | enum aarch64_insn_register Rd, | ||
| 1612 | u64 imm) | ||
| 1613 | { | ||
| 1614 | u32 insn; | ||
| 1615 | |||
| 1616 | switch (type) { | ||
| 1617 | case AARCH64_INSN_LOGIC_AND: | ||
| 1618 | insn = aarch64_insn_get_and_imm_value(); | ||
| 1619 | break; | ||
| 1620 | case AARCH64_INSN_LOGIC_ORR: | ||
| 1621 | insn = aarch64_insn_get_orr_imm_value(); | ||
| 1622 | break; | ||
| 1623 | case AARCH64_INSN_LOGIC_EOR: | ||
| 1624 | insn = aarch64_insn_get_eor_imm_value(); | ||
| 1625 | break; | ||
| 1626 | case AARCH64_INSN_LOGIC_AND_SETFLAGS: | ||
| 1627 | insn = aarch64_insn_get_ands_imm_value(); | ||
| 1628 | break; | ||
| 1629 | default: | ||
| 1630 | pr_err("%s: unknown logical encoding %d\n", __func__, type); | ||
| 1631 | return AARCH64_BREAK_FAULT; | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd); | ||
| 1635 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); | ||
| 1636 | return aarch64_encode_immediate(imm, variant, insn); | ||
| 1637 | } | ||
| 1638 | |||
| 1639 | u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, | ||
| 1640 | enum aarch64_insn_register Rm, | ||
| 1641 | enum aarch64_insn_register Rn, | ||
| 1642 | enum aarch64_insn_register Rd, | ||
| 1643 | u8 lsb) | ||
| 1644 | { | ||
| 1645 | u32 insn; | ||
| 1646 | |||
| 1647 | insn = aarch64_insn_get_extr_value(); | ||
| 1648 | |||
| 1649 | switch (variant) { | ||
| 1650 | case AARCH64_INSN_VARIANT_32BIT: | ||
| 1651 | if (lsb > 31) | ||
| 1652 | return AARCH64_BREAK_FAULT; | ||
| 1653 | break; | ||
| 1654 | case AARCH64_INSN_VARIANT_64BIT: | ||
| 1655 | if (lsb > 63) | ||
| 1656 | return AARCH64_BREAK_FAULT; | ||
| 1657 | insn |= AARCH64_INSN_SF_BIT; | ||
| 1658 | insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1); | ||
| 1659 | break; | ||
| 1660 | default: | ||
| 1661 | pr_err("%s: unknown variant encoding %d\n", __func__, variant); | ||
| 1662 | return AARCH64_BREAK_FAULT; | ||
| 1663 | } | ||
| 1664 | |||
| 1665 | insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb); | ||
| 1666 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd); | ||
| 1667 | insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); | ||
| 1668 | return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); | ||
| 1669 | } | ||
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 2257dfcc44cc..a2e3a5af1113 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig | |||
| @@ -57,6 +57,9 @@ config KVM_ARM_PMU | |||
| 57 | Adds support for a virtual Performance Monitoring Unit (PMU) in | 57 | Adds support for a virtual Performance Monitoring Unit (PMU) in |
| 58 | virtual machines. | 58 | virtual machines. |
| 59 | 59 | ||
| 60 | config KVM_INDIRECT_VECTORS | ||
| 61 | def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) | ||
| 62 | |||
| 60 | source drivers/vhost/Kconfig | 63 | source drivers/vhost/Kconfig |
| 61 | 64 | ||
| 62 | endif # VIRTUALIZATION | 65 | endif # VIRTUALIZATION |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 87c4f7ae24de..93afff91cb7c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile | |||
| @@ -16,7 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e | |||
| 16 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o | 16 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o |
| 17 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o | 17 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o |
| 18 | 18 | ||
| 19 | kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o | 19 | kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o |
| 20 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o | 20 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o |
| 21 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o | 21 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o |
| 22 | kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o | 22 | kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o |
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index fa63b28c65e0..a1f4ebdfe6d3 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c | |||
| @@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2); | |||
| 46 | */ | 46 | */ |
| 47 | static void save_guest_debug_regs(struct kvm_vcpu *vcpu) | 47 | static void save_guest_debug_regs(struct kvm_vcpu *vcpu) |
| 48 | { | 48 | { |
| 49 | vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1); | 49 | u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1); |
| 50 | |||
| 51 | vcpu->arch.guest_debug_preserved.mdscr_el1 = val; | ||
| 50 | 52 | ||
| 51 | trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", | 53 | trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", |
| 52 | vcpu->arch.guest_debug_preserved.mdscr_el1); | 54 | vcpu->arch.guest_debug_preserved.mdscr_el1); |
| @@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) | |||
| 54 | 56 | ||
| 55 | static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) | 57 | static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) |
| 56 | { | 58 | { |
| 57 | vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1; | 59 | u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1; |
| 60 | |||
| 61 | vcpu_write_sys_reg(vcpu, val, MDSCR_EL1); | ||
| 58 | 62 | ||
| 59 | trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", | 63 | trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", |
| 60 | vcpu_sys_reg(vcpu, MDSCR_EL1)); | 64 | vcpu_read_sys_reg(vcpu, MDSCR_EL1)); |
| 61 | } | 65 | } |
| 62 | 66 | ||
| 63 | /** | 67 | /** |
| @@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) | |||
| 108 | void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) | 112 | void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) |
| 109 | { | 113 | { |
| 110 | bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); | 114 | bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); |
| 115 | unsigned long mdscr; | ||
| 111 | 116 | ||
| 112 | trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); | 117 | trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); |
| 113 | 118 | ||
| @@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) | |||
| 152 | */ | 157 | */ |
| 153 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 158 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { |
| 154 | *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; | 159 | *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; |
| 155 | vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS; | 160 | mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); |
| 161 | mdscr |= DBG_MDSCR_SS; | ||
| 162 | vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); | ||
| 156 | } else { | 163 | } else { |
| 157 | vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS; | 164 | mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); |
| 165 | mdscr &= ~DBG_MDSCR_SS; | ||
| 166 | vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); | ||
| 158 | } | 167 | } |
| 159 | 168 | ||
| 160 | trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); | 169 | trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu)); |
| @@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) | |||
| 170 | */ | 179 | */ |
| 171 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { | 180 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) { |
| 172 | /* Enable breakpoints/watchpoints */ | 181 | /* Enable breakpoints/watchpoints */ |
| 173 | vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE; | 182 | mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); |
| 183 | mdscr |= DBG_MDSCR_MDE; | ||
| 184 | vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); | ||
| 174 | 185 | ||
| 175 | vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; | 186 | vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; |
| 176 | vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; | 187 | vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; |
| @@ -193,8 +204,12 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) | |||
| 193 | if (trap_debug) | 204 | if (trap_debug) |
| 194 | vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; | 205 | vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; |
| 195 | 206 | ||
| 207 | /* If KDE or MDE are set, perform a full save/restore cycle. */ | ||
| 208 | if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) | ||
| 209 | vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; | ||
| 210 | |||
| 196 | trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); | 211 | trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); |
| 197 | trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1)); | 212 | trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); |
| 198 | } | 213 | } |
| 199 | 214 | ||
| 200 | void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) | 215 | void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) |
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 5aa9ccf6db99..6fd91b31a131 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S | |||
| @@ -117,7 +117,6 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE) | |||
| 117 | /* Set the stack and new vectors */ | 117 | /* Set the stack and new vectors */ |
| 118 | kern_hyp_va x1 | 118 | kern_hyp_va x1 |
| 119 | mov sp, x1 | 119 | mov sp, x1 |
| 120 | kern_hyp_va x2 | ||
| 121 | msr vbar_el2, x2 | 120 | msr vbar_el2, x2 |
| 122 | 121 | ||
| 123 | /* copy tpidr_el1 into tpidr_el2 for use by HYP */ | 122 | /* copy tpidr_el1 into tpidr_el2 for use by HYP */ |
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index f04400d494b7..4313f7475333 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile | |||
| @@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING | |||
| 7 | 7 | ||
| 8 | KVM=../../../../virt/kvm | 8 | KVM=../../../../virt/kvm |
| 9 | 9 | ||
| 10 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o | ||
| 11 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o | 10 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o |
| 12 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o | 11 | obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o |
| 13 | 12 | ||
| 13 | obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o | ||
| 14 | obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o | 14 | obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o |
| 15 | obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o | 15 | obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o |
| 16 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o | 16 | obj-$(CONFIG_KVM_ARM_HOST) += entry.o |
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index dabb5cc7b087..3e717f66f011 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c | |||
| @@ -66,11 +66,6 @@ | |||
| 66 | default: write_debug(ptr[0], reg, 0); \ | 66 | default: write_debug(ptr[0], reg, 0); \ |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1) | ||
| 70 | { | ||
| 71 | /* The vcpu can run. but it can't hide. */ | ||
| 72 | } | ||
| 73 | |||
| 74 | static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) | 69 | static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) |
| 75 | { | 70 | { |
| 76 | u64 reg; | 71 | u64 reg; |
| @@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) | |||
| 103 | dsb(nsh); | 98 | dsb(nsh); |
| 104 | } | 99 | } |
| 105 | 100 | ||
| 106 | static hyp_alternate_select(__debug_save_spe, | 101 | static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1) |
| 107 | __debug_save_spe_nvhe, __debug_save_spe_vhe, | ||
| 108 | ARM64_HAS_VIRT_HOST_EXTN); | ||
| 109 | |||
| 110 | static void __hyp_text __debug_restore_spe(u64 pmscr_el1) | ||
| 111 | { | 102 | { |
| 112 | if (!pmscr_el1) | 103 | if (!pmscr_el1) |
| 113 | return; | 104 | return; |
| @@ -119,16 +110,13 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1) | |||
| 119 | write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); | 110 | write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); |
| 120 | } | 111 | } |
| 121 | 112 | ||
| 122 | void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, | 113 | static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, |
| 123 | struct kvm_guest_debug_arch *dbg, | 114 | struct kvm_guest_debug_arch *dbg, |
| 124 | struct kvm_cpu_context *ctxt) | 115 | struct kvm_cpu_context *ctxt) |
| 125 | { | 116 | { |
| 126 | u64 aa64dfr0; | 117 | u64 aa64dfr0; |
| 127 | int brps, wrps; | 118 | int brps, wrps; |
| 128 | 119 | ||
| 129 | if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) | ||
| 130 | return; | ||
| 131 | |||
| 132 | aa64dfr0 = read_sysreg(id_aa64dfr0_el1); | 120 | aa64dfr0 = read_sysreg(id_aa64dfr0_el1); |
| 133 | brps = (aa64dfr0 >> 12) & 0xf; | 121 | brps = (aa64dfr0 >> 12) & 0xf; |
| 134 | wrps = (aa64dfr0 >> 20) & 0xf; | 122 | wrps = (aa64dfr0 >> 20) & 0xf; |
| @@ -141,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, | |||
| 141 | ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); | 129 | ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); |
| 142 | } | 130 | } |
| 143 | 131 | ||
| 144 | void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, | 132 | static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, |
| 145 | struct kvm_guest_debug_arch *dbg, | 133 | struct kvm_guest_debug_arch *dbg, |
| 146 | struct kvm_cpu_context *ctxt) | 134 | struct kvm_cpu_context *ctxt) |
| 147 | { | 135 | { |
| 148 | u64 aa64dfr0; | 136 | u64 aa64dfr0; |
| 149 | int brps, wrps; | 137 | int brps, wrps; |
| 150 | 138 | ||
| 151 | if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) | ||
| 152 | return; | ||
| 153 | |||
| 154 | aa64dfr0 = read_sysreg(id_aa64dfr0_el1); | 139 | aa64dfr0 = read_sysreg(id_aa64dfr0_el1); |
| 155 | 140 | ||
| 156 | brps = (aa64dfr0 >> 12) & 0xf; | 141 | brps = (aa64dfr0 >> 12) & 0xf; |
| @@ -164,27 +149,54 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, | |||
| 164 | write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); | 149 | write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); |
| 165 | } | 150 | } |
| 166 | 151 | ||
| 167 | void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) | 152 | void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) |
| 168 | { | 153 | { |
| 169 | /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform | 154 | struct kvm_cpu_context *host_ctxt; |
| 170 | * a full save/restore cycle. */ | 155 | struct kvm_cpu_context *guest_ctxt; |
| 171 | if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || | 156 | struct kvm_guest_debug_arch *host_dbg; |
| 172 | (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) | 157 | struct kvm_guest_debug_arch *guest_dbg; |
| 173 | vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; | 158 | |
| 174 | 159 | /* | |
| 175 | __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs, | 160 | * Non-VHE: Disable and flush SPE data generation |
| 176 | kern_hyp_va(vcpu->arch.host_cpu_context)); | 161 | * VHE: The vcpu can run, but it can't hide. |
| 177 | __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1); | 162 | */ |
| 163 | if (!has_vhe()) | ||
| 164 | __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); | ||
| 165 | |||
| 166 | if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) | ||
| 167 | return; | ||
| 168 | |||
| 169 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | ||
| 170 | guest_ctxt = &vcpu->arch.ctxt; | ||
| 171 | host_dbg = &vcpu->arch.host_debug_state.regs; | ||
| 172 | guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); | ||
| 173 | |||
| 174 | __debug_save_state(vcpu, host_dbg, host_ctxt); | ||
| 175 | __debug_restore_state(vcpu, guest_dbg, guest_ctxt); | ||
| 178 | } | 176 | } |
| 179 | 177 | ||
| 180 | void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) | 178 | void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) |
| 181 | { | 179 | { |
| 182 | __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); | 180 | struct kvm_cpu_context *host_ctxt; |
| 183 | __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs, | 181 | struct kvm_cpu_context *guest_ctxt; |
| 184 | kern_hyp_va(vcpu->arch.host_cpu_context)); | 182 | struct kvm_guest_debug_arch *host_dbg; |
| 183 | struct kvm_guest_debug_arch *guest_dbg; | ||
| 184 | |||
| 185 | if (!has_vhe()) | ||
| 186 | __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); | ||
| 187 | |||
| 188 | if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) | ||
| 189 | return; | ||
| 190 | |||
| 191 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | ||
| 192 | guest_ctxt = &vcpu->arch.ctxt; | ||
| 193 | host_dbg = &vcpu->arch.host_debug_state.regs; | ||
| 194 | guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); | ||
| 195 | |||
| 196 | __debug_save_state(vcpu, guest_dbg, guest_ctxt); | ||
| 197 | __debug_restore_state(vcpu, host_dbg, host_ctxt); | ||
| 185 | 198 | ||
| 186 | if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) | 199 | vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; |
| 187 | vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; | ||
| 188 | } | 200 | } |
| 189 | 201 | ||
| 190 | u32 __hyp_text __kvm_get_mdcr_el2(void) | 202 | u32 __hyp_text __kvm_get_mdcr_el2(void) |
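The debug-sr.c hunk above is one instance of a pattern repeated throughout this series: the boot-patched hyp_alternate_select() function-pointer dispatch is dropped in favour of a direct has_vhe() check. A minimal sketch of the change (an illustrative fragment, not a compilable unit; all identifiers are the ones visible in the hunk):

	/* Old: select one of two implementations at boot via code patching and
	 * call it through the resulting function pointer. */
	static hyp_alternate_select(__debug_save_spe,
				    __debug_save_spe_nvhe, __debug_save_spe_vhe,
				    ARM64_HAS_VIRT_HOST_EXTN);

		__debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);

	/* New: a plain runtime branch.  The VHE variant was an empty stub, so
	 * only the non-VHE call remains and the indirection disappears. */
		if (!has_vhe())
			__debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);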
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index fdd1068ee3a5..1f458f7c3b44 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S | |||
| @@ -62,9 +62,6 @@ ENTRY(__guest_enter) | |||
| 62 | // Store the host regs | 62 | // Store the host regs |
| 63 | save_callee_saved_regs x1 | 63 | save_callee_saved_regs x1 |
| 64 | 64 | ||
| 65 | // Store host_ctxt and vcpu for use at exit time | ||
| 66 | stp x1, x0, [sp, #-16]! | ||
| 67 | |||
| 68 | add x18, x0, #VCPU_CONTEXT | 65 | add x18, x0, #VCPU_CONTEXT |
| 69 | 66 | ||
| 70 | // Restore guest regs x0-x17 | 67 | // Restore guest regs x0-x17 |
| @@ -118,8 +115,7 @@ ENTRY(__guest_exit) | |||
| 118 | // Store the guest regs x19-x29, lr | 115 | // Store the guest regs x19-x29, lr |
| 119 | save_callee_saved_regs x1 | 116 | save_callee_saved_regs x1 |
| 120 | 117 | ||
| 121 | // Restore the host_ctxt from the stack | 118 | get_host_ctxt x2, x3 |
| 122 | ldr x2, [sp], #16 | ||
| 123 | 119 | ||
| 124 | // Now restore the host regs | 120 | // Now restore the host regs |
| 125 | restore_callee_saved_regs x2 | 121 | restore_callee_saved_regs x2 |
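__guest_enter()/__guest_exit() no longer stash the host context and vcpu pointers on the HYP stack; they are recovered through the new get_host_ctxt/get_vcpu_ptr assembler macros instead. Those macros are defined outside these hunks, so the following C rendering of what they compute is inferred from the removed __hyp_panic sequence (kvm_host_cpu_state indexed by tpidr_el2) and from switch.c stashing the vcpu in __hyp_running_vcpu before entry:

	/* Inferred, not literal kernel code: locate this CPU's host context
	 * and the currently running vcpu without touching the stack. */
	static struct kvm_cpu_context *get_host_ctxt(void)
	{
		/* kvm_host_cpu_state is a per-CPU variable; at EL2, tpidr_el2
		 * holds this CPU's per-CPU offset. */
		return (struct kvm_cpu_context *)
			((unsigned long)&kvm_host_cpu_state + read_sysreg(tpidr_el2));
	}

	static struct kvm_vcpu *get_vcpu_ptr(void)
	{
		/* __hyp_running_vcpu is written by the run functions in
		 * switch.c before __guest_enter() is called. */
		return get_host_ctxt()->__hyp_running_vcpu;
	}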
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index f36464bd57c5..87dfecce82b1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S | |||
| @@ -55,15 +55,9 @@ ENTRY(__vhe_hyp_call) | |||
| 55 | ENDPROC(__vhe_hyp_call) | 55 | ENDPROC(__vhe_hyp_call) |
| 56 | 56 | ||
| 57 | el1_sync: // Guest trapped into EL2 | 57 | el1_sync: // Guest trapped into EL2 |
| 58 | stp x0, x1, [sp, #-16]! | ||
| 59 | |||
| 60 | alternative_if_not ARM64_HAS_VIRT_HOST_EXTN | ||
| 61 | mrs x1, esr_el2 | ||
| 62 | alternative_else | ||
| 63 | mrs x1, esr_el1 | ||
| 64 | alternative_endif | ||
| 65 | lsr x0, x1, #ESR_ELx_EC_SHIFT | ||
| 66 | 58 | ||
| 59 | mrs x0, esr_el2 | ||
| 60 | lsr x0, x0, #ESR_ELx_EC_SHIFT | ||
| 67 | cmp x0, #ESR_ELx_EC_HVC64 | 61 | cmp x0, #ESR_ELx_EC_HVC64 |
| 68 | ccmp x0, #ESR_ELx_EC_HVC32, #4, ne | 62 | ccmp x0, #ESR_ELx_EC_HVC32, #4, ne |
| 69 | b.ne el1_trap | 63 | b.ne el1_trap |
| @@ -117,10 +111,14 @@ el1_hvc_guest: | |||
| 117 | eret | 111 | eret |
| 118 | 112 | ||
| 119 | el1_trap: | 113 | el1_trap: |
| 114 | get_vcpu_ptr x1, x0 | ||
| 115 | |||
| 116 | mrs x0, esr_el2 | ||
| 117 | lsr x0, x0, #ESR_ELx_EC_SHIFT | ||
| 120 | /* | 118 | /* |
| 121 | * x0: ESR_EC | 119 | * x0: ESR_EC |
| 120 | * x1: vcpu pointer | ||
| 122 | */ | 121 | */ |
| 123 | ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter | ||
| 124 | 122 | ||
| 125 | /* | 123 | /* |
| 126 | * We trap the first access to the FP/SIMD to save the host context | 124 | * We trap the first access to the FP/SIMD to save the host context |
| @@ -137,18 +135,18 @@ alternative_else_nop_endif | |||
| 137 | b __guest_exit | 135 | b __guest_exit |
| 138 | 136 | ||
| 139 | el1_irq: | 137 | el1_irq: |
| 140 | stp x0, x1, [sp, #-16]! | 138 | get_vcpu_ptr x1, x0 |
| 141 | ldr x1, [sp, #16 + 8] | ||
| 142 | mov x0, #ARM_EXCEPTION_IRQ | 139 | mov x0, #ARM_EXCEPTION_IRQ |
| 143 | b __guest_exit | 140 | b __guest_exit |
| 144 | 141 | ||
| 145 | el1_error: | 142 | el1_error: |
| 146 | stp x0, x1, [sp, #-16]! | 143 | get_vcpu_ptr x1, x0 |
| 147 | ldr x1, [sp, #16 + 8] | ||
| 148 | mov x0, #ARM_EXCEPTION_EL1_SERROR | 144 | mov x0, #ARM_EXCEPTION_EL1_SERROR |
| 149 | b __guest_exit | 145 | b __guest_exit |
| 150 | 146 | ||
| 151 | el2_error: | 147 | el2_error: |
| 148 | ldp x0, x1, [sp], #16 | ||
| 149 | |||
| 152 | /* | 150 | /* |
| 153 | * Only two possibilities: | 151 | * Only two possibilities: |
| 154 | * 1) Either we come from the exit path, having just unmasked | 152 | * 1) Either we come from the exit path, having just unmasked |
| @@ -180,14 +178,7 @@ ENTRY(__hyp_do_panic) | |||
| 180 | ENDPROC(__hyp_do_panic) | 178 | ENDPROC(__hyp_do_panic) |
| 181 | 179 | ||
| 182 | ENTRY(__hyp_panic) | 180 | ENTRY(__hyp_panic) |
| 183 | /* | 181 | get_host_ctxt x0, x1 |
| 184 | * '=kvm_host_cpu_state' is a host VA from the constant pool, it may | ||
| 185 | * not be accessible by this address from EL2, hyp_panic() converts | ||
| 186 | * it with kern_hyp_va() before use. | ||
| 187 | */ | ||
| 188 | ldr x0, =kvm_host_cpu_state | ||
| 189 | mrs x1, tpidr_el2 | ||
| 190 | add x0, x0, x1 | ||
| 191 | b hyp_panic | 182 | b hyp_panic |
| 192 | ENDPROC(__hyp_panic) | 183 | ENDPROC(__hyp_panic) |
| 193 | 184 | ||
| @@ -206,32 +197,43 @@ ENDPROC(\label) | |||
| 206 | invalid_vector el2h_sync_invalid | 197 | invalid_vector el2h_sync_invalid |
| 207 | invalid_vector el2h_irq_invalid | 198 | invalid_vector el2h_irq_invalid |
| 208 | invalid_vector el2h_fiq_invalid | 199 | invalid_vector el2h_fiq_invalid |
| 209 | invalid_vector el1_sync_invalid | ||
| 210 | invalid_vector el1_irq_invalid | ||
| 211 | invalid_vector el1_fiq_invalid | 200 | invalid_vector el1_fiq_invalid |
| 212 | 201 | ||
| 213 | .ltorg | 202 | .ltorg |
| 214 | 203 | ||
| 215 | .align 11 | 204 | .align 11 |
| 216 | 205 | ||
| 206 | .macro valid_vect target | ||
| 207 | .align 7 | ||
| 208 | stp x0, x1, [sp, #-16]! | ||
| 209 | b \target | ||
| 210 | .endm | ||
| 211 | |||
| 212 | .macro invalid_vect target | ||
| 213 | .align 7 | ||
| 214 | b \target | ||
| 215 | ldp x0, x1, [sp], #16 | ||
| 216 | b \target | ||
| 217 | .endm | ||
| 218 | |||
| 217 | ENTRY(__kvm_hyp_vector) | 219 | ENTRY(__kvm_hyp_vector) |
| 218 | ventry el2t_sync_invalid // Synchronous EL2t | 220 | invalid_vect el2t_sync_invalid // Synchronous EL2t |
| 219 | ventry el2t_irq_invalid // IRQ EL2t | 221 | invalid_vect el2t_irq_invalid // IRQ EL2t |
| 220 | ventry el2t_fiq_invalid // FIQ EL2t | 222 | invalid_vect el2t_fiq_invalid // FIQ EL2t |
| 221 | ventry el2t_error_invalid // Error EL2t | 223 | invalid_vect el2t_error_invalid // Error EL2t |
| 222 | 224 | ||
| 223 | ventry el2h_sync_invalid // Synchronous EL2h | 225 | invalid_vect el2h_sync_invalid // Synchronous EL2h |
| 224 | ventry el2h_irq_invalid // IRQ EL2h | 226 | invalid_vect el2h_irq_invalid // IRQ EL2h |
| 225 | ventry el2h_fiq_invalid // FIQ EL2h | 227 | invalid_vect el2h_fiq_invalid // FIQ EL2h |
| 226 | ventry el2_error // Error EL2h | 228 | valid_vect el2_error // Error EL2h |
| 227 | 229 | ||
| 228 | ventry el1_sync // Synchronous 64-bit EL1 | 230 | valid_vect el1_sync // Synchronous 64-bit EL1 |
| 229 | ventry el1_irq // IRQ 64-bit EL1 | 231 | valid_vect el1_irq // IRQ 64-bit EL1 |
| 230 | ventry el1_fiq_invalid // FIQ 64-bit EL1 | 232 | invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 |
| 231 | ventry el1_error // Error 64-bit EL1 | 233 | valid_vect el1_error // Error 64-bit EL1 |
| 232 | 234 | ||
| 233 | ventry el1_sync // Synchronous 32-bit EL1 | 235 | valid_vect el1_sync // Synchronous 32-bit EL1 |
| 234 | ventry el1_irq // IRQ 32-bit EL1 | 236 | valid_vect el1_irq // IRQ 32-bit EL1 |
| 235 | ventry el1_fiq_invalid // FIQ 32-bit EL1 | 237 | invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 |
| 236 | ventry el1_error // Error 32-bit EL1 | 238 | valid_vect el1_error // Error 32-bit EL1 |
| 237 | ENDPROC(__kvm_hyp_vector) | 239 | ENDPROC(__kvm_hyp_vector) |
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 870f4b1587f9..07b572173265 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c | |||
| @@ -33,49 +33,22 @@ static bool __hyp_text __fpsimd_enabled_nvhe(void) | |||
| 33 | return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); | 33 | return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | static bool __hyp_text __fpsimd_enabled_vhe(void) | 36 | static bool fpsimd_enabled_vhe(void) |
| 37 | { | 37 | { |
| 38 | return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); | 38 | return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | static hyp_alternate_select(__fpsimd_is_enabled, | 41 | /* Save the 32-bit only FPSIMD system register state */ |
| 42 | __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe, | 42 | static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) |
| 43 | ARM64_HAS_VIRT_HOST_EXTN); | ||
| 44 | |||
| 45 | bool __hyp_text __fpsimd_enabled(void) | ||
| 46 | { | ||
| 47 | return __fpsimd_is_enabled()(); | ||
| 48 | } | ||
| 49 | |||
| 50 | static void __hyp_text __activate_traps_vhe(void) | ||
| 51 | { | ||
| 52 | u64 val; | ||
| 53 | |||
| 54 | val = read_sysreg(cpacr_el1); | ||
| 55 | val |= CPACR_EL1_TTA; | ||
| 56 | val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); | ||
| 57 | write_sysreg(val, cpacr_el1); | ||
| 58 | |||
| 59 | write_sysreg(kvm_get_hyp_vector(), vbar_el1); | ||
| 60 | } | ||
| 61 | |||
| 62 | static void __hyp_text __activate_traps_nvhe(void) | ||
| 63 | { | 43 | { |
| 64 | u64 val; | 44 | if (!vcpu_el1_is_32bit(vcpu)) |
| 45 | return; | ||
| 65 | 46 | ||
| 66 | val = CPTR_EL2_DEFAULT; | 47 | vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2); |
| 67 | val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ; | ||
| 68 | write_sysreg(val, cptr_el2); | ||
| 69 | } | 48 | } |
| 70 | 49 | ||
| 71 | static hyp_alternate_select(__activate_traps_arch, | 50 | static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) |
| 72 | __activate_traps_nvhe, __activate_traps_vhe, | ||
| 73 | ARM64_HAS_VIRT_HOST_EXTN); | ||
| 74 | |||
| 75 | static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) | ||
| 76 | { | 51 | { |
| 77 | u64 val; | ||
| 78 | |||
| 79 | /* | 52 | /* |
| 80 | * We are about to set CPTR_EL2.TFP to trap all floating point | 53 | * We are about to set CPTR_EL2.TFP to trap all floating point |
| 81 | * register accesses to EL2, however, the ARM ARM clearly states that | 54 | * register accesses to EL2, however, the ARM ARM clearly states that |
| @@ -85,23 +58,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) | |||
| 85 | * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to | 58 | * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to |
| 86 | * it will cause an exception. | 59 | * it will cause an exception. |
| 87 | */ | 60 | */ |
| 88 | val = vcpu->arch.hcr_el2; | 61 | if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { |
| 89 | |||
| 90 | if (!(val & HCR_RW) && system_supports_fpsimd()) { | ||
| 91 | write_sysreg(1 << 30, fpexc32_el2); | 62 | write_sysreg(1 << 30, fpexc32_el2); |
| 92 | isb(); | 63 | isb(); |
| 93 | } | 64 | } |
| 65 | } | ||
| 94 | 66 | ||
| 95 | if (val & HCR_RW) /* for AArch64 only: */ | 67 | static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu) |
| 96 | val |= HCR_TID3; /* TID3: trap feature register accesses */ | 68 | { |
| 97 | 69 | /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ | |
| 98 | write_sysreg(val, hcr_el2); | ||
| 99 | |||
| 100 | if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE)) | ||
| 101 | write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); | ||
| 102 | |||
| 103 | /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ | ||
| 104 | write_sysreg(1 << 15, hstr_el2); | 70 | write_sysreg(1 << 15, hstr_el2); |
| 71 | |||
| 105 | /* | 72 | /* |
| 106 | * Make sure we trap PMU access from EL0 to EL2. Also sanitize | 73 | * Make sure we trap PMU access from EL0 to EL2. Also sanitize |
| 107 | * PMSELR_EL0 to make sure it never contains the cycle | 74 | * PMSELR_EL0 to make sure it never contains the cycle |
| @@ -111,19 +78,56 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) | |||
| 111 | write_sysreg(0, pmselr_el0); | 78 | write_sysreg(0, pmselr_el0); |
| 112 | write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); | 79 | write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); |
| 113 | write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); | 80 | write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); |
| 114 | __activate_traps_arch()(); | ||
| 115 | } | 81 | } |
| 116 | 82 | ||
| 117 | static void __hyp_text __deactivate_traps_vhe(void) | 83 | static void __hyp_text __deactivate_traps_common(void) |
| 118 | { | 84 | { |
| 119 | extern char vectors[]; /* kernel exception vectors */ | 85 | write_sysreg(0, hstr_el2); |
| 120 | u64 mdcr_el2 = read_sysreg(mdcr_el2); | 86 | write_sysreg(0, pmuserenr_el0); |
| 87 | } | ||
| 121 | 88 | ||
| 122 | mdcr_el2 &= MDCR_EL2_HPMN_MASK | | 89 | static void activate_traps_vhe(struct kvm_vcpu *vcpu) |
| 123 | MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | | 90 | { |
| 124 | MDCR_EL2_TPMS; | 91 | u64 val; |
| 125 | 92 | ||
| 126 | write_sysreg(mdcr_el2, mdcr_el2); | 93 | val = read_sysreg(cpacr_el1); |
| 94 | val |= CPACR_EL1_TTA; | ||
| 95 | val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); | ||
| 96 | write_sysreg(val, cpacr_el1); | ||
| 97 | |||
| 98 | write_sysreg(kvm_get_hyp_vector(), vbar_el1); | ||
| 99 | } | ||
| 100 | |||
| 101 | static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) | ||
| 102 | { | ||
| 103 | u64 val; | ||
| 104 | |||
| 105 | __activate_traps_common(vcpu); | ||
| 106 | |||
| 107 | val = CPTR_EL2_DEFAULT; | ||
| 108 | val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ; | ||
| 109 | write_sysreg(val, cptr_el2); | ||
| 110 | } | ||
| 111 | |||
| 112 | static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) | ||
| 113 | { | ||
| 114 | u64 hcr = vcpu->arch.hcr_el2; | ||
| 115 | |||
| 116 | write_sysreg(hcr, hcr_el2); | ||
| 117 | |||
| 118 | if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) | ||
| 119 | write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); | ||
| 120 | |||
| 121 | __activate_traps_fpsimd32(vcpu); | ||
| 122 | if (has_vhe()) | ||
| 123 | activate_traps_vhe(vcpu); | ||
| 124 | else | ||
| 125 | __activate_traps_nvhe(vcpu); | ||
| 126 | } | ||
| 127 | |||
| 128 | static void deactivate_traps_vhe(void) | ||
| 129 | { | ||
| 130 | extern char vectors[]; /* kernel exception vectors */ | ||
| 127 | write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); | 131 | write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); |
| 128 | write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); | 132 | write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); |
| 129 | write_sysreg(vectors, vbar_el1); | 133 | write_sysreg(vectors, vbar_el1); |
| @@ -133,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void) | |||
| 133 | { | 137 | { |
| 134 | u64 mdcr_el2 = read_sysreg(mdcr_el2); | 138 | u64 mdcr_el2 = read_sysreg(mdcr_el2); |
| 135 | 139 | ||
| 140 | __deactivate_traps_common(); | ||
| 141 | |||
| 136 | mdcr_el2 &= MDCR_EL2_HPMN_MASK; | 142 | mdcr_el2 &= MDCR_EL2_HPMN_MASK; |
| 137 | mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; | 143 | mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; |
| 138 | 144 | ||
| @@ -141,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void) | |||
| 141 | write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); | 147 | write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); |
| 142 | } | 148 | } |
| 143 | 149 | ||
| 144 | static hyp_alternate_select(__deactivate_traps_arch, | ||
| 145 | __deactivate_traps_nvhe, __deactivate_traps_vhe, | ||
| 146 | ARM64_HAS_VIRT_HOST_EXTN); | ||
| 147 | |||
| 148 | static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) | 150 | static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) |
| 149 | { | 151 | { |
| 150 | /* | 152 | /* |
| @@ -156,14 +158,32 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) | |||
| 156 | if (vcpu->arch.hcr_el2 & HCR_VSE) | 158 | if (vcpu->arch.hcr_el2 & HCR_VSE) |
| 157 | vcpu->arch.hcr_el2 = read_sysreg(hcr_el2); | 159 | vcpu->arch.hcr_el2 = read_sysreg(hcr_el2); |
| 158 | 160 | ||
| 159 | __deactivate_traps_arch()(); | 161 | if (has_vhe()) |
| 160 | write_sysreg(0, hstr_el2); | 162 | deactivate_traps_vhe(); |
| 161 | write_sysreg(0, pmuserenr_el0); | 163 | else |
| 164 | __deactivate_traps_nvhe(); | ||
| 165 | } | ||
| 166 | |||
| 167 | void activate_traps_vhe_load(struct kvm_vcpu *vcpu) | ||
| 168 | { | ||
| 169 | __activate_traps_common(vcpu); | ||
| 170 | } | ||
| 171 | |||
| 172 | void deactivate_traps_vhe_put(void) | ||
| 173 | { | ||
| 174 | u64 mdcr_el2 = read_sysreg(mdcr_el2); | ||
| 175 | |||
| 176 | mdcr_el2 &= MDCR_EL2_HPMN_MASK | | ||
| 177 | MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | | ||
| 178 | MDCR_EL2_TPMS; | ||
| 179 | |||
| 180 | write_sysreg(mdcr_el2, mdcr_el2); | ||
| 181 | |||
| 182 | __deactivate_traps_common(); | ||
| 162 | } | 183 | } |
| 163 | 184 | ||
| 164 | static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) | 185 | static void __hyp_text __activate_vm(struct kvm *kvm) |
| 165 | { | 186 | { |
| 166 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
| 167 | write_sysreg(kvm->arch.vttbr, vttbr_el2); | 187 | write_sysreg(kvm->arch.vttbr, vttbr_el2); |
| 168 | } | 188 | } |
| 169 | 189 | ||
| @@ -172,29 +192,22 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) | |||
| 172 | write_sysreg(0, vttbr_el2); | 192 | write_sysreg(0, vttbr_el2); |
| 173 | } | 193 | } |
| 174 | 194 | ||
| 175 | static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) | 195 | /* Save VGICv3 state on non-VHE systems */ |
| 196 | static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) | ||
| 176 | { | 197 | { |
| 177 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) | 198 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { |
| 178 | __vgic_v3_save_state(vcpu); | 199 | __vgic_v3_save_state(vcpu); |
| 179 | else | 200 | __vgic_v3_deactivate_traps(vcpu); |
| 180 | __vgic_v2_save_state(vcpu); | 201 | } |
| 181 | |||
| 182 | write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); | ||
| 183 | } | 202 | } |
| 184 | 203 | ||
| 185 | static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) | 204 | /* Restore VGICv3 state on non-VHE systems */ |
| 205 | static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) | ||
| 186 | { | 206 | { |
| 187 | u64 val; | 207 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { |
| 188 | 208 | __vgic_v3_activate_traps(vcpu); | |
| 189 | val = read_sysreg(hcr_el2); | ||
| 190 | val |= HCR_INT_OVERRIDE; | ||
| 191 | val |= vcpu->arch.irq_lines; | ||
| 192 | write_sysreg(val, hcr_el2); | ||
| 193 | |||
| 194 | if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) | ||
| 195 | __vgic_v3_restore_state(vcpu); | 209 | __vgic_v3_restore_state(vcpu); |
| 196 | else | 210 | } |
| 197 | __vgic_v2_restore_state(vcpu); | ||
| 198 | } | 211 | } |
| 199 | 212 | ||
| 200 | static bool __hyp_text __true_value(void) | 213 | static bool __hyp_text __true_value(void) |
| @@ -305,54 +318,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) | |||
| 305 | } | 318 | } |
| 306 | } | 319 | } |
| 307 | 320 | ||
| 308 | int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) | 321 | /* |
| 322 | * Return true when we were able to fixup the guest exit and should return to | ||
| 323 | * the guest, false when we should restore the host state and return to the | ||
| 324 | * main run loop. | ||
| 325 | */ | ||
| 326 | static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) | ||
| 309 | { | 327 | { |
| 310 | struct kvm_cpu_context *host_ctxt; | 328 | if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) |
| 311 | struct kvm_cpu_context *guest_ctxt; | ||
| 312 | bool fp_enabled; | ||
| 313 | u64 exit_code; | ||
| 314 | |||
| 315 | vcpu = kern_hyp_va(vcpu); | ||
| 316 | |||
| 317 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | ||
| 318 | host_ctxt->__hyp_running_vcpu = vcpu; | ||
| 319 | guest_ctxt = &vcpu->arch.ctxt; | ||
| 320 | |||
| 321 | __sysreg_save_host_state(host_ctxt); | ||
| 322 | __debug_cond_save_host_state(vcpu); | ||
| 323 | |||
| 324 | __activate_traps(vcpu); | ||
| 325 | __activate_vm(vcpu); | ||
| 326 | |||
| 327 | __vgic_restore_state(vcpu); | ||
| 328 | __timer_enable_traps(vcpu); | ||
| 329 | |||
| 330 | /* | ||
| 331 | * We must restore the 32-bit state before the sysregs, thanks | ||
| 332 | * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). | ||
| 333 | */ | ||
| 334 | __sysreg32_restore_state(vcpu); | ||
| 335 | __sysreg_restore_guest_state(guest_ctxt); | ||
| 336 | __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); | ||
| 337 | |||
| 338 | /* Jump in the fire! */ | ||
| 339 | again: | ||
| 340 | exit_code = __guest_enter(vcpu, host_ctxt); | ||
| 341 | /* And we're baaack! */ | ||
| 342 | |||
| 343 | if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ) | ||
| 344 | vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr); | 329 | vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr); |
| 330 | |||
| 345 | /* | 331 | /* |
| 346 | * We're using the raw exception code in order to only process | 332 | * We're using the raw exception code in order to only process |
| 347 | * the trap if no SError is pending. We will come back to the | 333 | * the trap if no SError is pending. We will come back to the |
| 348 | * same PC once the SError has been injected, and replay the | 334 | * same PC once the SError has been injected, and replay the |
| 349 | * trapping instruction. | 335 | * trapping instruction. |
| 350 | */ | 336 | */ |
| 351 | if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) | 337 | if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) |
| 352 | goto again; | 338 | return true; |
| 353 | 339 | ||
| 354 | if (static_branch_unlikely(&vgic_v2_cpuif_trap) && | 340 | if (static_branch_unlikely(&vgic_v2_cpuif_trap) && |
| 355 | exit_code == ARM_EXCEPTION_TRAP) { | 341 | *exit_code == ARM_EXCEPTION_TRAP) { |
| 356 | bool valid; | 342 | bool valid; |
| 357 | 343 | ||
| 358 | valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && | 344 | valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && |
| @@ -366,9 +352,9 @@ again: | |||
| 366 | 352 | ||
| 367 | if (ret == 1) { | 353 | if (ret == 1) { |
| 368 | if (__skip_instr(vcpu)) | 354 | if (__skip_instr(vcpu)) |
| 369 | goto again; | 355 | return true; |
| 370 | else | 356 | else |
| 371 | exit_code = ARM_EXCEPTION_TRAP; | 357 | *exit_code = ARM_EXCEPTION_TRAP; |
| 372 | } | 358 | } |
| 373 | 359 | ||
| 374 | if (ret == -1) { | 360 | if (ret == -1) { |
| @@ -380,29 +366,112 @@ again: | |||
| 380 | */ | 366 | */ |
| 381 | if (!__skip_instr(vcpu)) | 367 | if (!__skip_instr(vcpu)) |
| 382 | *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; | 368 | *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; |
| 383 | exit_code = ARM_EXCEPTION_EL1_SERROR; | 369 | *exit_code = ARM_EXCEPTION_EL1_SERROR; |
| 384 | } | 370 | } |
| 385 | |||
| 386 | /* 0 falls through to be handler out of EL2 */ | ||
| 387 | } | 371 | } |
| 388 | } | 372 | } |
| 389 | 373 | ||
| 390 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) && | 374 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) && |
| 391 | exit_code == ARM_EXCEPTION_TRAP && | 375 | *exit_code == ARM_EXCEPTION_TRAP && |
| 392 | (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || | 376 | (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || |
| 393 | kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { | 377 | kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { |
| 394 | int ret = __vgic_v3_perform_cpuif_access(vcpu); | 378 | int ret = __vgic_v3_perform_cpuif_access(vcpu); |
| 395 | 379 | ||
| 396 | if (ret == 1) { | 380 | if (ret == 1) { |
| 397 | if (__skip_instr(vcpu)) | 381 | if (__skip_instr(vcpu)) |
| 398 | goto again; | 382 | return true; |
| 399 | else | 383 | else |
| 400 | exit_code = ARM_EXCEPTION_TRAP; | 384 | *exit_code = ARM_EXCEPTION_TRAP; |
| 401 | } | 385 | } |
| 386 | } | ||
| 402 | 387 | ||
| 403 | /* 0 falls through to be handled out of EL2 */ | 388 | /* Return to the host kernel and handle the exit */ |
| 389 | return false; | ||
| 390 | } | ||
| 391 | |||
| 392 | /* Switch to the guest for VHE systems running in EL2 */ | ||
| 393 | int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) | ||
| 394 | { | ||
| 395 | struct kvm_cpu_context *host_ctxt; | ||
| 396 | struct kvm_cpu_context *guest_ctxt; | ||
| 397 | bool fp_enabled; | ||
| 398 | u64 exit_code; | ||
| 399 | |||
| 400 | host_ctxt = vcpu->arch.host_cpu_context; | ||
| 401 | host_ctxt->__hyp_running_vcpu = vcpu; | ||
| 402 | guest_ctxt = &vcpu->arch.ctxt; | ||
| 403 | |||
| 404 | sysreg_save_host_state_vhe(host_ctxt); | ||
| 405 | |||
| 406 | __activate_traps(vcpu); | ||
| 407 | __activate_vm(vcpu->kvm); | ||
| 408 | |||
| 409 | sysreg_restore_guest_state_vhe(guest_ctxt); | ||
| 410 | __debug_switch_to_guest(vcpu); | ||
| 411 | |||
| 412 | do { | ||
| 413 | /* Jump in the fire! */ | ||
| 414 | exit_code = __guest_enter(vcpu, host_ctxt); | ||
| 415 | |||
| 416 | /* And we're baaack! */ | ||
| 417 | } while (fixup_guest_exit(vcpu, &exit_code)); | ||
| 418 | |||
| 419 | fp_enabled = fpsimd_enabled_vhe(); | ||
| 420 | |||
| 421 | sysreg_save_guest_state_vhe(guest_ctxt); | ||
| 422 | |||
| 423 | __deactivate_traps(vcpu); | ||
| 424 | |||
| 425 | sysreg_restore_host_state_vhe(host_ctxt); | ||
| 426 | |||
| 427 | if (fp_enabled) { | ||
| 428 | __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); | ||
| 429 | __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); | ||
| 430 | __fpsimd_save_fpexc32(vcpu); | ||
| 404 | } | 431 | } |
| 405 | 432 | ||
| 433 | __debug_switch_to_host(vcpu); | ||
| 434 | |||
| 435 | return exit_code; | ||
| 436 | } | ||
| 437 | |||
| 438 | /* Switch to the guest for legacy non-VHE systems */ | ||
| 439 | int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) | ||
| 440 | { | ||
| 441 | struct kvm_cpu_context *host_ctxt; | ||
| 442 | struct kvm_cpu_context *guest_ctxt; | ||
| 443 | bool fp_enabled; | ||
| 444 | u64 exit_code; | ||
| 445 | |||
| 446 | vcpu = kern_hyp_va(vcpu); | ||
| 447 | |||
| 448 | host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); | ||
| 449 | host_ctxt->__hyp_running_vcpu = vcpu; | ||
| 450 | guest_ctxt = &vcpu->arch.ctxt; | ||
| 451 | |||
| 452 | __sysreg_save_state_nvhe(host_ctxt); | ||
| 453 | |||
| 454 | __activate_traps(vcpu); | ||
| 455 | __activate_vm(kern_hyp_va(vcpu->kvm)); | ||
| 456 | |||
| 457 | __hyp_vgic_restore_state(vcpu); | ||
| 458 | __timer_enable_traps(vcpu); | ||
| 459 | |||
| 460 | /* | ||
| 461 | * We must restore the 32-bit state before the sysregs, thanks | ||
| 462 | * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). | ||
| 463 | */ | ||
| 464 | __sysreg32_restore_state(vcpu); | ||
| 465 | __sysreg_restore_state_nvhe(guest_ctxt); | ||
| 466 | __debug_switch_to_guest(vcpu); | ||
| 467 | |||
| 468 | do { | ||
| 469 | /* Jump in the fire! */ | ||
| 470 | exit_code = __guest_enter(vcpu, host_ctxt); | ||
| 471 | |||
| 472 | /* And we're baaack! */ | ||
| 473 | } while (fixup_guest_exit(vcpu, &exit_code)); | ||
| 474 | |||
| 406 | if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { | 475 | if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { |
| 407 | u32 midr = read_cpuid_id(); | 476 | u32 midr = read_cpuid_id(); |
| 408 | 477 | ||
| @@ -413,29 +482,29 @@ again: | |||
| 413 | } | 482 | } |
| 414 | } | 483 | } |
| 415 | 484 | ||
| 416 | fp_enabled = __fpsimd_enabled(); | 485 | fp_enabled = __fpsimd_enabled_nvhe(); |
| 417 | 486 | ||
| 418 | __sysreg_save_guest_state(guest_ctxt); | 487 | __sysreg_save_state_nvhe(guest_ctxt); |
| 419 | __sysreg32_save_state(vcpu); | 488 | __sysreg32_save_state(vcpu); |
| 420 | __timer_disable_traps(vcpu); | 489 | __timer_disable_traps(vcpu); |
| 421 | __vgic_save_state(vcpu); | 490 | __hyp_vgic_save_state(vcpu); |
| 422 | 491 | ||
| 423 | __deactivate_traps(vcpu); | 492 | __deactivate_traps(vcpu); |
| 424 | __deactivate_vm(vcpu); | 493 | __deactivate_vm(vcpu); |
| 425 | 494 | ||
| 426 | __sysreg_restore_host_state(host_ctxt); | 495 | __sysreg_restore_state_nvhe(host_ctxt); |
| 427 | 496 | ||
| 428 | if (fp_enabled) { | 497 | if (fp_enabled) { |
| 429 | __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); | 498 | __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); |
| 430 | __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); | 499 | __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); |
| 500 | __fpsimd_save_fpexc32(vcpu); | ||
| 431 | } | 501 | } |
| 432 | 502 | ||
| 433 | __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); | ||
| 434 | /* | 503 | /* |
| 435 | * This must come after restoring the host sysregs, since a non-VHE | 504 | * This must come after restoring the host sysregs, since a non-VHE |
| 436 | * system may enable SPE here and make use of the TTBRs. | 505 | * system may enable SPE here and make use of the TTBRs. |
| 437 | */ | 506 | */ |
| 438 | __debug_cond_restore_host_state(vcpu); | 507 | __debug_switch_to_host(vcpu); |
| 439 | 508 | ||
| 440 | return exit_code; | 509 | return exit_code; |
| 441 | } | 510 | } |
| @@ -443,10 +512,20 @@ again: | |||
| 443 | static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; | 512 | static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; |
| 444 | 513 | ||
| 445 | static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, | 514 | static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, |
| 446 | struct kvm_vcpu *vcpu) | 515 | struct kvm_cpu_context *__host_ctxt) |
| 447 | { | 516 | { |
| 517 | struct kvm_vcpu *vcpu; | ||
| 448 | unsigned long str_va; | 518 | unsigned long str_va; |
| 449 | 519 | ||
| 520 | vcpu = __host_ctxt->__hyp_running_vcpu; | ||
| 521 | |||
| 522 | if (read_sysreg(vttbr_el2)) { | ||
| 523 | __timer_disable_traps(vcpu); | ||
| 524 | __deactivate_traps(vcpu); | ||
| 525 | __deactivate_vm(vcpu); | ||
| 526 | __sysreg_restore_state_nvhe(__host_ctxt); | ||
| 527 | } | ||
| 528 | |||
| 450 | /* | 529 | /* |
| 451 | * Force the panic string to be loaded from the literal pool, | 530 | * Force the panic string to be loaded from the literal pool, |
| 452 | * making sure it is a kernel address and not a PC-relative | 531 | * making sure it is a kernel address and not a PC-relative |
| @@ -460,40 +539,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, | |||
| 460 | read_sysreg(hpfar_el2), par, vcpu); | 539 | read_sysreg(hpfar_el2), par, vcpu); |
| 461 | } | 540 | } |
| 462 | 541 | ||
| 463 | static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, | 542 | static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, |
| 464 | struct kvm_vcpu *vcpu) | 543 | struct kvm_cpu_context *host_ctxt) |
| 465 | { | 544 | { |
| 545 | struct kvm_vcpu *vcpu; | ||
| 546 | vcpu = host_ctxt->__hyp_running_vcpu; | ||
| 547 | |||
| 548 | __deactivate_traps(vcpu); | ||
| 549 | sysreg_restore_host_state_vhe(host_ctxt); | ||
| 550 | |||
| 466 | panic(__hyp_panic_string, | 551 | panic(__hyp_panic_string, |
| 467 | spsr, elr, | 552 | spsr, elr, |
| 468 | read_sysreg_el2(esr), read_sysreg_el2(far), | 553 | read_sysreg_el2(esr), read_sysreg_el2(far), |
| 469 | read_sysreg(hpfar_el2), par, vcpu); | 554 | read_sysreg(hpfar_el2), par, vcpu); |
| 470 | } | 555 | } |
| 471 | 556 | ||
| 472 | static hyp_alternate_select(__hyp_call_panic, | 557 | void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) |
| 473 | __hyp_call_panic_nvhe, __hyp_call_panic_vhe, | ||
| 474 | ARM64_HAS_VIRT_HOST_EXTN); | ||
| 475 | |||
| 476 | void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt) | ||
| 477 | { | 558 | { |
| 478 | struct kvm_vcpu *vcpu = NULL; | ||
| 479 | |||
| 480 | u64 spsr = read_sysreg_el2(spsr); | 559 | u64 spsr = read_sysreg_el2(spsr); |
| 481 | u64 elr = read_sysreg_el2(elr); | 560 | u64 elr = read_sysreg_el2(elr); |
| 482 | u64 par = read_sysreg(par_el1); | 561 | u64 par = read_sysreg(par_el1); |
| 483 | 562 | ||
| 484 | if (read_sysreg(vttbr_el2)) { | 563 | if (!has_vhe()) |
| 485 | struct kvm_cpu_context *host_ctxt; | 564 | __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt); |
| 486 | 565 | else | |
| 487 | host_ctxt = kern_hyp_va(__host_ctxt); | 566 | __hyp_call_panic_vhe(spsr, elr, par, host_ctxt); |
| 488 | vcpu = host_ctxt->__hyp_running_vcpu; | ||
| 489 | __timer_disable_traps(vcpu); | ||
| 490 | __deactivate_traps(vcpu); | ||
| 491 | __deactivate_vm(vcpu); | ||
| 492 | __sysreg_restore_host_state(host_ctxt); | ||
| 493 | } | ||
| 494 | |||
| 495 | /* Call panic for real */ | ||
| 496 | __hyp_call_panic()(spsr, elr, par, vcpu); | ||
| 497 | 567 | ||
| 498 | unreachable(); | 568 | unreachable(); |
| 499 | } | 569 | } |
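__kvm_vcpu_run() is split into a VHE entry point that is an ordinary kernel function and a non-VHE one that still runs from the HYP text. The generic run loop therefore has to pick between them; a sketch of what that caller plausibly looks like (the call site lives in virt/kvm/arm/arm.c, outside this hunk, so treat the exact shape as assumed):

	if (has_vhe())
		ret = kvm_vcpu_run_vhe(vcpu);		/* host already at EL2: direct call */
	else
		ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);	/* HVC into the EL2 text */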
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 2c17afd2be96..b3894df6bf1a 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c | |||
| @@ -19,32 +19,43 @@ | |||
| 19 | #include <linux/kvm_host.h> | 19 | #include <linux/kvm_host.h> |
| 20 | 20 | ||
| 21 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
| 22 | #include <asm/kvm_emulate.h> | ||
| 22 | #include <asm/kvm_hyp.h> | 23 | #include <asm/kvm_hyp.h> |
| 23 | 24 | ||
| 24 | /* Yes, this does nothing, on purpose */ | ||
| 25 | static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } | ||
| 26 | |||
| 27 | /* | 25 | /* |
| 28 | * Non-VHE: Both host and guest must save everything. | 26 | * Non-VHE: Both host and guest must save everything. |
| 29 | * | 27 | * |
| 30 | * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0, | 28 | * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate, |
| 31 | * and guest must save everything. | 29 | * which are handled as part of the el2 return state) on every switch. |
| 30 | * tpidr_el0 and tpidrro_el0 only need to be switched when going | ||
| 31 | * to host userspace or a different VCPU. EL1 registers only need to be | ||
| 32 | * switched when potentially going to run a different VCPU. The latter two | ||
| 33 | * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put. | ||
| 32 | */ | 34 | */ |
| 33 | 35 | ||
| 34 | static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) | 36 | static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) |
| 35 | { | 37 | { |
| 36 | ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); | ||
| 37 | ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); | ||
| 38 | ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); | ||
| 39 | ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); | 38 | ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); |
| 39 | |||
| 40 | /* | ||
| 41 | * The host arm64 Linux uses sp_el0 to point to 'current' and it must | ||
| 42 | * therefore be saved/restored on every entry/exit to/from the guest. | ||
| 43 | */ | ||
| 40 | ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); | 44 | ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); |
| 41 | } | 45 | } |
| 42 | 46 | ||
| 43 | static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | 47 | static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) |
| 48 | { | ||
| 49 | ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); | ||
| 50 | ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); | ||
| 51 | } | ||
| 52 | |||
| 53 | static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) | ||
| 44 | { | 54 | { |
| 45 | ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); | 55 | ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); |
| 46 | ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); | 56 | ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); |
| 47 | ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); | 57 | ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); |
| 58 | ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); | ||
| 48 | ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); | 59 | ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); |
| 49 | ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); | 60 | ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); |
| 50 | ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); | 61 | ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); |
| @@ -64,6 +75,10 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | |||
| 64 | ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); | 75 | ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); |
| 65 | ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); | 76 | ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); |
| 66 | ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); | 77 | ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); |
| 78 | } | ||
| 79 | |||
| 80 | static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) | ||
| 81 | { | ||
| 67 | ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); | 82 | ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); |
| 68 | ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); | 83 | ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); |
| 69 | 84 | ||
| @@ -71,36 +86,48 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) | |||
| 71 | ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); | 86 | ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); |
| 72 | } | 87 | } |
| 73 | 88 | ||
| 74 | static hyp_alternate_select(__sysreg_call_save_host_state, | 89 | void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt) |
| 75 | __sysreg_save_state, __sysreg_do_nothing, | 90 | { |
| 76 | ARM64_HAS_VIRT_HOST_EXTN); | 91 | __sysreg_save_el1_state(ctxt); |
| 92 | __sysreg_save_common_state(ctxt); | ||
| 93 | __sysreg_save_user_state(ctxt); | ||
| 94 | __sysreg_save_el2_return_state(ctxt); | ||
| 95 | } | ||
| 77 | 96 | ||
| 78 | void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt) | 97 | void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt) |
| 79 | { | 98 | { |
| 80 | __sysreg_call_save_host_state()(ctxt); | ||
| 81 | __sysreg_save_common_state(ctxt); | 99 | __sysreg_save_common_state(ctxt); |
| 82 | } | 100 | } |
| 83 | 101 | ||
| 84 | void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt) | 102 | void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt) |
| 85 | { | 103 | { |
| 86 | __sysreg_save_state(ctxt); | ||
| 87 | __sysreg_save_common_state(ctxt); | 104 | __sysreg_save_common_state(ctxt); |
| 105 | __sysreg_save_el2_return_state(ctxt); | ||
| 88 | } | 106 | } |
| 89 | 107 | ||
| 90 | static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) | 108 | static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) |
| 91 | { | 109 | { |
| 92 | write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); | ||
| 93 | write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); | ||
| 94 | write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); | ||
| 95 | write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); | 110 | write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); |
| 111 | |||
| 112 | /* | ||
| 113 | * The host arm64 Linux uses sp_el0 to point to 'current' and it must | ||
| 114 | * therefore be saved/restored on every entry/exit to/from the guest. | ||
| 115 | */ | ||
| 96 | write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); | 116 | write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); |
| 97 | } | 117 | } |
| 98 | 118 | ||
| 99 | static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) | 119 | static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) |
| 120 | { | ||
| 121 | write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); | ||
| 122 | write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); | ||
| 123 | } | ||
| 124 | |||
| 125 | static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) | ||
| 100 | { | 126 | { |
| 101 | write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); | 127 | write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); |
| 102 | write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); | 128 | write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); |
| 103 | write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); | 129 | write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); |
| 130 | write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); | ||
| 104 | write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); | 131 | write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); |
| 105 | write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); | 132 | write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); |
| 106 | write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); | 133 | write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); |
| @@ -120,6 +147,11 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) | |||
| 120 | write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); | 147 | write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); |
| 121 | write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); | 148 | write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); |
| 122 | write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); | 149 | write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); |
| 150 | } | ||
| 151 | |||
| 152 | static void __hyp_text | ||
| 153 | __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) | ||
| 154 | { | ||
| 123 | write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); | 155 | write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); |
| 124 | write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); | 156 | write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); |
| 125 | 157 | ||
| @@ -127,27 +159,30 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) | |||
| 127 | write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); | 159 | write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); |
| 128 | } | 160 | } |
| 129 | 161 | ||
| 130 | static hyp_alternate_select(__sysreg_call_restore_host_state, | 162 | void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) |
| 131 | __sysreg_restore_state, __sysreg_do_nothing, | 163 | { |
| 132 | ARM64_HAS_VIRT_HOST_EXTN); | 164 | __sysreg_restore_el1_state(ctxt); |
| 165 | __sysreg_restore_common_state(ctxt); | ||
| 166 | __sysreg_restore_user_state(ctxt); | ||
| 167 | __sysreg_restore_el2_return_state(ctxt); | ||
| 168 | } | ||
| 133 | 169 | ||
| 134 | void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt) | 170 | void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt) |
| 135 | { | 171 | { |
| 136 | __sysreg_call_restore_host_state()(ctxt); | ||
| 137 | __sysreg_restore_common_state(ctxt); | 172 | __sysreg_restore_common_state(ctxt); |
| 138 | } | 173 | } |
| 139 | 174 | ||
| 140 | void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt) | 175 | void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt) |
| 141 | { | 176 | { |
| 142 | __sysreg_restore_state(ctxt); | ||
| 143 | __sysreg_restore_common_state(ctxt); | 177 | __sysreg_restore_common_state(ctxt); |
| 178 | __sysreg_restore_el2_return_state(ctxt); | ||
| 144 | } | 179 | } |
| 145 | 180 | ||
| 146 | void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) | 181 | void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) |
| 147 | { | 182 | { |
| 148 | u64 *spsr, *sysreg; | 183 | u64 *spsr, *sysreg; |
| 149 | 184 | ||
| 150 | if (read_sysreg(hcr_el2) & HCR_RW) | 185 | if (!vcpu_el1_is_32bit(vcpu)) |
| 151 | return; | 186 | return; |
| 152 | 187 | ||
| 153 | spsr = vcpu->arch.ctxt.gp_regs.spsr; | 188 | spsr = vcpu->arch.ctxt.gp_regs.spsr; |
| @@ -161,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) | |||
| 161 | sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); | 196 | sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); |
| 162 | sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); | 197 | sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); |
| 163 | 198 | ||
| 164 | if (__fpsimd_enabled()) | 199 | if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) |
| 165 | sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); | ||
| 166 | |||
| 167 | if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) | ||
| 168 | sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); | 200 | sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); |
| 169 | } | 201 | } |
| 170 | 202 | ||
| @@ -172,7 +204,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) | |||
| 172 | { | 204 | { |
| 173 | u64 *spsr, *sysreg; | 205 | u64 *spsr, *sysreg; |
| 174 | 206 | ||
| 175 | if (read_sysreg(hcr_el2) & HCR_RW) | 207 | if (!vcpu_el1_is_32bit(vcpu)) |
| 176 | return; | 208 | return; |
| 177 | 209 | ||
| 178 | spsr = vcpu->arch.ctxt.gp_regs.spsr; | 210 | spsr = vcpu->arch.ctxt.gp_regs.spsr; |
| @@ -186,6 +218,78 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) | |||
| 186 | write_sysreg(sysreg[DACR32_EL2], dacr32_el2); | 218 | write_sysreg(sysreg[DACR32_EL2], dacr32_el2); |
| 187 | write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); | 219 | write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); |
| 188 | 220 | ||
| 189 | if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) | 221 | if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) |
| 190 | write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); | 222 | write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); |
| 191 | } | 223 | } |
| 224 | |||
| 225 | /** | ||
| 226 | * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU | ||
| 227 | * | ||
| 228 | * @vcpu: The VCPU pointer | ||
| 229 | * | ||
| 230 | * Load system registers that do not affect the host's execution, for | ||
| 231 | * example EL1 system registers on a VHE system where the host kernel | ||
| 232 | * runs at EL2. This function is called from KVM's vcpu_load() function | ||
| 233 | * and loading system register state early avoids having to load them on | ||
| 234 | * every entry to the VM. | ||
| 235 | */ | ||
| 236 | void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) | ||
| 237 | { | ||
| 238 | struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; | ||
| 239 | struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; | ||
| 240 | |||
| 241 | if (!has_vhe()) | ||
| 242 | return; | ||
| 243 | |||
| 244 | __sysreg_save_user_state(host_ctxt); | ||
| 245 | |||
| 246 | /* | ||
| 247 | * Load guest EL1 and user state | ||
| 248 | * | ||
| 249 | * We must restore the 32-bit state before the sysregs, thanks | ||
| 250 | * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). | ||
| 251 | */ | ||
| 252 | __sysreg32_restore_state(vcpu); | ||
| 253 | __sysreg_restore_user_state(guest_ctxt); | ||
| 254 | __sysreg_restore_el1_state(guest_ctxt); | ||
| 255 | |||
| 256 | vcpu->arch.sysregs_loaded_on_cpu = true; | ||
| 257 | |||
| 258 | activate_traps_vhe_load(vcpu); | ||
| 259 | } | ||
| 260 | |||
| 261 | /** | ||
| 262 | * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU | ||
| 263 | * | ||
| 264 | * @vcpu: The VCPU pointer | ||
| 265 | * | ||
| 266 | * Save guest system registers that do not affect the host's execution, for | ||
| 267 | * example EL1 system registers on a VHE system where the host kernel | ||
| 268 | * runs at EL2. This function is called from KVM's vcpu_put() function | ||
| 269 | * and deferring saving system register state until we're no longer running the | ||
| 270 | * VCPU avoids having to save them on every exit from the VM. | ||
| 271 | */ | ||
| 272 | void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) | ||
| 273 | { | ||
| 274 | struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context; | ||
| 275 | struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; | ||
| 276 | |||
| 277 | if (!has_vhe()) | ||
| 278 | return; | ||
| 279 | |||
| 280 | deactivate_traps_vhe_put(); | ||
| 281 | |||
| 282 | __sysreg_save_el1_state(guest_ctxt); | ||
| 283 | __sysreg_save_user_state(guest_ctxt); | ||
| 284 | __sysreg32_save_state(vcpu); | ||
| 285 | |||
| 286 | /* Restore host user state */ | ||
| 287 | __sysreg_restore_user_state(host_ctxt); | ||
| 288 | |||
| 289 | vcpu->arch.sysregs_loaded_on_cpu = false; | ||
| 290 | } | ||
| 291 | |||
| 292 | void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2) | ||
| 293 | { | ||
| 294 | asm("msr tpidr_el2, %0": : "r" (tpidr_el2)); | ||
| 295 | } | ||
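Taken together with the comment block at the top of this file, the new kvm_vcpu_load_sysregs()/kvm_vcpu_put_sysregs() pair means a VHE host switches guest EL1 and user state once per vcpu_load/vcpu_put section rather than once per guest entry. A rough sketch of the resulting shape of a KVM_RUN ioctl on VHE (call sites assumed from the kernel-doc above, not literal kernel code):

	kvm_vcpu_load_sysregs(vcpu);	/* from kvm_arch_vcpu_load(): guest EL1 and
					 * user registers go onto the CPU once */
	while (run_loop_should_continue)
		exit = kvm_vcpu_run_vhe(vcpu);	/* each entry only swaps the common
						 * and EL2-return state */
	kvm_vcpu_put_sysregs(vcpu);	/* from kvm_arch_vcpu_put(): guest state is
					 * saved back, host user state restored */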
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c new file mode 100644 index 000000000000..86801b6055d6 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2012-2015 - ARM Ltd | ||
| 3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License version 2 as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/compiler.h> | ||
| 19 | #include <linux/irqchip/arm-gic.h> | ||
| 20 | #include <linux/kvm_host.h> | ||
| 21 | |||
| 22 | #include <asm/kvm_emulate.h> | ||
| 23 | #include <asm/kvm_hyp.h> | ||
| 24 | #include <asm/kvm_mmu.h> | ||
| 25 | |||
| 26 | /* | ||
| 27 | * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the | ||
| 28 | * guest. | ||
| 29 | * | ||
| 30 | * @vcpu: the offending vcpu | ||
| 31 | * | ||
| 32 | * Returns: | ||
| 33 | * 1: GICV access successfully performed | ||
| 34 | * 0: Not a GICV access | ||
| 35 | * -1: Illegal GICV access | ||
| 36 | */ | ||
| 37 | int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) | ||
| 38 | { | ||
| 39 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
| 40 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
| 41 | phys_addr_t fault_ipa; | ||
| 42 | void __iomem *addr; | ||
| 43 | int rd; | ||
| 44 | |||
| 45 | /* Build the full address */ | ||
| 46 | fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); | ||
| 47 | fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); | ||
| 48 | |||
| 49 | /* If not for GICV, move on */ | ||
| 50 | if (fault_ipa < vgic->vgic_cpu_base || | ||
| 51 | fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) | ||
| 52 | return 0; | ||
| 53 | |||
| 54 | /* Reject anything but a 32bit access */ | ||
| 55 | if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) | ||
| 56 | return -1; | ||
| 57 | |||
| 58 | /* Not aligned? Don't bother */ | ||
| 59 | if (fault_ipa & 3) | ||
| 60 | return -1; | ||
| 61 | |||
| 62 | rd = kvm_vcpu_dabt_get_rd(vcpu); | ||
| 63 | addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va; | ||
| 64 | addr += fault_ipa - vgic->vgic_cpu_base; | ||
| 65 | |||
| 66 | if (kvm_vcpu_dabt_iswrite(vcpu)) { | ||
| 67 | u32 data = vcpu_data_guest_to_host(vcpu, | ||
| 68 | vcpu_get_reg(vcpu, rd), | ||
| 69 | sizeof(u32)); | ||
| 70 | writel_relaxed(data, addr); | ||
| 71 | } else { | ||
| 72 | u32 data = readl_relaxed(addr); | ||
| 73 | vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, | ||
| 74 | sizeof(u32))); | ||
| 75 | } | ||
| 76 | |||
| 77 | return 1; | ||
| 78 | } | ||
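The new proxy only ever forwards aligned 32-bit accesses that fall inside the guest's GICV frame; everything else is either ignored (return 0) or flagged as illegal (return -1) and handled back in the host. A small self-contained model of that decode, with an assumed frame size and the access-size check omitted for brevity (the real values and checks come from the kernel headers and kvm_vcpu_dabt_get_as() above):

	#include <stddef.h>
	#include <stdint.h>

	#define KVM_VGIC_V2_CPU_SIZE	0x2000	/* assumed 8K; see the kernel headers */

	/* Mirror of the range/alignment checks performed above; on success the
	 * offset is what gets added to the hyp VA alias of the GICV frame. */
	static int gicv_decode(uint64_t fault_ipa, uint64_t gicv_base, size_t *offset)
	{
		if (fault_ipa < gicv_base ||
		    fault_ipa >= gicv_base + KVM_VGIC_V2_CPU_SIZE)
			return 0;			/* not a GICV access */
		if (fault_ipa & 3)
			return -1;			/* illegal (unaligned) GICV access */
		*offset = fault_ipa - gicv_base;	/* offset into the hyp VA alias */
		return 1;				/* emulate the 32-bit access */
	}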
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 60666a056944..d8e71659ba7e 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c | |||
| @@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) | |||
| 58 | exc_offset = LOWER_EL_AArch32_VECTOR; | 58 | exc_offset = LOWER_EL_AArch32_VECTOR; |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; | 61 | return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) | 64 | static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) |
| @@ -67,13 +67,13 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr | |||
| 67 | bool is_aarch32 = vcpu_mode_is_32bit(vcpu); | 67 | bool is_aarch32 = vcpu_mode_is_32bit(vcpu); |
| 68 | u32 esr = 0; | 68 | u32 esr = 0; |
| 69 | 69 | ||
| 70 | *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); | 70 | vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); |
| 71 | *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); | 71 | *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); |
| 72 | 72 | ||
| 73 | *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; | 73 | *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; |
| 74 | *vcpu_spsr(vcpu) = cpsr; | 74 | vcpu_write_spsr(vcpu, cpsr); |
| 75 | 75 | ||
| 76 | vcpu_sys_reg(vcpu, FAR_EL1) = addr; | 76 | vcpu_write_sys_reg(vcpu, addr, FAR_EL1); |
| 77 | 77 | ||
| 78 | /* | 78 | /* |
| 79 | * Build an {i,d}abort, depending on the level and the | 79 | * Build an {i,d}abort, depending on the level and the |
| @@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr | |||
| 94 | if (!is_iabt) | 94 | if (!is_iabt) |
| 95 | esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; | 95 | esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; |
| 96 | 96 | ||
| 97 | vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; | 97 | vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | static void inject_undef64(struct kvm_vcpu *vcpu) | 100 | static void inject_undef64(struct kvm_vcpu *vcpu) |
| @@ -102,11 +102,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu) | |||
| 102 | unsigned long cpsr = *vcpu_cpsr(vcpu); | 102 | unsigned long cpsr = *vcpu_cpsr(vcpu); |
| 103 | u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); | 103 | u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); |
| 104 | 104 | ||
| 105 | *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); | 105 | vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); |
| 106 | *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); | 106 | *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); |
| 107 | 107 | ||
| 108 | *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; | 108 | *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; |
| 109 | *vcpu_spsr(vcpu) = cpsr; | 109 | vcpu_write_spsr(vcpu, cpsr); |
| 110 | 110 | ||
| 111 | /* | 111 | /* |
| 112 | * Build an unknown exception, depending on the instruction | 112 | * Build an unknown exception, depending on the instruction |
| @@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) | |||
| 115 | if (kvm_vcpu_trap_il_is32bit(vcpu)) | 115 | if (kvm_vcpu_trap_il_is32bit(vcpu)) |
| 116 | esr |= ESR_ELx_IL; | 116 | esr |= ESR_ELx_IL; |
| 117 | 117 | ||
| 118 | vcpu_sys_reg(vcpu, ESR_EL1) = esr; | 118 | vcpu_write_sys_reg(vcpu, esr, ESR_EL1); |
| 119 | } | 119 | } |
| 120 | 120 | ||
| 121 | /** | 121 | /** |
| @@ -128,7 +128,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) | |||
| 128 | */ | 128 | */ |
| 129 | void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) | 129 | void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) |
| 130 | { | 130 | { |
| 131 | if (!(vcpu->arch.hcr_el2 & HCR_RW)) | 131 | if (vcpu_el1_is_32bit(vcpu)) |
| 132 | kvm_inject_dabt32(vcpu, addr); | 132 | kvm_inject_dabt32(vcpu, addr); |
| 133 | else | 133 | else |
| 134 | inject_abt64(vcpu, false, addr); | 134 | inject_abt64(vcpu, false, addr); |
| @@ -144,7 +144,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) | |||
| 144 | */ | 144 | */ |
| 145 | void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) | 145 | void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) |
| 146 | { | 146 | { |
| 147 | if (!(vcpu->arch.hcr_el2 & HCR_RW)) | 147 | if (vcpu_el1_is_32bit(vcpu)) |
| 148 | kvm_inject_pabt32(vcpu, addr); | 148 | kvm_inject_pabt32(vcpu, addr); |
| 149 | else | 149 | else |
| 150 | inject_abt64(vcpu, true, addr); | 150 | inject_abt64(vcpu, true, addr); |
| @@ -158,7 +158,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) | |||
| 158 | */ | 158 | */ |
| 159 | void kvm_inject_undefined(struct kvm_vcpu *vcpu) | 159 | void kvm_inject_undefined(struct kvm_vcpu *vcpu) |
| 160 | { | 160 | { |
| 161 | if (!(vcpu->arch.hcr_el2 & HCR_RW)) | 161 | if (vcpu_el1_is_32bit(vcpu)) |
| 162 | kvm_inject_undef32(vcpu); | 162 | kvm_inject_undef32(vcpu); |
| 163 | else | 163 | else |
| 164 | inject_undef64(vcpu); | 164 | inject_undef64(vcpu); |
| @@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) | |||
| 167 | static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) | 167 | static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr) |
| 168 | { | 168 | { |
| 169 | vcpu_set_vsesr(vcpu, esr); | 169 | vcpu_set_vsesr(vcpu, esr); |
| 170 | vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); | 170 | *vcpu_hcr(vcpu) |= HCR_VSE; |
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | /** | 173 | /** |
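
All three 64-bit injection paths above perform the same synchronous-exception bookkeeping, now expressed through the new read/write accessors so the values land correctly whether the guest's EL1 registers are in memory or loaded on the CPU. A minimal model of that bookkeeping over a toy register file (field names and the two constants are illustrative, not the kernel's):

    #include <stdint.h>

    /* Toy guest register file */
    struct toy_regs {
            uint64_t pc, cpsr, elr_el1, spsr_el1;
            uint64_t vbar_el1, far_el1, esr_el1;
    };

    #define TOY_PSTATE_FAULT_BITS 0x3c5UL   /* stand-in for PSTATE_FAULT_BITS_64    */
    #define TOY_EXC_SYNC_OFFSET   0x200UL   /* "current EL, SPx" synchronous vector */

    /* Model of inject_abt64(): redirect the guest to its synchronous vector */
    static void toy_inject_sync_abort(struct toy_regs *r, uint64_t fault_addr,
                                      uint64_t syndrome)
    {
            r->elr_el1  = r->pc;                               /* return address     */
            r->pc       = r->vbar_el1 + TOY_EXC_SYNC_OFFSET;   /* enter the vector   */
            r->spsr_el1 = r->cpsr;                             /* saved PSTATE       */
            r->cpsr     = TOY_PSTATE_FAULT_BITS;               /* EL1h, DAIF masked  */
            r->far_el1  = fault_addr;                          /* faulting address   */
            r->esr_el1  = syndrome;                            /* exception syndrome */
    }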
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index bbc6ae32e4af..eefe403a2e63 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c | |||
| @@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) | |||
| 141 | /* | 141 | /* |
| 142 | * Return the SPSR for the current mode of the virtual CPU. | 142 | * Return the SPSR for the current mode of the virtual CPU. |
| 143 | */ | 143 | */ |
| 144 | unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu) | 144 | static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) |
| 145 | { | 145 | { |
| 146 | unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; | 146 | unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; |
| 147 | switch (mode) { | 147 | switch (mode) { |
| 148 | case COMPAT_PSR_MODE_SVC: | 148 | case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC; |
| 149 | mode = KVM_SPSR_SVC; | 149 | case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT; |
| 150 | break; | 150 | case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND; |
| 151 | case COMPAT_PSR_MODE_ABT: | 151 | case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ; |
| 152 | mode = KVM_SPSR_ABT; | 152 | case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ; |
| 153 | break; | 153 | default: BUG(); |
| 154 | case COMPAT_PSR_MODE_UND: | 154 | } |
| 155 | mode = KVM_SPSR_UND; | 155 | } |
| 156 | break; | 156 | |
| 157 | case COMPAT_PSR_MODE_IRQ: | 157 | unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) |
| 158 | mode = KVM_SPSR_IRQ; | 158 | { |
| 159 | break; | 159 | int spsr_idx = vcpu_spsr32_mode(vcpu); |
| 160 | case COMPAT_PSR_MODE_FIQ: | 160 | |
| 161 | mode = KVM_SPSR_FIQ; | 161 | if (!vcpu->arch.sysregs_loaded_on_cpu) |
| 162 | break; | 162 | return vcpu_gp_regs(vcpu)->spsr[spsr_idx]; |
| 163 | |||
| 164 | switch (spsr_idx) { | ||
| 165 | case KVM_SPSR_SVC: | ||
| 166 | return read_sysreg_el1(spsr); | ||
| 167 | case KVM_SPSR_ABT: | ||
| 168 | return read_sysreg(spsr_abt); | ||
| 169 | case KVM_SPSR_UND: | ||
| 170 | return read_sysreg(spsr_und); | ||
| 171 | case KVM_SPSR_IRQ: | ||
| 172 | return read_sysreg(spsr_irq); | ||
| 173 | case KVM_SPSR_FIQ: | ||
| 174 | return read_sysreg(spsr_fiq); | ||
| 163 | default: | 175 | default: |
| 164 | BUG(); | 176 | BUG(); |
| 165 | } | 177 | } |
| 178 | } | ||
| 179 | |||
| 180 | void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) | ||
| 181 | { | ||
| 182 | int spsr_idx = vcpu_spsr32_mode(vcpu); | ||
| 183 | |||
| 184 | if (!vcpu->arch.sysregs_loaded_on_cpu) { | ||
| 185 | vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v; | ||
| 186 | return; | ||
| 187 | } | ||
| 166 | 188 | ||
| 167 | return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode]; | 189 | switch (spsr_idx) { |
| 190 | case KVM_SPSR_SVC: | ||
| 191 | write_sysreg_el1(v, spsr); | ||
| 192 | case KVM_SPSR_ABT: | ||
| 193 | write_sysreg(v, spsr_abt); | ||
| 194 | case KVM_SPSR_UND: | ||
| 195 | write_sysreg(v, spsr_und); | ||
| 196 | case KVM_SPSR_IRQ: | ||
| 197 | write_sysreg(v, spsr_irq); | ||
| 198 | case KVM_SPSR_FIQ: | ||
| 199 | write_sysreg(v, spsr_fiq); | ||
| 200 | } | ||
| 168 | } | 201 | } |
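
The rework above splits the old pointer-returning vcpu_spsr32() into a mode lookup plus explicit read and write helpers, so a vcpu whose registers are loaded on the CPU can be serviced straight from the banked hardware SPSRs. Note that, as listed, the cases in the write path carry no break or return, so the switch falls through and a single write would touch every bank from the selected one onwards; the per-bank dispatch as presumably intended looks like this toy model (names are illustrative):

    /* Toy model: one in-memory slot per banked AArch32 SPSR */
    enum toy_spsr_bank { SPSR_SVC, SPSR_ABT, SPSR_UND, SPSR_IRQ, SPSR_FIQ, NR_SPSR };

    static unsigned long toy_spsr[NR_SPSR];

    static void toy_write_spsr32(enum toy_spsr_bank bank, unsigned long v)
    {
            switch (bank) {
            case SPSR_SVC: toy_spsr[SPSR_SVC] = v; break;   /* each case terminates, */
            case SPSR_ABT: toy_spsr[SPSR_ABT] = v; break;   /* so only the selected  */
            case SPSR_UND: toy_spsr[SPSR_UND] = v; break;   /* bank is updated       */
            case SPSR_IRQ: toy_spsr[SPSR_IRQ] = v; break;
            case SPSR_FIQ: toy_spsr[SPSR_FIQ] = v; break;
            default: break;
            }
    }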
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 50a43c7b97ca..806b0b126a64 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <asm/kvm_coproc.h> | 35 | #include <asm/kvm_coproc.h> |
| 36 | #include <asm/kvm_emulate.h> | 36 | #include <asm/kvm_emulate.h> |
| 37 | #include <asm/kvm_host.h> | 37 | #include <asm/kvm_host.h> |
| 38 | #include <asm/kvm_hyp.h> | ||
| 38 | #include <asm/kvm_mmu.h> | 39 | #include <asm/kvm_mmu.h> |
| 39 | #include <asm/perf_event.h> | 40 | #include <asm/perf_event.h> |
| 40 | #include <asm/sysreg.h> | 41 | #include <asm/sysreg.h> |
| @@ -76,6 +77,93 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, | |||
| 76 | return false; | 77 | return false; |
| 77 | } | 78 | } |
| 78 | 79 | ||
| 80 | u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg) | ||
| 81 | { | ||
| 82 | if (!vcpu->arch.sysregs_loaded_on_cpu) | ||
| 83 | goto immediate_read; | ||
| 84 | |||
| 85 | /* | ||
| 86 | * System registers listed in the switch are not saved on every | ||
| 87 | * exit from the guest but are only saved on vcpu_put. | ||
| 88 | * | ||
| 89 | * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but | ||
| 90 | * should never be listed below, because the guest cannot modify its | ||
| 91 | * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's | ||
| 92 | * thread when emulating cross-VCPU communication. | ||
| 93 | */ | ||
| 94 | switch (reg) { | ||
| 95 | case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); | ||
| 96 | case SCTLR_EL1: return read_sysreg_s(sctlr_EL12); | ||
| 97 | case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); | ||
| 98 | case CPACR_EL1: return read_sysreg_s(cpacr_EL12); | ||
| 99 | case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12); | ||
| 100 | case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12); | ||
| 101 | case TCR_EL1: return read_sysreg_s(tcr_EL12); | ||
| 102 | case ESR_EL1: return read_sysreg_s(esr_EL12); | ||
| 103 | case AFSR0_EL1: return read_sysreg_s(afsr0_EL12); | ||
| 104 | case AFSR1_EL1: return read_sysreg_s(afsr1_EL12); | ||
| 105 | case FAR_EL1: return read_sysreg_s(far_EL12); | ||
| 106 | case MAIR_EL1: return read_sysreg_s(mair_EL12); | ||
| 107 | case VBAR_EL1: return read_sysreg_s(vbar_EL12); | ||
| 108 | case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12); | ||
| 109 | case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); | ||
| 110 | case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); | ||
| 111 | case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); | ||
| 112 | case AMAIR_EL1: return read_sysreg_s(amair_EL12); | ||
| 113 | case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12); | ||
| 114 | case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); | ||
| 115 | case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); | ||
| 116 | case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); | ||
| 117 | case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); | ||
| 118 | } | ||
| 119 | |||
| 120 | immediate_read: | ||
| 121 | return __vcpu_sys_reg(vcpu, reg); | ||
| 122 | } | ||
| 123 | |||
| 124 | void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) | ||
| 125 | { | ||
| 126 | if (!vcpu->arch.sysregs_loaded_on_cpu) | ||
| 127 | goto immediate_write; | ||
| 128 | |||
| 129 | /* | ||
| 130 | * System registers listed in the switch are not restored on every | ||
| 131 | * entry to the guest but are only restored on vcpu_load. | ||
| 132 | * | ||
| 133 | * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but | ||
| 134 | * should never be listed below, because the MPIDR should only be | ||
| 135 | * set once, before running the VCPU, and never changed later. | ||
| 136 | */ | ||
| 137 | switch (reg) { | ||
| 138 | case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; | ||
| 139 | case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return; | ||
| 140 | case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; | ||
| 141 | case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return; | ||
| 142 | case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return; | ||
| 143 | case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return; | ||
| 144 | case TCR_EL1: write_sysreg_s(val, tcr_EL12); return; | ||
| 145 | case ESR_EL1: write_sysreg_s(val, esr_EL12); return; | ||
| 146 | case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return; | ||
| 147 | case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return; | ||
| 148 | case FAR_EL1: write_sysreg_s(val, far_EL12); return; | ||
| 149 | case MAIR_EL1: write_sysreg_s(val, mair_EL12); return; | ||
| 150 | case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return; | ||
| 151 | case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return; | ||
| 152 | case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; | ||
| 153 | case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; | ||
| 154 | case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; | ||
| 155 | case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return; | ||
| 156 | case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return; | ||
| 157 | case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; | ||
| 158 | case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; | ||
| 159 | case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; | ||
| 160 | case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; | ||
| 161 | } | ||
| 162 | |||
| 163 | immediate_write: | ||
| 164 | __vcpu_sys_reg(vcpu, reg) = val; | ||
| 165 | } | ||
| 166 | |||
| 79 | /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ | 167 | /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ |
| 80 | static u32 cache_levels; | 168 | static u32 cache_levels; |
| 81 | 169 | ||
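
The accessor pair introduced above is the core of the deferred save/restore scheme: when the guest's EL1 system registers are resident on the physical CPU (between vcpu_load and vcpu_put on a VHE host), reads and writes go to the hardware _EL12/_EL1 encodings; otherwise they go to the in-memory shadow. A stripped-down model of the pattern, with an array standing in for the physical registers:

    #include <stdbool.h>
    #include <stdint.h>

    #define TOY_NR_SYS_REGS 32

    struct toy_vcpu {
            bool     sysregs_loaded_on_cpu;      /* set by vcpu_load, cleared by vcpu_put */
            uint64_t sys_regs[TOY_NR_SYS_REGS];  /* in-memory shadow copy */
    };

    /* Stands in for the physical registers behind read_sysreg_s()/write_sysreg_s() */
    static uint64_t toy_hw_regs[TOY_NR_SYS_REGS];

    static uint64_t toy_read_sys_reg(const struct toy_vcpu *vcpu, int reg)
    {
            if (vcpu->sysregs_loaded_on_cpu)
                    return toy_hw_regs[reg];     /* live value is on the CPU */
            return vcpu->sys_regs[reg];          /* value was saved at vcpu_put */
    }

    static void toy_write_sys_reg(struct toy_vcpu *vcpu, uint64_t val, int reg)
    {
            if (vcpu->sysregs_loaded_on_cpu)
                    toy_hw_regs[reg] = val;      /* hardware copy is authoritative */
            else
                    vcpu->sys_regs[reg] = val;   /* memory copy is authoritative */
    }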
| @@ -121,16 +209,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, | |||
| 121 | const struct sys_reg_desc *r) | 209 | const struct sys_reg_desc *r) |
| 122 | { | 210 | { |
| 123 | bool was_enabled = vcpu_has_cache_enabled(vcpu); | 211 | bool was_enabled = vcpu_has_cache_enabled(vcpu); |
| 212 | u64 val; | ||
| 213 | int reg = r->reg; | ||
| 124 | 214 | ||
| 125 | BUG_ON(!p->is_write); | 215 | BUG_ON(!p->is_write); |
| 126 | 216 | ||
| 127 | if (!p->is_aarch32) { | 217 | /* See the 32bit mapping in kvm_host.h */ |
| 128 | vcpu_sys_reg(vcpu, r->reg) = p->regval; | 218 | if (p->is_aarch32) |
| 219 | reg = r->reg / 2; | ||
| 220 | |||
| 221 | if (!p->is_aarch32 || !p->is_32bit) { | ||
| 222 | val = p->regval; | ||
| 129 | } else { | 223 | } else { |
| 130 | if (!p->is_32bit) | 224 | val = vcpu_read_sys_reg(vcpu, reg); |
| 131 | vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval); | 225 | if (r->reg % 2) |
| 132 | vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval); | 226 | val = (p->regval << 32) | (u64)lower_32_bits(val); |
| 227 | else | ||
| 228 | val = ((u64)upper_32_bits(val) << 32) | | ||
| 229 | lower_32_bits(p->regval); | ||
| 133 | } | 230 | } |
| 231 | vcpu_write_sys_reg(vcpu, val, reg); | ||
| 134 | 232 | ||
| 135 | kvm_toggle_cache(vcpu, was_enabled); | 233 | kvm_toggle_cache(vcpu, was_enabled); |
| 136 | return true; | 234 | return true; |
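
access_vm_reg() now funnels AArch32 cp15 writes through the same 64-bit accessor: a cp15 register pair is backed by one 64-bit EL1 register (hence reg / 2, per the kvm_host.h mapping), and an odd cp15 index replaces the upper word while an even index replaces the lower word. The merge step in isolation, with a purely illustrative helper name:

    #include <stdint.h>

    /*
     * Fold a 32-bit AArch32 write into its backing 64-bit register.
     * 'odd_index' mirrors the "r->reg % 2" test above: odd cp15 indices
     * are the high word of the pair, even indices the low word.
     */
    static uint64_t toy_merge_cp15_write(uint64_t current64, uint32_t regval32,
                                         int odd_index)
    {
            if (odd_index)
                    return ((uint64_t)regval32 << 32) | (uint32_t)current64;

            return (current64 & 0xffffffff00000000ULL) | regval32;
    }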
| @@ -175,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, | |||
| 175 | return read_zero(vcpu, p); | 273 | return read_zero(vcpu, p); |
| 176 | } | 274 | } |
| 177 | 275 | ||
| 276 | static bool trap_undef(struct kvm_vcpu *vcpu, | ||
| 277 | struct sys_reg_params *p, | ||
| 278 | const struct sys_reg_desc *r) | ||
| 279 | { | ||
| 280 | kvm_inject_undefined(vcpu); | ||
| 281 | return false; | ||
| 282 | } | ||
| 283 | |||
| 178 | static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, | 284 | static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, |
| 179 | struct sys_reg_params *p, | 285 | struct sys_reg_params *p, |
| 180 | const struct sys_reg_desc *r) | 286 | const struct sys_reg_desc *r) |
| @@ -231,10 +337,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, | |||
| 231 | const struct sys_reg_desc *r) | 337 | const struct sys_reg_desc *r) |
| 232 | { | 338 | { |
| 233 | if (p->is_write) { | 339 | if (p->is_write) { |
| 234 | vcpu_sys_reg(vcpu, r->reg) = p->regval; | 340 | vcpu_write_sys_reg(vcpu, p->regval, r->reg); |
| 235 | vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; | 341 | vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; |
| 236 | } else { | 342 | } else { |
| 237 | p->regval = vcpu_sys_reg(vcpu, r->reg); | 343 | p->regval = vcpu_read_sys_reg(vcpu, r->reg); |
| 238 | } | 344 | } |
| 239 | 345 | ||
| 240 | trace_trap_reg(__func__, r->reg, p->is_write, p->regval); | 346 | trace_trap_reg(__func__, r->reg, p->is_write, p->regval); |
| @@ -447,7 +553,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu, | |||
| 447 | 553 | ||
| 448 | static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | 554 | static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) |
| 449 | { | 555 | { |
| 450 | vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1); | 556 | u64 amair = read_sysreg(amair_el1); |
| 557 | vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); | ||
| 451 | } | 558 | } |
| 452 | 559 | ||
| 453 | static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | 560 | static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) |
| @@ -464,7 +571,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | |||
| 464 | mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); | 571 | mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); |
| 465 | mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); | 572 | mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); |
| 466 | mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); | 573 | mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); |
| 467 | vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr; | 574 | vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); |
| 468 | } | 575 | } |
| 469 | 576 | ||
| 470 | static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | 577 | static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) |
| @@ -478,12 +585,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | |||
| 478 | */ | 585 | */ |
| 479 | val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) | 586 | val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) |
| 480 | | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); | 587 | | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); |
| 481 | vcpu_sys_reg(vcpu, PMCR_EL0) = val; | 588 | __vcpu_sys_reg(vcpu, PMCR_EL0) = val; |
| 482 | } | 589 | } |
| 483 | 590 | ||
| 484 | static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) | 591 | static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) |
| 485 | { | 592 | { |
| 486 | u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); | 593 | u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); |
| 487 | bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); | 594 | bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); |
| 488 | 595 | ||
| 489 | if (!enabled) | 596 | if (!enabled) |
| @@ -525,14 +632,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 525 | 632 | ||
| 526 | if (p->is_write) { | 633 | if (p->is_write) { |
| 527 | /* Only update writeable bits of PMCR */ | 634 | /* Only update writeable bits of PMCR */ |
| 528 | val = vcpu_sys_reg(vcpu, PMCR_EL0); | 635 | val = __vcpu_sys_reg(vcpu, PMCR_EL0); |
| 529 | val &= ~ARMV8_PMU_PMCR_MASK; | 636 | val &= ~ARMV8_PMU_PMCR_MASK; |
| 530 | val |= p->regval & ARMV8_PMU_PMCR_MASK; | 637 | val |= p->regval & ARMV8_PMU_PMCR_MASK; |
| 531 | vcpu_sys_reg(vcpu, PMCR_EL0) = val; | 638 | __vcpu_sys_reg(vcpu, PMCR_EL0) = val; |
| 532 | kvm_pmu_handle_pmcr(vcpu, val); | 639 | kvm_pmu_handle_pmcr(vcpu, val); |
| 533 | } else { | 640 | } else { |
| 534 | /* PMCR.P & PMCR.C are RAZ */ | 641 | /* PMCR.P & PMCR.C are RAZ */ |
| 535 | val = vcpu_sys_reg(vcpu, PMCR_EL0) | 642 | val = __vcpu_sys_reg(vcpu, PMCR_EL0) |
| 536 | & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); | 643 | & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C); |
| 537 | p->regval = val; | 644 | p->regval = val; |
| 538 | } | 645 | } |
| @@ -550,10 +657,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 550 | return false; | 657 | return false; |
| 551 | 658 | ||
| 552 | if (p->is_write) | 659 | if (p->is_write) |
| 553 | vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; | 660 | __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; |
| 554 | else | 661 | else |
| 555 | /* return PMSELR.SEL field */ | 662 | /* return PMSELR.SEL field */ |
| 556 | p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0) | 663 | p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) |
| 557 | & ARMV8_PMU_COUNTER_MASK; | 664 | & ARMV8_PMU_COUNTER_MASK; |
| 558 | 665 | ||
| 559 | return true; | 666 | return true; |
| @@ -586,7 +693,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) | |||
| 586 | { | 693 | { |
| 587 | u64 pmcr, val; | 694 | u64 pmcr, val; |
| 588 | 695 | ||
| 589 | pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); | 696 | pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); |
| 590 | val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; | 697 | val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; |
| 591 | if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { | 698 | if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) { |
| 592 | kvm_inject_undefined(vcpu); | 699 | kvm_inject_undefined(vcpu); |
| @@ -611,7 +718,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, | |||
| 611 | if (pmu_access_event_counter_el0_disabled(vcpu)) | 718 | if (pmu_access_event_counter_el0_disabled(vcpu)) |
| 612 | return false; | 719 | return false; |
| 613 | 720 | ||
| 614 | idx = vcpu_sys_reg(vcpu, PMSELR_EL0) | 721 | idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) |
| 615 | & ARMV8_PMU_COUNTER_MASK; | 722 | & ARMV8_PMU_COUNTER_MASK; |
| 616 | } else if (r->Op2 == 0) { | 723 | } else if (r->Op2 == 0) { |
| 617 | /* PMCCNTR_EL0 */ | 724 | /* PMCCNTR_EL0 */ |
| @@ -666,7 +773,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 666 | 773 | ||
| 667 | if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) { | 774 | if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) { |
| 668 | /* PMXEVTYPER_EL0 */ | 775 | /* PMXEVTYPER_EL0 */ |
| 669 | idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; | 776 | idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK; |
| 670 | reg = PMEVTYPER0_EL0 + idx; | 777 | reg = PMEVTYPER0_EL0 + idx; |
| 671 | } else if (r->CRn == 14 && (r->CRm & 12) == 12) { | 778 | } else if (r->CRn == 14 && (r->CRm & 12) == 12) { |
| 672 | idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); | 779 | idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); |
| @@ -684,9 +791,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 684 | 791 | ||
| 685 | if (p->is_write) { | 792 | if (p->is_write) { |
| 686 | kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); | 793 | kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); |
| 687 | vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; | 794 | __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; |
| 688 | } else { | 795 | } else { |
| 689 | p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; | 796 | p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; |
| 690 | } | 797 | } |
| 691 | 798 | ||
| 692 | return true; | 799 | return true; |
| @@ -708,15 +815,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 708 | val = p->regval & mask; | 815 | val = p->regval & mask; |
| 709 | if (r->Op2 & 0x1) { | 816 | if (r->Op2 & 0x1) { |
| 710 | /* accessing PMCNTENSET_EL0 */ | 817 | /* accessing PMCNTENSET_EL0 */ |
| 711 | vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; | 818 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; |
| 712 | kvm_pmu_enable_counter(vcpu, val); | 819 | kvm_pmu_enable_counter(vcpu, val); |
| 713 | } else { | 820 | } else { |
| 714 | /* accessing PMCNTENCLR_EL0 */ | 821 | /* accessing PMCNTENCLR_EL0 */ |
| 715 | vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; | 822 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; |
| 716 | kvm_pmu_disable_counter(vcpu, val); | 823 | kvm_pmu_disable_counter(vcpu, val); |
| 717 | } | 824 | } |
| 718 | } else { | 825 | } else { |
| 719 | p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; | 826 | p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; |
| 720 | } | 827 | } |
| 721 | 828 | ||
| 722 | return true; | 829 | return true; |
| @@ -740,12 +847,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 740 | 847 | ||
| 741 | if (r->Op2 & 0x1) | 848 | if (r->Op2 & 0x1) |
| 742 | /* accessing PMINTENSET_EL1 */ | 849 | /* accessing PMINTENSET_EL1 */ |
| 743 | vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; | 850 | __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; |
| 744 | else | 851 | else |
| 745 | /* accessing PMINTENCLR_EL1 */ | 852 | /* accessing PMINTENCLR_EL1 */ |
| 746 | vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; | 853 | __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; |
| 747 | } else { | 854 | } else { |
| 748 | p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; | 855 | p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask; |
| 749 | } | 856 | } |
| 750 | 857 | ||
| 751 | return true; | 858 | return true; |
| @@ -765,12 +872,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 765 | if (p->is_write) { | 872 | if (p->is_write) { |
| 766 | if (r->CRm & 0x2) | 873 | if (r->CRm & 0x2) |
| 767 | /* accessing PMOVSSET_EL0 */ | 874 | /* accessing PMOVSSET_EL0 */ |
| 768 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); | 875 | __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); |
| 769 | else | 876 | else |
| 770 | /* accessing PMOVSCLR_EL0 */ | 877 | /* accessing PMOVSCLR_EL0 */ |
| 771 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); | 878 | __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); |
| 772 | } else { | 879 | } else { |
| 773 | p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; | 880 | p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask; |
| 774 | } | 881 | } |
| 775 | 882 | ||
| 776 | return true; | 883 | return true; |
| @@ -807,10 +914,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
| 807 | return false; | 914 | return false; |
| 808 | } | 915 | } |
| 809 | 916 | ||
| 810 | vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval | 917 | __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = |
| 811 | & ARMV8_PMU_USERENR_MASK; | 918 | p->regval & ARMV8_PMU_USERENR_MASK; |
| 812 | } else { | 919 | } else { |
| 813 | p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0) | 920 | p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) |
| 814 | & ARMV8_PMU_USERENR_MASK; | 921 | & ARMV8_PMU_USERENR_MASK; |
| 815 | } | 922 | } |
| 816 | 923 | ||
| @@ -893,6 +1000,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz) | |||
| 893 | task_pid_nr(current)); | 1000 | task_pid_nr(current)); |
| 894 | 1001 | ||
| 895 | val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); | 1002 | val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); |
| 1003 | } else if (id == SYS_ID_AA64MMFR1_EL1) { | ||
| 1004 | if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT)) | ||
| 1005 | pr_err_once("kvm [%i]: LORegions unsupported for guests, suppressing\n", | ||
| 1006 | task_pid_nr(current)); | ||
| 1007 | |||
| 1008 | val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT); | ||
| 896 | } | 1009 | } |
| 897 | 1010 | ||
| 898 | return val; | 1011 | return val; |
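
read_id_reg() hides LORegions from guests the same way it already hides SVE: the 4-bit feature field is cleared out of the ID register value handed to the guest, with a one-shot log message if the host actually implements the feature. Clearing such a field is a one-liner; the shift below is illustrative (the real one is ID_AA64MMFR1_LOR_SHIFT):

    #include <stdint.h>

    #define TOY_LOR_SHIFT 16   /* assumed position of the LO field, for illustration */

    /* Report "LORegions not implemented" (field == 0) to the guest */
    static uint64_t toy_mask_lor(uint64_t id_aa64mmfr1)
    {
            return id_aa64mmfr1 & ~(0xfULL << TOY_LOR_SHIFT);
    }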
| @@ -1178,6 +1291,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
| 1178 | { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, | 1291 | { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, |
| 1179 | { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, | 1292 | { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, |
| 1180 | 1293 | ||
| 1294 | { SYS_DESC(SYS_LORSA_EL1), trap_undef }, | ||
| 1295 | { SYS_DESC(SYS_LOREA_EL1), trap_undef }, | ||
| 1296 | { SYS_DESC(SYS_LORN_EL1), trap_undef }, | ||
| 1297 | { SYS_DESC(SYS_LORC_EL1), trap_undef }, | ||
| 1298 | { SYS_DESC(SYS_LORID_EL1), trap_undef }, | ||
| 1299 | |||
| 1181 | { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, | 1300 | { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, |
| 1182 | { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, | 1301 | { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 }, |
| 1183 | 1302 | ||
| @@ -1545,6 +1664,11 @@ static const struct sys_reg_desc cp15_regs[] = { | |||
| 1545 | 1664 | ||
| 1546 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, | 1665 | { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, |
| 1547 | 1666 | ||
| 1667 | /* CNTP_TVAL */ | ||
| 1668 | { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval }, | ||
| 1669 | /* CNTP_CTL */ | ||
| 1670 | { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl }, | ||
| 1671 | |||
| 1548 | /* PMEVCNTRn */ | 1672 | /* PMEVCNTRn */ |
| 1549 | PMU_PMEVCNTR(0), | 1673 | PMU_PMEVCNTR(0), |
| 1550 | PMU_PMEVCNTR(1), | 1674 | PMU_PMEVCNTR(1), |
| @@ -1618,6 +1742,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { | |||
| 1618 | { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, | 1742 | { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, |
| 1619 | { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, | 1743 | { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, |
| 1620 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, | 1744 | { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, |
| 1745 | { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, | ||
| 1621 | }; | 1746 | }; |
| 1622 | 1747 | ||
| 1623 | /* Target specific emulation tables */ | 1748 | /* Target specific emulation tables */ |
| @@ -2194,7 +2319,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg | |||
| 2194 | if (r->get_user) | 2319 | if (r->get_user) |
| 2195 | return (r->get_user)(vcpu, r, reg, uaddr); | 2320 | return (r->get_user)(vcpu, r, reg, uaddr); |
| 2196 | 2321 | ||
| 2197 | return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id); | 2322 | return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id); |
| 2198 | } | 2323 | } |
| 2199 | 2324 | ||
| 2200 | int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | 2325 | int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) |
| @@ -2215,7 +2340,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg | |||
| 2215 | if (r->set_user) | 2340 | if (r->set_user) |
| 2216 | return (r->set_user)(vcpu, r, reg, uaddr); | 2341 | return (r->set_user)(vcpu, r, reg, uaddr); |
| 2217 | 2342 | ||
| 2218 | return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); | 2343 | return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); |
| 2219 | } | 2344 | } |
| 2220 | 2345 | ||
| 2221 | static unsigned int num_demux_regs(void) | 2346 | static unsigned int num_demux_regs(void) |
| @@ -2421,6 +2546,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) | |||
| 2421 | reset_sys_reg_descs(vcpu, table, num); | 2546 | reset_sys_reg_descs(vcpu, table, num); |
| 2422 | 2547 | ||
| 2423 | for (num = 1; num < NR_SYS_REGS; num++) | 2548 | for (num = 1; num < NR_SYS_REGS; num++) |
| 2424 | if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242) | 2549 | if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242) |
| 2425 | panic("Didn't reset vcpu_sys_reg(%zi)", num); | 2550 | panic("Didn't reset __vcpu_sys_reg(%zi)", num); |
| 2426 | } | 2551 | } |
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 060f5348ef25..cd710f8b63e0 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h | |||
| @@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu, | |||
| 89 | { | 89 | { |
| 90 | BUG_ON(!r->reg); | 90 | BUG_ON(!r->reg); |
| 91 | BUG_ON(r->reg >= NR_SYS_REGS); | 91 | BUG_ON(r->reg >= NR_SYS_REGS); |
| 92 | vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; | 92 | __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | 95 | static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) |
| 96 | { | 96 | { |
| 97 | BUG_ON(!r->reg); | 97 | BUG_ON(!r->reg); |
| 98 | BUG_ON(r->reg >= NR_SYS_REGS); | 98 | BUG_ON(r->reg >= NR_SYS_REGS); |
| 99 | vcpu_sys_reg(vcpu, r->reg) = r->val; | 99 | __vcpu_sys_reg(vcpu, r->reg) = r->val; |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | static inline int cmp_sys_reg(const struct sys_reg_desc *i1, | 102 | static inline int cmp_sys_reg(const struct sys_reg_desc *i1, |
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 969ade1d333d..ddb8497d18d6 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c | |||
| @@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, | |||
| 38 | if (p->is_write) | 38 | if (p->is_write) |
| 39 | return ignore_write(vcpu, p); | 39 | return ignore_write(vcpu, p); |
| 40 | 40 | ||
| 41 | p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1); | 41 | p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); |
| 42 | return true; | 42 | return true; |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) | 45 | static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) |
| 46 | { | 46 | { |
| 47 | vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); | 47 | __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | /* | 50 | /* |
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c new file mode 100644 index 000000000000..c712a7376bc1 --- /dev/null +++ b/arch/arm64/kvm/va_layout.c | |||
| @@ -0,0 +1,227 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2017 ARM Ltd. | ||
| 3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License version 2 as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/kvm_host.h> | ||
| 19 | #include <linux/random.h> | ||
| 20 | #include <linux/memblock.h> | ||
| 21 | #include <asm/alternative.h> | ||
| 22 | #include <asm/debug-monitors.h> | ||
| 23 | #include <asm/insn.h> | ||
| 24 | #include <asm/kvm_mmu.h> | ||
| 25 | |||
| 26 | /* | ||
| 27 | * The LSB of the random hyp VA tag or 0 if no randomization is used. | ||
| 28 | */ | ||
| 29 | static u8 tag_lsb; | ||
| 30 | /* | ||
| 31 | * The random hyp VA tag value with the region bit if hyp randomization is used | ||
| 32 | */ | ||
| 33 | static u64 tag_val; | ||
| 34 | static u64 va_mask; | ||
| 35 | |||
| 36 | static void compute_layout(void) | ||
| 37 | { | ||
| 38 | phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); | ||
| 39 | u64 hyp_va_msb; | ||
| 40 | int kva_msb; | ||
| 41 | |||
| 42 | /* Where is my RAM region? */ | ||
| 43 | hyp_va_msb = idmap_addr & BIT(VA_BITS - 1); | ||
| 44 | hyp_va_msb ^= BIT(VA_BITS - 1); | ||
| 45 | |||
| 46 | kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ | ||
| 47 | (u64)(high_memory - 1)); | ||
| 48 | |||
| 49 | if (kva_msb == (VA_BITS - 1)) { | ||
| 50 | /* | ||
| 51 | * No space in the address, let's compute the mask so | ||
| 52 | * that it covers (VA_BITS - 1) bits, and the region | ||
| 53 | * bit. The tag stays set to zero. | ||
| 54 | */ | ||
| 55 | va_mask = BIT(VA_BITS - 1) - 1; | ||
| 56 | va_mask |= hyp_va_msb; | ||
| 57 | } else { | ||
| 58 | /* | ||
| 59 | * We do have some free bits to insert a random tag. | ||
| 60 | * Hyp VAs are now created from kernel linear map VAs | ||
| 61 | * using the following formula (with V == VA_BITS): | ||
| 62 | * | ||
| 63 | * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 | ||
| 64 | * --------------------------------------------------------- | ||
| 65 | * | 0000000 | hyp_va_msb | random tag | kern linear VA | | ||
| 66 | */ | ||
| 67 | tag_lsb = kva_msb; | ||
| 68 | va_mask = GENMASK_ULL(tag_lsb - 1, 0); | ||
| 69 | tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb); | ||
| 70 | tag_val |= hyp_va_msb; | ||
| 71 | tag_val >>= tag_lsb; | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | static u32 compute_instruction(int n, u32 rd, u32 rn) | ||
| 76 | { | ||
| 77 | u32 insn = AARCH64_BREAK_FAULT; | ||
| 78 | |||
| 79 | switch (n) { | ||
| 80 | case 0: | ||
| 81 | insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND, | ||
| 82 | AARCH64_INSN_VARIANT_64BIT, | ||
| 83 | rn, rd, va_mask); | ||
| 84 | break; | ||
| 85 | |||
| 86 | case 1: | ||
| 87 | /* ROR is a variant of EXTR with Rm = Rn */ | ||
| 88 | insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT, | ||
| 89 | rn, rn, rd, | ||
| 90 | tag_lsb); | ||
| 91 | break; | ||
| 92 | |||
| 93 | case 2: | ||
| 94 | insn = aarch64_insn_gen_add_sub_imm(rd, rn, | ||
| 95 | tag_val & GENMASK(11, 0), | ||
| 96 | AARCH64_INSN_VARIANT_64BIT, | ||
| 97 | AARCH64_INSN_ADSB_ADD); | ||
| 98 | break; | ||
| 99 | |||
| 100 | case 3: | ||
| 101 | insn = aarch64_insn_gen_add_sub_imm(rd, rn, | ||
| 102 | tag_val & GENMASK(23, 12), | ||
| 103 | AARCH64_INSN_VARIANT_64BIT, | ||
| 104 | AARCH64_INSN_ADSB_ADD); | ||
| 105 | break; | ||
| 106 | |||
| 107 | case 4: | ||
| 108 | /* ROR is a variant of EXTR with Rm = Rn */ | ||
| 109 | insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT, | ||
| 110 | rn, rn, rd, 64 - tag_lsb); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | |||
| 114 | return insn; | ||
| 115 | } | ||
| 116 | |||
| 117 | void __init kvm_update_va_mask(struct alt_instr *alt, | ||
| 118 | __le32 *origptr, __le32 *updptr, int nr_inst) | ||
| 119 | { | ||
| 120 | int i; | ||
| 121 | |||
| 122 | BUG_ON(nr_inst != 5); | ||
| 123 | |||
| 124 | if (!has_vhe() && !va_mask) | ||
| 125 | compute_layout(); | ||
| 126 | |||
| 127 | for (i = 0; i < nr_inst; i++) { | ||
| 128 | u32 rd, rn, insn, oinsn; | ||
| 129 | |||
| 130 | /* | ||
| 131 | * VHE doesn't need any address translation, let's NOP | ||
| 132 | * everything. | ||
| 133 | * | ||
| 134 | * Alternatively, if we don't have any spare bits in | ||
| 135 | * the address, NOP everything after masking that | ||
| 136 | * kernel VA. | ||
| 137 | */ | ||
| 138 | if (has_vhe() || (!tag_lsb && i > 0)) { | ||
| 139 | updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); | ||
| 140 | continue; | ||
| 141 | } | ||
| 142 | |||
| 143 | oinsn = le32_to_cpu(origptr[i]); | ||
| 144 | rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); | ||
| 145 | rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn); | ||
| 146 | |||
| 147 | insn = compute_instruction(i, rd, rn); | ||
| 148 | BUG_ON(insn == AARCH64_BREAK_FAULT); | ||
| 149 | |||
| 150 | updptr[i] = cpu_to_le32(insn); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | void *__kvm_bp_vect_base; | ||
| 155 | int __kvm_harden_el2_vector_slot; | ||
| 156 | |||
| 157 | void kvm_patch_vector_branch(struct alt_instr *alt, | ||
| 158 | __le32 *origptr, __le32 *updptr, int nr_inst) | ||
| 159 | { | ||
| 160 | u64 addr; | ||
| 161 | u32 insn; | ||
| 162 | |||
| 163 | BUG_ON(nr_inst != 5); | ||
| 164 | |||
| 165 | if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { | ||
| 166 | WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)); | ||
| 167 | return; | ||
| 168 | } | ||
| 169 | |||
| 170 | if (!va_mask) | ||
| 171 | compute_layout(); | ||
| 172 | |||
| 173 | /* | ||
| 174 | * Compute HYP VA by using the same computation as kern_hyp_va() | ||
| 175 | */ | ||
| 176 | addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector); | ||
| 177 | addr &= va_mask; | ||
| 178 | addr |= tag_val << tag_lsb; | ||
| 179 | |||
| 180 | /* Use PC[10:7] to branch to the same vector in KVM */ | ||
| 181 | addr |= ((u64)origptr & GENMASK_ULL(10, 7)); | ||
| 182 | |||
| 183 | /* | ||
| 184 | * Branch to the second instruction in the vectors in order to | ||
| 185 | * avoid the initial store on the stack (which we already | ||
| 186 | * perform in the hardening vectors). | ||
| 187 | */ | ||
| 188 | addr += AARCH64_INSN_SIZE; | ||
| 189 | |||
| 190 | /* stp x0, x1, [sp, #-16]! */ | ||
| 191 | insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, | ||
| 192 | AARCH64_INSN_REG_1, | ||
| 193 | AARCH64_INSN_REG_SP, | ||
| 194 | -16, | ||
| 195 | AARCH64_INSN_VARIANT_64BIT, | ||
| 196 | AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX); | ||
| 197 | *updptr++ = cpu_to_le32(insn); | ||
| 198 | |||
| 199 | /* movz x0, #(addr & 0xffff) */ | ||
| 200 | insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, | ||
| 201 | (u16)addr, | ||
| 202 | 0, | ||
| 203 | AARCH64_INSN_VARIANT_64BIT, | ||
| 204 | AARCH64_INSN_MOVEWIDE_ZERO); | ||
| 205 | *updptr++ = cpu_to_le32(insn); | ||
| 206 | |||
| 207 | /* movk x0, #((addr >> 16) & 0xffff), lsl #16 */ | ||
| 208 | insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, | ||
| 209 | (u16)(addr >> 16), | ||
| 210 | 16, | ||
| 211 | AARCH64_INSN_VARIANT_64BIT, | ||
| 212 | AARCH64_INSN_MOVEWIDE_KEEP); | ||
| 213 | *updptr++ = cpu_to_le32(insn); | ||
| 214 | |||
| 215 | /* movk x0, #((addr >> 32) & 0xffff), lsl #32 */ | ||
| 216 | insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, | ||
| 217 | (u16)(addr >> 32), | ||
| 218 | 32, | ||
| 219 | AARCH64_INSN_VARIANT_64BIT, | ||
| 220 | AARCH64_INSN_MOVEWIDE_KEEP); | ||
| 221 | *updptr++ = cpu_to_le32(insn); | ||
| 222 | |||
| 223 | /* br x0 */ | ||
| 224 | insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0, | ||
| 225 | AARCH64_INSN_BRANCH_NOLINK); | ||
| 226 | *updptr++ = cpu_to_le32(insn); | ||
| 227 | } | ||
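
va_layout.c generates, at boot, the five-instruction sequence that turns a kernel linear-map VA into a HYP VA: keep the low tag_lsb bits of the kernel address and overlay the (possibly random) tag, including the flipped "region" bit, above them. kvm_patch_vector_branch() then builds an absolute branch into the hardening vectors, using bits [10:7] of the call site's own PC to select the matching vector slot and skipping the vector's first instruction. A stand-alone model of the resulting address math (struct and field names are illustrative):

    #include <stdint.h>

    struct toy_hyp_layout {
            unsigned int tag_lsb;   /* first bit above the preserved kernel VA bits */
            uint64_t     tag_val;   /* random tag plus the flipped "region" bit     */
            uint64_t     va_mask;   /* kernel VA bits that survive the translation  */
    };

    /* Net effect of the patched kern_hyp_va() sequence: mask, then insert the tag */
    static uint64_t toy_kern_hyp_va(const struct toy_hyp_layout *l, uint64_t kva)
    {
            return (kva & l->va_mask) | (l->tag_val << l->tag_lsb);
    }

    /* Vector-slot selection used by kvm_patch_vector_branch(): PC bits [10:7] */
    static uint64_t toy_vector_target(const struct toy_hyp_layout *l,
                                      uint64_t hyp_vector_kva, uint64_t call_site_pc)
    {
            uint64_t addr = toy_kern_hyp_va(l, hyp_vector_kva);

            addr |= call_site_pc & 0x780ULL;   /* GENMASK_ULL(10, 7): pick the vector */
            addr += 4;                         /* skip the vector's initial stp       */
            return addr;
    }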
diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h index 60b1aa0b7014..b57e978b0946 100644 --- a/arch/mips/include/asm/kvm_para.h +++ b/arch/mips/include/asm/kvm_para.h | |||
| @@ -94,6 +94,11 @@ static inline unsigned int kvm_arch_para_features(void) | |||
| 94 | return 0; | 94 | return 0; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | static inline unsigned int kvm_arch_para_hints(void) | ||
| 98 | { | ||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 97 | #ifdef CONFIG_MIPS_PARAVIRT | 102 | #ifdef CONFIG_MIPS_PARAVIRT |
| 98 | static inline bool kvm_para_available(void) | 103 | static inline bool kvm_para_available(void) |
| 99 | { | 104 | { |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index deb54293398c..17498e9a26e4 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -60,7 +60,6 @@ | |||
| 60 | 60 | ||
| 61 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 61 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
| 62 | 62 | ||
| 63 | extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | ||
| 64 | extern int kvm_unmap_hva_range(struct kvm *kvm, | 63 | extern int kvm_unmap_hva_range(struct kvm *kvm, |
| 65 | unsigned long start, unsigned long end); | 64 | unsigned long start, unsigned long end); |
| 66 | extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); | 65 | extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); |
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 336a91acb8b1..5ceb4efca65f 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h | |||
| @@ -61,6 +61,11 @@ static inline unsigned int kvm_arch_para_features(void) | |||
| 61 | return r; | 61 | return r; |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | static inline unsigned int kvm_arch_para_hints(void) | ||
| 65 | { | ||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | |||
| 64 | static inline bool kvm_check_and_clear_guest_paused(void) | 69 | static inline bool kvm_check_and_clear_guest_paused(void) |
| 65 | { | 70 | { |
| 66 | return false; | 71 | return false; |
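
Both the MIPS and powerpc headers gain a kvm_arch_para_hints() stub that returns 0, so generic guest code can query hypervisor hints unconditionally; architectures that expose no hints simply report an empty mask. A hedged sketch of a caller, where the hint bit name and position are assumptions made only for illustration:

    #include <linux/kvm_para.h>

    #define TOY_KVM_HINT_DEDICATED (1U << 0)   /* assumed bit, illustration only */

    static bool toy_guest_has_dedicated_cpus(void)
    {
            /* With the stubs above, kvm_arch_para_hints() is 0 and this is false */
            return kvm_para_available() &&
                   (kvm_arch_para_hints() & TOY_KVM_HINT_DEDICATED);
    }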
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b7d066b037da..abe7032cdb54 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
| @@ -295,7 +295,6 @@ struct kvmppc_ops { | |||
| 295 | const struct kvm_userspace_memory_region *mem, | 295 | const struct kvm_userspace_memory_region *mem, |
| 296 | const struct kvm_memory_slot *old, | 296 | const struct kvm_memory_slot *old, |
| 297 | const struct kvm_memory_slot *new); | 297 | const struct kvm_memory_slot *new); |
| 298 | int (*unmap_hva)(struct kvm *kvm, unsigned long hva); | ||
| 299 | int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, | 298 | int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, |
| 300 | unsigned long end); | 299 | unsigned long end); |
| 301 | int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); | 300 | int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 234531d1bee1..97d4a112648f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
| @@ -819,12 +819,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, | |||
| 819 | kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); | 819 | kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); |
| 820 | } | 820 | } |
| 821 | 821 | ||
| 822 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
| 823 | { | ||
| 824 | return kvm->arch.kvm_ops->unmap_hva(kvm, hva); | ||
| 825 | } | ||
| 826 | EXPORT_SYMBOL_GPL(kvm_unmap_hva); | ||
| 827 | |||
| 828 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | 822 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) |
| 829 | { | 823 | { |
| 830 | return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); | 824 | return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); |
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index d2b3ec088b8c..4ad5e287b8bc 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | 14 | ||
| 15 | extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, | 15 | extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, |
| 16 | struct kvm_memory_slot *memslot); | 16 | struct kvm_memory_slot *memslot); |
| 17 | extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); | ||
| 18 | extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, | 17 | extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, |
| 19 | unsigned long end); | 18 | unsigned long end); |
| 20 | extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, | 19 | extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index ef243fed2f2b..a670fa5fbe50 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
| @@ -877,15 +877,6 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
| 877 | return 0; | 877 | return 0; |
| 878 | } | 878 | } |
| 879 | 879 | ||
| 880 | int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) | ||
| 881 | { | ||
| 882 | hva_handler_fn handler; | ||
| 883 | |||
| 884 | handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp; | ||
| 885 | kvm_handle_hva(kvm, hva, handler); | ||
| 886 | return 0; | ||
| 887 | } | ||
| 888 | |||
| 889 | int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) | 880 | int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) |
| 890 | { | 881 | { |
| 891 | hva_handler_fn handler; | 882 | hva_handler_fn handler; |
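
The per-page kvm_unmap_hva() entry points are dropped here (and from the kvmppc_ops table above) because the MMU-notifier path only needs the range variant; a single page is just a one-page range. Roughly, the removed helper reduces to the surviving API as sketched below (a hedged restatement, not kernel code):

    #include <linux/kvm_host.h>

    /* What the removed helper boiled down to, expressed via the range variant */
    static int toy_unmap_single_hva(struct kvm *kvm, unsigned long hva)
    {
            return kvm_unmap_hva_range(kvm, hva, hva + PAGE_SIZE);
    }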
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 5d9bafe9a371..a57eafec4dc2 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c | |||
| @@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, | |||
| 150 | { | 150 | { |
| 151 | int psize = MMU_BASE_PSIZE; | 151 | int psize = MMU_BASE_PSIZE; |
| 152 | 152 | ||
| 153 | if (pshift >= PMD_SHIFT) | 153 | if (pshift >= PUD_SHIFT) |
| 154 | psize = MMU_PAGE_1G; | ||
| 155 | else if (pshift >= PMD_SHIFT) | ||
| 154 | psize = MMU_PAGE_2M; | 156 | psize = MMU_PAGE_2M; |
| 155 | addr &= ~0xfffUL; | 157 | addr &= ~0xfffUL; |
| 156 | addr |= mmu_psize_defs[psize].ap << 5; | 158 | addr |= mmu_psize_defs[psize].ap << 5; |
| @@ -163,6 +165,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, | |||
| 163 | asm volatile("ptesync": : :"memory"); | 165 | asm volatile("ptesync": : :"memory"); |
| 164 | } | 166 | } |
| 165 | 167 | ||
| 168 | static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) | ||
| 169 | { | ||
| 170 | unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ | ||
| 171 | |||
| 172 | asm volatile("ptesync": : :"memory"); | ||
| 173 | /* RIC=1 PRS=0 R=1 IS=2 */ | ||
| 174 | asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1) | ||
| 175 | : : "r" (rb), "r" (kvm->arch.lpid) : "memory"); | ||
| 176 | asm volatile("ptesync": : :"memory"); | ||
| 177 | } | ||
| 178 | |||
| 166 | unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, | 179 | unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, |
| 167 | unsigned long clr, unsigned long set, | 180 | unsigned long clr, unsigned long set, |
| 168 | unsigned long addr, unsigned int shift) | 181 | unsigned long addr, unsigned int shift) |
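
kvmppc_radix_tlbie_page() now distinguishes three mapping sizes by the page-table shift of the entry being invalidated: PUD-level entries are 1GB pages, PMD-level entries are 2MB, and anything smaller uses the base page size. The selection in isolation (the shift constants are the usual radix values, quoted here for illustration only):

    enum toy_psize { TOY_PAGE_BASE, TOY_PAGE_2M, TOY_PAGE_1G };

    #define TOY_PMD_SHIFT 21   /* 2MB mappings on the radix MMU (illustrative) */
    #define TOY_PUD_SHIFT 30   /* 1GB mappings on the radix MMU (illustrative) */

    static enum toy_psize toy_psize_for_shift(unsigned int pshift)
    {
            if (pshift >= TOY_PUD_SHIFT)
                    return TOY_PAGE_1G;
            if (pshift >= TOY_PMD_SHIFT)
                    return TOY_PAGE_2M;
            return TOY_PAGE_BASE;
    }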
| @@ -223,9 +236,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, | |||
| 223 | new_pud = pud_alloc_one(kvm->mm, gpa); | 236 | new_pud = pud_alloc_one(kvm->mm, gpa); |
| 224 | 237 | ||
| 225 | pmd = NULL; | 238 | pmd = NULL; |
| 226 | if (pud && pud_present(*pud)) | 239 | if (pud && pud_present(*pud) && !pud_huge(*pud)) |
| 227 | pmd = pmd_offset(pud, gpa); | 240 | pmd = pmd_offset(pud, gpa); |
| 228 | else | 241 | else if (level <= 1) |
| 229 | new_pmd = pmd_alloc_one(kvm->mm, gpa); | 242 | new_pmd = pmd_alloc_one(kvm->mm, gpa); |
| 230 | 243 | ||
| 231 | if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) | 244 | if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) |
| @@ -246,6 +259,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, | |||
| 246 | new_pud = NULL; | 259 | new_pud = NULL; |
| 247 | } | 260 | } |
| 248 | pud = pud_offset(pgd, gpa); | 261 | pud = pud_offset(pgd, gpa); |
| 262 | if (pud_huge(*pud)) { | ||
| 263 | unsigned long hgpa = gpa & PUD_MASK; | ||
| 264 | |||
| 265 | /* | ||
| 266 | * If we raced with another CPU which has just put | ||
| 267 | * a 1GB pte in after we saw a pmd page, try again. | ||
| 268 | */ | ||
| 269 | if (level <= 1 && !new_pmd) { | ||
| 270 | ret = -EAGAIN; | ||
| 271 | goto out_unlock; | ||
| 272 | } | ||
| 273 | /* Check if we raced and someone else has set the same thing */ | ||
| 274 | if (level == 2 && pud_raw(*pud) == pte_raw(pte)) { | ||
| 275 | ret = 0; | ||
| 276 | goto out_unlock; | ||
| 277 | } | ||
| 278 | /* Valid 1GB page here already, remove it */ | ||
| 279 | old = kvmppc_radix_update_pte(kvm, (pte_t *)pud, | ||
| 280 | ~0UL, 0, hgpa, PUD_SHIFT); | ||
| 281 | kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT); | ||
| 282 | if (old & _PAGE_DIRTY) { | ||
| 283 | unsigned long gfn = hgpa >> PAGE_SHIFT; | ||
| 284 | struct kvm_memory_slot *memslot; | ||
| 285 | memslot = gfn_to_memslot(kvm, gfn); | ||
| 286 | if (memslot && memslot->dirty_bitmap) | ||
| 287 | kvmppc_update_dirty_map(memslot, | ||
| 288 | gfn, PUD_SIZE); | ||
| 289 | } | ||
| 290 | } | ||
| 291 | if (level == 2) { | ||
| 292 | if (!pud_none(*pud)) { | ||
| 293 | /* | ||
| 294 | * There's a page table page here, but we wanted to | ||
| 295 | * install a large page, so remove and free the page | ||
| 296 | * table page. new_pmd will be NULL since level == 2. | ||
| 297 | */ | ||
| 298 | new_pmd = pmd_offset(pud, 0); | ||
| 299 | pud_clear(pud); | ||
| 300 | kvmppc_radix_flush_pwc(kvm, gpa); | ||
| 301 | } | ||
| 302 | kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte); | ||
| 303 | ret = 0; | ||
| 304 | goto out_unlock; | ||
| 305 | } | ||
| 249 | if (pud_none(*pud)) { | 306 | if (pud_none(*pud)) { |
| 250 | if (!new_pmd) | 307 | if (!new_pmd) |
| 251 | goto out_unlock; | 308 | goto out_unlock; |
| @@ -264,6 +321,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, | |||
| 264 | ret = -EAGAIN; | 321 | ret = -EAGAIN; |
| 265 | goto out_unlock; | 322 | goto out_unlock; |
| 266 | } | 323 | } |
| 324 | /* Check if we raced and someone else has set the same thing */ | ||
| 325 | if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) { | ||
| 326 | ret = 0; | ||
| 327 | goto out_unlock; | ||
| 328 | } | ||
| 267 | /* Valid 2MB page here already, remove it */ | 329 | /* Valid 2MB page here already, remove it */ |
| 268 | old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), | 330 | old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd), |
| 269 | ~0UL, 0, lgpa, PMD_SHIFT); | 331 | ~0UL, 0, lgpa, PMD_SHIFT); |
| @@ -276,35 +338,43 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, | |||
| 276 | kvmppc_update_dirty_map(memslot, | 338 | kvmppc_update_dirty_map(memslot, |
| 277 | gfn, PMD_SIZE); | 339 | gfn, PMD_SIZE); |
| 278 | } | 340 | } |
| 279 | } else if (level == 1 && !pmd_none(*pmd)) { | ||
| 280 | /* | ||
| 281 | * There's a page table page here, but we wanted | ||
| 282 | * to install a large page. Tell the caller and let | ||
| 283 | * it try installing a normal page if it wants. | ||
| 284 | */ | ||
| 285 | ret = -EBUSY; | ||
| 286 | goto out_unlock; | ||
| 287 | } | 341 | } |
| 288 | if (level == 0) { | 342 | if (level == 1) { |
| 289 | if (pmd_none(*pmd)) { | 343 | if (!pmd_none(*pmd)) { |
| 290 | if (!new_ptep) | 344 | /* |
| 291 | goto out_unlock; | 345 | * There's a page table page here, but we wanted to |
| 292 | pmd_populate(kvm->mm, pmd, new_ptep); | 346 | * install a large page, so remove and free the page |
| 293 | new_ptep = NULL; | 347 | * table page. new_ptep will be NULL since level == 1. |
| 294 | } | 348 | */ |
| 295 | ptep = pte_offset_kernel(pmd, gpa); | 349 | new_ptep = pte_offset_kernel(pmd, 0); |
| 296 | if (pte_present(*ptep)) { | 350 | pmd_clear(pmd); |
| 297 | /* PTE was previously valid, so invalidate it */ | 351 | kvmppc_radix_flush_pwc(kvm, gpa); |
| 298 | old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, | ||
| 299 | 0, gpa, 0); | ||
| 300 | kvmppc_radix_tlbie_page(kvm, gpa, 0); | ||
| 301 | if (old & _PAGE_DIRTY) | ||
| 302 | mark_page_dirty(kvm, gpa >> PAGE_SHIFT); | ||
| 303 | } | 352 | } |
| 304 | kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); | ||
| 305 | } else { | ||
| 306 | kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); | 353 | kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); |
| 354 | ret = 0; | ||
| 355 | goto out_unlock; | ||
| 307 | } | 356 | } |
| 357 | if (pmd_none(*pmd)) { | ||
| 358 | if (!new_ptep) | ||
| 359 | goto out_unlock; | ||
| 360 | pmd_populate(kvm->mm, pmd, new_ptep); | ||
| 361 | new_ptep = NULL; | ||
| 362 | } | ||
| 363 | ptep = pte_offset_kernel(pmd, gpa); | ||
| 364 | if (pte_present(*ptep)) { | ||
| 365 | /* Check if someone else set the same thing */ | ||
| 366 | if (pte_raw(*ptep) == pte_raw(pte)) { | ||
| 367 | ret = 0; | ||
| 368 | goto out_unlock; | ||
| 369 | } | ||
| 370 | /* PTE was previously valid, so invalidate it */ | ||
| 371 | old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, | ||
| 372 | 0, gpa, 0); | ||
| 373 | kvmppc_radix_tlbie_page(kvm, gpa, 0); | ||
| 374 | if (old & _PAGE_DIRTY) | ||
| 375 | mark_page_dirty(kvm, gpa >> PAGE_SHIFT); | ||
| 376 | } | ||
| 377 | kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); | ||
| 308 | ret = 0; | 378 | ret = 0; |
| 309 | 379 | ||
| 310 | out_unlock: | 380 | out_unlock: |
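
The rewritten tail of kvmppc_create_pte() adds an idempotence check at every level: if another CPU raced in and already installed exactly the translation we were about to write, the fault is treated as resolved instead of invalidating and rewriting the entry. The shape of that check, as a toy model (the "present" bit and helper are illustrative):

    #include <stdint.h>

    typedef uint64_t toy_pte_t;            /* raw PTE bits, as pte_raw() exposes them */
    #define TOY_PTE_PRESENT (1ULL << 63)   /* illustrative "valid" bit */

    /* Stand-in for the real update + tlbie + dirty-map sequence */
    static void toy_invalidate(toy_pte_t *entry)
    {
            *entry = 0;
    }

    static void toy_install_pte(toy_pte_t *entry, toy_pte_t wanted)
    {
            if (*entry & TOY_PTE_PRESENT) {
                    if (*entry == wanted)
                            return;          /* raced: identical translation, done */
                    toy_invalidate(entry);   /* stale translation: remove it first */
            }
            *entry = wanted;
    }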
| @@ -325,11 +395,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 325 | unsigned long mmu_seq, pte_size; | 395 | unsigned long mmu_seq, pte_size; |
| 326 | unsigned long gpa, gfn, hva, pfn; | 396 | unsigned long gpa, gfn, hva, pfn; |
| 327 | struct kvm_memory_slot *memslot; | 397 | struct kvm_memory_slot *memslot; |
| 328 | struct page *page = NULL, *pages[1]; | 398 | struct page *page = NULL; |
| 329 | long ret, npages, ok; | 399 | long ret; |
| 330 | unsigned int writing; | 400 | bool writing; |
| 331 | struct vm_area_struct *vma; | 401 | bool upgrade_write = false; |
| 332 | unsigned long flags; | 402 | bool *upgrade_p = &upgrade_write; |
| 333 | pte_t pte, *ptep; | 403 | pte_t pte, *ptep; |
| 334 | unsigned long pgflags; | 404 | unsigned long pgflags; |
| 335 | unsigned int shift, level; | 405 | unsigned int shift, level; |
| @@ -369,122 +439,131 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 369 | dsisr & DSISR_ISSTORE); | 439 | dsisr & DSISR_ISSTORE); |
| 370 | } | 440 | } |
| 371 | 441 | ||
| 372 | /* used to check for invalidations in progress */ | ||
| 373 | mmu_seq = kvm->mmu_notifier_seq; | ||
| 374 | smp_rmb(); | ||
| 375 | |||
| 376 | writing = (dsisr & DSISR_ISSTORE) != 0; | 442 | writing = (dsisr & DSISR_ISSTORE) != 0; |
| 377 | hva = gfn_to_hva_memslot(memslot, gfn); | 443 | if (memslot->flags & KVM_MEM_READONLY) { |
| 444 | if (writing) { | ||
| 445 | /* give the guest a DSI */ | ||
| 446 | dsisr = DSISR_ISSTORE | DSISR_PROTFAULT; | ||
| 447 | kvmppc_core_queue_data_storage(vcpu, ea, dsisr); | ||
| 448 | return RESUME_GUEST; | ||
| 449 | } | ||
| 450 | upgrade_p = NULL; | ||
| 451 | } | ||
| 452 | |||
| 378 | if (dsisr & DSISR_SET_RC) { | 453 | if (dsisr & DSISR_SET_RC) { |
| 379 | /* | 454 | /* |
| 380 | * Need to set an R or C bit in the 2nd-level tables; | 455 | * Need to set an R or C bit in the 2nd-level tables; |
| 381 | * if the relevant bits aren't already set in the linux | 456 | * since we are just helping out the hardware here, |
| 382 | * page tables, fall through to do the gup_fast to | 457 | * it is sufficient to do what the hardware does. |
| 383 | * set them in the linux page tables too. | ||
| 384 | */ | 458 | */ |
| 385 | ok = 0; | ||
| 386 | pgflags = _PAGE_ACCESSED; | 459 | pgflags = _PAGE_ACCESSED; |
| 387 | if (writing) | 460 | if (writing) |
| 388 | pgflags |= _PAGE_DIRTY; | 461 | pgflags |= _PAGE_DIRTY; |
| 389 | local_irq_save(flags); | 462 | /* |
| 390 | ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); | 463 | * We are walking the secondary page table here. We can do this |
| 391 | if (ptep) { | 464 | * without disabling irq. |
| 392 | pte = READ_ONCE(*ptep); | 465 | */ |
| 393 | if (pte_present(pte) && | 466 | spin_lock(&kvm->mmu_lock); |
| 394 | (pte_val(pte) & pgflags) == pgflags) | 467 | ptep = __find_linux_pte(kvm->arch.pgtable, |
| 395 | ok = 1; | 468 | gpa, NULL, &shift); |
| 396 | } | 469 | if (ptep && pte_present(*ptep) && |
| 397 | local_irq_restore(flags); | 470 | (!writing || pte_write(*ptep))) { |
| 398 | if (ok) { | 471 | kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, |
| 399 | spin_lock(&kvm->mmu_lock); | 472 | gpa, shift); |
| 400 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { | 473 | dsisr &= ~DSISR_SET_RC; |
| 401 | spin_unlock(&kvm->mmu_lock); | ||
| 402 | return RESUME_GUEST; | ||
| 403 | } | ||
| 404 | /* | ||
| 405 | * We are walking the secondary page table here. We can do this | ||
| 406 | * without disabling irq. | ||
| 407 | */ | ||
| 408 | ptep = __find_linux_pte(kvm->arch.pgtable, | ||
| 409 | gpa, NULL, &shift); | ||
| 410 | if (ptep && pte_present(*ptep)) { | ||
| 411 | kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, | ||
| 412 | gpa, shift); | ||
| 413 | spin_unlock(&kvm->mmu_lock); | ||
| 414 | return RESUME_GUEST; | ||
| 415 | } | ||
| 416 | spin_unlock(&kvm->mmu_lock); | ||
| 417 | } | 474 | } |
| 475 | spin_unlock(&kvm->mmu_lock); | ||
| 476 | if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE | | ||
| 477 | DSISR_PROTFAULT | DSISR_SET_RC))) | ||
| 478 | return RESUME_GUEST; | ||
| 418 | } | 479 | } |
| 419 | 480 | ||
| 420 | ret = -EFAULT; | 481 | /* used to check for invalidations in progress */ |
| 421 | pfn = 0; | 482 | mmu_seq = kvm->mmu_notifier_seq; |
| 422 | pte_size = PAGE_SIZE; | 483 | smp_rmb(); |
| 423 | pgflags = _PAGE_READ | _PAGE_EXEC; | 484 | |
| 424 | level = 0; | 485 | /* |
| 425 | npages = get_user_pages_fast(hva, 1, writing, pages); | 486 | * Do a fast check first, since __gfn_to_pfn_memslot doesn't |
| 426 | if (npages < 1) { | 487 | * do it with !atomic && !async, which is how we call it. |
| 427 | /* Check if it's an I/O mapping */ | 488 | * We always ask for write permission since the common case |
| 428 | down_read(¤t->mm->mmap_sem); | 489 | * is that the page is writable. |
| 429 | vma = find_vma(current->mm, hva); | 490 | */ |
| 430 | if (vma && vma->vm_start <= hva && hva < vma->vm_end && | 491 | hva = gfn_to_hva_memslot(memslot, gfn); |
| 431 | (vma->vm_flags & VM_PFNMAP)) { | 492 | if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { |
| 432 | pfn = vma->vm_pgoff + | ||
| 433 | ((hva - vma->vm_start) >> PAGE_SHIFT); | ||
| 434 | pgflags = pgprot_val(vma->vm_page_prot); | ||
| 435 | } | ||
| 436 | up_read(¤t->mm->mmap_sem); | ||
| 437 | if (!pfn) | ||
| 438 | return -EFAULT; | ||
| 439 | } else { | ||
| 440 | page = pages[0]; | ||
| 441 | pfn = page_to_pfn(page); | 493 | pfn = page_to_pfn(page); |
| 442 | if (PageCompound(page)) { | 494 | upgrade_write = true; |
| 443 | pte_size <<= compound_order(compound_head(page)); | 495 | } else { |
| 444 | /* See if we can insert a 2MB large-page PTE here */ | 496 | /* Call KVM generic code to do the slow-path check */ |
| 445 | if (pte_size >= PMD_SIZE && | 497 | pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, |
| 446 | (gpa & (PMD_SIZE - PAGE_SIZE)) == | 498 | writing, upgrade_p); |
| 447 | (hva & (PMD_SIZE - PAGE_SIZE))) { | 499 | if (is_error_noslot_pfn(pfn)) |
| 448 | level = 1; | 500 | return -EFAULT; |
| 449 | pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); | 501 | page = NULL; |
| 450 | } | 502 | if (pfn_valid(pfn)) { |
| 503 | page = pfn_to_page(pfn); | ||
| 504 | if (PageReserved(page)) | ||
| 505 | page = NULL; | ||
| 451 | } | 506 | } |
| 452 | /* See if we can provide write access */ | 507 | } |
| 453 | if (writing) { | 508 | |
| 454 | pgflags |= _PAGE_WRITE; | 509 | /* See if we can insert a 1GB or 2MB large PTE here */ |
| 455 | } else { | 510 | level = 0; |
| 456 | local_irq_save(flags); | 511 | if (page && PageCompound(page)) { |
| 457 | ptep = find_current_mm_pte(current->mm->pgd, | 512 | pte_size = PAGE_SIZE << compound_order(compound_head(page)); |
| 458 | hva, NULL, NULL); | 513 | if (pte_size >= PUD_SIZE && |
| 459 | if (ptep && pte_write(*ptep)) | 514 | (gpa & (PUD_SIZE - PAGE_SIZE)) == |
| 460 | pgflags |= _PAGE_WRITE; | 515 | (hva & (PUD_SIZE - PAGE_SIZE))) { |
| 461 | local_irq_restore(flags); | 516 | level = 2; |
| 517 | pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1); | ||
| 518 | } else if (pte_size >= PMD_SIZE && | ||
| 519 | (gpa & (PMD_SIZE - PAGE_SIZE)) == | ||
| 520 | (hva & (PMD_SIZE - PAGE_SIZE))) { | ||
| 521 | level = 1; | ||
| 522 | pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); | ||
| 462 | } | 523 | } |
| 463 | } | 524 | } |
| 464 | 525 | ||
| 465 | /* | 526 | /* |
| 466 | * Compute the PTE value that we need to insert. | 527 | * Compute the PTE value that we need to insert. |
| 467 | */ | 528 | */ |
| 468 | pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED; | 529 | if (page) { |
| 469 | if (pgflags & _PAGE_WRITE) | 530 | pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE | |
| 470 | pgflags |= _PAGE_DIRTY; | 531 | _PAGE_ACCESSED; |
| 471 | pte = pfn_pte(pfn, __pgprot(pgflags)); | 532 | if (writing || upgrade_write) |
| 472 | 533 | pgflags |= _PAGE_WRITE | _PAGE_DIRTY; | |
| 473 | /* Allocate space in the tree and write the PTE */ | 534 | pte = pfn_pte(pfn, __pgprot(pgflags)); |
| 474 | ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); | 535 | } else { |
| 475 | if (ret == -EBUSY) { | ||
| 476 | /* | 536 | /* |
| 477 | * There's already a PMD where wanted to install a large page; | 537 | * Read the PTE from the process' radix tree and use that |
| 478 | * for now, fall back to installing a small page. | 538 | * so we get the attribute bits. |
| 479 | */ | 539 | */ |
| 480 | level = 0; | 540 | local_irq_disable(); |
| 481 | pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1); | 541 | ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); |
| 482 | pte = pfn_pte(pfn, __pgprot(pgflags)); | 542 | pte = *ptep; |
| 483 | ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); | 543 | local_irq_enable(); |
| 544 | if (shift == PUD_SHIFT && | ||
| 545 | (gpa & (PUD_SIZE - PAGE_SIZE)) == | ||
| 546 | (hva & (PUD_SIZE - PAGE_SIZE))) { | ||
| 547 | level = 2; | ||
| 548 | } else if (shift == PMD_SHIFT && | ||
| 549 | (gpa & (PMD_SIZE - PAGE_SIZE)) == | ||
| 550 | (hva & (PMD_SIZE - PAGE_SIZE))) { | ||
| 551 | level = 1; | ||
| 552 | } else if (shift && shift != PAGE_SHIFT) { | ||
| 553 | /* Adjust PFN */ | ||
| 554 | unsigned long mask = (1ul << shift) - PAGE_SIZE; | ||
| 555 | pte = __pte(pte_val(pte) | (hva & mask)); | ||
| 556 | } | ||
| 557 | if (!(writing || upgrade_write)) | ||
| 558 | pte = __pte(pte_val(pte) & ~ _PAGE_WRITE); | ||
| 559 | pte = __pte(pte_val(pte) | _PAGE_EXEC); | ||
| 484 | } | 560 | } |
| 485 | 561 | ||
| 562 | /* Allocate space in the tree and write the PTE */ | ||
| 563 | ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); | ||
| 564 | |||
| 486 | if (page) { | 565 | if (page) { |
| 487 | if (!ret && (pgflags & _PAGE_WRITE)) | 566 | if (!ret && (pte_val(pte) & _PAGE_WRITE)) |
| 488 | set_page_dirty_lock(page); | 567 | set_page_dirty_lock(page); |
| 489 | put_page(page); | 568 | put_page(page); |
| 490 | } | 569 | } |
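The fault path above now tries 1GB (level 2) and 2MB (level 1) leaf entries. A compilable sketch of just the address-alignment part of that decision follows: a large mapping is possible only when gpa and hva are congruent modulo the large page size, and the pfn is then rounded down to the large-page boundary. The real code additionally requires the backing page (or host PTE) to be at least that large; sizes and names below are local stand-ins, not kernel symbols.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PMD_SIZE   (1UL << 21)   /* 2MB */
#define PUD_SIZE   (1UL << 30)   /* 1GB */

static int pick_level(uint64_t gpa, uint64_t hva, uint64_t *pfn)
{
    if ((gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE))) {
        *pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
        return 2;                         /* 1GB leaf at the PUD level */
    }
    if ((gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE))) {
        *pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
        return 1;                         /* 2MB leaf at the PMD level */
    }
    return 0;                             /* fall back to a 4kB page */
}

int main(void)
{
    uint64_t pfn = 0x12345;
    int level = pick_level(0x40200000UL, 0x7f0040200000UL, &pfn);
    printf("level=%d pfn=0x%llx\n", level, (unsigned long long)pfn);
    return 0;
}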
| @@ -662,6 +741,10 @@ void kvmppc_free_radix(struct kvm *kvm) | |||
| 662 | for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { | 741 | for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { |
| 663 | if (!pud_present(*pud)) | 742 | if (!pud_present(*pud)) |
| 664 | continue; | 743 | continue; |
| 744 | if (pud_huge(*pud)) { | ||
| 745 | pud_clear(pud); | ||
| 746 | continue; | ||
| 747 | } | ||
| 665 | pmd = pmd_offset(pud, 0); | 748 | pmd = pmd_offset(pud, 0); |
| 666 | for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { | 749 | for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { |
| 667 | if (pmd_is_leaf(*pmd)) { | 750 | if (pmd_is_leaf(*pmd)) { |
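The kvmppc_free_radix() change adds a pud_huge() case: a huge upper-level entry is itself a leaf, so there is no lower-level table underneath it to walk or free. A toy sketch of that teardown rule, with simplified stand-in structures rather than the kernel's page-table types:

#include <stdbool.h>
#include <stdlib.h>

#define ENTRIES 4

struct pmd_table { unsigned long pte[ENTRIES]; };

struct pud_entry {
    bool present;
    bool huge;                    /* leaf mapping, no pmd table below */
    struct pmd_table *pmd;        /* valid only when !huge */
};

static void free_radix(struct pud_entry *pud, int n)
{
    for (int i = 0; i < n; i++) {
        if (!pud[i].present)
            continue;
        if (pud[i].huge) {        /* 1GB leaf: nothing below to free */
            pud[i].present = false;
            continue;
        }
        free(pud[i].pmd);         /* page-table page below: free it */
        pud[i].present = false;
    }
}

int main(void)
{
    struct pud_entry pud[ENTRIES] = {
        { .present = true, .huge = true },
        { .present = true, .huge = false,
          .pmd = calloc(1, sizeof(struct pmd_table)) },
    };
    free_radix(pud, ENTRIES);
    return 0;
}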
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index c32e9bfe75b1..6651f736a0b1 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c | |||
| @@ -450,7 +450,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
| 450 | 450 | ||
| 451 | /* | 451 | /* |
| 452 | * Synchronize with the MMU notifier callbacks in | 452 | * Synchronize with the MMU notifier callbacks in |
| 453 | * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). | 453 | * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.). |
| 454 | * While we have the rmap lock, code running on other CPUs | 454 | * While we have the rmap lock, code running on other CPUs |
| 455 | * cannot finish unmapping the host real page that backs | 455 | * cannot finish unmapping the host real page that backs |
| 456 | * this guest real page, so we are OK to access the host | 456 | * this guest real page, so we are OK to access the host |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 81e2ea882d97..4d07fca5121c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
| @@ -4375,7 +4375,6 @@ static struct kvmppc_ops kvm_ops_hv = { | |||
| 4375 | .flush_memslot = kvmppc_core_flush_memslot_hv, | 4375 | .flush_memslot = kvmppc_core_flush_memslot_hv, |
| 4376 | .prepare_memory_region = kvmppc_core_prepare_memory_region_hv, | 4376 | .prepare_memory_region = kvmppc_core_prepare_memory_region_hv, |
| 4377 | .commit_memory_region = kvmppc_core_commit_memory_region_hv, | 4377 | .commit_memory_region = kvmppc_core_commit_memory_region_hv, |
| 4378 | .unmap_hva = kvm_unmap_hva_hv, | ||
| 4379 | .unmap_hva_range = kvm_unmap_hva_range_hv, | 4378 | .unmap_hva_range = kvm_unmap_hva_range_hv, |
| 4380 | .age_hva = kvm_age_hva_hv, | 4379 | .age_hva = kvm_age_hva_hv, |
| 4381 | .test_age_hva = kvm_test_age_hva_hv, | 4380 | .test_age_hva = kvm_test_age_hva_hv, |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3ae752314b34..d3f304d06adf 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
| @@ -277,15 +277,6 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, | |||
| 277 | } | 277 | } |
| 278 | } | 278 | } |
| 279 | 279 | ||
| 280 | static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva) | ||
| 281 | { | ||
| 282 | trace_kvm_unmap_hva(hva); | ||
| 283 | |||
| 284 | do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); | ||
| 285 | |||
| 286 | return 0; | ||
| 287 | } | ||
| 288 | |||
| 289 | static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, | 280 | static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, |
| 290 | unsigned long end) | 281 | unsigned long end) |
| 291 | { | 282 | { |
| @@ -1773,7 +1764,6 @@ static struct kvmppc_ops kvm_ops_pr = { | |||
| 1773 | .flush_memslot = kvmppc_core_flush_memslot_pr, | 1764 | .flush_memslot = kvmppc_core_flush_memslot_pr, |
| 1774 | .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, | 1765 | .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, |
| 1775 | .commit_memory_region = kvmppc_core_commit_memory_region_pr, | 1766 | .commit_memory_region = kvmppc_core_commit_memory_region_pr, |
| 1776 | .unmap_hva = kvm_unmap_hva_pr, | ||
| 1777 | .unmap_hva_range = kvm_unmap_hva_range_pr, | 1767 | .unmap_hva_range = kvm_unmap_hva_range_pr, |
| 1778 | .age_hva = kvm_age_hva_pr, | 1768 | .age_hva = kvm_age_hva_pr, |
| 1779 | .test_age_hva = kvm_test_age_hva_pr, | 1769 | .test_age_hva = kvm_test_age_hva_pr, |
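The removed kvm_unmap_hva_pr() was only a one-page special case of the range callback, which is why dropping it leaves unmap_hva_range as the single MMU-notifier entry point. The sketch below illustrates the equivalence; do_unmap_range() is a stand-in for the kernel helper, not its real signature.

#include <stdio.h>

#define PAGE_SIZE 4096UL

static void do_unmap_range(unsigned long start, unsigned long end)
{
    printf("unmap [%#lx, %#lx)\n", start, end);
}

/* What the removed single-address variant boiled down to. */
static void unmap_single_hva(unsigned long hva)
{
    do_unmap_range(hva, hva + PAGE_SIZE);
}

int main(void)
{
    unmap_single_hva(0x7f0000000000UL);
    return 0;
}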
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 423b21393bc9..c878b4ffb86f 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c | |||
| @@ -724,7 +724,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, | |||
| 724 | 724 | ||
| 725 | /************* MMU Notifiers *************/ | 725 | /************* MMU Notifiers *************/ |
| 726 | 726 | ||
| 727 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 727 | static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
| 728 | { | 728 | { |
| 729 | trace_kvm_unmap_hva(hva); | 729 | trace_kvm_unmap_hva(hva); |
| 730 | 730 | ||
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 85785a370c0e..2f9a8829552b 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h | |||
| @@ -254,21 +254,6 @@ TRACE_EVENT(kvm_exit, | |||
| 254 | ) | 254 | ) |
| 255 | ); | 255 | ); |
| 256 | 256 | ||
| 257 | TRACE_EVENT(kvm_unmap_hva, | ||
| 258 | TP_PROTO(unsigned long hva), | ||
| 259 | TP_ARGS(hva), | ||
| 260 | |||
| 261 | TP_STRUCT__entry( | ||
| 262 | __field( unsigned long, hva ) | ||
| 263 | ), | ||
| 264 | |||
| 265 | TP_fast_assign( | ||
| 266 | __entry->hva = hva; | ||
| 267 | ), | ||
| 268 | |||
| 269 | TP_printk("unmap hva 0x%lx\n", __entry->hva) | ||
| 270 | ); | ||
| 271 | |||
| 272 | #endif /* _TRACE_KVM_H */ | 257 | #endif /* _TRACE_KVM_H */ |
| 273 | 258 | ||
| 274 | /* This part must be outside protection */ | 259 | /* This part must be outside protection */ |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index afb0f08b8021..81cdb6b55118 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
| @@ -294,6 +294,7 @@ struct kvm_vcpu_stat { | |||
| 294 | u64 exit_userspace; | 294 | u64 exit_userspace; |
| 295 | u64 exit_null; | 295 | u64 exit_null; |
| 296 | u64 exit_external_request; | 296 | u64 exit_external_request; |
| 297 | u64 exit_io_request; | ||
| 297 | u64 exit_external_interrupt; | 298 | u64 exit_external_interrupt; |
| 298 | u64 exit_stop_request; | 299 | u64 exit_stop_request; |
| 299 | u64 exit_validity; | 300 | u64 exit_validity; |
| @@ -310,16 +311,29 @@ struct kvm_vcpu_stat { | |||
| 310 | u64 exit_program_interruption; | 311 | u64 exit_program_interruption; |
| 311 | u64 exit_instr_and_program; | 312 | u64 exit_instr_and_program; |
| 312 | u64 exit_operation_exception; | 313 | u64 exit_operation_exception; |
| 314 | u64 deliver_ckc; | ||
| 315 | u64 deliver_cputm; | ||
| 313 | u64 deliver_external_call; | 316 | u64 deliver_external_call; |
| 314 | u64 deliver_emergency_signal; | 317 | u64 deliver_emergency_signal; |
| 315 | u64 deliver_service_signal; | 318 | u64 deliver_service_signal; |
| 316 | u64 deliver_virtio_interrupt; | 319 | u64 deliver_virtio; |
| 317 | u64 deliver_stop_signal; | 320 | u64 deliver_stop_signal; |
| 318 | u64 deliver_prefix_signal; | 321 | u64 deliver_prefix_signal; |
| 319 | u64 deliver_restart_signal; | 322 | u64 deliver_restart_signal; |
| 320 | u64 deliver_program_int; | 323 | u64 deliver_program; |
| 321 | u64 deliver_io_int; | 324 | u64 deliver_io; |
| 325 | u64 deliver_machine_check; | ||
| 322 | u64 exit_wait_state; | 326 | u64 exit_wait_state; |
| 327 | u64 inject_ckc; | ||
| 328 | u64 inject_cputm; | ||
| 329 | u64 inject_external_call; | ||
| 330 | u64 inject_emergency_signal; | ||
| 331 | u64 inject_mchk; | ||
| 332 | u64 inject_pfault_init; | ||
| 333 | u64 inject_program; | ||
| 334 | u64 inject_restart; | ||
| 335 | u64 inject_set_prefix; | ||
| 336 | u64 inject_stop_signal; | ||
| 323 | u64 instruction_epsw; | 337 | u64 instruction_epsw; |
| 324 | u64 instruction_gs; | 338 | u64 instruction_gs; |
| 325 | u64 instruction_io_other; | 339 | u64 instruction_io_other; |
| @@ -644,7 +658,12 @@ struct kvm_vcpu_arch { | |||
| 644 | }; | 658 | }; |
| 645 | 659 | ||
| 646 | struct kvm_vm_stat { | 660 | struct kvm_vm_stat { |
| 647 | ulong remote_tlb_flush; | 661 | u64 inject_io; |
| 662 | u64 inject_float_mchk; | ||
| 663 | u64 inject_pfault_done; | ||
| 664 | u64 inject_service_signal; | ||
| 665 | u64 inject_virtio; | ||
| 666 | u64 remote_tlb_flush; | ||
| 648 | }; | 667 | }; |
| 649 | 668 | ||
| 650 | struct kvm_arch_memory_slot { | 669 | struct kvm_arch_memory_slot { |
| @@ -792,6 +811,7 @@ struct kvm_arch{ | |||
| 792 | int css_support; | 811 | int css_support; |
| 793 | int use_irqchip; | 812 | int use_irqchip; |
| 794 | int use_cmma; | 813 | int use_cmma; |
| 814 | int use_pfmfi; | ||
| 795 | int user_cpu_state_ctrl; | 815 | int user_cpu_state_ctrl; |
| 796 | int user_sigp; | 816 | int user_sigp; |
| 797 | int user_stsi; | 817 | int user_stsi; |
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 74eeec9c0a80..cbc7c3a68e4d 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h | |||
| @@ -193,6 +193,11 @@ static inline unsigned int kvm_arch_para_features(void) | |||
| 193 | return 0; | 193 | return 0; |
| 194 | } | 194 | } |
| 195 | 195 | ||
| 196 | static inline unsigned int kvm_arch_para_hints(void) | ||
| 197 | { | ||
| 198 | return 0; | ||
| 199 | } | ||
| 200 | |||
| 196 | static inline bool kvm_check_and_clear_guest_paused(void) | 201 | static inline bool kvm_check_and_clear_guest_paused(void) |
| 197 | { | 202 | { |
| 198 | return false; | 203 | return false; |
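The new kvm_arch_para_hints() stub means an s390 guest reports no paravirt hints. A rough sketch of how a caller typically consumes such a hint mask is below; the hint bit is hypothetical and used only to show the query pattern.

#include <stdbool.h>

#define HINT_DEDICATED_CPUS  (1U << 0)   /* hypothetical hint bit */

static unsigned int arch_para_hints(void)
{
    return 0;                            /* s390: no hints advertised */
}

static bool guest_has_dedicated_cpus(void)
{
    return (arch_para_hints() & HINT_DEDICATED_CPUS) != 0;
}

int main(void)
{
    return guest_has_dedicated_cpus() ? 1 : 0;
}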
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index db35c41a59d5..c639c95850e4 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h | |||
| @@ -22,8 +22,8 @@ typedef struct { | |||
| 22 | unsigned int has_pgste:1; | 22 | unsigned int has_pgste:1; |
| 23 | /* The mmu context uses storage keys. */ | 23 | /* The mmu context uses storage keys. */ |
| 24 | unsigned int use_skey:1; | 24 | unsigned int use_skey:1; |
| 25 | /* The mmu context uses CMMA. */ | 25 | /* The mmu context uses CMM. */ |
| 26 | unsigned int use_cmma:1; | 26 | unsigned int uses_cmm:1; |
| 27 | } mm_context_t; | 27 | } mm_context_t; |
| 28 | 28 | ||
| 29 | #define INIT_MM_CONTEXT(name) \ | 29 | #define INIT_MM_CONTEXT(name) \ |
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 6c8ce15cde7b..324f6f452982 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h | |||
| @@ -31,7 +31,7 @@ static inline int init_new_context(struct task_struct *tsk, | |||
| 31 | (current->mm && current->mm->context.alloc_pgste); | 31 | (current->mm && current->mm->context.alloc_pgste); |
| 32 | mm->context.has_pgste = 0; | 32 | mm->context.has_pgste = 0; |
| 33 | mm->context.use_skey = 0; | 33 | mm->context.use_skey = 0; |
| 34 | mm->context.use_cmma = 0; | 34 | mm->context.uses_cmm = 0; |
| 35 | #endif | 35 | #endif |
| 36 | switch (mm->context.asce_limit) { | 36 | switch (mm->context.asce_limit) { |
| 37 | case _REGION2_SIZE: | 37 | case _REGION2_SIZE: |
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index c24bfa72baf7..8e2b8647ee12 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c | |||
| @@ -1050,8 +1050,7 @@ shadow_r2t: | |||
| 1050 | rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); | 1050 | rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); |
| 1051 | if (rc) | 1051 | if (rc) |
| 1052 | return rc; | 1052 | return rc; |
| 1053 | /* fallthrough */ | 1053 | } /* fallthrough */ |
| 1054 | } | ||
| 1055 | case ASCE_TYPE_REGION2: { | 1054 | case ASCE_TYPE_REGION2: { |
| 1056 | union region2_table_entry rste; | 1055 | union region2_table_entry rste; |
| 1057 | 1056 | ||
| @@ -1077,8 +1076,7 @@ shadow_r3t: | |||
| 1077 | rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); | 1076 | rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); |
| 1078 | if (rc) | 1077 | if (rc) |
| 1079 | return rc; | 1078 | return rc; |
| 1080 | /* fallthrough */ | 1079 | } /* fallthrough */ |
| 1081 | } | ||
| 1082 | case ASCE_TYPE_REGION3: { | 1080 | case ASCE_TYPE_REGION3: { |
| 1083 | union region3_table_entry rtte; | 1081 | union region3_table_entry rtte; |
| 1084 | 1082 | ||
| @@ -1113,8 +1111,7 @@ shadow_sgt: | |||
| 1113 | rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); | 1111 | rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); |
| 1114 | if (rc) | 1112 | if (rc) |
| 1115 | return rc; | 1113 | return rc; |
| 1116 | /* fallthrough */ | 1114 | } /* fallthrough */ |
| 1117 | } | ||
| 1118 | case ASCE_TYPE_SEGMENT: { | 1115 | case ASCE_TYPE_SEGMENT: { |
| 1119 | union segment_table_entry ste; | 1116 | union segment_table_entry ste; |
| 1120 | 1117 | ||
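The gaccess.c hunks only move the closing brace of each case block so that the fallthrough comment ends up immediately before the next case label; compilers that warn on implicit fallthrough generally honour the annotation only in that position, not inside the block. A small standalone example of the resulting pattern:

#include <stdio.h>

static int classify(int type)
{
    int levels = 0;

    switch (type) {
    case 4: {
        int extra = 1;          /* per-level work in its own scope */
        levels += extra;
    }   /* fallthrough */
    case 3:
        levels += 1;
        break;
    default:
        break;
    }
    return levels;
}

int main(void)
{
    printf("%d %d\n", classify(4), classify(3));   /* prints "2 1" */
    return 0;
}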
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 07c6e81163bf..a389fa85cca2 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
| @@ -50,18 +50,6 @@ u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) | |||
| 50 | return ilen; | 50 | return ilen; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | static int handle_noop(struct kvm_vcpu *vcpu) | ||
| 54 | { | ||
| 55 | switch (vcpu->arch.sie_block->icptcode) { | ||
| 56 | case 0x10: | ||
| 57 | vcpu->stat.exit_external_request++; | ||
| 58 | break; | ||
| 59 | default: | ||
| 60 | break; /* nothing */ | ||
| 61 | } | ||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | static int handle_stop(struct kvm_vcpu *vcpu) | 53 | static int handle_stop(struct kvm_vcpu *vcpu) |
| 66 | { | 54 | { |
| 67 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 55 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| @@ -465,8 +453,11 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) | |||
| 465 | 453 | ||
| 466 | switch (vcpu->arch.sie_block->icptcode) { | 454 | switch (vcpu->arch.sie_block->icptcode) { |
| 467 | case ICPT_EXTREQ: | 455 | case ICPT_EXTREQ: |
| 456 | vcpu->stat.exit_external_request++; | ||
| 457 | return 0; | ||
| 468 | case ICPT_IOREQ: | 458 | case ICPT_IOREQ: |
| 469 | return handle_noop(vcpu); | 459 | vcpu->stat.exit_io_request++; |
| 460 | return 0; | ||
| 470 | case ICPT_INST: | 461 | case ICPT_INST: |
| 471 | rc = handle_instruction(vcpu); | 462 | rc = handle_instruction(vcpu); |
| 472 | break; | 463 | break; |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b04616b57a94..37d06e022238 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
| @@ -391,6 +391,7 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) | |||
| 391 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 391 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 392 | int rc; | 392 | int rc; |
| 393 | 393 | ||
| 394 | vcpu->stat.deliver_cputm++; | ||
| 394 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, | 395 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, |
| 395 | 0, 0); | 396 | 0, 0); |
| 396 | 397 | ||
| @@ -410,6 +411,7 @@ static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu) | |||
| 410 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 411 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 411 | int rc; | 412 | int rc; |
| 412 | 413 | ||
| 414 | vcpu->stat.deliver_ckc++; | ||
| 413 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, | 415 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, |
| 414 | 0, 0); | 416 | 0, 0); |
| 415 | 417 | ||
| @@ -595,6 +597,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) | |||
| 595 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, | 597 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, |
| 596 | KVM_S390_MCHK, | 598 | KVM_S390_MCHK, |
| 597 | mchk.cr14, mchk.mcic); | 599 | mchk.cr14, mchk.mcic); |
| 600 | vcpu->stat.deliver_machine_check++; | ||
| 598 | rc = __write_machine_check(vcpu, &mchk); | 601 | rc = __write_machine_check(vcpu, &mchk); |
| 599 | } | 602 | } |
| 600 | return rc; | 603 | return rc; |
| @@ -710,7 +713,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) | |||
| 710 | ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; | 713 | ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK; |
| 711 | VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", | 714 | VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d", |
| 712 | pgm_info.code, ilen); | 715 | pgm_info.code, ilen); |
| 713 | vcpu->stat.deliver_program_int++; | 716 | vcpu->stat.deliver_program++; |
| 714 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, | 717 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, |
| 715 | pgm_info.code, 0); | 718 | pgm_info.code, 0); |
| 716 | 719 | ||
| @@ -899,7 +902,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) | |||
| 899 | VCPU_EVENT(vcpu, 4, | 902 | VCPU_EVENT(vcpu, 4, |
| 900 | "deliver: virtio parm: 0x%x,parm64: 0x%llx", | 903 | "deliver: virtio parm: 0x%x,parm64: 0x%llx", |
| 901 | inti->ext.ext_params, inti->ext.ext_params2); | 904 | inti->ext.ext_params, inti->ext.ext_params2); |
| 902 | vcpu->stat.deliver_virtio_interrupt++; | 905 | vcpu->stat.deliver_virtio++; |
| 903 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, | 906 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, |
| 904 | inti->type, | 907 | inti->type, |
| 905 | inti->ext.ext_params, | 908 | inti->ext.ext_params, |
| @@ -975,7 +978,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, | |||
| 975 | inti->io.subchannel_id >> 1 & 0x3, | 978 | inti->io.subchannel_id >> 1 & 0x3, |
| 976 | inti->io.subchannel_nr); | 979 | inti->io.subchannel_nr); |
| 977 | 980 | ||
| 978 | vcpu->stat.deliver_io_int++; | 981 | vcpu->stat.deliver_io++; |
| 979 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, | 982 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, |
| 980 | inti->type, | 983 | inti->type, |
| 981 | ((__u32)inti->io.subchannel_id << 16) | | 984 | ((__u32)inti->io.subchannel_id << 16) | |
| @@ -1004,7 +1007,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, | |||
| 1004 | VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc); | 1007 | VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc); |
| 1005 | memset(&io, 0, sizeof(io)); | 1008 | memset(&io, 0, sizeof(io)); |
| 1006 | io.io_int_word = isc_to_int_word(isc); | 1009 | io.io_int_word = isc_to_int_word(isc); |
| 1007 | vcpu->stat.deliver_io_int++; | 1010 | vcpu->stat.deliver_io++; |
| 1008 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, | 1011 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, |
| 1009 | KVM_S390_INT_IO(1, 0, 0, 0), | 1012 | KVM_S390_INT_IO(1, 0, 0, 0), |
| 1010 | ((__u32)io.subchannel_id << 16) | | 1013 | ((__u32)io.subchannel_id << 16) | |
| @@ -1268,6 +1271,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
| 1268 | { | 1271 | { |
| 1269 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1272 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1270 | 1273 | ||
| 1274 | vcpu->stat.inject_program++; | ||
| 1271 | VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code); | 1275 | VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code); |
| 1272 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, | 1276 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, |
| 1273 | irq->u.pgm.code, 0); | 1277 | irq->u.pgm.code, 0); |
| @@ -1309,6 +1313,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
| 1309 | { | 1313 | { |
| 1310 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1314 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1311 | 1315 | ||
| 1316 | vcpu->stat.inject_pfault_init++; | ||
| 1312 | VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx", | 1317 | VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx", |
| 1313 | irq->u.ext.ext_params2); | 1318 | irq->u.ext.ext_params2); |
| 1314 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, | 1319 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, |
| @@ -1327,6 +1332,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
| 1327 | struct kvm_s390_extcall_info *extcall = &li->irq.extcall; | 1332 | struct kvm_s390_extcall_info *extcall = &li->irq.extcall; |
| 1328 | uint16_t src_id = irq->u.extcall.code; | 1333 | uint16_t src_id = irq->u.extcall.code; |
| 1329 | 1334 | ||
| 1335 | vcpu->stat.inject_external_call++; | ||
| 1330 | VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u", | 1336 | VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u", |
| 1331 | src_id); | 1337 | src_id); |
| 1332 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, | 1338 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, |
| @@ -1351,6 +1357,7 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
| 1351 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1357 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1352 | struct kvm_s390_prefix_info *prefix = &li->irq.prefix; | 1358 | struct kvm_s390_prefix_info *prefix = &li->irq.prefix; |
| 1353 | 1359 | ||
| 1360 | vcpu->stat.inject_set_prefix++; | ||
| 1354 | VCPU_EVENT(vcpu, 3, "inject: set prefix to %x", | 1361 | VCPU_EVENT(vcpu, 3, "inject: set prefix to %x", |
| 1355 | irq->u.prefix.address); | 1362 | irq->u.prefix.address); |
| 1356 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, | 1363 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, |
| @@ -1371,6 +1378,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
| 1371 | struct kvm_s390_stop_info *stop = &li->irq.stop; | 1378 | struct kvm_s390_stop_info *stop = &li->irq.stop; |
| 1372 | int rc = 0; | 1379 | int rc = 0; |
| 1373 | 1380 | ||
| 1381 | vcpu->stat.inject_stop_signal++; | ||
| 1374 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0); | 1382 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0); |
| 1375 | 1383 | ||
| 1376 | if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) | 1384 | if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) |
| @@ -1395,6 +1403,7 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu, | |||
| 1395 | { | 1403 | { |
| 1396 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1404 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1397 | 1405 | ||
| 1406 | vcpu->stat.inject_restart++; | ||
| 1398 | VCPU_EVENT(vcpu, 3, "%s", "inject: restart int"); | 1407 | VCPU_EVENT(vcpu, 3, "%s", "inject: restart int"); |
| 1399 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); | 1408 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); |
| 1400 | 1409 | ||
| @@ -1407,6 +1416,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, | |||
| 1407 | { | 1416 | { |
| 1408 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1417 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1409 | 1418 | ||
| 1419 | vcpu->stat.inject_emergency_signal++; | ||
| 1410 | VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u", | 1420 | VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u", |
| 1411 | irq->u.emerg.code); | 1421 | irq->u.emerg.code); |
| 1412 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, | 1422 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, |
| @@ -1427,6 +1437,7 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) | |||
| 1427 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1437 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1428 | struct kvm_s390_mchk_info *mchk = &li->irq.mchk; | 1438 | struct kvm_s390_mchk_info *mchk = &li->irq.mchk; |
| 1429 | 1439 | ||
| 1440 | vcpu->stat.inject_mchk++; | ||
| 1430 | VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx", | 1441 | VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx", |
| 1431 | irq->u.mchk.mcic); | 1442 | irq->u.mchk.mcic); |
| 1432 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, | 1443 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, |
| @@ -1457,6 +1468,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu) | |||
| 1457 | { | 1468 | { |
| 1458 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1469 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1459 | 1470 | ||
| 1471 | vcpu->stat.inject_ckc++; | ||
| 1460 | VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external"); | 1472 | VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external"); |
| 1461 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, | 1473 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, |
| 1462 | 0, 0); | 1474 | 0, 0); |
| @@ -1470,6 +1482,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) | |||
| 1470 | { | 1482 | { |
| 1471 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; | 1483 | struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; |
| 1472 | 1484 | ||
| 1485 | vcpu->stat.inject_cputm++; | ||
| 1473 | VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external"); | 1486 | VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external"); |
| 1474 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, | 1487 | trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, |
| 1475 | 0, 0); | 1488 | 0, 0); |
| @@ -1596,6 +1609,7 @@ static int __inject_service(struct kvm *kvm, | |||
| 1596 | { | 1609 | { |
| 1597 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | 1610 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; |
| 1598 | 1611 | ||
| 1612 | kvm->stat.inject_service_signal++; | ||
| 1599 | spin_lock(&fi->lock); | 1613 | spin_lock(&fi->lock); |
| 1600 | fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING; | 1614 | fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING; |
| 1601 | /* | 1615 | /* |
| @@ -1621,6 +1635,7 @@ static int __inject_virtio(struct kvm *kvm, | |||
| 1621 | { | 1635 | { |
| 1622 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | 1636 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; |
| 1623 | 1637 | ||
| 1638 | kvm->stat.inject_virtio++; | ||
| 1624 | spin_lock(&fi->lock); | 1639 | spin_lock(&fi->lock); |
| 1625 | if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) { | 1640 | if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) { |
| 1626 | spin_unlock(&fi->lock); | 1641 | spin_unlock(&fi->lock); |
| @@ -1638,6 +1653,7 @@ static int __inject_pfault_done(struct kvm *kvm, | |||
| 1638 | { | 1653 | { |
| 1639 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | 1654 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; |
| 1640 | 1655 | ||
| 1656 | kvm->stat.inject_pfault_done++; | ||
| 1641 | spin_lock(&fi->lock); | 1657 | spin_lock(&fi->lock); |
| 1642 | if (fi->counters[FIRQ_CNTR_PFAULT] >= | 1658 | if (fi->counters[FIRQ_CNTR_PFAULT] >= |
| 1643 | (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) { | 1659 | (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) { |
| @@ -1657,6 +1673,7 @@ static int __inject_float_mchk(struct kvm *kvm, | |||
| 1657 | { | 1673 | { |
| 1658 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; | 1674 | struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; |
| 1659 | 1675 | ||
| 1676 | kvm->stat.inject_float_mchk++; | ||
| 1660 | spin_lock(&fi->lock); | 1677 | spin_lock(&fi->lock); |
| 1661 | fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS); | 1678 | fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS); |
| 1662 | fi->mchk.mcic |= inti->mchk.mcic; | 1679 | fi->mchk.mcic |= inti->mchk.mcic; |
| @@ -1672,6 +1689,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) | |||
| 1672 | struct list_head *list; | 1689 | struct list_head *list; |
| 1673 | int isc; | 1690 | int isc; |
| 1674 | 1691 | ||
| 1692 | kvm->stat.inject_io++; | ||
| 1675 | isc = int_word_to_isc(inti->io.io_int_word); | 1693 | isc = int_word_to_isc(inti->io.io_int_word); |
| 1676 | 1694 | ||
| 1677 | if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) { | 1695 | if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) { |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 339ac0964590..64c986243018 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -57,6 +57,7 @@ | |||
| 57 | (KVM_MAX_VCPUS + LOCAL_IRQS)) | 57 | (KVM_MAX_VCPUS + LOCAL_IRQS)) |
| 58 | 58 | ||
| 59 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU | 59 | #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU |
| 60 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | ||
| 60 | 61 | ||
| 61 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 62 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
| 62 | { "userspace_handled", VCPU_STAT(exit_userspace) }, | 63 | { "userspace_handled", VCPU_STAT(exit_userspace) }, |
| @@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 64 | { "exit_validity", VCPU_STAT(exit_validity) }, | 65 | { "exit_validity", VCPU_STAT(exit_validity) }, |
| 65 | { "exit_stop_request", VCPU_STAT(exit_stop_request) }, | 66 | { "exit_stop_request", VCPU_STAT(exit_stop_request) }, |
| 66 | { "exit_external_request", VCPU_STAT(exit_external_request) }, | 67 | { "exit_external_request", VCPU_STAT(exit_external_request) }, |
| 68 | { "exit_io_request", VCPU_STAT(exit_io_request) }, | ||
| 67 | { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, | 69 | { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, |
| 68 | { "exit_instruction", VCPU_STAT(exit_instruction) }, | 70 | { "exit_instruction", VCPU_STAT(exit_instruction) }, |
| 69 | { "exit_pei", VCPU_STAT(exit_pei) }, | 71 | { "exit_pei", VCPU_STAT(exit_pei) }, |
| @@ -78,16 +80,34 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 78 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | 80 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, |
| 79 | { "instruction_stctl", VCPU_STAT(instruction_stctl) }, | 81 | { "instruction_stctl", VCPU_STAT(instruction_stctl) }, |
| 80 | { "instruction_stctg", VCPU_STAT(instruction_stctg) }, | 82 | { "instruction_stctg", VCPU_STAT(instruction_stctg) }, |
| 83 | { "deliver_ckc", VCPU_STAT(deliver_ckc) }, | ||
| 84 | { "deliver_cputm", VCPU_STAT(deliver_cputm) }, | ||
| 81 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, | 85 | { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, |
| 82 | { "deliver_external_call", VCPU_STAT(deliver_external_call) }, | 86 | { "deliver_external_call", VCPU_STAT(deliver_external_call) }, |
| 83 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, | 87 | { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, |
| 84 | { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, | 88 | { "deliver_virtio", VCPU_STAT(deliver_virtio) }, |
| 85 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, | 89 | { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, |
| 86 | { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, | 90 | { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, |
| 87 | { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, | 91 | { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, |
| 88 | { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, | 92 | { "deliver_program", VCPU_STAT(deliver_program) }, |
| 89 | { "deliver_io_interrupt", VCPU_STAT(deliver_io_int) }, | 93 | { "deliver_io", VCPU_STAT(deliver_io) }, |
| 94 | { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, | ||
| 90 | { "exit_wait_state", VCPU_STAT(exit_wait_state) }, | 95 | { "exit_wait_state", VCPU_STAT(exit_wait_state) }, |
| 96 | { "inject_ckc", VCPU_STAT(inject_ckc) }, | ||
| 97 | { "inject_cputm", VCPU_STAT(inject_cputm) }, | ||
| 98 | { "inject_external_call", VCPU_STAT(inject_external_call) }, | ||
| 99 | { "inject_float_mchk", VM_STAT(inject_float_mchk) }, | ||
| 100 | { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, | ||
| 101 | { "inject_io", VM_STAT(inject_io) }, | ||
| 102 | { "inject_mchk", VCPU_STAT(inject_mchk) }, | ||
| 103 | { "inject_pfault_done", VM_STAT(inject_pfault_done) }, | ||
| 104 | { "inject_program", VCPU_STAT(inject_program) }, | ||
| 105 | { "inject_restart", VCPU_STAT(inject_restart) }, | ||
| 106 | { "inject_service_signal", VM_STAT(inject_service_signal) }, | ||
| 107 | { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, | ||
| 108 | { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, | ||
| 109 | { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, | ||
| 110 | { "inject_virtio", VM_STAT(inject_virtio) }, | ||
| 91 | { "instruction_epsw", VCPU_STAT(instruction_epsw) }, | 111 | { "instruction_epsw", VCPU_STAT(instruction_epsw) }, |
| 92 | { "instruction_gs", VCPU_STAT(instruction_gs) }, | 112 | { "instruction_gs", VCPU_STAT(instruction_gs) }, |
| 93 | { "instruction_io_other", VCPU_STAT(instruction_io_other) }, | 113 | { "instruction_io_other", VCPU_STAT(instruction_io_other) }, |
| @@ -152,13 +172,33 @@ static int nested; | |||
| 152 | module_param(nested, int, S_IRUGO); | 172 | module_param(nested, int, S_IRUGO); |
| 153 | MODULE_PARM_DESC(nested, "Nested virtualization support"); | 173 | MODULE_PARM_DESC(nested, "Nested virtualization support"); |
| 154 | 174 | ||
| 155 | /* upper facilities limit for kvm */ | ||
| 156 | unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM }; | ||
| 157 | 175 | ||
| 158 | unsigned long kvm_s390_fac_list_mask_size(void) | 176 | /* |
| 177 | * For now we handle at most 16 double words as this is what the s390 base | ||
| 178 | * kernel handles and stores in the prefix page. If we ever need to go beyond | ||
| 179 | * this, this requires changes to code, but the external uapi can stay. | ||
| 180 | */ | ||
| 181 | #define SIZE_INTERNAL 16 | ||
| 182 | |||
| 183 | /* | ||
| 184 | * Base feature mask that defines default mask for facilities. Consists of the | ||
| 185 | * defines in FACILITIES_KVM and the non-hypervisor managed bits. | ||
| 186 | */ | ||
| 187 | static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; | ||
| 188 | /* | ||
| 189 | * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL | ||
| 190 | * and defines the facilities that can be enabled via a cpu model. | ||
| 191 | */ | ||
| 192 | static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL }; | ||
| 193 | |||
| 194 | static unsigned long kvm_s390_fac_size(void) | ||
| 159 | { | 195 | { |
| 160 | BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64); | 196 | BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64); |
| 161 | return ARRAY_SIZE(kvm_s390_fac_list_mask); | 197 | BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64); |
| 198 | BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) > | ||
| 199 | sizeof(S390_lowcore.stfle_fac_list)); | ||
| 200 | |||
| 201 | return SIZE_INTERNAL; | ||
| 162 | } | 202 | } |
| 163 | 203 | ||
| 164 | /* available cpu features supported by kvm */ | 204 | /* available cpu features supported by kvm */ |
| @@ -679,6 +719,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att | |||
| 679 | mutex_lock(&kvm->lock); | 719 | mutex_lock(&kvm->lock); |
| 680 | if (!kvm->created_vcpus) { | 720 | if (!kvm->created_vcpus) { |
| 681 | kvm->arch.use_cmma = 1; | 721 | kvm->arch.use_cmma = 1; |
| 722 | /* Not compatible with cmma. */ | ||
| 723 | kvm->arch.use_pfmfi = 0; | ||
| 682 | ret = 0; | 724 | ret = 0; |
| 683 | } | 725 | } |
| 684 | mutex_unlock(&kvm->lock); | 726 | mutex_unlock(&kvm->lock); |
| @@ -1583,7 +1625,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, | |||
| 1583 | return -EINVAL; | 1625 | return -EINVAL; |
| 1584 | /* CMMA is disabled or was not used, or the buffer has length zero */ | 1626 | /* CMMA is disabled or was not used, or the buffer has length zero */ |
| 1585 | bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); | 1627 | bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); |
| 1586 | if (!bufsize || !kvm->mm->context.use_cmma) { | 1628 | if (!bufsize || !kvm->mm->context.uses_cmm) { |
| 1587 | memset(args, 0, sizeof(*args)); | 1629 | memset(args, 0, sizeof(*args)); |
| 1588 | return 0; | 1630 | return 0; |
| 1589 | } | 1631 | } |
| @@ -1660,7 +1702,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm, | |||
| 1660 | /* | 1702 | /* |
| 1661 | * This function sets the CMMA attributes for the given pages. If the input | 1703 | * This function sets the CMMA attributes for the given pages. If the input |
| 1662 | * buffer has zero length, no action is taken, otherwise the attributes are | 1704 | * buffer has zero length, no action is taken, otherwise the attributes are |
| 1663 | * set and the mm->context.use_cmma flag is set. | 1705 | * set and the mm->context.uses_cmm flag is set. |
| 1664 | */ | 1706 | */ |
| 1665 | static int kvm_s390_set_cmma_bits(struct kvm *kvm, | 1707 | static int kvm_s390_set_cmma_bits(struct kvm *kvm, |
| 1666 | const struct kvm_s390_cmma_log *args) | 1708 | const struct kvm_s390_cmma_log *args) |
| @@ -1710,9 +1752,9 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, | |||
| 1710 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 1752 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
| 1711 | up_read(&kvm->mm->mmap_sem); | 1753 | up_read(&kvm->mm->mmap_sem); |
| 1712 | 1754 | ||
| 1713 | if (!kvm->mm->context.use_cmma) { | 1755 | if (!kvm->mm->context.uses_cmm) { |
| 1714 | down_write(&kvm->mm->mmap_sem); | 1756 | down_write(&kvm->mm->mmap_sem); |
| 1715 | kvm->mm->context.use_cmma = 1; | 1757 | kvm->mm->context.uses_cmm = 1; |
| 1716 | up_write(&kvm->mm->mmap_sem); | 1758 | up_write(&kvm->mm->mmap_sem); |
| 1717 | } | 1759 | } |
| 1718 | out: | 1760 | out: |
| @@ -1967,20 +2009,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 1967 | if (!kvm->arch.sie_page2) | 2009 | if (!kvm->arch.sie_page2) |
| 1968 | goto out_err; | 2010 | goto out_err; |
| 1969 | 2011 | ||
| 1970 | /* Populate the facility mask initially. */ | ||
| 1971 | memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list, | ||
| 1972 | sizeof(S390_lowcore.stfle_fac_list)); | ||
| 1973 | for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { | ||
| 1974 | if (i < kvm_s390_fac_list_mask_size()) | ||
| 1975 | kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i]; | ||
| 1976 | else | ||
| 1977 | kvm->arch.model.fac_mask[i] = 0UL; | ||
| 1978 | } | ||
| 1979 | |||
| 1980 | /* Populate the facility list initially. */ | ||
| 1981 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; | 2012 | kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; |
| 1982 | memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, | 2013 | |
| 1983 | S390_ARCH_FAC_LIST_SIZE_BYTE); | 2014 | for (i = 0; i < kvm_s390_fac_size(); i++) { |
| 2015 | kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] & | ||
| 2016 | (kvm_s390_fac_base[i] | | ||
| 2017 | kvm_s390_fac_ext[i]); | ||
| 2018 | kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & | ||
| 2019 | kvm_s390_fac_base[i]; | ||
| 2020 | } | ||
| 1984 | 2021 | ||
| 1985 | /* we are always in czam mode - even on pre z14 machines */ | 2022 | /* we are always in czam mode - even on pre z14 machines */ |
| 1986 | set_kvm_facility(kvm->arch.model.fac_mask, 138); | 2023 | set_kvm_facility(kvm->arch.model.fac_mask, 138); |
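The rewritten loop above splits the facility setup in two: the mask (what a CPU model may enable) is the host facilities ANDed with base|ext, while the default list (what a guest gets without a CPU model) is the host facilities ANDed with base only. A compilable sketch of that computation, with short illustrative arrays in place of the 16-doubleword kernel versions:

#include <stdio.h>

#define SIZE_INTERNAL 2

static const unsigned long host_stfle[SIZE_INTERNAL] = { 0xff0f, 0x00f0 };
static const unsigned long fac_base[SIZE_INTERNAL]   = { 0x0f0f, 0x0000 };
static const unsigned long fac_ext[SIZE_INTERNAL]    = { 0xf000, 0x00f0 };

int main(void)
{
    unsigned long fac_mask[SIZE_INTERNAL], fac_list[SIZE_INTERNAL];

    for (int i = 0; i < SIZE_INTERNAL; i++) {
        /* mask: host facilities limited to base plus cpu-model extras */
        fac_mask[i] = host_stfle[i] & (fac_base[i] | fac_ext[i]);
        /* list: default guest facilities, base only */
        fac_list[i] = host_stfle[i] & fac_base[i];
        printf("word %d: mask=%#lx list=%#lx\n", i, fac_mask[i], fac_list[i]);
    }
    return 0;
}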
| @@ -2028,6 +2065,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 2028 | 2065 | ||
| 2029 | kvm->arch.css_support = 0; | 2066 | kvm->arch.css_support = 0; |
| 2030 | kvm->arch.use_irqchip = 0; | 2067 | kvm->arch.use_irqchip = 0; |
| 2068 | kvm->arch.use_pfmfi = sclp.has_pfmfi; | ||
| 2031 | kvm->arch.epoch = 0; | 2069 | kvm->arch.epoch = 0; |
| 2032 | 2070 | ||
| 2033 | spin_lock_init(&kvm->arch.start_stop_lock); | 2071 | spin_lock_init(&kvm->arch.start_stop_lock); |
| @@ -2454,8 +2492,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) | |||
| 2454 | vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); | 2492 | vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL); |
| 2455 | if (!vcpu->arch.sie_block->cbrlo) | 2493 | if (!vcpu->arch.sie_block->cbrlo) |
| 2456 | return -ENOMEM; | 2494 | return -ENOMEM; |
| 2457 | |||
| 2458 | vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; | ||
| 2459 | return 0; | 2495 | return 0; |
| 2460 | } | 2496 | } |
| 2461 | 2497 | ||
| @@ -2491,7 +2527,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 2491 | if (test_kvm_facility(vcpu->kvm, 73)) | 2527 | if (test_kvm_facility(vcpu->kvm, 73)) |
| 2492 | vcpu->arch.sie_block->ecb |= ECB_TE; | 2528 | vcpu->arch.sie_block->ecb |= ECB_TE; |
| 2493 | 2529 | ||
| 2494 | if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) | 2530 | if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi) |
| 2495 | vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; | 2531 | vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; |
| 2496 | if (test_kvm_facility(vcpu->kvm, 130)) | 2532 | if (test_kvm_facility(vcpu->kvm, 130)) |
| 2497 | vcpu->arch.sie_block->ecb2 |= ECB2_IEP; | 2533 | vcpu->arch.sie_block->ecb2 |= ECB2_IEP; |
| @@ -3023,7 +3059,7 @@ retry: | |||
| 3023 | 3059 | ||
| 3024 | if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { | 3060 | if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { |
| 3025 | /* | 3061 | /* |
| 3026 | * Disable CMMA virtualization; we will emulate the ESSA | 3062 | * Disable CMM virtualization; we will emulate the ESSA |
| 3027 | * instruction manually, in order to provide additional | 3063 | * instruction manually, in order to provide additional |
| 3028 | * functionalities needed for live migration. | 3064 | * functionalities needed for live migration. |
| 3029 | */ | 3065 | */ |
| @@ -3033,11 +3069,11 @@ retry: | |||
| 3033 | 3069 | ||
| 3034 | if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { | 3070 | if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { |
| 3035 | /* | 3071 | /* |
| 3036 | * Re-enable CMMA virtualization if CMMA is available and | 3072 | * Re-enable CMM virtualization if CMMA is available and |
| 3037 | * was used. | 3073 | * CMM has been used. |
| 3038 | */ | 3074 | */ |
| 3039 | if ((vcpu->kvm->arch.use_cmma) && | 3075 | if ((vcpu->kvm->arch.use_cmma) && |
| 3040 | (vcpu->kvm->mm->context.use_cmma)) | 3076 | (vcpu->kvm->mm->context.uses_cmm)) |
| 3041 | vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; | 3077 | vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; |
| 3042 | goto retry; | 3078 | goto retry; |
| 3043 | } | 3079 | } |
| @@ -4044,7 +4080,7 @@ static int __init kvm_s390_init(void) | |||
| 4044 | } | 4080 | } |
| 4045 | 4081 | ||
| 4046 | for (i = 0; i < 16; i++) | 4082 | for (i = 0; i < 16; i++) |
| 4047 | kvm_s390_fac_list_mask[i] |= | 4083 | kvm_s390_fac_base[i] |= |
| 4048 | S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); | 4084 | S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); |
| 4049 | 4085 | ||
| 4050 | return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); | 4086 | return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f55ac0ef99ea..1b5621f4fe5b 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -294,8 +294,6 @@ void exit_sie(struct kvm_vcpu *vcpu); | |||
| 294 | void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); | 294 | void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); |
| 295 | int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); | 295 | int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); |
| 296 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); | 296 | void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); |
| 297 | unsigned long kvm_s390_fac_list_mask_size(void); | ||
| 298 | extern unsigned long kvm_s390_fac_list_mask[]; | ||
| 299 | void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); | 297 | void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); |
| 300 | __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); | 298 | __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); |
| 301 | 299 | ||
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f0b4185158af..ebfa0442e569 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
| @@ -1078,9 +1078,9 @@ static int handle_essa(struct kvm_vcpu *vcpu) | |||
| 1078 | * value really needs to be written to; if the value is | 1078 | * value really needs to be written to; if the value is |
| 1079 | * already correct, we do nothing and avoid the lock. | 1079 | * already correct, we do nothing and avoid the lock. |
| 1080 | */ | 1080 | */ |
| 1081 | if (vcpu->kvm->mm->context.use_cmma == 0) { | 1081 | if (vcpu->kvm->mm->context.uses_cmm == 0) { |
| 1082 | down_write(&vcpu->kvm->mm->mmap_sem); | 1082 | down_write(&vcpu->kvm->mm->mmap_sem); |
| 1083 | vcpu->kvm->mm->context.use_cmma = 1; | 1083 | vcpu->kvm->mm->context.uses_cmm = 1; |
| 1084 | up_write(&vcpu->kvm->mm->mmap_sem); | 1084 | up_write(&vcpu->kvm->mm->mmap_sem); |
| 1085 | } | 1085 | } |
| 1086 | /* | 1086 | /* |
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 424a1ba4f874..90a8c9e84ca6 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c | |||
| @@ -62,6 +62,13 @@ static struct facility_def facility_defs[] = { | |||
| 62 | } | 62 | } |
| 63 | }, | 63 | }, |
| 64 | { | 64 | { |
| 65 | /* | ||
| 66 | * FACILITIES_KVM contains the list of facilities that are part | ||
| 67 | * of the default facility mask and list that are passed to the | ||
| 68 | * initial CPU model. If no CPU model is used, this, together | ||
| 69 | * with the non-hypervisor managed bits, is the maximum list of | ||
| 70 | * guest facilities supported by KVM. | ||
| 71 | */ | ||
| 65 | .name = "FACILITIES_KVM", | 72 | .name = "FACILITIES_KVM", |
| 66 | .bits = (int[]){ | 73 | .bits = (int[]){ |
| 67 | 0, /* N3 instructions */ | 74 | 0, /* N3 instructions */ |
| @@ -89,6 +96,19 @@ static struct facility_def facility_defs[] = { | |||
| 89 | -1 /* END */ | 96 | -1 /* END */ |
| 90 | } | 97 | } |
| 91 | }, | 98 | }, |
| 99 | { | ||
| 100 | /* | ||
| 101 | * FACILITIES_KVM_CPUMODEL contains the list of facilities | ||
| 102 | * that can be enabled by CPU model code if the host supports | ||
| 103 | * it. These facilities are not passed to the guest without | ||
| 104 | * CPU model support. | ||
| 105 | */ | ||
| 106 | |||
| 107 | .name = "FACILITIES_KVM_CPUMODEL", | ||
| 108 | .bits = (int[]){ | ||
| 109 | -1 /* END */ | ||
| 110 | } | ||
| 111 | }, | ||
| 92 | }; | 112 | }; |
| 93 | 113 | ||
| 94 | static void print_facility_list(struct facility_def *def) | 114 | static void print_facility_list(struct facility_def *def) |
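
The new FACILITIES_KVM_CPUMODEL list starts out empty; a facility is opted in simply by adding its bit number to the array, and gen_facilities turns each .bits array into the corresponding facility mask at build time. A minimal sketch of such an addition (the facility number and comment are hypothetical, not part of this patch):

        {
                .name = "FACILITIES_KVM_CPUMODEL",
                .bits = (int[]){
                        155,    /* hypothetical: facility only offered via CPU model */
                        -1      /* END */
                }
        },
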
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2edc49e7409b..cfecc2272f2d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | #include <asm/apic.h> | 21 | #include <asm/apic.h> |
| 22 | #include <asm/desc.h> | 22 | #include <asm/desc.h> |
| 23 | #include <asm/hypervisor.h> | 23 | #include <asm/hypervisor.h> |
| 24 | #include <asm/hyperv.h> | 24 | #include <asm/hyperv-tlfs.h> |
| 25 | #include <asm/mshyperv.h> | 25 | #include <asm/mshyperv.h> |
| 26 | #include <linux/version.h> | 26 | #include <linux/version.h> |
| 27 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
| @@ -88,11 +88,15 @@ EXPORT_SYMBOL_GPL(hyperv_cs); | |||
| 88 | u32 *hv_vp_index; | 88 | u32 *hv_vp_index; |
| 89 | EXPORT_SYMBOL_GPL(hv_vp_index); | 89 | EXPORT_SYMBOL_GPL(hv_vp_index); |
| 90 | 90 | ||
| 91 | struct hv_vp_assist_page **hv_vp_assist_page; | ||
| 92 | EXPORT_SYMBOL_GPL(hv_vp_assist_page); | ||
| 93 | |||
| 91 | u32 hv_max_vp_index; | 94 | u32 hv_max_vp_index; |
| 92 | 95 | ||
| 93 | static int hv_cpu_init(unsigned int cpu) | 96 | static int hv_cpu_init(unsigned int cpu) |
| 94 | { | 97 | { |
| 95 | u64 msr_vp_index; | 98 | u64 msr_vp_index; |
| 99 | struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; | ||
| 96 | 100 | ||
| 97 | hv_get_vp_index(msr_vp_index); | 101 | hv_get_vp_index(msr_vp_index); |
| 98 | 102 | ||
| @@ -101,6 +105,22 @@ static int hv_cpu_init(unsigned int cpu) | |||
| 101 | if (msr_vp_index > hv_max_vp_index) | 105 | if (msr_vp_index > hv_max_vp_index) |
| 102 | hv_max_vp_index = msr_vp_index; | 106 | hv_max_vp_index = msr_vp_index; |
| 103 | 107 | ||
| 108 | if (!hv_vp_assist_page) | ||
| 109 | return 0; | ||
| 110 | |||
| 111 | if (!*hvp) | ||
| 112 | *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); | ||
| 113 | |||
| 114 | if (*hvp) { | ||
| 115 | u64 val; | ||
| 116 | |||
| 117 | val = vmalloc_to_pfn(*hvp); | ||
| 118 | val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | | ||
| 119 | HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; | ||
| 120 | |||
| 121 | wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); | ||
| 122 | } | ||
| 123 | |||
| 104 | return 0; | 124 | return 0; |
| 105 | } | 125 | } |
| 106 | 126 | ||
| @@ -198,6 +218,9 @@ static int hv_cpu_die(unsigned int cpu) | |||
| 198 | struct hv_reenlightenment_control re_ctrl; | 218 | struct hv_reenlightenment_control re_ctrl; |
| 199 | unsigned int new_cpu; | 219 | unsigned int new_cpu; |
| 200 | 220 | ||
| 221 | if (hv_vp_assist_page && hv_vp_assist_page[cpu]) | ||
| 222 | wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); | ||
| 223 | |||
| 201 | if (hv_reenlightenment_cb == NULL) | 224 | if (hv_reenlightenment_cb == NULL) |
| 202 | return 0; | 225 | return 0; |
| 203 | 226 | ||
| @@ -224,6 +247,7 @@ void hyperv_init(void) | |||
| 224 | { | 247 | { |
| 225 | u64 guest_id, required_msrs; | 248 | u64 guest_id, required_msrs; |
| 226 | union hv_x64_msr_hypercall_contents hypercall_msr; | 249 | union hv_x64_msr_hypercall_contents hypercall_msr; |
| 250 | int cpuhp; | ||
| 227 | 251 | ||
| 228 | if (x86_hyper_type != X86_HYPER_MS_HYPERV) | 252 | if (x86_hyper_type != X86_HYPER_MS_HYPERV) |
| 229 | return; | 253 | return; |
| @@ -241,9 +265,17 @@ void hyperv_init(void) | |||
| 241 | if (!hv_vp_index) | 265 | if (!hv_vp_index) |
| 242 | return; | 266 | return; |
| 243 | 267 | ||
| 244 | if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", | 268 | hv_vp_assist_page = kcalloc(num_possible_cpus(), |
| 245 | hv_cpu_init, hv_cpu_die) < 0) | 269 | sizeof(*hv_vp_assist_page), GFP_KERNEL); |
| 270 | if (!hv_vp_assist_page) { | ||
| 271 | ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; | ||
| 246 | goto free_vp_index; | 272 | goto free_vp_index; |
| 273 | } | ||
| 274 | |||
| 275 | cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", | ||
| 276 | hv_cpu_init, hv_cpu_die); | ||
| 277 | if (cpuhp < 0) | ||
| 278 | goto free_vp_assist_page; | ||
| 247 | 279 | ||
| 248 | /* | 280 | /* |
| 249 | * Setup the hypercall page and enable hypercalls. | 281 | * Setup the hypercall page and enable hypercalls. |
| @@ -256,7 +288,7 @@ void hyperv_init(void) | |||
| 256 | hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); | 288 | hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); |
| 257 | if (hv_hypercall_pg == NULL) { | 289 | if (hv_hypercall_pg == NULL) { |
| 258 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); | 290 | wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); |
| 259 | goto free_vp_index; | 291 | goto remove_cpuhp_state; |
| 260 | } | 292 | } |
| 261 | 293 | ||
| 262 | rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); | 294 | rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); |
| @@ -304,6 +336,11 @@ register_msr_cs: | |||
| 304 | 336 | ||
| 305 | return; | 337 | return; |
| 306 | 338 | ||
| 339 | remove_cpuhp_state: | ||
| 340 | cpuhp_remove_state(cpuhp); | ||
| 341 | free_vp_assist_page: | ||
| 342 | kfree(hv_vp_assist_page); | ||
| 343 | hv_vp_assist_page = NULL; | ||
| 307 | free_vp_index: | 344 | free_vp_index: |
| 308 | kfree(hv_vp_index); | 345 | kfree(hv_vp_index); |
| 309 | hv_vp_index = NULL; | 346 | hv_vp_index = NULL; |
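
hv_cpu_init() enables the VP assist page by writing its page frame number into HV_X64_MSR_VP_ASSIST_PAGE together with the enable bit, using the constants from the new hyperv-tlfs.h header below. A worked sketch of the encoding (the concrete PFN value is only an example):

        u64 pfn = vmalloc_to_pfn(*hvp);         /* as in hv_cpu_init() above */
        u64 val = (pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
                  HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;

        /* e.g. pfn == 0x12345 gives val == 0x12345001: PFN in bits 63:12, enable in bit 0 */
        wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);

hv_cpu_die() clears the MSR again before a CPU goes offline, so the hypervisor stops writing to a page that is about to be reused.
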
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h index 6c0c3a3b631c..416cb0e0c496 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv-tlfs.h | |||
| @@ -1,6 +1,13 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ | 1 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
| 2 | #ifndef _ASM_X86_HYPERV_H | 2 | |
| 3 | #define _ASM_X86_HYPERV_H | 3 | /* |
| 4 | * This file contains definitions from Hyper-V Hypervisor Top-Level Functional | ||
| 5 | * Specification (TLFS): | ||
| 6 | * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs | ||
| 7 | */ | ||
| 8 | |||
| 9 | #ifndef _ASM_X86_HYPERV_TLFS_H | ||
| 10 | #define _ASM_X86_HYPERV_TLFS_H | ||
| 4 | 11 | ||
| 5 | #include <linux/types.h> | 12 | #include <linux/types.h> |
| 6 | 13 | ||
| @@ -14,6 +21,7 @@ | |||
| 14 | #define HYPERV_CPUID_FEATURES 0x40000003 | 21 | #define HYPERV_CPUID_FEATURES 0x40000003 |
| 15 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | 22 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 |
| 16 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | 23 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 |
| 24 | #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A | ||
| 17 | 25 | ||
| 18 | #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 | 26 | #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 |
| 19 | #define HYPERV_CPUID_MIN 0x40000005 | 27 | #define HYPERV_CPUID_MIN 0x40000005 |
| @@ -159,6 +167,9 @@ | |||
| 159 | /* Recommend using the newer ExProcessorMasks interface */ | 167 | /* Recommend using the newer ExProcessorMasks interface */ |
| 160 | #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) | 168 | #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) |
| 161 | 169 | ||
| 170 | /* Recommend using enlightened VMCS */ | ||
| 171 | #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14) | ||
| 172 | |||
| 162 | /* | 173 | /* |
| 163 | * Crash notification flag. | 174 | * Crash notification flag. |
| 164 | */ | 175 | */ |
| @@ -192,7 +203,7 @@ | |||
| 192 | #define HV_X64_MSR_EOI 0x40000070 | 203 | #define HV_X64_MSR_EOI 0x40000070 |
| 193 | #define HV_X64_MSR_ICR 0x40000071 | 204 | #define HV_X64_MSR_ICR 0x40000071 |
| 194 | #define HV_X64_MSR_TPR 0x40000072 | 205 | #define HV_X64_MSR_TPR 0x40000072 |
| 195 | #define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 | 206 | #define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 |
| 196 | 207 | ||
| 197 | /* Define synthetic interrupt controller model specific registers. */ | 208 | /* Define synthetic interrupt controller model specific registers. */ |
| 198 | #define HV_X64_MSR_SCONTROL 0x40000080 | 209 | #define HV_X64_MSR_SCONTROL 0x40000080 |
| @@ -240,6 +251,55 @@ | |||
| 240 | #define HV_X64_MSR_CRASH_PARAMS \ | 251 | #define HV_X64_MSR_CRASH_PARAMS \ |
| 241 | (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) | 252 | (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) |
| 242 | 253 | ||
| 254 | /* | ||
| 255 | * Declare the MSR used to setup pages used to communicate with the hypervisor. | ||
| 256 | */ | ||
| 257 | union hv_x64_msr_hypercall_contents { | ||
| 258 | u64 as_uint64; | ||
| 259 | struct { | ||
| 260 | u64 enable:1; | ||
| 261 | u64 reserved:11; | ||
| 262 | u64 guest_physical_address:52; | ||
| 263 | }; | ||
| 264 | }; | ||
| 265 | |||
| 266 | /* | ||
| 267 | * TSC page layout. | ||
| 268 | */ | ||
| 269 | struct ms_hyperv_tsc_page { | ||
| 270 | volatile u32 tsc_sequence; | ||
| 271 | u32 reserved1; | ||
| 272 | volatile u64 tsc_scale; | ||
| 273 | volatile s64 tsc_offset; | ||
| 274 | u64 reserved2[509]; | ||
| 275 | }; | ||
| 276 | |||
| 277 | /* | ||
| 278 | * The guest OS needs to register the guest ID with the hypervisor. | ||
| 279 | * The guest ID is a 64 bit entity and the structure of this ID is | ||
| 280 | * specified in the Hyper-V specification: | ||
| 281 | * | ||
| 282 | * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx | ||
| 283 | * | ||
| 284 | * While the current guideline does not specify how Linux guest ID(s) | ||
| 285 | * need to be generated, our plan is to publish the guidelines for | ||
| 286 | * Linux and other guest operating systems that currently are hosted | ||
| 287 | * on Hyper-V. The implementation here conforms to this yet | ||
| 288 | * unpublished guidelines. | ||
| 289 | * | ||
| 290 | * | ||
| 291 | * Bit(s) | ||
| 292 | * 63 - Indicates if the OS is Open Source or not; 1 is Open Source | ||
| 293 | * 62:56 - Os Type; Linux is 0x100 | ||
| 294 | * 55:48 - Distro specific identification | ||
| 295 | * 47:16 - Linux kernel version number | ||
| 296 | * 15:0 - Distro specific identification | ||
| 297 | * | ||
| 298 | * | ||
| 299 | */ | ||
| 300 | |||
| 301 | #define HV_LINUX_VENDOR_ID 0x8100 | ||
| 302 | |||
| 243 | /* TSC emulation after migration */ | 303 | /* TSC emulation after migration */ |
| 244 | #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 | 304 | #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 |
| 245 | 305 | ||
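
With HV_LINUX_VENDOR_ID now living in the TLFS header, the guest ID is still assembled according to the bit layout in the comment above. A minimal sketch of how the ID is presumably built and registered during early Hyper-V init (generate_guest_id() is the existing helper in mshyperv.h; the zero distro fields are illustrative):

        u64 guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);

        /* bit 63 (open source) and the OS-type field come from HV_LINUX_VENDOR_ID,
         * bits 47:16 carry LINUX_VERSION_CODE, the distro-specific fields stay zero */
        wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
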
| @@ -278,10 +338,13 @@ struct hv_tsc_emulation_status { | |||
| 278 | #define HVCALL_POST_MESSAGE 0x005c | 338 | #define HVCALL_POST_MESSAGE 0x005c |
| 279 | #define HVCALL_SIGNAL_EVENT 0x005d | 339 | #define HVCALL_SIGNAL_EVENT 0x005d |
| 280 | 340 | ||
| 281 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 | 341 | #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 |
| 282 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 | 342 | #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 |
| 283 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ | 343 | #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ |
| 284 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) | 344 | (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) |
| 345 | |||
| 346 | /* Hyper-V Enlightened VMCS version mask in nested features CPUID */ | ||
| 347 | #define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff | ||
| 285 | 348 | ||
| 286 | #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 | 349 | #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 |
| 287 | #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 | 350 | #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 |
| @@ -301,12 +364,22 @@ enum HV_GENERIC_SET_FORMAT { | |||
| 301 | HV_GENERIC_SET_ALL, | 364 | HV_GENERIC_SET_ALL, |
| 302 | }; | 365 | }; |
| 303 | 366 | ||
| 367 | #define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) | ||
| 368 | #define HV_HYPERCALL_FAST_BIT BIT(16) | ||
| 369 | #define HV_HYPERCALL_VARHEAD_OFFSET 17 | ||
| 370 | #define HV_HYPERCALL_REP_COMP_OFFSET 32 | ||
| 371 | #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) | ||
| 372 | #define HV_HYPERCALL_REP_START_OFFSET 48 | ||
| 373 | #define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) | ||
| 374 | |||
| 304 | /* hypercall status code */ | 375 | /* hypercall status code */ |
| 305 | #define HV_STATUS_SUCCESS 0 | 376 | #define HV_STATUS_SUCCESS 0 |
| 306 | #define HV_STATUS_INVALID_HYPERCALL_CODE 2 | 377 | #define HV_STATUS_INVALID_HYPERCALL_CODE 2 |
| 307 | #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 | 378 | #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 |
| 308 | #define HV_STATUS_INVALID_ALIGNMENT 4 | 379 | #define HV_STATUS_INVALID_ALIGNMENT 4 |
| 380 | #define HV_STATUS_INVALID_PARAMETER 5 | ||
| 309 | #define HV_STATUS_INSUFFICIENT_MEMORY 11 | 381 | #define HV_STATUS_INSUFFICIENT_MEMORY 11 |
| 382 | #define HV_STATUS_INVALID_PORT_ID 17 | ||
| 310 | #define HV_STATUS_INVALID_CONNECTION_ID 18 | 383 | #define HV_STATUS_INVALID_CONNECTION_ID 18 |
| 311 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 | 384 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 |
| 312 | 385 | ||
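
The hypercall input layout moves here verbatim; the offsets describe how the 64-bit control word is assembled before issuing the hypercall. A sketch of building a rep hypercall, mirroring hv_do_rep_hypercall() in mshyperv.h (code, rep_count, varhead_size, input and output are the caller's parameters):

        u64 control = (u64)code;
        u64 status, reps_done;

        control |= (u64)varhead_size << HV_HYPERCALL_VARHEAD_OFFSET;
        control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;

        status = hv_do_hypercall(control, input, output);
        reps_done = (status & HV_HYPERCALL_REP_COMP_MASK) >> HV_HYPERCALL_REP_COMP_OFFSET;
        if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
                return status;  /* otherwise resume via HV_HYPERCALL_REP_START_OFFSET */
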
| @@ -321,6 +394,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE { | |||
| 321 | #define HV_SYNIC_SINT_COUNT (16) | 394 | #define HV_SYNIC_SINT_COUNT (16) |
| 322 | /* Define the expected SynIC version. */ | 395 | /* Define the expected SynIC version. */ |
| 323 | #define HV_SYNIC_VERSION_1 (0x1) | 396 | #define HV_SYNIC_VERSION_1 (0x1) |
| 397 | /* Valid SynIC vectors are 16-255. */ | ||
| 398 | #define HV_SYNIC_FIRST_VALID_VECTOR (16) | ||
| 324 | 399 | ||
| 325 | #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) | 400 | #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) |
| 326 | #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) | 401 | #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) |
| @@ -415,6 +490,216 @@ struct hv_timer_message_payload { | |||
| 415 | __u64 delivery_time; /* When the message was delivered */ | 490 | __u64 delivery_time; /* When the message was delivered */ |
| 416 | }; | 491 | }; |
| 417 | 492 | ||
| 493 | /* Define virtual processor assist page structure. */ | ||
| 494 | struct hv_vp_assist_page { | ||
| 495 | __u32 apic_assist; | ||
| 496 | __u32 reserved; | ||
| 497 | __u64 vtl_control[2]; | ||
| 498 | __u64 nested_enlightenments_control[2]; | ||
| 499 | __u32 enlighten_vmentry; | ||
| 500 | __u64 current_nested_vmcs; | ||
| 501 | }; | ||
| 502 | |||
| 503 | struct hv_enlightened_vmcs { | ||
| 504 | u32 revision_id; | ||
| 505 | u32 abort; | ||
| 506 | |||
| 507 | u16 host_es_selector; | ||
| 508 | u16 host_cs_selector; | ||
| 509 | u16 host_ss_selector; | ||
| 510 | u16 host_ds_selector; | ||
| 511 | u16 host_fs_selector; | ||
| 512 | u16 host_gs_selector; | ||
| 513 | u16 host_tr_selector; | ||
| 514 | |||
| 515 | u64 host_ia32_pat; | ||
| 516 | u64 host_ia32_efer; | ||
| 517 | |||
| 518 | u64 host_cr0; | ||
| 519 | u64 host_cr3; | ||
| 520 | u64 host_cr4; | ||
| 521 | |||
| 522 | u64 host_ia32_sysenter_esp; | ||
| 523 | u64 host_ia32_sysenter_eip; | ||
| 524 | u64 host_rip; | ||
| 525 | u32 host_ia32_sysenter_cs; | ||
| 526 | |||
| 527 | u32 pin_based_vm_exec_control; | ||
| 528 | u32 vm_exit_controls; | ||
| 529 | u32 secondary_vm_exec_control; | ||
| 530 | |||
| 531 | u64 io_bitmap_a; | ||
| 532 | u64 io_bitmap_b; | ||
| 533 | u64 msr_bitmap; | ||
| 534 | |||
| 535 | u16 guest_es_selector; | ||
| 536 | u16 guest_cs_selector; | ||
| 537 | u16 guest_ss_selector; | ||
| 538 | u16 guest_ds_selector; | ||
| 539 | u16 guest_fs_selector; | ||
| 540 | u16 guest_gs_selector; | ||
| 541 | u16 guest_ldtr_selector; | ||
| 542 | u16 guest_tr_selector; | ||
| 543 | |||
| 544 | u32 guest_es_limit; | ||
| 545 | u32 guest_cs_limit; | ||
| 546 | u32 guest_ss_limit; | ||
| 547 | u32 guest_ds_limit; | ||
| 548 | u32 guest_fs_limit; | ||
| 549 | u32 guest_gs_limit; | ||
| 550 | u32 guest_ldtr_limit; | ||
| 551 | u32 guest_tr_limit; | ||
| 552 | u32 guest_gdtr_limit; | ||
| 553 | u32 guest_idtr_limit; | ||
| 554 | |||
| 555 | u32 guest_es_ar_bytes; | ||
| 556 | u32 guest_cs_ar_bytes; | ||
| 557 | u32 guest_ss_ar_bytes; | ||
| 558 | u32 guest_ds_ar_bytes; | ||
| 559 | u32 guest_fs_ar_bytes; | ||
| 560 | u32 guest_gs_ar_bytes; | ||
| 561 | u32 guest_ldtr_ar_bytes; | ||
| 562 | u32 guest_tr_ar_bytes; | ||
| 563 | |||
| 564 | u64 guest_es_base; | ||
| 565 | u64 guest_cs_base; | ||
| 566 | u64 guest_ss_base; | ||
| 567 | u64 guest_ds_base; | ||
| 568 | u64 guest_fs_base; | ||
| 569 | u64 guest_gs_base; | ||
| 570 | u64 guest_ldtr_base; | ||
| 571 | u64 guest_tr_base; | ||
| 572 | u64 guest_gdtr_base; | ||
| 573 | u64 guest_idtr_base; | ||
| 574 | |||
| 575 | u64 padding64_1[3]; | ||
| 576 | |||
| 577 | u64 vm_exit_msr_store_addr; | ||
| 578 | u64 vm_exit_msr_load_addr; | ||
| 579 | u64 vm_entry_msr_load_addr; | ||
| 580 | |||
| 581 | u64 cr3_target_value0; | ||
| 582 | u64 cr3_target_value1; | ||
| 583 | u64 cr3_target_value2; | ||
| 584 | u64 cr3_target_value3; | ||
| 585 | |||
| 586 | u32 page_fault_error_code_mask; | ||
| 587 | u32 page_fault_error_code_match; | ||
| 588 | |||
| 589 | u32 cr3_target_count; | ||
| 590 | u32 vm_exit_msr_store_count; | ||
| 591 | u32 vm_exit_msr_load_count; | ||
| 592 | u32 vm_entry_msr_load_count; | ||
| 593 | |||
| 594 | u64 tsc_offset; | ||
| 595 | u64 virtual_apic_page_addr; | ||
| 596 | u64 vmcs_link_pointer; | ||
| 597 | |||
| 598 | u64 guest_ia32_debugctl; | ||
| 599 | u64 guest_ia32_pat; | ||
| 600 | u64 guest_ia32_efer; | ||
| 601 | |||
| 602 | u64 guest_pdptr0; | ||
| 603 | u64 guest_pdptr1; | ||
| 604 | u64 guest_pdptr2; | ||
| 605 | u64 guest_pdptr3; | ||
| 606 | |||
| 607 | u64 guest_pending_dbg_exceptions; | ||
| 608 | u64 guest_sysenter_esp; | ||
| 609 | u64 guest_sysenter_eip; | ||
| 610 | |||
| 611 | u32 guest_activity_state; | ||
| 612 | u32 guest_sysenter_cs; | ||
| 613 | |||
| 614 | u64 cr0_guest_host_mask; | ||
| 615 | u64 cr4_guest_host_mask; | ||
| 616 | u64 cr0_read_shadow; | ||
| 617 | u64 cr4_read_shadow; | ||
| 618 | u64 guest_cr0; | ||
| 619 | u64 guest_cr3; | ||
| 620 | u64 guest_cr4; | ||
| 621 | u64 guest_dr7; | ||
| 622 | |||
| 623 | u64 host_fs_base; | ||
| 624 | u64 host_gs_base; | ||
| 625 | u64 host_tr_base; | ||
| 626 | u64 host_gdtr_base; | ||
| 627 | u64 host_idtr_base; | ||
| 628 | u64 host_rsp; | ||
| 629 | |||
| 630 | u64 ept_pointer; | ||
| 631 | |||
| 632 | u16 virtual_processor_id; | ||
| 633 | u16 padding16[3]; | ||
| 634 | |||
| 635 | u64 padding64_2[5]; | ||
| 636 | u64 guest_physical_address; | ||
| 637 | |||
| 638 | u32 vm_instruction_error; | ||
| 639 | u32 vm_exit_reason; | ||
| 640 | u32 vm_exit_intr_info; | ||
| 641 | u32 vm_exit_intr_error_code; | ||
| 642 | u32 idt_vectoring_info_field; | ||
| 643 | u32 idt_vectoring_error_code; | ||
| 644 | u32 vm_exit_instruction_len; | ||
| 645 | u32 vmx_instruction_info; | ||
| 646 | |||
| 647 | u64 exit_qualification; | ||
| 648 | u64 exit_io_instruction_ecx; | ||
| 649 | u64 exit_io_instruction_esi; | ||
| 650 | u64 exit_io_instruction_edi; | ||
| 651 | u64 exit_io_instruction_eip; | ||
| 652 | |||
| 653 | u64 guest_linear_address; | ||
| 654 | u64 guest_rsp; | ||
| 655 | u64 guest_rflags; | ||
| 656 | |||
| 657 | u32 guest_interruptibility_info; | ||
| 658 | u32 cpu_based_vm_exec_control; | ||
| 659 | u32 exception_bitmap; | ||
| 660 | u32 vm_entry_controls; | ||
| 661 | u32 vm_entry_intr_info_field; | ||
| 662 | u32 vm_entry_exception_error_code; | ||
| 663 | u32 vm_entry_instruction_len; | ||
| 664 | u32 tpr_threshold; | ||
| 665 | |||
| 666 | u64 guest_rip; | ||
| 667 | |||
| 668 | u32 hv_clean_fields; | ||
| 669 | u32 hv_padding_32; | ||
| 670 | u32 hv_synthetic_controls; | ||
| 671 | u32 hv_enlightenments_control; | ||
| 672 | u32 hv_vp_id; | ||
| 673 | |||
| 674 | u64 hv_vm_id; | ||
| 675 | u64 partition_assist_page; | ||
| 676 | u64 padding64_4[4]; | ||
| 677 | u64 guest_bndcfgs; | ||
| 678 | u64 padding64_5[7]; | ||
| 679 | u64 xss_exit_bitmap; | ||
| 680 | u64 padding64_6[7]; | ||
| 681 | }; | ||
| 682 | |||
| 683 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 | ||
| 684 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) | ||
| 685 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1) | ||
| 686 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) | ||
| 687 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) | ||
| 688 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) | ||
| 689 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) | ||
| 690 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) | ||
| 691 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) | ||
| 692 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) | ||
| 693 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) | ||
| 694 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) | ||
| 695 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) | ||
| 696 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) | ||
| 697 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) | ||
| 698 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) | ||
| 699 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) | ||
| 700 | |||
| 701 | #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF | ||
| 702 | |||
| 418 | #define HV_STIMER_ENABLE (1ULL << 0) | 703 | #define HV_STIMER_ENABLE (1ULL << 0) |
| 419 | #define HV_STIMER_PERIODIC (1ULL << 1) | 704 | #define HV_STIMER_PERIODIC (1ULL << 1) |
| 420 | #define HV_STIMER_LAZY (1ULL << 2) | 705 | #define HV_STIMER_LAZY (1ULL << 2) |
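
hv_clean_fields acts as dirty tracking for the enlightened VMCS: Hyper-V may keep cached copies of any field group whose clean bit is still set and only re-reads groups whose bit has been cleared since the last VM entry. A sketch of the intended use by a guest hypervisor running on Hyper-V (the evmcs pointer and the particular fields touched are assumptions for illustration):

        /* after modifying the MSR bitmap and guest RIP/RSP in the enlightened VMCS: */
        evmcs->hv_clean_fields &= ~(HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP |
                                    HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC);
        /* untouched groups keep their bits set, so Hyper-V can skip reloading them */
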
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b605a5b6a30c..949c977bc4c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include <asm/msr-index.h> | 34 | #include <asm/msr-index.h> |
| 35 | #include <asm/asm.h> | 35 | #include <asm/asm.h> |
| 36 | #include <asm/kvm_page_track.h> | 36 | #include <asm/kvm_page_track.h> |
| 37 | #include <asm/hyperv-tlfs.h> | ||
| 37 | 38 | ||
| 38 | #define KVM_MAX_VCPUS 288 | 39 | #define KVM_MAX_VCPUS 288 |
| 39 | #define KVM_SOFT_MAX_VCPUS 240 | 40 | #define KVM_SOFT_MAX_VCPUS 240 |
| @@ -73,6 +74,7 @@ | |||
| 73 | #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) | 74 | #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) |
| 74 | #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) | 75 | #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) |
| 75 | #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) | 76 | #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) |
| 77 | #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) | ||
| 76 | 78 | ||
| 77 | #define CR0_RESERVED_BITS \ | 79 | #define CR0_RESERVED_BITS \ |
| 78 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | 80 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ |
| @@ -498,6 +500,7 @@ struct kvm_vcpu_arch { | |||
| 498 | u64 apic_base; | 500 | u64 apic_base; |
| 499 | struct kvm_lapic *apic; /* kernel irqchip context */ | 501 | struct kvm_lapic *apic; /* kernel irqchip context */ |
| 500 | bool apicv_active; | 502 | bool apicv_active; |
| 503 | bool load_eoi_exitmap_pending; | ||
| 501 | DECLARE_BITMAP(ioapic_handled_vectors, 256); | 504 | DECLARE_BITMAP(ioapic_handled_vectors, 256); |
| 502 | unsigned long apic_attention; | 505 | unsigned long apic_attention; |
| 503 | int32_t apic_arb_prio; | 506 | int32_t apic_arb_prio; |
| @@ -571,7 +574,7 @@ struct kvm_vcpu_arch { | |||
| 571 | } exception; | 574 | } exception; |
| 572 | 575 | ||
| 573 | struct kvm_queued_interrupt { | 576 | struct kvm_queued_interrupt { |
| 574 | bool pending; | 577 | bool injected; |
| 575 | bool soft; | 578 | bool soft; |
| 576 | u8 nr; | 579 | u8 nr; |
| 577 | } interrupt; | 580 | } interrupt; |
| @@ -754,6 +757,12 @@ struct kvm_hv { | |||
| 754 | u64 hv_crash_ctl; | 757 | u64 hv_crash_ctl; |
| 755 | 758 | ||
| 756 | HV_REFERENCE_TSC_PAGE tsc_ref; | 759 | HV_REFERENCE_TSC_PAGE tsc_ref; |
| 760 | |||
| 761 | struct idr conn_to_evt; | ||
| 762 | |||
| 763 | u64 hv_reenlightenment_control; | ||
| 764 | u64 hv_tsc_emulation_control; | ||
| 765 | u64 hv_tsc_emulation_status; | ||
| 757 | }; | 766 | }; |
| 758 | 767 | ||
| 759 | enum kvm_irqchip_mode { | 768 | enum kvm_irqchip_mode { |
| @@ -762,15 +771,6 @@ enum kvm_irqchip_mode { | |||
| 762 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ | 771 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ |
| 763 | }; | 772 | }; |
| 764 | 773 | ||
| 765 | struct kvm_sev_info { | ||
| 766 | bool active; /* SEV enabled guest */ | ||
| 767 | unsigned int asid; /* ASID used for this guest */ | ||
| 768 | unsigned int handle; /* SEV firmware handle */ | ||
| 769 | int fd; /* SEV device fd */ | ||
| 770 | unsigned long pages_locked; /* Number of pages locked */ | ||
| 771 | struct list_head regions_list; /* List of registered regions */ | ||
| 772 | }; | ||
| 773 | |||
| 774 | struct kvm_arch { | 774 | struct kvm_arch { |
| 775 | unsigned int n_used_mmu_pages; | 775 | unsigned int n_used_mmu_pages; |
| 776 | unsigned int n_requested_mmu_pages; | 776 | unsigned int n_requested_mmu_pages; |
| @@ -800,13 +800,13 @@ struct kvm_arch { | |||
| 800 | struct mutex apic_map_lock; | 800 | struct mutex apic_map_lock; |
| 801 | struct kvm_apic_map *apic_map; | 801 | struct kvm_apic_map *apic_map; |
| 802 | 802 | ||
| 803 | unsigned int tss_addr; | ||
| 804 | bool apic_access_page_done; | 803 | bool apic_access_page_done; |
| 805 | 804 | ||
| 806 | gpa_t wall_clock; | 805 | gpa_t wall_clock; |
| 807 | 806 | ||
| 808 | bool ept_identity_pagetable_done; | 807 | bool mwait_in_guest; |
| 809 | gpa_t ept_identity_map_addr; | 808 | bool hlt_in_guest; |
| 809 | bool pause_in_guest; | ||
| 810 | 810 | ||
| 811 | unsigned long irq_sources_bitmap; | 811 | unsigned long irq_sources_bitmap; |
| 812 | s64 kvmclock_offset; | 812 | s64 kvmclock_offset; |
| @@ -849,17 +849,8 @@ struct kvm_arch { | |||
| 849 | 849 | ||
| 850 | bool disabled_lapic_found; | 850 | bool disabled_lapic_found; |
| 851 | 851 | ||
| 852 | /* Struct members for AVIC */ | ||
| 853 | u32 avic_vm_id; | ||
| 854 | u32 ldr_mode; | ||
| 855 | struct page *avic_logical_id_table_page; | ||
| 856 | struct page *avic_physical_id_table_page; | ||
| 857 | struct hlist_node hnode; | ||
| 858 | |||
| 859 | bool x2apic_format; | 852 | bool x2apic_format; |
| 860 | bool x2apic_broadcast_quirk_disabled; | 853 | bool x2apic_broadcast_quirk_disabled; |
| 861 | |||
| 862 | struct kvm_sev_info sev_info; | ||
| 863 | }; | 854 | }; |
| 864 | 855 | ||
| 865 | struct kvm_vm_stat { | 856 | struct kvm_vm_stat { |
| @@ -936,6 +927,8 @@ struct kvm_x86_ops { | |||
| 936 | bool (*cpu_has_high_real_mode_segbase)(void); | 927 | bool (*cpu_has_high_real_mode_segbase)(void); |
| 937 | void (*cpuid_update)(struct kvm_vcpu *vcpu); | 928 | void (*cpuid_update)(struct kvm_vcpu *vcpu); |
| 938 | 929 | ||
| 930 | struct kvm *(*vm_alloc)(void); | ||
| 931 | void (*vm_free)(struct kvm *); | ||
| 939 | int (*vm_init)(struct kvm *kvm); | 932 | int (*vm_init)(struct kvm *kvm); |
| 940 | void (*vm_destroy)(struct kvm *kvm); | 933 | void (*vm_destroy)(struct kvm *kvm); |
| 941 | 934 | ||
| @@ -1007,6 +1000,7 @@ struct kvm_x86_ops { | |||
| 1007 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | 1000 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); |
| 1008 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | 1001 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
| 1009 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 1002 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
| 1003 | int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); | ||
| 1010 | int (*get_tdp_level)(struct kvm_vcpu *vcpu); | 1004 | int (*get_tdp_level)(struct kvm_vcpu *vcpu); |
| 1011 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 1005 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
| 1012 | int (*get_lpage_level)(void); | 1006 | int (*get_lpage_level)(void); |
| @@ -1109,6 +1103,17 @@ struct kvm_arch_async_pf { | |||
| 1109 | 1103 | ||
| 1110 | extern struct kvm_x86_ops *kvm_x86_ops; | 1104 | extern struct kvm_x86_ops *kvm_x86_ops; |
| 1111 | 1105 | ||
| 1106 | #define __KVM_HAVE_ARCH_VM_ALLOC | ||
| 1107 | static inline struct kvm *kvm_arch_alloc_vm(void) | ||
| 1108 | { | ||
| 1109 | return kvm_x86_ops->vm_alloc(); | ||
| 1110 | } | ||
| 1111 | |||
| 1112 | static inline void kvm_arch_free_vm(struct kvm *kvm) | ||
| 1113 | { | ||
| 1114 | return kvm_x86_ops->vm_free(kvm); | ||
| 1115 | } | ||
| 1116 | |||
| 1112 | int kvm_mmu_module_init(void); | 1117 | int kvm_mmu_module_init(void); |
| 1113 | void kvm_mmu_module_exit(void); | 1118 | void kvm_mmu_module_exit(void); |
| 1114 | 1119 | ||
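
The new vm_alloc/vm_free hooks let the vendor module allocate a container that embeds struct kvm, which is what allows the SEV and AVIC members removed from kvm_arch above to move into vendor-private structures. A minimal sketch of an implementation, assuming a wrapper struct whose first member is the struct kvm itself (the struct and function names are illustrative):

        struct kvm_svm {
                struct kvm kvm;         /* must stay first so the two pointers coincide */
                /* vendor-private VM state (AVIC tables, SEV info, ...) goes here */
        };

        static struct kvm *svm_vm_alloc(void)
        {
                struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);

                return kvm_svm ? &kvm_svm->kvm : NULL;
        }

        static void svm_vm_free(struct kvm *kvm)
        {
                kfree(container_of(kvm, struct kvm_svm, kvm));
        }
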
| @@ -1187,6 +1192,8 @@ enum emulation_result { | |||
| 1187 | #define EMULTYPE_SKIP (1 << 2) | 1192 | #define EMULTYPE_SKIP (1 << 2) |
| 1188 | #define EMULTYPE_RETRY (1 << 3) | 1193 | #define EMULTYPE_RETRY (1 << 3) |
| 1189 | #define EMULTYPE_NO_REEXECUTE (1 << 4) | 1194 | #define EMULTYPE_NO_REEXECUTE (1 << 4) |
| 1195 | #define EMULTYPE_NO_UD_ON_FAIL (1 << 5) | ||
| 1196 | #define EMULTYPE_VMWARE (1 << 6) | ||
| 1190 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, | 1197 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
| 1191 | int emulation_type, void *insn, int insn_len); | 1198 | int emulation_type, void *insn, int insn_len); |
| 1192 | 1199 | ||
| @@ -1204,8 +1211,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); | |||
| 1204 | 1211 | ||
| 1205 | struct x86_emulate_ctxt; | 1212 | struct x86_emulate_ctxt; |
| 1206 | 1213 | ||
| 1207 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); | 1214 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); |
| 1208 | int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port); | ||
| 1209 | int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 1215 | int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
| 1210 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); | 1216 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
| 1211 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu); | 1217 | int kvm_vcpu_halt(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b407dda2bd7..3aea2658323a 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
| @@ -88,6 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, | |||
| 88 | #ifdef CONFIG_KVM_GUEST | 88 | #ifdef CONFIG_KVM_GUEST |
| 89 | bool kvm_para_available(void); | 89 | bool kvm_para_available(void); |
| 90 | unsigned int kvm_arch_para_features(void); | 90 | unsigned int kvm_arch_para_features(void); |
| 91 | unsigned int kvm_arch_para_hints(void); | ||
| 91 | void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); | 92 | void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); |
| 92 | void kvm_async_pf_task_wake(u32 token); | 93 | void kvm_async_pf_task_wake(u32 token); |
| 93 | u32 kvm_read_and_reset_pf_reason(void); | 94 | u32 kvm_read_and_reset_pf_reason(void); |
| @@ -115,6 +116,11 @@ static inline unsigned int kvm_arch_para_features(void) | |||
| 115 | return 0; | 116 | return 0; |
| 116 | } | 117 | } |
| 117 | 118 | ||
| 119 | static inline unsigned int kvm_arch_para_hints(void) | ||
| 120 | { | ||
| 121 | return 0; | ||
| 122 | } | ||
| 123 | |||
| 118 | static inline u32 kvm_read_and_reset_pf_reason(void) | 124 | static inline u32 kvm_read_and_reset_pf_reason(void) |
| 119 | { | 125 | { |
| 120 | return 0; | 126 | return 0; |
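
kvm_arch_para_hints() reads the new hint bitmap from CPUID 0x40000001 edx, with a stub returning 0 when CONFIG_KVM_GUEST is off. The hint-test helper used later in arch/x86/kernel/kvm.c is not shown in this hunk; it presumably mirrors kvm_para_has_feature(), roughly:

        static inline bool kvm_para_has_hint(unsigned int hint)
        {
                return kvm_arch_para_hints() & (1 << hint);
        }
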
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e73c4d0c06ad..b90e79610cf7 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h | |||
| @@ -6,90 +6,23 @@ | |||
| 6 | #include <linux/atomic.h> | 6 | #include <linux/atomic.h> |
| 7 | #include <linux/nmi.h> | 7 | #include <linux/nmi.h> |
| 8 | #include <asm/io.h> | 8 | #include <asm/io.h> |
| 9 | #include <asm/hyperv.h> | 9 | #include <asm/hyperv-tlfs.h> |
| 10 | #include <asm/nospec-branch.h> | 10 | #include <asm/nospec-branch.h> |
| 11 | 11 | ||
| 12 | /* | ||
| 13 | * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent | ||
| 14 | * is set by CPUID(HVCPUID_VERSION_FEATURES). | ||
| 15 | */ | ||
| 16 | enum hv_cpuid_function { | ||
| 17 | HVCPUID_VERSION_FEATURES = 0x00000001, | ||
| 18 | HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, | ||
| 19 | HVCPUID_INTERFACE = 0x40000001, | ||
| 20 | |||
| 21 | /* | ||
| 22 | * The remaining functions depend on the value of | ||
| 23 | * HVCPUID_INTERFACE | ||
| 24 | */ | ||
| 25 | HVCPUID_VERSION = 0x40000002, | ||
| 26 | HVCPUID_FEATURES = 0x40000003, | ||
| 27 | HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, | ||
| 28 | HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, | ||
| 29 | }; | ||
| 30 | |||
| 31 | struct ms_hyperv_info { | 12 | struct ms_hyperv_info { |
| 32 | u32 features; | 13 | u32 features; |
| 33 | u32 misc_features; | 14 | u32 misc_features; |
| 34 | u32 hints; | 15 | u32 hints; |
| 16 | u32 nested_features; | ||
| 35 | u32 max_vp_index; | 17 | u32 max_vp_index; |
| 36 | u32 max_lp_index; | 18 | u32 max_lp_index; |
| 37 | }; | 19 | }; |
| 38 | 20 | ||
| 39 | extern struct ms_hyperv_info ms_hyperv; | 21 | extern struct ms_hyperv_info ms_hyperv; |
| 40 | 22 | ||
| 41 | /* | ||
| 42 | * Declare the MSR used to setup pages used to communicate with the hypervisor. | ||
| 43 | */ | ||
| 44 | union hv_x64_msr_hypercall_contents { | ||
| 45 | u64 as_uint64; | ||
| 46 | struct { | ||
| 47 | u64 enable:1; | ||
| 48 | u64 reserved:11; | ||
| 49 | u64 guest_physical_address:52; | ||
| 50 | }; | ||
| 51 | }; | ||
| 52 | 23 | ||
| 53 | /* | 24 | /* |
| 54 | * TSC page layout. | 25 | * Generate the guest ID. |
| 55 | */ | ||
| 56 | |||
| 57 | struct ms_hyperv_tsc_page { | ||
| 58 | volatile u32 tsc_sequence; | ||
| 59 | u32 reserved1; | ||
| 60 | volatile u64 tsc_scale; | ||
| 61 | volatile s64 tsc_offset; | ||
| 62 | u64 reserved2[509]; | ||
| 63 | }; | ||
| 64 | |||
| 65 | /* | ||
| 66 | * The guest OS needs to register the guest ID with the hypervisor. | ||
| 67 | * The guest ID is a 64 bit entity and the structure of this ID is | ||
| 68 | * specified in the Hyper-V specification: | ||
| 69 | * | ||
| 70 | * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx | ||
| 71 | * | ||
| 72 | * While the current guideline does not specify how Linux guest ID(s) | ||
| 73 | * need to be generated, our plan is to publish the guidelines for | ||
| 74 | * Linux and other guest operating systems that currently are hosted | ||
| 75 | * on Hyper-V. The implementation here conforms to this yet | ||
| 76 | * unpublished guidelines. | ||
| 77 | * | ||
| 78 | * | ||
| 79 | * Bit(s) | ||
| 80 | * 63 - Indicates if the OS is Open Source or not; 1 is Open Source | ||
| 81 | * 62:56 - Os Type; Linux is 0x100 | ||
| 82 | * 55:48 - Distro specific identification | ||
| 83 | * 47:16 - Linux kernel version number | ||
| 84 | * 15:0 - Distro specific identification | ||
| 85 | * | ||
| 86 | * | ||
| 87 | */ | ||
| 88 | |||
| 89 | #define HV_LINUX_VENDOR_ID 0x8100 | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Generate the guest ID based on the guideline described above. | ||
| 93 | */ | 26 | */ |
| 94 | 27 | ||
| 95 | static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, | 28 | static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, |
| @@ -228,14 +161,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) | |||
| 228 | return hv_status; | 161 | return hv_status; |
| 229 | } | 162 | } |
| 230 | 163 | ||
| 231 | #define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) | ||
| 232 | #define HV_HYPERCALL_FAST_BIT BIT(16) | ||
| 233 | #define HV_HYPERCALL_VARHEAD_OFFSET 17 | ||
| 234 | #define HV_HYPERCALL_REP_COMP_OFFSET 32 | ||
| 235 | #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) | ||
| 236 | #define HV_HYPERCALL_REP_START_OFFSET 48 | ||
| 237 | #define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) | ||
| 238 | |||
| 239 | /* Fast hypercall with 8 bytes of input and no output */ | 164 | /* Fast hypercall with 8 bytes of input and no output */ |
| 240 | static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) | 165 | static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) |
| 241 | { | 166 | { |
| @@ -307,6 +232,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, | |||
| 307 | */ | 232 | */ |
| 308 | extern u32 *hv_vp_index; | 233 | extern u32 *hv_vp_index; |
| 309 | extern u32 hv_max_vp_index; | 234 | extern u32 hv_max_vp_index; |
| 235 | extern struct hv_vp_assist_page **hv_vp_assist_page; | ||
| 236 | |||
| 237 | static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) | ||
| 238 | { | ||
| 239 | if (!hv_vp_assist_page) | ||
| 240 | return NULL; | ||
| 241 | |||
| 242 | return hv_vp_assist_page[cpu]; | ||
| 243 | } | ||
| 310 | 244 | ||
| 311 | /** | 245 | /** |
| 312 | * hv_cpu_number_to_vp_number() - Map CPU to VP. | 246 | * hv_cpu_number_to_vp_number() - Map CPU to VP. |
| @@ -343,6 +277,10 @@ static inline void hyperv_setup_mmu_ops(void) {} | |||
| 343 | static inline void set_hv_tscchange_cb(void (*cb)(void)) {} | 277 | static inline void set_hv_tscchange_cb(void (*cb)(void)) {} |
| 344 | static inline void clear_hv_tscchange_cb(void) {} | 278 | static inline void clear_hv_tscchange_cb(void) {} |
| 345 | static inline void hyperv_stop_tsc_emulation(void) {}; | 279 | static inline void hyperv_stop_tsc_emulation(void) {}; |
| 280 | static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) | ||
| 281 | { | ||
| 282 | return NULL; | ||
| 283 | } | ||
| 346 | #endif /* CONFIG_HYPERV */ | 284 | #endif /* CONFIG_HYPERV */ |
| 347 | 285 | ||
| 348 | #ifdef CONFIG_HYPERV_TSCPAGE | 286 | #ifdef CONFIG_HYPERV_TSCPAGE |
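
hv_get_vp_assist_page() is the accessor the VMX side is expected to use when enabling the enlightened VMCS: point current_nested_vmcs at the eVMCS and flip enlighten_vmentry in the current CPU's assist page. A sketch under those assumptions (the enlightened_vmcs variable is illustrative):

        struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id());
        phys_addr_t evmcs_pa = __pa(enlightened_vmcs);  /* illustrative eVMCS page */

        if (vp_ap) {
                vp_ap->current_nested_vmcs = evmcs_pa;
                vp_ap->enlighten_vmentry = 1;
        }
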
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c9084dedfcfa..53d5b1b9255e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
| @@ -353,7 +353,21 @@ | |||
| 353 | 353 | ||
| 354 | /* Fam 15h MSRs */ | 354 | /* Fam 15h MSRs */ |
| 355 | #define MSR_F15H_PERF_CTL 0xc0010200 | 355 | #define MSR_F15H_PERF_CTL 0xc0010200 |
| 356 | #define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL | ||
| 357 | #define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) | ||
| 358 | #define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4) | ||
| 359 | #define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6) | ||
| 360 | #define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8) | ||
| 361 | #define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10) | ||
| 362 | |||
| 356 | #define MSR_F15H_PERF_CTR 0xc0010201 | 363 | #define MSR_F15H_PERF_CTR 0xc0010201 |
| 364 | #define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR | ||
| 365 | #define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2) | ||
| 366 | #define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4) | ||
| 367 | #define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6) | ||
| 368 | #define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8) | ||
| 369 | #define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10) | ||
| 370 | |||
| 357 | #define MSR_F15H_NB_PERF_CTL 0xc0010240 | 371 | #define MSR_F15H_NB_PERF_CTL 0xc0010240 |
| 358 | #define MSR_F15H_NB_PERF_CTR 0xc0010241 | 372 | #define MSR_F15H_NB_PERF_CTR 0xc0010241 |
| 359 | #define MSR_F15H_PTSC 0xc0010280 | 373 | #define MSR_F15H_PTSC 0xc0010280 |
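
Family 15h core PMU MSRs interleave event selects and counters, which is why the new CTLn/CTRn aliases step by 2 from the base addresses. A hypothetical helper mapping a counter index to its MSR pair would look roughly like:

        static inline void amd_f15h_perf_msrs(unsigned int idx, u32 *ctl, u32 *ctr)
        {
                *ctl = MSR_F15H_PERF_CTL0 + 2 * idx;    /* 0xc0010200, 0xc0010202, ... */
                *ctr = MSR_F15H_PERF_CTR0 + 2 * idx;    /* 0xc0010201, 0xc0010203, ... */
        }
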
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b0ccd4847a58..4fa4206029e3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -407,9 +407,19 @@ union irq_stack_union { | |||
| 407 | DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; | 407 | DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; |
| 408 | DECLARE_INIT_PER_CPU(irq_stack_union); | 408 | DECLARE_INIT_PER_CPU(irq_stack_union); |
| 409 | 409 | ||
| 410 | static inline unsigned long cpu_kernelmode_gs_base(int cpu) | ||
| 411 | { | ||
| 412 | return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); | ||
| 413 | } | ||
| 414 | |||
| 410 | DECLARE_PER_CPU(char *, irq_stack_ptr); | 415 | DECLARE_PER_CPU(char *, irq_stack_ptr); |
| 411 | DECLARE_PER_CPU(unsigned int, irq_count); | 416 | DECLARE_PER_CPU(unsigned int, irq_count); |
| 412 | extern asmlinkage void ignore_sysret(void); | 417 | extern asmlinkage void ignore_sysret(void); |
| 418 | |||
| 419 | #if IS_ENABLED(CONFIG_KVM) | ||
| 420 | /* Save actual FS/GS selectors and bases to current->thread */ | ||
| 421 | void save_fsgs_for_kvm(void); | ||
| 422 | #endif | ||
| 413 | #else /* X86_64 */ | 423 | #else /* X86_64 */ |
| 414 | #ifdef CONFIG_CC_STACKPROTECTOR | 424 | #ifdef CONFIG_CC_STACKPROTECTOR |
| 415 | /* | 425 | /* |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0487ac054870..93b462e48067 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
| @@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 60 | u32 intercept_dr; | 60 | u32 intercept_dr; |
| 61 | u32 intercept_exceptions; | 61 | u32 intercept_exceptions; |
| 62 | u64 intercept; | 62 | u64 intercept; |
| 63 | u8 reserved_1[42]; | 63 | u8 reserved_1[40]; |
| 64 | u16 pause_filter_thresh; | ||
| 64 | u16 pause_filter_count; | 65 | u16 pause_filter_count; |
| 65 | u64 iopm_base_pa; | 66 | u64 iopm_base_pa; |
| 66 | u64 msrpm_base_pa; | 67 | u64 msrpm_base_pa; |
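
Carving two bytes out of reserved_1 exposes the PAUSE filter threshold next to the existing count, which is what adaptive pause-loop exiting on SVM needs. A sketch of how the control area might be programmed, assuming the usual struct vcpu_svm pointer; the values are illustrative defaults, not mandated by this header change:

        /* exit to the host after 3000 PAUSEs in a tight loop; a gap of more than
         * 128 cycles between successive PAUSEs resets the count */
        svm->vmcb->control.pause_filter_count  = 3000;
        svm->vmcb->control.pause_filter_thresh = 128;
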
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f3a960488eae..c535c2fdea13 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h | |||
| @@ -354,8 +354,25 @@ struct kvm_xcrs { | |||
| 354 | __u64 padding[16]; | 354 | __u64 padding[16]; |
| 355 | }; | 355 | }; |
| 356 | 356 | ||
| 357 | /* definition of registers in kvm_run */ | 357 | #define KVM_SYNC_X86_REGS (1UL << 0) |
| 358 | #define KVM_SYNC_X86_SREGS (1UL << 1) | ||
| 359 | #define KVM_SYNC_X86_EVENTS (1UL << 2) | ||
| 360 | |||
| 361 | #define KVM_SYNC_X86_VALID_FIELDS \ | ||
| 362 | (KVM_SYNC_X86_REGS| \ | ||
| 363 | KVM_SYNC_X86_SREGS| \ | ||
| 364 | KVM_SYNC_X86_EVENTS) | ||
| 365 | |||
| 366 | /* kvm_sync_regs struct included by kvm_run struct */ | ||
| 358 | struct kvm_sync_regs { | 367 | struct kvm_sync_regs { |
| 368 | /* Members of this structure are potentially malicious. | ||
| 369 | * Care must be taken by code reading, esp. interpreting, | ||
| 370 | * data fields from them inside KVM to prevent TOCTOU and | ||
| 371 | * double-fetch types of vulnerabilities. | ||
| 372 | */ | ||
| 373 | struct kvm_regs regs; | ||
| 374 | struct kvm_sregs sregs; | ||
| 375 | struct kvm_vcpu_events events; | ||
| 359 | }; | 376 | }; |
| 360 | 377 | ||
| 361 | #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) | 378 | #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) |
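
The sync-regs extension lets userspace read and write the common register sets through the shared kvm_run page instead of separate KVM_GET/SET_REGS ioctls. A sketch of the intended userspace flow, assuming a vcpu fd and an mmap'ed kvm_run named run (insn_len is illustrative):

        run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
        ioctl(vcpu_fd, KVM_RUN, 0);

        /* registers are now current in run->s.regs without extra ioctls */
        run->s.regs.regs.rip += insn_len;               /* e.g. skip an emulated instruction */
        run->kvm_dirty_regs = KVM_SYNC_X86_REGS;        /* KVM loads them back on the next KVM_RUN */
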
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6cfa9c8cb7d6..4c851ebb3ceb 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h | |||
| @@ -3,15 +3,16 @@ | |||
| 3 | #define _UAPI_ASM_X86_KVM_PARA_H | 3 | #define _UAPI_ASM_X86_KVM_PARA_H |
| 4 | 4 | ||
| 5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
| 6 | #include <asm/hyperv.h> | ||
| 7 | 6 | ||
| 8 | /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It | 7 | /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It |
| 9 | * should be used to determine that a VM is running under KVM. | 8 | * should be used to determine that a VM is running under KVM. |
| 10 | */ | 9 | */ |
| 11 | #define KVM_CPUID_SIGNATURE 0x40000000 | 10 | #define KVM_CPUID_SIGNATURE 0x40000000 |
| 12 | 11 | ||
| 13 | /* This CPUID returns a feature bitmap in eax. Before enabling a particular | 12 | /* This CPUID returns two feature bitmaps in eax, edx. Before enabling |
| 14 | * paravirtualization, the appropriate feature bit should be checked. | 13 | * a particular paravirtualization, the appropriate feature bit should |
| 14 | * be checked in eax. The performance hint feature bit should be checked | ||
| 15 | * in edx. | ||
| 15 | */ | 16 | */ |
| 16 | #define KVM_CPUID_FEATURES 0x40000001 | 17 | #define KVM_CPUID_FEATURES 0x40000001 |
| 17 | #define KVM_FEATURE_CLOCKSOURCE 0 | 18 | #define KVM_FEATURE_CLOCKSOURCE 0 |
| @@ -28,6 +29,8 @@ | |||
| 28 | #define KVM_FEATURE_PV_TLB_FLUSH 9 | 29 | #define KVM_FEATURE_PV_TLB_FLUSH 9 |
| 29 | #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 | 30 | #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 |
| 30 | 31 | ||
| 32 | #define KVM_HINTS_DEDICATED 0 | ||
| 33 | |||
| 31 | /* The last 8 bits are used to indicate how to interpret the flags field | 34 | /* The last 8 bits are used to indicate how to interpret the flags field |
| 32 | * in pvclock structure. If no bits are set, all flags are ignored. | 35 | * in pvclock structure. If no bits are set, all flags are ignored. |
| 33 | */ | 36 | */ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 348cf4821240..4702fbd98f92 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -487,7 +487,7 @@ void load_percpu_segment(int cpu) | |||
| 487 | loadsegment(fs, __KERNEL_PERCPU); | 487 | loadsegment(fs, __KERNEL_PERCPU); |
| 488 | #else | 488 | #else |
| 489 | __loadsegment_simple(gs, 0); | 489 | __loadsegment_simple(gs, 0); |
| 490 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); | 490 | wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); |
| 491 | #endif | 491 | #endif |
| 492 | load_stack_canary_segment(); | 492 | load_stack_canary_segment(); |
| 493 | } | 493 | } |
| @@ -1398,6 +1398,7 @@ __setup("clearcpuid=", setup_clearcpuid); | |||
| 1398 | #ifdef CONFIG_X86_64 | 1398 | #ifdef CONFIG_X86_64 |
| 1399 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1399 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
| 1400 | irq_stack_union) __aligned(PAGE_SIZE) __visible; | 1400 | irq_stack_union) __aligned(PAGE_SIZE) __visible; |
| 1401 | EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); | ||
| 1401 | 1402 | ||
| 1402 | /* | 1403 | /* |
| 1403 | * The following percpu variables are hot. Align current_task to | 1404 | * The following percpu variables are hot. Align current_task to |
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4488cf0dd499..031082c96db8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include <linux/kexec.h> | 22 | #include <linux/kexec.h> |
| 23 | #include <asm/processor.h> | 23 | #include <asm/processor.h> |
| 24 | #include <asm/hypervisor.h> | 24 | #include <asm/hypervisor.h> |
| 25 | #include <asm/hyperv.h> | 25 | #include <asm/hyperv-tlfs.h> |
| 26 | #include <asm/mshyperv.h> | 26 | #include <asm/mshyperv.h> |
| 27 | #include <asm/desc.h> | 27 | #include <asm/desc.h> |
| 28 | #include <asm/irq_regs.h> | 28 | #include <asm/irq_regs.h> |
| @@ -216,8 +216,8 @@ static void __init ms_hyperv_init_platform(void) | |||
| 216 | pr_info("Hyper-V: features 0x%x, hints 0x%x\n", | 216 | pr_info("Hyper-V: features 0x%x, hints 0x%x\n", |
| 217 | ms_hyperv.features, ms_hyperv.hints); | 217 | ms_hyperv.features, ms_hyperv.hints); |
| 218 | 218 | ||
| 219 | ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS); | 219 | ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); |
| 220 | ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS); | 220 | ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); |
| 221 | 221 | ||
| 222 | pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", | 222 | pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", |
| 223 | ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); | 223 | ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); |
| @@ -225,11 +225,12 @@ static void __init ms_hyperv_init_platform(void) | |||
| 225 | /* | 225 | /* |
| 226 | * Extract host information. | 226 | * Extract host information. |
| 227 | */ | 227 | */ |
| 228 | if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) { | 228 | if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >= |
| 229 | hv_host_info_eax = cpuid_eax(HVCPUID_VERSION); | 229 | HYPERV_CPUID_VERSION) { |
| 230 | hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION); | 230 | hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); |
| 231 | hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION); | 231 | hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); |
| 232 | hv_host_info_edx = cpuid_edx(HVCPUID_VERSION); | 232 | hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); |
| 233 | hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); | ||
| 233 | 234 | ||
| 234 | pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n", | 235 | pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n", |
| 235 | hv_host_info_eax, hv_host_info_ebx >> 16, | 236 | hv_host_info_eax, hv_host_info_ebx >> 16, |
| @@ -243,6 +244,11 @@ static void __init ms_hyperv_init_platform(void) | |||
| 243 | x86_platform.calibrate_cpu = hv_get_tsc_khz; | 244 | x86_platform.calibrate_cpu = hv_get_tsc_khz; |
| 244 | } | 245 | } |
| 245 | 246 | ||
| 247 | if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { | ||
| 248 | ms_hyperv.nested_features = | ||
| 249 | cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); | ||
| 250 | } | ||
| 251 | |||
| 246 | #ifdef CONFIG_X86_LOCAL_APIC | 252 | #ifdef CONFIG_X86_LOCAL_APIC |
| 247 | if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && | 253 | if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && |
| 248 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { | 254 | ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index fae86e36e399..7867417cfaff 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void) | |||
| 454 | } | 454 | } |
| 455 | 455 | ||
| 456 | #ifdef CONFIG_SMP | 456 | #ifdef CONFIG_SMP |
| 457 | static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) | ||
| 458 | { | ||
| 459 | native_smp_prepare_cpus(max_cpus); | ||
| 460 | if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) | ||
| 461 | static_branch_disable(&virt_spin_lock_key); | ||
| 462 | } | ||
| 463 | |||
| 457 | static void __init kvm_smp_prepare_boot_cpu(void) | 464 | static void __init kvm_smp_prepare_boot_cpu(void) |
| 458 | { | 465 | { |
| 459 | /* | 466 | /* |
| @@ -546,6 +553,7 @@ static void __init kvm_guest_init(void) | |||
| 546 | } | 553 | } |
| 547 | 554 | ||
| 548 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && | 555 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && |
| 556 | !kvm_para_has_hint(KVM_HINTS_DEDICATED) && | ||
| 549 | kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) | 557 | kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) |
| 550 | pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; | 558 | pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; |
| 551 | 559 | ||
| @@ -556,6 +564,7 @@ static void __init kvm_guest_init(void) | |||
| 556 | kvm_setup_vsyscall_timeinfo(); | 564 | kvm_setup_vsyscall_timeinfo(); |
| 557 | 565 | ||
| 558 | #ifdef CONFIG_SMP | 566 | #ifdef CONFIG_SMP |
| 567 | smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; | ||
| 559 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 568 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
| 560 | if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", | 569 | if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", |
| 561 | kvm_cpu_online, kvm_cpu_down_prepare) < 0) | 570 | kvm_cpu_online, kvm_cpu_down_prepare) < 0) |
| @@ -605,6 +614,11 @@ unsigned int kvm_arch_para_features(void) | |||
| 605 | return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); | 614 | return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); |
| 606 | } | 615 | } |
| 607 | 616 | ||
| 617 | unsigned int kvm_arch_para_hints(void) | ||
| 618 | { | ||
| 619 | return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); | ||
| 620 | } | ||
| 621 | |||
| 608 | static uint32_t __init kvm_detect(void) | 622 | static uint32_t __init kvm_detect(void) |
| 609 | { | 623 | { |
| 610 | return kvm_cpuid_base(); | 624 | return kvm_cpuid_base(); |
| @@ -635,6 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) | |||
| 635 | int cpu; | 649 | int cpu; |
| 636 | 650 | ||
| 637 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && | 651 | if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && |
| 652 | !kvm_para_has_hint(KVM_HINTS_DEDICATED) && | ||
| 638 | kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { | 653 | kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { |
| 639 | for_each_possible_cpu(cpu) { | 654 | for_each_possible_cpu(cpu) { |
| 640 | zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), | 655 | zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), |
| @@ -730,6 +745,9 @@ void __init kvm_spinlock_init(void) | |||
| 730 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) | 745 | if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) |
| 731 | return; | 746 | return; |
| 732 | 747 | ||
| 748 | if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) | ||
| 749 | return; | ||
| 750 | |||
| 733 | __pv_init_lock_hash(); | 751 | __pv_init_lock_hash(); |
| 734 | pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; | 752 | pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; |
| 735 | pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); | 753 | pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9eb448c7859d..4b100fe0f508 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task) | |||
| 205 | save_base_legacy(task, task->thread.gsindex, GS); | 205 | save_base_legacy(task, task->thread.gsindex, GS); |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | #if IS_ENABLED(CONFIG_KVM) | ||
| 209 | /* | ||
| 210 | * While a process is running, current->thread.fsbase and current->thread.gsbase | ||
| 211 | * may not match the corresponding CPU registers (see save_base_legacy()). KVM | ||
| 212 | * wants an efficient way to save and restore FSBASE and GSBASE. | ||
| 213 | * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE. | ||
| 214 | */ | ||
| 215 | void save_fsgs_for_kvm(void) | ||
| 216 | { | ||
| 217 | save_fsgs(current); | ||
| 218 | } | ||
| 219 | EXPORT_SYMBOL_GPL(save_fsgs_for_kvm); | ||
| 220 | #endif | ||
| 221 | |||
| 208 | static __always_inline void loadseg(enum which_selector which, | 222 | static __always_inline void loadseg(enum which_selector which, |
| 209 | unsigned short sel) | 223 | unsigned short sel) |
| 210 | { | 224 | { |
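
save_fsgs_for_kvm() gives KVM a sanctioned way to refresh current->thread.{fsbase,gsbase} before reading them on the VM-entry path, instead of reading the base MSRs itself. The expected call pattern on the vendor side is roughly as follows; the host_* variable names are assumptions for illustration:

        save_fsgs_for_kvm();    /* make current->thread.{fsbase,gsbase} match the live registers */
        host_fs_base        = current->thread.fsbase;
        host_user_gs_base   = current->thread.gsbase;
        host_kernel_gs_base = cpu_kernelmode_gs_base(smp_processor_id());
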
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b671fc2d0422..82055b90a8b3 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
| 135 | return -EINVAL; | 135 | return -EINVAL; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); | ||
| 139 | if (kvm_hlt_in_guest(vcpu->kvm) && best && | ||
| 140 | (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) | ||
| 141 | best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); | ||
| 142 | |||
| 138 | /* Update physical-address width */ | 143 | /* Update physical-address width */ |
| 139 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); | 144 | vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |
| 140 | kvm_mmu_reset_context(vcpu); | 145 | kvm_mmu_reset_context(vcpu); |
| @@ -370,7 +375,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 370 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | | 375 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
| 371 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | | 376 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | |
| 372 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | | 377 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | |
| 373 | F(TOPOEXT); | 378 | F(TOPOEXT) | F(PERFCTR_CORE); |
| 374 | 379 | ||
| 375 | /* cpuid 0x80000008.ebx */ | 380 | /* cpuid 0x80000008.ebx */ |
| 376 | const u32 kvm_cpuid_8000_0008_ebx_x86_features = | 381 | const u32 kvm_cpuid_8000_0008_ebx_x86_features = |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d91eaeb01034..b3705ae52824 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "x86.h" | 30 | #include "x86.h" |
| 31 | #include "tss.h" | 31 | #include "tss.h" |
| 32 | #include "mmu.h" | 32 | #include "mmu.h" |
| 33 | #include "pmu.h" | ||
| 33 | 34 | ||
| 34 | /* | 35 | /* |
| 35 | * Operand types | 36 | * Operand types |
| @@ -2887,6 +2888,9 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | |||
| 2887 | return ctxt->ops->cpl(ctxt) > iopl; | 2888 | return ctxt->ops->cpl(ctxt) > iopl; |
| 2888 | } | 2889 | } |
| 2889 | 2890 | ||
| 2891 | #define VMWARE_PORT_VMPORT (0x5658) | ||
| 2892 | #define VMWARE_PORT_VMRPC (0x5659) | ||
| 2893 | |||
| 2890 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | 2894 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, |
| 2891 | u16 port, u16 len) | 2895 | u16 port, u16 len) |
| 2892 | { | 2896 | { |
| @@ -2898,6 +2902,14 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
| 2898 | unsigned mask = (1 << len) - 1; | 2902 | unsigned mask = (1 << len) - 1; |
| 2899 | unsigned long base; | 2903 | unsigned long base; |
| 2900 | 2904 | ||
| 2905 | /* | ||
| 2906 | * VMware allows access to these ports even if denied | ||
| 2907 | * by the TSS I/O permission bitmap. Mimic that behavior. | ||
| 2908 | */ | ||
| 2909 | if (enable_vmware_backdoor && | ||
| 2910 | ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC))) | ||
| 2911 | return true; | ||
| 2912 | |||
| 2901 | ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); | 2913 | ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); |
| 2902 | if (!tr_seg.p) | 2914 | if (!tr_seg.p) |
| 2903 | return false; | 2915 | return false; |
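For context, guests typically exercise these ports with the long-standing VMware backdoor calling convention. The sketch below is illustrative only: the magic value 0x564D5868 ('VMXh') and command 10 (get version) come from public VMware backdoor documentation rather than from this patch, and the access is only tolerated when the host boots with kvm.enable_vmware_backdoor.

/* Illustrative guest-side sketch, not part of this patch: probe the backdoor
 * on VMWARE_PORT_VMPORT (0x5658). With the check above, the IN is allowed
 * even if the TSS I/O permission bitmap would normally deny it. */
static inline unsigned int vmware_backdoor_version(void)
{
        unsigned int eax, ebx, ecx, edx;

        asm volatile("inl %%dx, %%eax"
                     : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                     : "a" (0x564D5868), "b" (0), "c" (10), "d" (0x5658));

        return eax;     /* backdoor version; ~0U when no backdoor is present */
}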
| @@ -4282,6 +4294,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) | |||
| 4282 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); | 4294 | u64 cr4 = ctxt->ops->get_cr(ctxt, 4); |
| 4283 | u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); | 4295 | u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); |
| 4284 | 4296 | ||
| 4297 | /* | ||
| 4298 | * VMware allows access to these Pseudo-PMCs even when read via RDPMC | ||
| 4299 | * in Ring3 when CR4.PCE=0. | ||
| 4300 | */ | ||
| 4301 | if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx)) | ||
| 4302 | return X86EMUL_CONTINUE; | ||
| 4303 | |||
| 4285 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || | 4304 | if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || |
| 4286 | ctxt->ops->check_pmc(ctxt, rcx)) | 4305 | ctxt->ops->check_pmc(ctxt, rcx)) |
| 4287 | return emulate_gp(ctxt, 0); | 4306 | return emulate_gp(ctxt, 0); |
| @@ -4498,6 +4517,10 @@ static const struct gprefix pfx_0f_2b = { | |||
| 4498 | ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, | 4517 | ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, |
| 4499 | }; | 4518 | }; |
| 4500 | 4519 | ||
| 4520 | static const struct gprefix pfx_0f_10_0f_11 = { | ||
| 4521 | I(Unaligned, em_mov), I(Unaligned, em_mov), N, N, | ||
| 4522 | }; | ||
| 4523 | |||
| 4501 | static const struct gprefix pfx_0f_28_0f_29 = { | 4524 | static const struct gprefix pfx_0f_28_0f_29 = { |
| 4502 | I(Aligned, em_mov), I(Aligned, em_mov), N, N, | 4525 | I(Aligned, em_mov), I(Aligned, em_mov), N, N, |
| 4503 | }; | 4526 | }; |
| @@ -4709,7 +4732,9 @@ static const struct opcode twobyte_table[256] = { | |||
| 4709 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, | 4732 | DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, |
| 4710 | N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, | 4733 | N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, |
| 4711 | /* 0x10 - 0x1F */ | 4734 | /* 0x10 - 0x1F */ |
| 4712 | N, N, N, N, N, N, N, N, | 4735 | GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11), |
| 4736 | GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11), | ||
| 4737 | N, N, N, N, N, N, | ||
| 4713 | D(ImplicitOps | ModRM | SrcMem | NoAccess), | 4738 | D(ImplicitOps | ModRM | SrcMem | NoAccess), |
| 4714 | N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), | 4739 | N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), |
| 4715 | /* 0x20 - 0x2F */ | 4740 | /* 0x20 - 0x2F */ |
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index dc97f2544b6f..98618e397342 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/kvm_host.h> | 29 | #include <linux/kvm_host.h> |
| 30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
| 31 | #include <linux/sched/cputime.h> | 31 | #include <linux/sched/cputime.h> |
| 32 | #include <linux/eventfd.h> | ||
| 32 | 33 | ||
| 33 | #include <asm/apicdef.h> | 34 | #include <asm/apicdef.h> |
| 34 | #include <trace/events/kvm.h> | 35 | #include <trace/events/kvm.h> |
| @@ -74,13 +75,38 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, | |||
| 74 | return false; | 75 | return false; |
| 75 | } | 76 | } |
| 76 | 77 | ||
| 78 | static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, | ||
| 79 | int vector) | ||
| 80 | { | ||
| 81 | if (vector < HV_SYNIC_FIRST_VALID_VECTOR) | ||
| 82 | return; | ||
| 83 | |||
| 84 | if (synic_has_vector_connected(synic, vector)) | ||
| 85 | __set_bit(vector, synic->vec_bitmap); | ||
| 86 | else | ||
| 87 | __clear_bit(vector, synic->vec_bitmap); | ||
| 88 | |||
| 89 | if (synic_has_vector_auto_eoi(synic, vector)) | ||
| 90 | __set_bit(vector, synic->auto_eoi_bitmap); | ||
| 91 | else | ||
| 92 | __clear_bit(vector, synic->auto_eoi_bitmap); | ||
| 93 | } | ||
| 94 | |||
| 77 | static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, | 95 | static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, |
| 78 | u64 data, bool host) | 96 | u64 data, bool host) |
| 79 | { | 97 | { |
| 80 | int vector; | 98 | int vector, old_vector; |
| 99 | bool masked; | ||
| 81 | 100 | ||
| 82 | vector = data & HV_SYNIC_SINT_VECTOR_MASK; | 101 | vector = data & HV_SYNIC_SINT_VECTOR_MASK; |
| 83 | if (vector < 16 && !host) | 102 | masked = data & HV_SYNIC_SINT_MASKED; |
| 103 | |||
| 104 | /* | ||
| 105 | * Valid vectors are 16-255; however, nested Hyper-V attempts to write | ||
| 106 | * the default '0x10000' value on boot and this should not #GP. We need | ||
| 107 | * to allow zero-initializing the register from the host as well. | ||
| 108 | */ | ||
| 109 | if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked) | ||
| 84 | return 1; | 110 | return 1; |
| 85 | /* | 111 | /* |
| 86 | * Guest may configure multiple SINTs to use the same vector, so | 112 | * Guest may configure multiple SINTs to use the same vector, so |
| @@ -88,18 +114,13 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, | |||
| 88 | * bitmap of vectors with auto-eoi behavior. The bitmaps are | 114 | * bitmap of vectors with auto-eoi behavior. The bitmaps are |
| 89 | * updated here, and atomically queried on fast paths. | 115 | * updated here, and atomically queried on fast paths. |
| 90 | */ | 116 | */ |
| 117 | old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; | ||
| 91 | 118 | ||
| 92 | atomic64_set(&synic->sint[sint], data); | 119 | atomic64_set(&synic->sint[sint], data); |
| 93 | 120 | ||
| 94 | if (synic_has_vector_connected(synic, vector)) | 121 | synic_update_vector(synic, old_vector); |
| 95 | __set_bit(vector, synic->vec_bitmap); | ||
| 96 | else | ||
| 97 | __clear_bit(vector, synic->vec_bitmap); | ||
| 98 | 122 | ||
| 99 | if (synic_has_vector_auto_eoi(synic, vector)) | 123 | synic_update_vector(synic, vector); |
| 100 | __set_bit(vector, synic->auto_eoi_bitmap); | ||
| 101 | else | ||
| 102 | __clear_bit(vector, synic->auto_eoi_bitmap); | ||
| 103 | 124 | ||
| 104 | /* Load SynIC vectors into EOI exit bitmap */ | 125 | /* Load SynIC vectors into EOI exit bitmap */ |
| 105 | kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); | 126 | kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); |
| @@ -736,6 +757,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
| 736 | case HV_X64_MSR_CRASH_CTL: | 757 | case HV_X64_MSR_CRASH_CTL: |
| 737 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: | 758 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: |
| 738 | case HV_X64_MSR_RESET: | 759 | case HV_X64_MSR_RESET: |
| 760 | case HV_X64_MSR_REENLIGHTENMENT_CONTROL: | ||
| 761 | case HV_X64_MSR_TSC_EMULATION_CONTROL: | ||
| 762 | case HV_X64_MSR_TSC_EMULATION_STATUS: | ||
| 739 | r = true; | 763 | r = true; |
| 740 | break; | 764 | break; |
| 741 | } | 765 | } |
| @@ -981,6 +1005,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, | |||
| 981 | kvm_make_request(KVM_REQ_HV_RESET, vcpu); | 1005 | kvm_make_request(KVM_REQ_HV_RESET, vcpu); |
| 982 | } | 1006 | } |
| 983 | break; | 1007 | break; |
| 1008 | case HV_X64_MSR_REENLIGHTENMENT_CONTROL: | ||
| 1009 | hv->hv_reenlightenment_control = data; | ||
| 1010 | break; | ||
| 1011 | case HV_X64_MSR_TSC_EMULATION_CONTROL: | ||
| 1012 | hv->hv_tsc_emulation_control = data; | ||
| 1013 | break; | ||
| 1014 | case HV_X64_MSR_TSC_EMULATION_STATUS: | ||
| 1015 | hv->hv_tsc_emulation_status = data; | ||
| 1016 | break; | ||
| 984 | default: | 1017 | default: |
| 985 | vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", | 1018 | vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", |
| 986 | msr, data); | 1019 | msr, data); |
| @@ -1009,17 +1042,17 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) | |||
| 1009 | return 1; | 1042 | return 1; |
| 1010 | hv->vp_index = (u32)data; | 1043 | hv->vp_index = (u32)data; |
| 1011 | break; | 1044 | break; |
| 1012 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | 1045 | case HV_X64_MSR_VP_ASSIST_PAGE: { |
| 1013 | u64 gfn; | 1046 | u64 gfn; |
| 1014 | unsigned long addr; | 1047 | unsigned long addr; |
| 1015 | 1048 | ||
| 1016 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | 1049 | if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { |
| 1017 | hv->hv_vapic = data; | 1050 | hv->hv_vapic = data; |
| 1018 | if (kvm_lapic_enable_pv_eoi(vcpu, 0)) | 1051 | if (kvm_lapic_enable_pv_eoi(vcpu, 0)) |
| 1019 | return 1; | 1052 | return 1; |
| 1020 | break; | 1053 | break; |
| 1021 | } | 1054 | } |
| 1022 | gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; | 1055 | gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; |
| 1023 | addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); | 1056 | addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); |
| 1024 | if (kvm_is_error_hva(addr)) | 1057 | if (kvm_is_error_hva(addr)) |
| 1025 | return 1; | 1058 | return 1; |
| @@ -1105,6 +1138,15 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1105 | case HV_X64_MSR_RESET: | 1138 | case HV_X64_MSR_RESET: |
| 1106 | data = 0; | 1139 | data = 0; |
| 1107 | break; | 1140 | break; |
| 1141 | case HV_X64_MSR_REENLIGHTENMENT_CONTROL: | ||
| 1142 | data = hv->hv_reenlightenment_control; | ||
| 1143 | break; | ||
| 1144 | case HV_X64_MSR_TSC_EMULATION_CONTROL: | ||
| 1145 | data = hv->hv_tsc_emulation_control; | ||
| 1146 | break; | ||
| 1147 | case HV_X64_MSR_TSC_EMULATION_STATUS: | ||
| 1148 | data = hv->hv_tsc_emulation_status; | ||
| 1149 | break; | ||
| 1108 | default: | 1150 | default: |
| 1109 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 1151 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
| 1110 | return 1; | 1152 | return 1; |
| @@ -1129,7 +1171,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 1129 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | 1171 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); |
| 1130 | case HV_X64_MSR_TPR: | 1172 | case HV_X64_MSR_TPR: |
| 1131 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | 1173 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); |
| 1132 | case HV_X64_MSR_APIC_ASSIST_PAGE: | 1174 | case HV_X64_MSR_VP_ASSIST_PAGE: |
| 1133 | data = hv->hv_vapic; | 1175 | data = hv->hv_vapic; |
| 1134 | break; | 1176 | break; |
| 1135 | case HV_X64_MSR_VP_RUNTIME: | 1177 | case HV_X64_MSR_VP_RUNTIME: |
| @@ -1226,10 +1268,47 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) | |||
| 1226 | return 1; | 1268 | return 1; |
| 1227 | } | 1269 | } |
| 1228 | 1270 | ||
| 1271 | static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) | ||
| 1272 | { | ||
| 1273 | struct eventfd_ctx *eventfd; | ||
| 1274 | |||
| 1275 | if (unlikely(!fast)) { | ||
| 1276 | int ret; | ||
| 1277 | gpa_t gpa = param; | ||
| 1278 | |||
| 1279 | if ((gpa & (__alignof__(param) - 1)) || | ||
| 1280 | offset_in_page(gpa) + sizeof(param) > PAGE_SIZE) | ||
| 1281 | return HV_STATUS_INVALID_ALIGNMENT; | ||
| 1282 | |||
| 1283 | ret = kvm_vcpu_read_guest(vcpu, gpa, ¶m, sizeof(param)); | ||
| 1284 | if (ret < 0) | ||
| 1285 | return HV_STATUS_INVALID_ALIGNMENT; | ||
| 1286 | } | ||
| 1287 | |||
| 1288 | /* | ||
| 1289 | * Per spec, bits 32-47 contain the extra "flag number". However, we | ||
| 1290 | * have no use for it, and in all known use cases it is zero, so just | ||
| 1291 | * report lookup failure if it isn't. | ||
| 1292 | */ | ||
| 1293 | if (param & 0xffff00000000ULL) | ||
| 1294 | return HV_STATUS_INVALID_PORT_ID; | ||
| 1295 | /* remaining bits are reserved-zero */ | ||
| 1296 | if (param & ~KVM_HYPERV_CONN_ID_MASK) | ||
| 1297 | return HV_STATUS_INVALID_HYPERCALL_INPUT; | ||
| 1298 | |||
| 1299 | /* conn_to_evt is protected by vcpu->kvm->srcu */ | ||
| 1300 | eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); | ||
| 1301 | if (!eventfd) | ||
| 1302 | return HV_STATUS_INVALID_PORT_ID; | ||
| 1303 | |||
| 1304 | eventfd_signal(eventfd, 1); | ||
| 1305 | return HV_STATUS_SUCCESS; | ||
| 1306 | } | ||
| 1307 | |||
| 1229 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | 1308 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) |
| 1230 | { | 1309 | { |
| 1231 | u64 param, ingpa, outgpa, ret; | 1310 | u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; |
| 1232 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; | 1311 | uint16_t code, rep_idx, rep_cnt; |
| 1233 | bool fast, longmode; | 1312 | bool fast, longmode; |
| 1234 | 1313 | ||
| 1235 | /* | 1314 | /* |
| @@ -1268,7 +1347,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
| 1268 | 1347 | ||
| 1269 | /* Hypercall continuation is not supported yet */ | 1348 | /* Hypercall continuation is not supported yet */ |
| 1270 | if (rep_cnt || rep_idx) { | 1349 | if (rep_cnt || rep_idx) { |
| 1271 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | 1350 | ret = HV_STATUS_INVALID_HYPERCALL_CODE; |
| 1272 | goto set_result; | 1351 | goto set_result; |
| 1273 | } | 1352 | } |
| 1274 | 1353 | ||
| @@ -1276,11 +1355,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
| 1276 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: | 1355 | case HVCALL_NOTIFY_LONG_SPIN_WAIT: |
| 1277 | kvm_vcpu_on_spin(vcpu, true); | 1356 | kvm_vcpu_on_spin(vcpu, true); |
| 1278 | break; | 1357 | break; |
| 1279 | case HVCALL_POST_MESSAGE: | ||
| 1280 | case HVCALL_SIGNAL_EVENT: | 1358 | case HVCALL_SIGNAL_EVENT: |
| 1359 | ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); | ||
| 1360 | if (ret != HV_STATUS_INVALID_PORT_ID) | ||
| 1361 | break; | ||
| 1362 | /* maybe userspace knows this conn_id: fall through */ | ||
| 1363 | case HVCALL_POST_MESSAGE: | ||
| 1281 | /* don't bother userspace if it has no way to handle it */ | 1364 | /* don't bother userspace if it has no way to handle it */ |
| 1282 | if (!vcpu_to_synic(vcpu)->active) { | 1365 | if (!vcpu_to_synic(vcpu)->active) { |
| 1283 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | 1366 | ret = HV_STATUS_INVALID_HYPERCALL_CODE; |
| 1284 | break; | 1367 | break; |
| 1285 | } | 1368 | } |
| 1286 | vcpu->run->exit_reason = KVM_EXIT_HYPERV; | 1369 | vcpu->run->exit_reason = KVM_EXIT_HYPERV; |
| @@ -1292,12 +1375,79 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
| 1292 | kvm_hv_hypercall_complete_userspace; | 1375 | kvm_hv_hypercall_complete_userspace; |
| 1293 | return 0; | 1376 | return 0; |
| 1294 | default: | 1377 | default: |
| 1295 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | 1378 | ret = HV_STATUS_INVALID_HYPERCALL_CODE; |
| 1296 | break; | 1379 | break; |
| 1297 | } | 1380 | } |
| 1298 | 1381 | ||
| 1299 | set_result: | 1382 | set_result: |
| 1300 | ret = res | (((u64)rep_done & 0xfff) << 32); | ||
| 1301 | kvm_hv_hypercall_set_result(vcpu, ret); | 1383 | kvm_hv_hypercall_set_result(vcpu, ret); |
| 1302 | return 1; | 1384 | return 1; |
| 1303 | } | 1385 | } |
| 1386 | |||
| 1387 | void kvm_hv_init_vm(struct kvm *kvm) | ||
| 1388 | { | ||
| 1389 | mutex_init(&kvm->arch.hyperv.hv_lock); | ||
| 1390 | idr_init(&kvm->arch.hyperv.conn_to_evt); | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | void kvm_hv_destroy_vm(struct kvm *kvm) | ||
| 1394 | { | ||
| 1395 | struct eventfd_ctx *eventfd; | ||
| 1396 | int i; | ||
| 1397 | |||
| 1398 | idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) | ||
| 1399 | eventfd_ctx_put(eventfd); | ||
| 1400 | idr_destroy(&kvm->arch.hyperv.conn_to_evt); | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) | ||
| 1404 | { | ||
| 1405 | struct kvm_hv *hv = &kvm->arch.hyperv; | ||
| 1406 | struct eventfd_ctx *eventfd; | ||
| 1407 | int ret; | ||
| 1408 | |||
| 1409 | eventfd = eventfd_ctx_fdget(fd); | ||
| 1410 | if (IS_ERR(eventfd)) | ||
| 1411 | return PTR_ERR(eventfd); | ||
| 1412 | |||
| 1413 | mutex_lock(&hv->hv_lock); | ||
| 1414 | ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, | ||
| 1415 | GFP_KERNEL); | ||
| 1416 | mutex_unlock(&hv->hv_lock); | ||
| 1417 | |||
| 1418 | if (ret >= 0) | ||
| 1419 | return 0; | ||
| 1420 | |||
| 1421 | if (ret == -ENOSPC) | ||
| 1422 | ret = -EEXIST; | ||
| 1423 | eventfd_ctx_put(eventfd); | ||
| 1424 | return ret; | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) | ||
| 1428 | { | ||
| 1429 | struct kvm_hv *hv = &kvm->arch.hyperv; | ||
| 1430 | struct eventfd_ctx *eventfd; | ||
| 1431 | |||
| 1432 | mutex_lock(&hv->hv_lock); | ||
| 1433 | eventfd = idr_remove(&hv->conn_to_evt, conn_id); | ||
| 1434 | mutex_unlock(&hv->hv_lock); | ||
| 1435 | |||
| 1436 | if (!eventfd) | ||
| 1437 | return -ENOENT; | ||
| 1438 | |||
| 1439 | synchronize_srcu(&kvm->srcu); | ||
| 1440 | eventfd_ctx_put(eventfd); | ||
| 1441 | return 0; | ||
| 1442 | } | ||
| 1443 | |||
| 1444 | int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) | ||
| 1445 | { | ||
| 1446 | if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || | ||
| 1447 | (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) | ||
| 1448 | return -EINVAL; | ||
| 1449 | |||
| 1450 | if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) | ||
| 1451 | return kvm_hv_eventfd_deassign(kvm, args->conn_id); | ||
| 1452 | return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); | ||
| 1453 | } | ||
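The kvm_vm_ioctl_hv_eventfd() handler above is reached through the new KVM_HYPERV_EVENTFD VM ioctl added elsewhere in this series. A rough userspace sketch, with error handling elided and vm_fd assumed to be an already-created VM file descriptor:

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical usage sketch: route guest HvSignalEvent on connection id 1 to
 * an eventfd. Field names follow the kvm_hyperv_eventfd uapi struct consumed
 * by kvm_vm_ioctl_hv_eventfd() above; error handling is elided. */
static int wire_hv_conn_to_eventfd(int vm_fd)
{
        int efd = eventfd(0, EFD_CLOEXEC);
        struct kvm_hyperv_eventfd hvevfd = {
                .conn_id = 1,   /* must fit within KVM_HYPERV_CONN_ID_MASK */
                .fd      = efd,
                .flags   = 0,   /* KVM_HYPERV_EVENTFD_DEASSIGN tears the binding down */
        };

        ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevfd);
        return efd;             /* read()/poll() this fd to observe guest signals */
}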
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e637631a9574..837465d69c6d 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h | |||
| @@ -88,4 +88,8 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); | |||
| 88 | void kvm_hv_setup_tsc_page(struct kvm *kvm, | 88 | void kvm_hv_setup_tsc_page(struct kvm *kvm, |
| 89 | struct pvclock_vcpu_time_info *hv_clock); | 89 | struct pvclock_vcpu_time_info *hv_clock); |
| 90 | 90 | ||
| 91 | void kvm_hv_init_vm(struct kvm *kvm); | ||
| 92 | void kvm_hv_destroy_vm(struct kvm *kvm); | ||
| 93 | int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); | ||
| 94 | |||
| 91 | #endif | 95 | #endif |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index f171051eecf3..faa264822cee 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
| @@ -73,8 +73,19 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) | |||
| 73 | */ | 73 | */ |
| 74 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | 74 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) |
| 75 | { | 75 | { |
| 76 | /* | ||
| 77 | * FIXME: interrupt.injected represents an interrupt whose | ||
| 78 | * side effects have already been applied (e.g. the bit has | ||
| 79 | * already moved from IRR to ISR). Therefore, it is incorrect | ||
| 80 | * to rely on interrupt.injected to know if there is a pending | ||
| 81 | * interrupt in the user-mode LAPIC. | ||
| 82 | * This leaves nVMX/nSVM unable to distinguish whether it | ||
| 83 | * should exit from L2 to L1 on EXTERNAL_INTERRUPT because of | ||
| 84 | * a pending interrupt or should re-inject an already-injected | ||
| 85 | * interrupt. | ||
| 86 | */ | ||
| 76 | if (!lapic_in_kernel(v)) | 87 | if (!lapic_in_kernel(v)) |
| 77 | return v->arch.interrupt.pending; | 88 | return v->arch.interrupt.injected; |
| 78 | 89 | ||
| 79 | if (kvm_cpu_has_extint(v)) | 90 | if (kvm_cpu_has_extint(v)) |
| 80 | return 1; | 91 | return 1; |
| @@ -91,8 +102,19 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) | |||
| 91 | */ | 102 | */ |
| 92 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | 103 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v) |
| 93 | { | 104 | { |
| 105 | /* | ||
| 106 | * FIXME: interrupt.injected represents an interrupt whose | ||
| 107 | * side effects have already been applied (e.g. the bit has | ||
| 108 | * already moved from IRR to ISR). Therefore, it is incorrect | ||
| 109 | * to rely on interrupt.injected to know if there is a pending | ||
| 110 | * interrupt in the user-mode LAPIC. | ||
| 111 | * This leaves nVMX/nSVM unable to distinguish whether it | ||
| 112 | * should exit from L2 to L1 on EXTERNAL_INTERRUPT because of | ||
| 113 | * a pending interrupt or should re-inject an already-injected | ||
| 114 | * interrupt. | ||
| 115 | */ | ||
| 94 | if (!lapic_in_kernel(v)) | 116 | if (!lapic_in_kernel(v)) |
| 95 | return v->arch.interrupt.pending; | 117 | return v->arch.interrupt.injected; |
| 96 | 118 | ||
| 97 | if (kvm_cpu_has_extint(v)) | 119 | if (kvm_cpu_has_extint(v)) |
| 98 | return 1; | 120 | return 1; |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f500293dad8d..9619dcc2b325 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
| @@ -41,7 +41,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | |||
| 41 | 41 | ||
| 42 | if (!test_bit(VCPU_EXREG_PDPTR, | 42 | if (!test_bit(VCPU_EXREG_PDPTR, |
| 43 | (unsigned long *)&vcpu->arch.regs_avail)) | 43 | (unsigned long *)&vcpu->arch.regs_avail)) |
| 44 | kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); | 44 | kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR); |
| 45 | 45 | ||
| 46 | return vcpu->arch.walk_mmu->pdptrs[index]; | 46 | return vcpu->arch.walk_mmu->pdptrs[index]; |
| 47 | } | 47 | } |
| @@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu) | |||
| 93 | static inline void leave_guest_mode(struct kvm_vcpu *vcpu) | 93 | static inline void leave_guest_mode(struct kvm_vcpu *vcpu) |
| 94 | { | 94 | { |
| 95 | vcpu->arch.hflags &= ~HF_GUEST_MASK; | 95 | vcpu->arch.hflags &= ~HF_GUEST_MASK; |
| 96 | |||
| 97 | if (vcpu->arch.load_eoi_exitmap_pending) { | ||
| 98 | vcpu->arch.load_eoi_exitmap_pending = false; | ||
| 99 | kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); | ||
| 100 | } | ||
| 96 | } | 101 | } |
| 97 | 102 | ||
| 98 | static inline bool is_guest_mode(struct kvm_vcpu *vcpu) | 103 | static inline bool is_guest_mode(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 391dda8d43b7..70dcb5548022 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) | |||
| 321 | if (!lapic_in_kernel(vcpu)) | 321 | if (!lapic_in_kernel(vcpu)) |
| 322 | return; | 322 | return; |
| 323 | 323 | ||
| 324 | /* | ||
| 325 | * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no | ||
| 326 | * EOI register. Some buggy OSes (e.g. Windows with the Hyper-V role) | ||
| 327 | * disable EOI broadcast in the LAPIC without checking the IOAPIC | ||
| 328 | * version first, so level-triggered interrupts would never get EOIed | ||
| 329 | * in the IOAPIC. | ||
| 330 | */ | ||
| 324 | feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); | 331 | feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); |
| 325 | if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) | 332 | if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && |
| 333 | !ioapic_in_kernel(vcpu->kvm)) | ||
| 326 | v |= APIC_LVR_DIRECTED_EOI; | 334 | v |= APIC_LVR_DIRECTED_EOI; |
| 327 | kvm_lapic_set_reg(apic, APIC_LVR, v); | 335 | kvm_lapic_set_reg(apic, APIC_LVR, v); |
| 328 | } | 336 | } |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 56c36014f7b7..edce055e9fd7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -109,7 +109,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | |||
| 109 | 109 | ||
| 110 | static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | 110 | static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) |
| 111 | { | 111 | { |
| 112 | return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | 112 | return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; |
| 113 | } | 113 | } |
| 114 | 114 | ||
| 115 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); | 115 | int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 763bb3bade63..8494dbae41b9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -3031,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) | |||
| 3031 | return RET_PF_RETRY; | 3031 | return RET_PF_RETRY; |
| 3032 | } | 3032 | } |
| 3033 | 3033 | ||
| 3034 | return RET_PF_EMULATE; | 3034 | return -EFAULT; |
| 3035 | } | 3035 | } |
| 3036 | 3036 | ||
| 3037 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, | 3037 | static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 5abae72266b7..6288e9d7068e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -452,14 +452,21 @@ error: | |||
| 452 | * done by is_rsvd_bits_set() above. | 452 | * done by is_rsvd_bits_set() above. |
| 453 | * | 453 | * |
| 454 | * We set up the value of exit_qualification to inject: | 454 | * We set up the value of exit_qualification to inject: |
| 455 | * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation | 455 | * [2:0] - Derive from the access bits. The exit_qualification might be |
| 456 | * out of date if it is serving an EPT misconfiguration. | ||
| 456 | * [5:3] - Calculated by the page walk of the guest EPT page tables | 457 | * [5:3] - Calculated by the page walk of the guest EPT page tables |
| 457 | * [7:8] - Derived from [7:8] of real exit_qualification | 458 | * [7:8] - Derived from [7:8] of real exit_qualification |
| 458 | * | 459 | * |
| 459 | * The other bits are set to 0. | 460 | * The other bits are set to 0. |
| 460 | */ | 461 | */ |
| 461 | if (!(errcode & PFERR_RSVD_MASK)) { | 462 | if (!(errcode & PFERR_RSVD_MASK)) { |
| 462 | vcpu->arch.exit_qualification &= 0x187; | 463 | vcpu->arch.exit_qualification &= 0x180; |
| 464 | if (write_fault) | ||
| 465 | vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; | ||
| 466 | if (user_fault) | ||
| 467 | vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; | ||
| 468 | if (fetch_fault) | ||
| 469 | vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; | ||
| 463 | vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; | 470 | vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; |
| 464 | } | 471 | } |
| 465 | #endif | 472 | #endif |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 026db42a86c3..58ead7db71a3 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
| @@ -244,12 +244,49 @@ int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) | |||
| 244 | return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); | 244 | return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | bool is_vmware_backdoor_pmc(u32 pmc_idx) | ||
| 248 | { | ||
| 249 | switch (pmc_idx) { | ||
| 250 | case VMWARE_BACKDOOR_PMC_HOST_TSC: | ||
| 251 | case VMWARE_BACKDOOR_PMC_REAL_TIME: | ||
| 252 | case VMWARE_BACKDOOR_PMC_APPARENT_TIME: | ||
| 253 | return true; | ||
| 254 | } | ||
| 255 | return false; | ||
| 256 | } | ||
| 257 | |||
| 258 | static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) | ||
| 259 | { | ||
| 260 | u64 ctr_val; | ||
| 261 | |||
| 262 | switch (idx) { | ||
| 263 | case VMWARE_BACKDOOR_PMC_HOST_TSC: | ||
| 264 | ctr_val = rdtsc(); | ||
| 265 | break; | ||
| 266 | case VMWARE_BACKDOOR_PMC_REAL_TIME: | ||
| 267 | ctr_val = ktime_get_boot_ns(); | ||
| 268 | break; | ||
| 269 | case VMWARE_BACKDOOR_PMC_APPARENT_TIME: | ||
| 270 | ctr_val = ktime_get_boot_ns() + | ||
| 271 | vcpu->kvm->arch.kvmclock_offset; | ||
| 272 | break; | ||
| 273 | default: | ||
| 274 | return 1; | ||
| 275 | } | ||
| 276 | |||
| 277 | *data = ctr_val; | ||
| 278 | return 0; | ||
| 279 | } | ||
| 280 | |||
| 247 | int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) | 281 | int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) |
| 248 | { | 282 | { |
| 249 | bool fast_mode = idx & (1u << 31); | 283 | bool fast_mode = idx & (1u << 31); |
| 250 | struct kvm_pmc *pmc; | 284 | struct kvm_pmc *pmc; |
| 251 | u64 ctr_val; | 285 | u64 ctr_val; |
| 252 | 286 | ||
| 287 | if (is_vmware_backdoor_pmc(idx)) | ||
| 288 | return kvm_pmu_rdpmc_vmware(vcpu, idx, data); | ||
| 289 | |||
| 253 | pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); | 290 | pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); |
| 254 | if (!pmc) | 291 | if (!pmc) |
| 255 | return 1; | 292 | return 1; |
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a9a62b9a73e2..ba8898e1a854 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h | |||
| @@ -9,6 +9,10 @@ | |||
| 9 | /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */ | 9 | /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */ |
| 10 | #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf) | 10 | #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf) |
| 11 | 11 | ||
| 12 | #define VMWARE_BACKDOOR_PMC_HOST_TSC 0x10000 | ||
| 13 | #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001 | ||
| 14 | #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002 | ||
| 15 | |||
| 12 | struct kvm_event_hw_type_mapping { | 16 | struct kvm_event_hw_type_mapping { |
| 13 | u8 eventsel; | 17 | u8 eventsel; |
| 14 | u8 unit_mask; | 18 | u8 unit_mask; |
| @@ -114,6 +118,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); | |||
| 114 | void kvm_pmu_init(struct kvm_vcpu *vcpu); | 118 | void kvm_pmu_init(struct kvm_vcpu *vcpu); |
| 115 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); | 119 | void kvm_pmu_destroy(struct kvm_vcpu *vcpu); |
| 116 | 120 | ||
| 121 | bool is_vmware_backdoor_pmc(u32 pmc_idx); | ||
| 122 | |||
| 117 | extern struct kvm_pmu_ops intel_pmu_ops; | 123 | extern struct kvm_pmu_ops intel_pmu_ops; |
| 118 | extern struct kvm_pmu_ops amd_pmu_ops; | 124 | extern struct kvm_pmu_ops amd_pmu_ops; |
| 119 | #endif /* __KVM_X86_PMU_H */ | 125 | #endif /* __KVM_X86_PMU_H */ |
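With kvm.enable_vmware_backdoor set on the host, a guest can read the three pseudo-PMCs defined above via RDPMC even from ring 3 with CR4.PCE clear, thanks to the check_rdpmc() and kvm_pmu_rdpmc_vmware() changes earlier in this series. A minimal guest-side sketch (illustrative, not part of the patch):

static inline unsigned long long rdpmc_vmware(unsigned int idx)
{
        unsigned int lo, hi;

        /* idx is one of the VMWARE_BACKDOOR_PMC_* values defined above */
        asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx));

        return ((unsigned long long)hi << 32) | lo;
}

/* e.g. rdpmc_vmware(0x10000) returns the host TSC and
 *      rdpmc_vmware(0x10001) the host boot-based clock in nanoseconds. */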
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index cd944435dfbd..1495a735b38e 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c | |||
| @@ -19,6 +19,21 @@ | |||
| 19 | #include "lapic.h" | 19 | #include "lapic.h" |
| 20 | #include "pmu.h" | 20 | #include "pmu.h" |
| 21 | 21 | ||
| 22 | enum pmu_type { | ||
| 23 | PMU_TYPE_COUNTER = 0, | ||
| 24 | PMU_TYPE_EVNTSEL, | ||
| 25 | }; | ||
| 26 | |||
| 27 | enum index { | ||
| 28 | INDEX_ZERO = 0, | ||
| 29 | INDEX_ONE, | ||
| 30 | INDEX_TWO, | ||
| 31 | INDEX_THREE, | ||
| 32 | INDEX_FOUR, | ||
| 33 | INDEX_FIVE, | ||
| 34 | INDEX_ERROR, | ||
| 35 | }; | ||
| 36 | |||
| 22 | /* duplicated from amd_perfmon_event_map, K7 and above should work. */ | 37 | /* duplicated from amd_perfmon_event_map, K7 and above should work. */ |
| 23 | static struct kvm_event_hw_type_mapping amd_event_mapping[] = { | 38 | static struct kvm_event_hw_type_mapping amd_event_mapping[] = { |
| 24 | [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, | 39 | [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, |
| @@ -31,6 +46,88 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { | |||
| 31 | [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | 46 | [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, |
| 32 | }; | 47 | }; |
| 33 | 48 | ||
| 49 | static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) | ||
| 50 | { | ||
| 51 | struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); | ||
| 52 | |||
| 53 | if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { | ||
| 54 | if (type == PMU_TYPE_COUNTER) | ||
| 55 | return MSR_F15H_PERF_CTR; | ||
| 56 | else | ||
| 57 | return MSR_F15H_PERF_CTL; | ||
| 58 | } else { | ||
| 59 | if (type == PMU_TYPE_COUNTER) | ||
| 60 | return MSR_K7_PERFCTR0; | ||
| 61 | else | ||
| 62 | return MSR_K7_EVNTSEL0; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | static enum index msr_to_index(u32 msr) | ||
| 67 | { | ||
| 68 | switch (msr) { | ||
| 69 | case MSR_F15H_PERF_CTL0: | ||
| 70 | case MSR_F15H_PERF_CTR0: | ||
| 71 | case MSR_K7_EVNTSEL0: | ||
| 72 | case MSR_K7_PERFCTR0: | ||
| 73 | return INDEX_ZERO; | ||
| 74 | case MSR_F15H_PERF_CTL1: | ||
| 75 | case MSR_F15H_PERF_CTR1: | ||
| 76 | case MSR_K7_EVNTSEL1: | ||
| 77 | case MSR_K7_PERFCTR1: | ||
| 78 | return INDEX_ONE; | ||
| 79 | case MSR_F15H_PERF_CTL2: | ||
| 80 | case MSR_F15H_PERF_CTR2: | ||
| 81 | case MSR_K7_EVNTSEL2: | ||
| 82 | case MSR_K7_PERFCTR2: | ||
| 83 | return INDEX_TWO; | ||
| 84 | case MSR_F15H_PERF_CTL3: | ||
| 85 | case MSR_F15H_PERF_CTR3: | ||
| 86 | case MSR_K7_EVNTSEL3: | ||
| 87 | case MSR_K7_PERFCTR3: | ||
| 88 | return INDEX_THREE; | ||
| 89 | case MSR_F15H_PERF_CTL4: | ||
| 90 | case MSR_F15H_PERF_CTR4: | ||
| 91 | return INDEX_FOUR; | ||
| 92 | case MSR_F15H_PERF_CTL5: | ||
| 93 | case MSR_F15H_PERF_CTR5: | ||
| 94 | return INDEX_FIVE; | ||
| 95 | default: | ||
| 96 | return INDEX_ERROR; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, | ||
| 101 | enum pmu_type type) | ||
| 102 | { | ||
| 103 | switch (msr) { | ||
| 104 | case MSR_F15H_PERF_CTL0: | ||
| 105 | case MSR_F15H_PERF_CTL1: | ||
| 106 | case MSR_F15H_PERF_CTL2: | ||
| 107 | case MSR_F15H_PERF_CTL3: | ||
| 108 | case MSR_F15H_PERF_CTL4: | ||
| 109 | case MSR_F15H_PERF_CTL5: | ||
| 110 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: | ||
| 111 | if (type != PMU_TYPE_EVNTSEL) | ||
| 112 | return NULL; | ||
| 113 | break; | ||
| 114 | case MSR_F15H_PERF_CTR0: | ||
| 115 | case MSR_F15H_PERF_CTR1: | ||
| 116 | case MSR_F15H_PERF_CTR2: | ||
| 117 | case MSR_F15H_PERF_CTR3: | ||
| 118 | case MSR_F15H_PERF_CTR4: | ||
| 119 | case MSR_F15H_PERF_CTR5: | ||
| 120 | case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: | ||
| 121 | if (type != PMU_TYPE_COUNTER) | ||
| 122 | return NULL; | ||
| 123 | break; | ||
| 124 | default: | ||
| 125 | return NULL; | ||
| 126 | } | ||
| 127 | |||
| 128 | return &pmu->gp_counters[msr_to_index(msr)]; | ||
| 129 | } | ||
| 130 | |||
| 34 | static unsigned amd_find_arch_event(struct kvm_pmu *pmu, | 131 | static unsigned amd_find_arch_event(struct kvm_pmu *pmu, |
| 35 | u8 event_select, | 132 | u8 event_select, |
| 36 | u8 unit_mask) | 133 | u8 unit_mask) |
| @@ -64,7 +161,18 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) | |||
| 64 | 161 | ||
| 65 | static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) | 162 | static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) |
| 66 | { | 163 | { |
| 67 | return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0); | 164 | unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER); |
| 165 | struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); | ||
| 166 | |||
| 167 | if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { | ||
| 168 | /* | ||
| 169 | * The idx is contiguous. The MSRs are not. The counter MSRs | ||
| 170 | * are interleaved with the event select MSRs. | ||
| 171 | */ | ||
| 172 | pmc_idx *= 2; | ||
| 173 | } | ||
| 174 | |||
| 175 | return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); | ||
| 68 | } | 176 | } |
| 69 | 177 | ||
| 70 | /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ | 178 | /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ |
| @@ -96,8 +204,8 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) | |||
| 96 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | 204 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 97 | int ret = false; | 205 | int ret = false; |
| 98 | 206 | ||
| 99 | ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) || | 207 | ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) || |
| 100 | get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); | 208 | get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); |
| 101 | 209 | ||
| 102 | return ret; | 210 | return ret; |
| 103 | } | 211 | } |
| @@ -107,14 +215,14 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | |||
| 107 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | 215 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 108 | struct kvm_pmc *pmc; | 216 | struct kvm_pmc *pmc; |
| 109 | 217 | ||
| 110 | /* MSR_K7_PERFCTRn */ | 218 | /* MSR_PERFCTRn */ |
| 111 | pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); | 219 | pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); |
| 112 | if (pmc) { | 220 | if (pmc) { |
| 113 | *data = pmc_read_counter(pmc); | 221 | *data = pmc_read_counter(pmc); |
| 114 | return 0; | 222 | return 0; |
| 115 | } | 223 | } |
| 116 | /* MSR_K7_EVNTSELn */ | 224 | /* MSR_EVNTSELn */ |
| 117 | pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); | 225 | pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); |
| 118 | if (pmc) { | 226 | if (pmc) { |
| 119 | *data = pmc->eventsel; | 227 | *data = pmc->eventsel; |
| 120 | return 0; | 228 | return 0; |
| @@ -130,14 +238,14 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 130 | u32 msr = msr_info->index; | 238 | u32 msr = msr_info->index; |
| 131 | u64 data = msr_info->data; | 239 | u64 data = msr_info->data; |
| 132 | 240 | ||
| 133 | /* MSR_K7_PERFCTRn */ | 241 | /* MSR_PERFCTRn */ |
| 134 | pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); | 242 | pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); |
| 135 | if (pmc) { | 243 | if (pmc) { |
| 136 | pmc->counter += data - pmc_read_counter(pmc); | 244 | pmc->counter += data - pmc_read_counter(pmc); |
| 137 | return 0; | 245 | return 0; |
| 138 | } | 246 | } |
| 139 | /* MSR_K7_EVNTSELn */ | 247 | /* MSR_EVNTSELn */ |
| 140 | pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); | 248 | pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); |
| 141 | if (pmc) { | 249 | if (pmc) { |
| 142 | if (data == pmc->eventsel) | 250 | if (data == pmc->eventsel) |
| 143 | return 0; | 251 | return 0; |
| @@ -154,7 +262,11 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) | |||
| 154 | { | 262 | { |
| 155 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | 263 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 156 | 264 | ||
| 157 | pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; | 265 | if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) |
| 266 | pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE; | ||
| 267 | else | ||
| 268 | pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; | ||
| 269 | |||
| 158 | pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; | 270 | pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; |
| 159 | pmu->reserved_bits = 0xffffffff00200000ull; | 271 | pmu->reserved_bits = 0xffffffff00200000ull; |
| 160 | /* not applicable to AMD; but clean them to prevent any fall out */ | 272 | /* not applicable to AMD; but clean them to prevent any fall out */ |
| @@ -169,7 +281,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) | |||
| 169 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | 281 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 170 | int i; | 282 | int i; |
| 171 | 283 | ||
| 172 | for (i = 0; i < AMD64_NUM_COUNTERS ; i++) { | 284 | BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC); |
| 285 | |||
| 286 | for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) { | ||
| 173 | pmu->gp_counters[i].type = KVM_PMC_GP; | 287 | pmu->gp_counters[i].type = KVM_PMC_GP; |
| 174 | pmu->gp_counters[i].vcpu = vcpu; | 288 | pmu->gp_counters[i].vcpu = vcpu; |
| 175 | pmu->gp_counters[i].idx = i; | 289 | pmu->gp_counters[i].idx = i; |
| @@ -181,7 +295,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) | |||
| 181 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); | 295 | struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); |
| 182 | int i; | 296 | int i; |
| 183 | 297 | ||
| 184 | for (i = 0; i < AMD64_NUM_COUNTERS; i++) { | 298 | for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) { |
| 185 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; | 299 | struct kvm_pmc *pmc = &pmu->gp_counters[i]; |
| 186 | 300 | ||
| 187 | pmc_stop_counter(pmc); | 301 | pmc_stop_counter(pmc); |
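The "idx is contiguous, the MSRs are not" comment in amd_pmc_idx_to_pmc() above is easier to see with the concrete layout. The MSR numbers below are the public MSR_F15H_PERF_* values, not something defined by this patch, so treat the snippet as an illustration of why the index is doubled:

/* Illustrative only: with PERFCTR_CORE the select and counter MSRs interleave,
 *
 *   pmc_idx 0 -> PERF_CTL0 0xc0010200 / PERF_CTR0 0xc0010201
 *   pmc_idx 1 -> PERF_CTL1 0xc0010202 / PERF_CTR1 0xc0010203
 *   ...
 *   pmc_idx 5 -> PERF_CTL5 0xc001020a / PERF_CTR5 0xc001020b
 */
static inline unsigned int perfctr_core_ctr_msr(unsigned int pmc_idx)
{
        return 0xc0010201 + 2 * pmc_idx;        /* the matching CTL sits one below */
}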
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9d2043f94e29..b58787daf9f8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = { | |||
| 131 | 131 | ||
| 132 | #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) | 132 | #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) |
| 133 | 133 | ||
| 134 | struct kvm_sev_info { | ||
| 135 | bool active; /* SEV enabled guest */ | ||
| 136 | unsigned int asid; /* ASID used for this guest */ | ||
| 137 | unsigned int handle; /* SEV firmware handle */ | ||
| 138 | int fd; /* SEV device fd */ | ||
| 139 | unsigned long pages_locked; /* Number of pages locked */ | ||
| 140 | struct list_head regions_list; /* List of registered regions */ | ||
| 141 | }; | ||
| 142 | |||
| 143 | struct kvm_svm { | ||
| 144 | struct kvm kvm; | ||
| 145 | |||
| 146 | /* Struct members for AVIC */ | ||
| 147 | u32 avic_vm_id; | ||
| 148 | u32 ldr_mode; | ||
| 149 | struct page *avic_logical_id_table_page; | ||
| 150 | struct page *avic_physical_id_table_page; | ||
| 151 | struct hlist_node hnode; | ||
| 152 | |||
| 153 | struct kvm_sev_info sev_info; | ||
| 154 | }; | ||
| 155 | |||
| 134 | struct kvm_vcpu; | 156 | struct kvm_vcpu; |
| 135 | 157 | ||
| 136 | struct nested_state { | 158 | struct nested_state { |
| @@ -276,6 +298,54 @@ static bool npt_enabled = true; | |||
| 276 | static bool npt_enabled; | 298 | static bool npt_enabled; |
| 277 | #endif | 299 | #endif |
| 278 | 300 | ||
| 301 | /* | ||
| 302 | * These 2 parameters are used to configure the controls for Pause-Loop Exiting: | ||
| 303 | * pause_filter_count: On processors that support Pause filtering (indicated | ||
| 304 | * by CPUID Fn8000_000A_EDX), the VMCB provides a 16-bit pause filter | ||
| 305 | * count value. On VMRUN this value is loaded into an internal counter. | ||
| 306 | * Each time a pause instruction is executed, this counter is decremented | ||
| 307 | * until it reaches zero at which time a #VMEXIT is generated if pause | ||
| 308 | * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause | ||
| 309 | * Intercept Filtering for more details. | ||
| 310 | * This also indicates whether PLE logic is enabled. | ||
| 311 | * | ||
| 312 | * pause_filter_thresh: In addition, some processor families support advanced | ||
| 313 | * pause filtering (indicated by CPUID Fn8000_000A_EDX), which places an upper | ||
| 314 | * bound on the amount of time a guest is allowed to execute in a pause loop. | ||
| 315 | * In this mode, a 16-bit pause filter threshold field is added in the | ||
| 316 | * VMCB. The threshold value is a cycle count that is used to reset the | ||
| 317 | * pause counter. As with simple pause filtering, VMRUN loads the pause | ||
| 318 | * count value from VMCB into an internal counter. Then, on each pause | ||
| 319 | * instruction the hardware checks the elapsed number of cycles since | ||
| 320 | * the most recent pause instruction against the pause filter threshold. | ||
| 321 | * If the elapsed cycle count is greater than the pause filter threshold, | ||
| 322 | * then the internal pause count is reloaded from the VMCB and execution | ||
| 323 | * continues. If the elapsed cycle count is less than the pause filter | ||
| 324 | * threshold, then the internal pause count is decremented. If the count | ||
| 325 | * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is | ||
| 326 | * triggered. If advanced pause filtering is supported and pause filter | ||
| 327 | * threshold field is set to zero, the filter will operate in the simpler, | ||
| 328 | * count only mode. | ||
| 329 | */ | ||
| 330 | |||
| 331 | static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; | ||
| 332 | module_param(pause_filter_thresh, ushort, 0444); | ||
| 333 | |||
| 334 | static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; | ||
| 335 | module_param(pause_filter_count, ushort, 0444); | ||
| 336 | |||
| 337 | /* Default doubles per-vcpu window every exit. */ | ||
| 338 | static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; | ||
| 339 | module_param(pause_filter_count_grow, ushort, 0444); | ||
| 340 | |||
| 341 | /* Default resets per-vcpu window every exit to pause_filter_count. */ | ||
| 342 | static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; | ||
| 343 | module_param(pause_filter_count_shrink, ushort, 0444); | ||
| 344 | |||
| 345 | /* Default is to compute the maximum so we can never overflow. */ | ||
| 346 | static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; | ||
| 347 | module_param(pause_filter_count_max, ushort, 0444); | ||
| 348 | |||
| 279 | /* allow nested paging (virtualized MMU) for all guests */ | 349 | /* allow nested paging (virtualized MMU) for all guests */ |
| 280 | static int npt = true; | 350 | static int npt = true; |
| 281 | module_param(npt, int, S_IRUGO); | 351 | module_param(npt, int, S_IRUGO); |
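The grow_ple_window()/shrink_ple_window() helpers added further down feed these parameters into the shared __grow_ple_window()/__shrink_ple_window() logic from common x86 code. As a rough sketch of the default policy only (the shared helpers also handle additive modifiers), growing the window multiplies it by pause_filter_count_grow and clamps it so the 16-bit VMCB field cannot overflow:

/* Rough sketch of the default grow policy, not the shared helper itself:
 * pause_filter_count_grow == 2 doubles the window on every PAUSE exit,
 * clamped to pause_filter_count_max. */
static unsigned short example_grow_ple_window(unsigned short old,
                                              unsigned short base,
                                              unsigned short grow,
                                              unsigned short max)
{
        unsigned int new = old ? (unsigned int)old * grow : base;

        return new > max ? max : new;
}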
| @@ -352,6 +422,12 @@ struct enc_region { | |||
| 352 | unsigned long size; | 422 | unsigned long size; |
| 353 | }; | 423 | }; |
| 354 | 424 | ||
| 425 | |||
| 426 | static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) | ||
| 427 | { | ||
| 428 | return container_of(kvm, struct kvm_svm, kvm); | ||
| 429 | } | ||
| 430 | |||
| 355 | static inline bool svm_sev_enabled(void) | 431 | static inline bool svm_sev_enabled(void) |
| 356 | { | 432 | { |
| 357 | return max_sev_asid; | 433 | return max_sev_asid; |
| @@ -359,14 +435,14 @@ static inline bool svm_sev_enabled(void) | |||
| 359 | 435 | ||
| 360 | static inline bool sev_guest(struct kvm *kvm) | 436 | static inline bool sev_guest(struct kvm *kvm) |
| 361 | { | 437 | { |
| 362 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 438 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 363 | 439 | ||
| 364 | return sev->active; | 440 | return sev->active; |
| 365 | } | 441 | } |
| 366 | 442 | ||
| 367 | static inline int sev_get_asid(struct kvm *kvm) | 443 | static inline int sev_get_asid(struct kvm *kvm) |
| 368 | { | 444 | { |
| 369 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 445 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 370 | 446 | ||
| 371 | return sev->asid; | 447 | return sev->asid; |
| 372 | } | 448 | } |
| @@ -1083,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) | |||
| 1083 | } | 1159 | } |
| 1084 | 1160 | ||
| 1085 | /* Note: | 1161 | /* Note: |
| 1086 | * This hash table is used to map VM_ID to a struct kvm_arch, | 1162 | * This hash table is used to map VM_ID to a struct kvm_svm, |
| 1087 | * when handling AMD IOMMU GALOG notification to schedule in | 1163 | * when handling AMD IOMMU GALOG notification to schedule in |
| 1088 | * a particular vCPU. | 1164 | * a particular vCPU. |
| 1089 | */ | 1165 | */ |
| @@ -1100,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock); | |||
| 1100 | static int avic_ga_log_notifier(u32 ga_tag) | 1176 | static int avic_ga_log_notifier(u32 ga_tag) |
| 1101 | { | 1177 | { |
| 1102 | unsigned long flags; | 1178 | unsigned long flags; |
| 1103 | struct kvm_arch *ka = NULL; | 1179 | struct kvm_svm *kvm_svm; |
| 1104 | struct kvm_vcpu *vcpu = NULL; | 1180 | struct kvm_vcpu *vcpu = NULL; |
| 1105 | u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); | 1181 | u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); |
| 1106 | u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); | 1182 | u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); |
| @@ -1108,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag) | |||
| 1108 | pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); | 1184 | pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); |
| 1109 | 1185 | ||
| 1110 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); | 1186 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); |
| 1111 | hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { | 1187 | hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { |
| 1112 | struct kvm *kvm = container_of(ka, struct kvm, arch); | 1188 | if (kvm_svm->avic_vm_id != vm_id) |
| 1113 | struct kvm_arch *vm_data = &kvm->arch; | ||
| 1114 | |||
| 1115 | if (vm_data->avic_vm_id != vm_id) | ||
| 1116 | continue; | 1189 | continue; |
| 1117 | vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); | 1190 | vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id); |
| 1118 | break; | 1191 | break; |
| 1119 | } | 1192 | } |
| 1120 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); | 1193 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); |
| @@ -1172,6 +1245,42 @@ err: | |||
| 1172 | return rc; | 1245 | return rc; |
| 1173 | } | 1246 | } |
| 1174 | 1247 | ||
| 1248 | static void grow_ple_window(struct kvm_vcpu *vcpu) | ||
| 1249 | { | ||
| 1250 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 1251 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
| 1252 | int old = control->pause_filter_count; | ||
| 1253 | |||
| 1254 | control->pause_filter_count = __grow_ple_window(old, | ||
| 1255 | pause_filter_count, | ||
| 1256 | pause_filter_count_grow, | ||
| 1257 | pause_filter_count_max); | ||
| 1258 | |||
| 1259 | if (control->pause_filter_count != old) | ||
| 1260 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
| 1261 | |||
| 1262 | trace_kvm_ple_window_grow(vcpu->vcpu_id, | ||
| 1263 | control->pause_filter_count, old); | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | static void shrink_ple_window(struct kvm_vcpu *vcpu) | ||
| 1267 | { | ||
| 1268 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 1269 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
| 1270 | int old = control->pause_filter_count; | ||
| 1271 | |||
| 1272 | control->pause_filter_count = | ||
| 1273 | __shrink_ple_window(old, | ||
| 1274 | pause_filter_count, | ||
| 1275 | pause_filter_count_shrink, | ||
| 1276 | pause_filter_count); | ||
| 1277 | if (control->pause_filter_count != old) | ||
| 1278 | mark_dirty(svm->vmcb, VMCB_INTERCEPTS); | ||
| 1279 | |||
| 1280 | trace_kvm_ple_window_shrink(vcpu->vcpu_id, | ||
| 1281 | control->pause_filter_count, old); | ||
| 1282 | } | ||
| 1283 | |||
| 1175 | static __init int svm_hardware_setup(void) | 1284 | static __init int svm_hardware_setup(void) |
| 1176 | { | 1285 | { |
| 1177 | int cpu; | 1286 | int cpu; |
| @@ -1202,6 +1311,14 @@ static __init int svm_hardware_setup(void) | |||
| 1202 | kvm_tsc_scaling_ratio_frac_bits = 32; | 1311 | kvm_tsc_scaling_ratio_frac_bits = 32; |
| 1203 | } | 1312 | } |
| 1204 | 1313 | ||
| 1314 | /* Check for pause filtering support */ | ||
| 1315 | if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { | ||
| 1316 | pause_filter_count = 0; | ||
| 1317 | pause_filter_thresh = 0; | ||
| 1318 | } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { | ||
| 1319 | pause_filter_thresh = 0; | ||
| 1320 | } | ||
| 1321 | |||
| 1205 | if (nested) { | 1322 | if (nested) { |
| 1206 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); | 1323 | printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); |
| 1207 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); | 1324 | kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); |
| @@ -1328,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) | |||
| 1328 | static void avic_init_vmcb(struct vcpu_svm *svm) | 1445 | static void avic_init_vmcb(struct vcpu_svm *svm) |
| 1329 | { | 1446 | { |
| 1330 | struct vmcb *vmcb = svm->vmcb; | 1447 | struct vmcb *vmcb = svm->vmcb; |
| 1331 | struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; | 1448 | struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm); |
| 1332 | phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); | 1449 | phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); |
| 1333 | phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); | 1450 | phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page)); |
| 1334 | phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); | 1451 | phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page)); |
| 1335 | 1452 | ||
| 1336 | vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; | 1453 | vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; |
| 1337 | vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; | 1454 | vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; |
| @@ -1363,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1363 | set_exception_intercept(svm, MC_VECTOR); | 1480 | set_exception_intercept(svm, MC_VECTOR); |
| 1364 | set_exception_intercept(svm, AC_VECTOR); | 1481 | set_exception_intercept(svm, AC_VECTOR); |
| 1365 | set_exception_intercept(svm, DB_VECTOR); | 1482 | set_exception_intercept(svm, DB_VECTOR); |
| 1483 | /* | ||
| 1484 | * Guest access to VMware backdoor ports could legitimately | ||
| 1485 | * trigger #GP because of the TSS I/O permission bitmap. | ||
| 1486 | * We intercept those #GPs and allow access to the ports anyway, | ||
| 1487 | * as VMware does. | ||
| 1488 | */ | ||
| 1489 | if (enable_vmware_backdoor) | ||
| 1490 | set_exception_intercept(svm, GP_VECTOR); | ||
| 1366 | 1491 | ||
| 1367 | set_intercept(svm, INTERCEPT_INTR); | 1492 | set_intercept(svm, INTERCEPT_INTR); |
| 1368 | set_intercept(svm, INTERCEPT_NMI); | 1493 | set_intercept(svm, INTERCEPT_NMI); |
| @@ -1371,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1371 | set_intercept(svm, INTERCEPT_RDPMC); | 1496 | set_intercept(svm, INTERCEPT_RDPMC); |
| 1372 | set_intercept(svm, INTERCEPT_CPUID); | 1497 | set_intercept(svm, INTERCEPT_CPUID); |
| 1373 | set_intercept(svm, INTERCEPT_INVD); | 1498 | set_intercept(svm, INTERCEPT_INVD); |
| 1374 | set_intercept(svm, INTERCEPT_HLT); | ||
| 1375 | set_intercept(svm, INTERCEPT_INVLPG); | 1499 | set_intercept(svm, INTERCEPT_INVLPG); |
| 1376 | set_intercept(svm, INTERCEPT_INVLPGA); | 1500 | set_intercept(svm, INTERCEPT_INVLPGA); |
| 1377 | set_intercept(svm, INTERCEPT_IOIO_PROT); | 1501 | set_intercept(svm, INTERCEPT_IOIO_PROT); |
| @@ -1389,11 +1513,14 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1389 | set_intercept(svm, INTERCEPT_XSETBV); | 1513 | set_intercept(svm, INTERCEPT_XSETBV); |
| 1390 | set_intercept(svm, INTERCEPT_RSM); | 1514 | set_intercept(svm, INTERCEPT_RSM); |
| 1391 | 1515 | ||
| 1392 | if (!kvm_mwait_in_guest()) { | 1516 | if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { |
| 1393 | set_intercept(svm, INTERCEPT_MONITOR); | 1517 | set_intercept(svm, INTERCEPT_MONITOR); |
| 1394 | set_intercept(svm, INTERCEPT_MWAIT); | 1518 | set_intercept(svm, INTERCEPT_MWAIT); |
| 1395 | } | 1519 | } |
| 1396 | 1520 | ||
| 1521 | if (!kvm_hlt_in_guest(svm->vcpu.kvm)) | ||
| 1522 | set_intercept(svm, INTERCEPT_HLT); | ||
| 1523 | |||
| 1397 | control->iopm_base_pa = __sme_set(iopm_base); | 1524 | control->iopm_base_pa = __sme_set(iopm_base); |
| 1398 | control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); | 1525 | control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); |
| 1399 | control->int_ctl = V_INTR_MASKING_MASK; | 1526 | control->int_ctl = V_INTR_MASKING_MASK; |
| @@ -1449,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 1449 | svm->nested.vmcb = 0; | 1576 | svm->nested.vmcb = 0; |
| 1450 | svm->vcpu.arch.hflags = 0; | 1577 | svm->vcpu.arch.hflags = 0; |
| 1451 | 1578 | ||
| 1452 | if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { | 1579 | if (pause_filter_count) { |
| 1453 | control->pause_filter_count = 3000; | 1580 | control->pause_filter_count = pause_filter_count; |
| 1581 | if (pause_filter_thresh) | ||
| 1582 | control->pause_filter_thresh = pause_filter_thresh; | ||
| 1454 | set_intercept(svm, INTERCEPT_PAUSE); | 1583 | set_intercept(svm, INTERCEPT_PAUSE); |
| 1584 | } else { | ||
| 1585 | clr_intercept(svm, INTERCEPT_PAUSE); | ||
| 1455 | } | 1586 | } |
| 1456 | 1587 | ||
| 1457 | if (kvm_vcpu_apicv_active(&svm->vcpu)) | 1588 | if (kvm_vcpu_apicv_active(&svm->vcpu)) |
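The PLE setup above replaces the hard-coded pause_filter_count of 3000 with the new pause_filter_count/pause_filter_thresh module parameters, and the count is then adjusted at runtime by grow_ple_window()/shrink_ple_window() (called from pause_interception() and svm_sched_in() further down). A standalone model of the usual grow policy follows, with names chosen to echo the module parameters; the exact scaling and clamping in the patch may differ.

/* Toy model of a PLE window that grows on each PAUSE exit and is clamped;
 * pause_filter_count_grow/_max stand in for the real module parameters. */
#include <stdio.h>

static unsigned int pause_filter_count_grow = 2;
static unsigned int pause_filter_count_max = 65535;	/* VMCB count field is 16 bits */

static unsigned int grow_window(unsigned int cur, unsigned int base)
{
	unsigned int next;

	if (pause_filter_count_grow < 1)
		return base;				/* growing disabled */

	if (pause_filter_count_grow < base)
		next = cur * pause_filter_count_grow;	/* multiplicative */
	else
		next = cur + pause_filter_count_grow;	/* additive fallback */

	return next > pause_filter_count_max ? pause_filter_count_max : next;
}

int main(void)
{
	unsigned int w = 3000;
	int i;

	for (i = 0; i < 6; i++) {
		w = grow_window(w, 3000);
		printf("window after %d PAUSE exits: %u\n", i + 1, w);
	}
	return 0;
}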
| @@ -1488,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, | |||
| 1488 | unsigned int index) | 1619 | unsigned int index) |
| 1489 | { | 1620 | { |
| 1490 | u64 *avic_physical_id_table; | 1621 | u64 *avic_physical_id_table; |
| 1491 | struct kvm_arch *vm_data = &vcpu->kvm->arch; | 1622 | struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); |
| 1492 | 1623 | ||
| 1493 | if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) | 1624 | if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) |
| 1494 | return NULL; | 1625 | return NULL; |
| 1495 | 1626 | ||
| 1496 | avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page); | 1627 | avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page); |
| 1497 | 1628 | ||
| 1498 | return &avic_physical_id_table[index]; | 1629 | return &avic_physical_id_table[index]; |
| 1499 | } | 1630 | } |
| @@ -1576,7 +1707,7 @@ static void __sev_asid_free(int asid) | |||
| 1576 | 1707 | ||
| 1577 | static void sev_asid_free(struct kvm *kvm) | 1708 | static void sev_asid_free(struct kvm *kvm) |
| 1578 | { | 1709 | { |
| 1579 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 1710 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 1580 | 1711 | ||
| 1581 | __sev_asid_free(sev->asid); | 1712 | __sev_asid_free(sev->asid); |
| 1582 | } | 1713 | } |
| @@ -1616,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, | |||
| 1616 | unsigned long ulen, unsigned long *n, | 1747 | unsigned long ulen, unsigned long *n, |
| 1617 | int write) | 1748 | int write) |
| 1618 | { | 1749 | { |
| 1619 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 1750 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 1620 | unsigned long npages, npinned, size; | 1751 | unsigned long npages, npinned, size; |
| 1621 | unsigned long locked, lock_limit; | 1752 | unsigned long locked, lock_limit; |
| 1622 | struct page **pages; | 1753 | struct page **pages; |
| @@ -1667,7 +1798,7 @@ err: | |||
| 1667 | static void sev_unpin_memory(struct kvm *kvm, struct page **pages, | 1798 | static void sev_unpin_memory(struct kvm *kvm, struct page **pages, |
| 1668 | unsigned long npages) | 1799 | unsigned long npages) |
| 1669 | { | 1800 | { |
| 1670 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 1801 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 1671 | 1802 | ||
| 1672 | release_pages(pages, npages); | 1803 | release_pages(pages, npages); |
| 1673 | kvfree(pages); | 1804 | kvfree(pages); |
| @@ -1705,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm, | |||
| 1705 | kfree(region); | 1836 | kfree(region); |
| 1706 | } | 1837 | } |
| 1707 | 1838 | ||
| 1839 | static struct kvm *svm_vm_alloc(void) | ||
| 1840 | { | ||
| 1841 | struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL); | ||
| 1842 | return &kvm_svm->kvm; | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | static void svm_vm_free(struct kvm *kvm) | ||
| 1846 | { | ||
| 1847 | kfree(to_kvm_svm(kvm)); | ||
| 1848 | } | ||
| 1849 | |||
| 1708 | static void sev_vm_destroy(struct kvm *kvm) | 1850 | static void sev_vm_destroy(struct kvm *kvm) |
| 1709 | { | 1851 | { |
| 1710 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 1852 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 1711 | struct list_head *head = &sev->regions_list; | 1853 | struct list_head *head = &sev->regions_list; |
| 1712 | struct list_head *pos, *q; | 1854 | struct list_head *pos, *q; |
| 1713 | 1855 | ||
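svm_vm_alloc()/svm_vm_free() are the backends for the new vm_alloc/vm_free hooks wired up at the bottom of the file: struct kvm is now embedded inside a vendor-specific struct kvm_svm, and to_kvm_svm() recovers the outer structure with container_of(), which is why every &kvm->arch.sev_info user above becomes &to_kvm_svm(kvm)->sev_info. The embedding pattern in isolation, as a runnable userspace sketch (names here are illustrative, not the kernel's):

/* Standalone illustration of the "embed the generic object, recover the
 * wrapper with container_of" pattern used by struct kvm_svm / to_kvm_svm(). */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kvm { int nr_vcpus; };			/* generic part */

struct kvm_svm {				/* vendor wrapper */
	struct kvm kvm;
	unsigned int avic_vm_id;
};

static struct kvm *vm_alloc(void)
{
	struct kvm_svm *ks = calloc(1, sizeof(*ks));
	return &ks->kvm;			/* callers only see struct kvm */
}

static struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_svm, kvm);
}

int main(void)
{
	struct kvm *vm = vm_alloc();

	to_kvm_svm(vm)->avic_vm_id = 42;
	printf("avic_vm_id = %u\n", to_kvm_svm(vm)->avic_vm_id);
	free(to_kvm_svm(vm));
	return 0;
}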
| @@ -1736,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm) | |||
| 1736 | static void avic_vm_destroy(struct kvm *kvm) | 1878 | static void avic_vm_destroy(struct kvm *kvm) |
| 1737 | { | 1879 | { |
| 1738 | unsigned long flags; | 1880 | unsigned long flags; |
| 1739 | struct kvm_arch *vm_data = &kvm->arch; | 1881 | struct kvm_svm *kvm_svm = to_kvm_svm(kvm); |
| 1740 | 1882 | ||
| 1741 | if (!avic) | 1883 | if (!avic) |
| 1742 | return; | 1884 | return; |
| 1743 | 1885 | ||
| 1744 | if (vm_data->avic_logical_id_table_page) | 1886 | if (kvm_svm->avic_logical_id_table_page) |
| 1745 | __free_page(vm_data->avic_logical_id_table_page); | 1887 | __free_page(kvm_svm->avic_logical_id_table_page); |
| 1746 | if (vm_data->avic_physical_id_table_page) | 1888 | if (kvm_svm->avic_physical_id_table_page) |
| 1747 | __free_page(vm_data->avic_physical_id_table_page); | 1889 | __free_page(kvm_svm->avic_physical_id_table_page); |
| 1748 | 1890 | ||
| 1749 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); | 1891 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); |
| 1750 | hash_del(&vm_data->hnode); | 1892 | hash_del(&kvm_svm->hnode); |
| 1751 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); | 1893 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); |
| 1752 | } | 1894 | } |
| 1753 | 1895 | ||
| @@ -1761,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1761 | { | 1903 | { |
| 1762 | unsigned long flags; | 1904 | unsigned long flags; |
| 1763 | int err = -ENOMEM; | 1905 | int err = -ENOMEM; |
| 1764 | struct kvm_arch *vm_data = &kvm->arch; | 1906 | struct kvm_svm *kvm_svm = to_kvm_svm(kvm); |
| 1907 | struct kvm_svm *k2; | ||
| 1765 | struct page *p_page; | 1908 | struct page *p_page; |
| 1766 | struct page *l_page; | 1909 | struct page *l_page; |
| 1767 | struct kvm_arch *ka; | ||
| 1768 | u32 vm_id; | 1910 | u32 vm_id; |
| 1769 | 1911 | ||
| 1770 | if (!avic) | 1912 | if (!avic) |
| @@ -1775,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1775 | if (!p_page) | 1917 | if (!p_page) |
| 1776 | goto free_avic; | 1918 | goto free_avic; |
| 1777 | 1919 | ||
| 1778 | vm_data->avic_physical_id_table_page = p_page; | 1920 | kvm_svm->avic_physical_id_table_page = p_page; |
| 1779 | clear_page(page_address(p_page)); | 1921 | clear_page(page_address(p_page)); |
| 1780 | 1922 | ||
| 1781 | /* Allocating logical APIC ID table (4KB) */ | 1923 | /* Allocating logical APIC ID table (4KB) */ |
| @@ -1783,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1783 | if (!l_page) | 1925 | if (!l_page) |
| 1784 | goto free_avic; | 1926 | goto free_avic; |
| 1785 | 1927 | ||
| 1786 | vm_data->avic_logical_id_table_page = l_page; | 1928 | kvm_svm->avic_logical_id_table_page = l_page; |
| 1787 | clear_page(page_address(l_page)); | 1929 | clear_page(page_address(l_page)); |
| 1788 | 1930 | ||
| 1789 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); | 1931 | spin_lock_irqsave(&svm_vm_data_hash_lock, flags); |
| @@ -1795,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm) | |||
| 1795 | } | 1937 | } |
| 1796 | /* Is it still in use? Only possible if wrapped at least once */ | 1938 | /* Is it still in use? Only possible if wrapped at least once */ |
| 1797 | if (next_vm_id_wrapped) { | 1939 | if (next_vm_id_wrapped) { |
| 1798 | hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { | 1940 | hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) { |
| 1799 | struct kvm *k2 = container_of(ka, struct kvm, arch); | 1941 | if (k2->avic_vm_id == vm_id) |
| 1800 | struct kvm_arch *vd2 = &k2->arch; | ||
| 1801 | if (vd2->avic_vm_id == vm_id) | ||
| 1802 | goto again; | 1942 | goto again; |
| 1803 | } | 1943 | } |
| 1804 | } | 1944 | } |
| 1805 | vm_data->avic_vm_id = vm_id; | 1945 | kvm_svm->avic_vm_id = vm_id; |
| 1806 | hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); | 1946 | hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id); |
| 1807 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); | 1947 | spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); |
| 1808 | 1948 | ||
| 1809 | return 0; | 1949 | return 0; |
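avic_vm_init() hands out AVIC VM IDs from a rolling counter; only after the counter has wrapped at least once must each candidate be re-checked against the IDs still registered in svm_vm_data_hash, and with the per-VM state now in struct kvm_svm the loop can walk those entries directly instead of going through container_of on struct kvm_arch. A compact userspace model of that allocate-and-recheck scheme (a plain array stands in for the kernel hash table, the 8-bit mask for the real AVIC ID space, and exhaustion is ignored):

/* Toy model of wrap-aware ID allocation: IDs come from a counter, and only
 * once the counter wraps do we re-check candidates against live IDs. */
#include <stdbool.h>
#include <stdio.h>

#define ID_MASK	0xffu		/* stands in for the AVIC VM ID mask */

static unsigned int next_id = 1;
static bool wrapped;
static unsigned int live_ids[16];
static int nr_live;

static bool id_in_use(unsigned int id)
{
	int i;

	for (i = 0; i < nr_live; i++)
		if (live_ids[i] == id)
			return true;
	return false;
}

static unsigned int alloc_id(void)
{
	unsigned int id;

again:
	id = next_id++ & ID_MASK;
	if (id == 0) {			/* counter wrapped; 0 is reserved */
		wrapped = true;
		goto again;
	}
	if (wrapped && id_in_use(id))	/* collisions only possible after a wrap */
		goto again;

	live_ids[nr_live++] = id;
	return id;
}

int main(void)
{
	printf("first two ids: %u %u\n", alloc_id(), alloc_id());
	return 0;
}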
| @@ -2535,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm) | |||
| 2535 | 2675 | ||
| 2536 | static int ud_interception(struct vcpu_svm *svm) | 2676 | static int ud_interception(struct vcpu_svm *svm) |
| 2537 | { | 2677 | { |
| 2538 | int er; | 2678 | return handle_ud(&svm->vcpu); |
| 2539 | |||
| 2540 | er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); | ||
| 2541 | if (er == EMULATE_USER_EXIT) | ||
| 2542 | return 0; | ||
| 2543 | if (er != EMULATE_DONE) | ||
| 2544 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
| 2545 | return 1; | ||
| 2546 | } | 2679 | } |
| 2547 | 2680 | ||
| 2548 | static int ac_interception(struct vcpu_svm *svm) | 2681 | static int ac_interception(struct vcpu_svm *svm) |
| @@ -2551,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm) | |||
| 2551 | return 1; | 2684 | return 1; |
| 2552 | } | 2685 | } |
| 2553 | 2686 | ||
| 2687 | static int gp_interception(struct vcpu_svm *svm) | ||
| 2688 | { | ||
| 2689 | struct kvm_vcpu *vcpu = &svm->vcpu; | ||
| 2690 | u32 error_code = svm->vmcb->control.exit_info_1; | ||
| 2691 | int er; | ||
| 2692 | |||
| 2693 | WARN_ON_ONCE(!enable_vmware_backdoor); | ||
| 2694 | |||
| 2695 | er = emulate_instruction(vcpu, | ||
| 2696 | EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); | ||
| 2697 | if (er == EMULATE_USER_EXIT) | ||
| 2698 | return 0; | ||
| 2699 | else if (er != EMULATE_DONE) | ||
| 2700 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | ||
| 2701 | return 1; | ||
| 2702 | } | ||
| 2703 | |||
| 2554 | static bool is_erratum_383(void) | 2704 | static bool is_erratum_383(void) |
| 2555 | { | 2705 | { |
| 2556 | int err, i; | 2706 | int err, i; |
| @@ -2639,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm) | |||
| 2639 | { | 2789 | { |
| 2640 | struct kvm_vcpu *vcpu = &svm->vcpu; | 2790 | struct kvm_vcpu *vcpu = &svm->vcpu; |
| 2641 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 2791 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
| 2642 | int size, in, string, ret; | 2792 | int size, in, string; |
| 2643 | unsigned port; | 2793 | unsigned port; |
| 2644 | 2794 | ||
| 2645 | ++svm->vcpu.stat.io_exits; | 2795 | ++svm->vcpu.stat.io_exits; |
| @@ -2651,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm) | |||
| 2651 | port = io_info >> 16; | 2801 | port = io_info >> 16; |
| 2652 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 2802 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
| 2653 | svm->next_rip = svm->vmcb->control.exit_info_2; | 2803 | svm->next_rip = svm->vmcb->control.exit_info_2; |
| 2654 | ret = kvm_skip_emulated_instruction(&svm->vcpu); | ||
| 2655 | 2804 | ||
| 2656 | /* | 2805 | return kvm_fast_pio(&svm->vcpu, size, port, in); |
| 2657 | * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered | ||
| 2658 | * KVM_EXIT_DEBUG here. | ||
| 2659 | */ | ||
| 2660 | if (in) | ||
| 2661 | return kvm_fast_pio_in(vcpu, size, port) && ret; | ||
| 2662 | else | ||
| 2663 | return kvm_fast_pio_out(vcpu, size, port) && ret; | ||
| 2664 | } | 2806 | } |
| 2665 | 2807 | ||
| 2666 | static int nmi_interception(struct vcpu_svm *svm) | 2808 | static int nmi_interception(struct vcpu_svm *svm) |
| @@ -4233,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm) | |||
| 4233 | struct kvm_vcpu *vcpu = &svm->vcpu; | 4375 | struct kvm_vcpu *vcpu = &svm->vcpu; |
| 4234 | bool in_kernel = (svm_get_cpl(vcpu) == 0); | 4376 | bool in_kernel = (svm_get_cpl(vcpu) == 0); |
| 4235 | 4377 | ||
| 4378 | if (pause_filter_thresh) | ||
| 4379 | grow_ple_window(vcpu); | ||
| 4380 | |||
| 4236 | kvm_vcpu_on_spin(vcpu, in_kernel); | 4381 | kvm_vcpu_on_spin(vcpu, in_kernel); |
| 4237 | return 1; | 4382 | return 1; |
| 4238 | } | 4383 | } |
| @@ -4323,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) | |||
| 4323 | 4468 | ||
| 4324 | static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) | 4469 | static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) |
| 4325 | { | 4470 | { |
| 4326 | struct kvm_arch *vm_data = &vcpu->kvm->arch; | 4471 | struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); |
| 4327 | int index; | 4472 | int index; |
| 4328 | u32 *logical_apic_id_table; | 4473 | u32 *logical_apic_id_table; |
| 4329 | int dlid = GET_APIC_LOGICAL_ID(ldr); | 4474 | int dlid = GET_APIC_LOGICAL_ID(ldr); |
| @@ -4345,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) | |||
| 4345 | index = (cluster << 2) + apic; | 4490 | index = (cluster << 2) + apic; |
| 4346 | } | 4491 | } |
| 4347 | 4492 | ||
| 4348 | logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); | 4493 | logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); |
| 4349 | 4494 | ||
| 4350 | return &logical_apic_id_table[index]; | 4495 | return &logical_apic_id_table[index]; |
| 4351 | } | 4496 | } |
| @@ -4425,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) | |||
| 4425 | static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) | 4570 | static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) |
| 4426 | { | 4571 | { |
| 4427 | struct vcpu_svm *svm = to_svm(vcpu); | 4572 | struct vcpu_svm *svm = to_svm(vcpu); |
| 4428 | struct kvm_arch *vm_data = &vcpu->kvm->arch; | 4573 | struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); |
| 4429 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); | 4574 | u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); |
| 4430 | u32 mod = (dfr >> 28) & 0xf; | 4575 | u32 mod = (dfr >> 28) & 0xf; |
| 4431 | 4576 | ||
| @@ -4434,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) | |||
| 4434 | * If this changes, we need to flush the AVIC logical | 4579 | * If this changes, we need to flush the AVIC logical |
| 4435 | * APID id table. | 4580 | * APID id table. |
| 4436 | */ | 4581 | */ |
| 4437 | if (vm_data->ldr_mode == mod) | 4582 | if (kvm_svm->ldr_mode == mod) |
| 4438 | return 0; | 4583 | return 0; |
| 4439 | 4584 | ||
| 4440 | clear_page(page_address(vm_data->avic_logical_id_table_page)); | 4585 | clear_page(page_address(kvm_svm->avic_logical_id_table_page)); |
| 4441 | vm_data->ldr_mode = mod; | 4586 | kvm_svm->ldr_mode = mod; |
| 4442 | 4587 | ||
| 4443 | if (svm->ldr_reg) | 4588 | if (svm->ldr_reg) |
| 4444 | avic_handle_ldr_update(vcpu); | 4589 | avic_handle_ldr_update(vcpu); |
| @@ -4558,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
| 4558 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 4703 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
| 4559 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 4704 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
| 4560 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, | 4705 | [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, |
| 4706 | [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, | ||
| 4561 | [SVM_EXIT_INTR] = intr_interception, | 4707 | [SVM_EXIT_INTR] = intr_interception, |
| 4562 | [SVM_EXIT_NMI] = nmi_interception, | 4708 | [SVM_EXIT_NMI] = nmi_interception, |
| 4563 | [SVM_EXIT_SMI] = nop_on_interception, | 4709 | [SVM_EXIT_SMI] = nop_on_interception, |
| @@ -4606,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) | |||
| 4606 | pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); | 4752 | pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); |
| 4607 | pr_err("%-20s%016llx\n", "intercepts:", control->intercept); | 4753 | pr_err("%-20s%016llx\n", "intercepts:", control->intercept); |
| 4608 | pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); | 4754 | pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); |
| 4755 | pr_err("%-20s%d\n", "pause filter threshold:", | ||
| 4756 | control->pause_filter_thresh); | ||
| 4609 | pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); | 4757 | pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); |
| 4610 | pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); | 4758 | pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); |
| 4611 | pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); | 4759 | pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); |
| @@ -5073,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, | |||
| 5073 | /* Try to enable guest_mode in IRTE */ | 5221 | /* Try to enable guest_mode in IRTE */ |
| 5074 | pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & | 5222 | pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & |
| 5075 | AVIC_HPA_MASK); | 5223 | AVIC_HPA_MASK); |
| 5076 | pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, | 5224 | pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, |
| 5077 | svm->vcpu.vcpu_id); | 5225 | svm->vcpu.vcpu_id); |
| 5078 | pi.is_guest_mode = true; | 5226 | pi.is_guest_mode = true; |
| 5079 | pi.vcpu_data = &vcpu_info; | 5227 | pi.vcpu_data = &vcpu_info; |
| @@ -5237,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
| 5237 | return 0; | 5385 | return 0; |
| 5238 | } | 5386 | } |
| 5239 | 5387 | ||
| 5388 | static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) | ||
| 5389 | { | ||
| 5390 | return 0; | ||
| 5391 | } | ||
| 5392 | |||
| 5240 | static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) | 5393 | static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) |
| 5241 | { | 5394 | { |
| 5242 | struct vcpu_svm *svm = to_svm(vcpu); | 5395 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -5538,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 5538 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 5691 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
| 5539 | 5692 | ||
| 5540 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | 5693 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) |
| 5541 | kvm_before_handle_nmi(&svm->vcpu); | 5694 | kvm_before_interrupt(&svm->vcpu); |
| 5542 | 5695 | ||
| 5543 | stgi(); | 5696 | stgi(); |
| 5544 | 5697 | ||
| 5545 | /* Any pending NMI will happen here */ | 5698 | /* Any pending NMI will happen here */ |
| 5546 | 5699 | ||
| 5547 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | 5700 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) |
| 5548 | kvm_after_handle_nmi(&svm->vcpu); | 5701 | kvm_after_interrupt(&svm->vcpu); |
| 5549 | 5702 | ||
| 5550 | sync_cr8_to_lapic(vcpu); | 5703 | sync_cr8_to_lapic(vcpu); |
| 5551 | 5704 | ||
| @@ -5921,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) | |||
| 5921 | 6074 | ||
| 5922 | static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) | 6075 | static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) |
| 5923 | { | 6076 | { |
| 6077 | if (pause_filter_thresh) | ||
| 6078 | shrink_ple_window(vcpu); | ||
| 5924 | } | 6079 | } |
| 5925 | 6080 | ||
| 5926 | static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) | 6081 | static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) |
| @@ -6037,7 +6192,7 @@ static int sev_asid_new(void) | |||
| 6037 | 6192 | ||
| 6038 | static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6193 | static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6039 | { | 6194 | { |
| 6040 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6195 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6041 | int asid, ret; | 6196 | int asid, ret; |
| 6042 | 6197 | ||
| 6043 | ret = -EBUSY; | 6198 | ret = -EBUSY; |
| @@ -6102,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error) | |||
| 6102 | 6257 | ||
| 6103 | static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) | 6258 | static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) |
| 6104 | { | 6259 | { |
| 6105 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6260 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6106 | 6261 | ||
| 6107 | return __sev_issue_cmd(sev->fd, id, data, error); | 6262 | return __sev_issue_cmd(sev->fd, id, data, error); |
| 6108 | } | 6263 | } |
| 6109 | 6264 | ||
| 6110 | static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6265 | static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6111 | { | 6266 | { |
| 6112 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6267 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6113 | struct sev_data_launch_start *start; | 6268 | struct sev_data_launch_start *start; |
| 6114 | struct kvm_sev_launch_start params; | 6269 | struct kvm_sev_launch_start params; |
| 6115 | void *dh_blob, *session_blob; | 6270 | void *dh_blob, *session_blob; |
| @@ -6207,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages, | |||
| 6207 | static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6362 | static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6208 | { | 6363 | { |
| 6209 | unsigned long vaddr, vaddr_end, next_vaddr, npages, size; | 6364 | unsigned long vaddr, vaddr_end, next_vaddr, npages, size; |
| 6210 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6365 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6211 | struct kvm_sev_launch_update_data params; | 6366 | struct kvm_sev_launch_update_data params; |
| 6212 | struct sev_data_launch_update_data *data; | 6367 | struct sev_data_launch_update_data *data; |
| 6213 | struct page **inpages; | 6368 | struct page **inpages; |
| @@ -6283,7 +6438,7 @@ e_free: | |||
| 6283 | static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6438 | static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6284 | { | 6439 | { |
| 6285 | void __user *measure = (void __user *)(uintptr_t)argp->data; | 6440 | void __user *measure = (void __user *)(uintptr_t)argp->data; |
| 6286 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6441 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6287 | struct sev_data_launch_measure *data; | 6442 | struct sev_data_launch_measure *data; |
| 6288 | struct kvm_sev_launch_measure params; | 6443 | struct kvm_sev_launch_measure params; |
| 6289 | void __user *p = NULL; | 6444 | void __user *p = NULL; |
| @@ -6351,7 +6506,7 @@ e_free: | |||
| 6351 | 6506 | ||
| 6352 | static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6507 | static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6353 | { | 6508 | { |
| 6354 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6509 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6355 | struct sev_data_launch_finish *data; | 6510 | struct sev_data_launch_finish *data; |
| 6356 | int ret; | 6511 | int ret; |
| 6357 | 6512 | ||
| @@ -6371,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) | |||
| 6371 | 6526 | ||
| 6372 | static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6527 | static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6373 | { | 6528 | { |
| 6374 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6529 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6375 | struct kvm_sev_guest_status params; | 6530 | struct kvm_sev_guest_status params; |
| 6376 | struct sev_data_guest_status *data; | 6531 | struct sev_data_guest_status *data; |
| 6377 | int ret; | 6532 | int ret; |
| @@ -6403,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, | |||
| 6403 | unsigned long dst, int size, | 6558 | unsigned long dst, int size, |
| 6404 | int *error, bool enc) | 6559 | int *error, bool enc) |
| 6405 | { | 6560 | { |
| 6406 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6561 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6407 | struct sev_data_dbg *data; | 6562 | struct sev_data_dbg *data; |
| 6408 | int ret; | 6563 | int ret; |
| 6409 | 6564 | ||
| @@ -6635,7 +6790,7 @@ err: | |||
| 6635 | 6790 | ||
| 6636 | static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) | 6791 | static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) |
| 6637 | { | 6792 | { |
| 6638 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6793 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6639 | struct sev_data_launch_secret *data; | 6794 | struct sev_data_launch_secret *data; |
| 6640 | struct kvm_sev_launch_secret params; | 6795 | struct kvm_sev_launch_secret params; |
| 6641 | struct page **pages; | 6796 | struct page **pages; |
| @@ -6759,7 +6914,7 @@ out: | |||
| 6759 | static int svm_register_enc_region(struct kvm *kvm, | 6914 | static int svm_register_enc_region(struct kvm *kvm, |
| 6760 | struct kvm_enc_region *range) | 6915 | struct kvm_enc_region *range) |
| 6761 | { | 6916 | { |
| 6762 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6917 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6763 | struct enc_region *region; | 6918 | struct enc_region *region; |
| 6764 | int ret = 0; | 6919 | int ret = 0; |
| 6765 | 6920 | ||
| @@ -6801,7 +6956,7 @@ e_free: | |||
| 6801 | static struct enc_region * | 6956 | static struct enc_region * |
| 6802 | find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) | 6957 | find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) |
| 6803 | { | 6958 | { |
| 6804 | struct kvm_sev_info *sev = &kvm->arch.sev_info; | 6959 | struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; |
| 6805 | struct list_head *head = &sev->regions_list; | 6960 | struct list_head *head = &sev->regions_list; |
| 6806 | struct enc_region *i; | 6961 | struct enc_region *i; |
| 6807 | 6962 | ||
| @@ -6859,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
| 6859 | .vcpu_free = svm_free_vcpu, | 7014 | .vcpu_free = svm_free_vcpu, |
| 6860 | .vcpu_reset = svm_vcpu_reset, | 7015 | .vcpu_reset = svm_vcpu_reset, |
| 6861 | 7016 | ||
| 7017 | .vm_alloc = svm_vm_alloc, | ||
| 7018 | .vm_free = svm_vm_free, | ||
| 6862 | .vm_init = avic_vm_init, | 7019 | .vm_init = avic_vm_init, |
| 6863 | .vm_destroy = svm_vm_destroy, | 7020 | .vm_destroy = svm_vm_destroy, |
| 6864 | 7021 | ||
| @@ -6925,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { | |||
| 6925 | .apicv_post_state_restore = avic_post_state_restore, | 7082 | .apicv_post_state_restore = avic_post_state_restore, |
| 6926 | 7083 | ||
| 6927 | .set_tss_addr = svm_set_tss_addr, | 7084 | .set_tss_addr = svm_set_tss_addr, |
| 7085 | .set_identity_map_addr = svm_set_identity_map_addr, | ||
| 6928 | .get_tdp_level = get_npt_level, | 7086 | .get_tdp_level = get_npt_level, |
| 6929 | .get_mt_mask = svm_get_mt_mask, | 7087 | .get_mt_mask = svm_get_mt_mask, |
| 6930 | 7088 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 92496b9b5f2b..aafcc9881e88 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -52,9 +52,11 @@ | |||
| 52 | #include <asm/irq_remapping.h> | 52 | #include <asm/irq_remapping.h> |
| 53 | #include <asm/mmu_context.h> | 53 | #include <asm/mmu_context.h> |
| 54 | #include <asm/nospec-branch.h> | 54 | #include <asm/nospec-branch.h> |
| 55 | #include <asm/mshyperv.h> | ||
| 55 | 56 | ||
| 56 | #include "trace.h" | 57 | #include "trace.h" |
| 57 | #include "pmu.h" | 58 | #include "pmu.h" |
| 59 | #include "vmx_evmcs.h" | ||
| 58 | 60 | ||
| 59 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 61 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
| 60 | #define __ex_clear(x, reg) \ | 62 | #define __ex_clear(x, reg) \ |
| @@ -130,13 +132,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); | |||
| 130 | #endif | 132 | #endif |
| 131 | 133 | ||
| 132 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) | 134 | #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) |
| 133 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) | 135 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE |
| 134 | #define KVM_VM_CR0_ALWAYS_ON \ | 136 | #define KVM_VM_CR0_ALWAYS_ON \ |
| 135 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | 137 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ |
| 138 | X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) | ||
| 136 | #define KVM_CR4_GUEST_OWNED_BITS \ | 139 | #define KVM_CR4_GUEST_OWNED_BITS \ |
| 137 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 140 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
| 138 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) | 141 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) |
| 139 | 142 | ||
| 143 | #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE | ||
| 140 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | 144 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) |
| 141 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | 145 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) |
| 142 | 146 | ||
| @@ -165,34 +169,33 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); | |||
| 165 | * Time is measured based on a counter that runs at the same rate as the TSC, | 169 | * Time is measured based on a counter that runs at the same rate as the TSC, |
| 166 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | 170 | * refer SDM volume 3b section 21.6.13 & 22.1.3. |
| 167 | */ | 171 | */ |
| 168 | #define KVM_VMX_DEFAULT_PLE_GAP 128 | 172 | static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; |
| 169 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
| 170 | #define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 | ||
| 171 | #define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 | ||
| 172 | #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ | ||
| 173 | INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW | ||
| 174 | 173 | ||
| 175 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | 174 | static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
| 176 | module_param(ple_gap, int, S_IRUGO); | 175 | module_param(ple_window, uint, 0444); |
| 177 | |||
| 178 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
| 179 | module_param(ple_window, int, S_IRUGO); | ||
| 180 | 176 | ||
| 181 | /* Default doubles per-vcpu window every exit. */ | 177 | /* Default doubles per-vcpu window every exit. */ |
| 182 | static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; | 178 | static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; |
| 183 | module_param(ple_window_grow, int, S_IRUGO); | 179 | module_param(ple_window_grow, uint, 0444); |
| 184 | 180 | ||
| 185 | /* Default resets per-vcpu window every exit to ple_window. */ | 181 | /* Default resets per-vcpu window every exit to ple_window. */ |
| 186 | static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; | 182 | static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; |
| 187 | module_param(ple_window_shrink, int, S_IRUGO); | 183 | module_param(ple_window_shrink, uint, 0444); |
| 188 | 184 | ||
| 189 | /* Default is to compute the maximum so we can never overflow. */ | 185 | /* Default is to compute the maximum so we can never overflow. */ |
| 190 | static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; | 186 | static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; |
| 191 | static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; | 187 | module_param(ple_window_max, uint, 0444); |
| 192 | module_param(ple_window_max, int, S_IRUGO); | ||
| 193 | 188 | ||
| 194 | extern const ulong vmx_return; | 189 | extern const ulong vmx_return; |
| 195 | 190 | ||
| 191 | struct kvm_vmx { | ||
| 192 | struct kvm kvm; | ||
| 193 | |||
| 194 | unsigned int tss_addr; | ||
| 195 | bool ept_identity_pagetable_done; | ||
| 196 | gpa_t ept_identity_map_addr; | ||
| 197 | }; | ||
| 198 | |||
| 196 | #define NR_AUTOLOAD_MSRS 8 | 199 | #define NR_AUTOLOAD_MSRS 8 |
| 197 | 200 | ||
| 198 | struct vmcs { | 201 | struct vmcs { |
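struct kvm_vmx mirrors the SVM change earlier in this diff: the per-VM VMX state (tss_addr and the EPT identity-map bookkeeping) presumably moves out of struct kvm_arch into a wrapper that embeds struct kvm, so the VMX side would grow vmx_vm_alloc()/vmx_vm_free() counterparts analogous to svm_vm_alloc()/svm_vm_free(). A plausible shape is sketched below; this is kernel-context pseudocode inferred from the SVM hunk, not a quote from the patch.

/* Plausible VMX counterparts to svm_vm_alloc()/svm_vm_free(); kernel
 * context assumed, and the actual hunk is outside this excerpt. */
static struct kvm *vmx_vm_alloc(void)
{
	struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);

	return kvm_vmx ? &kvm_vmx->kvm : NULL;
}

static void vmx_vm_free(struct kvm *kvm)
{
	kfree(to_kvm_vmx(kvm));
}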
| @@ -424,6 +427,35 @@ struct __packed vmcs12 { | |||
| 424 | */ | 427 | */ |
| 425 | #define VMCS12_MAX_FIELD_INDEX 0x17 | 428 | #define VMCS12_MAX_FIELD_INDEX 0x17 |
| 426 | 429 | ||
| 430 | struct nested_vmx_msrs { | ||
| 431 | /* | ||
| 432 | * We only store the "true" versions of the VMX capability MSRs. We | ||
| 433 | * generate the "non-true" versions by setting the must-be-1 bits | ||
| 434 | * according to the SDM. | ||
| 435 | */ | ||
| 436 | u32 procbased_ctls_low; | ||
| 437 | u32 procbased_ctls_high; | ||
| 438 | u32 secondary_ctls_low; | ||
| 439 | u32 secondary_ctls_high; | ||
| 440 | u32 pinbased_ctls_low; | ||
| 441 | u32 pinbased_ctls_high; | ||
| 442 | u32 exit_ctls_low; | ||
| 443 | u32 exit_ctls_high; | ||
| 444 | u32 entry_ctls_low; | ||
| 445 | u32 entry_ctls_high; | ||
| 446 | u32 misc_low; | ||
| 447 | u32 misc_high; | ||
| 448 | u32 ept_caps; | ||
| 449 | u32 vpid_caps; | ||
| 450 | u64 basic; | ||
| 451 | u64 cr0_fixed0; | ||
| 452 | u64 cr0_fixed1; | ||
| 453 | u64 cr4_fixed0; | ||
| 454 | u64 cr4_fixed1; | ||
| 455 | u64 vmcs_enum; | ||
| 456 | u64 vmfunc_controls; | ||
| 457 | }; | ||
| 458 | |||
| 427 | /* | 459 | /* |
| 428 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need | 460 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need |
| 429 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. | 461 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. |
| @@ -475,32 +507,7 @@ struct nested_vmx { | |||
| 475 | u16 vpid02; | 507 | u16 vpid02; |
| 476 | u16 last_vpid; | 508 | u16 last_vpid; |
| 477 | 509 | ||
| 478 | /* | 510 | struct nested_vmx_msrs msrs; |
| 479 | * We only store the "true" versions of the VMX capability MSRs. We | ||
| 480 | * generate the "non-true" versions by setting the must-be-1 bits | ||
| 481 | * according to the SDM. | ||
| 482 | */ | ||
| 483 | u32 nested_vmx_procbased_ctls_low; | ||
| 484 | u32 nested_vmx_procbased_ctls_high; | ||
| 485 | u32 nested_vmx_secondary_ctls_low; | ||
| 486 | u32 nested_vmx_secondary_ctls_high; | ||
| 487 | u32 nested_vmx_pinbased_ctls_low; | ||
| 488 | u32 nested_vmx_pinbased_ctls_high; | ||
| 489 | u32 nested_vmx_exit_ctls_low; | ||
| 490 | u32 nested_vmx_exit_ctls_high; | ||
| 491 | u32 nested_vmx_entry_ctls_low; | ||
| 492 | u32 nested_vmx_entry_ctls_high; | ||
| 493 | u32 nested_vmx_misc_low; | ||
| 494 | u32 nested_vmx_misc_high; | ||
| 495 | u32 nested_vmx_ept_caps; | ||
| 496 | u32 nested_vmx_vpid_caps; | ||
| 497 | u64 nested_vmx_basic; | ||
| 498 | u64 nested_vmx_cr0_fixed0; | ||
| 499 | u64 nested_vmx_cr0_fixed1; | ||
| 500 | u64 nested_vmx_cr4_fixed0; | ||
| 501 | u64 nested_vmx_cr4_fixed1; | ||
| 502 | u64 nested_vmx_vmcs_enum; | ||
| 503 | u64 nested_vmx_vmfunc_controls; | ||
| 504 | 511 | ||
| 505 | /* SMM related state */ | 512 | /* SMM related state */ |
| 506 | struct { | 513 | struct { |
| @@ -691,6 +698,11 @@ enum segment_cache_field { | |||
| 691 | SEG_FIELD_NR = 4 | 698 | SEG_FIELD_NR = 4 |
| 692 | }; | 699 | }; |
| 693 | 700 | ||
| 701 | static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) | ||
| 702 | { | ||
| 703 | return container_of(kvm, struct kvm_vmx, kvm); | ||
| 704 | } | ||
| 705 | |||
| 694 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 706 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
| 695 | { | 707 | { |
| 696 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 708 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
| @@ -953,6 +965,7 @@ static struct vmcs_config { | |||
| 953 | u32 cpu_based_2nd_exec_ctrl; | 965 | u32 cpu_based_2nd_exec_ctrl; |
| 954 | u32 vmexit_ctrl; | 966 | u32 vmexit_ctrl; |
| 955 | u32 vmentry_ctrl; | 967 | u32 vmentry_ctrl; |
| 968 | struct nested_vmx_msrs nested; | ||
| 956 | } vmcs_config; | 969 | } vmcs_config; |
| 957 | 970 | ||
| 958 | static struct vmx_capability { | 971 | static struct vmx_capability { |
| @@ -999,6 +1012,169 @@ static const u32 vmx_msr_index[] = { | |||
| 999 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, | 1012 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, |
| 1000 | }; | 1013 | }; |
| 1001 | 1014 | ||
| 1015 | DEFINE_STATIC_KEY_FALSE(enable_evmcs); | ||
| 1016 | |||
| 1017 | #define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs)) | ||
| 1018 | |||
| 1019 | #define KVM_EVMCS_VERSION 1 | ||
| 1020 | |||
| 1021 | #if IS_ENABLED(CONFIG_HYPERV) | ||
| 1022 | static bool __read_mostly enlightened_vmcs = true; | ||
| 1023 | module_param(enlightened_vmcs, bool, 0444); | ||
| 1024 | |||
| 1025 | static inline void evmcs_write64(unsigned long field, u64 value) | ||
| 1026 | { | ||
| 1027 | u16 clean_field; | ||
| 1028 | int offset = get_evmcs_offset(field, &clean_field); | ||
| 1029 | |||
| 1030 | if (offset < 0) | ||
| 1031 | return; | ||
| 1032 | |||
| 1033 | *(u64 *)((char *)current_evmcs + offset) = value; | ||
| 1034 | |||
| 1035 | current_evmcs->hv_clean_fields &= ~clean_field; | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | static inline void evmcs_write32(unsigned long field, u32 value) | ||
| 1039 | { | ||
| 1040 | u16 clean_field; | ||
| 1041 | int offset = get_evmcs_offset(field, &clean_field); | ||
| 1042 | |||
| 1043 | if (offset < 0) | ||
| 1044 | return; | ||
| 1045 | |||
| 1046 | *(u32 *)((char *)current_evmcs + offset) = value; | ||
| 1047 | current_evmcs->hv_clean_fields &= ~clean_field; | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | static inline void evmcs_write16(unsigned long field, u16 value) | ||
| 1051 | { | ||
| 1052 | u16 clean_field; | ||
| 1053 | int offset = get_evmcs_offset(field, &clean_field); | ||
| 1054 | |||
| 1055 | if (offset < 0) | ||
| 1056 | return; | ||
| 1057 | |||
| 1058 | *(u16 *)((char *)current_evmcs + offset) = value; | ||
| 1059 | current_evmcs->hv_clean_fields &= ~clean_field; | ||
| 1060 | } | ||
| 1061 | |||
| 1062 | static inline u64 evmcs_read64(unsigned long field) | ||
| 1063 | { | ||
| 1064 | int offset = get_evmcs_offset(field, NULL); | ||
| 1065 | |||
| 1066 | if (offset < 0) | ||
| 1067 | return 0; | ||
| 1068 | |||
| 1069 | return *(u64 *)((char *)current_evmcs + offset); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | static inline u32 evmcs_read32(unsigned long field) | ||
| 1073 | { | ||
| 1074 | int offset = get_evmcs_offset(field, NULL); | ||
| 1075 | |||
| 1076 | if (offset < 0) | ||
| 1077 | return 0; | ||
| 1078 | |||
| 1079 | return *(u32 *)((char *)current_evmcs + offset); | ||
| 1080 | } | ||
| 1081 | |||
| 1082 | static inline u16 evmcs_read16(unsigned long field) | ||
| 1083 | { | ||
| 1084 | int offset = get_evmcs_offset(field, NULL); | ||
| 1085 | |||
| 1086 | if (offset < 0) | ||
| 1087 | return 0; | ||
| 1088 | |||
| 1089 | return *(u16 *)((char *)current_evmcs + offset); | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | static void evmcs_load(u64 phys_addr) | ||
| 1093 | { | ||
| 1094 | struct hv_vp_assist_page *vp_ap = | ||
| 1095 | hv_get_vp_assist_page(smp_processor_id()); | ||
| 1096 | |||
| 1097 | vp_ap->current_nested_vmcs = phys_addr; | ||
| 1098 | vp_ap->enlighten_vmentry = 1; | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) | ||
| 1102 | { | ||
| 1103 | /* | ||
| 1104 | * Enlightened VMCSv1 doesn't support these: | ||
| 1105 | * | ||
| 1106 | * POSTED_INTR_NV = 0x00000002, | ||
| 1107 | * GUEST_INTR_STATUS = 0x00000810, | ||
| 1108 | * APIC_ACCESS_ADDR = 0x00002014, | ||
| 1109 | * POSTED_INTR_DESC_ADDR = 0x00002016, | ||
| 1110 | * EOI_EXIT_BITMAP0 = 0x0000201c, | ||
| 1111 | * EOI_EXIT_BITMAP1 = 0x0000201e, | ||
| 1112 | * EOI_EXIT_BITMAP2 = 0x00002020, | ||
| 1113 | * EOI_EXIT_BITMAP3 = 0x00002022, | ||
| 1114 | */ | ||
| 1115 | vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; | ||
| 1116 | vmcs_conf->cpu_based_2nd_exec_ctrl &= | ||
| 1117 | ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | ||
| 1118 | vmcs_conf->cpu_based_2nd_exec_ctrl &= | ||
| 1119 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
| 1120 | vmcs_conf->cpu_based_2nd_exec_ctrl &= | ||
| 1121 | ~SECONDARY_EXEC_APIC_REGISTER_VIRT; | ||
| 1122 | |||
| 1123 | /* | ||
| 1124 | * GUEST_PML_INDEX = 0x00000812, | ||
| 1125 | * PML_ADDRESS = 0x0000200e, | ||
| 1126 | */ | ||
| 1127 | vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML; | ||
| 1128 | |||
| 1129 | /* VM_FUNCTION_CONTROL = 0x00002018, */ | ||
| 1130 | vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC; | ||
| 1131 | |||
| 1132 | /* | ||
| 1133 | * EPTP_LIST_ADDRESS = 0x00002024, | ||
| 1134 | * VMREAD_BITMAP = 0x00002026, | ||
| 1135 | * VMWRITE_BITMAP = 0x00002028, | ||
| 1136 | */ | ||
| 1137 | vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
| 1138 | |||
| 1139 | /* | ||
| 1140 | * TSC_MULTIPLIER = 0x00002032, | ||
| 1141 | */ | ||
| 1142 | vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; | ||
| 1143 | |||
| 1144 | /* | ||
| 1145 | * PLE_GAP = 0x00004020, | ||
| 1146 | * PLE_WINDOW = 0x00004022, | ||
| 1147 | */ | ||
| 1148 | vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
| 1149 | |||
| 1150 | /* | ||
| 1151 | * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, | ||
| 1152 | */ | ||
| 1153 | vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; | ||
| 1154 | |||
| 1155 | /* | ||
| 1156 | * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, | ||
| 1157 | * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, | ||
| 1158 | */ | ||
| 1159 | vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; | ||
| 1160 | vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; | ||
| 1161 | |||
| 1162 | /* | ||
| 1163 | * Currently unsupported in KVM: | ||
| 1164 | * GUEST_IA32_RTIT_CTL = 0x00002814, | ||
| 1165 | */ | ||
| 1166 | } | ||
| 1167 | #else /* !IS_ENABLED(CONFIG_HYPERV) */ | ||
| 1168 | static inline void evmcs_write64(unsigned long field, u64 value) {} | ||
| 1169 | static inline void evmcs_write32(unsigned long field, u32 value) {} | ||
| 1170 | static inline void evmcs_write16(unsigned long field, u16 value) {} | ||
| 1171 | static inline u64 evmcs_read64(unsigned long field) { return 0; } | ||
| 1172 | static inline u32 evmcs_read32(unsigned long field) { return 0; } | ||
| 1173 | static inline u16 evmcs_read16(unsigned long field) { return 0; } | ||
| 1174 | static inline void evmcs_load(u64 phys_addr) {} | ||
| 1175 | static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} | ||
| 1176 | #endif /* IS_ENABLED(CONFIG_HYPERV) */ | ||
| 1177 | |||
| 1002 | static inline bool is_exception_n(u32 intr_info, u8 vector) | 1178 | static inline bool is_exception_n(u32 intr_info, u8 vector) |
| 1003 | { | 1179 | { |
| 1004 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 1180 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
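Each evmcs_write*() helper above does two things: it stores the value into the enlightened VMCS page and clears the matching bit in hv_clean_fields, which is how the underlying Hyper-V hypervisor learns which groups of fields must be re-read on the next VM entry (unchanged groups can be skipped, which is the point of the format). A toy userspace model of that write-through-and-dirty bookkeeping follows; the field indices and group bits are invented for illustration.

/* Toy model of the enlightened-VMCS "clean fields" protocol: a write updates
 * the field and marks its group dirty so the hypervisor reloads that group. */
#include <stdint.h>
#include <stdio.h>

struct toy_evmcs {
	uint32_t fields[8];
	uint32_t hv_clean_fields;	/* bit set => group unchanged since last entry */
};

static void toy_evmcs_write32(struct toy_evmcs *e, unsigned int idx,
			      uint32_t val, uint32_t clean_bit)
{
	e->fields[idx] = val;
	e->hv_clean_fields &= ~clean_bit;	/* group must be re-read */
}

int main(void)
{
	struct toy_evmcs e = { .hv_clean_fields = ~0u };

	toy_evmcs_write32(&e, 3, 0x1234, 1u << 2);
	printf("clean mask after one write: %#x\n", e.hv_clean_fields);
	return 0;
}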
| @@ -1031,6 +1207,11 @@ static inline bool is_invalid_opcode(u32 intr_info) | |||
| 1031 | return is_exception_n(intr_info, UD_VECTOR); | 1207 | return is_exception_n(intr_info, UD_VECTOR); |
| 1032 | } | 1208 | } |
| 1033 | 1209 | ||
| 1210 | static inline bool is_gp_fault(u32 intr_info) | ||
| 1211 | { | ||
| 1212 | return is_exception_n(intr_info, GP_VECTOR); | ||
| 1213 | } | ||
| 1214 | |||
| 1034 | static inline bool is_external_interrupt(u32 intr_info) | 1215 | static inline bool is_external_interrupt(u32 intr_info) |
| 1035 | { | 1216 | { |
| 1036 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) | 1217 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) |
| @@ -1320,7 +1501,7 @@ static inline bool report_flexpriority(void) | |||
| 1320 | 1501 | ||
| 1321 | static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) | 1502 | static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) |
| 1322 | { | 1503 | { |
| 1323 | return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low); | 1504 | return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low); |
| 1324 | } | 1505 | } |
| 1325 | 1506 | ||
| 1326 | static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) | 1507 | static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) |
| @@ -1341,6 +1522,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) | |||
| 1341 | PIN_BASED_VMX_PREEMPTION_TIMER; | 1522 | PIN_BASED_VMX_PREEMPTION_TIMER; |
| 1342 | } | 1523 | } |
| 1343 | 1524 | ||
| 1525 | static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12) | ||
| 1526 | { | ||
| 1527 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING; | ||
| 1528 | } | ||
| 1529 | |||
| 1530 | static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) | ||
| 1531 | { | ||
| 1532 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; | ||
| 1533 | } | ||
| 1534 | |||
| 1344 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | 1535 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) |
| 1345 | { | 1536 | { |
| 1346 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); | 1537 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); |
| @@ -1479,6 +1670,9 @@ static void vmcs_load(struct vmcs *vmcs) | |||
| 1479 | u64 phys_addr = __pa(vmcs); | 1670 | u64 phys_addr = __pa(vmcs); |
| 1480 | u8 error; | 1671 | u8 error; |
| 1481 | 1672 | ||
| 1673 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1674 | return evmcs_load(phys_addr); | ||
| 1675 | |||
| 1482 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" | 1676 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" |
| 1483 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) | 1677 | : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) |
| 1484 | : "cc", "memory"); | 1678 | : "cc", "memory"); |
| @@ -1652,18 +1846,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) | |||
| 1652 | static __always_inline u16 vmcs_read16(unsigned long field) | 1846 | static __always_inline u16 vmcs_read16(unsigned long field) |
| 1653 | { | 1847 | { |
| 1654 | vmcs_check16(field); | 1848 | vmcs_check16(field); |
| 1849 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1850 | return evmcs_read16(field); | ||
| 1655 | return __vmcs_readl(field); | 1851 | return __vmcs_readl(field); |
| 1656 | } | 1852 | } |
| 1657 | 1853 | ||
| 1658 | static __always_inline u32 vmcs_read32(unsigned long field) | 1854 | static __always_inline u32 vmcs_read32(unsigned long field) |
| 1659 | { | 1855 | { |
| 1660 | vmcs_check32(field); | 1856 | vmcs_check32(field); |
| 1857 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1858 | return evmcs_read32(field); | ||
| 1661 | return __vmcs_readl(field); | 1859 | return __vmcs_readl(field); |
| 1662 | } | 1860 | } |
| 1663 | 1861 | ||
| 1664 | static __always_inline u64 vmcs_read64(unsigned long field) | 1862 | static __always_inline u64 vmcs_read64(unsigned long field) |
| 1665 | { | 1863 | { |
| 1666 | vmcs_check64(field); | 1864 | vmcs_check64(field); |
| 1865 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1866 | return evmcs_read64(field); | ||
| 1667 | #ifdef CONFIG_X86_64 | 1867 | #ifdef CONFIG_X86_64 |
| 1668 | return __vmcs_readl(field); | 1868 | return __vmcs_readl(field); |
| 1669 | #else | 1869 | #else |
| @@ -1674,6 +1874,8 @@ static __always_inline u64 vmcs_read64(unsigned long field) | |||
| 1674 | static __always_inline unsigned long vmcs_readl(unsigned long field) | 1874 | static __always_inline unsigned long vmcs_readl(unsigned long field) |
| 1675 | { | 1875 | { |
| 1676 | vmcs_checkl(field); | 1876 | vmcs_checkl(field); |
| 1877 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1878 | return evmcs_read64(field); | ||
| 1677 | return __vmcs_readl(field); | 1879 | return __vmcs_readl(field); |
| 1678 | } | 1880 | } |
| 1679 | 1881 | ||
| @@ -1697,18 +1899,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val | |||
| 1697 | static __always_inline void vmcs_write16(unsigned long field, u16 value) | 1899 | static __always_inline void vmcs_write16(unsigned long field, u16 value) |
| 1698 | { | 1900 | { |
| 1699 | vmcs_check16(field); | 1901 | vmcs_check16(field); |
| 1902 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1903 | return evmcs_write16(field, value); | ||
| 1904 | |||
| 1700 | __vmcs_writel(field, value); | 1905 | __vmcs_writel(field, value); |
| 1701 | } | 1906 | } |
| 1702 | 1907 | ||
| 1703 | static __always_inline void vmcs_write32(unsigned long field, u32 value) | 1908 | static __always_inline void vmcs_write32(unsigned long field, u32 value) |
| 1704 | { | 1909 | { |
| 1705 | vmcs_check32(field); | 1910 | vmcs_check32(field); |
| 1911 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1912 | return evmcs_write32(field, value); | ||
| 1913 | |||
| 1706 | __vmcs_writel(field, value); | 1914 | __vmcs_writel(field, value); |
| 1707 | } | 1915 | } |
| 1708 | 1916 | ||
| 1709 | static __always_inline void vmcs_write64(unsigned long field, u64 value) | 1917 | static __always_inline void vmcs_write64(unsigned long field, u64 value) |
| 1710 | { | 1918 | { |
| 1711 | vmcs_check64(field); | 1919 | vmcs_check64(field); |
| 1920 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1921 | return evmcs_write64(field, value); | ||
| 1922 | |||
| 1712 | __vmcs_writel(field, value); | 1923 | __vmcs_writel(field, value); |
| 1713 | #ifndef CONFIG_X86_64 | 1924 | #ifndef CONFIG_X86_64 |
| 1714 | asm volatile (""); | 1925 | asm volatile (""); |
| @@ -1719,6 +1930,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value) | |||
| 1719 | static __always_inline void vmcs_writel(unsigned long field, unsigned long value) | 1930 | static __always_inline void vmcs_writel(unsigned long field, unsigned long value) |
| 1720 | { | 1931 | { |
| 1721 | vmcs_checkl(field); | 1932 | vmcs_checkl(field); |
| 1933 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1934 | return evmcs_write64(field, value); | ||
| 1935 | |||
| 1722 | __vmcs_writel(field, value); | 1936 | __vmcs_writel(field, value); |
| 1723 | } | 1937 | } |
| 1724 | 1938 | ||
| @@ -1726,6 +1940,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) | |||
| 1726 | { | 1940 | { |
| 1727 | BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, | 1941 | BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, |
| 1728 | "vmcs_clear_bits does not support 64-bit fields"); | 1942 | "vmcs_clear_bits does not support 64-bit fields"); |
| 1943 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1944 | return evmcs_write32(field, evmcs_read32(field) & ~mask); | ||
| 1945 | |||
| 1729 | __vmcs_writel(field, __vmcs_readl(field) & ~mask); | 1946 | __vmcs_writel(field, __vmcs_readl(field) & ~mask); |
| 1730 | } | 1947 | } |
| 1731 | 1948 | ||
| @@ -1733,6 +1950,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) | |||
| 1733 | { | 1950 | { |
| 1734 | BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, | 1951 | BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, |
| 1735 | "vmcs_set_bits does not support 64-bit fields"); | 1952 | "vmcs_set_bits does not support 64-bit fields"); |
| 1953 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 1954 | return evmcs_write32(field, evmcs_read32(field) | mask); | ||
| 1955 | |||
| 1736 | __vmcs_writel(field, __vmcs_readl(field) | mask); | 1956 | __vmcs_writel(field, __vmcs_readl(field) | mask); |
| 1737 | } | 1957 | } |
| 1738 | 1958 | ||
| @@ -1864,6 +2084,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 1864 | 2084 | ||
| 1865 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | | 2085 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
| 1866 | (1u << DB_VECTOR) | (1u << AC_VECTOR); | 2086 | (1u << DB_VECTOR) | (1u << AC_VECTOR); |
| 2087 | /* | ||
| 2088 | * Guest access to VMware backdoor ports could legitimately | ||
| 2089 | * trigger #GP because of the TSS I/O permission bitmap. | ||
| 2090 | * We intercept those #GPs and allow access to them anyway | ||
| 2091 | * as VMware does. | ||
| 2092 | */ | ||
| 2093 | if (enable_vmware_backdoor) | ||
| 2094 | eb |= (1u << GP_VECTOR); | ||
| 1867 | if ((vcpu->guest_debug & | 2095 | if ((vcpu->guest_debug & |
| 1868 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == | 2096 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
| 1869 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) | 2097 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
| @@ -2129,6 +2357,9 @@ static unsigned long segment_base(u16 selector) | |||
| 2129 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 2357 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
| 2130 | { | 2358 | { |
| 2131 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2359 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2360 | #ifdef CONFIG_X86_64 | ||
| 2361 | int cpu = raw_smp_processor_id(); | ||
| 2362 | #endif | ||
| 2132 | int i; | 2363 | int i; |
| 2133 | 2364 | ||
| 2134 | if (vmx->host_state.loaded) | 2365 | if (vmx->host_state.loaded) |
| @@ -2141,7 +2372,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
| 2141 | */ | 2372 | */ |
| 2142 | vmx->host_state.ldt_sel = kvm_read_ldt(); | 2373 | vmx->host_state.ldt_sel = kvm_read_ldt(); |
| 2143 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; | 2374 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; |
| 2375 | |||
| 2376 | #ifdef CONFIG_X86_64 | ||
| 2377 | save_fsgs_for_kvm(); | ||
| 2378 | vmx->host_state.fs_sel = current->thread.fsindex; | ||
| 2379 | vmx->host_state.gs_sel = current->thread.gsindex; | ||
| 2380 | #else | ||
| 2144 | savesegment(fs, vmx->host_state.fs_sel); | 2381 | savesegment(fs, vmx->host_state.fs_sel); |
| 2382 | savesegment(gs, vmx->host_state.gs_sel); | ||
| 2383 | #endif | ||
| 2145 | if (!(vmx->host_state.fs_sel & 7)) { | 2384 | if (!(vmx->host_state.fs_sel & 7)) { |
| 2146 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); | 2385 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); |
| 2147 | vmx->host_state.fs_reload_needed = 0; | 2386 | vmx->host_state.fs_reload_needed = 0; |
| @@ -2149,7 +2388,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
| 2149 | vmcs_write16(HOST_FS_SELECTOR, 0); | 2388 | vmcs_write16(HOST_FS_SELECTOR, 0); |
| 2150 | vmx->host_state.fs_reload_needed = 1; | 2389 | vmx->host_state.fs_reload_needed = 1; |
| 2151 | } | 2390 | } |
| 2152 | savesegment(gs, vmx->host_state.gs_sel); | ||
| 2153 | if (!(vmx->host_state.gs_sel & 7)) | 2391 | if (!(vmx->host_state.gs_sel & 7)) |
| 2154 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); | 2392 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); |
| 2155 | else { | 2393 | else { |
| @@ -2160,20 +2398,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
| 2160 | #ifdef CONFIG_X86_64 | 2398 | #ifdef CONFIG_X86_64 |
| 2161 | savesegment(ds, vmx->host_state.ds_sel); | 2399 | savesegment(ds, vmx->host_state.ds_sel); |
| 2162 | savesegment(es, vmx->host_state.es_sel); | 2400 | savesegment(es, vmx->host_state.es_sel); |
| 2163 | #endif | ||
| 2164 | 2401 | ||
| 2165 | #ifdef CONFIG_X86_64 | 2402 | vmcs_writel(HOST_FS_BASE, current->thread.fsbase); |
| 2166 | vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); | 2403 | vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu)); |
| 2167 | vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); | ||
| 2168 | #else | ||
| 2169 | vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); | ||
| 2170 | vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); | ||
| 2171 | #endif | ||
| 2172 | 2404 | ||
| 2173 | #ifdef CONFIG_X86_64 | 2405 | vmx->msr_host_kernel_gs_base = current->thread.gsbase; |
| 2174 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
| 2175 | if (is_long_mode(&vmx->vcpu)) | 2406 | if (is_long_mode(&vmx->vcpu)) |
| 2176 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 2407 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
| 2408 | #else | ||
| 2409 | vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); | ||
| 2410 | vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); | ||
| 2177 | #endif | 2411 | #endif |
| 2178 | if (boot_cpu_has(X86_FEATURE_MPX)) | 2412 | if (boot_cpu_has(X86_FEATURE_MPX)) |
| 2179 | rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); | 2413 | rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); |
| @@ -2532,6 +2766,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit | |||
| 2532 | return 0; | 2766 | return 0; |
| 2533 | } | 2767 | } |
| 2534 | 2768 | ||
| 2769 | static void vmx_clear_hlt(struct kvm_vcpu *vcpu) | ||
| 2770 | { | ||
| 2771 | /* | ||
| 2772 | * Ensure that we clear the HLT state in the VMCS. We don't need to | ||
| 2773 | * explicitly skip the instruction because if the HLT state is set, | ||
| 2774 | * then the instruction is already executing and RIP has already been | ||
| 2775 | * advanced. | ||
| 2776 | */ | ||
| 2777 | if (kvm_hlt_in_guest(vcpu->kvm) && | ||
| 2778 | vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) | ||
| 2779 | vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); | ||
| 2780 | } | ||
| 2781 | |||
| 2535 | static void vmx_queue_exception(struct kvm_vcpu *vcpu) | 2782 | static void vmx_queue_exception(struct kvm_vcpu *vcpu) |
| 2536 | { | 2783 | { |
| 2537 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2784 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
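Aside: vmx_clear_hlt() above encodes a small invariant — when HLT exiting is disabled for the VM and an event is about to be injected, the guest activity state must be forced back to "active", otherwise VM entry would resume the vCPU halted and the event would be lost. A minimal sketch of that invariant, with made-up types standing in for the VMCS fields:

#include <assert.h>
#include <stdbool.h>

enum activity { ACTIVITY_ACTIVE, ACTIVITY_HLT };

struct vcpu_state {
	bool hlt_in_guest;	/* HLT exiting disabled for this VM */
	enum activity activity;	/* stand-in for GUEST_ACTIVITY_STATE */
	bool event_pending;
};

/* Mirror of the pattern above: clear the HLT state before injecting. */
static void clear_hlt(struct vcpu_state *v)
{
	if (v->hlt_in_guest && v->activity == ACTIVITY_HLT)
		v->activity = ACTIVITY_ACTIVE;
}

static void inject_event(struct vcpu_state *v)
{
	v->event_pending = true;
	clear_hlt(v);
}

int main(void)
{
	struct vcpu_state v = {
		.hlt_in_guest = true,
		.activity = ACTIVITY_HLT,
	};

	inject_event(&v);
	/* The injected event must find the vCPU runnable again. */
	assert(v.activity == ACTIVITY_ACTIVE && v.event_pending);
	return 0;
}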
| @@ -2554,6 +2801,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) | |||
| 2554 | return; | 2801 | return; |
| 2555 | } | 2802 | } |
| 2556 | 2803 | ||
| 2804 | WARN_ON_ONCE(vmx->emulation_required); | ||
| 2805 | |||
| 2557 | if (kvm_exception_is_soft(nr)) { | 2806 | if (kvm_exception_is_soft(nr)) { |
| 2558 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 2807 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
| 2559 | vmx->vcpu.arch.event_exit_inst_len); | 2808 | vmx->vcpu.arch.event_exit_inst_len); |
| @@ -2562,6 +2811,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) | |||
| 2562 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 2811 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
| 2563 | 2812 | ||
| 2564 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 2813 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
| 2814 | |||
| 2815 | vmx_clear_hlt(vcpu); | ||
| 2565 | } | 2816 | } |
| 2566 | 2817 | ||
| 2567 | static bool vmx_rdtscp_supported(void) | 2818 | static bool vmx_rdtscp_supported(void) |
| @@ -2689,8 +2940,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) | |||
| 2689 | * bit in the high half is on if the corresponding bit in the control field | 2940 | * bit in the high half is on if the corresponding bit in the control field |
| 2690 | * may be on. See also vmx_control_verify(). | 2941 | * may be on. See also vmx_control_verify(). |
| 2691 | */ | 2942 | */ |
| 2692 | static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | 2943 | static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) |
| 2693 | { | 2944 | { |
| 2945 | if (!nested) { | ||
| 2946 | memset(msrs, 0, sizeof(*msrs)); | ||
| 2947 | return; | ||
| 2948 | } | ||
| 2949 | |||
| 2694 | /* | 2950 | /* |
| 2695 | * Note that as a general rule, the high half of the MSRs (bits in | 2951 | * Note that as a general rule, the high half of the MSRs (bits in |
| 2696 | * the control fields which may be 1) should be initialized by the | 2952 | * the control fields which may be 1) should be initialized by the |
| @@ -2708,70 +2964,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2708 | 2964 | ||
| 2709 | /* pin-based controls */ | 2965 | /* pin-based controls */ |
| 2710 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, | 2966 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, |
| 2711 | vmx->nested.nested_vmx_pinbased_ctls_low, | 2967 | msrs->pinbased_ctls_low, |
| 2712 | vmx->nested.nested_vmx_pinbased_ctls_high); | 2968 | msrs->pinbased_ctls_high); |
| 2713 | vmx->nested.nested_vmx_pinbased_ctls_low |= | 2969 | msrs->pinbased_ctls_low |= |
| 2714 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 2970 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
| 2715 | vmx->nested.nested_vmx_pinbased_ctls_high &= | 2971 | msrs->pinbased_ctls_high &= |
| 2716 | PIN_BASED_EXT_INTR_MASK | | 2972 | PIN_BASED_EXT_INTR_MASK | |
| 2717 | PIN_BASED_NMI_EXITING | | 2973 | PIN_BASED_NMI_EXITING | |
| 2718 | PIN_BASED_VIRTUAL_NMIS; | 2974 | PIN_BASED_VIRTUAL_NMIS | |
| 2719 | vmx->nested.nested_vmx_pinbased_ctls_high |= | 2975 | (apicv ? PIN_BASED_POSTED_INTR : 0); |
| 2976 | msrs->pinbased_ctls_high |= | ||
| 2720 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 2977 | PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | |
| 2721 | PIN_BASED_VMX_PREEMPTION_TIMER; | 2978 | PIN_BASED_VMX_PREEMPTION_TIMER; |
| 2722 | if (kvm_vcpu_apicv_active(&vmx->vcpu)) | ||
| 2723 | vmx->nested.nested_vmx_pinbased_ctls_high |= | ||
| 2724 | PIN_BASED_POSTED_INTR; | ||
| 2725 | 2979 | ||
| 2726 | /* exit controls */ | 2980 | /* exit controls */ |
| 2727 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, | 2981 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, |
| 2728 | vmx->nested.nested_vmx_exit_ctls_low, | 2982 | msrs->exit_ctls_low, |
| 2729 | vmx->nested.nested_vmx_exit_ctls_high); | 2983 | msrs->exit_ctls_high); |
| 2730 | vmx->nested.nested_vmx_exit_ctls_low = | 2984 | msrs->exit_ctls_low = |
| 2731 | VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2985 | VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; |
| 2732 | 2986 | ||
| 2733 | vmx->nested.nested_vmx_exit_ctls_high &= | 2987 | msrs->exit_ctls_high &= |
| 2734 | #ifdef CONFIG_X86_64 | 2988 | #ifdef CONFIG_X86_64 |
| 2735 | VM_EXIT_HOST_ADDR_SPACE_SIZE | | 2989 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
| 2736 | #endif | 2990 | #endif |
| 2737 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; | 2991 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; |
| 2738 | vmx->nested.nested_vmx_exit_ctls_high |= | 2992 | msrs->exit_ctls_high |= |
| 2739 | VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | 2993 | VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | |
| 2740 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | | 2994 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | |
| 2741 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; | 2995 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; |
| 2742 | 2996 | ||
| 2743 | if (kvm_mpx_supported()) | 2997 | if (kvm_mpx_supported()) |
| 2744 | vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; | 2998 | msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; |
| 2745 | 2999 | ||
| 2746 | /* We support free control of debug control saving. */ | 3000 | /* We support free control of debug control saving. */ |
| 2747 | vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; | 3001 | msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; |
| 2748 | 3002 | ||
| 2749 | /* entry controls */ | 3003 | /* entry controls */ |
| 2750 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 3004 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
| 2751 | vmx->nested.nested_vmx_entry_ctls_low, | 3005 | msrs->entry_ctls_low, |
| 2752 | vmx->nested.nested_vmx_entry_ctls_high); | 3006 | msrs->entry_ctls_high); |
| 2753 | vmx->nested.nested_vmx_entry_ctls_low = | 3007 | msrs->entry_ctls_low = |
| 2754 | VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 3008 | VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; |
| 2755 | vmx->nested.nested_vmx_entry_ctls_high &= | 3009 | msrs->entry_ctls_high &= |
| 2756 | #ifdef CONFIG_X86_64 | 3010 | #ifdef CONFIG_X86_64 |
| 2757 | VM_ENTRY_IA32E_MODE | | 3011 | VM_ENTRY_IA32E_MODE | |
| 2758 | #endif | 3012 | #endif |
| 2759 | VM_ENTRY_LOAD_IA32_PAT; | 3013 | VM_ENTRY_LOAD_IA32_PAT; |
| 2760 | vmx->nested.nested_vmx_entry_ctls_high |= | 3014 | msrs->entry_ctls_high |= |
| 2761 | (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); | 3015 | (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); |
| 2762 | if (kvm_mpx_supported()) | 3016 | if (kvm_mpx_supported()) |
| 2763 | vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; | 3017 | msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; |
| 2764 | 3018 | ||
| 2765 | /* We support free control of debug control loading. */ | 3019 | /* We support free control of debug control loading. */ |
| 2766 | vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; | 3020 | msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; |
| 2767 | 3021 | ||
| 2768 | /* cpu-based controls */ | 3022 | /* cpu-based controls */ |
| 2769 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 3023 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
| 2770 | vmx->nested.nested_vmx_procbased_ctls_low, | 3024 | msrs->procbased_ctls_low, |
| 2771 | vmx->nested.nested_vmx_procbased_ctls_high); | 3025 | msrs->procbased_ctls_high); |
| 2772 | vmx->nested.nested_vmx_procbased_ctls_low = | 3026 | msrs->procbased_ctls_low = |
| 2773 | CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 3027 | CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
| 2774 | vmx->nested.nested_vmx_procbased_ctls_high &= | 3028 | msrs->procbased_ctls_high &= |
| 2775 | CPU_BASED_VIRTUAL_INTR_PENDING | | 3029 | CPU_BASED_VIRTUAL_INTR_PENDING | |
| 2776 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | | 3030 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | |
| 2777 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | | 3031 | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | |
| @@ -2791,12 +3045,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2791 | * can use it to avoid exits to L1 - even when L0 runs L2 | 3045 | * can use it to avoid exits to L1 - even when L0 runs L2 |
| 2792 | * without MSR bitmaps. | 3046 | * without MSR bitmaps. |
| 2793 | */ | 3047 | */ |
| 2794 | vmx->nested.nested_vmx_procbased_ctls_high |= | 3048 | msrs->procbased_ctls_high |= |
| 2795 | CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 3049 | CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | |
| 2796 | CPU_BASED_USE_MSR_BITMAPS; | 3050 | CPU_BASED_USE_MSR_BITMAPS; |
| 2797 | 3051 | ||
| 2798 | /* We support free control of CR3 access interception. */ | 3052 | /* We support free control of CR3 access interception. */ |
| 2799 | vmx->nested.nested_vmx_procbased_ctls_low &= | 3053 | msrs->procbased_ctls_low &= |
| 2800 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); | 3054 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); |
| 2801 | 3055 | ||
| 2802 | /* | 3056 | /* |
| @@ -2804,10 +3058,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2804 | * depend on CPUID bits, they are added later by vmx_cpuid_update. | 3058 | * depend on CPUID bits, they are added later by vmx_cpuid_update. |
| 2805 | */ | 3059 | */ |
| 2806 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | 3060 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, |
| 2807 | vmx->nested.nested_vmx_secondary_ctls_low, | 3061 | msrs->secondary_ctls_low, |
| 2808 | vmx->nested.nested_vmx_secondary_ctls_high); | 3062 | msrs->secondary_ctls_high); |
| 2809 | vmx->nested.nested_vmx_secondary_ctls_low = 0; | 3063 | msrs->secondary_ctls_low = 0; |
| 2810 | vmx->nested.nested_vmx_secondary_ctls_high &= | 3064 | msrs->secondary_ctls_high &= |
| 2811 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | | 3065 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
| 2812 | SECONDARY_EXEC_DESC | | 3066 | SECONDARY_EXEC_DESC | |
| 2813 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | | 3067 | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | |
| @@ -2817,33 +3071,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2817 | 3071 | ||
| 2818 | if (enable_ept) { | 3072 | if (enable_ept) { |
| 2819 | /* nested EPT: emulate EPT also to L1 */ | 3073 | /* nested EPT: emulate EPT also to L1 */ |
| 2820 | vmx->nested.nested_vmx_secondary_ctls_high |= | 3074 | msrs->secondary_ctls_high |= |
| 2821 | SECONDARY_EXEC_ENABLE_EPT; | 3075 | SECONDARY_EXEC_ENABLE_EPT; |
| 2822 | vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | | 3076 | msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT | |
| 2823 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; | 3077 | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; |
| 2824 | if (cpu_has_vmx_ept_execute_only()) | 3078 | if (cpu_has_vmx_ept_execute_only()) |
| 2825 | vmx->nested.nested_vmx_ept_caps |= | 3079 | msrs->ept_caps |= |
| 2826 | VMX_EPT_EXECUTE_ONLY_BIT; | 3080 | VMX_EPT_EXECUTE_ONLY_BIT; |
| 2827 | vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; | 3081 | msrs->ept_caps &= vmx_capability.ept; |
| 2828 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | | 3082 | msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | |
| 2829 | VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | | 3083 | VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | |
| 2830 | VMX_EPT_1GB_PAGE_BIT; | 3084 | VMX_EPT_1GB_PAGE_BIT; |
| 2831 | if (enable_ept_ad_bits) { | 3085 | if (enable_ept_ad_bits) { |
| 2832 | vmx->nested.nested_vmx_secondary_ctls_high |= | 3086 | msrs->secondary_ctls_high |= |
| 2833 | SECONDARY_EXEC_ENABLE_PML; | 3087 | SECONDARY_EXEC_ENABLE_PML; |
| 2834 | vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; | 3088 | msrs->ept_caps |= VMX_EPT_AD_BIT; |
| 2835 | } | 3089 | } |
| 2836 | } | 3090 | } |
| 2837 | 3091 | ||
| 2838 | if (cpu_has_vmx_vmfunc()) { | 3092 | if (cpu_has_vmx_vmfunc()) { |
| 2839 | vmx->nested.nested_vmx_secondary_ctls_high |= | 3093 | msrs->secondary_ctls_high |= |
| 2840 | SECONDARY_EXEC_ENABLE_VMFUNC; | 3094 | SECONDARY_EXEC_ENABLE_VMFUNC; |
| 2841 | /* | 3095 | /* |
| 2842 | * Advertise EPTP switching unconditionally | 3096 | * Advertise EPTP switching unconditionally |
| 2843 | * since we emulate it | 3097 | * since we emulate it |
| 2844 | */ | 3098 | */ |
| 2845 | if (enable_ept) | 3099 | if (enable_ept) |
| 2846 | vmx->nested.nested_vmx_vmfunc_controls = | 3100 | msrs->vmfunc_controls = |
| 2847 | VMX_VMFUNC_EPTP_SWITCHING; | 3101 | VMX_VMFUNC_EPTP_SWITCHING; |
| 2848 | } | 3102 | } |
| 2849 | 3103 | ||
| @@ -2854,25 +3108,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2854 | * not failing the single-context invvpid, and it is worse. | 3108 | * not failing the single-context invvpid, and it is worse. |
| 2855 | */ | 3109 | */ |
| 2856 | if (enable_vpid) { | 3110 | if (enable_vpid) { |
| 2857 | vmx->nested.nested_vmx_secondary_ctls_high |= | 3111 | msrs->secondary_ctls_high |= |
| 2858 | SECONDARY_EXEC_ENABLE_VPID; | 3112 | SECONDARY_EXEC_ENABLE_VPID; |
| 2859 | vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | | 3113 | msrs->vpid_caps = VMX_VPID_INVVPID_BIT | |
| 2860 | VMX_VPID_EXTENT_SUPPORTED_MASK; | 3114 | VMX_VPID_EXTENT_SUPPORTED_MASK; |
| 2861 | } | 3115 | } |
| 2862 | 3116 | ||
| 2863 | if (enable_unrestricted_guest) | 3117 | if (enable_unrestricted_guest) |
| 2864 | vmx->nested.nested_vmx_secondary_ctls_high |= | 3118 | msrs->secondary_ctls_high |= |
| 2865 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 3119 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 2866 | 3120 | ||
| 2867 | /* miscellaneous data */ | 3121 | /* miscellaneous data */ |
| 2868 | rdmsr(MSR_IA32_VMX_MISC, | 3122 | rdmsr(MSR_IA32_VMX_MISC, |
| 2869 | vmx->nested.nested_vmx_misc_low, | 3123 | msrs->misc_low, |
| 2870 | vmx->nested.nested_vmx_misc_high); | 3124 | msrs->misc_high); |
| 2871 | vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; | 3125 | msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; |
| 2872 | vmx->nested.nested_vmx_misc_low |= | 3126 | msrs->misc_low |= |
| 2873 | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | | 3127 | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | |
| 2874 | VMX_MISC_ACTIVITY_HLT; | 3128 | VMX_MISC_ACTIVITY_HLT; |
| 2875 | vmx->nested.nested_vmx_misc_high = 0; | 3129 | msrs->misc_high = 0; |
| 2876 | 3130 | ||
| 2877 | /* | 3131 | /* |
| 2878 | * This MSR reports some information about VMX support. We | 3132 | * This MSR reports some information about VMX support. We |
| @@ -2880,14 +3134,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2880 | * guest, and the VMCS structure we give it - not about the | 3134 | * guest, and the VMCS structure we give it - not about the |
| 2881 | * VMX support of the underlying hardware. | 3135 | * VMX support of the underlying hardware. |
| 2882 | */ | 3136 | */ |
| 2883 | vmx->nested.nested_vmx_basic = | 3137 | msrs->basic = |
| 2884 | VMCS12_REVISION | | 3138 | VMCS12_REVISION | |
| 2885 | VMX_BASIC_TRUE_CTLS | | 3139 | VMX_BASIC_TRUE_CTLS | |
| 2886 | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | | 3140 | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | |
| 2887 | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); | 3141 | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); |
| 2888 | 3142 | ||
| 2889 | if (cpu_has_vmx_basic_inout()) | 3143 | if (cpu_has_vmx_basic_inout()) |
| 2890 | vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT; | 3144 | msrs->basic |= VMX_BASIC_INOUT; |
| 2891 | 3145 | ||
| 2892 | /* | 3146 | /* |
| 2893 | * These MSRs specify bits which the guest must keep fixed on | 3147 | * These MSRs specify bits which the guest must keep fixed on |
| @@ -2896,15 +3150,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) | |||
| 2896 | */ | 3150 | */ |
| 2897 | #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) | 3151 | #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) |
| 2898 | #define VMXON_CR4_ALWAYSON X86_CR4_VMXE | 3152 | #define VMXON_CR4_ALWAYSON X86_CR4_VMXE |
| 2899 | vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON; | 3153 | msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; |
| 2900 | vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON; | 3154 | msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; |
| 2901 | 3155 | ||
| 2902 | /* These MSRs specify bits which the guest must keep fixed off. */ | 3156 | /* These MSRs specify bits which the guest must keep fixed off. */ |
| 2903 | rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1); | 3157 | rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); |
| 2904 | rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1); | 3158 | rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); |
| 2905 | 3159 | ||
| 2906 | /* highest index: VMX_PREEMPTION_TIMER_VALUE */ | 3160 | /* highest index: VMX_PREEMPTION_TIMER_VALUE */ |
| 2907 | vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; | 3161 | msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; |
| 2908 | } | 3162 | } |
| 2909 | 3163 | ||
| 2910 | /* | 3164 | /* |
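Aside: the signature change from nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) to (struct nested_vmx_msrs *msrs, bool apicv) is a common decoupling move — the function fills a caller-provided capability struct and takes the one per-vCPU input it needs as a plain argument, so the same code can initialise both a global default (vmcs_config.nested) and each vCPU's private copy. A toy version of the pattern, with invented names:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Invented capability struct; stands in for nested_vmx_msrs. */
struct caps {
	unsigned int pinbased;
	unsigned int secondary;
};

#define CAP_NMI_EXITING   0x1u
#define CAP_POSTED_INTR   0x2u
#define CAP_ENABLE_EPT    0x4u

static bool nested_enabled = true;	/* module-parameter stand-in */

/* Fill a caller-provided struct; no back-pointer to a vCPU needed. */
static void setup_caps(struct caps *c, bool apicv)
{
	if (!nested_enabled) {
		memset(c, 0, sizeof(*c));
		return;
	}
	c->pinbased = CAP_NMI_EXITING | (apicv ? CAP_POSTED_INTR : 0);
	c->secondary = CAP_ENABLE_EPT;
}

struct vcpu {
	struct caps msrs;	/* per-instance copy */
};

int main(void)
{
	struct caps global_defaults;
	struct vcpu v;

	setup_caps(&global_defaults, false);	/* global template */
	setup_caps(&v.msrs, true);		/* per-vCPU, APICv on */
	printf("global=%#x vcpu=%#x\n",
	       global_defaults.pinbased, v.msrs.pinbased);
	return 0;
}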
| @@ -2941,7 +3195,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) | |||
| 2941 | BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | | 3195 | BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | |
| 2942 | /* reserved */ | 3196 | /* reserved */ |
| 2943 | BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); | 3197 | BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); |
| 2944 | u64 vmx_basic = vmx->nested.nested_vmx_basic; | 3198 | u64 vmx_basic = vmx->nested.msrs.basic; |
| 2945 | 3199 | ||
| 2946 | if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) | 3200 | if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) |
| 2947 | return -EINVAL; | 3201 | return -EINVAL; |
| @@ -2960,7 +3214,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) | |||
| 2960 | if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) | 3214 | if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) |
| 2961 | return -EINVAL; | 3215 | return -EINVAL; |
| 2962 | 3216 | ||
| 2963 | vmx->nested.nested_vmx_basic = data; | 3217 | vmx->nested.msrs.basic = data; |
| 2964 | return 0; | 3218 | return 0; |
| 2965 | } | 3219 | } |
| 2966 | 3220 | ||
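Aside: the is_bitwise_subset() call above is the usual guard when userspace restores a capability MSR — within the feature-and-reserved mask, the restored value may only clear bits relative to what the host advertised, never set new ones. A standalone sketch of that check (the helper name is reused for illustration; the kernel's own definition may be written differently):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Within 'mask', every bit set in 'subset' must also be set in
 * 'superset'; bits outside the mask are ignored.
 */
static bool is_bitwise_subset(uint64_t superset, uint64_t subset,
			      uint64_t mask)
{
	return ((subset & mask) & ~superset) == 0;
}

int main(void)
{
	uint64_t advertised = 0x0000000f;	/* host-reported features */

	/* Dropping a feature is fine ... */
	assert(is_bitwise_subset(advertised, 0x00000003, ~0ULL));
	/* ... inventing one the host never offered is not. */
	assert(!is_bitwise_subset(advertised, 0x00000010, ~0ULL));
	return 0;
}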
| @@ -2972,24 +3226,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) | |||
| 2972 | 3226 | ||
| 2973 | switch (msr_index) { | 3227 | switch (msr_index) { |
| 2974 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: | 3228 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: |
| 2975 | lowp = &vmx->nested.nested_vmx_pinbased_ctls_low; | 3229 | lowp = &vmx->nested.msrs.pinbased_ctls_low; |
| 2976 | highp = &vmx->nested.nested_vmx_pinbased_ctls_high; | 3230 | highp = &vmx->nested.msrs.pinbased_ctls_high; |
| 2977 | break; | 3231 | break; |
| 2978 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: | 3232 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: |
| 2979 | lowp = &vmx->nested.nested_vmx_procbased_ctls_low; | 3233 | lowp = &vmx->nested.msrs.procbased_ctls_low; |
| 2980 | highp = &vmx->nested.nested_vmx_procbased_ctls_high; | 3234 | highp = &vmx->nested.msrs.procbased_ctls_high; |
| 2981 | break; | 3235 | break; |
| 2982 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: | 3236 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: |
| 2983 | lowp = &vmx->nested.nested_vmx_exit_ctls_low; | 3237 | lowp = &vmx->nested.msrs.exit_ctls_low; |
| 2984 | highp = &vmx->nested.nested_vmx_exit_ctls_high; | 3238 | highp = &vmx->nested.msrs.exit_ctls_high; |
| 2985 | break; | 3239 | break; |
| 2986 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: | 3240 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: |
| 2987 | lowp = &vmx->nested.nested_vmx_entry_ctls_low; | 3241 | lowp = &vmx->nested.msrs.entry_ctls_low; |
| 2988 | highp = &vmx->nested.nested_vmx_entry_ctls_high; | 3242 | highp = &vmx->nested.msrs.entry_ctls_high; |
| 2989 | break; | 3243 | break; |
| 2990 | case MSR_IA32_VMX_PROCBASED_CTLS2: | 3244 | case MSR_IA32_VMX_PROCBASED_CTLS2: |
| 2991 | lowp = &vmx->nested.nested_vmx_secondary_ctls_low; | 3245 | lowp = &vmx->nested.msrs.secondary_ctls_low; |
| 2992 | highp = &vmx->nested.nested_vmx_secondary_ctls_high; | 3246 | highp = &vmx->nested.msrs.secondary_ctls_high; |
| 2993 | break; | 3247 | break; |
| 2994 | default: | 3248 | default: |
| 2995 | BUG(); | 3249 | BUG(); |
| @@ -3020,13 +3274,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) | |||
| 3020 | GENMASK_ULL(13, 9) | BIT_ULL(31); | 3274 | GENMASK_ULL(13, 9) | BIT_ULL(31); |
| 3021 | u64 vmx_misc; | 3275 | u64 vmx_misc; |
| 3022 | 3276 | ||
| 3023 | vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low, | 3277 | vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, |
| 3024 | vmx->nested.nested_vmx_misc_high); | 3278 | vmx->nested.msrs.misc_high); |
| 3025 | 3279 | ||
| 3026 | if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) | 3280 | if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) |
| 3027 | return -EINVAL; | 3281 | return -EINVAL; |
| 3028 | 3282 | ||
| 3029 | if ((vmx->nested.nested_vmx_pinbased_ctls_high & | 3283 | if ((vmx->nested.msrs.pinbased_ctls_high & |
| 3030 | PIN_BASED_VMX_PREEMPTION_TIMER) && | 3284 | PIN_BASED_VMX_PREEMPTION_TIMER) && |
| 3031 | vmx_misc_preemption_timer_rate(data) != | 3285 | vmx_misc_preemption_timer_rate(data) != |
| 3032 | vmx_misc_preemption_timer_rate(vmx_misc)) | 3286 | vmx_misc_preemption_timer_rate(vmx_misc)) |
| @@ -3041,8 +3295,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) | |||
| 3041 | if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc)) | 3295 | if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc)) |
| 3042 | return -EINVAL; | 3296 | return -EINVAL; |
| 3043 | 3297 | ||
| 3044 | vmx->nested.nested_vmx_misc_low = data; | 3298 | vmx->nested.msrs.misc_low = data; |
| 3045 | vmx->nested.nested_vmx_misc_high = data >> 32; | 3299 | vmx->nested.msrs.misc_high = data >> 32; |
| 3046 | return 0; | 3300 | return 0; |
| 3047 | } | 3301 | } |
| 3048 | 3302 | ||
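Aside: several of these paths round-trip a pair of 32-bit MSR halves through a single u64 — vmx_control_msr() packs low/high on the read side, and the restore side splits data and data >> 32 back out, as in vmx_restore_vmx_misc() above. The packing itself is just:

#include <assert.h>
#include <stdint.h>

/* Pack two 32-bit halves into one 64-bit value (low in bits 31:0). */
static uint64_t pack_u32_pair(uint32_t low, uint32_t high)
{
	return ((uint64_t)high << 32) | low;
}

int main(void)
{
	uint32_t low = 0x1234abcd, high = 0x00ff00ff;
	uint64_t v = pack_u32_pair(low, high);

	/* Splitting mirrors the restore path: low = v, high = v >> 32. */
	assert((uint32_t)v == low);
	assert((uint32_t)(v >> 32) == high);
	return 0;
}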
| @@ -3050,15 +3304,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) | |||
| 3050 | { | 3304 | { |
| 3051 | u64 vmx_ept_vpid_cap; | 3305 | u64 vmx_ept_vpid_cap; |
| 3052 | 3306 | ||
| 3053 | vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps, | 3307 | vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, |
| 3054 | vmx->nested.nested_vmx_vpid_caps); | 3308 | vmx->nested.msrs.vpid_caps); |
| 3055 | 3309 | ||
| 3056 | /* Every bit is either reserved or a feature bit. */ | 3310 | /* Every bit is either reserved or a feature bit. */ |
| 3057 | if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) | 3311 | if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) |
| 3058 | return -EINVAL; | 3312 | return -EINVAL; |
| 3059 | 3313 | ||
| 3060 | vmx->nested.nested_vmx_ept_caps = data; | 3314 | vmx->nested.msrs.ept_caps = data; |
| 3061 | vmx->nested.nested_vmx_vpid_caps = data >> 32; | 3315 | vmx->nested.msrs.vpid_caps = data >> 32; |
| 3062 | return 0; | 3316 | return 0; |
| 3063 | } | 3317 | } |
| 3064 | 3318 | ||
| @@ -3068,10 +3322,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) | |||
| 3068 | 3322 | ||
| 3069 | switch (msr_index) { | 3323 | switch (msr_index) { |
| 3070 | case MSR_IA32_VMX_CR0_FIXED0: | 3324 | case MSR_IA32_VMX_CR0_FIXED0: |
| 3071 | msr = &vmx->nested.nested_vmx_cr0_fixed0; | 3325 | msr = &vmx->nested.msrs.cr0_fixed0; |
| 3072 | break; | 3326 | break; |
| 3073 | case MSR_IA32_VMX_CR4_FIXED0: | 3327 | case MSR_IA32_VMX_CR4_FIXED0: |
| 3074 | msr = &vmx->nested.nested_vmx_cr4_fixed0; | 3328 | msr = &vmx->nested.msrs.cr4_fixed0; |
| 3075 | break; | 3329 | break; |
| 3076 | default: | 3330 | default: |
| 3077 | BUG(); | 3331 | BUG(); |
| @@ -3135,7 +3389,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 3135 | case MSR_IA32_VMX_EPT_VPID_CAP: | 3389 | case MSR_IA32_VMX_EPT_VPID_CAP: |
| 3136 | return vmx_restore_vmx_ept_vpid_cap(vmx, data); | 3390 | return vmx_restore_vmx_ept_vpid_cap(vmx, data); |
| 3137 | case MSR_IA32_VMX_VMCS_ENUM: | 3391 | case MSR_IA32_VMX_VMCS_ENUM: |
| 3138 | vmx->nested.nested_vmx_vmcs_enum = data; | 3392 | vmx->nested.msrs.vmcs_enum = data; |
| 3139 | return 0; | 3393 | return 0; |
| 3140 | default: | 3394 | default: |
| 3141 | /* | 3395 | /* |
| @@ -3146,77 +3400,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 3146 | } | 3400 | } |
| 3147 | 3401 | ||
| 3148 | /* Returns 0 on success, non-0 otherwise. */ | 3402 | /* Returns 0 on success, non-0 otherwise. */ |
| 3149 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 3403 | static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) |
| 3150 | { | 3404 | { |
| 3151 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 3152 | |||
| 3153 | switch (msr_index) { | 3405 | switch (msr_index) { |
| 3154 | case MSR_IA32_VMX_BASIC: | 3406 | case MSR_IA32_VMX_BASIC: |
| 3155 | *pdata = vmx->nested.nested_vmx_basic; | 3407 | *pdata = msrs->basic; |
| 3156 | break; | 3408 | break; |
| 3157 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: | 3409 | case MSR_IA32_VMX_TRUE_PINBASED_CTLS: |
| 3158 | case MSR_IA32_VMX_PINBASED_CTLS: | 3410 | case MSR_IA32_VMX_PINBASED_CTLS: |
| 3159 | *pdata = vmx_control_msr( | 3411 | *pdata = vmx_control_msr( |
| 3160 | vmx->nested.nested_vmx_pinbased_ctls_low, | 3412 | msrs->pinbased_ctls_low, |
| 3161 | vmx->nested.nested_vmx_pinbased_ctls_high); | 3413 | msrs->pinbased_ctls_high); |
| 3162 | if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) | 3414 | if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) |
| 3163 | *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 3415 | *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
| 3164 | break; | 3416 | break; |
| 3165 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: | 3417 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: |
| 3166 | case MSR_IA32_VMX_PROCBASED_CTLS: | 3418 | case MSR_IA32_VMX_PROCBASED_CTLS: |
| 3167 | *pdata = vmx_control_msr( | 3419 | *pdata = vmx_control_msr( |
| 3168 | vmx->nested.nested_vmx_procbased_ctls_low, | 3420 | msrs->procbased_ctls_low, |
| 3169 | vmx->nested.nested_vmx_procbased_ctls_high); | 3421 | msrs->procbased_ctls_high); |
| 3170 | if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) | 3422 | if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) |
| 3171 | *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 3423 | *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
| 3172 | break; | 3424 | break; |
| 3173 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: | 3425 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: |
| 3174 | case MSR_IA32_VMX_EXIT_CTLS: | 3426 | case MSR_IA32_VMX_EXIT_CTLS: |
| 3175 | *pdata = vmx_control_msr( | 3427 | *pdata = vmx_control_msr( |
| 3176 | vmx->nested.nested_vmx_exit_ctls_low, | 3428 | msrs->exit_ctls_low, |
| 3177 | vmx->nested.nested_vmx_exit_ctls_high); | 3429 | msrs->exit_ctls_high); |
| 3178 | if (msr_index == MSR_IA32_VMX_EXIT_CTLS) | 3430 | if (msr_index == MSR_IA32_VMX_EXIT_CTLS) |
| 3179 | *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 3431 | *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; |
| 3180 | break; | 3432 | break; |
| 3181 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: | 3433 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: |
| 3182 | case MSR_IA32_VMX_ENTRY_CTLS: | 3434 | case MSR_IA32_VMX_ENTRY_CTLS: |
| 3183 | *pdata = vmx_control_msr( | 3435 | *pdata = vmx_control_msr( |
| 3184 | vmx->nested.nested_vmx_entry_ctls_low, | 3436 | msrs->entry_ctls_low, |
| 3185 | vmx->nested.nested_vmx_entry_ctls_high); | 3437 | msrs->entry_ctls_high); |
| 3186 | if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) | 3438 | if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) |
| 3187 | *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 3439 | *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; |
| 3188 | break; | 3440 | break; |
| 3189 | case MSR_IA32_VMX_MISC: | 3441 | case MSR_IA32_VMX_MISC: |
| 3190 | *pdata = vmx_control_msr( | 3442 | *pdata = vmx_control_msr( |
| 3191 | vmx->nested.nested_vmx_misc_low, | 3443 | msrs->misc_low, |
| 3192 | vmx->nested.nested_vmx_misc_high); | 3444 | msrs->misc_high); |
| 3193 | break; | 3445 | break; |
| 3194 | case MSR_IA32_VMX_CR0_FIXED0: | 3446 | case MSR_IA32_VMX_CR0_FIXED0: |
| 3195 | *pdata = vmx->nested.nested_vmx_cr0_fixed0; | 3447 | *pdata = msrs->cr0_fixed0; |
| 3196 | break; | 3448 | break; |
| 3197 | case MSR_IA32_VMX_CR0_FIXED1: | 3449 | case MSR_IA32_VMX_CR0_FIXED1: |
| 3198 | *pdata = vmx->nested.nested_vmx_cr0_fixed1; | 3450 | *pdata = msrs->cr0_fixed1; |
| 3199 | break; | 3451 | break; |
| 3200 | case MSR_IA32_VMX_CR4_FIXED0: | 3452 | case MSR_IA32_VMX_CR4_FIXED0: |
| 3201 | *pdata = vmx->nested.nested_vmx_cr4_fixed0; | 3453 | *pdata = msrs->cr4_fixed0; |
| 3202 | break; | 3454 | break; |
| 3203 | case MSR_IA32_VMX_CR4_FIXED1: | 3455 | case MSR_IA32_VMX_CR4_FIXED1: |
| 3204 | *pdata = vmx->nested.nested_vmx_cr4_fixed1; | 3456 | *pdata = msrs->cr4_fixed1; |
| 3205 | break; | 3457 | break; |
| 3206 | case MSR_IA32_VMX_VMCS_ENUM: | 3458 | case MSR_IA32_VMX_VMCS_ENUM: |
| 3207 | *pdata = vmx->nested.nested_vmx_vmcs_enum; | 3459 | *pdata = msrs->vmcs_enum; |
| 3208 | break; | 3460 | break; |
| 3209 | case MSR_IA32_VMX_PROCBASED_CTLS2: | 3461 | case MSR_IA32_VMX_PROCBASED_CTLS2: |
| 3210 | *pdata = vmx_control_msr( | 3462 | *pdata = vmx_control_msr( |
| 3211 | vmx->nested.nested_vmx_secondary_ctls_low, | 3463 | msrs->secondary_ctls_low, |
| 3212 | vmx->nested.nested_vmx_secondary_ctls_high); | 3464 | msrs->secondary_ctls_high); |
| 3213 | break; | 3465 | break; |
| 3214 | case MSR_IA32_VMX_EPT_VPID_CAP: | 3466 | case MSR_IA32_VMX_EPT_VPID_CAP: |
| 3215 | *pdata = vmx->nested.nested_vmx_ept_caps | | 3467 | *pdata = msrs->ept_caps | |
| 3216 | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); | 3468 | ((u64)msrs->vpid_caps << 32); |
| 3217 | break; | 3469 | break; |
| 3218 | case MSR_IA32_VMX_VMFUNC: | 3470 | case MSR_IA32_VMX_VMFUNC: |
| 3219 | *pdata = vmx->nested.nested_vmx_vmfunc_controls; | 3471 | *pdata = msrs->vmfunc_controls; |
| 3220 | break; | 3472 | break; |
| 3221 | default: | 3473 | default: |
| 3222 | return 1; | 3474 | return 1; |
| @@ -3235,7 +3487,16 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, | |||
| 3235 | 3487 | ||
| 3236 | static int vmx_get_msr_feature(struct kvm_msr_entry *msr) | 3488 | static int vmx_get_msr_feature(struct kvm_msr_entry *msr) |
| 3237 | { | 3489 | { |
| 3238 | return 1; | 3490 | switch (msr->index) { |
| 3491 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
| 3492 | if (!nested) | ||
| 3493 | return 1; | ||
| 3494 | return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); | ||
| 3495 | default: | ||
| 3496 | return 1; | ||
| 3497 | } | ||
| 3498 | |||
| 3499 | return 0; | ||
| 3239 | } | 3500 | } |
| 3240 | 3501 | ||
| 3241 | /* | 3502 | /* |
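Aside: the "case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:" arm in the new vmx_get_msr_feature() uses the GNU case-range extension to route a contiguous block of MSR indices to one handler. A small standalone illustration (builds with GCC or Clang; the constants are made up, not real MSR numbers):

#include <stdio.h>

#define FEAT_MSR_FIRST 0x480
#define FEAT_MSR_LAST  0x491

/* Returns 0 on success, 1 for unhandled indices, as in the hunk above. */
static int get_feature(unsigned int index, unsigned long long *data)
{
	switch (index) {
	case FEAT_MSR_FIRST ... FEAT_MSR_LAST:	/* GNU case-range extension */
		*data = 0x1000ULL + (index - FEAT_MSR_FIRST);
		return 0;
	default:
		return 1;
	}
}

int main(void)
{
	unsigned long long data;

	if (!get_feature(0x482, &data))
		printf("0x482 -> %#llx\n", data);
	if (get_feature(0x500, &data))
		printf("0x500 is unhandled\n");
	return 0;
}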
| @@ -3309,7 +3570,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 3309 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | 3570 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: |
| 3310 | if (!nested_vmx_allowed(vcpu)) | 3571 | if (!nested_vmx_allowed(vcpu)) |
| 3311 | return 1; | 3572 | return 1; |
| 3312 | return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data); | 3573 | return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, |
| 3574 | &msr_info->data); | ||
| 3313 | case MSR_IA32_XSS: | 3575 | case MSR_IA32_XSS: |
| 3314 | if (!vmx_xsaves_supported()) | 3576 | if (!vmx_xsaves_supported()) |
| 3315 | return 1; | 3577 | return 1; |
| @@ -3602,6 +3864,14 @@ static int hardware_enable(void) | |||
| 3602 | if (cr4_read_shadow() & X86_CR4_VMXE) | 3864 | if (cr4_read_shadow() & X86_CR4_VMXE) |
| 3603 | return -EBUSY; | 3865 | return -EBUSY; |
| 3604 | 3866 | ||
| 3867 | /* | ||
| 3868 | * This can happen if we hot-added a CPU but failed to allocate | ||
| 3869 | * VP assist page for it. | ||
| 3870 | */ | ||
| 3871 | if (static_branch_unlikely(&enable_evmcs) && | ||
| 3872 | !hv_get_vp_assist_page(cpu)) | ||
| 3873 | return -EFAULT; | ||
| 3874 | |||
| 3605 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); | 3875 | INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); |
| 3606 | INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); | 3876 | INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); |
| 3607 | spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | 3877 | spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); |
| @@ -3700,6 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 3700 | u32 _vmexit_control = 0; | 3970 | u32 _vmexit_control = 0; |
| 3701 | u32 _vmentry_control = 0; | 3971 | u32 _vmentry_control = 0; |
| 3702 | 3972 | ||
| 3973 | memset(vmcs_conf, 0, sizeof(*vmcs_conf)); | ||
| 3703 | min = CPU_BASED_HLT_EXITING | | 3974 | min = CPU_BASED_HLT_EXITING | |
| 3704 | #ifdef CONFIG_X86_64 | 3975 | #ifdef CONFIG_X86_64 |
| 3705 | CPU_BASED_CR8_LOAD_EXITING | | 3976 | CPU_BASED_CR8_LOAD_EXITING | |
| @@ -3710,13 +3981,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 3710 | CPU_BASED_UNCOND_IO_EXITING | | 3981 | CPU_BASED_UNCOND_IO_EXITING | |
| 3711 | CPU_BASED_MOV_DR_EXITING | | 3982 | CPU_BASED_MOV_DR_EXITING | |
| 3712 | CPU_BASED_USE_TSC_OFFSETING | | 3983 | CPU_BASED_USE_TSC_OFFSETING | |
| 3984 | CPU_BASED_MWAIT_EXITING | | ||
| 3985 | CPU_BASED_MONITOR_EXITING | | ||
| 3713 | CPU_BASED_INVLPG_EXITING | | 3986 | CPU_BASED_INVLPG_EXITING | |
| 3714 | CPU_BASED_RDPMC_EXITING; | 3987 | CPU_BASED_RDPMC_EXITING; |
| 3715 | 3988 | ||
| 3716 | if (!kvm_mwait_in_guest()) | ||
| 3717 | min |= CPU_BASED_MWAIT_EXITING | | ||
| 3718 | CPU_BASED_MONITOR_EXITING; | ||
| 3719 | |||
| 3720 | opt = CPU_BASED_TPR_SHADOW | | 3989 | opt = CPU_BASED_TPR_SHADOW | |
| 3721 | CPU_BASED_USE_MSR_BITMAPS | | 3990 | CPU_BASED_USE_MSR_BITMAPS | |
| 3722 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 3991 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| @@ -3835,7 +4104,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 3835 | vmcs_conf->size = vmx_msr_high & 0x1fff; | 4104 | vmcs_conf->size = vmx_msr_high & 0x1fff; |
| 3836 | vmcs_conf->order = get_order(vmcs_conf->size); | 4105 | vmcs_conf->order = get_order(vmcs_conf->size); |
| 3837 | vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; | 4106 | vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; |
| 3838 | vmcs_conf->revision_id = vmx_msr_low; | 4107 | |
| 4108 | /* KVM supports Enlightened VMCS v1 only */ | ||
| 4109 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 4110 | vmcs_conf->revision_id = KVM_EVMCS_VERSION; | ||
| 4111 | else | ||
| 4112 | vmcs_conf->revision_id = vmx_msr_low; | ||
| 3839 | 4113 | ||
| 3840 | vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; | 4114 | vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; |
| 3841 | vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; | 4115 | vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; |
| @@ -3843,6 +4117,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 3843 | vmcs_conf->vmexit_ctrl = _vmexit_control; | 4117 | vmcs_conf->vmexit_ctrl = _vmexit_control; |
| 3844 | vmcs_conf->vmentry_ctrl = _vmentry_control; | 4118 | vmcs_conf->vmentry_ctrl = _vmentry_control; |
| 3845 | 4119 | ||
| 4120 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 4121 | evmcs_sanitize_exec_ctrls(vmcs_conf); | ||
| 4122 | |||
| 3846 | cpu_has_load_ia32_efer = | 4123 | cpu_has_load_ia32_efer = |
| 3847 | allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, | 4124 | allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, |
| 3848 | VM_ENTRY_LOAD_IA32_EFER) | 4125 | VM_ENTRY_LOAD_IA32_EFER) |
| @@ -4162,6 +4439,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 4162 | { | 4439 | { |
| 4163 | unsigned long flags; | 4440 | unsigned long flags; |
| 4164 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4441 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 4442 | struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); | ||
| 4165 | 4443 | ||
| 4166 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); | 4444 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); |
| 4167 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); | 4445 | vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); |
| @@ -4177,13 +4455,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 4177 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 4455 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
| 4178 | * vcpu. Warn the user that an update is overdue. | 4456 | * vcpu. Warn the user that an update is overdue. |
| 4179 | */ | 4457 | */ |
| 4180 | if (!vcpu->kvm->arch.tss_addr) | 4458 | if (!kvm_vmx->tss_addr) |
| 4181 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " | 4459 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " |
| 4182 | "called before entering vcpu\n"); | 4460 | "called before entering vcpu\n"); |
| 4183 | 4461 | ||
| 4184 | vmx_segment_cache_clear(vmx); | 4462 | vmx_segment_cache_clear(vmx); |
| 4185 | 4463 | ||
| 4186 | vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); | 4464 | vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); |
| 4187 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); | 4465 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); |
| 4188 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 4466 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
| 4189 | 4467 | ||
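Aside: tss_addr (and, further down, ept_identity_map_addr) moves from kvm->arch into a VMX-private struct reached through to_kvm_vmx(kvm). That accessor is presumably the usual container_of embedding trick: the vendor struct wraps the generic one, so the generic pointer can be converted back without any lookup. A minimal sketch of that layout (names and field choice invented for illustration):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kvm {			/* generic, arch-neutral part */
	int nr_vcpus;
};

struct kvm_vmx {		/* vendor-private wrapper */
	struct kvm kvm;		/* embedded generic struct */
	unsigned long tss_addr;
};

static struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_vmx, kvm);
}

int main(void)
{
	struct kvm_vmx vmx_vm = {
		.kvm = { .nr_vcpus = 1 },
		.tss_addr = 0xfeffd000UL,
	};
	struct kvm *generic = &vmx_vm.kvm;

	printf("tss_addr=%#lx\n", to_kvm_vmx(generic)->tss_addr);
	return 0;
}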
| @@ -4291,7 +4569,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | |||
| 4291 | 4569 | ||
| 4292 | static void vmx_decache_cr3(struct kvm_vcpu *vcpu) | 4570 | static void vmx_decache_cr3(struct kvm_vcpu *vcpu) |
| 4293 | { | 4571 | { |
| 4294 | if (enable_ept && is_paging(vcpu)) | 4572 | if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) |
| 4295 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 4573 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
| 4296 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); | 4574 | __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); |
| 4297 | } | 4575 | } |
| @@ -4339,11 +4617,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu) | |||
| 4339 | 4617 | ||
| 4340 | static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | 4618 | static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) |
| 4341 | { | 4619 | { |
| 4342 | u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0; | 4620 | u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; |
| 4343 | u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1; | 4621 | u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; |
| 4344 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 4622 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
| 4345 | 4623 | ||
| 4346 | if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high & | 4624 | if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high & |
| 4347 | SECONDARY_EXEC_UNRESTRICTED_GUEST && | 4625 | SECONDARY_EXEC_UNRESTRICTED_GUEST && |
| 4348 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) | 4626 | nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) |
| 4349 | fixed0 &= ~(X86_CR0_PE | X86_CR0_PG); | 4627 | fixed0 &= ~(X86_CR0_PE | X86_CR0_PG); |
| @@ -4353,16 +4631,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | |||
| 4353 | 4631 | ||
| 4354 | static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) | 4632 | static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) |
| 4355 | { | 4633 | { |
| 4356 | u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0; | 4634 | u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; |
| 4357 | u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1; | 4635 | u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; |
| 4358 | 4636 | ||
| 4359 | return fixed_bits_valid(val, fixed0, fixed1); | 4637 | return fixed_bits_valid(val, fixed0, fixed1); |
| 4360 | } | 4638 | } |
| 4361 | 4639 | ||
| 4362 | static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) | 4640 | static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) |
| 4363 | { | 4641 | { |
| 4364 | u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0; | 4642 | u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; |
| 4365 | u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1; | 4643 | u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; |
| 4366 | 4644 | ||
| 4367 | return fixed_bits_valid(val, fixed0, fixed1); | 4645 | return fixed_bits_valid(val, fixed0, fixed1); |
| 4368 | } | 4646 | } |
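Aside: the fixed0/fixed1 pairs follow the MSR_IA32_VMX_CR{0,4}_FIXED{0,1} convention from the SDM — bits set in FIXED0 must be 1 in the register, and bits clear in FIXED1 must be 0. A check equivalent in spirit to fixed_bits_valid() (the kernel's own definition may be phrased differently):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * fixed0: bits the guest must keep set.
 * fixed1: bits the guest is allowed to set (everything else must be 0).
 */
static bool fixed_bits_valid(uint64_t val, uint64_t fixed0, uint64_t fixed1)
{
	return (val & fixed0) == fixed0 && (val & ~fixed1) == 0;
}

int main(void)
{
	uint64_t fixed0 = 0x21;		/* e.g. CR0.PE and CR0.NE must stay on */
	uint64_t fixed1 = 0xffffffff;	/* only the low 32 bits may be set */

	assert(fixed_bits_valid(0x31, fixed0, fixed1));
	assert(!fixed_bits_valid(0x10, fixed0, fixed1));	/* PE cleared */
	assert(!fixed_bits_valid(0x100000021ULL, fixed0, fixed1)); /* bit 32 set */
	return 0;
}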
| @@ -4428,7 +4706,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 4428 | } | 4706 | } |
| 4429 | #endif | 4707 | #endif |
| 4430 | 4708 | ||
| 4431 | if (enable_ept) | 4709 | if (enable_ept && !enable_unrestricted_guest) |
| 4432 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 4710 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
| 4433 | 4711 | ||
| 4434 | vmcs_writel(CR0_READ_SHADOW, cr0); | 4712 | vmcs_writel(CR0_READ_SHADOW, cr0); |
| @@ -4469,10 +4747,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 4469 | if (enable_ept) { | 4747 | if (enable_ept) { |
| 4470 | eptp = construct_eptp(vcpu, cr3); | 4748 | eptp = construct_eptp(vcpu, cr3); |
| 4471 | vmcs_write64(EPT_POINTER, eptp); | 4749 | vmcs_write64(EPT_POINTER, eptp); |
| 4472 | if (is_paging(vcpu) || is_guest_mode(vcpu)) | 4750 | if (enable_unrestricted_guest || is_paging(vcpu) || |
| 4751 | is_guest_mode(vcpu)) | ||
| 4473 | guest_cr3 = kvm_read_cr3(vcpu); | 4752 | guest_cr3 = kvm_read_cr3(vcpu); |
| 4474 | else | 4753 | else |
| 4475 | guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; | 4754 | guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr; |
| 4476 | ept_load_pdptrs(vcpu); | 4755 | ept_load_pdptrs(vcpu); |
| 4477 | } | 4756 | } |
| 4478 | 4757 | ||
| @@ -4487,11 +4766,15 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 4487 | * is in force while we are in guest mode. Do not let guests control | 4766 | * is in force while we are in guest mode. Do not let guests control |
| 4488 | * this bit, even if host CR4.MCE == 0. | 4767 | * this bit, even if host CR4.MCE == 0. |
| 4489 | */ | 4768 | */ |
| 4490 | unsigned long hw_cr4 = | 4769 | unsigned long hw_cr4; |
| 4491 | (cr4_read_shadow() & X86_CR4_MCE) | | 4770 | |
| 4492 | (cr4 & ~X86_CR4_MCE) | | 4771 | hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); |
| 4493 | (to_vmx(vcpu)->rmode.vm86_active ? | 4772 | if (enable_unrestricted_guest) |
| 4494 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 4773 | hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; |
| 4774 | else if (to_vmx(vcpu)->rmode.vm86_active) | ||
| 4775 | hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; | ||
| 4776 | else | ||
| 4777 | hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; | ||
| 4495 | 4778 | ||
| 4496 | if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { | 4779 | if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { |
| 4497 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, | 4780 | vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, |
| @@ -4517,16 +4800,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 4517 | return 1; | 4800 | return 1; |
| 4518 | 4801 | ||
| 4519 | vcpu->arch.cr4 = cr4; | 4802 | vcpu->arch.cr4 = cr4; |
| 4520 | if (enable_ept) { | 4803 | |
| 4521 | if (!is_paging(vcpu)) { | 4804 | if (!enable_unrestricted_guest) { |
| 4522 | hw_cr4 &= ~X86_CR4_PAE; | 4805 | if (enable_ept) { |
| 4523 | hw_cr4 |= X86_CR4_PSE; | 4806 | if (!is_paging(vcpu)) { |
| 4524 | } else if (!(cr4 & X86_CR4_PAE)) { | 4807 | hw_cr4 &= ~X86_CR4_PAE; |
| 4525 | hw_cr4 &= ~X86_CR4_PAE; | 4808 | hw_cr4 |= X86_CR4_PSE; |
| 4809 | } else if (!(cr4 & X86_CR4_PAE)) { | ||
| 4810 | hw_cr4 &= ~X86_CR4_PAE; | ||
| 4811 | } | ||
| 4526 | } | 4812 | } |
| 4527 | } | ||
| 4528 | 4813 | ||
| 4529 | if (!enable_unrestricted_guest && !is_paging(vcpu)) | ||
| 4530 | /* | 4814 | /* |
| 4531 | * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in | 4815 | * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in |
| 4532 | * hardware. To emulate this behavior, SMEP/SMAP/PKU needs | 4816 | * hardware. To emulate this behavior, SMEP/SMAP/PKU needs |
| @@ -4538,7 +4822,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 4538 | * If enable_unrestricted_guest, the CPU automatically | 4822 | * If enable_unrestricted_guest, the CPU automatically |
| 4539 | * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. | 4823 | * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. |
| 4540 | */ | 4824 | */ |
| 4541 | hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); | 4825 | if (!is_paging(vcpu)) |
| 4826 | hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); | ||
| 4827 | } | ||
| 4542 | 4828 | ||
| 4543 | vmcs_writel(CR4_READ_SHADOW, cr4); | 4829 | vmcs_writel(CR4_READ_SHADOW, cr4); |
| 4544 | vmcs_writel(GUEST_CR4, hw_cr4); | 4830 | vmcs_writel(GUEST_CR4, hw_cr4); |
| @@ -4906,7 +5192,7 @@ static int init_rmode_tss(struct kvm *kvm) | |||
| 4906 | int idx, r; | 5192 | int idx, r; |
| 4907 | 5193 | ||
| 4908 | idx = srcu_read_lock(&kvm->srcu); | 5194 | idx = srcu_read_lock(&kvm->srcu); |
| 4909 | fn = kvm->arch.tss_addr >> PAGE_SHIFT; | 5195 | fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; |
| 4910 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 5196 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
| 4911 | if (r < 0) | 5197 | if (r < 0) |
| 4912 | goto out; | 5198 | goto out; |
| @@ -4932,22 +5218,23 @@ out: | |||
| 4932 | 5218 | ||
| 4933 | static int init_rmode_identity_map(struct kvm *kvm) | 5219 | static int init_rmode_identity_map(struct kvm *kvm) |
| 4934 | { | 5220 | { |
| 5221 | struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); | ||
| 4935 | int i, idx, r = 0; | 5222 | int i, idx, r = 0; |
| 4936 | kvm_pfn_t identity_map_pfn; | 5223 | kvm_pfn_t identity_map_pfn; |
| 4937 | u32 tmp; | 5224 | u32 tmp; |
| 4938 | 5225 | ||
| 4939 | /* Protect kvm->arch.ept_identity_pagetable_done. */ | 5226 | /* Protect kvm_vmx->ept_identity_pagetable_done. */ |
| 4940 | mutex_lock(&kvm->slots_lock); | 5227 | mutex_lock(&kvm->slots_lock); |
| 4941 | 5228 | ||
| 4942 | if (likely(kvm->arch.ept_identity_pagetable_done)) | 5229 | if (likely(kvm_vmx->ept_identity_pagetable_done)) |
| 4943 | goto out2; | 5230 | goto out2; |
| 4944 | 5231 | ||
| 4945 | if (!kvm->arch.ept_identity_map_addr) | 5232 | if (!kvm_vmx->ept_identity_map_addr) |
| 4946 | kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 5233 | kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; |
| 4947 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; | 5234 | identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; |
| 4948 | 5235 | ||
| 4949 | r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, | 5236 | r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, |
| 4950 | kvm->arch.ept_identity_map_addr, PAGE_SIZE); | 5237 | kvm_vmx->ept_identity_map_addr, PAGE_SIZE); |
| 4951 | if (r < 0) | 5238 | if (r < 0) |
| 4952 | goto out2; | 5239 | goto out2; |
| 4953 | 5240 | ||
| @@ -4964,7 +5251,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
| 4964 | if (r < 0) | 5251 | if (r < 0) |
| 4965 | goto out; | 5252 | goto out; |
| 4966 | } | 5253 | } |
| 4967 | kvm->arch.ept_identity_pagetable_done = true; | 5254 | kvm_vmx->ept_identity_pagetable_done = true; |
| 4968 | 5255 | ||
| 4969 | out: | 5256 | out: |
| 4970 | srcu_read_unlock(&kvm->srcu, idx); | 5257 | srcu_read_unlock(&kvm->srcu, idx); |
| @@ -5500,6 +5787,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
| 5500 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | 5787 | exec_control |= CPU_BASED_CR3_STORE_EXITING | |
| 5501 | CPU_BASED_CR3_LOAD_EXITING | | 5788 | CPU_BASED_CR3_LOAD_EXITING | |
| 5502 | CPU_BASED_INVLPG_EXITING; | 5789 | CPU_BASED_INVLPG_EXITING; |
| 5790 | if (kvm_mwait_in_guest(vmx->vcpu.kvm)) | ||
| 5791 | exec_control &= ~(CPU_BASED_MWAIT_EXITING | | ||
| 5792 | CPU_BASED_MONITOR_EXITING); | ||
| 5793 | if (kvm_hlt_in_guest(vmx->vcpu.kvm)) | ||
| 5794 | exec_control &= ~CPU_BASED_HLT_EXITING; | ||
| 5503 | return exec_control; | 5795 | return exec_control; |
| 5504 | } | 5796 | } |
| 5505 | 5797 | ||
| @@ -5533,7 +5825,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5533 | } | 5825 | } |
| 5534 | if (!enable_unrestricted_guest) | 5826 | if (!enable_unrestricted_guest) |
| 5535 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 5827 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 5536 | if (!ple_gap) | 5828 | if (kvm_pause_in_guest(vmx->vcpu.kvm)) |
| 5537 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 5829 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; |
| 5538 | if (!kvm_vcpu_apicv_active(vcpu)) | 5830 | if (!kvm_vcpu_apicv_active(vcpu)) |
| 5539 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 5831 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
| @@ -5565,10 +5857,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5565 | 5857 | ||
| 5566 | if (nested) { | 5858 | if (nested) { |
| 5567 | if (xsaves_enabled) | 5859 | if (xsaves_enabled) |
| 5568 | vmx->nested.nested_vmx_secondary_ctls_high |= | 5860 | vmx->nested.msrs.secondary_ctls_high |= |
| 5569 | SECONDARY_EXEC_XSAVES; | 5861 | SECONDARY_EXEC_XSAVES; |
| 5570 | else | 5862 | else |
| 5571 | vmx->nested.nested_vmx_secondary_ctls_high &= | 5863 | vmx->nested.msrs.secondary_ctls_high &= |
| 5572 | ~SECONDARY_EXEC_XSAVES; | 5864 | ~SECONDARY_EXEC_XSAVES; |
| 5573 | } | 5865 | } |
| 5574 | } | 5866 | } |
| @@ -5580,10 +5872,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5580 | 5872 | ||
| 5581 | if (nested) { | 5873 | if (nested) { |
| 5582 | if (rdtscp_enabled) | 5874 | if (rdtscp_enabled) |
| 5583 | vmx->nested.nested_vmx_secondary_ctls_high |= | 5875 | vmx->nested.msrs.secondary_ctls_high |= |
| 5584 | SECONDARY_EXEC_RDTSCP; | 5876 | SECONDARY_EXEC_RDTSCP; |
| 5585 | else | 5877 | else |
| 5586 | vmx->nested.nested_vmx_secondary_ctls_high &= | 5878 | vmx->nested.msrs.secondary_ctls_high &= |
| 5587 | ~SECONDARY_EXEC_RDTSCP; | 5879 | ~SECONDARY_EXEC_RDTSCP; |
| 5588 | } | 5880 | } |
| 5589 | } | 5881 | } |
| @@ -5601,10 +5893,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5601 | 5893 | ||
| 5602 | if (nested) { | 5894 | if (nested) { |
| 5603 | if (invpcid_enabled) | 5895 | if (invpcid_enabled) |
| 5604 | vmx->nested.nested_vmx_secondary_ctls_high |= | 5896 | vmx->nested.msrs.secondary_ctls_high |= |
| 5605 | SECONDARY_EXEC_ENABLE_INVPCID; | 5897 | SECONDARY_EXEC_ENABLE_INVPCID; |
| 5606 | else | 5898 | else |
| 5607 | vmx->nested.nested_vmx_secondary_ctls_high &= | 5899 | vmx->nested.msrs.secondary_ctls_high &= |
| 5608 | ~SECONDARY_EXEC_ENABLE_INVPCID; | 5900 | ~SECONDARY_EXEC_ENABLE_INVPCID; |
| 5609 | } | 5901 | } |
| 5610 | } | 5902 | } |
| @@ -5616,10 +5908,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5616 | 5908 | ||
| 5617 | if (nested) { | 5909 | if (nested) { |
| 5618 | if (rdrand_enabled) | 5910 | if (rdrand_enabled) |
| 5619 | vmx->nested.nested_vmx_secondary_ctls_high |= | 5911 | vmx->nested.msrs.secondary_ctls_high |= |
| 5620 | SECONDARY_EXEC_RDRAND_EXITING; | 5912 | SECONDARY_EXEC_RDRAND_EXITING; |
| 5621 | else | 5913 | else |
| 5622 | vmx->nested.nested_vmx_secondary_ctls_high &= | 5914 | vmx->nested.msrs.secondary_ctls_high &= |
| 5623 | ~SECONDARY_EXEC_RDRAND_EXITING; | 5915 | ~SECONDARY_EXEC_RDRAND_EXITING; |
| 5624 | } | 5916 | } |
| 5625 | } | 5917 | } |
| @@ -5631,10 +5923,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) | |||
| 5631 | 5923 | ||
| 5632 | if (nested) { | 5924 | if (nested) { |
| 5633 | if (rdseed_enabled) | 5925 | if (rdseed_enabled) |
| 5634 | vmx->nested.nested_vmx_secondary_ctls_high |= | 5926 | vmx->nested.msrs.secondary_ctls_high |= |
| 5635 | SECONDARY_EXEC_RDSEED_EXITING; | 5927 | SECONDARY_EXEC_RDSEED_EXITING; |
| 5636 | else | 5928 | else |
| 5637 | vmx->nested.nested_vmx_secondary_ctls_high &= | 5929 | vmx->nested.msrs.secondary_ctls_high &= |
| 5638 | ~SECONDARY_EXEC_RDSEED_EXITING; | 5930 | ~SECONDARY_EXEC_RDSEED_EXITING; |
| 5639 | } | 5931 | } |
| 5640 | } | 5932 | } |
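Aside: each of the blocks above repeats the same toggle — set a nested capability bit when the matching guest CPUID feature is exposed, clear it otherwise. The repeated shape reduces to a tiny helper like the one below (illustrative only; the flag names are stand-ins for the SECONDARY_EXEC_* bits and the kernel keeps the open-coded form here):

#include <assert.h>
#include <stdbool.h>

/* Set or clear 'flag' in '*word' depending on 'enabled'. */
static void update_ctl(unsigned int *word, unsigned int flag, bool enabled)
{
	if (enabled)
		*word |= flag;
	else
		*word &= ~flag;
}

#define CTL_XSAVES	0x1u	/* stand-in flag values */
#define CTL_RDTSCP	0x2u

int main(void)
{
	unsigned int ctls_high = CTL_RDTSCP;

	update_ctl(&ctls_high, CTL_XSAVES, true);	/* feature exposed */
	update_ctl(&ctls_high, CTL_RDTSCP, false);	/* feature hidden */
	assert(ctls_high == CTL_XSAVES);
	return 0;
}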
| @@ -5696,7 +5988,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 5696 | vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); | 5988 | vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); |
| 5697 | } | 5989 | } |
| 5698 | 5990 | ||
| 5699 | if (ple_gap) { | 5991 | if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { |
| 5700 | vmcs_write32(PLE_GAP, ple_gap); | 5992 | vmcs_write32(PLE_GAP, ple_gap); |
| 5701 | vmx->ple_window = ple_window; | 5993 | vmx->ple_window = ple_window; |
| 5702 | vmx->ple_window_dirty = true; | 5994 | vmx->ple_window_dirty = true; |
| @@ -5861,6 +6153,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
| 5861 | update_exception_bitmap(vcpu); | 6153 | update_exception_bitmap(vcpu); |
| 5862 | 6154 | ||
| 5863 | vpid_sync_context(vmx->vpid); | 6155 | vpid_sync_context(vmx->vpid); |
| 6156 | if (init_event) | ||
| 6157 | vmx_clear_hlt(vcpu); | ||
| 5864 | } | 6158 | } |
| 5865 | 6159 | ||
| 5866 | /* | 6160 | /* |
| @@ -5885,8 +6179,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) | |||
| 5885 | 6179 | ||
| 5886 | static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) | 6180 | static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) |
| 5887 | { | 6181 | { |
| 5888 | return get_vmcs12(vcpu)->pin_based_vm_exec_control & | 6182 | return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); |
| 5889 | PIN_BASED_NMI_EXITING; | ||
| 5890 | } | 6183 | } |
| 5891 | 6184 | ||
| 5892 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 6185 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
| @@ -5932,6 +6225,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) | |||
| 5932 | } else | 6225 | } else |
| 5933 | intr |= INTR_TYPE_EXT_INTR; | 6226 | intr |= INTR_TYPE_EXT_INTR; |
| 5934 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | 6227 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); |
| 6228 | |||
| 6229 | vmx_clear_hlt(vcpu); | ||
| 5935 | } | 6230 | } |
| 5936 | 6231 | ||
| 5937 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 6232 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
| @@ -5962,6 +6257,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 5962 | 6257 | ||
| 5963 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 6258 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 5964 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 6259 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
| 6260 | |||
| 6261 | vmx_clear_hlt(vcpu); | ||
| 5965 | } | 6262 | } |
| 5966 | 6263 | ||
| 5967 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 6264 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
| @@ -6024,14 +6321,23 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
| 6024 | { | 6321 | { |
| 6025 | int ret; | 6322 | int ret; |
| 6026 | 6323 | ||
| 6324 | if (enable_unrestricted_guest) | ||
| 6325 | return 0; | ||
| 6326 | |||
| 6027 | ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, | 6327 | ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, |
| 6028 | PAGE_SIZE * 3); | 6328 | PAGE_SIZE * 3); |
| 6029 | if (ret) | 6329 | if (ret) |
| 6030 | return ret; | 6330 | return ret; |
| 6031 | kvm->arch.tss_addr = addr; | 6331 | to_kvm_vmx(kvm)->tss_addr = addr; |
| 6032 | return init_rmode_tss(kvm); | 6332 | return init_rmode_tss(kvm); |
| 6033 | } | 6333 | } |
| 6034 | 6334 | ||
| 6335 | static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) | ||
| 6336 | { | ||
| 6337 | to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; | ||
| 6338 | return 0; | ||
| 6339 | } | ||
| 6340 | |||
| 6035 | static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) | 6341 | static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) |
| 6036 | { | 6342 | { |
| 6037 | switch (vec) { | 6343 | switch (vec) { |
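This hunk, together with vmx_vm_alloc()/vmx_vm_free() further down, moves the TSS and EPT identity-map addresses out of kvm->arch and into a VMX-private VM structure. The structure and the to_kvm_vmx() cast are defined outside this excerpt; from the way they are used in this patch they look roughly like:

	struct kvm_vmx {
		struct kvm kvm;

		unsigned int tss_addr;
		bool ept_identity_pagetable_done;
		gpa_t ept_identity_map_addr;
	};

	static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
	{
		return container_of(kvm, struct kvm_vmx, kvm);
	}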
| @@ -6134,19 +6440,24 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
| 6134 | if (is_nmi(intr_info)) | 6440 | if (is_nmi(intr_info)) |
| 6135 | return 1; /* already handled by vmx_vcpu_run() */ | 6441 | return 1; /* already handled by vmx_vcpu_run() */ |
| 6136 | 6442 | ||
| 6137 | if (is_invalid_opcode(intr_info)) { | 6443 | if (is_invalid_opcode(intr_info)) |
| 6138 | er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); | 6444 | return handle_ud(vcpu); |
| 6139 | if (er == EMULATE_USER_EXIT) | ||
| 6140 | return 0; | ||
| 6141 | if (er != EMULATE_DONE) | ||
| 6142 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 6143 | return 1; | ||
| 6144 | } | ||
| 6145 | 6445 | ||
| 6146 | error_code = 0; | 6446 | error_code = 0; |
| 6147 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 6447 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
| 6148 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 6448 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
| 6149 | 6449 | ||
| 6450 | if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { | ||
| 6451 | WARN_ON_ONCE(!enable_vmware_backdoor); | ||
| 6452 | er = emulate_instruction(vcpu, | ||
| 6453 | EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); | ||
| 6454 | if (er == EMULATE_USER_EXIT) | ||
| 6455 | return 0; | ||
| 6456 | else if (er != EMULATE_DONE) | ||
| 6457 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | ||
| 6458 | return 1; | ||
| 6459 | } | ||
| 6460 | |||
| 6150 | /* | 6461 | /* |
| 6151 | * The #PF with PFEC.RSVD = 1 indicates the guest is accessing | 6462 | * The #PF with PFEC.RSVD = 1 indicates the guest is accessing |
| 6152 | * MMIO, it is better to report an internal error. | 6463 | * MMIO, it is better to report an internal error. |
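handle_ud() is a new common helper in x86.c that absorbs the open-coded #UD emulation removed above (and, per the force_emulation_prefix parameter visible at the end of this excerpt, can additionally honor a special "emulate me" prefix). A sketch of its basic shape, inferred from the lines it replaces:

	int handle_ud(struct kvm_vcpu *vcpu)
	{
		enum emulation_result er;

		/* force_emulation_prefix handling omitted from this sketch. */
		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
		if (er == EMULATE_USER_EXIT)
			return 0;
		if (er != EMULATE_DONE)
			kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}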
| @@ -6232,28 +6543,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu) | |||
| 6232 | static int handle_io(struct kvm_vcpu *vcpu) | 6543 | static int handle_io(struct kvm_vcpu *vcpu) |
| 6233 | { | 6544 | { |
| 6234 | unsigned long exit_qualification; | 6545 | unsigned long exit_qualification; |
| 6235 | int size, in, string, ret; | 6546 | int size, in, string; |
| 6236 | unsigned port; | 6547 | unsigned port; |
| 6237 | 6548 | ||
| 6238 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6549 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 6239 | string = (exit_qualification & 16) != 0; | 6550 | string = (exit_qualification & 16) != 0; |
| 6240 | in = (exit_qualification & 8) != 0; | ||
| 6241 | 6551 | ||
| 6242 | ++vcpu->stat.io_exits; | 6552 | ++vcpu->stat.io_exits; |
| 6243 | 6553 | ||
| 6244 | if (string || in) | 6554 | if (string) |
| 6245 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; | 6555 | return emulate_instruction(vcpu, 0) == EMULATE_DONE; |
| 6246 | 6556 | ||
| 6247 | port = exit_qualification >> 16; | 6557 | port = exit_qualification >> 16; |
| 6248 | size = (exit_qualification & 7) + 1; | 6558 | size = (exit_qualification & 7) + 1; |
| 6559 | in = (exit_qualification & 8) != 0; | ||
| 6249 | 6560 | ||
| 6250 | ret = kvm_skip_emulated_instruction(vcpu); | 6561 | return kvm_fast_pio(vcpu, size, port, in); |
| 6251 | |||
| 6252 | /* | ||
| 6253 | * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered | ||
| 6254 | * KVM_EXIT_DEBUG here. | ||
| 6255 | */ | ||
| 6256 | return kvm_fast_pio_out(vcpu, size, port) && ret; | ||
| 6257 | } | 6562 | } |
| 6258 | 6563 | ||
| 6259 | static void | 6564 | static void |
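kvm_fast_pio() replaces the OUT-only kvm_fast_pio_out() path so that non-string IN exits also stay on the fast path instead of going through the full emulator. The dispatcher lives in x86.c; inferred from the logic removed above, it presumably looks like:

	int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
	{
		int ret = kvm_skip_emulated_instruction(vcpu);

		/*
		 * Carried over from handle_io(): this may still squash a
		 * KVM_GUESTDBG_SINGLESTEP-triggered KVM_EXIT_DEBUG.
		 */
		if (in)
			return kvm_fast_pio_in(vcpu, size, port) && ret;
		return kvm_fast_pio_out(vcpu, size, port) && ret;
	}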
| @@ -6344,6 +6649,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
| 6344 | err = handle_set_cr0(vcpu, val); | 6649 | err = handle_set_cr0(vcpu, val); |
| 6345 | return kvm_complete_insn_gp(vcpu, err); | 6650 | return kvm_complete_insn_gp(vcpu, err); |
| 6346 | case 3: | 6651 | case 3: |
| 6652 | WARN_ON_ONCE(enable_unrestricted_guest); | ||
| 6347 | err = kvm_set_cr3(vcpu, val); | 6653 | err = kvm_set_cr3(vcpu, val); |
| 6348 | return kvm_complete_insn_gp(vcpu, err); | 6654 | return kvm_complete_insn_gp(vcpu, err); |
| 6349 | case 4: | 6655 | case 4: |
| @@ -6376,6 +6682,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
| 6376 | case 1: /*mov from cr*/ | 6682 | case 1: /*mov from cr*/ |
| 6377 | switch (cr) { | 6683 | switch (cr) { |
| 6378 | case 3: | 6684 | case 3: |
| 6685 | WARN_ON_ONCE(enable_unrestricted_guest); | ||
| 6379 | val = kvm_read_cr3(vcpu); | 6686 | val = kvm_read_cr3(vcpu); |
| 6380 | kvm_register_write(vcpu, reg, val); | 6687 | kvm_register_write(vcpu, reg, val); |
| 6381 | trace_kvm_cr_read(cr, val); | 6688 | trace_kvm_cr_read(cr, val); |
| @@ -6769,7 +7076,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) | |||
| 6769 | 7076 | ||
| 6770 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | 7077 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
| 6771 | { | 7078 | { |
| 6772 | int ret; | ||
| 6773 | gpa_t gpa; | 7079 | gpa_t gpa; |
| 6774 | 7080 | ||
| 6775 | /* | 7081 | /* |
| @@ -6797,17 +7103,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) | |||
| 6797 | NULL, 0) == EMULATE_DONE; | 7103 | NULL, 0) == EMULATE_DONE; |
| 6798 | } | 7104 | } |
| 6799 | 7105 | ||
| 6800 | ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); | 7106 | return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); |
| 6801 | if (ret >= 0) | ||
| 6802 | return ret; | ||
| 6803 | |||
| 6804 | /* It is the real ept misconfig */ | ||
| 6805 | WARN_ON(1); | ||
| 6806 | |||
| 6807 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; | ||
| 6808 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | ||
| 6809 | |||
| 6810 | return 0; | ||
| 6811 | } | 7107 | } |
| 6812 | 7108 | ||
| 6813 | static int handle_nmi_window(struct kvm_vcpu *vcpu) | 7109 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
| @@ -6830,6 +7126,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
| 6830 | bool intr_window_requested; | 7126 | bool intr_window_requested; |
| 6831 | unsigned count = 130; | 7127 | unsigned count = 130; |
| 6832 | 7128 | ||
| 7129 | /* | ||
| 7130 | * We should never reach the point where we are emulating L2 | ||
| 7131 | * due to invalid guest state as that means we incorrectly | ||
| 7132 | * allowed a nested VMEntry with an invalid vmcs12. | ||
| 7133 | */ | ||
| 7134 | WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); | ||
| 7135 | |||
| 6833 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 7136 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
| 6834 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; | 7137 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; |
| 6835 | 7138 | ||
| @@ -6848,12 +7151,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
| 6848 | goto out; | 7151 | goto out; |
| 6849 | } | 7152 | } |
| 6850 | 7153 | ||
| 6851 | if (err != EMULATE_DONE) { | 7154 | if (err != EMULATE_DONE) |
| 6852 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 7155 | goto emulation_error; |
| 6853 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 7156 | |
| 6854 | vcpu->run->internal.ndata = 0; | 7157 | if (vmx->emulation_required && !vmx->rmode.vm86_active && |
| 6855 | return 0; | 7158 | vcpu->arch.exception.pending) |
| 6856 | } | 7159 | goto emulation_error; |
| 6857 | 7160 | ||
| 6858 | if (vcpu->arch.halt_request) { | 7161 | if (vcpu->arch.halt_request) { |
| 6859 | vcpu->arch.halt_request = 0; | 7162 | vcpu->arch.halt_request = 0; |
| @@ -6869,34 +7172,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
| 6869 | 7172 | ||
| 6870 | out: | 7173 | out: |
| 6871 | return ret; | 7174 | return ret; |
| 6872 | } | ||
| 6873 | |||
| 6874 | static int __grow_ple_window(int val) | ||
| 6875 | { | ||
| 6876 | if (ple_window_grow < 1) | ||
| 6877 | return ple_window; | ||
| 6878 | |||
| 6879 | val = min(val, ple_window_actual_max); | ||
| 6880 | |||
| 6881 | if (ple_window_grow < ple_window) | ||
| 6882 | val *= ple_window_grow; | ||
| 6883 | else | ||
| 6884 | val += ple_window_grow; | ||
| 6885 | |||
| 6886 | return val; | ||
| 6887 | } | ||
| 6888 | 7175 | ||
| 6889 | static int __shrink_ple_window(int val, int modifier, int minimum) | 7176 | emulation_error: |
| 6890 | { | 7177 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
| 6891 | if (modifier < 1) | 7178 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
| 6892 | return ple_window; | 7179 | vcpu->run->internal.ndata = 0; |
| 6893 | 7180 | return 0; | |
| 6894 | if (modifier < ple_window) | ||
| 6895 | val /= modifier; | ||
| 6896 | else | ||
| 6897 | val -= modifier; | ||
| 6898 | |||
| 6899 | return max(val, minimum); | ||
| 6900 | } | 7181 | } |
| 6901 | 7182 | ||
| 6902 | static void grow_ple_window(struct kvm_vcpu *vcpu) | 7183 | static void grow_ple_window(struct kvm_vcpu *vcpu) |
| @@ -6904,7 +7185,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) | |||
| 6904 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7185 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 6905 | int old = vmx->ple_window; | 7186 | int old = vmx->ple_window; |
| 6906 | 7187 | ||
| 6907 | vmx->ple_window = __grow_ple_window(old); | 7188 | vmx->ple_window = __grow_ple_window(old, ple_window, |
| 7189 | ple_window_grow, | ||
| 7190 | ple_window_max); | ||
| 6908 | 7191 | ||
| 6909 | if (vmx->ple_window != old) | 7192 | if (vmx->ple_window != old) |
| 6910 | vmx->ple_window_dirty = true; | 7193 | vmx->ple_window_dirty = true; |
| @@ -6917,8 +7200,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) | |||
| 6917 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7200 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 6918 | int old = vmx->ple_window; | 7201 | int old = vmx->ple_window; |
| 6919 | 7202 | ||
| 6920 | vmx->ple_window = __shrink_ple_window(old, | 7203 | vmx->ple_window = __shrink_ple_window(old, ple_window, |
| 6921 | ple_window_shrink, ple_window); | 7204 | ple_window_shrink, |
| 7205 | ple_window); | ||
| 6922 | 7206 | ||
| 6923 | if (vmx->ple_window != old) | 7207 | if (vmx->ple_window != old) |
| 6924 | vmx->ple_window_dirty = true; | 7208 | vmx->ple_window_dirty = true; |
| @@ -6927,21 +7211,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) | |||
| 6927 | } | 7211 | } |
| 6928 | 7212 | ||
| 6929 | /* | 7213 | /* |
| 6930 | * ple_window_actual_max is computed to be one grow_ple_window() below | ||
| 6931 | * ple_window_max. (See __grow_ple_window for the reason.) | ||
| 6932 | * This prevents overflows, because ple_window_max is int. | ||
| 6933 | * ple_window_max effectively rounded down to a multiple of ple_window_grow in | ||
| 6934 | * this process. | ||
| 6935 | * ple_window_max is also prevented from setting vmx->ple_window < ple_window. | ||
| 6936 | */ | ||
| 6937 | static void update_ple_window_actual_max(void) | ||
| 6938 | { | ||
| 6939 | ple_window_actual_max = | ||
| 6940 | __shrink_ple_window(max(ple_window_max, ple_window), | ||
| 6941 | ple_window_grow, INT_MIN); | ||
| 6942 | } | ||
| 6943 | |||
| 6944 | /* | ||
| 6945 | * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. | 7214 | * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. |
| 6946 | */ | 7215 | */ |
| 6947 | static void wakeup_handler(void) | 7216 | static void wakeup_handler(void) |
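The VMX-private __grow_ple_window()/__shrink_ple_window() helpers (and the update_ple_window_actual_max() overflow workaround) are dropped here in favor of generic versions shared with SVM's new pause-filter support. Those shared helpers are not part of this excerpt, but the call sites above imply a (value, base, modifier, limit) signature roughly like the following sketch; the real helpers presumably also guard against multiplication overflow, e.g. by widening to u64, which is why update_ple_window_actual_max() is no longer needed:

	static inline int __grow_ple_window(int val, int base, int modifier, int cap)
	{
		if (modifier < 1)
			return base;

		if (modifier < base)
			val *= modifier;
		else
			val += modifier;

		return min(val, cap);
	}

	static inline int __shrink_ple_window(int val, int base, int modifier, int floor)
	{
		if (modifier < 1)
			return base;

		if (modifier < base)
			val /= modifier;
		else
			val -= modifier;

		return max(val, floor);
	}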
| @@ -6960,7 +7229,7 @@ static void wakeup_handler(void) | |||
| 6960 | spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); | 7229 | spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); |
| 6961 | } | 7230 | } |
| 6962 | 7231 | ||
| 6963 | void vmx_enable_tdp(void) | 7232 | static void vmx_enable_tdp(void) |
| 6964 | { | 7233 | { |
| 6965 | kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, | 7234 | kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, |
| 6966 | enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, | 7235 | enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, |
| @@ -7061,8 +7330,6 @@ static __init int hardware_setup(void) | |||
| 7061 | else | 7330 | else |
| 7062 | kvm_disable_tdp(); | 7331 | kvm_disable_tdp(); |
| 7063 | 7332 | ||
| 7064 | update_ple_window_actual_max(); | ||
| 7065 | |||
| 7066 | /* | 7333 | /* |
| 7067 | * Only enable PML when hardware supports PML feature, and both EPT | 7334 | * Only enable PML when hardware supports PML feature, and both EPT |
| 7068 | * and EPT A/D bit features are enabled -- PML depends on them to work. | 7335 | * and EPT A/D bit features are enabled -- PML depends on them to work. |
| @@ -7094,6 +7361,7 @@ static __init int hardware_setup(void) | |||
| 7094 | init_vmcs_shadow_fields(); | 7361 | init_vmcs_shadow_fields(); |
| 7095 | 7362 | ||
| 7096 | kvm_set_posted_intr_wakeup_handler(wakeup_handler); | 7363 | kvm_set_posted_intr_wakeup_handler(wakeup_handler); |
| 7364 | nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv); | ||
| 7097 | 7365 | ||
| 7098 | kvm_mce_cap_supported |= MCG_LMCE_P; | 7366 | kvm_mce_cap_supported |= MCG_LMCE_P; |
| 7099 | 7367 | ||
| @@ -7122,7 +7390,7 @@ static __exit void hardware_unsetup(void) | |||
| 7122 | */ | 7390 | */ |
| 7123 | static int handle_pause(struct kvm_vcpu *vcpu) | 7391 | static int handle_pause(struct kvm_vcpu *vcpu) |
| 7124 | { | 7392 | { |
| 7125 | if (ple_gap) | 7393 | if (!kvm_pause_in_guest(vcpu->kvm)) |
| 7126 | grow_ple_window(vcpu); | 7394 | grow_ple_window(vcpu); |
| 7127 | 7395 | ||
| 7128 | /* | 7396 | /* |
| @@ -7954,9 +8222,9 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
| 7954 | u64 eptp, gpa; | 8222 | u64 eptp, gpa; |
| 7955 | } operand; | 8223 | } operand; |
| 7956 | 8224 | ||
| 7957 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & | 8225 | if (!(vmx->nested.msrs.secondary_ctls_high & |
| 7958 | SECONDARY_EXEC_ENABLE_EPT) || | 8226 | SECONDARY_EXEC_ENABLE_EPT) || |
| 7959 | !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { | 8227 | !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { |
| 7960 | kvm_queue_exception(vcpu, UD_VECTOR); | 8228 | kvm_queue_exception(vcpu, UD_VECTOR); |
| 7961 | return 1; | 8229 | return 1; |
| 7962 | } | 8230 | } |
| @@ -7967,7 +8235,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
| 7967 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 8235 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
| 7968 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); | 8236 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); |
| 7969 | 8237 | ||
| 7970 | types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; | 8238 | types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; |
| 7971 | 8239 | ||
| 7972 | if (type >= 32 || !(types & (1 << type))) { | 8240 | if (type >= 32 || !(types & (1 << type))) { |
| 7973 | nested_vmx_failValid(vcpu, | 8241 | nested_vmx_failValid(vcpu, |
| @@ -8018,9 +8286,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
| 8018 | u64 gla; | 8286 | u64 gla; |
| 8019 | } operand; | 8287 | } operand; |
| 8020 | 8288 | ||
| 8021 | if (!(vmx->nested.nested_vmx_secondary_ctls_high & | 8289 | if (!(vmx->nested.msrs.secondary_ctls_high & |
| 8022 | SECONDARY_EXEC_ENABLE_VPID) || | 8290 | SECONDARY_EXEC_ENABLE_VPID) || |
| 8023 | !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) { | 8291 | !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { |
| 8024 | kvm_queue_exception(vcpu, UD_VECTOR); | 8292 | kvm_queue_exception(vcpu, UD_VECTOR); |
| 8025 | return 1; | 8293 | return 1; |
| 8026 | } | 8294 | } |
| @@ -8031,7 +8299,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
| 8031 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 8299 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
| 8032 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); | 8300 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); |
| 8033 | 8301 | ||
| 8034 | types = (vmx->nested.nested_vmx_vpid_caps & | 8302 | types = (vmx->nested.msrs.vpid_caps & |
| 8035 | VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; | 8303 | VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; |
| 8036 | 8304 | ||
| 8037 | if (type >= 32 || !(types & (1 << type))) { | 8305 | if (type >= 32 || !(types & (1 << type))) { |
| @@ -8125,11 +8393,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) | |||
| 8125 | /* Check for memory type validity */ | 8393 | /* Check for memory type validity */ |
| 8126 | switch (address & VMX_EPTP_MT_MASK) { | 8394 | switch (address & VMX_EPTP_MT_MASK) { |
| 8127 | case VMX_EPTP_MT_UC: | 8395 | case VMX_EPTP_MT_UC: |
| 8128 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) | 8396 | if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) |
| 8129 | return false; | 8397 | return false; |
| 8130 | break; | 8398 | break; |
| 8131 | case VMX_EPTP_MT_WB: | 8399 | case VMX_EPTP_MT_WB: |
| 8132 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) | 8400 | if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) |
| 8133 | return false; | 8401 | return false; |
| 8134 | break; | 8402 | break; |
| 8135 | default: | 8403 | default: |
| @@ -8146,7 +8414,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) | |||
| 8146 | 8414 | ||
| 8147 | /* AD, if set, should be supported */ | 8415 | /* AD, if set, should be supported */ |
| 8148 | if (address & VMX_EPTP_AD_ENABLE_BIT) { | 8416 | if (address & VMX_EPTP_AD_ENABLE_BIT) { |
| 8149 | if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) | 8417 | if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) |
| 8150 | return false; | 8418 | return false; |
| 8151 | } | 8419 | } |
| 8152 | 8420 | ||
| @@ -8790,7 +9058,8 @@ static void dump_vmcs(void) | |||
| 8790 | pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", | 9058 | pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", |
| 8791 | vmcs_read64(GUEST_IA32_DEBUGCTL), | 9059 | vmcs_read64(GUEST_IA32_DEBUGCTL), |
| 8792 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); | 9060 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); |
| 8793 | if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) | 9061 | if (cpu_has_load_perf_global_ctrl && |
| 9062 | vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) | ||
| 8794 | pr_err("PerfGlobCtl = 0x%016llx\n", | 9063 | pr_err("PerfGlobCtl = 0x%016llx\n", |
| 8795 | vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); | 9064 | vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); |
| 8796 | if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) | 9065 | if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) |
| @@ -8826,7 +9095,8 @@ static void dump_vmcs(void) | |||
| 8826 | pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", | 9095 | pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", |
| 8827 | vmcs_read64(HOST_IA32_EFER), | 9096 | vmcs_read64(HOST_IA32_EFER), |
| 8828 | vmcs_read64(HOST_IA32_PAT)); | 9097 | vmcs_read64(HOST_IA32_PAT)); |
| 8829 | if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 9098 | if (cpu_has_load_perf_global_ctrl && |
| 9099 | vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | ||
| 8830 | pr_err("PerfGlobCtl = 0x%016llx\n", | 9100 | pr_err("PerfGlobCtl = 0x%016llx\n", |
| 8831 | vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); | 9101 | vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); |
| 8832 | 9102 | ||
| @@ -9178,9 +9448,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | |||
| 9178 | 9448 | ||
| 9179 | /* We need to handle NMIs before interrupts are enabled */ | 9449 | /* We need to handle NMIs before interrupts are enabled */ |
| 9180 | if (is_nmi(exit_intr_info)) { | 9450 | if (is_nmi(exit_intr_info)) { |
| 9181 | kvm_before_handle_nmi(&vmx->vcpu); | 9451 | kvm_before_interrupt(&vmx->vcpu); |
| 9182 | asm("int $2"); | 9452 | asm("int $2"); |
| 9183 | kvm_after_handle_nmi(&vmx->vcpu); | 9453 | kvm_after_interrupt(&vmx->vcpu); |
| 9184 | } | 9454 | } |
| 9185 | } | 9455 | } |
| 9186 | 9456 | ||
| @@ -9403,7 +9673,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) | |||
| 9403 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | 9673 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
| 9404 | { | 9674 | { |
| 9405 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9675 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 9406 | unsigned long cr3, cr4; | 9676 | unsigned long cr3, cr4, evmcs_rsp; |
| 9407 | 9677 | ||
| 9408 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 9678 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
| 9409 | if (unlikely(!enable_vnmi && | 9679 | if (unlikely(!enable_vnmi && |
| @@ -9469,6 +9739,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9469 | native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); | 9739 | native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
| 9470 | 9740 | ||
| 9471 | vmx->__launched = vmx->loaded_vmcs->launched; | 9741 | vmx->__launched = vmx->loaded_vmcs->launched; |
| 9742 | |||
| 9743 | evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? | ||
| 9744 | (unsigned long)¤t_evmcs->host_rsp : 0; | ||
| 9745 | |||
| 9472 | asm( | 9746 | asm( |
| 9473 | /* Store host registers */ | 9747 | /* Store host registers */ |
| 9474 | "push %%" _ASM_DX "; push %%" _ASM_BP ";" | 9748 | "push %%" _ASM_DX "; push %%" _ASM_BP ";" |
| @@ -9477,15 +9751,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9477 | "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" | 9751 | "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" |
| 9478 | "je 1f \n\t" | 9752 | "je 1f \n\t" |
| 9479 | "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" | 9753 | "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" |
| 9754 | /* Avoid VMWRITE when Enlightened VMCS is in use */ | ||
| 9755 | "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" | ||
| 9756 | "jz 2f \n\t" | ||
| 9757 | "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" | ||
| 9758 | "jmp 1f \n\t" | ||
| 9759 | "2: \n\t" | ||
| 9480 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" | 9760 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
| 9481 | "1: \n\t" | 9761 | "1: \n\t" |
| 9482 | /* Reload cr2 if changed */ | 9762 | /* Reload cr2 if changed */ |
| 9483 | "mov %c[cr2](%0), %%" _ASM_AX " \n\t" | 9763 | "mov %c[cr2](%0), %%" _ASM_AX " \n\t" |
| 9484 | "mov %%cr2, %%" _ASM_DX " \n\t" | 9764 | "mov %%cr2, %%" _ASM_DX " \n\t" |
| 9485 | "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" | 9765 | "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" |
| 9486 | "je 2f \n\t" | 9766 | "je 3f \n\t" |
| 9487 | "mov %%" _ASM_AX", %%cr2 \n\t" | 9767 | "mov %%" _ASM_AX", %%cr2 \n\t" |
| 9488 | "2: \n\t" | 9768 | "3: \n\t" |
| 9489 | /* Check if vmlaunch or vmresume is needed */ | 9769 | /* Check if vmlaunch or vmresume is needed */ |
| 9490 | "cmpl $0, %c[launched](%0) \n\t" | 9770 | "cmpl $0, %c[launched](%0) \n\t" |
| 9491 | /* Load guest registers. Don't clobber flags. */ | 9771 | /* Load guest registers. Don't clobber flags. */ |
| @@ -9554,7 +9834,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9554 | ".global vmx_return \n\t" | 9834 | ".global vmx_return \n\t" |
| 9555 | "vmx_return: " _ASM_PTR " 2b \n\t" | 9835 | "vmx_return: " _ASM_PTR " 2b \n\t" |
| 9556 | ".popsection" | 9836 | ".popsection" |
| 9557 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 9837 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), |
| 9558 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), | 9838 | [launched]"i"(offsetof(struct vcpu_vmx, __launched)), |
| 9559 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 9839 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), |
| 9560 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | 9840 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), |
| @@ -9579,10 +9859,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9579 | [wordsize]"i"(sizeof(ulong)) | 9859 | [wordsize]"i"(sizeof(ulong)) |
| 9580 | : "cc", "memory" | 9860 | : "cc", "memory" |
| 9581 | #ifdef CONFIG_X86_64 | 9861 | #ifdef CONFIG_X86_64 |
| 9582 | , "rax", "rbx", "rdi", "rsi" | 9862 | , "rax", "rbx", "rdi" |
| 9583 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | 9863 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" |
| 9584 | #else | 9864 | #else |
| 9585 | , "eax", "ebx", "edi", "esi" | 9865 | , "eax", "ebx", "edi" |
| 9586 | #endif | 9866 | #endif |
| 9587 | ); | 9867 | ); |
| 9588 | 9868 | ||
| @@ -9610,6 +9890,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9610 | /* Eliminate branch target predictions from guest mode */ | 9890 | /* Eliminate branch target predictions from guest mode */ |
| 9611 | vmexit_fill_RSB(); | 9891 | vmexit_fill_RSB(); |
| 9612 | 9892 | ||
| 9893 | /* All fields are clean at this point */ | ||
| 9894 | if (static_branch_unlikely(&enable_evmcs)) | ||
| 9895 | current_evmcs->hv_clean_fields |= | ||
| 9896 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; | ||
| 9897 | |||
| 9613 | /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ | 9898 | /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ |
| 9614 | if (vmx->host_debugctlmsr) | 9899 | if (vmx->host_debugctlmsr) |
| 9615 | update_debugctlmsr(vmx->host_debugctlmsr); | 9900 | update_debugctlmsr(vmx->host_debugctlmsr); |
| @@ -9646,14 +9931,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9646 | __write_pkru(vmx->host_pkru); | 9931 | __write_pkru(vmx->host_pkru); |
| 9647 | } | 9932 | } |
| 9648 | 9933 | ||
| 9649 | /* | ||
| 9650 | * the KVM_REQ_EVENT optimization bit is only on for one entry, and if | ||
| 9651 | * we did not inject a still-pending event to L1 now because of | ||
| 9652 | * nested_run_pending, we need to re-enable this bit. | ||
| 9653 | */ | ||
| 9654 | if (vmx->nested.nested_run_pending) | ||
| 9655 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
| 9656 | |||
| 9657 | vmx->nested.nested_run_pending = 0; | 9934 | vmx->nested.nested_run_pending = 0; |
| 9658 | vmx->idt_vectoring_info = 0; | 9935 | vmx->idt_vectoring_info = 0; |
| 9659 | 9936 | ||
| @@ -9670,6 +9947,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
| 9670 | } | 9947 | } |
| 9671 | STACK_FRAME_NON_STANDARD(vmx_vcpu_run); | 9948 | STACK_FRAME_NON_STANDARD(vmx_vcpu_run); |
| 9672 | 9949 | ||
| 9950 | static struct kvm *vmx_vm_alloc(void) | ||
| 9951 | { | ||
| 9952 | struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL); | ||
| 9953 | return &kvm_vmx->kvm; | ||
| 9954 | } | ||
| 9955 | |||
| 9956 | static void vmx_vm_free(struct kvm *kvm) | ||
| 9957 | { | ||
| 9958 | kfree(to_kvm_vmx(kvm)); | ||
| 9959 | } | ||
| 9960 | |||
| 9673 | static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) | 9961 | static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) |
| 9674 | { | 9962 | { |
| 9675 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 9963 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -9777,14 +10065,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 9777 | goto free_vmcs; | 10065 | goto free_vmcs; |
| 9778 | } | 10066 | } |
| 9779 | 10067 | ||
| 9780 | if (enable_ept) { | 10068 | if (enable_ept && !enable_unrestricted_guest) { |
| 9781 | err = init_rmode_identity_map(kvm); | 10069 | err = init_rmode_identity_map(kvm); |
| 9782 | if (err) | 10070 | if (err) |
| 9783 | goto free_vmcs; | 10071 | goto free_vmcs; |
| 9784 | } | 10072 | } |
| 9785 | 10073 | ||
| 9786 | if (nested) { | 10074 | if (nested) { |
| 9787 | nested_vmx_setup_ctls_msrs(vmx); | 10075 | nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, |
| 10076 | kvm_vcpu_apicv_active(&vmx->vcpu)); | ||
| 9788 | vmx->nested.vpid02 = allocate_vpid(); | 10077 | vmx->nested.vpid02 = allocate_vpid(); |
| 9789 | } | 10078 | } |
| 9790 | 10079 | ||
| @@ -9817,6 +10106,13 @@ free_vcpu: | |||
| 9817 | return ERR_PTR(err); | 10106 | return ERR_PTR(err); |
| 9818 | } | 10107 | } |
| 9819 | 10108 | ||
| 10109 | static int vmx_vm_init(struct kvm *kvm) | ||
| 10110 | { | ||
| 10111 | if (!ple_gap) | ||
| 10112 | kvm->arch.pause_in_guest = true; | ||
| 10113 | return 0; | ||
| 10114 | } | ||
| 10115 | |||
| 9820 | static void __init vmx_check_processor_compat(void *rtn) | 10116 | static void __init vmx_check_processor_compat(void *rtn) |
| 9821 | { | 10117 | { |
| 9822 | struct vmcs_config vmcs_conf; | 10118 | struct vmcs_config vmcs_conf; |
| @@ -9824,6 +10120,7 @@ static void __init vmx_check_processor_compat(void *rtn) | |||
| 9824 | *(int *)rtn = 0; | 10120 | *(int *)rtn = 0; |
| 9825 | if (setup_vmcs_config(&vmcs_conf) < 0) | 10121 | if (setup_vmcs_config(&vmcs_conf) < 0) |
| 9826 | *(int *)rtn = -EIO; | 10122 | *(int *)rtn = -EIO; |
| 10123 | nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv); | ||
| 9827 | if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { | 10124 | if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { |
| 9828 | printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", | 10125 | printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", |
| 9829 | smp_processor_id()); | 10126 | smp_processor_id()); |
| @@ -9911,12 +10208,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) | |||
| 9911 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 10208 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 9912 | struct kvm_cpuid_entry2 *entry; | 10209 | struct kvm_cpuid_entry2 *entry; |
| 9913 | 10210 | ||
| 9914 | vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff; | 10211 | vmx->nested.msrs.cr0_fixed1 = 0xffffffff; |
| 9915 | vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE; | 10212 | vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE; |
| 9916 | 10213 | ||
| 9917 | #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ | 10214 | #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ |
| 9918 | if (entry && (entry->_reg & (_cpuid_mask))) \ | 10215 | if (entry && (entry->_reg & (_cpuid_mask))) \ |
| 9919 | vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \ | 10216 | vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ |
| 9920 | } while (0) | 10217 | } while (0) |
| 9921 | 10218 | ||
| 9922 | entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); | 10219 | entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); |
| @@ -10013,7 +10310,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) | |||
| 10013 | 10310 | ||
| 10014 | kvm_mmu_unload(vcpu); | 10311 | kvm_mmu_unload(vcpu); |
| 10015 | kvm_init_shadow_ept_mmu(vcpu, | 10312 | kvm_init_shadow_ept_mmu(vcpu, |
| 10016 | to_vmx(vcpu)->nested.nested_vmx_ept_caps & | 10313 | to_vmx(vcpu)->nested.msrs.ept_caps & |
| 10017 | VMX_EPT_EXECUTE_ONLY_BIT, | 10314 | VMX_EPT_EXECUTE_ONLY_BIT, |
| 10018 | nested_ept_ad_enabled(vcpu)); | 10315 | nested_ept_ad_enabled(vcpu)); |
| 10019 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; | 10316 | vcpu->arch.mmu.set_cr3 = vmx_set_cr3; |
| @@ -10952,6 +11249,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10952 | /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ | 11249 | /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ |
| 10953 | vmx_set_efer(vcpu, vcpu->arch.efer); | 11250 | vmx_set_efer(vcpu, vcpu->arch.efer); |
| 10954 | 11251 | ||
| 11252 | /* | ||
| 11253 | * Guest state is invalid and unrestricted guest is disabled, | ||
| 11254 | * which means L1 attempted VMEntry to L2 with invalid state. | ||
| 11255 | * Fail the VMEntry. | ||
| 11256 | */ | ||
| 11257 | if (vmx->emulation_required) { | ||
| 11258 | *entry_failure_code = ENTRY_FAIL_DEFAULT; | ||
| 11259 | return 1; | ||
| 11260 | } | ||
| 11261 | |||
| 10955 | /* Shadow page tables on either EPT or shadow page tables. */ | 11262 | /* Shadow page tables on either EPT or shadow page tables. */ |
| 10956 | if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), | 11263 | if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), |
| 10957 | entry_failure_code)) | 11264 | entry_failure_code)) |
| @@ -10965,6 +11272,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
| 10965 | return 0; | 11272 | return 0; |
| 10966 | } | 11273 | } |
| 10967 | 11274 | ||
| 11275 | static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) | ||
| 11276 | { | ||
| 11277 | if (!nested_cpu_has_nmi_exiting(vmcs12) && | ||
| 11278 | nested_cpu_has_virtual_nmis(vmcs12)) | ||
| 11279 | return -EINVAL; | ||
| 11280 | |||
| 11281 | if (!nested_cpu_has_virtual_nmis(vmcs12) && | ||
| 11282 | nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) | ||
| 11283 | return -EINVAL; | ||
| 11284 | |||
| 11285 | return 0; | ||
| 11286 | } | ||
| 11287 | |||
| 10968 | static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 11288 | static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
| 10969 | { | 11289 | { |
| 10970 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 11290 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -10992,26 +11312,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
| 10992 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 11312 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 10993 | 11313 | ||
| 10994 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, | 11314 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, |
| 10995 | vmx->nested.nested_vmx_procbased_ctls_low, | 11315 | vmx->nested.msrs.procbased_ctls_low, |
| 10996 | vmx->nested.nested_vmx_procbased_ctls_high) || | 11316 | vmx->nested.msrs.procbased_ctls_high) || |
| 10997 | (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && | 11317 | (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && |
| 10998 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, | 11318 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, |
| 10999 | vmx->nested.nested_vmx_secondary_ctls_low, | 11319 | vmx->nested.msrs.secondary_ctls_low, |
| 11000 | vmx->nested.nested_vmx_secondary_ctls_high)) || | 11320 | vmx->nested.msrs.secondary_ctls_high)) || |
| 11001 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, | 11321 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, |
| 11002 | vmx->nested.nested_vmx_pinbased_ctls_low, | 11322 | vmx->nested.msrs.pinbased_ctls_low, |
| 11003 | vmx->nested.nested_vmx_pinbased_ctls_high) || | 11323 | vmx->nested.msrs.pinbased_ctls_high) || |
| 11004 | !vmx_control_verify(vmcs12->vm_exit_controls, | 11324 | !vmx_control_verify(vmcs12->vm_exit_controls, |
| 11005 | vmx->nested.nested_vmx_exit_ctls_low, | 11325 | vmx->nested.msrs.exit_ctls_low, |
| 11006 | vmx->nested.nested_vmx_exit_ctls_high) || | 11326 | vmx->nested.msrs.exit_ctls_high) || |
| 11007 | !vmx_control_verify(vmcs12->vm_entry_controls, | 11327 | !vmx_control_verify(vmcs12->vm_entry_controls, |
| 11008 | vmx->nested.nested_vmx_entry_ctls_low, | 11328 | vmx->nested.msrs.entry_ctls_low, |
| 11009 | vmx->nested.nested_vmx_entry_ctls_high)) | 11329 | vmx->nested.msrs.entry_ctls_high)) |
| 11330 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
| 11331 | |||
| 11332 | if (nested_vmx_check_nmi_controls(vmcs12)) | ||
| 11010 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 11333 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 11011 | 11334 | ||
| 11012 | if (nested_cpu_has_vmfunc(vmcs12)) { | 11335 | if (nested_cpu_has_vmfunc(vmcs12)) { |
| 11013 | if (vmcs12->vm_function_control & | 11336 | if (vmcs12->vm_function_control & |
| 11014 | ~vmx->nested.nested_vmx_vmfunc_controls) | 11337 | ~vmx->nested.msrs.vmfunc_controls) |
| 11015 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | 11338 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; |
| 11016 | 11339 | ||
| 11017 | if (nested_cpu_has_eptp_switching(vmcs12)) { | 11340 | if (nested_cpu_has_eptp_switching(vmcs12)) { |
| @@ -11293,7 +11616,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
| 11293 | } else if (vcpu->arch.nmi_injected) { | 11616 | } else if (vcpu->arch.nmi_injected) { |
| 11294 | vmcs12->idt_vectoring_info_field = | 11617 | vmcs12->idt_vectoring_info_field = |
| 11295 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; | 11618 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; |
| 11296 | } else if (vcpu->arch.interrupt.pending) { | 11619 | } else if (vcpu->arch.interrupt.injected) { |
| 11297 | nr = vcpu->arch.interrupt.nr; | 11620 | nr = vcpu->arch.interrupt.nr; |
| 11298 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | 11621 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; |
| 11299 | 11622 | ||
| @@ -11941,7 +12264,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) | |||
| 11941 | 12264 | ||
| 11942 | static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) | 12265 | static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) |
| 11943 | { | 12266 | { |
| 11944 | if (ple_gap) | 12267 | if (!kvm_pause_in_guest(vcpu->kvm)) |
| 11945 | shrink_ple_window(vcpu); | 12268 | shrink_ple_window(vcpu); |
| 11946 | } | 12269 | } |
| 11947 | 12270 | ||
| @@ -12259,6 +12582,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) | |||
| 12259 | 12582 | ||
| 12260 | vmx->nested.smm.vmxon = vmx->nested.vmxon; | 12583 | vmx->nested.smm.vmxon = vmx->nested.vmxon; |
| 12261 | vmx->nested.vmxon = false; | 12584 | vmx->nested.vmxon = false; |
| 12585 | vmx_clear_hlt(vcpu); | ||
| 12262 | return 0; | 12586 | return 0; |
| 12263 | } | 12587 | } |
| 12264 | 12588 | ||
| @@ -12300,6 +12624,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
| 12300 | .cpu_has_accelerated_tpr = report_flexpriority, | 12624 | .cpu_has_accelerated_tpr = report_flexpriority, |
| 12301 | .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, | 12625 | .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, |
| 12302 | 12626 | ||
| 12627 | .vm_init = vmx_vm_init, | ||
| 12628 | .vm_alloc = vmx_vm_alloc, | ||
| 12629 | .vm_free = vmx_vm_free, | ||
| 12630 | |||
| 12303 | .vcpu_create = vmx_create_vcpu, | 12631 | .vcpu_create = vmx_create_vcpu, |
| 12304 | .vcpu_free = vmx_free_vcpu, | 12632 | .vcpu_free = vmx_free_vcpu, |
| 12305 | .vcpu_reset = vmx_vcpu_reset, | 12633 | .vcpu_reset = vmx_vcpu_reset, |
| @@ -12367,6 +12695,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
| 12367 | .deliver_posted_interrupt = vmx_deliver_posted_interrupt, | 12695 | .deliver_posted_interrupt = vmx_deliver_posted_interrupt, |
| 12368 | 12696 | ||
| 12369 | .set_tss_addr = vmx_set_tss_addr, | 12697 | .set_tss_addr = vmx_set_tss_addr, |
| 12698 | .set_identity_map_addr = vmx_set_identity_map_addr, | ||
| 12370 | .get_tdp_level = get_ept_level, | 12699 | .get_tdp_level = get_ept_level, |
| 12371 | .get_mt_mask = vmx_get_mt_mask, | 12700 | .get_mt_mask = vmx_get_mt_mask, |
| 12372 | 12701 | ||
| @@ -12425,7 +12754,38 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { | |||
| 12425 | 12754 | ||
| 12426 | static int __init vmx_init(void) | 12755 | static int __init vmx_init(void) |
| 12427 | { | 12756 | { |
| 12428 | int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), | 12757 | int r; |
| 12758 | |||
| 12759 | #if IS_ENABLED(CONFIG_HYPERV) | ||
| 12760 | /* | ||
| 12761 | * Enlightened VMCS usage should be recommended and the host needs | ||
| 12762 | * to support eVMCS v1 or above. We can also disable eVMCS support | ||
| 12763 | * with module parameter. | ||
| 12764 | */ | ||
| 12765 | if (enlightened_vmcs && | ||
| 12766 | ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && | ||
| 12767 | (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= | ||
| 12768 | KVM_EVMCS_VERSION) { | ||
| 12769 | int cpu; | ||
| 12770 | |||
| 12771 | /* Check that we have assist pages on all online CPUs */ | ||
| 12772 | for_each_online_cpu(cpu) { | ||
| 12773 | if (!hv_get_vp_assist_page(cpu)) { | ||
| 12774 | enlightened_vmcs = false; | ||
| 12775 | break; | ||
| 12776 | } | ||
| 12777 | } | ||
| 12778 | |||
| 12779 | if (enlightened_vmcs) { | ||
| 12780 | pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); | ||
| 12781 | static_branch_enable(&enable_evmcs); | ||
| 12782 | } | ||
| 12783 | } else { | ||
| 12784 | enlightened_vmcs = false; | ||
| 12785 | } | ||
| 12786 | #endif | ||
| 12787 | |||
| 12788 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), | ||
| 12429 | __alignof__(struct vcpu_vmx), THIS_MODULE); | 12789 | __alignof__(struct vcpu_vmx), THIS_MODULE); |
| 12430 | if (r) | 12790 | if (r) |
| 12431 | return r; | 12791 | return r; |
| @@ -12446,6 +12806,29 @@ static void __exit vmx_exit(void) | |||
| 12446 | #endif | 12806 | #endif |
| 12447 | 12807 | ||
| 12448 | kvm_exit(); | 12808 | kvm_exit(); |
| 12809 | |||
| 12810 | #if IS_ENABLED(CONFIG_HYPERV) | ||
| 12811 | if (static_branch_unlikely(&enable_evmcs)) { | ||
| 12812 | int cpu; | ||
| 12813 | struct hv_vp_assist_page *vp_ap; | ||
| 12814 | /* | ||
| 12815 | * Reset everything to support using non-enlightened VMCS | ||
| 12816 | * access later (e.g. when we reload the module with | ||
| 12817 | * enlightened_vmcs=0) | ||
| 12818 | */ | ||
| 12819 | for_each_online_cpu(cpu) { | ||
| 12820 | vp_ap = hv_get_vp_assist_page(cpu); | ||
| 12821 | |||
| 12822 | if (!vp_ap) | ||
| 12823 | continue; | ||
| 12824 | |||
| 12825 | vp_ap->current_nested_vmcs = 0; | ||
| 12826 | vp_ap->enlighten_vmentry = 0; | ||
| 12827 | } | ||
| 12828 | |||
| 12829 | static_branch_disable(&enable_evmcs); | ||
| 12830 | } | ||
| 12831 | #endif | ||
| 12449 | } | 12832 | } |
| 12450 | 12833 | ||
| 12451 | module_init(vmx_init) | 12834 | module_init(vmx_init) |
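The eVMCS setup in vmx_init() and the teardown in vmx_exit() above rely on an enlightened_vmcs module parameter and an enable_evmcs static key declared earlier in vmx.c, outside this excerpt; presumably something along these lines:

	#if IS_ENABLED(CONFIG_HYPERV)
	/* Opt in by default; vmx_init() clears this if the host cannot support it. */
	static bool __read_mostly enlightened_vmcs = true;
	module_param(enlightened_vmcs, bool, 0444);

	static DEFINE_STATIC_KEY_FALSE(enable_evmcs);
	#endif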
diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx_evmcs.h new file mode 100644 index 000000000000..210a884090ad --- /dev/null +++ b/arch/x86/kvm/vmx_evmcs.h | |||
| @@ -0,0 +1,324 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __KVM_X86_VMX_EVMCS_H | ||
| 3 | #define __KVM_X86_VMX_EVMCS_H | ||
| 4 | |||
| 5 | #include <asm/hyperv-tlfs.h> | ||
| 6 | |||
| 7 | #define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) | ||
| 8 | #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) | ||
| 9 | #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ | ||
| 10 | {EVMCS1_OFFSET(name), clean_field} | ||
| 11 | |||
| 12 | struct evmcs_field { | ||
| 13 | u16 offset; | ||
| 14 | u16 clean_field; | ||
| 15 | }; | ||
| 16 | |||
| 17 | static const struct evmcs_field vmcs_field_to_evmcs_1[] = { | ||
| 18 | /* 64 bit rw */ | ||
| 19 | EVMCS1_FIELD(GUEST_RIP, guest_rip, | ||
| 20 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 21 | EVMCS1_FIELD(GUEST_RSP, guest_rsp, | ||
| 22 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), | ||
| 23 | EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags, | ||
| 24 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), | ||
| 25 | EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat, | ||
| 26 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 27 | EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer, | ||
| 28 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 29 | EVMCS1_FIELD(HOST_CR0, host_cr0, | ||
| 30 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 31 | EVMCS1_FIELD(HOST_CR3, host_cr3, | ||
| 32 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 33 | EVMCS1_FIELD(HOST_CR4, host_cr4, | ||
| 34 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 35 | EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp, | ||
| 36 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 37 | EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip, | ||
| 38 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 39 | EVMCS1_FIELD(HOST_RIP, host_rip, | ||
| 40 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 41 | EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a, | ||
| 42 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP), | ||
| 43 | EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b, | ||
| 44 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP), | ||
| 45 | EVMCS1_FIELD(MSR_BITMAP, msr_bitmap, | ||
| 46 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP), | ||
| 47 | EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base, | ||
| 48 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 49 | EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base, | ||
| 50 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 51 | EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base, | ||
| 52 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 53 | EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base, | ||
| 54 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 55 | EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base, | ||
| 56 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 57 | EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base, | ||
| 58 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 59 | EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base, | ||
| 60 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 61 | EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base, | ||
| 62 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 63 | EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base, | ||
| 64 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 65 | EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base, | ||
| 66 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 67 | EVMCS1_FIELD(TSC_OFFSET, tsc_offset, | ||
| 68 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), | ||
| 69 | EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr, | ||
| 70 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), | ||
| 71 | EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer, | ||
| 72 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 73 | EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl, | ||
| 74 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 75 | EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat, | ||
| 76 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 77 | EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer, | ||
| 78 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 79 | EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0, | ||
| 80 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 81 | EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1, | ||
| 82 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 83 | EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2, | ||
| 84 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 85 | EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3, | ||
| 86 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 87 | EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions, | ||
| 88 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 89 | EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp, | ||
| 90 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 91 | EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip, | ||
| 92 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 93 | EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask, | ||
| 94 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 95 | EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask, | ||
| 96 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 97 | EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow, | ||
| 98 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 99 | EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow, | ||
| 100 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 101 | EVMCS1_FIELD(GUEST_CR0, guest_cr0, | ||
| 102 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 103 | EVMCS1_FIELD(GUEST_CR3, guest_cr3, | ||
| 104 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 105 | EVMCS1_FIELD(GUEST_CR4, guest_cr4, | ||
| 106 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 107 | EVMCS1_FIELD(GUEST_DR7, guest_dr7, | ||
| 108 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), | ||
| 109 | EVMCS1_FIELD(HOST_FS_BASE, host_fs_base, | ||
| 110 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), | ||
| 111 | EVMCS1_FIELD(HOST_GS_BASE, host_gs_base, | ||
| 112 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), | ||
| 113 | EVMCS1_FIELD(HOST_TR_BASE, host_tr_base, | ||
| 114 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), | ||
| 115 | EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base, | ||
| 116 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), | ||
| 117 | EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base, | ||
| 118 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), | ||
| 119 | EVMCS1_FIELD(HOST_RSP, host_rsp, | ||
| 120 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), | ||
| 121 | EVMCS1_FIELD(EPT_POINTER, ept_pointer, | ||
| 122 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT), | ||
| 123 | EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs, | ||
| 124 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 125 | EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap, | ||
| 126 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), | ||
| 127 | |||
| 128 | /* 64 bit read only */ | ||
| 129 | EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address, | ||
| 130 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 131 | EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification, | ||
| 132 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 133 | /* | ||
| 134 | * Not defined in KVM: | ||
| 135 | * | ||
| 136 | * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx, | ||
| 137 | * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); | ||
| 138 | * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi, | ||
| 139 | * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); | ||
| 140 | * EVMCS1_FIELD(0x00006406, exit_io_instruction_edi, | ||
| 141 | * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); | ||
| 142 | * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip, | ||
| 143 | * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); | ||
| 144 | */ | ||
| 145 | EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address, | ||
| 146 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 147 | |||
| 148 | /* | ||
| 149 | * No mask defined in the spec as Hyper-V doesn't currently support | ||
| 150 | * these. Future proof by resetting the whole clean field mask on | ||
| 151 | * access. | ||
| 152 | */ | ||
| 153 | EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr, | ||
| 154 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 155 | EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr, | ||
| 156 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 157 | EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr, | ||
| 158 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 159 | EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0, | ||
| 160 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 161 | EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1, | ||
| 162 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 163 | EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2, | ||
| 164 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 165 | EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3, | ||
| 166 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 167 | |||
| 168 | /* 32 bit rw */ | ||
| 169 | EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold, | ||
| 170 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 171 | EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info, | ||
| 172 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), | ||
| 173 | EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control, | ||
| 174 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC), | ||
| 175 | EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap, | ||
| 176 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN), | ||
| 177 | EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls, | ||
| 178 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY), | ||
| 179 | EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field, | ||
| 180 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), | ||
| 181 | EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
| 182 | vm_entry_exception_error_code, | ||
| 183 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), | ||
| 184 | EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len, | ||
| 185 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), | ||
| 186 | EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs, | ||
| 187 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 188 | EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control, | ||
| 189 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), | ||
| 190 | EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls, | ||
| 191 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), | ||
| 192 | EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control, | ||
| 193 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), | ||
| 194 | EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit, | ||
| 195 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 196 | EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit, | ||
| 197 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 198 | EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit, | ||
| 199 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 200 | EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit, | ||
| 201 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 202 | EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit, | ||
| 203 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 204 | EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit, | ||
| 205 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 206 | EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit, | ||
| 207 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 208 | EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit, | ||
| 209 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 210 | EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit, | ||
| 211 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 212 | EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit, | ||
| 213 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 214 | EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes, | ||
| 215 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 216 | EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes, | ||
| 217 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 218 | EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes, | ||
| 219 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 220 | EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes, | ||
| 221 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 222 | EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes, | ||
| 223 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 224 | EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes, | ||
| 225 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 226 | EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes, | ||
| 227 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 228 | EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes, | ||
| 229 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 230 | EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state, | ||
| 231 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 232 | EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs, | ||
| 233 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), | ||
| 234 | |||
| 235 | /* 32 bit read only */ | ||
| 236 | EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error, | ||
| 237 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 238 | EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason, | ||
| 239 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 240 | EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info, | ||
| 241 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 242 | EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code, | ||
| 243 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 244 | EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field, | ||
| 245 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 246 | EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code, | ||
| 247 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 248 | EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len, | ||
| 249 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 250 | EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info, | ||
| 251 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), | ||
| 252 | |||
| 253 | /* No mask defined in the spec (not used) */ | ||
| 254 | EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask, | ||
| 255 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 256 | EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match, | ||
| 257 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 258 | EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count, | ||
| 259 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 260 | EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count, | ||
| 261 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 262 | EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count, | ||
| 263 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 264 | EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count, | ||
| 265 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), | ||
| 266 | |||
| 267 | /* 16 bit rw */ | ||
| 268 | EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector, | ||
| 269 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 270 | EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector, | ||
| 271 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 272 | EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector, | ||
| 273 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 274 | EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector, | ||
| 275 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 276 | EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector, | ||
| 277 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 278 | EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector, | ||
| 279 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 280 | EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector, | ||
| 281 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), | ||
| 282 | EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector, | ||
| 283 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 284 | EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector, | ||
| 285 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 286 | EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector, | ||
| 287 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 288 | EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector, | ||
| 289 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 290 | EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector, | ||
| 291 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 292 | EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector, | ||
| 293 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 294 | EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector, | ||
| 295 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 296 | EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector, | ||
| 297 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), | ||
| 298 | EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id, | ||
| 299 | HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT), | ||
| 300 | }; | ||
| 301 | |||
| 302 | static __always_inline int get_evmcs_offset(unsigned long field, | ||
| 303 | u16 *clean_field) | ||
| 304 | { | ||
| 305 | unsigned int index = ROL16(field, 6); | ||
| 306 | const struct evmcs_field *evmcs_field; | ||
| 307 | |||
| 308 | if (unlikely(index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))) { | ||
| 309 | WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n", | ||
| 310 | field); | ||
| 311 | return -ENOENT; | ||
| 312 | } | ||
| 313 | |||
| 314 | evmcs_field = &vmcs_field_to_evmcs_1[index]; | ||
| 315 | |||
| 316 | if (clean_field) | ||
| 317 | *clean_field = evmcs_field->clean_field; | ||
| 318 | |||
| 319 | return evmcs_field->offset; | ||
| 320 | } | ||
| 321 | |||
| 322 | #undef ROL16 | ||
| 323 | |||
| 324 | #endif /* __KVM_X86_VMX_EVMCS_H */ | ||
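
As a side note on the lookup above: get_evmcs_offset() turns a VMCS field encoding into an index into vmcs_field_to_evmcs_1[] with a 16-bit rotate-left by 6. A standalone sketch (not part of the patch) of that arithmetic, using the architectural GUEST_ES_SELECTOR encoding 0x0800 purely as an example input:

/*
 * Standalone illustration of the rotate-based lookup; not part of the
 * patch.  GUEST_ES_SELECTOR (0x0800 per the SDM) is just a convenient
 * example encoding.
 */
#include <stdint.h>
#include <stdio.h>

#define ROL16(val, n) ((uint16_t)(((val) << (n)) | ((val) >> (16 - (n)))))

int main(void)
{
	uint16_t field = 0x0800;	/* GUEST_ES_SELECTOR */
	uint16_t index = ROL16(field, 6);

	/* get_evmcs_offset() would consult vmcs_field_to_evmcs_1[index] */
	printf("field 0x%04x -> index %u\n", field, (unsigned int)index); /* index 2 */
	return 0;
}

Any encoding whose rotated value falls outside the table hits the WARN_ONCE() path above and returns -ENOENT.
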
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 18b5ca7a3197..b2ff74b12ec4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); | |||
| 102 | static void process_nmi(struct kvm_vcpu *vcpu); | 102 | static void process_nmi(struct kvm_vcpu *vcpu); |
| 103 | static void enter_smm(struct kvm_vcpu *vcpu); | 103 | static void enter_smm(struct kvm_vcpu *vcpu); |
| 104 | static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | 104 | static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); |
| 105 | static void store_regs(struct kvm_vcpu *vcpu); | ||
| 106 | static int sync_regs(struct kvm_vcpu *vcpu); | ||
| 105 | 107 | ||
| 106 | struct kvm_x86_ops *kvm_x86_ops __read_mostly; | 108 | struct kvm_x86_ops *kvm_x86_ops __read_mostly; |
| 107 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 109 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
| @@ -140,6 +142,13 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); | |||
| 140 | static bool __read_mostly vector_hashing = true; | 142 | static bool __read_mostly vector_hashing = true; |
| 141 | module_param(vector_hashing, bool, S_IRUGO); | 143 | module_param(vector_hashing, bool, S_IRUGO); |
| 142 | 144 | ||
| 145 | bool __read_mostly enable_vmware_backdoor = false; | ||
| 146 | module_param(enable_vmware_backdoor, bool, S_IRUGO); | ||
| 147 | EXPORT_SYMBOL_GPL(enable_vmware_backdoor); | ||
| 148 | |||
| 149 | static bool __read_mostly force_emulation_prefix = false; | ||
| 150 | module_param(force_emulation_prefix, bool, S_IRUGO); | ||
| 151 | |||
| 143 | #define KVM_NR_SHARED_MSRS 16 | 152 | #define KVM_NR_SHARED_MSRS 16 |
| 144 | 153 | ||
| 145 | struct kvm_shared_msrs_global { | 154 | struct kvm_shared_msrs_global { |
| @@ -1032,7 +1041,11 @@ static u32 emulated_msrs[] = { | |||
| 1032 | HV_X64_MSR_VP_RUNTIME, | 1041 | HV_X64_MSR_VP_RUNTIME, |
| 1033 | HV_X64_MSR_SCONTROL, | 1042 | HV_X64_MSR_SCONTROL, |
| 1034 | HV_X64_MSR_STIMER0_CONFIG, | 1043 | HV_X64_MSR_STIMER0_CONFIG, |
| 1035 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 1044 | HV_X64_MSR_VP_ASSIST_PAGE, |
| 1045 | HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, | ||
| 1046 | HV_X64_MSR_TSC_EMULATION_STATUS, | ||
| 1047 | |||
| 1048 | MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | ||
| 1036 | MSR_KVM_PV_EOI_EN, | 1049 | MSR_KVM_PV_EOI_EN, |
| 1037 | 1050 | ||
| 1038 | MSR_IA32_TSC_ADJUST, | 1051 | MSR_IA32_TSC_ADJUST, |
| @@ -1054,6 +1067,25 @@ static unsigned num_emulated_msrs; | |||
| 1054 | * can be used by a hypervisor to validate requested CPU features. | 1067 | * can be used by a hypervisor to validate requested CPU features. |
| 1055 | */ | 1068 | */ |
| 1056 | static u32 msr_based_features[] = { | 1069 | static u32 msr_based_features[] = { |
| 1070 | MSR_IA32_VMX_BASIC, | ||
| 1071 | MSR_IA32_VMX_TRUE_PINBASED_CTLS, | ||
| 1072 | MSR_IA32_VMX_PINBASED_CTLS, | ||
| 1073 | MSR_IA32_VMX_TRUE_PROCBASED_CTLS, | ||
| 1074 | MSR_IA32_VMX_PROCBASED_CTLS, | ||
| 1075 | MSR_IA32_VMX_TRUE_EXIT_CTLS, | ||
| 1076 | MSR_IA32_VMX_EXIT_CTLS, | ||
| 1077 | MSR_IA32_VMX_TRUE_ENTRY_CTLS, | ||
| 1078 | MSR_IA32_VMX_ENTRY_CTLS, | ||
| 1079 | MSR_IA32_VMX_MISC, | ||
| 1080 | MSR_IA32_VMX_CR0_FIXED0, | ||
| 1081 | MSR_IA32_VMX_CR0_FIXED1, | ||
| 1082 | MSR_IA32_VMX_CR4_FIXED0, | ||
| 1083 | MSR_IA32_VMX_CR4_FIXED1, | ||
| 1084 | MSR_IA32_VMX_VMCS_ENUM, | ||
| 1085 | MSR_IA32_VMX_PROCBASED_CTLS2, | ||
| 1086 | MSR_IA32_VMX_EPT_VPID_CAP, | ||
| 1087 | MSR_IA32_VMX_VMFUNC, | ||
| 1088 | |||
| 1057 | MSR_F10H_DECFG, | 1089 | MSR_F10H_DECFG, |
| 1058 | MSR_IA32_UCODE_REV, | 1090 | MSR_IA32_UCODE_REV, |
| 1059 | }; | 1091 | }; |
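
The VMX capability MSRs added to msr_based_features[] above are meant to be read from userspace through the feature-MSR interface. A hedged userspace sketch, assuming the companion KVM_GET_MSR_FEATURE_INDEX_LIST ioctl that accompanies KVM_CAP_GET_MSR_FEATURES in this series behaves as documented:

/*
 * Hedged userspace sketch: enumerate the feature MSRs (including the
 * VMX capability MSRs listed above).
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	unsigned int n = 64;	/* guess; the kernel reports the real count */
	struct kvm_msr_list *list;
	unsigned int i;

	if (kvm < 0)
		return 1;
	list = calloc(1, sizeof(*list) + n * sizeof(list->indices[0]));
	list->nmsrs = n;
	if (ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, list) < 0)
		perror("KVM_GET_MSR_FEATURE_INDEX_LIST");
	for (i = 0; i < list->nmsrs; i++)
		printf("feature MSR 0x%x\n", list->indices[i]);
	free(list);
	return 0;
}
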
| @@ -2432,6 +2464,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2432 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: | 2464 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: |
| 2433 | case HV_X64_MSR_CRASH_CTL: | 2465 | case HV_X64_MSR_CRASH_CTL: |
| 2434 | case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: | 2466 | case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: |
| 2467 | case HV_X64_MSR_REENLIGHTENMENT_CONTROL: | ||
| 2468 | case HV_X64_MSR_TSC_EMULATION_CONTROL: | ||
| 2469 | case HV_X64_MSR_TSC_EMULATION_STATUS: | ||
| 2435 | return kvm_hv_set_msr_common(vcpu, msr, data, | 2470 | return kvm_hv_set_msr_common(vcpu, msr, data, |
| 2436 | msr_info->host_initiated); | 2471 | msr_info->host_initiated); |
| 2437 | case MSR_IA32_BBL_CR_CTL3: | 2472 | case MSR_IA32_BBL_CR_CTL3: |
| @@ -2558,6 +2593,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2558 | case MSR_AMD64_DC_CFG: | 2593 | case MSR_AMD64_DC_CFG: |
| 2559 | msr_info->data = 0; | 2594 | msr_info->data = 0; |
| 2560 | break; | 2595 | break; |
| 2596 | case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: | ||
| 2561 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: | 2597 | case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: |
| 2562 | case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: | 2598 | case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: |
| 2563 | case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: | 2599 | case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: |
| @@ -2661,6 +2697,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
| 2661 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: | 2697 | case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: |
| 2662 | case HV_X64_MSR_CRASH_CTL: | 2698 | case HV_X64_MSR_CRASH_CTL: |
| 2663 | case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: | 2699 | case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: |
| 2700 | case HV_X64_MSR_REENLIGHTENMENT_CONTROL: | ||
| 2701 | case HV_X64_MSR_TSC_EMULATION_CONTROL: | ||
| 2702 | case HV_X64_MSR_TSC_EMULATION_STATUS: | ||
| 2664 | return kvm_hv_get_msr_common(vcpu, | 2703 | return kvm_hv_get_msr_common(vcpu, |
| 2665 | msr_info->index, &msr_info->data); | 2704 | msr_info->index, &msr_info->data); |
| 2666 | break; | 2705 | break; |
| @@ -2777,9 +2816,15 @@ out: | |||
| 2777 | return r; | 2816 | return r; |
| 2778 | } | 2817 | } |
| 2779 | 2818 | ||
| 2819 | static inline bool kvm_can_mwait_in_guest(void) | ||
| 2820 | { | ||
| 2821 | return boot_cpu_has(X86_FEATURE_MWAIT) && | ||
| 2822 | !boot_cpu_has_bug(X86_BUG_MONITOR); | ||
| 2823 | } | ||
| 2824 | |||
| 2780 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | 2825 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) |
| 2781 | { | 2826 | { |
| 2782 | int r; | 2827 | int r = 0; |
| 2783 | 2828 | ||
| 2784 | switch (ext) { | 2829 | switch (ext) { |
| 2785 | case KVM_CAP_IRQCHIP: | 2830 | case KVM_CAP_IRQCHIP: |
| @@ -2809,6 +2854,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 2809 | case KVM_CAP_HYPERV_SYNIC: | 2854 | case KVM_CAP_HYPERV_SYNIC: |
| 2810 | case KVM_CAP_HYPERV_SYNIC2: | 2855 | case KVM_CAP_HYPERV_SYNIC2: |
| 2811 | case KVM_CAP_HYPERV_VP_INDEX: | 2856 | case KVM_CAP_HYPERV_VP_INDEX: |
| 2857 | case KVM_CAP_HYPERV_EVENTFD: | ||
| 2812 | case KVM_CAP_PCI_SEGMENT: | 2858 | case KVM_CAP_PCI_SEGMENT: |
| 2813 | case KVM_CAP_DEBUGREGS: | 2859 | case KVM_CAP_DEBUGREGS: |
| 2814 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | 2860 | case KVM_CAP_X86_ROBUST_SINGLESTEP: |
| @@ -2828,11 +2874,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 2828 | case KVM_CAP_GET_MSR_FEATURES: | 2874 | case KVM_CAP_GET_MSR_FEATURES: |
| 2829 | r = 1; | 2875 | r = 1; |
| 2830 | break; | 2876 | break; |
| 2877 | case KVM_CAP_SYNC_REGS: | ||
| 2878 | r = KVM_SYNC_X86_VALID_FIELDS; | ||
| 2879 | break; | ||
| 2831 | case KVM_CAP_ADJUST_CLOCK: | 2880 | case KVM_CAP_ADJUST_CLOCK: |
| 2832 | r = KVM_CLOCK_TSC_STABLE; | 2881 | r = KVM_CLOCK_TSC_STABLE; |
| 2833 | break; | 2882 | break; |
| 2834 | case KVM_CAP_X86_GUEST_MWAIT: | 2883 | case KVM_CAP_X86_DISABLE_EXITS: |
| 2835 | r = kvm_mwait_in_guest(); | 2884 | r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE; |
| 2885 | if(kvm_can_mwait_in_guest()) | ||
| 2886 | r |= KVM_X86_DISABLE_EXITS_MWAIT; | ||
| 2836 | break; | 2887 | break; |
| 2837 | case KVM_CAP_X86_SMM: | 2888 | case KVM_CAP_X86_SMM: |
| 2838 | /* SMBASE is usually relocated above 1M on modern chipsets, | 2889 | /* SMBASE is usually relocated above 1M on modern chipsets, |
| @@ -2873,7 +2924,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
| 2873 | r = KVM_X2APIC_API_VALID_FLAGS; | 2924 | r = KVM_X2APIC_API_VALID_FLAGS; |
| 2874 | break; | 2925 | break; |
| 2875 | default: | 2926 | default: |
| 2876 | r = 0; | ||
| 2877 | break; | 2927 | break; |
| 2878 | } | 2928 | } |
| 2879 | return r; | 2929 | return r; |
| @@ -3265,7 +3315,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 3265 | events->exception.error_code = vcpu->arch.exception.error_code; | 3315 | events->exception.error_code = vcpu->arch.exception.error_code; |
| 3266 | 3316 | ||
| 3267 | events->interrupt.injected = | 3317 | events->interrupt.injected = |
| 3268 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | 3318 | vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; |
| 3269 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 3319 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
| 3270 | events->interrupt.soft = 0; | 3320 | events->interrupt.soft = 0; |
| 3271 | events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); | 3321 | events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); |
| @@ -3318,7 +3368,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
| 3318 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | 3368 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; |
| 3319 | vcpu->arch.exception.error_code = events->exception.error_code; | 3369 | vcpu->arch.exception.error_code = events->exception.error_code; |
| 3320 | 3370 | ||
| 3321 | vcpu->arch.interrupt.pending = events->interrupt.injected; | 3371 | vcpu->arch.interrupt.injected = events->interrupt.injected; |
| 3322 | vcpu->arch.interrupt.nr = events->interrupt.nr; | 3372 | vcpu->arch.interrupt.nr = events->interrupt.nr; |
| 3323 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 3373 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
| 3324 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | 3374 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) |
| @@ -3917,8 +3967,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) | |||
| 3917 | static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, | 3967 | static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, |
| 3918 | u64 ident_addr) | 3968 | u64 ident_addr) |
| 3919 | { | 3969 | { |
| 3920 | kvm->arch.ept_identity_map_addr = ident_addr; | 3970 | return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr); |
| 3921 | return 0; | ||
| 3922 | } | 3971 | } |
| 3923 | 3972 | ||
| 3924 | static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | 3973 | static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, |
| @@ -4178,6 +4227,20 @@ split_irqchip_unlock: | |||
| 4178 | 4227 | ||
| 4179 | r = 0; | 4228 | r = 0; |
| 4180 | break; | 4229 | break; |
| 4230 | case KVM_CAP_X86_DISABLE_EXITS: | ||
| 4231 | r = -EINVAL; | ||
| 4232 | if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) | ||
| 4233 | break; | ||
| 4234 | |||
| 4235 | if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && | ||
| 4236 | kvm_can_mwait_in_guest()) | ||
| 4237 | kvm->arch.mwait_in_guest = true; | ||
| 4238 | if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL) | ||
| 4239 | kvm->arch.hlt_in_guest = true; | ||
| 4240 | if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) | ||
| 4241 | kvm->arch.pause_in_guest = true; | ||
| 4242 | r = 0; | ||
| 4243 | break; | ||
| 4181 | default: | 4244 | default: |
| 4182 | r = -EINVAL; | 4245 | r = -EINVAL; |
| 4183 | break; | 4246 | break; |
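
For context, the two hunks above pair up: KVM_CHECK_EXTENSION(KVM_CAP_X86_DISABLE_EXITS) reports which intercepts may be disabled, and KVM_ENABLE_CAP applies the chosen subset per VM. A minimal userspace sketch, with the exit bits copied locally for illustration (real code would take them from the definitions this series adds):

/*
 * Hedged sketch: query which exits may be disabled, then disable them
 * for this VM.  Bit values are assumed to match the kernel-side
 * KVM_X86_DISABLE_EXITS_* definitions above.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define DISABLE_EXITS_MWAIT (1 << 0)
#define DISABLE_EXITS_HLT   (1 << 1)
#define DISABLE_EXITS_PAUSE (1 << 2)

static int disable_intercepts(int vm_fd)
{
	struct kvm_enable_cap cap;
	int allowed = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_DISABLE_EXITS);

	if (allowed <= 0)
		return -1;	/* capability missing or nothing can be disabled */

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_X86_DISABLE_EXITS;
	cap.args[0] = allowed & (DISABLE_EXITS_MWAIT | DISABLE_EXITS_HLT |
				 DISABLE_EXITS_PAUSE);
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* 0 on success */
}

In practice a VMM would do this before creating vCPUs, since the per-VM flags are consumed when the vCPU intercept configuration is set up.
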
| @@ -4482,6 +4545,15 @@ set_identity_unlock: | |||
| 4482 | r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); | 4545 | r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); |
| 4483 | break; | 4546 | break; |
| 4484 | } | 4547 | } |
| 4548 | case KVM_HYPERV_EVENTFD: { | ||
| 4549 | struct kvm_hyperv_eventfd hvevfd; | ||
| 4550 | |||
| 4551 | r = -EFAULT; | ||
| 4552 | if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) | ||
| 4553 | goto out; | ||
| 4554 | r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); | ||
| 4555 | break; | ||
| 4556 | } | ||
| 4485 | default: | 4557 | default: |
| 4486 | r = -ENOTTY; | 4558 | r = -ENOTTY; |
| 4487 | } | 4559 | } |
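
A hedged sketch of driving the new KVM_HYPERV_EVENTFD ioctl from userspace; the struct kvm_hyperv_eventfd layout (conn_id/fd/flags) is assumed from the uapi addition elsewhere in this series:

/*
 * Hedged sketch: route a Hyper-V SIGNAL_EVENT connection id to an
 * eventfd.  Field names are assumed, illustrative only.
 */
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int hv_eventfd_assign(int vm_fd, unsigned int conn_id)
{
	struct kvm_hyperv_eventfd hvevfd;
	int efd = eventfd(0, EFD_CLOEXEC);

	if (efd < 0)
		return -1;
	memset(&hvevfd, 0, sizeof(hvevfd));
	hvevfd.conn_id = conn_id;
	hvevfd.fd = efd;
	return ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevfd);	/* 0 on success */
}
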
| @@ -4771,6 +4843,30 @@ out: | |||
| 4771 | } | 4843 | } |
| 4772 | EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); | 4844 | EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); |
| 4773 | 4845 | ||
| 4846 | int handle_ud(struct kvm_vcpu *vcpu) | ||
| 4847 | { | ||
| 4848 | int emul_type = EMULTYPE_TRAP_UD; | ||
| 4849 | enum emulation_result er; | ||
| 4850 | char sig[5]; /* ud2; .ascii "kvm" */ | ||
| 4851 | struct x86_exception e; | ||
| 4852 | |||
| 4853 | if (force_emulation_prefix && | ||
| 4854 | kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, | ||
| 4855 | kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 && | ||
| 4856 | memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { | ||
| 4857 | kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); | ||
| 4858 | emul_type = 0; | ||
| 4859 | } | ||
| 4860 | |||
| 4861 | er = emulate_instruction(vcpu, emul_type); | ||
| 4862 | if (er == EMULATE_USER_EXIT) | ||
| 4863 | return 0; | ||
| 4864 | if (er != EMULATE_DONE) | ||
| 4865 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
| 4866 | return 1; | ||
| 4867 | } | ||
| 4868 | EXPORT_SYMBOL_GPL(handle_ud); | ||
| 4869 | |||
| 4774 | static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, | 4870 | static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, |
| 4775 | gpa_t gpa, bool write) | 4871 | gpa_t gpa, bool write) |
| 4776 | { | 4872 | { |
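
handle_ud() above gives tests a way to reach the instruction emulator on demand: with the kvm.force_emulation_prefix module parameter set on the host, a #UD whose faulting bytes are ud2 followed by the ASCII string "kvm" is skipped and the next instruction is emulated. A guest-side sketch of that idiom (illustrative only):

/*
 * Guest-side sketch; assumes the host loaded kvm with
 * force_emulation_prefix=1.  The 5-byte signature matches the sig[]
 * check in handle_ud(): ud2 (0f 0b) followed by "kvm".
 */
#define KVM_FEP "ud2; .ascii \"kvm\"; "

static inline unsigned long force_emulated_mov(unsigned long val)
{
	unsigned long out;

	asm volatile(KVM_FEP "mov %1, %0" : "=r" (out) : "r" (val));
	return out;
}

The #UD vectors into handle_ud(), which strips the signature and hands the following mov to the x86 emulator; test code can use the same trick to exercise emulation paths deterministically.
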
| @@ -5612,27 +5708,27 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) | |||
| 5612 | kvm_rip_write(vcpu, ctxt->eip); | 5708 | kvm_rip_write(vcpu, ctxt->eip); |
| 5613 | kvm_set_rflags(vcpu, ctxt->eflags); | 5709 | kvm_set_rflags(vcpu, ctxt->eflags); |
| 5614 | 5710 | ||
| 5615 | if (irq == NMI_VECTOR) | ||
| 5616 | vcpu->arch.nmi_pending = 0; | ||
| 5617 | else | ||
| 5618 | vcpu->arch.interrupt.pending = false; | ||
| 5619 | |||
| 5620 | return EMULATE_DONE; | 5711 | return EMULATE_DONE; |
| 5621 | } | 5712 | } |
| 5622 | EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); | 5713 | EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); |
| 5623 | 5714 | ||
| 5624 | static int handle_emulation_failure(struct kvm_vcpu *vcpu) | 5715 | static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) |
| 5625 | { | 5716 | { |
| 5626 | int r = EMULATE_DONE; | 5717 | int r = EMULATE_DONE; |
| 5627 | 5718 | ||
| 5628 | ++vcpu->stat.insn_emulation_fail; | 5719 | ++vcpu->stat.insn_emulation_fail; |
| 5629 | trace_kvm_emulate_insn_failed(vcpu); | 5720 | trace_kvm_emulate_insn_failed(vcpu); |
| 5721 | |||
| 5722 | if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) | ||
| 5723 | return EMULATE_FAIL; | ||
| 5724 | |||
| 5630 | if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { | 5725 | if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { |
| 5631 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 5726 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
| 5632 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 5727 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
| 5633 | vcpu->run->internal.ndata = 0; | 5728 | vcpu->run->internal.ndata = 0; |
| 5634 | r = EMULATE_USER_EXIT; | 5729 | r = EMULATE_USER_EXIT; |
| 5635 | } | 5730 | } |
| 5731 | |||
| 5636 | kvm_queue_exception(vcpu, UD_VECTOR); | 5732 | kvm_queue_exception(vcpu, UD_VECTOR); |
| 5637 | 5733 | ||
| 5638 | return r; | 5734 | return r; |
| @@ -5876,6 +5972,37 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | |||
| 5876 | return false; | 5972 | return false; |
| 5877 | } | 5973 | } |
| 5878 | 5974 | ||
| 5975 | static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) | ||
| 5976 | { | ||
| 5977 | switch (ctxt->opcode_len) { | ||
| 5978 | case 1: | ||
| 5979 | switch (ctxt->b) { | ||
| 5980 | case 0xe4: /* IN */ | ||
| 5981 | case 0xe5: | ||
| 5982 | case 0xec: | ||
| 5983 | case 0xed: | ||
| 5984 | case 0xe6: /* OUT */ | ||
| 5985 | case 0xe7: | ||
| 5986 | case 0xee: | ||
| 5987 | case 0xef: | ||
| 5988 | case 0x6c: /* INS */ | ||
| 5989 | case 0x6d: | ||
| 5990 | case 0x6e: /* OUTS */ | ||
| 5991 | case 0x6f: | ||
| 5992 | return true; | ||
| 5993 | } | ||
| 5994 | break; | ||
| 5995 | case 2: | ||
| 5996 | switch (ctxt->b) { | ||
| 5997 | case 0x33: /* RDPMC */ | ||
| 5998 | return true; | ||
| 5999 | } | ||
| 6000 | break; | ||
| 6001 | } | ||
| 6002 | |||
| 6003 | return false; | ||
| 6004 | } | ||
| 6005 | |||
| 5879 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, | 6006 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, |
| 5880 | unsigned long cr2, | 6007 | unsigned long cr2, |
| 5881 | int emulation_type, | 6008 | int emulation_type, |
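
The opcode allow-list above covers the classic VMware backdoor, which guests invoke as port I/O with a magic value in EAX. A guest-side sketch of such a call when the host runs with kvm.enable_vmware_backdoor=1; the magic/port values are the well-known 0x564D5868/0x5658, and the command number 10 ("get version") is used here only as an example:

/*
 * Guest-side sketch of a VMware backdoor call (host must set
 * kvm.enable_vmware_backdoor=1).
 */
static inline unsigned int vmware_backdoor_get_version(void)
{
	unsigned int eax = 0x564D5868;	/* "VMXh" magic */
	unsigned int ebx = ~0u;
	unsigned int ecx = 10;		/* assumed: get-version command */
	unsigned int edx = 0x5658;	/* "VX" I/O port */

	asm volatile("inl %%dx, %%eax"
		     : "+a" (eax), "+b" (ebx), "+c" (ecx), "+d" (edx));
	return eax;
}

These I/O opcodes normally fault at CPL3; the series re-runs the faulting instruction through the emulator with EMULTYPE_VMWARE, and the allow-list above keeps that path limited to the backdoor opcodes.
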
| @@ -5928,10 +6055,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 5928 | return EMULATE_DONE; | 6055 | return EMULATE_DONE; |
| 5929 | if (emulation_type & EMULTYPE_SKIP) | 6056 | if (emulation_type & EMULTYPE_SKIP) |
| 5930 | return EMULATE_FAIL; | 6057 | return EMULATE_FAIL; |
| 5931 | return handle_emulation_failure(vcpu); | 6058 | return handle_emulation_failure(vcpu, emulation_type); |
| 5932 | } | 6059 | } |
| 5933 | } | 6060 | } |
| 5934 | 6061 | ||
| 6062 | if ((emulation_type & EMULTYPE_VMWARE) && | ||
| 6063 | !is_vmware_backdoor_opcode(ctxt)) | ||
| 6064 | return EMULATE_FAIL; | ||
| 6065 | |||
| 5935 | if (emulation_type & EMULTYPE_SKIP) { | 6066 | if (emulation_type & EMULTYPE_SKIP) { |
| 5936 | kvm_rip_write(vcpu, ctxt->_eip); | 6067 | kvm_rip_write(vcpu, ctxt->_eip); |
| 5937 | if (ctxt->eflags & X86_EFLAGS_RF) | 6068 | if (ctxt->eflags & X86_EFLAGS_RF) |
| @@ -5963,7 +6094,7 @@ restart: | |||
| 5963 | emulation_type)) | 6094 | emulation_type)) |
| 5964 | return EMULATE_DONE; | 6095 | return EMULATE_DONE; |
| 5965 | 6096 | ||
| 5966 | return handle_emulation_failure(vcpu); | 6097 | return handle_emulation_failure(vcpu, emulation_type); |
| 5967 | } | 6098 | } |
| 5968 | 6099 | ||
| 5969 | if (ctxt->have_exception) { | 6100 | if (ctxt->have_exception) { |
| @@ -6016,7 +6147,8 @@ restart: | |||
| 6016 | } | 6147 | } |
| 6017 | EXPORT_SYMBOL_GPL(x86_emulate_instruction); | 6148 | EXPORT_SYMBOL_GPL(x86_emulate_instruction); |
| 6018 | 6149 | ||
| 6019 | int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | 6150 | static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, |
| 6151 | unsigned short port) | ||
| 6020 | { | 6152 | { |
| 6021 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 6153 | unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 6022 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, | 6154 | int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, |
| @@ -6025,7 +6157,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) | |||
| 6025 | vcpu->arch.pio.count = 0; | 6157 | vcpu->arch.pio.count = 0; |
| 6026 | return ret; | 6158 | return ret; |
| 6027 | } | 6159 | } |
| 6028 | EXPORT_SYMBOL_GPL(kvm_fast_pio_out); | ||
| 6029 | 6160 | ||
| 6030 | static int complete_fast_pio_in(struct kvm_vcpu *vcpu) | 6161 | static int complete_fast_pio_in(struct kvm_vcpu *vcpu) |
| 6031 | { | 6162 | { |
| @@ -6049,7 +6180,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) | |||
| 6049 | return 1; | 6180 | return 1; |
| 6050 | } | 6181 | } |
| 6051 | 6182 | ||
| 6052 | int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) | 6183 | static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, |
| 6184 | unsigned short port) | ||
| 6053 | { | 6185 | { |
| 6054 | unsigned long val; | 6186 | unsigned long val; |
| 6055 | int ret; | 6187 | int ret; |
| @@ -6068,7 +6200,21 @@ int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) | |||
| 6068 | 6200 | ||
| 6069 | return 0; | 6201 | return 0; |
| 6070 | } | 6202 | } |
| 6071 | EXPORT_SYMBOL_GPL(kvm_fast_pio_in); | 6203 | |
| 6204 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) | ||
| 6205 | { | ||
| 6206 | int ret = kvm_skip_emulated_instruction(vcpu); | ||
| 6207 | |||
| 6208 | /* | ||
| 6209 | * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered | ||
| 6210 | * KVM_EXIT_DEBUG here. | ||
| 6211 | */ | ||
| 6212 | if (in) | ||
| 6213 | return kvm_fast_pio_in(vcpu, size, port) && ret; | ||
| 6214 | else | ||
| 6215 | return kvm_fast_pio_out(vcpu, size, port) && ret; | ||
| 6216 | } | ||
| 6217 | EXPORT_SYMBOL_GPL(kvm_fast_pio); | ||
| 6072 | 6218 | ||
| 6073 | static int kvmclock_cpu_down_prep(unsigned int cpu) | 6219 | static int kvmclock_cpu_down_prep(unsigned int cpu) |
| 6074 | { | 6220 | { |
| @@ -6246,7 +6392,8 @@ static void kvm_timer_init(void) | |||
| 6246 | kvmclock_cpu_online, kvmclock_cpu_down_prep); | 6392 | kvmclock_cpu_online, kvmclock_cpu_down_prep); |
| 6247 | } | 6393 | } |
| 6248 | 6394 | ||
| 6249 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 6395 | DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
| 6396 | EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); | ||
| 6250 | 6397 | ||
| 6251 | int kvm_is_in_guest(void) | 6398 | int kvm_is_in_guest(void) |
| 6252 | { | 6399 | { |
| @@ -6279,18 +6426,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { | |||
| 6279 | .get_guest_ip = kvm_get_guest_ip, | 6426 | .get_guest_ip = kvm_get_guest_ip, |
| 6280 | }; | 6427 | }; |
| 6281 | 6428 | ||
| 6282 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | ||
| 6283 | { | ||
| 6284 | __this_cpu_write(current_vcpu, vcpu); | ||
| 6285 | } | ||
| 6286 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | ||
| 6287 | |||
| 6288 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | ||
| 6289 | { | ||
| 6290 | __this_cpu_write(current_vcpu, NULL); | ||
| 6291 | } | ||
| 6292 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | ||
| 6293 | |||
| 6294 | static void kvm_set_mmio_spte_mask(void) | 6429 | static void kvm_set_mmio_spte_mask(void) |
| 6295 | { | 6430 | { |
| 6296 | u64 mask; | 6431 | u64 mask; |
| @@ -6644,27 +6779,36 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
| 6644 | int r; | 6779 | int r; |
| 6645 | 6780 | ||
| 6646 | /* try to reinject previous events if any */ | 6781 | /* try to reinject previous events if any */ |
| 6647 | if (vcpu->arch.exception.injected) { | ||
| 6648 | kvm_x86_ops->queue_exception(vcpu); | ||
| 6649 | return 0; | ||
| 6650 | } | ||
| 6651 | 6782 | ||
| 6783 | if (vcpu->arch.exception.injected) | ||
| 6784 | kvm_x86_ops->queue_exception(vcpu); | ||
| 6652 | /* | 6785 | /* |
| 6653 | * Exceptions must be injected immediately, or the exception | 6786 | * Do not inject an NMI or interrupt if there is a pending |
| 6654 | * frame will have the address of the NMI or interrupt handler. | 6787 | * exception. Exceptions and interrupts are recognized at |
| 6788 | * instruction boundaries, i.e. the start of an instruction. | ||
| 6789 | * Trap-like exceptions, e.g. #DB, have higher priority than | ||
| 6790 | * NMIs and interrupts, i.e. traps are recognized before an | ||
| 6791 | * NMI/interrupt that's pending on the same instruction. | ||
| 6792 | * Fault-like exceptions, e.g. #GP and #PF, are the lowest | ||
| 6793 | * priority, but are only generated (pended) during instruction | ||
| 6794 | * execution, i.e. a pending fault-like exception means the | ||
| 6795 | * fault occurred on the *previous* instruction and must be | ||
| 6796 | * serviced prior to recognizing any new events in order to | ||
| 6797 | * fully complete the previous instruction. | ||
| 6655 | */ | 6798 | */ |
| 6656 | if (!vcpu->arch.exception.pending) { | 6799 | else if (!vcpu->arch.exception.pending) { |
| 6657 | if (vcpu->arch.nmi_injected) { | 6800 | if (vcpu->arch.nmi_injected) |
| 6658 | kvm_x86_ops->set_nmi(vcpu); | 6801 | kvm_x86_ops->set_nmi(vcpu); |
| 6659 | return 0; | 6802 | else if (vcpu->arch.interrupt.injected) |
| 6660 | } | ||
| 6661 | |||
| 6662 | if (vcpu->arch.interrupt.pending) { | ||
| 6663 | kvm_x86_ops->set_irq(vcpu); | 6803 | kvm_x86_ops->set_irq(vcpu); |
| 6664 | return 0; | ||
| 6665 | } | ||
| 6666 | } | 6804 | } |
| 6667 | 6805 | ||
| 6806 | /* | ||
| 6807 | * Call check_nested_events() even if we reinjected a previous event | ||
| 6808 | * in order for caller to determine if it should require immediate-exit | ||
| 6809 | * from L2 to L1 due to pending L1 events which require exit | ||
| 6810 | * from L2 to L1. | ||
| 6811 | */ | ||
| 6668 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { | 6812 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { |
| 6669 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); | 6813 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); |
| 6670 | if (r != 0) | 6814 | if (r != 0) |
| @@ -6677,6 +6821,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
| 6677 | vcpu->arch.exception.has_error_code, | 6821 | vcpu->arch.exception.has_error_code, |
| 6678 | vcpu->arch.exception.error_code); | 6822 | vcpu->arch.exception.error_code); |
| 6679 | 6823 | ||
| 6824 | WARN_ON_ONCE(vcpu->arch.exception.injected); | ||
| 6680 | vcpu->arch.exception.pending = false; | 6825 | vcpu->arch.exception.pending = false; |
| 6681 | vcpu->arch.exception.injected = true; | 6826 | vcpu->arch.exception.injected = true; |
| 6682 | 6827 | ||
| @@ -6691,7 +6836,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
| 6691 | } | 6836 | } |
| 6692 | 6837 | ||
| 6693 | kvm_x86_ops->queue_exception(vcpu); | 6838 | kvm_x86_ops->queue_exception(vcpu); |
| 6694 | } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) { | 6839 | } |
| 6840 | |||
| 6841 | /* Don't consider new event if we re-injected an event */ | ||
| 6842 | if (kvm_event_needs_reinjection(vcpu)) | ||
| 6843 | return 0; | ||
| 6844 | |||
| 6845 | if (vcpu->arch.smi_pending && !is_smm(vcpu) && | ||
| 6846 | kvm_x86_ops->smi_allowed(vcpu)) { | ||
| 6695 | vcpu->arch.smi_pending = false; | 6847 | vcpu->arch.smi_pending = false; |
| 6696 | ++vcpu->arch.smi_count; | 6848 | ++vcpu->arch.smi_count; |
| 6697 | enter_smm(vcpu); | 6849 | enter_smm(vcpu); |
| @@ -6985,8 +7137,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) | |||
| 6985 | 7137 | ||
| 6986 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | 7138 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
| 6987 | { | 7139 | { |
| 6988 | u64 eoi_exit_bitmap[4]; | ||
| 6989 | |||
| 6990 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | 7140 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) |
| 6991 | return; | 7141 | return; |
| 6992 | 7142 | ||
| @@ -6999,6 +7149,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
| 6999 | kvm_x86_ops->sync_pir_to_irr(vcpu); | 7149 | kvm_x86_ops->sync_pir_to_irr(vcpu); |
| 7000 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); | 7150 | kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); |
| 7001 | } | 7151 | } |
| 7152 | |||
| 7153 | if (is_guest_mode(vcpu)) | ||
| 7154 | vcpu->arch.load_eoi_exitmap_pending = true; | ||
| 7155 | else | ||
| 7156 | kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); | ||
| 7157 | } | ||
| 7158 | |||
| 7159 | static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) | ||
| 7160 | { | ||
| 7161 | u64 eoi_exit_bitmap[4]; | ||
| 7162 | |||
| 7163 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | ||
| 7164 | return; | ||
| 7165 | |||
| 7002 | bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, | 7166 | bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, |
| 7003 | vcpu_to_synic(vcpu)->vec_bitmap, 256); | 7167 | vcpu_to_synic(vcpu)->vec_bitmap, 256); |
| 7004 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | 7168 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); |
| @@ -7113,6 +7277,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 7113 | } | 7277 | } |
| 7114 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) | 7278 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
| 7115 | vcpu_scan_ioapic(vcpu); | 7279 | vcpu_scan_ioapic(vcpu); |
| 7280 | if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) | ||
| 7281 | vcpu_load_eoi_exitmap(vcpu); | ||
| 7116 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) | 7282 | if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) |
| 7117 | kvm_vcpu_reload_apic_access_page(vcpu); | 7283 | kvm_vcpu_reload_apic_access_page(vcpu); |
| 7118 | if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { | 7284 | if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { |
| @@ -7291,7 +7457,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
| 7291 | 7457 | ||
| 7292 | kvm_put_guest_xcr0(vcpu); | 7458 | kvm_put_guest_xcr0(vcpu); |
| 7293 | 7459 | ||
| 7460 | kvm_before_interrupt(vcpu); | ||
| 7294 | kvm_x86_ops->handle_external_intr(vcpu); | 7461 | kvm_x86_ops->handle_external_intr(vcpu); |
| 7462 | kvm_after_interrupt(vcpu); | ||
| 7295 | 7463 | ||
| 7296 | ++vcpu->stat.exits; | 7464 | ++vcpu->stat.exits; |
| 7297 | 7465 | ||
| @@ -7500,7 +7668,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) | |||
| 7500 | return 0; | 7668 | return 0; |
| 7501 | } | 7669 | } |
| 7502 | 7670 | ||
| 7503 | |||
| 7504 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 7671 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 7505 | { | 7672 | { |
| 7506 | int r; | 7673 | int r; |
| @@ -7526,6 +7693,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 7526 | goto out; | 7693 | goto out; |
| 7527 | } | 7694 | } |
| 7528 | 7695 | ||
| 7696 | if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { | ||
| 7697 | r = -EINVAL; | ||
| 7698 | goto out; | ||
| 7699 | } | ||
| 7700 | |||
| 7701 | if (vcpu->run->kvm_dirty_regs) { | ||
| 7702 | r = sync_regs(vcpu); | ||
| 7703 | if (r != 0) | ||
| 7704 | goto out; | ||
| 7705 | } | ||
| 7706 | |||
| 7529 | /* re-sync apic's tpr */ | 7707 | /* re-sync apic's tpr */ |
| 7530 | if (!lapic_in_kernel(vcpu)) { | 7708 | if (!lapic_in_kernel(vcpu)) { |
| 7531 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { | 7709 | if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { |
| @@ -7550,6 +7728,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 7550 | 7728 | ||
| 7551 | out: | 7729 | out: |
| 7552 | kvm_put_guest_fpu(vcpu); | 7730 | kvm_put_guest_fpu(vcpu); |
| 7731 | if (vcpu->run->kvm_valid_regs) | ||
| 7732 | store_regs(vcpu); | ||
| 7553 | post_kvm_run_save(vcpu); | 7733 | post_kvm_run_save(vcpu); |
| 7554 | kvm_sigset_deactivate(vcpu); | 7734 | kvm_sigset_deactivate(vcpu); |
| 7555 | 7735 | ||
| @@ -7557,10 +7737,8 @@ out: | |||
| 7557 | return r; | 7737 | return r; |
| 7558 | } | 7738 | } |
| 7559 | 7739 | ||
| 7560 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 7740 | static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
| 7561 | { | 7741 | { |
| 7562 | vcpu_load(vcpu); | ||
| 7563 | |||
| 7564 | if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { | 7742 | if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { |
| 7565 | /* | 7743 | /* |
| 7566 | * We are here if userspace calls get_regs() in the middle of | 7744 | * We are here if userspace calls get_regs() in the middle of |
| @@ -7593,15 +7771,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 7593 | 7771 | ||
| 7594 | regs->rip = kvm_rip_read(vcpu); | 7772 | regs->rip = kvm_rip_read(vcpu); |
| 7595 | regs->rflags = kvm_get_rflags(vcpu); | 7773 | regs->rflags = kvm_get_rflags(vcpu); |
| 7774 | } | ||
| 7596 | 7775 | ||
| 7776 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
| 7777 | { | ||
| 7778 | vcpu_load(vcpu); | ||
| 7779 | __get_regs(vcpu, regs); | ||
| 7597 | vcpu_put(vcpu); | 7780 | vcpu_put(vcpu); |
| 7598 | return 0; | 7781 | return 0; |
| 7599 | } | 7782 | } |
| 7600 | 7783 | ||
| 7601 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 7784 | static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
| 7602 | { | 7785 | { |
| 7603 | vcpu_load(vcpu); | ||
| 7604 | |||
| 7605 | vcpu->arch.emulate_regs_need_sync_from_vcpu = true; | 7786 | vcpu->arch.emulate_regs_need_sync_from_vcpu = true; |
| 7606 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 7787 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
| 7607 | 7788 | ||
| @@ -7630,7 +7811,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 7630 | vcpu->arch.exception.pending = false; | 7811 | vcpu->arch.exception.pending = false; |
| 7631 | 7812 | ||
| 7632 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 7813 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
| 7814 | } | ||
| 7633 | 7815 | ||
| 7816 | int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | ||
| 7817 | { | ||
| 7818 | vcpu_load(vcpu); | ||
| 7819 | __set_regs(vcpu, regs); | ||
| 7634 | vcpu_put(vcpu); | 7820 | vcpu_put(vcpu); |
| 7635 | return 0; | 7821 | return 0; |
| 7636 | } | 7822 | } |
| @@ -7645,13 +7831,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
| 7645 | } | 7831 | } |
| 7646 | EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); | 7832 | EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); |
| 7647 | 7833 | ||
| 7648 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | 7834 | static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) |
| 7649 | struct kvm_sregs *sregs) | ||
| 7650 | { | 7835 | { |
| 7651 | struct desc_ptr dt; | 7836 | struct desc_ptr dt; |
| 7652 | 7837 | ||
| 7653 | vcpu_load(vcpu); | ||
| 7654 | |||
| 7655 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 7838 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
| 7656 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 7839 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
| 7657 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); | 7840 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); |
| @@ -7679,10 +7862,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 7679 | 7862 | ||
| 7680 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); | 7863 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
| 7681 | 7864 | ||
| 7682 | if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) | 7865 | if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft) |
| 7683 | set_bit(vcpu->arch.interrupt.nr, | 7866 | set_bit(vcpu->arch.interrupt.nr, |
| 7684 | (unsigned long *)sregs->interrupt_bitmap); | 7867 | (unsigned long *)sregs->interrupt_bitmap); |
| 7868 | } | ||
| 7685 | 7869 | ||
| 7870 | int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | ||
| 7871 | struct kvm_sregs *sregs) | ||
| 7872 | { | ||
| 7873 | vcpu_load(vcpu); | ||
| 7874 | __get_sregs(vcpu, sregs); | ||
| 7686 | vcpu_put(vcpu); | 7875 | vcpu_put(vcpu); |
| 7687 | return 0; | 7876 | return 0; |
| 7688 | } | 7877 | } |
| @@ -7754,7 +7943,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, | |||
| 7754 | } | 7943 | } |
| 7755 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 7944 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
| 7756 | 7945 | ||
| 7757 | int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | 7946 | static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) |
| 7758 | { | 7947 | { |
| 7759 | if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { | 7948 | if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { |
| 7760 | /* | 7949 | /* |
| @@ -7777,8 +7966,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
| 7777 | return 0; | 7966 | return 0; |
| 7778 | } | 7967 | } |
| 7779 | 7968 | ||
| 7780 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 7969 | static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) |
| 7781 | struct kvm_sregs *sregs) | ||
| 7782 | { | 7970 | { |
| 7783 | struct msr_data apic_base_msr; | 7971 | struct msr_data apic_base_msr; |
| 7784 | int mmu_reset_needed = 0; | 7972 | int mmu_reset_needed = 0; |
| @@ -7786,8 +7974,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 7786 | struct desc_ptr dt; | 7974 | struct desc_ptr dt; |
| 7787 | int ret = -EINVAL; | 7975 | int ret = -EINVAL; |
| 7788 | 7976 | ||
| 7789 | vcpu_load(vcpu); | ||
| 7790 | |||
| 7791 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && | 7977 | if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && |
| 7792 | (sregs->cr4 & X86_CR4_OSXSAVE)) | 7978 | (sregs->cr4 & X86_CR4_OSXSAVE)) |
| 7793 | goto out; | 7979 | goto out; |
| @@ -7866,6 +8052,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 7866 | 8052 | ||
| 7867 | ret = 0; | 8053 | ret = 0; |
| 7868 | out: | 8054 | out: |
| 8055 | return ret; | ||
| 8056 | } | ||
| 8057 | |||
| 8058 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | ||
| 8059 | struct kvm_sregs *sregs) | ||
| 8060 | { | ||
| 8061 | int ret; | ||
| 8062 | |||
| 8063 | vcpu_load(vcpu); | ||
| 8064 | ret = __set_sregs(vcpu, sregs); | ||
| 7869 | vcpu_put(vcpu); | 8065 | vcpu_put(vcpu); |
| 7870 | return ret; | 8066 | return ret; |
| 7871 | } | 8067 | } |
| @@ -7992,6 +8188,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 7992 | return 0; | 8188 | return 0; |
| 7993 | } | 8189 | } |
| 7994 | 8190 | ||
| 8191 | static void store_regs(struct kvm_vcpu *vcpu) | ||
| 8192 | { | ||
| 8193 | BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); | ||
| 8194 | |||
| 8195 | if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) | ||
| 8196 | __get_regs(vcpu, &vcpu->run->s.regs.regs); | ||
| 8197 | |||
| 8198 | if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) | ||
| 8199 | __get_sregs(vcpu, &vcpu->run->s.regs.sregs); | ||
| 8200 | |||
| 8201 | if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) | ||
| 8202 | kvm_vcpu_ioctl_x86_get_vcpu_events( | ||
| 8203 | vcpu, &vcpu->run->s.regs.events); | ||
| 8204 | } | ||
| 8205 | |||
| 8206 | static int sync_regs(struct kvm_vcpu *vcpu) | ||
| 8207 | { | ||
| 8208 | if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) | ||
| 8209 | return -EINVAL; | ||
| 8210 | |||
| 8211 | if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { | ||
| 8212 | __set_regs(vcpu, &vcpu->run->s.regs.regs); | ||
| 8213 | vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; | ||
| 8214 | } | ||
| 8215 | if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { | ||
| 8216 | if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) | ||
| 8217 | return -EINVAL; | ||
| 8218 | vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; | ||
| 8219 | } | ||
| 8220 | if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { | ||
| 8221 | if (kvm_vcpu_ioctl_x86_set_vcpu_events( | ||
| 8222 | vcpu, &vcpu->run->s.regs.events)) | ||
| 8223 | return -EINVAL; | ||
| 8224 | vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; | ||
| 8225 | } | ||
| 8226 | |||
| 8227 | return 0; | ||
| 8228 | } | ||
| 8229 | |||
| 7995 | static void fx_init(struct kvm_vcpu *vcpu) | 8230 | static void fx_init(struct kvm_vcpu *vcpu) |
| 7996 | { | 8231 | { |
| 7997 | fpstate_init(&vcpu->arch.guest_fpu.state); | 8232 | fpstate_init(&vcpu->arch.guest_fpu.state); |
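
store_regs()/sync_regs() above implement the x86 side of KVM_CAP_SYNC_REGS: userspace flags which register blocks it wants copied out (kvm_valid_regs) or pushed in (kvm_dirty_regs) around KVM_RUN, avoiding separate GET/SET ioctls. A hedged userspace sketch, assuming the KVM_SYNC_X86_REGS flag and the regs/sregs/events members of struct kvm_sync_regs added elsewhere in this series, and that a previous KVM_RUN already populated run->s.regs:

/*
 * Hedged userspace sketch of the KVM_CAP_SYNC_REGS fast path.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int skip_insn_and_run(int vcpu_fd, struct kvm_run *run, int insn_len)
{
	/* request GPRs in the shared run page on return ... */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS;
	/* ... and push an updated RIP in without a KVM_SET_REGS ioctl */
	run->s.regs.regs.rip += insn_len;
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
	return ioctl(vcpu_fd, KVM_RUN, 0);
}
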
| @@ -8447,7 +8682,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 8447 | 8682 | ||
| 8448 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); | 8683 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
| 8449 | mutex_init(&kvm->arch.apic_map_lock); | 8684 | mutex_init(&kvm->arch.apic_map_lock); |
| 8450 | mutex_init(&kvm->arch.hyperv.hv_lock); | ||
| 8451 | spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); | 8685 | spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); |
| 8452 | 8686 | ||
| 8453 | kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); | 8687 | kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); |
| @@ -8456,6 +8690,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
| 8456 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); | 8690 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); |
| 8457 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); | 8691 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); |
| 8458 | 8692 | ||
| 8693 | kvm_hv_init_vm(kvm); | ||
| 8459 | kvm_page_track_init(kvm); | 8694 | kvm_page_track_init(kvm); |
| 8460 | kvm_mmu_init_vm(kvm); | 8695 | kvm_mmu_init_vm(kvm); |
| 8461 | 8696 | ||
| @@ -8586,6 +8821,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
| 8586 | kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); | 8821 | kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); |
| 8587 | kvm_mmu_uninit_vm(kvm); | 8822 | kvm_mmu_uninit_vm(kvm); |
| 8588 | kvm_page_track_cleanup(kvm); | 8823 | kvm_page_track_cleanup(kvm); |
| 8824 | kvm_hv_destroy_vm(kvm); | ||
| 8589 | } | 8825 | } |
| 8590 | 8826 | ||
| 8591 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, | 8827 | void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b91215d1fd80..7d35ce672989 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -2,12 +2,48 @@ | |||
| 2 | #ifndef ARCH_X86_KVM_X86_H | 2 | #ifndef ARCH_X86_KVM_X86_H |
| 3 | #define ARCH_X86_KVM_X86_H | 3 | #define ARCH_X86_KVM_X86_H |
| 4 | 4 | ||
| 5 | #include <asm/processor.h> | ||
| 6 | #include <asm/mwait.h> | ||
| 7 | #include <linux/kvm_host.h> | 5 | #include <linux/kvm_host.h> |
| 8 | #include <asm/pvclock.h> | 6 | #include <asm/pvclock.h> |
| 9 | #include "kvm_cache_regs.h" | 7 | #include "kvm_cache_regs.h" |
| 10 | 8 | ||
| 9 | #define KVM_DEFAULT_PLE_GAP 128 | ||
| 10 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
| 11 | #define KVM_DEFAULT_PLE_WINDOW_GROW 2 | ||
| 12 | #define KVM_DEFAULT_PLE_WINDOW_SHRINK 0 | ||
| 13 | #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX | ||
| 14 | #define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX | ||
| 15 | #define KVM_SVM_DEFAULT_PLE_WINDOW 3000 | ||
| 16 | |||
| 17 | static inline unsigned int __grow_ple_window(unsigned int val, | ||
| 18 | unsigned int base, unsigned int modifier, unsigned int max) | ||
| 19 | { | ||
| 20 | u64 ret = val; | ||
| 21 | |||
| 22 | if (modifier < 1) | ||
| 23 | return base; | ||
| 24 | |||
| 25 | if (modifier < base) | ||
| 26 | ret *= modifier; | ||
| 27 | else | ||
| 28 | ret += modifier; | ||
| 29 | |||
| 30 | return min(ret, (u64)max); | ||
| 31 | } | ||
| 32 | |||
| 33 | static inline unsigned int __shrink_ple_window(unsigned int val, | ||
| 34 | unsigned int base, unsigned int modifier, unsigned int min) | ||
| 35 | { | ||
| 36 | if (modifier < 1) | ||
| 37 | return base; | ||
| 38 | |||
| 39 | if (modifier < base) | ||
| 40 | val /= modifier; | ||
| 41 | else | ||
| 42 | val -= modifier; | ||
| 43 | |||
| 44 | return max(val, min); | ||
| 45 | } | ||
| 46 | |||
| 11 | #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL | 47 | #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL |
| 12 | 48 | ||
| 13 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 49 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
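
The __grow_ple_window()/__shrink_ple_window() helpers above centralize the pause-loop-exiting window policy shared by VMX and SVM. A quick standalone check (not part of the patch) of the arithmetic with the VMX defaults: a grow modifier of 2 doubles the window up to the cap, and because the default shrink modifier is 0, shrinking snaps straight back to the base window.

/*
 * Standalone check of the PLE window arithmetic; mirrors the helpers
 * above with the VMX default values.
 */
#include <limits.h>
#include <stdio.h>

static unsigned int grow(unsigned int val, unsigned int base,
			 unsigned int mod, unsigned int max)
{
	unsigned long long ret = val;

	if (mod < 1)
		return base;
	ret = (mod < base) ? ret * mod : ret + mod;
	return ret > max ? max : (unsigned int)ret;
}

static unsigned int shrink(unsigned int val, unsigned int base,
			   unsigned int mod, unsigned int min)
{
	if (mod < 1)
		return base;
	val = (mod < base) ? val / mod : val - mod;
	return val > min ? val : min;
}

int main(void)
{
	unsigned int w = 4096;			/* KVM_VMX_DEFAULT_PLE_WINDOW */

	w = grow(w, 4096, 2, UINT_MAX);		/* default grow of 2 -> 8192 */
	printf("grown window:  %u\n", w);
	w = shrink(w, 4096, 0, 0);		/* default shrink of 0 -> back to base */
	printf("shrunk window: %u\n", w);
	return 0;
}
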
| @@ -19,19 +55,19 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | |||
| 19 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, | 55 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, |
| 20 | bool soft) | 56 | bool soft) |
| 21 | { | 57 | { |
| 22 | vcpu->arch.interrupt.pending = true; | 58 | vcpu->arch.interrupt.injected = true; |
| 23 | vcpu->arch.interrupt.soft = soft; | 59 | vcpu->arch.interrupt.soft = soft; |
| 24 | vcpu->arch.interrupt.nr = vector; | 60 | vcpu->arch.interrupt.nr = vector; |
| 25 | } | 61 | } |
| 26 | 62 | ||
| 27 | static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | 63 | static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) |
| 28 | { | 64 | { |
| 29 | vcpu->arch.interrupt.pending = false; | 65 | vcpu->arch.interrupt.injected = false; |
| 30 | } | 66 | } |
| 31 | 67 | ||
| 32 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) | 68 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) |
| 33 | { | 69 | { |
| 34 | return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || | 70 | return vcpu->arch.exception.injected || vcpu->arch.interrupt.injected || |
| 35 | vcpu->arch.nmi_injected; | 71 | vcpu->arch.nmi_injected; |
| 36 | } | 72 | } |
| 37 | 73 | ||
| @@ -205,8 +241,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) | |||
| 205 | return !(kvm->arch.disabled_quirks & quirk); | 241 | return !(kvm->arch.disabled_quirks & quirk); |
| 206 | } | 242 | } |
| 207 | 243 | ||
| 208 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | ||
| 209 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | ||
| 210 | void kvm_set_pending_timer(struct kvm_vcpu *vcpu); | 244 | void kvm_set_pending_timer(struct kvm_vcpu *vcpu); |
| 211 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); | 245 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
| 212 | 246 | ||
| @@ -221,6 +255,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
| 221 | gva_t addr, void *val, unsigned int bytes, | 255 | gva_t addr, void *val, unsigned int bytes, |
| 222 | struct x86_exception *exception); | 256 | struct x86_exception *exception); |
| 223 | 257 | ||
| 258 | int handle_ud(struct kvm_vcpu *vcpu); | ||
| 259 | |||
| 224 | void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); | 260 | void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); |
| 225 | u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | 261 | u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); |
| 226 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 262 | bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
| @@ -242,6 +278,8 @@ extern unsigned int min_timer_period_us; | |||
| 242 | 278 | ||
| 243 | extern unsigned int lapic_timer_advance_ns; | 279 | extern unsigned int lapic_timer_advance_ns; |
| 244 | 280 | ||
| 281 | extern bool enable_vmware_backdoor; | ||
| 282 | |||
| 245 | extern struct static_key kvm_no_apic_vcpu; | 283 | extern struct static_key kvm_no_apic_vcpu; |
| 246 | 284 | ||
| 247 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | 285 | static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) |
| @@ -264,10 +302,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | |||
| 264 | __rem; \ | 302 | __rem; \ |
| 265 | }) | 303 | }) |
| 266 | 304 | ||
| 267 | static inline bool kvm_mwait_in_guest(void) | 305 | #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) |
| 306 | #define KVM_X86_DISABLE_EXITS_HTL (1 << 1) | ||
| 307 | #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) | ||
| 308 | #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ | ||
| 309 | KVM_X86_DISABLE_EXITS_HTL | \ | ||
| 310 | KVM_X86_DISABLE_EXITS_PAUSE) | ||
| 311 | |||
| 312 | static inline bool kvm_mwait_in_guest(struct kvm *kvm) | ||
| 313 | { | ||
| 314 | return kvm->arch.mwait_in_guest; | ||
| 315 | } | ||
| 316 | |||
| 317 | static inline bool kvm_hlt_in_guest(struct kvm *kvm) | ||
| 318 | { | ||
| 319 | return kvm->arch.hlt_in_guest; | ||
| 320 | } | ||
| 321 | |||
| 322 | static inline bool kvm_pause_in_guest(struct kvm *kvm) | ||
| 323 | { | ||
| 324 | return kvm->arch.pause_in_guest; | ||
| 325 | } | ||
| 326 | |||
| 327 | DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); | ||
| 328 | |||
| 329 | static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) | ||
| 330 | { | ||
| 331 | __this_cpu_write(current_vcpu, vcpu); | ||
| 332 | } | ||
| 333 | |||
| 334 | static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) | ||
| 268 | { | 335 | { |
| 269 | return boot_cpu_has(X86_FEATURE_MWAIT) && | 336 | __this_cpu_write(current_vcpu, NULL); |
| 270 | !boot_cpu_has_bug(X86_BUG_MONITOR); | ||
| 271 | } | 337 | } |
| 272 | 338 | ||
| 273 | #endif | 339 | #endif |
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 447371f4de56..72855182b191 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c | |||
| @@ -31,7 +31,6 @@ | |||
| 31 | #include <linux/vmalloc.h> | 31 | #include <linux/vmalloc.h> |
| 32 | #include <linux/hyperv.h> | 32 | #include <linux/hyperv.h> |
| 33 | #include <linux/export.h> | 33 | #include <linux/export.h> |
| 34 | #include <asm/hyperv.h> | ||
| 35 | #include <asm/mshyperv.h> | 34 | #include <asm/mshyperv.h> |
| 36 | 35 | ||
| 37 | #include "hyperv_vmbus.h" | 36 | #include "hyperv_vmbus.h" |
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 8137b3885b99..9b82549cbbc8 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c | |||
| @@ -29,7 +29,6 @@ | |||
| 29 | #include <linux/version.h> | 29 | #include <linux/version.h> |
| 30 | #include <linux/random.h> | 30 | #include <linux/random.h> |
| 31 | #include <linux/clockchips.h> | 31 | #include <linux/clockchips.h> |
| 32 | #include <asm/hyperv.h> | ||
| 33 | #include <asm/mshyperv.h> | 32 | #include <asm/mshyperv.h> |
| 34 | #include "hyperv_vmbus.h" | 33 | #include "hyperv_vmbus.h" |
| 35 | 34 | ||
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 36d34fe3ccb3..f761bef36e77 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | 27 | ||
| 28 | #include <linux/list.h> | 28 | #include <linux/list.h> |
| 29 | #include <asm/sync_bitops.h> | 29 | #include <asm/sync_bitops.h> |
| 30 | #include <asm/hyperv-tlfs.h> | ||
| 30 | #include <linux/atomic.h> | 31 | #include <linux/atomic.h> |
| 31 | #include <linux/hyperv.h> | 32 | #include <linux/hyperv.h> |
| 32 | #include <linux/interrupt.h> | 33 | #include <linux/interrupt.h> |
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index bc65c4d79c1f..b10fe26c4891 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | #include <linux/cpu.h> | 36 | #include <linux/cpu.h> |
| 37 | #include <linux/sched/task_stack.h> | 37 | #include <linux/sched/task_stack.h> |
| 38 | 38 | ||
| 39 | #include <asm/hyperv.h> | ||
| 40 | #include <asm/mshyperv.h> | 39 | #include <asm/mshyperv.h> |
| 41 | #include <linux/notifier.h> | 40 | #include <linux/notifier.h> |
| 42 | #include <linux/ptrace.h> | 41 | #include <linux/ptrace.h> |
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 18c6abe81fbd..728e5c5706c4 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h | |||
| @@ -19,6 +19,11 @@ static inline unsigned int kvm_arch_para_features(void) | |||
| 19 | return 0; | 19 | return 0; |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | static inline unsigned int kvm_arch_para_hints(void) | ||
| 23 | { | ||
| 24 | return 0; | ||
| 25 | } | ||
| 26 | |||
| 22 | static inline bool kvm_para_available(void) | 27 | static inline bool kvm_para_available(void) |
| 23 | { | 28 | { |
| 24 | return false; | 29 | return false; |
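
kvm_arch_para_hints()/kvm_para_has_hint() mirror the existing feature query for the new CPUID hints leaf. A guest-side sketch; the concrete hint bit named here (KVM_HINTS_DEDICATED, assumed to be bit 0, meaning vCPUs never share a physical CPU) is taken from the rest of this series and is illustrative only:

/*
 * Guest-side sketch; the hint bit name and number are assumptions.
 */
#include <linux/types.h>
#include <linux/kvm_para.h>

#ifndef KVM_HINTS_DEDICATED
#define KVM_HINTS_DEDICATED 0		/* assumed bit number */
#endif

static bool guest_has_dedicated_pcpus(void)
{
	return kvm_para_available() && kvm_para_has_hint(KVM_HINTS_DEDICATED);
}

A paravirtualized guest could use such a hint to pick spinlock or flush strategies suited to hosts that do not overcommit CPUs.
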
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 02924ae2527e..24f03941ada8 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h | |||
| @@ -57,11 +57,15 @@ struct vgic_global { | |||
| 57 | /* Physical address of vgic virtual cpu interface */ | 57 | /* Physical address of vgic virtual cpu interface */ |
| 58 | phys_addr_t vcpu_base; | 58 | phys_addr_t vcpu_base; |
| 59 | 59 | ||
| 60 | /* GICV mapping */ | 60 | /* GICV mapping, kernel VA */ |
| 61 | void __iomem *vcpu_base_va; | 61 | void __iomem *vcpu_base_va; |
| 62 | /* GICV mapping, HYP VA */ | ||
| 63 | void __iomem *vcpu_hyp_va; | ||
| 62 | 64 | ||
| 63 | /* virtual control interface mapping */ | 65 | /* virtual control interface mapping, kernel VA */ |
| 64 | void __iomem *vctrl_base; | 66 | void __iomem *vctrl_base; |
| 67 | /* virtual control interface mapping, HYP VA */ | ||
| 68 | void __iomem *vctrl_hyp; | ||
| 65 | 69 | ||
| 66 | /* Number of implemented list registers */ | 70 | /* Number of implemented list registers */ |
| 67 | int nr_lr; | 71 | int nr_lr; |
| @@ -209,10 +213,6 @@ struct vgic_dist { | |||
| 209 | 213 | ||
| 210 | int nr_spis; | 214 | int nr_spis; |
| 211 | 215 | ||
| 212 | /* TODO: Consider moving to global state */ | ||
| 213 | /* Virtual control interface mapping */ | ||
| 214 | void __iomem *vctrl_base; | ||
| 215 | |||
| 216 | /* base addresses in guest physical address space: */ | 216 | /* base addresses in guest physical address space: */ |
| 217 | gpa_t vgic_dist_base; /* distributor */ | 217 | gpa_t vgic_dist_base; /* distributor */ |
| 218 | union { | 218 | union { |
| @@ -263,7 +263,6 @@ struct vgic_dist { | |||
| 263 | struct vgic_v2_cpu_if { | 263 | struct vgic_v2_cpu_if { |
| 264 | u32 vgic_hcr; | 264 | u32 vgic_hcr; |
| 265 | u32 vgic_vmcr; | 265 | u32 vgic_vmcr; |
| 266 | u64 vgic_elrsr; /* Saved only */ | ||
| 267 | u32 vgic_apr; | 266 | u32 vgic_apr; |
| 268 | u32 vgic_lr[VGIC_V2_MAX_LRS]; | 267 | u32 vgic_lr[VGIC_V2_MAX_LRS]; |
| 269 | }; | 268 | }; |
| @@ -272,7 +271,6 @@ struct vgic_v3_cpu_if { | |||
| 272 | u32 vgic_hcr; | 271 | u32 vgic_hcr; |
| 273 | u32 vgic_vmcr; | 272 | u32 vgic_vmcr; |
| 274 | u32 vgic_sre; /* Restored only, change ignored */ | 273 | u32 vgic_sre; /* Restored only, change ignored */ |
| 275 | u32 vgic_elrsr; /* Saved only */ | ||
| 276 | u32 vgic_ap0r[4]; | 274 | u32 vgic_ap0r[4]; |
| 277 | u32 vgic_ap1r[4]; | 275 | u32 vgic_ap1r[4]; |
| 278 | u64 vgic_lr[VGIC_V3_MAX_LRS]; | 276 | u64 vgic_lr[VGIC_V3_MAX_LRS]; |
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2048f3c3b68a..192ed8fbc403 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h | |||
| @@ -26,7 +26,6 @@ | |||
| 26 | #define _HYPERV_H | 26 | #define _HYPERV_H |
| 27 | 27 | ||
| 28 | #include <uapi/linux/hyperv.h> | 28 | #include <uapi/linux/hyperv.h> |
| 29 | #include <uapi/asm/hyperv.h> | ||
| 30 | 29 | ||
| 31 | #include <linux/types.h> | 30 | #include <linux/types.h> |
| 32 | #include <linux/scatterlist.h> | 31 | #include <linux/scatterlist.h> |
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 51f6ef2c2ff4..f23b90b02898 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h | |||
| @@ -9,4 +9,9 @@ static inline bool kvm_para_has_feature(unsigned int feature) | |||
| 9 | { | 9 | { |
| 10 | return !!(kvm_arch_para_features() & (1UL << feature)); | 10 | return !!(kvm_arch_para_features() & (1UL << feature)); |
| 11 | } | 11 | } |
| 12 | |||
| 13 | static inline bool kvm_para_has_hint(unsigned int feature) | ||
| 14 | { | ||
| 15 | return !!(kvm_arch_para_hints() & (1UL << feature)); | ||
| 16 | } | ||
| 12 | #endif /* __LINUX_KVM_PARA_H */ | 17 | #endif /* __LINUX_KVM_PARA_H */ |
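The new kvm_para_has_hint() mirrors kvm_para_has_feature() above, testing one bit of the value returned by the (arch-provided) kvm_arch_para_hints(). A minimal guest-side sketch of the intended call pattern follows; the hint name KVM_HINTS_DEDICATED and its bit number are assumptions for illustration and are not defined in this hunk.

    /* Sketch only: KVM_HINTS_DEDICATED (bit 0) is assumed, not defined in this hunk. */
    #include <linux/kvm_para.h>

    #define KVM_HINTS_DEDICATED 0	/* assumed bit number, for illustration */

    static bool guest_has_dedicated_pcpus(void)
    {
        /* Same pattern as kvm_para_has_feature(): test one bit of the hints leaf. */
        return kvm_para_available() && kvm_para_has_hint(KVM_HINTS_DEDICATED);
    }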
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7b26d4b0b052..1065006c9bf5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
| @@ -396,6 +396,10 @@ struct kvm_run { | |||
| 396 | char padding[256]; | 396 | char padding[256]; |
| 397 | }; | 397 | }; |
| 398 | 398 | ||
| 399 | /* 2048 is the size of the char array used to bound/pad the size | ||
| 400 | * of the union that holds sync regs. | ||
| 401 | */ | ||
| 402 | #define SYNC_REGS_SIZE_BYTES 2048 | ||
| 399 | /* | 403 | /* |
| 400 | * shared registers between kvm and userspace. | 404 | * shared registers between kvm and userspace. |
| 401 | * kvm_valid_regs specifies the register classes set by the host | 405 | * kvm_valid_regs specifies the register classes set by the host |
| @@ -407,7 +411,7 @@ struct kvm_run { | |||
| 407 | __u64 kvm_dirty_regs; | 411 | __u64 kvm_dirty_regs; |
| 408 | union { | 412 | union { |
| 409 | struct kvm_sync_regs regs; | 413 | struct kvm_sync_regs regs; |
| 410 | char padding[2048]; | 414 | char padding[SYNC_REGS_SIZE_BYTES]; |
| 411 | } s; | 415 | } s; |
| 412 | }; | 416 | }; |
| 413 | 417 | ||
| @@ -925,7 +929,7 @@ struct kvm_ppc_resize_hpt { | |||
| 925 | #define KVM_CAP_S390_GS 140 | 929 | #define KVM_CAP_S390_GS 140 |
| 926 | #define KVM_CAP_S390_AIS 141 | 930 | #define KVM_CAP_S390_AIS 141 |
| 927 | #define KVM_CAP_SPAPR_TCE_VFIO 142 | 931 | #define KVM_CAP_SPAPR_TCE_VFIO 142 |
| 928 | #define KVM_CAP_X86_GUEST_MWAIT 143 | 932 | #define KVM_CAP_X86_DISABLE_EXITS 143 |
| 929 | #define KVM_CAP_ARM_USER_IRQ 144 | 933 | #define KVM_CAP_ARM_USER_IRQ 144 |
| 930 | #define KVM_CAP_S390_CMMA_MIGRATION 145 | 934 | #define KVM_CAP_S390_CMMA_MIGRATION 145 |
| 931 | #define KVM_CAP_PPC_FWNMI 146 | 935 | #define KVM_CAP_PPC_FWNMI 146 |
| @@ -936,6 +940,7 @@ struct kvm_ppc_resize_hpt { | |||
| 936 | #define KVM_CAP_PPC_GET_CPU_CHAR 151 | 940 | #define KVM_CAP_PPC_GET_CPU_CHAR 151 |
| 937 | #define KVM_CAP_S390_BPB 152 | 941 | #define KVM_CAP_S390_BPB 152 |
| 938 | #define KVM_CAP_GET_MSR_FEATURES 153 | 942 | #define KVM_CAP_GET_MSR_FEATURES 153 |
| 943 | #define KVM_CAP_HYPERV_EVENTFD 154 | ||
| 939 | 944 | ||
| 940 | #ifdef KVM_CAP_IRQ_ROUTING | 945 | #ifdef KVM_CAP_IRQ_ROUTING |
| 941 | 946 | ||
| @@ -1375,6 +1380,10 @@ struct kvm_enc_region { | |||
| 1375 | #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) | 1380 | #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) |
| 1376 | #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) | 1381 | #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) |
| 1377 | 1382 | ||
| 1383 | /* Available with KVM_CAP_HYPERV_EVENTFD */ | ||
| 1384 | #define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) | ||
| 1385 | |||
| 1386 | |||
| 1378 | /* Secure Encrypted Virtualization command */ | 1387 | /* Secure Encrypted Virtualization command */ |
| 1379 | enum sev_cmd_id { | 1388 | enum sev_cmd_id { |
| 1380 | /* Guest initialization commands */ | 1389 | /* Guest initialization commands */ |
| @@ -1515,4 +1524,14 @@ struct kvm_assigned_msix_entry { | |||
| 1515 | #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) | 1524 | #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) |
| 1516 | #define KVM_ARM_DEV_PMU (1 << 2) | 1525 | #define KVM_ARM_DEV_PMU (1 << 2) |
| 1517 | 1526 | ||
| 1527 | struct kvm_hyperv_eventfd { | ||
| 1528 | __u32 conn_id; | ||
| 1529 | __s32 fd; | ||
| 1530 | __u32 flags; | ||
| 1531 | __u32 padding[3]; | ||
| 1532 | }; | ||
| 1533 | |||
| 1534 | #define KVM_HYPERV_CONN_ID_MASK 0x00ffffff | ||
| 1535 | #define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) | ||
| 1536 | |||
| 1518 | #endif /* __LINUX_KVM_H */ | 1537 | #endif /* __LINUX_KVM_H */ |
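The new KVM_HYPERV_EVENTFD ioctl and struct kvm_hyperv_eventfd let userspace bind an eventfd to a Hyper-V connection id, with KVM_HYPERV_EVENTFD_DEASSIGN undoing the binding. A hedged userspace sketch follows; issuing the ioctl on the VM file descriptor and probing the capability with KVM_CHECK_EXTENSION first follow the usual KVM pattern and are assumptions, not spelled out in this hunk.

    /* Userspace sketch: bind an eventfd to a Hyper-V connection id.
     * Assumes vm_fd is a KVM VM file descriptor and that the capability
     * can be probed on it with KVM_CHECK_EXTENSION.
     */
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int hyperv_conn_bind(int vm_fd, unsigned int conn_id)
    {
        struct kvm_hyperv_eventfd hve = {
            .conn_id = conn_id & KVM_HYPERV_CONN_ID_MASK,
            .fd = eventfd(0, EFD_CLOEXEC),
            .flags = 0,	/* pass KVM_HYPERV_EVENTFD_DEASSIGN here to unbind */
        };

        if (hve.fd < 0)
            return -1;
        if (!ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_HYPERV_EVENTFD))
            return -1;
        return ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hve) ? -1 : hve.fd;
    }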
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7b26d4b0b052..6b89f87db200 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h | |||
| @@ -925,7 +925,7 @@ struct kvm_ppc_resize_hpt { | |||
| 925 | #define KVM_CAP_S390_GS 140 | 925 | #define KVM_CAP_S390_GS 140 |
| 926 | #define KVM_CAP_S390_AIS 141 | 926 | #define KVM_CAP_S390_AIS 141 |
| 927 | #define KVM_CAP_SPAPR_TCE_VFIO 142 | 927 | #define KVM_CAP_SPAPR_TCE_VFIO 142 |
| 928 | #define KVM_CAP_X86_GUEST_MWAIT 143 | 928 | #define KVM_CAP_X86_DISABLE_EXITS 143 |
| 929 | #define KVM_CAP_ARM_USER_IRQ 144 | 929 | #define KVM_CAP_ARM_USER_IRQ 144 |
| 930 | #define KVM_CAP_S390_CMMA_MIGRATION 145 | 930 | #define KVM_CAP_S390_CMMA_MIGRATION 145 |
| 931 | #define KVM_CAP_PPC_FWNMI 146 | 931 | #define KVM_CAP_PPC_FWNMI 146 |
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 5898c22ba310..56c4b3f8a01b 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat | |||
| @@ -1121,9 +1121,6 @@ class Tui(object): | |||
| 1121 | self.screen.refresh() | 1121 | self.screen.refresh() |
| 1122 | 1122 | ||
| 1123 | def _refresh_body(self, sleeptime): | 1123 | def _refresh_body(self, sleeptime): |
| 1124 | def is_child_field(field): | ||
| 1125 | return field.find('(') != -1 | ||
| 1126 | |||
| 1127 | def insert_child(sorted_items, child, values, parent): | 1124 | def insert_child(sorted_items, child, values, parent): |
| 1128 | num = len(sorted_items) | 1125 | num = len(sorted_items) |
| 1129 | for i in range(0, num): | 1126 | for i in range(0, num): |
| @@ -1134,12 +1131,14 @@ class Tui(object): | |||
| 1134 | def get_sorted_events(self, stats): | 1131 | def get_sorted_events(self, stats): |
| 1135 | """ separate parent and child events """ | 1132 | """ separate parent and child events """ |
| 1136 | if self._sorting == SORT_DEFAULT: | 1133 | if self._sorting == SORT_DEFAULT: |
| 1137 | def sortkey((_k, v)): | 1134 | def sortkey(pair): |
| 1138 | # sort by (delta value, overall value) | 1135 | # sort by (delta value, overall value) |
| 1136 | v = pair[1] | ||
| 1139 | return (v.delta, v.value) | 1137 | return (v.delta, v.value) |
| 1140 | else: | 1138 | else: |
| 1141 | def sortkey((_k, v)): | 1139 | def sortkey(pair): |
| 1142 | # sort by overall value | 1140 | # sort by overall value |
| 1141 | v = pair[1] | ||
| 1143 | return v.value | 1142 | return v.value |
| 1144 | 1143 | ||
| 1145 | childs = [] | 1144 | childs = [] |
| @@ -1613,7 +1612,7 @@ def assign_globals(): | |||
| 1613 | global PATH_DEBUGFS_TRACING | 1612 | global PATH_DEBUGFS_TRACING |
| 1614 | 1613 | ||
| 1615 | debugfs = '' | 1614 | debugfs = '' |
| 1616 | for line in file('/proc/mounts'): | 1615 | for line in open('/proc/mounts'): |
| 1617 | if line.split(' ')[0] == 'debugfs': | 1616 | if line.split(' ')[0] == 'debugfs': |
| 1618 | debugfs = line.split(' ')[1] | 1617 | debugfs = line.split(' ')[1] |
| 1619 | break | 1618 | break |
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index bae6a4e9f2ee..2fc410bc4f33 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile | |||
| @@ -15,6 +15,7 @@ TARGETS += gpio | |||
| 15 | TARGETS += intel_pstate | 15 | TARGETS += intel_pstate |
| 16 | TARGETS += ipc | 16 | TARGETS += ipc |
| 17 | TARGETS += kcmp | 17 | TARGETS += kcmp |
| 18 | TARGETS += kvm | ||
| 18 | TARGETS += lib | 19 | TARGETS += lib |
| 19 | TARGETS += membarrier | 20 | TARGETS += membarrier |
| 20 | TARGETS += memfd | 21 | TARGETS += memfd |
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile new file mode 100644 index 000000000000..dc44de904797 --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | all: | ||
| 2 | |||
| 3 | top_srcdir = ../../../../ | ||
| 4 | UNAME_M := $(shell uname -m) | ||
| 5 | |||
| 6 | LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c | ||
| 7 | LIBKVM_x86_64 = lib/x86.c | ||
| 8 | |||
| 9 | TEST_GEN_PROGS_x86_64 = set_sregs_test | ||
| 10 | TEST_GEN_PROGS_x86_64 += sync_regs_test | ||
| 11 | |||
| 12 | TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) | ||
| 13 | LIBKVM += $(LIBKVM_$(UNAME_M)) | ||
| 14 | |||
| 15 | INSTALL_HDR_PATH = $(top_srcdir)/usr | ||
| 16 | LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ | ||
| 17 | CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) | ||
| 18 | |||
| 19 | # After inclusion, $(OUTPUT) is defined and | ||
| 20 | # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ | ||
| 21 | include ../lib.mk | ||
| 22 | |||
| 23 | STATIC_LIBS := $(OUTPUT)/libkvm.a | ||
| 24 | LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) | ||
| 25 | EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) | ||
| 26 | |||
| 27 | x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) | ||
| 28 | $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c | ||
| 29 | $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ | ||
| 30 | |||
| 31 | $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) | ||
| 32 | $(AR) crs $@ $^ | ||
| 33 | |||
| 34 | $(LINUX_HDR_PATH): | ||
| 35 | make -C $(top_srcdir) headers_install | ||
| 36 | |||
| 37 | all: $(STATIC_LIBS) $(LINUX_HDR_PATH) | ||
| 38 | $(TEST_GEN_PROGS): $(STATIC_LIBS) | ||
| 39 | $(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH) | ||
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h new file mode 100644 index 000000000000..57974ad46373 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util.h | |||
| @@ -0,0 +1,142 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/include/kvm_util.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | */ | ||
| 9 | #ifndef SELFTEST_KVM_UTIL_H | ||
| 10 | #define SELFTEST_KVM_UTIL_H 1 | ||
| 11 | |||
| 12 | #include "test_util.h" | ||
| 13 | |||
| 14 | #include "asm/kvm.h" | ||
| 15 | #include "linux/kvm.h" | ||
| 16 | #include <sys/ioctl.h> | ||
| 17 | |||
| 18 | #include "sparsebit.h" | ||
| 19 | |||
| 20 | /* | ||
| 21 | * Memslots can't cover the gfn starting at this gpa, otherwise vCPUs can't be | ||
| 22 | * created. Only applies to VMs using EPT. | ||
| 23 | */ | ||
| 24 | #define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul | ||
| 25 | |||
| 26 | |||
| 27 | /* Callers of kvm_util only have an incomplete/opaque description of the | ||
| 28 | * structure kvm_util is using to maintain the state of a VM. | ||
| 29 | */ | ||
| 30 | struct kvm_vm; | ||
| 31 | |||
| 32 | typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ | ||
| 33 | typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ | ||
| 34 | |||
| 35 | /* Minimum allocated guest virtual and physical addresses */ | ||
| 36 | #define KVM_UTIL_MIN_VADDR 0x2000 | ||
| 37 | |||
| 38 | #define DEFAULT_GUEST_PHY_PAGES 512 | ||
| 39 | #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 | ||
| 40 | #define DEFAULT_STACK_PGS 5 | ||
| 41 | |||
| 42 | enum vm_guest_mode { | ||
| 43 | VM_MODE_FLAT48PG, | ||
| 44 | }; | ||
| 45 | |||
| 46 | enum vm_mem_backing_src_type { | ||
| 47 | VM_MEM_SRC_ANONYMOUS, | ||
| 48 | VM_MEM_SRC_ANONYMOUS_THP, | ||
| 49 | VM_MEM_SRC_ANONYMOUS_HUGETLB, | ||
| 50 | }; | ||
| 51 | |||
| 52 | int kvm_check_cap(long cap); | ||
| 53 | |||
| 54 | struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); | ||
| 55 | void kvm_vm_free(struct kvm_vm *vmp); | ||
| 56 | |||
| 57 | int kvm_memcmp_hva_gva(void *hva, | ||
| 58 | struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); | ||
| 59 | |||
| 60 | void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, | ||
| 61 | uint32_t data_memslot, uint32_t pgd_memslot); | ||
| 62 | |||
| 63 | void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); | ||
| 64 | void vcpu_dump(FILE *stream, struct kvm_vm *vm, | ||
| 65 | uint32_t vcpuid, uint8_t indent); | ||
| 66 | |||
| 67 | void vm_create_irqchip(struct kvm_vm *vm); | ||
| 68 | |||
| 69 | void vm_userspace_mem_region_add(struct kvm_vm *vm, | ||
| 70 | enum vm_mem_backing_src_type src_type, | ||
| 71 | uint64_t guest_paddr, uint32_t slot, uint64_t npages, | ||
| 72 | uint32_t flags); | ||
| 73 | |||
| 74 | void vcpu_ioctl(struct kvm_vm *vm, | ||
| 75 | uint32_t vcpuid, unsigned long ioctl, void *arg); | ||
| 76 | void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); | ||
| 77 | void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); | ||
| 78 | void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); | ||
| 79 | vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, | ||
| 80 | uint32_t data_memslot, uint32_t pgd_memslot); | ||
| 81 | void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); | ||
| 82 | void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); | ||
| 83 | vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); | ||
| 84 | vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); | ||
| 85 | |||
| 86 | struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); | ||
| 87 | void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); | ||
| 88 | int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); | ||
| 89 | void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, | ||
| 90 | struct kvm_mp_state *mp_state); | ||
| 91 | void vcpu_regs_get(struct kvm_vm *vm, | ||
| 92 | uint32_t vcpuid, struct kvm_regs *regs); | ||
| 93 | void vcpu_regs_set(struct kvm_vm *vm, | ||
| 94 | uint32_t vcpuid, struct kvm_regs *regs); | ||
| 95 | void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); | ||
| 96 | void vcpu_sregs_get(struct kvm_vm *vm, | ||
| 97 | uint32_t vcpuid, struct kvm_sregs *sregs); | ||
| 98 | void vcpu_sregs_set(struct kvm_vm *vm, | ||
| 99 | uint32_t vcpuid, struct kvm_sregs *sregs); | ||
| 100 | int _vcpu_sregs_set(struct kvm_vm *vm, | ||
| 101 | uint32_t vcpuid, struct kvm_sregs *sregs); | ||
| 102 | void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, | ||
| 103 | struct kvm_vcpu_events *events); | ||
| 104 | void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, | ||
| 105 | struct kvm_vcpu_events *events); | ||
| 106 | |||
| 107 | const char *exit_reason_str(unsigned int exit_reason); | ||
| 108 | |||
| 109 | void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); | ||
| 110 | void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, | ||
| 111 | uint32_t pgd_memslot); | ||
| 112 | vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, | ||
| 113 | vm_paddr_t paddr_min, uint32_t memslot); | ||
| 114 | |||
| 115 | void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); | ||
| 116 | void vcpu_set_cpuid( | ||
| 117 | struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); | ||
| 118 | |||
| 119 | struct kvm_cpuid2 *allocate_kvm_cpuid2(void); | ||
| 120 | struct kvm_cpuid_entry2 * | ||
| 121 | find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, | ||
| 122 | uint32_t index); | ||
| 123 | |||
| 124 | static inline struct kvm_cpuid_entry2 * | ||
| 125 | find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) | ||
| 126 | { | ||
| 127 | return find_cpuid_index_entry(cpuid, function, 0); | ||
| 128 | } | ||
| 129 | |||
| 130 | struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); | ||
| 131 | void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); | ||
| 132 | |||
| 133 | struct kvm_userspace_memory_region * | ||
| 134 | kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, | ||
| 135 | uint64_t end); | ||
| 136 | |||
| 137 | struct kvm_dirty_log * | ||
| 138 | allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); | ||
| 139 | |||
| 140 | int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); | ||
| 141 | |||
| 142 | #endif /* SELFTEST_KVM_UTIL_H */ | ||
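The header above is the whole interface a selftest links against; the sketch below strings a few of the declared helpers together the way a trivial test might. The guest body, the VCPU_ID value, and the use of a port write to force a KVM_EXIT_IO are illustrative assumptions, not taken from this patch.

    /* Minimal selftest sketch built only from functions declared in kvm_util.h.
     * VCPU_ID and the guest body are illustrative assumptions.
     */
    #include "kvm_util.h"

    #define VCPU_ID 0

    static void guest_code(void)
    {
        /* Any port access exits back to the host with KVM_EXIT_IO. */
        __asm__ __volatile__("outb %%al, %%dx" : : "a"(0), "d"(0x10));
        for (;;)
            ;
    }

    int main(void)
    {
        struct kvm_vm *vm = vm_create_default(VCPU_ID, guest_code);
        struct kvm_run *run;

        vcpu_run(vm, VCPU_ID);		/* enter the guest until it exits */
        run = vcpu_state(vm, VCPU_ID);	/* kvm_run state for this vCPU */
        TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
                    "expected KVM_EXIT_IO, got %s", exit_reason_str(run->exit_reason));
        kvm_vm_free(vm);
        return 0;
    }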
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h new file mode 100644 index 000000000000..54cfeb6568d3 --- /dev/null +++ b/tools/testing/selftests/kvm/include/sparsebit.h | |||
| @@ -0,0 +1,75 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/include/sparsebit.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | * | ||
| 9 | * Header file that describes the API of the sparsebit library. | ||
| 10 | * This library provides a memory-efficient means of storing | ||
| 11 | * the settings of bits indexed via a uint64_t. Memory usage | ||
| 12 | * is reasonable, significantly less than (2^64 / 8) bytes, as | ||
| 13 | * long as bits that are mostly set or mostly cleared are close | ||
| 14 | * to each other. This library is efficient in memory usage | ||
| 15 | * even in the case where most bits are set. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #ifndef _TEST_SPARSEBIT_H_ | ||
| 19 | #define _TEST_SPARSEBIT_H_ | ||
| 20 | |||
| 21 | #include <stdbool.h> | ||
| 22 | #include <stdint.h> | ||
| 23 | #include <stdio.h> | ||
| 24 | |||
| 25 | #ifdef __cplusplus | ||
| 26 | extern "C" { | ||
| 27 | #endif | ||
| 28 | |||
| 29 | struct sparsebit; | ||
| 30 | typedef uint64_t sparsebit_idx_t; | ||
| 31 | typedef uint64_t sparsebit_num_t; | ||
| 32 | |||
| 33 | struct sparsebit *sparsebit_alloc(void); | ||
| 34 | void sparsebit_free(struct sparsebit **sbitp); | ||
| 35 | void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src); | ||
| 36 | |||
| 37 | bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx); | ||
| 38 | bool sparsebit_is_set_num(struct sparsebit *sbit, | ||
| 39 | sparsebit_idx_t idx, sparsebit_num_t num); | ||
| 40 | bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx); | ||
| 41 | bool sparsebit_is_clear_num(struct sparsebit *sbit, | ||
| 42 | sparsebit_idx_t idx, sparsebit_num_t num); | ||
| 43 | sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit); | ||
| 44 | bool sparsebit_any_set(struct sparsebit *sbit); | ||
| 45 | bool sparsebit_any_clear(struct sparsebit *sbit); | ||
| 46 | bool sparsebit_all_set(struct sparsebit *sbit); | ||
| 47 | bool sparsebit_all_clear(struct sparsebit *sbit); | ||
| 48 | sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit); | ||
| 49 | sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit); | ||
| 50 | sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev); | ||
| 51 | sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev); | ||
| 52 | sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit, | ||
| 53 | sparsebit_idx_t start, sparsebit_num_t num); | ||
| 54 | sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit, | ||
| 55 | sparsebit_idx_t start, sparsebit_num_t num); | ||
| 56 | |||
| 57 | void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx); | ||
| 58 | void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start, | ||
| 59 | sparsebit_num_t num); | ||
| 60 | void sparsebit_set_all(struct sparsebit *sbitp); | ||
| 61 | |||
| 62 | void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx); | ||
| 63 | void sparsebit_clear_num(struct sparsebit *sbitp, | ||
| 64 | sparsebit_idx_t start, sparsebit_num_t num); | ||
| 65 | void sparsebit_clear_all(struct sparsebit *sbitp); | ||
| 66 | |||
| 67 | void sparsebit_dump(FILE *stream, struct sparsebit *sbit, | ||
| 68 | unsigned int indent); | ||
| 69 | void sparsebit_validate_internal(struct sparsebit *sbit); | ||
| 70 | |||
| 71 | #ifdef __cplusplus | ||
| 72 | } | ||
| 73 | #endif | ||
| 74 | |||
| 75 | #endif /* _TEST_SPARSEBIT_H_ */ | ||
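The sparsebit API above is self-contained; a short sketch of the intended usage follows, using only calls declared in this header (the index values are arbitrary).

    /* Sketch: mark a range of bits, punch a hole, and query the result. */
    #include <stdio.h>
    #include <inttypes.h>
    #include "sparsebit.h"

    int main(void)
    {
        struct sparsebit *sb = sparsebit_alloc();

        sparsebit_set_num(sb, 0x1000, 16);	/* set indexes 0x1000..0x100f */
        sparsebit_clear(sb, 0x1008);		/* clear one index in the middle */

        printf("first set: 0x%" PRIx64 ", total set: %" PRIu64 "\n",
               sparsebit_first_set(sb), sparsebit_num_set(sb));
        printf("0x1008 is %s\n", sparsebit_is_clear(sb, 0x1008) ? "clear" : "set");

        sparsebit_dump(stdout, sb, 0);	/* dump the stored ranges, for debugging */
        sparsebit_free(&sb);		/* note: takes a struct sparsebit ** */
        return 0;
    }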
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h new file mode 100644 index 000000000000..7ab98e41324f --- /dev/null +++ b/tools/testing/selftests/kvm/include/test_util.h | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/include/test_util.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef TEST_UTIL_H | ||
| 11 | #define TEST_UTIL_H 1 | ||
| 12 | |||
| 13 | #include <stdlib.h> | ||
| 14 | #include <stdarg.h> | ||
| 15 | #include <stdbool.h> | ||
| 16 | #include <stdio.h> | ||
| 17 | #include <string.h> | ||
| 18 | #include <inttypes.h> | ||
| 19 | #include <errno.h> | ||
| 20 | #include <unistd.h> | ||
| 21 | #include <fcntl.h> | ||
| 22 | |||
| 23 | ssize_t test_write(int fd, const void *buf, size_t count); | ||
| 24 | ssize_t test_read(int fd, void *buf, size_t count); | ||
| 25 | int test_seq_read(const char *path, char **bufp, size_t *sizep); | ||
| 26 | |||
| 27 | void test_assert(bool exp, const char *exp_str, | ||
| 28 | const char *file, unsigned int line, const char *fmt, ...); | ||
| 29 | |||
| 30 | #define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) | ||
| 31 | |||
| 32 | #define TEST_ASSERT(e, fmt, ...) \ | ||
| 33 | test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) | ||
| 34 | |||
| 35 | #define ASSERT_EQ(a, b) do { \ | ||
| 36 | typeof(a) __a = (a); \ | ||
| 37 | typeof(b) __b = (b); \ | ||
| 38 | TEST_ASSERT(__a == __b, \ | ||
| 39 | "ASSERT_EQ(%s, %s) failed.\n" \ | ||
| 40 | "\t%s is %#lx\n" \ | ||
| 41 | "\t%s is %#lx", \ | ||
| 42 | #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ | ||
| 43 | } while (0) | ||
| 44 | |||
| 45 | #endif /* TEST_UTIL_H */ | ||
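TEST_ASSERT() wraps test_assert() with the failing expression, file and line, plus a printf-style message; ASSERT_EQ() layers an equality check on top, printing both sides as unsigned long. A small sketch of how a test would use them; the behaviour of test_read() beyond being a read() wrapper is not shown in this section and is assumed.

    /* Sketch: assertion helpers from test_util.h used in a tiny read helper. */
    #include "test_util.h"

    static void read_exactly(int fd, void *buf, size_t len)
    {
        ssize_t ret = test_read(fd, buf, len);	/* read() wrapper declared above */

        TEST_ASSERT(ret >= 0, "test_read() on fd %i failed, ret: %zd", fd, ret);
        ASSERT_EQ((size_t)ret, len);	/* both sides printed as unsigned long on failure */
    }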
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h new file mode 100644 index 000000000000..4a5b2c4c1a0f --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86.h | |||
| @@ -0,0 +1,1043 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/include/x86.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef SELFTEST_KVM_X86_H | ||
| 11 | #define SELFTEST_KVM_X86_H | ||
| 12 | |||
| 13 | #include <assert.h> | ||
| 14 | #include <stdint.h> | ||
| 15 | |||
| 16 | #define X86_EFLAGS_FIXED (1u << 1) | ||
| 17 | |||
| 18 | #define X86_CR4_VME (1ul << 0) | ||
| 19 | #define X86_CR4_PVI (1ul << 1) | ||
| 20 | #define X86_CR4_TSD (1ul << 2) | ||
| 21 | #define X86_CR4_DE (1ul << 3) | ||
| 22 | #define X86_CR4_PSE (1ul << 4) | ||
| 23 | #define X86_CR4_PAE (1ul << 5) | ||
| 24 | #define X86_CR4_MCE (1ul << 6) | ||
| 25 | #define X86_CR4_PGE (1ul << 7) | ||
| 26 | #define X86_CR4_PCE (1ul << 8) | ||
| 27 | #define X86_CR4_OSFXSR (1ul << 9) | ||
| 28 | #define X86_CR4_OSXMMEXCPT (1ul << 10) | ||
| 29 | #define X86_CR4_UMIP (1ul << 11) | ||
| 30 | #define X86_CR4_VMXE (1ul << 13) | ||
| 31 | #define X86_CR4_SMXE (1ul << 14) | ||
| 32 | #define X86_CR4_FSGSBASE (1ul << 16) | ||
| 33 | #define X86_CR4_PCIDE (1ul << 17) | ||
| 34 | #define X86_CR4_OSXSAVE (1ul << 18) | ||
| 35 | #define X86_CR4_SMEP (1ul << 20) | ||
| 36 | #define X86_CR4_SMAP (1ul << 21) | ||
| 37 | #define X86_CR4_PKE (1ul << 22) | ||
| 38 | |||
| 39 | /* The enum values match the instruction encoding of each register */ | ||
| 40 | enum x86_register { | ||
| 41 | RAX = 0, | ||
| 42 | RCX, | ||
| 43 | RDX, | ||
| 44 | RBX, | ||
| 45 | RSP, | ||
| 46 | RBP, | ||
| 47 | RSI, | ||
| 48 | RDI, | ||
| 49 | R8, | ||
| 50 | R9, | ||
| 51 | R10, | ||
| 52 | R11, | ||
| 53 | R12, | ||
| 54 | R13, | ||
| 55 | R14, | ||
| 56 | R15, | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct desc64 { | ||
| 60 | uint16_t limit0; | ||
| 61 | uint16_t base0; | ||
| 62 | unsigned base1:8, type:5, dpl:2, p:1; | ||
| 63 | unsigned limit1:4, zero0:3, g:1, base2:8; | ||
| 64 | uint32_t base3; | ||
| 65 | uint32_t zero1; | ||
| 66 | } __attribute__((packed)); | ||
| 67 | |||
| 68 | struct desc_ptr { | ||
| 69 | uint16_t size; | ||
| 70 | uint64_t address; | ||
| 71 | } __attribute__((packed)); | ||
| 72 | |||
| 73 | static inline uint64_t get_desc64_base(const struct desc64 *desc) | ||
| 74 | { | ||
| 75 | return ((uint64_t)desc->base3 << 32) | | ||
| 76 | (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); | ||
| 77 | } | ||
| 78 | |||
| 79 | static inline uint64_t rdtsc(void) | ||
| 80 | { | ||
| 81 | uint32_t eax, edx; | ||
| 82 | |||
| 83 | /* | ||
| 84 | * The lfence is to wait (on Intel CPUs) until all previous | ||
| 85 | * instructions have been executed. | ||
| 86 | */ | ||
| 87 | __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx)); | ||
| 88 | return ((uint64_t)edx) << 32 | eax; | ||
| 89 | } | ||
| 90 | |||
| 91 | static inline uint64_t rdtscp(uint32_t *aux) | ||
| 92 | { | ||
| 93 | uint32_t eax, edx; | ||
| 94 | |||
| 95 | __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); | ||
| 96 | return ((uint64_t)edx) << 32 | eax; | ||
| 97 | } | ||
| 98 | |||
| 99 | static inline uint64_t rdmsr(uint32_t msr) | ||
| 100 | { | ||
| 101 | uint32_t a, d; | ||
| 102 | |||
| 103 | __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); | ||
| 104 | |||
| 105 | return a | ((uint64_t) d << 32); | ||
| 106 | } | ||
| 107 | |||
| 108 | static inline void wrmsr(uint32_t msr, uint64_t value) | ||
| 109 | { | ||
| 110 | uint32_t a = value; | ||
| 111 | uint32_t d = value >> 32; | ||
| 112 | |||
| 113 | __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); | ||
| 114 | } | ||
| 115 | |||
| 116 | |||
| 117 | static inline uint16_t inw(uint16_t port) | ||
| 118 | { | ||
| 119 | uint16_t tmp; | ||
| 120 | |||
| 121 | __asm__ __volatile__("in %%dx, %%ax" | ||
| 122 | : /* output */ "=a" (tmp) | ||
| 123 | : /* input */ "d" (port)); | ||
| 124 | |||
| 125 | return tmp; | ||
| 126 | } | ||
| 127 | |||
| 128 | static inline uint16_t get_es(void) | ||
| 129 | { | ||
| 130 | uint16_t es; | ||
| 131 | |||
| 132 | __asm__ __volatile__("mov %%es, %[es]" | ||
| 133 | : /* output */ [es]"=rm"(es)); | ||
| 134 | return es; | ||
| 135 | } | ||
| 136 | |||
| 137 | static inline uint16_t get_cs(void) | ||
| 138 | { | ||
| 139 | uint16_t cs; | ||
| 140 | |||
| 141 | __asm__ __volatile__("mov %%cs, %[cs]" | ||
| 142 | : /* output */ [cs]"=rm"(cs)); | ||
| 143 | return cs; | ||
| 144 | } | ||
| 145 | |||
| 146 | static inline uint16_t get_ss(void) | ||
| 147 | { | ||
| 148 | uint16_t ss; | ||
| 149 | |||
| 150 | __asm__ __volatile__("mov %%ss, %[ss]" | ||
| 151 | : /* output */ [ss]"=rm"(ss)); | ||
| 152 | return ss; | ||
| 153 | } | ||
| 154 | |||
| 155 | static inline uint16_t get_ds(void) | ||
| 156 | { | ||
| 157 | uint16_t ds; | ||
| 158 | |||
| 159 | __asm__ __volatile__("mov %%ds, %[ds]" | ||
| 160 | : /* output */ [ds]"=rm"(ds)); | ||
| 161 | return ds; | ||
| 162 | } | ||
| 163 | |||
| 164 | static inline uint16_t get_fs(void) | ||
| 165 | { | ||
| 166 | uint16_t fs; | ||
| 167 | |||
| 168 | __asm__ __volatile__("mov %%fs, %[fs]" | ||
| 169 | : /* output */ [fs]"=rm"(fs)); | ||
| 170 | return fs; | ||
| 171 | } | ||
| 172 | |||
| 173 | static inline uint16_t get_gs(void) | ||
| 174 | { | ||
| 175 | uint16_t gs; | ||
| 176 | |||
| 177 | __asm__ __volatile__("mov %%gs, %[gs]" | ||
| 178 | : /* output */ [gs]"=rm"(gs)); | ||
| 179 | return gs; | ||
| 180 | } | ||
| 181 | |||
| 182 | static inline uint16_t get_tr(void) | ||
| 183 | { | ||
| 184 | uint16_t tr; | ||
| 185 | |||
| 186 | __asm__ __volatile__("str %[tr]" | ||
| 187 | : /* output */ [tr]"=rm"(tr)); | ||
| 188 | return tr; | ||
| 189 | } | ||
| 190 | |||
| 191 | static inline uint64_t get_cr0(void) | ||
| 192 | { | ||
| 193 | uint64_t cr0; | ||
| 194 | |||
| 195 | __asm__ __volatile__("mov %%cr0, %[cr0]" | ||
| 196 | : /* output */ [cr0]"=r"(cr0)); | ||
| 197 | return cr0; | ||
| 198 | } | ||
| 199 | |||
| 200 | static inline uint64_t get_cr3(void) | ||
| 201 | { | ||
| 202 | uint64_t cr3; | ||
| 203 | |||
| 204 | __asm__ __volatile__("mov %%cr3, %[cr3]" | ||
| 205 | : /* output */ [cr3]"=r"(cr3)); | ||
| 206 | return cr3; | ||
| 207 | } | ||
| 208 | |||
| 209 | static inline uint64_t get_cr4(void) | ||
| 210 | { | ||
| 211 | uint64_t cr4; | ||
| 212 | |||
| 213 | __asm__ __volatile__("mov %%cr4, %[cr4]" | ||
| 214 | : /* output */ [cr4]"=r"(cr4)); | ||
| 215 | return cr4; | ||
| 216 | } | ||
| 217 | |||
| 218 | static inline void set_cr4(uint64_t val) | ||
| 219 | { | ||
| 220 | __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); | ||
| 221 | } | ||
| 222 | |||
| 223 | static inline uint64_t get_gdt_base(void) | ||
| 224 | { | ||
| 225 | struct desc_ptr gdt; | ||
| 226 | __asm__ __volatile__("sgdt %[gdt]" | ||
| 227 | : /* output */ [gdt]"=m"(gdt)); | ||
| 228 | return gdt.address; | ||
| 229 | } | ||
| 230 | |||
| 231 | static inline uint64_t get_idt_base(void) | ||
| 232 | { | ||
| 233 | struct desc_ptr idt; | ||
| 234 | __asm__ __volatile__("sidt %[idt]" | ||
| 235 | : /* output */ [idt]"=m"(idt)); | ||
| 236 | return idt.address; | ||
| 237 | } | ||
| 238 | |||
| 239 | #define SET_XMM(__var, __xmm) \ | ||
| 240 | asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) | ||
| 241 | |||
| 242 | static inline void set_xmm(int n, unsigned long val) | ||
| 243 | { | ||
| 244 | switch (n) { | ||
| 245 | case 0: | ||
| 246 | SET_XMM(val, xmm0); | ||
| 247 | break; | ||
| 248 | case 1: | ||
| 249 | SET_XMM(val, xmm1); | ||
| 250 | break; | ||
| 251 | case 2: | ||
| 252 | SET_XMM(val, xmm2); | ||
| 253 | break; | ||
| 254 | case 3: | ||
| 255 | SET_XMM(val, xmm3); | ||
| 256 | break; | ||
| 257 | case 4: | ||
| 258 | SET_XMM(val, xmm4); | ||
| 259 | break; | ||
| 260 | case 5: | ||
| 261 | SET_XMM(val, xmm5); | ||
| 262 | break; | ||
| 263 | case 6: | ||
| 264 | SET_XMM(val, xmm6); | ||
| 265 | break; | ||
| 266 | case 7: | ||
| 267 | SET_XMM(val, xmm7); | ||
| 268 | break; | ||
| 269 | } | ||
| 270 | } | ||
| 271 | |||
| 272 | typedef unsigned long v1di __attribute__ ((vector_size (8))); | ||
| 273 | static inline unsigned long get_xmm(int n) | ||
| 274 | { | ||
| 275 | assert(n >= 0 && n <= 7); | ||
| 276 | |||
| 277 | register v1di xmm0 __asm__("%xmm0"); | ||
| 278 | register v1di xmm1 __asm__("%xmm1"); | ||
| 279 | register v1di xmm2 __asm__("%xmm2"); | ||
| 280 | register v1di xmm3 __asm__("%xmm3"); | ||
| 281 | register v1di xmm4 __asm__("%xmm4"); | ||
| 282 | register v1di xmm5 __asm__("%xmm5"); | ||
| 283 | register v1di xmm6 __asm__("%xmm6"); | ||
| 284 | register v1di xmm7 __asm__("%xmm7"); | ||
| 285 | switch (n) { | ||
| 286 | case 0: | ||
| 287 | return (unsigned long)xmm0; | ||
| 288 | case 1: | ||
| 289 | return (unsigned long)xmm1; | ||
| 290 | case 2: | ||
| 291 | return (unsigned long)xmm2; | ||
| 292 | case 3: | ||
| 293 | return (unsigned long)xmm3; | ||
| 294 | case 4: | ||
| 295 | return (unsigned long)xmm4; | ||
| 296 | case 5: | ||
| 297 | return (unsigned long)xmm5; | ||
| 298 | case 6: | ||
| 299 | return (unsigned long)xmm6; | ||
| 300 | case 7: | ||
| 301 | return (unsigned long)xmm7; | ||
| 302 | } | ||
| 303 | return 0; | ||
| 304 | } | ||
| 305 | |||
| 306 | /* | ||
| 307 | * Basic CPU control in CR0 | ||
| 308 | */ | ||
| 309 | #define X86_CR0_PE (1UL<<0) /* Protection Enable */ | ||
| 310 | #define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ | ||
| 311 | #define X86_CR0_EM (1UL<<2) /* Emulation */ | ||
| 312 | #define X86_CR0_TS (1UL<<3) /* Task Switched */ | ||
| 313 | #define X86_CR0_ET (1UL<<4) /* Extension Type */ | ||
| 314 | #define X86_CR0_NE (1UL<<5) /* Numeric Error */ | ||
| 315 | #define X86_CR0_WP (1UL<<16) /* Write Protect */ | ||
| 316 | #define X86_CR0_AM (1UL<<18) /* Alignment Mask */ | ||
| 317 | #define X86_CR0_NW (1UL<<29) /* Not Write-through */ | ||
| 318 | #define X86_CR0_CD (1UL<<30) /* Cache Disable */ | ||
| 319 | #define X86_CR0_PG (1UL<<31) /* Paging */ | ||
| 320 | |||
| 321 | /* | ||
| 322 | * CPU model specific register (MSR) numbers. | ||
| 323 | */ | ||
| 324 | |||
| 325 | /* x86-64 specific MSRs */ | ||
| 326 | #define MSR_EFER 0xc0000080 /* extended feature register */ | ||
| 327 | #define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ | ||
| 328 | #define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ | ||
| 329 | #define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ | ||
| 330 | #define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ | ||
| 331 | #define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ | ||
| 332 | #define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ | ||
| 333 | #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ | ||
| 334 | #define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ | ||
| 335 | |||
| 336 | /* EFER bits: */ | ||
| 337 | #define EFER_SCE (1<<0) /* SYSCALL/SYSRET */ | ||
| 338 | #define EFER_LME (1<<8) /* Long mode enable */ | ||
| 339 | #define EFER_LMA (1<<10) /* Long mode active (read-only) */ | ||
| 340 | #define EFER_NX (1<<11) /* No execute enable */ | ||
| 341 | #define EFER_SVME (1<<12) /* Enable virtualization */ | ||
| 342 | #define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */ | ||
| 343 | #define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */ | ||
| 344 | |||
| 345 | /* Intel MSRs. Some also available on other CPUs */ | ||
| 346 | |||
| 347 | #define MSR_PPIN_CTL 0x0000004e | ||
| 348 | #define MSR_PPIN 0x0000004f | ||
| 349 | |||
| 350 | #define MSR_IA32_PERFCTR0 0x000000c1 | ||
| 351 | #define MSR_IA32_PERFCTR1 0x000000c2 | ||
| 352 | #define MSR_FSB_FREQ 0x000000cd | ||
| 353 | #define MSR_PLATFORM_INFO 0x000000ce | ||
| 354 | #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 | ||
| 355 | #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) | ||
| 356 | |||
| 357 | #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 | ||
| 358 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) | ||
| 359 | #define NHM_C1_AUTO_DEMOTE (1UL << 26) | ||
| 360 | #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) | ||
| 361 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
| 362 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
| 363 | |||
| 364 | #define MSR_MTRRcap 0x000000fe | ||
| 365 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | ||
| 366 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | ||
| 367 | |||
| 368 | #define MSR_IA32_SYSENTER_CS 0x00000174 | ||
| 369 | #define MSR_IA32_SYSENTER_ESP 0x00000175 | ||
| 370 | #define MSR_IA32_SYSENTER_EIP 0x00000176 | ||
| 371 | |||
| 372 | #define MSR_IA32_MCG_CAP 0x00000179 | ||
| 373 | #define MSR_IA32_MCG_STATUS 0x0000017a | ||
| 374 | #define MSR_IA32_MCG_CTL 0x0000017b | ||
| 375 | #define MSR_IA32_MCG_EXT_CTL 0x000004d0 | ||
| 376 | |||
| 377 | #define MSR_OFFCORE_RSP_0 0x000001a6 | ||
| 378 | #define MSR_OFFCORE_RSP_1 0x000001a7 | ||
| 379 | #define MSR_TURBO_RATIO_LIMIT 0x000001ad | ||
| 380 | #define MSR_TURBO_RATIO_LIMIT1 0x000001ae | ||
| 381 | #define MSR_TURBO_RATIO_LIMIT2 0x000001af | ||
| 382 | |||
| 383 | #define MSR_LBR_SELECT 0x000001c8 | ||
| 384 | #define MSR_LBR_TOS 0x000001c9 | ||
| 385 | #define MSR_LBR_NHM_FROM 0x00000680 | ||
| 386 | #define MSR_LBR_NHM_TO 0x000006c0 | ||
| 387 | #define MSR_LBR_CORE_FROM 0x00000040 | ||
| 388 | #define MSR_LBR_CORE_TO 0x00000060 | ||
| 389 | |||
| 390 | #define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ | ||
| 391 | #define LBR_INFO_MISPRED BIT_ULL(63) | ||
| 392 | #define LBR_INFO_IN_TX BIT_ULL(62) | ||
| 393 | #define LBR_INFO_ABORT BIT_ULL(61) | ||
| 394 | #define LBR_INFO_CYCLES 0xffff | ||
| 395 | |||
| 396 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 | ||
| 397 | #define MSR_IA32_DS_AREA 0x00000600 | ||
| 398 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 | ||
| 399 | #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 | ||
| 400 | |||
| 401 | #define MSR_IA32_RTIT_CTL 0x00000570 | ||
| 402 | #define MSR_IA32_RTIT_STATUS 0x00000571 | ||
| 403 | #define MSR_IA32_RTIT_ADDR0_A 0x00000580 | ||
| 404 | #define MSR_IA32_RTIT_ADDR0_B 0x00000581 | ||
| 405 | #define MSR_IA32_RTIT_ADDR1_A 0x00000582 | ||
| 406 | #define MSR_IA32_RTIT_ADDR1_B 0x00000583 | ||
| 407 | #define MSR_IA32_RTIT_ADDR2_A 0x00000584 | ||
| 408 | #define MSR_IA32_RTIT_ADDR2_B 0x00000585 | ||
| 409 | #define MSR_IA32_RTIT_ADDR3_A 0x00000586 | ||
| 410 | #define MSR_IA32_RTIT_ADDR3_B 0x00000587 | ||
| 411 | #define MSR_IA32_RTIT_CR3_MATCH 0x00000572 | ||
| 412 | #define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 | ||
| 413 | #define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 | ||
| 414 | |||
| 415 | #define MSR_MTRRfix64K_00000 0x00000250 | ||
| 416 | #define MSR_MTRRfix16K_80000 0x00000258 | ||
| 417 | #define MSR_MTRRfix16K_A0000 0x00000259 | ||
| 418 | #define MSR_MTRRfix4K_C0000 0x00000268 | ||
| 419 | #define MSR_MTRRfix4K_C8000 0x00000269 | ||
| 420 | #define MSR_MTRRfix4K_D0000 0x0000026a | ||
| 421 | #define MSR_MTRRfix4K_D8000 0x0000026b | ||
| 422 | #define MSR_MTRRfix4K_E0000 0x0000026c | ||
| 423 | #define MSR_MTRRfix4K_E8000 0x0000026d | ||
| 424 | #define MSR_MTRRfix4K_F0000 0x0000026e | ||
| 425 | #define MSR_MTRRfix4K_F8000 0x0000026f | ||
| 426 | #define MSR_MTRRdefType 0x000002ff | ||
| 427 | |||
| 428 | #define MSR_IA32_CR_PAT 0x00000277 | ||
| 429 | |||
| 430 | #define MSR_IA32_DEBUGCTLMSR 0x000001d9 | ||
| 431 | #define MSR_IA32_LASTBRANCHFROMIP 0x000001db | ||
| 432 | #define MSR_IA32_LASTBRANCHTOIP 0x000001dc | ||
| 433 | #define MSR_IA32_LASTINTFROMIP 0x000001dd | ||
| 434 | #define MSR_IA32_LASTINTTOIP 0x000001de | ||
| 435 | |||
| 436 | /* DEBUGCTLMSR bits (others vary by model): */ | ||
| 437 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ | ||
| 438 | #define DEBUGCTLMSR_BTF_SHIFT 1 | ||
| 439 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ | ||
| 440 | #define DEBUGCTLMSR_TR (1UL << 6) | ||
| 441 | #define DEBUGCTLMSR_BTS (1UL << 7) | ||
| 442 | #define DEBUGCTLMSR_BTINT (1UL << 8) | ||
| 443 | #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) | ||
| 444 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) | ||
| 445 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) | ||
| 446 | #define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 | ||
| 447 | #define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) | ||
| 448 | |||
| 449 | #define MSR_PEBS_FRONTEND 0x000003f7 | ||
| 450 | |||
| 451 | #define MSR_IA32_POWER_CTL 0x000001fc | ||
| 452 | |||
| 453 | #define MSR_IA32_MC0_CTL 0x00000400 | ||
| 454 | #define MSR_IA32_MC0_STATUS 0x00000401 | ||
| 455 | #define MSR_IA32_MC0_ADDR 0x00000402 | ||
| 456 | #define MSR_IA32_MC0_MISC 0x00000403 | ||
| 457 | |||
| 458 | /* C-state Residency Counters */ | ||
| 459 | #define MSR_PKG_C3_RESIDENCY 0x000003f8 | ||
| 460 | #define MSR_PKG_C6_RESIDENCY 0x000003f9 | ||
| 461 | #define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa | ||
| 462 | #define MSR_PKG_C7_RESIDENCY 0x000003fa | ||
| 463 | #define MSR_CORE_C3_RESIDENCY 0x000003fc | ||
| 464 | #define MSR_CORE_C6_RESIDENCY 0x000003fd | ||
| 465 | #define MSR_CORE_C7_RESIDENCY 0x000003fe | ||
| 466 | #define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff | ||
| 467 | #define MSR_PKG_C2_RESIDENCY 0x0000060d | ||
| 468 | #define MSR_PKG_C8_RESIDENCY 0x00000630 | ||
| 469 | #define MSR_PKG_C9_RESIDENCY 0x00000631 | ||
| 470 | #define MSR_PKG_C10_RESIDENCY 0x00000632 | ||
| 471 | |||
| 472 | /* Interrupt Response Limit */ | ||
| 473 | #define MSR_PKGC3_IRTL 0x0000060a | ||
| 474 | #define MSR_PKGC6_IRTL 0x0000060b | ||
| 475 | #define MSR_PKGC7_IRTL 0x0000060c | ||
| 476 | #define MSR_PKGC8_IRTL 0x00000633 | ||
| 477 | #define MSR_PKGC9_IRTL 0x00000634 | ||
| 478 | #define MSR_PKGC10_IRTL 0x00000635 | ||
| 479 | |||
| 480 | /* Run Time Average Power Limiting (RAPL) Interface */ | ||
| 481 | |||
| 482 | #define MSR_RAPL_POWER_UNIT 0x00000606 | ||
| 483 | |||
| 484 | #define MSR_PKG_POWER_LIMIT 0x00000610 | ||
| 485 | #define MSR_PKG_ENERGY_STATUS 0x00000611 | ||
| 486 | #define MSR_PKG_PERF_STATUS 0x00000613 | ||
| 487 | #define MSR_PKG_POWER_INFO 0x00000614 | ||
| 488 | |||
| 489 | #define MSR_DRAM_POWER_LIMIT 0x00000618 | ||
| 490 | #define MSR_DRAM_ENERGY_STATUS 0x00000619 | ||
| 491 | #define MSR_DRAM_PERF_STATUS 0x0000061b | ||
| 492 | #define MSR_DRAM_POWER_INFO 0x0000061c | ||
| 493 | |||
| 494 | #define MSR_PP0_POWER_LIMIT 0x00000638 | ||
| 495 | #define MSR_PP0_ENERGY_STATUS 0x00000639 | ||
| 496 | #define MSR_PP0_POLICY 0x0000063a | ||
| 497 | #define MSR_PP0_PERF_STATUS 0x0000063b | ||
| 498 | |||
| 499 | #define MSR_PP1_POWER_LIMIT 0x00000640 | ||
| 500 | #define MSR_PP1_ENERGY_STATUS 0x00000641 | ||
| 501 | #define MSR_PP1_POLICY 0x00000642 | ||
| 502 | |||
| 503 | /* Config TDP MSRs */ | ||
| 504 | #define MSR_CONFIG_TDP_NOMINAL 0x00000648 | ||
| 505 | #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 | ||
| 506 | #define MSR_CONFIG_TDP_LEVEL_2 0x0000064A | ||
| 507 | #define MSR_CONFIG_TDP_CONTROL 0x0000064B | ||
| 508 | #define MSR_TURBO_ACTIVATION_RATIO 0x0000064C | ||
| 509 | |||
| 510 | #define MSR_PLATFORM_ENERGY_STATUS 0x0000064D | ||
| 511 | |||
| 512 | #define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 | ||
| 513 | #define MSR_PKG_ANY_CORE_C0_RES 0x00000659 | ||
| 514 | #define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A | ||
| 515 | #define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B | ||
| 516 | |||
| 517 | #define MSR_CORE_C1_RES 0x00000660 | ||
| 518 | #define MSR_MODULE_C6_RES_MS 0x00000664 | ||
| 519 | |||
| 520 | #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 | ||
| 521 | #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 | ||
| 522 | |||
| 523 | #define MSR_ATOM_CORE_RATIOS 0x0000066a | ||
| 524 | #define MSR_ATOM_CORE_VIDS 0x0000066b | ||
| 525 | #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c | ||
| 526 | #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d | ||
| 527 | |||
| 528 | |||
| 529 | #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 | ||
| 530 | #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 | ||
| 531 | #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 | ||
| 532 | |||
| 533 | /* Hardware P state interface */ | ||
| 534 | #define MSR_PPERF 0x0000064e | ||
| 535 | #define MSR_PERF_LIMIT_REASONS 0x0000064f | ||
| 536 | #define MSR_PM_ENABLE 0x00000770 | ||
| 537 | #define MSR_HWP_CAPABILITIES 0x00000771 | ||
| 538 | #define MSR_HWP_REQUEST_PKG 0x00000772 | ||
| 539 | #define MSR_HWP_INTERRUPT 0x00000773 | ||
| 540 | #define MSR_HWP_REQUEST 0x00000774 | ||
| 541 | #define MSR_HWP_STATUS 0x00000777 | ||
| 542 | |||
| 543 | /* CPUID.6.EAX */ | ||
| 544 | #define HWP_BASE_BIT (1<<7) | ||
| 545 | #define HWP_NOTIFICATIONS_BIT (1<<8) | ||
| 546 | #define HWP_ACTIVITY_WINDOW_BIT (1<<9) | ||
| 547 | #define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) | ||
| 548 | #define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) | ||
| 549 | |||
| 550 | /* IA32_HWP_CAPABILITIES */ | ||
| 551 | #define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) | ||
| 552 | #define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) | ||
| 553 | #define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) | ||
| 554 | #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) | ||
| 555 | |||
| 556 | /* IA32_HWP_REQUEST */ | ||
| 557 | #define HWP_MIN_PERF(x) (x & 0xff) | ||
| 558 | #define HWP_MAX_PERF(x) ((x & 0xff) << 8) | ||
| 559 | #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) | ||
| 560 | #define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) | ||
| 561 | #define HWP_EPP_PERFORMANCE 0x00 | ||
| 562 | #define HWP_EPP_BALANCE_PERFORMANCE 0x80 | ||
| 563 | #define HWP_EPP_BALANCE_POWERSAVE 0xC0 | ||
| 564 | #define HWP_EPP_POWERSAVE 0xFF | ||
| 565 | #define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) | ||
| 566 | #define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) | ||
| 567 | |||
| 568 | /* IA32_HWP_STATUS */ | ||
| 569 | #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) | ||
| 570 | #define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) | ||
| 571 | |||
| 572 | /* IA32_HWP_INTERRUPT */ | ||
| 573 | #define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) | ||
| 574 | #define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) | ||
| 575 | |||
| 576 | #define MSR_AMD64_MC0_MASK 0xc0010044 | ||
| 577 | |||
| 578 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) | ||
| 579 | #define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) | ||
| 580 | #define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) | ||
| 581 | #define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) | ||
| 582 | |||
| 583 | #define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) | ||
| 584 | |||
| 585 | /* These are consecutive and not in the normal 4-per-bank MCE block */ | ||
| 586 | #define MSR_IA32_MC0_CTL2 0x00000280 | ||
| 587 | #define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) | ||
| 588 | |||
| 589 | #define MSR_P6_PERFCTR0 0x000000c1 | ||
| 590 | #define MSR_P6_PERFCTR1 0x000000c2 | ||
| 591 | #define MSR_P6_EVNTSEL0 0x00000186 | ||
| 592 | #define MSR_P6_EVNTSEL1 0x00000187 | ||
| 593 | |||
| 594 | #define MSR_KNC_PERFCTR0 0x00000020 | ||
| 595 | #define MSR_KNC_PERFCTR1 0x00000021 | ||
| 596 | #define MSR_KNC_EVNTSEL0 0x00000028 | ||
| 597 | #define MSR_KNC_EVNTSEL1 0x00000029 | ||
| 598 | |||
| 599 | /* Alternative perfctr range with full access. */ | ||
| 600 | #define MSR_IA32_PMC0 0x000004c1 | ||
| 601 | |||
| 602 | /* AMD64 MSRs. Not complete. See the architecture manual for a more | ||
| 603 | complete list. */ | ||
| 604 | |||
| 605 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | ||
| 606 | #define MSR_AMD64_TSC_RATIO 0xc0000104 | ||
| 607 | #define MSR_AMD64_NB_CFG 0xc001001f | ||
| 608 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | ||
| 609 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | ||
| 610 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 | ||
| 611 | #define MSR_AMD64_LS_CFG 0xc0011020 | ||
| 612 | #define MSR_AMD64_DC_CFG 0xc0011022 | ||
| 613 | #define MSR_AMD64_BU_CFG2 0xc001102a | ||
| 614 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | ||
| 615 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 | ||
| 616 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 | ||
| 617 | #define MSR_AMD64_IBSFETCH_REG_COUNT 3 | ||
| 618 | #define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) | ||
| 619 | #define MSR_AMD64_IBSOPCTL 0xc0011033 | ||
| 620 | #define MSR_AMD64_IBSOPRIP 0xc0011034 | ||
| 621 | #define MSR_AMD64_IBSOPDATA 0xc0011035 | ||
| 622 | #define MSR_AMD64_IBSOPDATA2 0xc0011036 | ||
| 623 | #define MSR_AMD64_IBSOPDATA3 0xc0011037 | ||
| 624 | #define MSR_AMD64_IBSDCLINAD 0xc0011038 | ||
| 625 | #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 | ||
| 626 | #define MSR_AMD64_IBSOP_REG_COUNT 7 | ||
| 627 | #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) | ||
| 628 | #define MSR_AMD64_IBSCTL 0xc001103a | ||
| 629 | #define MSR_AMD64_IBSBRTARGET 0xc001103b | ||
| 630 | #define MSR_AMD64_IBSOPDATA4 0xc001103d | ||
| 631 | #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ | ||
| 632 | #define MSR_AMD64_SEV 0xc0010131 | ||
| 633 | #define MSR_AMD64_SEV_ENABLED_BIT 0 | ||
| 634 | #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) | ||
| 635 | |||
| 636 | /* Fam 17h MSRs */ | ||
| 637 | #define MSR_F17H_IRPERF 0xc00000e9 | ||
| 638 | |||
| 639 | /* Fam 16h MSRs */ | ||
| 640 | #define MSR_F16H_L2I_PERF_CTL 0xc0010230 | ||
| 641 | #define MSR_F16H_L2I_PERF_CTR 0xc0010231 | ||
| 642 | #define MSR_F16H_DR1_ADDR_MASK 0xc0011019 | ||
| 643 | #define MSR_F16H_DR2_ADDR_MASK 0xc001101a | ||
| 644 | #define MSR_F16H_DR3_ADDR_MASK 0xc001101b | ||
| 645 | #define MSR_F16H_DR0_ADDR_MASK 0xc0011027 | ||
| 646 | |||
| 647 | /* Fam 15h MSRs */ | ||
| 648 | #define MSR_F15H_PERF_CTL 0xc0010200 | ||
| 649 | #define MSR_F15H_PERF_CTR 0xc0010201 | ||
| 650 | #define MSR_F15H_NB_PERF_CTL 0xc0010240 | ||
| 651 | #define MSR_F15H_NB_PERF_CTR 0xc0010241 | ||
| 652 | #define MSR_F15H_PTSC 0xc0010280 | ||
| 653 | #define MSR_F15H_IC_CFG 0xc0011021 | ||
| 654 | |||
| 655 | /* Fam 10h MSRs */ | ||
| 656 | #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 | ||
| 657 | #define FAM10H_MMIO_CONF_ENABLE (1<<0) | ||
| 658 | #define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf | ||
| 659 | #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 | ||
| 660 | #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL | ||
| 661 | #define FAM10H_MMIO_CONF_BASE_SHIFT 20 | ||
| 662 | #define MSR_FAM10H_NODE_ID 0xc001100c | ||
| 663 | #define MSR_F10H_DECFG 0xc0011029 | ||
| 664 | #define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 | ||
| 665 | #define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) | ||
| 666 | |||
| 667 | /* K8 MSRs */ | ||
| 668 | #define MSR_K8_TOP_MEM1 0xc001001a | ||
| 669 | #define MSR_K8_TOP_MEM2 0xc001001d | ||
| 670 | #define MSR_K8_SYSCFG 0xc0010010 | ||
| 671 | #define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 | ||
| 672 | #define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) | ||
| 673 | #define MSR_K8_INT_PENDING_MSG 0xc0010055 | ||
| 674 | /* C1E active bits in int pending message */ | ||
| 675 | #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 | ||
| 676 | #define MSR_K8_TSEG_ADDR 0xc0010112 | ||
| 677 | #define MSR_K8_TSEG_MASK 0xc0010113 | ||
| 678 | #define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ | ||
| 679 | #define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ | ||
| 680 | #define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ | ||
| 681 | |||
| 682 | /* K7 MSRs */ | ||
| 683 | #define MSR_K7_EVNTSEL0 0xc0010000 | ||
| 684 | #define MSR_K7_PERFCTR0 0xc0010004 | ||
| 685 | #define MSR_K7_EVNTSEL1 0xc0010001 | ||
| 686 | #define MSR_K7_PERFCTR1 0xc0010005 | ||
| 687 | #define MSR_K7_EVNTSEL2 0xc0010002 | ||
| 688 | #define MSR_K7_PERFCTR2 0xc0010006 | ||
| 689 | #define MSR_K7_EVNTSEL3 0xc0010003 | ||
| 690 | #define MSR_K7_PERFCTR3 0xc0010007 | ||
| 691 | #define MSR_K7_CLK_CTL 0xc001001b | ||
| 692 | #define MSR_K7_HWCR 0xc0010015 | ||
| 693 | #define MSR_K7_HWCR_SMMLOCK_BIT 0 | ||
| 694 | #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) | ||
| 695 | #define MSR_K7_FID_VID_CTL 0xc0010041 | ||
| 696 | #define MSR_K7_FID_VID_STATUS 0xc0010042 | ||
| 697 | |||
| 698 | /* K6 MSRs */ | ||
| 699 | #define MSR_K6_WHCR 0xc0000082 | ||
| 700 | #define MSR_K6_UWCCR 0xc0000085 | ||
| 701 | #define MSR_K6_EPMR 0xc0000086 | ||
| 702 | #define MSR_K6_PSOR 0xc0000087 | ||
| 703 | #define MSR_K6_PFIR 0xc0000088 | ||
| 704 | |||
| 705 | /* Centaur-Hauls/IDT defined MSRs. */ | ||
| 706 | #define MSR_IDT_FCR1 0x00000107 | ||
| 707 | #define MSR_IDT_FCR2 0x00000108 | ||
| 708 | #define MSR_IDT_FCR3 0x00000109 | ||
| 709 | #define MSR_IDT_FCR4 0x0000010a | ||
| 710 | |||
| 711 | #define MSR_IDT_MCR0 0x00000110 | ||
| 712 | #define MSR_IDT_MCR1 0x00000111 | ||
| 713 | #define MSR_IDT_MCR2 0x00000112 | ||
| 714 | #define MSR_IDT_MCR3 0x00000113 | ||
| 715 | #define MSR_IDT_MCR4 0x00000114 | ||
| 716 | #define MSR_IDT_MCR5 0x00000115 | ||
| 717 | #define MSR_IDT_MCR6 0x00000116 | ||
| 718 | #define MSR_IDT_MCR7 0x00000117 | ||
| 719 | #define MSR_IDT_MCR_CTRL 0x00000120 | ||
| 720 | |||
| 721 | /* VIA Cyrix defined MSRs*/ | ||
| 722 | #define MSR_VIA_FCR 0x00001107 | ||
| 723 | #define MSR_VIA_LONGHAUL 0x0000110a | ||
| 724 | #define MSR_VIA_RNG 0x0000110b | ||
| 725 | #define MSR_VIA_BCR2 0x00001147 | ||
| 726 | |||
| 727 | /* Transmeta defined MSRs */ | ||
| 728 | #define MSR_TMTA_LONGRUN_CTRL 0x80868010 | ||
| 729 | #define MSR_TMTA_LONGRUN_FLAGS 0x80868011 | ||
| 730 | #define MSR_TMTA_LRTI_READOUT 0x80868018 | ||
| 731 | #define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a | ||
| 732 | |||
| 733 | /* Intel defined MSRs. */ | ||
| 734 | #define MSR_IA32_P5_MC_ADDR 0x00000000 | ||
| 735 | #define MSR_IA32_P5_MC_TYPE 0x00000001 | ||
| 736 | #define MSR_IA32_TSC 0x00000010 | ||
| 737 | #define MSR_IA32_PLATFORM_ID 0x00000017 | ||
| 738 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | ||
| 739 | #define MSR_EBC_FREQUENCY_ID 0x0000002c | ||
| 740 | #define MSR_SMI_COUNT 0x00000034 | ||
| 741 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | ||
| 742 | #define MSR_IA32_TSC_ADJUST 0x0000003b | ||
| 743 | #define MSR_IA32_BNDCFGS 0x00000d90 | ||
| 744 | |||
| 745 | #define MSR_IA32_BNDCFGS_RSVD 0x00000ffc | ||
| 746 | |||
| 747 | #define MSR_IA32_XSS 0x00000da0 | ||
| 748 | |||
| 749 | #define FEATURE_CONTROL_LOCKED (1<<0) | ||
| 750 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) | ||
| 751 | #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) | ||
| 752 | #define FEATURE_CONTROL_LMCE (1<<20) | ||
| 753 | |||
| 754 | #define MSR_IA32_APICBASE 0x0000001b | ||
| 755 | #define MSR_IA32_APICBASE_BSP (1<<8) | ||
| 756 | #define MSR_IA32_APICBASE_ENABLE (1<<11) | ||
| 757 | #define MSR_IA32_APICBASE_BASE (0xfffff<<12) | ||
| 758 | |||
| 759 | #define MSR_IA32_TSCDEADLINE 0x000006e0 | ||
| 760 | |||
| 761 | #define MSR_IA32_UCODE_WRITE 0x00000079 | ||
| 762 | #define MSR_IA32_UCODE_REV 0x0000008b | ||
| 763 | |||
| 764 | #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b | ||
| 765 | #define MSR_IA32_SMBASE 0x0000009e | ||
| 766 | |||
| 767 | #define MSR_IA32_PERF_STATUS 0x00000198 | ||
| 768 | #define MSR_IA32_PERF_CTL 0x00000199 | ||
| 769 | #define INTEL_PERF_CTL_MASK 0xffff | ||
| 770 | #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 | ||
| 771 | #define MSR_AMD_PERF_STATUS 0xc0010063 | ||
| 772 | #define MSR_AMD_PERF_CTL 0xc0010062 | ||
| 773 | |||
| 774 | #define MSR_IA32_MPERF 0x000000e7 | ||
| 775 | #define MSR_IA32_APERF 0x000000e8 | ||
| 776 | |||
| 777 | #define MSR_IA32_THERM_CONTROL 0x0000019a | ||
| 778 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b | ||
| 779 | |||
| 780 | #define THERM_INT_HIGH_ENABLE (1 << 0) | ||
| 781 | #define THERM_INT_LOW_ENABLE (1 << 1) | ||
| 782 | #define THERM_INT_PLN_ENABLE (1 << 24) | ||
| 783 | |||
| 784 | #define MSR_IA32_THERM_STATUS 0x0000019c | ||
| 785 | |||
| 786 | #define THERM_STATUS_PROCHOT (1 << 0) | ||
| 787 | #define THERM_STATUS_POWER_LIMIT (1 << 10) | ||
| 788 | |||
| 789 | #define MSR_THERM2_CTL 0x0000019d | ||
| 790 | |||
| 791 | #define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) | ||
| 792 | |||
| 793 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | ||
| 794 | |||
| 795 | #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 | ||
| 796 | |||
| 797 | #define MSR_MISC_FEATURE_CONTROL 0x000001a4 | ||
| 798 | #define MSR_MISC_PWR_MGMT 0x000001aa | ||
| 799 | |||
| 800 | #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 | ||
| 801 | #define ENERGY_PERF_BIAS_PERFORMANCE 0 | ||
| 802 | #define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 | ||
| 803 | #define ENERGY_PERF_BIAS_NORMAL 6 | ||
| 804 | #define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 | ||
| 805 | #define ENERGY_PERF_BIAS_POWERSAVE 15 | ||
| 806 | |||
| 807 | #define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 | ||
| 808 | |||
| 809 | #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) | ||
| 810 | #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) | ||
| 811 | |||
| 812 | #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 | ||
| 813 | |||
| 814 | #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) | ||
| 815 | #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) | ||
| 816 | #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) | ||
| 817 | |||
| 818 | /* Thermal Thresholds Support */ | ||
| 819 | #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) | ||
| 820 | #define THERM_SHIFT_THRESHOLD0 8 | ||
| 821 | #define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) | ||
| 822 | #define THERM_INT_THRESHOLD1_ENABLE (1 << 23) | ||
| 823 | #define THERM_SHIFT_THRESHOLD1 16 | ||
| 824 | #define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) | ||
| 825 | #define THERM_STATUS_THRESHOLD0 (1 << 6) | ||
| 826 | #define THERM_LOG_THRESHOLD0 (1 << 7) | ||
| 827 | #define THERM_STATUS_THRESHOLD1 (1 << 8) | ||
| 828 | #define THERM_LOG_THRESHOLD1 (1 << 9) | ||
| 829 | |||
| 830 | /* MISC_ENABLE bits: architectural */ | ||
| 831 | #define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 | ||
| 832 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) | ||
| 833 | #define MSR_IA32_MISC_ENABLE_TCC_BIT 1 | ||
| 834 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) | ||
| 835 | #define MSR_IA32_MISC_ENABLE_EMON_BIT 7 | ||
| 836 | #define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) | ||
| 837 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 | ||
| 838 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) | ||
| 839 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 | ||
| 840 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) | ||
| 841 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 | ||
| 842 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) | ||
| 843 | #define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 | ||
| 844 | #define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) | ||
| 845 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 | ||
| 846 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) | ||
| 847 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 | ||
| 848 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) | ||
| 849 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 | ||
| 850 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) | ||
| 851 | |||
| 852 | /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ | ||
| 853 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 | ||
| 854 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) | ||
| 855 | #define MSR_IA32_MISC_ENABLE_TM1_BIT 3 | ||
| 856 | #define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) | ||
| 857 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 | ||
| 858 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) | ||
| 859 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 | ||
| 860 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) | ||
| 861 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 | ||
| 862 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) | ||
| 863 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 | ||
| 864 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) | ||
| 865 | #define MSR_IA32_MISC_ENABLE_FERR_BIT 10 | ||
| 866 | #define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) | ||
| 867 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 | ||
| 868 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) | ||
| 869 | #define MSR_IA32_MISC_ENABLE_TM2_BIT 13 | ||
| 870 | #define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) | ||
| 871 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 | ||
| 872 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) | ||
| 873 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 | ||
| 874 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) | ||
| 875 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 | ||
| 876 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) | ||
| 877 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 | ||
| 878 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) | ||
| 879 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 | ||
| 880 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) | ||
| 881 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 | ||
| 882 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) | ||
| 883 | |||
| 884 | /* MISC_FEATURES_ENABLES non-architectural features */ | ||
| 885 | #define MSR_MISC_FEATURES_ENABLES 0x00000140 | ||
| 886 | |||
| 887 | #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 | ||
| 888 | #define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) | ||
| 889 | #define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 | ||
| 890 | |||
| 891 | #define MSR_IA32_TSC_DEADLINE 0x000006E0 | ||
| 892 | |||
| 893 | /* P4/Xeon+ specific */ | ||
| 894 | #define MSR_IA32_MCG_EAX 0x00000180 | ||
| 895 | #define MSR_IA32_MCG_EBX 0x00000181 | ||
| 896 | #define MSR_IA32_MCG_ECX 0x00000182 | ||
| 897 | #define MSR_IA32_MCG_EDX 0x00000183 | ||
| 898 | #define MSR_IA32_MCG_ESI 0x00000184 | ||
| 899 | #define MSR_IA32_MCG_EDI 0x00000185 | ||
| 900 | #define MSR_IA32_MCG_EBP 0x00000186 | ||
| 901 | #define MSR_IA32_MCG_ESP 0x00000187 | ||
| 902 | #define MSR_IA32_MCG_EFLAGS 0x00000188 | ||
| 903 | #define MSR_IA32_MCG_EIP 0x00000189 | ||
| 904 | #define MSR_IA32_MCG_RESERVED 0x0000018a | ||
| 905 | |||
| 906 | /* Pentium IV performance counter MSRs */ | ||
| 907 | #define MSR_P4_BPU_PERFCTR0 0x00000300 | ||
| 908 | #define MSR_P4_BPU_PERFCTR1 0x00000301 | ||
| 909 | #define MSR_P4_BPU_PERFCTR2 0x00000302 | ||
| 910 | #define MSR_P4_BPU_PERFCTR3 0x00000303 | ||
| 911 | #define MSR_P4_MS_PERFCTR0 0x00000304 | ||
| 912 | #define MSR_P4_MS_PERFCTR1 0x00000305 | ||
| 913 | #define MSR_P4_MS_PERFCTR2 0x00000306 | ||
| 914 | #define MSR_P4_MS_PERFCTR3 0x00000307 | ||
| 915 | #define MSR_P4_FLAME_PERFCTR0 0x00000308 | ||
| 916 | #define MSR_P4_FLAME_PERFCTR1 0x00000309 | ||
| 917 | #define MSR_P4_FLAME_PERFCTR2 0x0000030a | ||
| 918 | #define MSR_P4_FLAME_PERFCTR3 0x0000030b | ||
| 919 | #define MSR_P4_IQ_PERFCTR0 0x0000030c | ||
| 920 | #define MSR_P4_IQ_PERFCTR1 0x0000030d | ||
| 921 | #define MSR_P4_IQ_PERFCTR2 0x0000030e | ||
| 922 | #define MSR_P4_IQ_PERFCTR3 0x0000030f | ||
| 923 | #define MSR_P4_IQ_PERFCTR4 0x00000310 | ||
| 924 | #define MSR_P4_IQ_PERFCTR5 0x00000311 | ||
| 925 | #define MSR_P4_BPU_CCCR0 0x00000360 | ||
| 926 | #define MSR_P4_BPU_CCCR1 0x00000361 | ||
| 927 | #define MSR_P4_BPU_CCCR2 0x00000362 | ||
| 928 | #define MSR_P4_BPU_CCCR3 0x00000363 | ||
| 929 | #define MSR_P4_MS_CCCR0 0x00000364 | ||
| 930 | #define MSR_P4_MS_CCCR1 0x00000365 | ||
| 931 | #define MSR_P4_MS_CCCR2 0x00000366 | ||
| 932 | #define MSR_P4_MS_CCCR3 0x00000367 | ||
| 933 | #define MSR_P4_FLAME_CCCR0 0x00000368 | ||
| 934 | #define MSR_P4_FLAME_CCCR1 0x00000369 | ||
| 935 | #define MSR_P4_FLAME_CCCR2 0x0000036a | ||
| 936 | #define MSR_P4_FLAME_CCCR3 0x0000036b | ||
| 937 | #define MSR_P4_IQ_CCCR0 0x0000036c | ||
| 938 | #define MSR_P4_IQ_CCCR1 0x0000036d | ||
| 939 | #define MSR_P4_IQ_CCCR2 0x0000036e | ||
| 940 | #define MSR_P4_IQ_CCCR3 0x0000036f | ||
| 941 | #define MSR_P4_IQ_CCCR4 0x00000370 | ||
| 942 | #define MSR_P4_IQ_CCCR5 0x00000371 | ||
| 943 | #define MSR_P4_ALF_ESCR0 0x000003ca | ||
| 944 | #define MSR_P4_ALF_ESCR1 0x000003cb | ||
| 945 | #define MSR_P4_BPU_ESCR0 0x000003b2 | ||
| 946 | #define MSR_P4_BPU_ESCR1 0x000003b3 | ||
| 947 | #define MSR_P4_BSU_ESCR0 0x000003a0 | ||
| 948 | #define MSR_P4_BSU_ESCR1 0x000003a1 | ||
| 949 | #define MSR_P4_CRU_ESCR0 0x000003b8 | ||
| 950 | #define MSR_P4_CRU_ESCR1 0x000003b9 | ||
| 951 | #define MSR_P4_CRU_ESCR2 0x000003cc | ||
| 952 | #define MSR_P4_CRU_ESCR3 0x000003cd | ||
| 953 | #define MSR_P4_CRU_ESCR4 0x000003e0 | ||
| 954 | #define MSR_P4_CRU_ESCR5 0x000003e1 | ||
| 955 | #define MSR_P4_DAC_ESCR0 0x000003a8 | ||
| 956 | #define MSR_P4_DAC_ESCR1 0x000003a9 | ||
| 957 | #define MSR_P4_FIRM_ESCR0 0x000003a4 | ||
| 958 | #define MSR_P4_FIRM_ESCR1 0x000003a5 | ||
| 959 | #define MSR_P4_FLAME_ESCR0 0x000003a6 | ||
| 960 | #define MSR_P4_FLAME_ESCR1 0x000003a7 | ||
| 961 | #define MSR_P4_FSB_ESCR0 0x000003a2 | ||
| 962 | #define MSR_P4_FSB_ESCR1 0x000003a3 | ||
| 963 | #define MSR_P4_IQ_ESCR0 0x000003ba | ||
| 964 | #define MSR_P4_IQ_ESCR1 0x000003bb | ||
| 965 | #define MSR_P4_IS_ESCR0 0x000003b4 | ||
| 966 | #define MSR_P4_IS_ESCR1 0x000003b5 | ||
| 967 | #define MSR_P4_ITLB_ESCR0 0x000003b6 | ||
| 968 | #define MSR_P4_ITLB_ESCR1 0x000003b7 | ||
| 969 | #define MSR_P4_IX_ESCR0 0x000003c8 | ||
| 970 | #define MSR_P4_IX_ESCR1 0x000003c9 | ||
| 971 | #define MSR_P4_MOB_ESCR0 0x000003aa | ||
| 972 | #define MSR_P4_MOB_ESCR1 0x000003ab | ||
| 973 | #define MSR_P4_MS_ESCR0 0x000003c0 | ||
| 974 | #define MSR_P4_MS_ESCR1 0x000003c1 | ||
| 975 | #define MSR_P4_PMH_ESCR0 0x000003ac | ||
| 976 | #define MSR_P4_PMH_ESCR1 0x000003ad | ||
| 977 | #define MSR_P4_RAT_ESCR0 0x000003bc | ||
| 978 | #define MSR_P4_RAT_ESCR1 0x000003bd | ||
| 979 | #define MSR_P4_SAAT_ESCR0 0x000003ae | ||
| 980 | #define MSR_P4_SAAT_ESCR1 0x000003af | ||
| 981 | #define MSR_P4_SSU_ESCR0 0x000003be | ||
| 982 | #define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ | ||
| 983 | |||
| 984 | #define MSR_P4_TBPU_ESCR0 0x000003c2 | ||
| 985 | #define MSR_P4_TBPU_ESCR1 0x000003c3 | ||
| 986 | #define MSR_P4_TC_ESCR0 0x000003c4 | ||
| 987 | #define MSR_P4_TC_ESCR1 0x000003c5 | ||
| 988 | #define MSR_P4_U2L_ESCR0 0x000003b0 | ||
| 989 | #define MSR_P4_U2L_ESCR1 0x000003b1 | ||
| 990 | |||
| 991 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
| 992 | |||
| 993 | /* Intel Core-based CPU performance counters */ | ||
| 994 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | ||
| 995 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | ||
| 996 | #define MSR_CORE_PERF_FIXED_CTR2 0x0000030b | ||
| 997 | #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d | ||
| 998 | #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e | ||
| 999 | #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f | ||
| 1000 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 | ||
| 1001 | |||
| 1002 | /* Geode defined MSRs */ | ||
| 1003 | #define MSR_GEODE_BUSCONT_CONF0 0x00001900 | ||
| 1004 | |||
| 1005 | /* Intel VT MSRs */ | ||
| 1006 | #define MSR_IA32_VMX_BASIC 0x00000480 | ||
| 1007 | #define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 | ||
| 1008 | #define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 | ||
| 1009 | #define MSR_IA32_VMX_EXIT_CTLS 0x00000483 | ||
| 1010 | #define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 | ||
| 1011 | #define MSR_IA32_VMX_MISC 0x00000485 | ||
| 1012 | #define MSR_IA32_VMX_CR0_FIXED0 0x00000486 | ||
| 1013 | #define MSR_IA32_VMX_CR0_FIXED1 0x00000487 | ||
| 1014 | #define MSR_IA32_VMX_CR4_FIXED0 0x00000488 | ||
| 1015 | #define MSR_IA32_VMX_CR4_FIXED1 0x00000489 | ||
| 1016 | #define MSR_IA32_VMX_VMCS_ENUM 0x0000048a | ||
| 1017 | #define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b | ||
| 1018 | #define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c | ||
| 1019 | #define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d | ||
| 1020 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e | ||
| 1021 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f | ||
| 1022 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 | ||
| 1023 | #define MSR_IA32_VMX_VMFUNC 0x00000491 | ||
| 1024 | |||
| 1025 | /* VMX_BASIC bits and bitmasks */ | ||
| 1026 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 | ||
| 1027 | #define VMX_BASIC_TRUE_CTLS (1ULL << 55) | ||
| 1028 | #define VMX_BASIC_64 0x0001000000000000LLU | ||
| 1029 | #define VMX_BASIC_MEM_TYPE_SHIFT 50 | ||
| 1030 | #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU | ||
| 1031 | #define VMX_BASIC_MEM_TYPE_WB 6LLU | ||
| 1032 | #define VMX_BASIC_INOUT 0x0040000000000000LLU | ||
| 1033 | |||
| 1034 | /* MSR_IA32_VMX_MISC bits */ | ||
| 1035 | #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) | ||
| 1036 | #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F | ||
| 1037 | /* AMD-V MSRs */ | ||
| 1038 | |||
| 1039 | #define MSR_VM_CR 0xc0010114 | ||
| 1040 | #define MSR_VM_IGNNE 0xc0010115 | ||
| 1041 | #define MSR_VM_HSAVE_PA 0xc0010117 | ||
| 1042 | |||
| 1043 | #endif /* !SELFTEST_KVM_X86_H */ | ||
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c new file mode 100644 index 000000000000..c9f5b7d4ce38 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/assert.c | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/assert.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #define _GNU_SOURCE /* for getline(3) and strchrnul(3) */ | ||
| 10 | |||
| 11 | #include "test_util.h" | ||
| 12 | |||
| 13 | #include <execinfo.h> | ||
| 14 | #include <sys/syscall.h> | ||
| 15 | |||
| 16 | /* Dumps the current stack trace to stderr. */ | ||
| 17 | static void __attribute__((noinline)) test_dump_stack(void); | ||
| 18 | static void test_dump_stack(void) | ||
| 19 | { | ||
| 20 | /* | ||
| 21 | * Build and run this command: | ||
| 22 | * | ||
| 23 | * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \ | ||
| 24 | * grep -v test_dump_stack | cat -n 1>&2 | ||
| 25 | * | ||
| 26 | * Note that the spacing is different and there's no newline. | ||
| 27 | */ | ||
| 28 | size_t i; | ||
| 29 | size_t n = 20; | ||
| 30 | void *stack[n]; | ||
| 31 | const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai"; | ||
| 32 | const char *pipeline = "|cat -n 1>&2"; | ||
| 33 | char cmd[strlen(addr2line) + strlen(pipeline) + | ||
| 34 | /* N bytes per addr * 2 digits per byte + 1 space per addr: */ | ||
| 35 | n * (((sizeof(void *)) * 2) + 1) + | ||
| 36 | /* Null terminator: */ | ||
| 37 | 1]; | ||
| 38 | char *c; | ||
| 39 | |||
| 40 | n = backtrace(stack, n); | ||
| 41 | c = &cmd[0]; | ||
| 42 | c += sprintf(c, "%s", addr2line); | ||
| 43 | /* | ||
| 44 | * Skip the first 3 frames: backtrace, test_dump_stack, and | ||
| 45 | * test_assert. We hope that backtrace isn't inlined and the other two | ||
| 46 | * we've declared noinline. | ||
| 47 | */ | ||
| 48 | for (i = 2; i < n; i++) | ||
| 49 | c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1); | ||
| 50 | c += sprintf(c, "%s", pipeline); | ||
| 51 | #pragma GCC diagnostic push | ||
| 52 | #pragma GCC diagnostic ignored "-Wunused-result" | ||
| 53 | system(cmd); | ||
| 54 | #pragma GCC diagnostic pop | ||
| 55 | } | ||
| 56 | |||
| 57 | static pid_t gettid(void) | ||
| 58 | { | ||
| 59 | return syscall(SYS_gettid); | ||
| 60 | } | ||
| 61 | |||
| 62 | void __attribute__((noinline)) | ||
| 63 | test_assert(bool exp, const char *exp_str, | ||
| 64 | const char *file, unsigned int line, const char *fmt, ...) | ||
| 65 | { | ||
| 66 | va_list ap; | ||
| 67 | |||
| 68 | if (!(exp)) { | ||
| 69 | va_start(ap, fmt); | ||
| 70 | |||
| 71 | fprintf(stderr, "==== Test Assertion Failure ====\n" | ||
| 72 | " %s:%u: %s\n" | ||
| 73 | " pid=%d tid=%d\n", | ||
| 74 | file, line, exp_str, getpid(), gettid()); | ||
| 75 | test_dump_stack(); | ||
| 76 | if (fmt) { | ||
| 77 | fputs(" ", stderr); | ||
| 78 | vfprintf(stderr, fmt, ap); | ||
| 79 | fputs("\n", stderr); | ||
| 80 | } | ||
| 81 | va_end(ap); | ||
| 82 | |||
| 83 | exit(254); | ||
| 84 | } | ||
| 85 | |||
| 86 | return; | ||
| 87 | } | ||
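
For context (an editorial illustration, not part of the patch): tests reach test_assert() through a TEST_ASSERT() wrapper macro declared in test_util.h (not shown in this hunk), which presumably supplies the stringified condition plus file and line. A minimal sketch of a caller, using the condition-plus-printf-arguments shape seen throughout this series:

#include "test_util.h"

#include <errno.h>

int main(void)
{
        int rc = 0;     /* stand-in for a KVM ioctl return value */

        /*
         * On failure this prints the expression, file:line, pid/tid and an
         * addr2line-resolved backtrace, then exits with status 254.
         */
        TEST_ASSERT(rc == 0, "ioctl failed, rc: %i errno: %i", rc, errno);
        return 0;
}
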
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c new file mode 100644 index 000000000000..5eb857584aa3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/elf.c | |||
| @@ -0,0 +1,197 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/elf.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "test_util.h" | ||
| 10 | |||
| 11 | #include <bits/endian.h> | ||
| 12 | #include <linux/elf.h> | ||
| 13 | |||
| 14 | #include "kvm_util.h" | ||
| 15 | #include "kvm_util_internal.h" | ||
| 16 | |||
| 17 | static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp) | ||
| 18 | { | ||
| 19 | off_t offset_rv; | ||
| 20 | |||
| 21 | /* Open the ELF file. */ | ||
| 22 | int fd; | ||
| 23 | fd = open(filename, O_RDONLY); | ||
| 24 | TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n" | ||
| 25 | " filename: %s\n" | ||
| 26 | " rv: %i errno: %i", filename, fd, errno); | ||
| 27 | |||
| 28 | /* Read in and validate ELF Identification Record. | ||
| 29 | * The ELF Identification record is the first 16 (EI_NIDENT) bytes | ||
| 30 | * of the ELF header, which is at the beginning of the ELF file. | ||
| 31 | * For now it is only safe to read the first EI_NIDENT bytes. Once | ||
| 32 | * read and validated, the value of e_ehsize can be used to determine | ||
| 33 | * the real size of the ELF header. | ||
| 34 | */ | ||
| 35 | unsigned char ident[EI_NIDENT]; | ||
| 36 | test_read(fd, ident, sizeof(ident)); | ||
| 37 | TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1) | ||
| 38 | && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3), | ||
| 39 | "ELF MAGIC Mismatch,\n" | ||
| 40 | " filename: %s\n" | ||
| 41 | " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n" | ||
| 42 | " Expected: %02x %02x %02x %02x", | ||
| 43 | filename, | ||
| 44 | ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3], | ||
| 45 | ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3); | ||
| 46 | TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64, | ||
| 47 | "Current implementation only able to handle ELFCLASS64,\n" | ||
| 48 | " filename: %s\n" | ||
| 49 | " ident[EI_CLASS]: %02x\n" | ||
| 50 | " expected: %02x", | ||
| 51 | filename, | ||
| 52 | ident[EI_CLASS], ELFCLASS64); | ||
| 53 | TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN) | ||
| 54 | && (ident[EI_DATA] == ELFDATA2LSB)) | ||
| 55 | || ((BYTE_ORDER == BIG_ENDIAN) | ||
| 56 | && (ident[EI_DATA] == ELFDATA2MSB)), "Current " | ||
| 57 | "implementation only able to handle\n" | ||
| 58 | "cases where the host and ELF file endianness\n" | ||
| 59 | "is the same:\n" | ||
| 60 | " host BYTE_ORDER: %u\n" | ||
| 61 | " host LITTLE_ENDIAN: %u\n" | ||
| 62 | " host BIG_ENDIAN: %u\n" | ||
| 63 | " ident[EI_DATA]: %u\n" | ||
| 64 | " ELFDATA2LSB: %u\n" | ||
| 65 | " ELFDATA2MSB: %u", | ||
| 66 | BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN, | ||
| 67 | ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB); | ||
| 68 | TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT, | ||
| 69 | "Current implementation only able to handle current " | ||
| 70 | "ELF version,\n" | ||
| 71 | " filename: %s\n" | ||
| 72 | " ident[EI_VERSION]: %02x\n" | ||
| 73 | " expected: %02x", | ||
| 74 | filename, ident[EI_VERSION], EV_CURRENT); | ||
| 75 | |||
| 76 | /* Read in the ELF header. | ||
| 77 | * With the ELF Identification portion of the ELF header | ||
| 78 | * validated, especially that the value at EI_VERSION is | ||
| 79 | * as expected, it is now safe to read the entire ELF header. | ||
| 80 | */ | ||
| 81 | offset_rv = lseek(fd, 0, SEEK_SET); | ||
| 82 | TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n" | ||
| 83 | " rv: %zi expected: %i", offset_rv, 0); | ||
| 84 | test_read(fd, hdrp, sizeof(*hdrp)); | ||
| 85 | TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr), | ||
| 86 | "Unexpected physical header size,\n" | ||
| 87 | " hdrp->e_phentsize: %x\n" | ||
| 88 | " expected: %zx", | ||
| 89 | hdrp->e_phentsize, sizeof(Elf64_Phdr)); | ||
| 90 | TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr), | ||
| 91 | "Unexpected section header size,\n" | ||
| 92 | " hdrp->e_shentsize: %x\n" | ||
| 93 | " expected: %zx", | ||
| 94 | hdrp->e_shentsize, sizeof(Elf64_Shdr)); | ||
| 95 | } | ||
| 96 | |||
| 97 | /* VM ELF Load | ||
| 98 | * | ||
| 99 | * Input Args: | ||
| 100 | * filename - Path to ELF file | ||
| 101 | * | ||
| 102 | * Output Args: None | ||
| 103 | * | ||
| 104 | * Input/Output Args: | ||
| 105 | * vm - Pointer to opaque type that describes the VM. | ||
| 106 | * | ||
| 107 | * Return: None, TEST_ASSERT failures for all error conditions | ||
| 108 | * | ||
| 109 | * Loads the program image of the ELF file specified by filename, | ||
| 110 | * into the virtual address space of the VM pointed to by vm. On entry | ||
| 111 | * the VM needs to not be using any of the virtual address space used | ||
| 112 | * by the image and it needs to have sufficient available physical pages, to | ||
| 113 | * back the virtual pages used to load the image. | ||
| 114 | */ | ||
| 115 | void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, | ||
| 116 | uint32_t data_memslot, uint32_t pgd_memslot) | ||
| 117 | { | ||
| 118 | off_t offset, offset_rv; | ||
| 119 | Elf64_Ehdr hdr; | ||
| 120 | |||
| 121 | /* Open the ELF file. */ | ||
| 122 | int fd; | ||
| 123 | fd = open(filename, O_RDONLY); | ||
| 124 | TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n" | ||
| 125 | " filename: %s\n" | ||
| 126 | " rv: %i errno: %i", filename, fd, errno); | ||
| 127 | |||
| 128 | /* Read in the ELF header. */ | ||
| 129 | elfhdr_get(filename, &hdr); | ||
| 130 | |||
| 131 | /* For each program header. | ||
| 132 | * The following ELF header members specify the location | ||
| 133 | * and size of the program headers: | ||
| 134 | * | ||
| 135 | * e_phoff - File offset to start of program headers | ||
| 136 | * e_phentsize - Size of each program header | ||
| 137 | * e_phnum - Number of program header entries | ||
| 138 | */ | ||
| 139 | for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) { | ||
| 140 | /* Seek to the beginning of the program header. */ | ||
| 141 | offset = hdr.e_phoff + (n1 * hdr.e_phentsize); | ||
| 142 | offset_rv = lseek(fd, offset, SEEK_SET); | ||
| 143 | TEST_ASSERT(offset_rv == offset, | ||
| 144 | "Failed to seek to begining of program header %u,\n" | ||
| 145 | " filename: %s\n" | ||
| 146 | " rv: %jd errno: %i", | ||
| 147 | n1, filename, (intmax_t) offset_rv, errno); | ||
| 148 | |||
| 149 | /* Read in the program header. */ | ||
| 150 | Elf64_Phdr phdr; | ||
| 151 | test_read(fd, &phdr, sizeof(phdr)); | ||
| 152 | |||
| 153 | /* Skip if this header doesn't describe a loadable segment. */ | ||
| 154 | if (phdr.p_type != PT_LOAD) | ||
| 155 | continue; | ||
| 156 | |||
| 157 | /* Allocate memory for this segment within the VM. */ | ||
| 158 | TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment " | ||
| 159 | "memsize of 0,\n" | ||
| 160 | " phdr index: %u p_memsz: 0x%" PRIx64, | ||
| 161 | n1, (uint64_t) phdr.p_memsz); | ||
| 162 | vm_vaddr_t seg_vstart = phdr.p_vaddr; | ||
| 163 | seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); | ||
| 164 | vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; | ||
| 165 | seg_vend |= vm->page_size - 1; | ||
| 166 | size_t seg_size = seg_vend - seg_vstart + 1; | ||
| 167 | |||
| 168 | vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart, | ||
| 169 | data_memslot, pgd_memslot); | ||
| 170 | TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " | ||
| 171 | "virtual memory for segment at requested min addr,\n" | ||
| 172 | " segment idx: %u\n" | ||
| 173 | " seg_vstart: 0x%lx\n" | ||
| 174 | " vaddr: 0x%lx", | ||
| 175 | n1, seg_vstart, vaddr); | ||
| 176 | memset(addr_gva2hva(vm, vaddr), 0, seg_size); | ||
| 177 | /* TODO(lhuemill): Set permissions of each memory segment | ||
| 178 | * based on the least-significant 3 bits of phdr.p_flags. | ||
| 179 | */ | ||
| 180 | |||
| 181 | /* Load portion of initial state that is contained within | ||
| 182 | * the ELF file. | ||
| 183 | */ | ||
| 184 | if (phdr.p_filesz) { | ||
| 185 | offset_rv = lseek(fd, phdr.p_offset, SEEK_SET); | ||
| 186 | TEST_ASSERT(offset_rv == phdr.p_offset, | ||
| 187 | "Seek to program segment offset failed,\n" | ||
| 188 | " program header idx: %u errno: %i\n" | ||
| 189 | " offset_rv: 0x%jx\n" | ||
| 190 | " expected: 0x%jx\n", | ||
| 191 | n1, errno, (intmax_t) offset_rv, | ||
| 192 | (intmax_t) phdr.p_offset); | ||
| 193 | test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), | ||
| 194 | phdr.p_filesz); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | } | ||
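
As an editorial illustration of how this loader is meant to be driven (the binary path, page count and memslot numbers below are arbitrary; vm_create(), vm_vcpu_add() and kvm_vm_free() come from kvm_util.c later in this patch):

#include "kvm_util.h"

#include <fcntl.h>

static void setup_guest(const char *guest_elf)
{
        struct kvm_vm *vm;

        /* VM with 2048 guest physical pages mapped at GPA 0. */
        vm = vm_create(VM_MODE_FLAT48PG, 2048, O_RDWR);

        /* Map every PT_LOAD segment of the ELF into guest memory. */
        kvm_vm_elf_load(vm, guest_elf, 0, 0);

        vm_vcpu_add(vm, 0);
        /* ... set registers, run the vCPU, check results ... */

        kvm_vm_free(vm);
}
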
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c new file mode 100644 index 000000000000..cff869ffe6ee --- /dev/null +++ b/tools/testing/selftests/kvm/lib/io.c | |||
| @@ -0,0 +1,158 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/io.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "test_util.h" | ||
| 10 | |||
| 11 | /* Test Write | ||
| 12 | * | ||
| 13 | * A wrapper for write(2), that automatically handles the following | ||
| 14 | * special conditions: | ||
| 15 | * | ||
| 16 | * + Interrupted system call (EINTR) | ||
| 17 | * + Write of less than requested amount | ||
| 18 | * + Non-block return (EAGAIN) | ||
| 19 | * | ||
| 20 | * For each of the above, an additional write is performed to automatically | ||
| 21 | * continue writing the requested data. | ||
| 22 | * There are also many cases where write(2) can return an unexpected | ||
| 23 | * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. | ||
| 24 | * | ||
| 25 | * Note, for function signature compatibility with write(2), this function | ||
| 26 | * returns the number of bytes written, but that value will always be equal | ||
| 27 | * to the number of requested bytes. All other conditions in this and | ||
| 28 | * future enhancements to this function either automatically issue another | ||
| 29 | * write(2) or cause a TEST_ASSERT failure. | ||
| 30 | * | ||
| 31 | * Args: | ||
| 32 | * fd - Opened file descriptor to file to be written. | ||
| 33 | * buf - Starting address of data to be written. | ||
| 34 | * count - Number of bytes to write. | ||
| 35 | * |||
| 36 | * Output: None | ||
| 37 | * | ||
| 38 | * Return: | ||
| 39 | * On success, number of bytes written. | ||
| 40 | * On failure, a TEST_ASSERT failure is caused. | ||
| 41 | */ | ||
| 42 | ssize_t test_write(int fd, const void *buf, size_t count) | ||
| 43 | { | ||
| 44 | ssize_t rc; | ||
| 45 | ssize_t num_written = 0; | ||
| 46 | size_t num_left = count; | ||
| 47 | const char *ptr = buf; | ||
| 48 | |||
| 49 | /* Note: Count of zero is allowed (see "RETURN VALUE" portion of | ||
| 50 | * write(2) manpage for details). | ||
| 51 | */ | ||
| 52 | TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count); | ||
| 53 | |||
| 54 | do { | ||
| 55 | rc = write(fd, ptr, num_left); | ||
| 56 | |||
| 57 | switch (rc) { | ||
| 58 | case -1: | ||
| 59 | TEST_ASSERT(errno == EAGAIN || errno == EINTR, | ||
| 60 | "Unexpected write failure,\n" | ||
| 61 | " rc: %zi errno: %i", rc, errno); | ||
| 62 | continue; | ||
| 63 | |||
| 64 | case 0: | ||
| 65 | TEST_ASSERT(false, "Unexpected EOF,\n" | ||
| 66 | " rc: %zi num_written: %zi num_left: %zu", | ||
| 67 | rc, num_written, num_left); | ||
| 68 | break; | ||
| 69 | |||
| 70 | default: | ||
| 71 | TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n" | ||
| 72 | " rc: %zi errno: %i", rc, errno); | ||
| 73 | num_written += rc; | ||
| 74 | num_left -= rc; | ||
| 75 | ptr += rc; | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | } while (num_written < count); | ||
| 79 | |||
| 80 | return num_written; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* Test Read | ||
| 84 | * | ||
| 85 | * A wrapper for read(2), that automatically handles the following | ||
| 86 | * special conditions: | ||
| 87 | * | ||
| 88 | * + Interrupted system call (EINTR) | ||
| 89 | * + Read of less than requested amount | ||
| 90 | * + Non-block return (EAGAIN) | ||
| 91 | * | ||
| 92 | * For each of the above, an additional read is performed to automatically | ||
| 93 | * continue reading the requested data. | ||
| 94 | * There are also many cases where read(2) can return an unexpected | ||
| 95 | * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note, | ||
| 96 | * it is expected that the file opened by fd at the current file position | ||
| 97 | * contains at least the number of requested bytes to be read. A TEST_ASSERT | ||
| 98 | * failure is produced if an End-Of-File condition occurs, before all the | ||
| 99 | * data is read. It is the caller's responsibility to ensure that sufficient | ||
| 100 | * data exists. | ||
| 101 | * | ||
| 102 | * Note, for function signature compatibility with read(2), this function | ||
| 103 | * returns the number of bytes read, but that value will always be equal | ||
| 104 | * to the number of requested bytes. All other conditions in this and | ||
| 105 | * future enhancements to this function either automatically issue another | ||
| 106 | * read(2) or cause a TEST_ASSERT failure. | ||
| 107 | * | ||
| 108 | * Args: | ||
| 109 | * fd - Opened file descriptor to file to be read. | ||
| 110 | * count - Number of bytes to read. | ||
| 111 | * | ||
| 112 | * Output: | ||
| 113 | * buf - Starting address of where to write the bytes read. | ||
| 114 | * | ||
| 115 | * Return: | ||
| 116 | * On success, number of bytes read. | ||
| 117 | * On failure, a TEST_ASSERT failure is caused. | ||
| 118 | */ | ||
| 119 | ssize_t test_read(int fd, void *buf, size_t count) | ||
| 120 | { | ||
| 121 | ssize_t rc; | ||
| 122 | ssize_t num_read = 0; | ||
| 123 | size_t num_left = count; | ||
| 124 | char *ptr = buf; | ||
| 125 | |||
| 126 | /* Note: Count of zero is allowed (see "If count is zero" portion of | ||
| 127 | * read(2) manpage for details). | ||
| 128 | */ | ||
| 129 | TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count); | ||
| 130 | |||
| 131 | do { | ||
| 132 | rc = read(fd, ptr, num_left); | ||
| 133 | |||
| 134 | switch (rc) { | ||
| 135 | case -1: | ||
| 136 | TEST_ASSERT(errno == EAGAIN || errno == EINTR, | ||
| 137 | "Unexpected read failure,\n" | ||
| 138 | " rc: %zi errno: %i", rc, errno); | ||
| 139 | break; | ||
| 140 | |||
| 141 | case 0: | ||
| 142 | TEST_ASSERT(false, "Unexpected EOF,\n" | ||
| 143 | " rc: %zi num_read: %zi num_left: %zu", | ||
| 144 | rc, num_read, num_left); | ||
| 145 | break; | ||
| 146 | |||
| 147 | default: | ||
| 148 | TEST_ASSERT(rc > 0, "Unexpected ret from read,\n" | ||
| 149 | " rc: %zi errno: %i", rc, errno); | ||
| 150 | num_read += rc; | ||
| 151 | num_left -= rc; | ||
| 152 | ptr += rc; | ||
| 153 | break; | ||
| 154 | } | ||
| 155 | } while (num_read < count); | ||
| 156 | |||
| 157 | return num_read; | ||
| 158 | } | ||
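
A small editorial sketch of the intended use: because both wrappers retry EINTR/EAGAIN and short transfers internally, and assert on anything else, a round trip needs no caller-side error handling (the file path below is whatever scratch file the test chooses):

#include "test_util.h"

#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void file_roundtrip(const char *path)
{
        char out[] = "selftest payload";
        char in[sizeof(out)];
        int fd;

        fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);
        TEST_ASSERT(fd >= 0, "open failed, fd: %i errno: %i", fd, errno);

        /* Either the full count is transferred or a TEST_ASSERT fires. */
        test_write(fd, out, sizeof(out));
        lseek(fd, 0, SEEK_SET);
        test_read(fd, in, sizeof(in));

        TEST_ASSERT(!memcmp(out, in, sizeof(out)), "read back mismatch");
        close(fd);
}
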
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c new file mode 100644 index 000000000000..7ca1bb40c498 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util.c | |||
| @@ -0,0 +1,1480 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/kvm_util.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include "test_util.h" | ||
| 10 | #include "kvm_util.h" | ||
| 11 | #include "kvm_util_internal.h" | ||
| 12 | |||
| 13 | #include <assert.h> | ||
| 14 | #include <sys/mman.h> | ||
| 15 | #include <sys/types.h> | ||
| 16 | #include <sys/stat.h> | ||
| 17 | |||
| 18 | #define KVM_DEV_PATH "/dev/kvm" | ||
| 19 | |||
| 20 | #define KVM_UTIL_PGS_PER_HUGEPG 512 | ||
| 21 | #define KVM_UTIL_MIN_PADDR 0x2000 | ||
| 22 | |||
| 23 | /* Aligns x up to the next multiple of size. Size must be a power of 2. */ | ||
| 24 | static void *align(void *x, size_t size) | ||
| 25 | { | ||
| 26 | size_t mask = size - 1; | ||
| 27 | TEST_ASSERT(size != 0 && !(size & (size - 1)), | ||
| 28 | "size not a power of 2: %lu", size); | ||
| 29 | return (void *) (((size_t) x + mask) & ~mask); | ||
| 30 | } | ||
| 31 | |||
| 32 | /* Capability | ||
| 33 | * | ||
| 34 | * Input Args: | ||
| 35 | * cap - Capability | ||
| 36 | * | ||
| 37 | * Output Args: None | ||
| 38 | * | ||
| 39 | * Return: | ||
| 40 | * On success, the Value corresponding to the capability (KVM_CAP_*) | ||
| 41 | * specified by the value of cap. On failure a TEST_ASSERT failure | ||
| 42 | * is produced. | ||
| 43 | * | ||
| 44 | * Looks up and returns the value corresponding to the capability | ||
| 45 | * (KVM_CAP_*) given by cap. | ||
| 46 | */ | ||
| 47 | int kvm_check_cap(long cap) | ||
| 48 | { | ||
| 49 | int ret; | ||
| 50 | int kvm_fd; | ||
| 51 | |||
| 52 | kvm_fd = open(KVM_DEV_PATH, O_RDONLY); | ||
| 53 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | ||
| 54 | KVM_DEV_PATH, kvm_fd, errno); | ||
| 55 | |||
| 56 | ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); | ||
| 57 | TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" | ||
| 58 | " rc: %i errno: %i", ret, errno); | ||
| 59 | |||
| 60 | close(kvm_fd); | ||
| 61 | |||
| 62 | return ret; | ||
| 63 | } | ||
| 64 | |||
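
An editorial example of using kvm_check_cap(): the return value is the raw KVM_CHECK_EXTENSION result, so it serves both as a boolean and as a numeric limit (the capability and fallback below are illustrative):

#include "kvm_util.h"

#include <linux/kvm.h>

static int guest_max_vcpus(void)
{
        int cap = kvm_check_cap(KVM_CAP_MAX_VCPUS);

        /* Zero means the extension is absent; fall back conservatively. */
        return cap ? cap : 4;
}
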
| 65 | /* VM Create | ||
| 66 | * | ||
| 67 | * Input Args: | ||
| 68 | * mode - VM Mode (e.g. VM_MODE_FLAT48PG) | ||
| 69 | * phy_pages - Physical memory pages | ||
| 70 | * perm - permission | ||
| 71 | * | ||
| 72 | * Output Args: None | ||
| 73 | * | ||
| 74 | * Return: | ||
| 75 | * Pointer to opaque structure that describes the created VM. | ||
| 76 | * | ||
| 77 | * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). | ||
| 78 | * When phy_pages is non-zero, a memory region of phy_pages physical pages | ||
| 79 | * is created and mapped starting at guest physical address 0. The file | ||
| 80 | * descriptor to control the created VM is created with the permissions | ||
| 81 | * given by perm (e.g. O_RDWR). | ||
| 82 | */ | ||
| 83 | struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) | ||
| 84 | { | ||
| 85 | struct kvm_vm *vm; | ||
| 86 | int kvm_fd; | ||
| 87 | |||
| 88 | /* Allocate memory. */ | ||
| 89 | vm = calloc(1, sizeof(*vm)); | ||
| 90 | TEST_ASSERT(vm != NULL, "Insufficent Memory"); | ||
| 91 | |||
| 92 | vm->mode = mode; | ||
| 93 | kvm_fd = open(KVM_DEV_PATH, perm); | ||
| 94 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | ||
| 95 | KVM_DEV_PATH, kvm_fd, errno); | ||
| 96 | |||
| 97 | /* Create VM. */ | ||
| 98 | vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); | ||
| 99 | TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " | ||
| 100 | "rc: %i errno: %i", vm->fd, errno); | ||
| 101 | |||
| 102 | close(kvm_fd); | ||
| 103 | |||
| 104 | /* Setup mode specific traits. */ | ||
| 105 | switch (vm->mode) { | ||
| 106 | case VM_MODE_FLAT48PG: | ||
| 107 | vm->page_size = 0x1000; | ||
| 108 | vm->page_shift = 12; | ||
| 109 | |||
| 110 | /* Limit to 48-bit canonical virtual addresses. */ | ||
| 111 | vm->vpages_valid = sparsebit_alloc(); | ||
| 112 | sparsebit_set_num(vm->vpages_valid, | ||
| 113 | 0, (1ULL << (48 - 1)) >> vm->page_shift); | ||
| 114 | sparsebit_set_num(vm->vpages_valid, | ||
| 115 | (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, | ||
| 116 | (1ULL << (48 - 1)) >> vm->page_shift); | ||
| 117 | |||
| 118 | /* Limit physical addresses to 52-bits. */ | ||
| 119 | vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; | ||
| 120 | break; | ||
| 121 | |||
| 122 | default: | ||
| 123 | TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); | ||
| 124 | } | ||
| 125 | |||
| 126 | /* Allocate and setup memory for guest. */ | ||
| 127 | vm->vpages_mapped = sparsebit_alloc(); | ||
| 128 | if (phy_pages != 0) | ||
| 129 | vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, | ||
| 130 | 0, 0, phy_pages, 0); | ||
| 131 | |||
| 132 | return vm; | ||
| 133 | } | ||
| 134 | |||
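
A minimal editorial sketch of the create/teardown pairing (page count and permissions are arbitrary example values):

#include "kvm_util.h"

#include <fcntl.h>

static void create_and_free(void)
{
        /* 512 guest physical pages at GPA 0 in memslot 0, control fd opened O_RDWR. */
        struct kvm_vm *vm = vm_create(VM_MODE_FLAT48PG, 512, O_RDWR);

        /* ... add vCPUs, load guest code, run the test ... */

        kvm_vm_free(vm);
}
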
| 135 | /* Userspace Memory Region Find | ||
| 136 | * | ||
| 137 | * Input Args: | ||
| 138 | * vm - Virtual Machine | ||
| 139 | * start - Starting VM physical address | ||
| 140 | * end - Ending VM physical address, inclusive. | ||
| 141 | * | ||
| 142 | * Output Args: None | ||
| 143 | * | ||
| 144 | * Return: | ||
| 145 | * Pointer to overlapping region, NULL if no such region. | ||
| 146 | * | ||
| 147 | * Searches for a region with any physical memory that overlaps with | ||
| 148 | * any portion of the guest physical addresses from start to end | ||
| 149 | * inclusive. If multiple overlapping regions exist, a pointer to any | ||
| 150 | * of the regions is returned. Null is returned only when no overlapping | ||
| 151 | * region exists. | ||
| 152 | */ | ||
| 153 | static struct userspace_mem_region *userspace_mem_region_find( | ||
| 154 | struct kvm_vm *vm, uint64_t start, uint64_t end) | ||
| 155 | { | ||
| 156 | struct userspace_mem_region *region; | ||
| 157 | |||
| 158 | for (region = vm->userspace_mem_region_head; region; | ||
| 159 | region = region->next) { | ||
| 160 | uint64_t existing_start = region->region.guest_phys_addr; | ||
| 161 | uint64_t existing_end = region->region.guest_phys_addr | ||
| 162 | + region->region.memory_size - 1; | ||
| 163 | if (start <= existing_end && end >= existing_start) | ||
| 164 | return region; | ||
| 165 | } | ||
| 166 | |||
| 167 | return NULL; | ||
| 168 | } | ||
| 169 | |||
| 170 | /* KVM Userspace Memory Region Find | ||
| 171 | * | ||
| 172 | * Input Args: | ||
| 173 | * vm - Virtual Machine | ||
| 174 | * start - Starting VM physical address | ||
| 175 | * end - Ending VM physical address, inclusive. | ||
| 176 | * | ||
| 177 | * Output Args: None | ||
| 178 | * | ||
| 179 | * Return: | ||
| 180 | * Pointer to overlapping region, NULL if no such region. | ||
| 181 | * | ||
| 182 | * Public interface to userspace_mem_region_find. Allows tests to look up | ||
| 183 | * the memslot datastructure for a given range of guest physical memory. | ||
| 184 | */ | ||
| 185 | struct kvm_userspace_memory_region * | ||
| 186 | kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, | ||
| 187 | uint64_t end) | ||
| 188 | { | ||
| 189 | struct userspace_mem_region *region; | ||
| 190 | |||
| 191 | region = userspace_mem_region_find(vm, start, end); | ||
| 192 | if (!region) | ||
| 193 | return NULL; | ||
| 194 | |||
| 195 | return ®ion->region; | ||
| 196 | } | ||
| 197 | |||
| 198 | /* VCPU Find | ||
| 199 | * | ||
| 200 | * Input Args: | ||
| 201 | * vm - Virtual Machine | ||
| 202 | * vcpuid - VCPU ID | ||
| 203 | * | ||
| 204 | * Output Args: None | ||
| 205 | * | ||
| 206 | * Return: | ||
| 207 | * Pointer to VCPU structure | ||
| 208 | * | ||
| 209 | * Locates a vcpu structure that describes the VCPU specified by vcpuid and | ||
| 210 | * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU | ||
| 211 | * for the specified vcpuid. | ||
| 212 | */ | ||
| 213 | struct vcpu *vcpu_find(struct kvm_vm *vm, | ||
| 214 | uint32_t vcpuid) | ||
| 215 | { | ||
| 216 | struct vcpu *vcpup; | ||
| 217 | |||
| 218 | for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) { | ||
| 219 | if (vcpup->id == vcpuid) | ||
| 220 | return vcpup; | ||
| 221 | } | ||
| 222 | |||
| 223 | return NULL; | ||
| 224 | } | ||
| 225 | |||
| 226 | /* VM VCPU Remove | ||
| 227 | * | ||
| 228 | * Input Args: | ||
| 229 | * vm - Virtual Machine | ||
| 230 | * vcpuid - VCPU ID | ||
| 231 | * | ||
| 232 | * Output Args: None | ||
| 233 | * | ||
| 234 | * Return: None, TEST_ASSERT failures for all error conditions | ||
| 235 | * | ||
| 236 | * Within the VM specified by vm, removes the VCPU given by vcpuid. | ||
| 237 | */ | ||
| 238 | static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) | ||
| 239 | { | ||
| 240 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 241 | |||
| 242 | int ret = close(vcpu->fd); | ||
| 243 | TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " | ||
| 244 | "errno: %i", ret, errno); | ||
| 245 | |||
| 246 | if (vcpu->next) | ||
| 247 | vcpu->next->prev = vcpu->prev; | ||
| 248 | if (vcpu->prev) | ||
| 249 | vcpu->prev->next = vcpu->next; | ||
| 250 | else | ||
| 251 | vm->vcpu_head = vcpu->next; | ||
| 252 | free(vcpu); | ||
| 253 | } | ||
| 254 | |||
| 255 | |||
| 256 | /* Destroys and frees the VM pointed to by vmp. | ||
| 257 | */ | ||
| 258 | void kvm_vm_free(struct kvm_vm *vmp) | ||
| 259 | { | ||
| 260 | int ret; | ||
| 261 | |||
| 262 | if (vmp == NULL) | ||
| 263 | return; | ||
| 264 | |||
| 265 | /* Free userspace_mem_regions. */ | ||
| 266 | while (vmp->userspace_mem_region_head) { | ||
| 267 | struct userspace_mem_region *region | ||
| 268 | = vmp->userspace_mem_region_head; | ||
| 269 | |||
| 270 | region->region.memory_size = 0; | ||
| 271 | ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, | ||
| 272 | ®ion->region); | ||
| 273 | TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, " | ||
| 274 | "rc: %i errno: %i", ret, errno); | ||
| 275 | |||
| 276 | vmp->userspace_mem_region_head = region->next; | ||
| 277 | sparsebit_free(®ion->unused_phy_pages); | ||
| 278 | ret = munmap(region->mmap_start, region->mmap_size); | ||
| 279 | TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", | ||
| 280 | ret, errno); | ||
| 281 | |||
| 282 | free(region); | ||
| 283 | } | ||
| 284 | |||
| 285 | /* Free VCPUs. */ | ||
| 286 | while (vmp->vcpu_head) | ||
| 287 | vm_vcpu_rm(vmp, vmp->vcpu_head->id); | ||
| 288 | |||
| 289 | /* Free sparsebit arrays. */ | ||
| 290 | sparsebit_free(&vmp->vpages_valid); | ||
| 291 | sparsebit_free(&vmp->vpages_mapped); | ||
| 292 | |||
| 293 | /* Close file descriptor for the VM. */ | ||
| 294 | ret = close(vmp->fd); | ||
| 295 | TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" | ||
| 296 | " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); | ||
| 297 | |||
| 298 | /* Free the structure describing the VM. */ | ||
| 299 | free(vmp); | ||
| 300 | } | ||
| 301 | |||
| 302 | /* Memory Compare, host virtual to guest virtual | ||
| 303 | * | ||
| 304 | * Input Args: | ||
| 305 | * hva - Starting host virtual address | ||
| 306 | * vm - Virtual Machine | ||
| 307 | * gva - Starting guest virtual address | ||
| 308 | * len - number of bytes to compare | ||
| 309 | * | ||
| 310 | * Output Args: None | ||
| 311 | * | ||
| 312 | * Input/Output Args: None | ||
| 313 | * | ||
| 314 | * Return: | ||
| 315 | * Returns 0 if the bytes starting at hva for a length of len | ||
| 316 | * are equal to the guest virtual bytes starting at gva. Returns | ||
| 317 | * a value < 0, if bytes at hva are less than those at gva. | ||
| 318 | * Otherwise a value > 0 is returned. | ||
| 319 | * | ||
| 320 | * Compares the bytes starting at the host virtual address hva, for | ||
| 321 | * a length of len, to the guest bytes starting at the guest virtual | ||
| 322 | * address given by gva. | ||
| 323 | */ | ||
| 324 | int kvm_memcmp_hva_gva(void *hva, | ||
| 325 | struct kvm_vm *vm, vm_vaddr_t gva, size_t len) | ||
| 326 | { | ||
| 327 | size_t amt; | ||
| 328 | |||
| 329 | /* Compare a batch of bytes until either a match is found | ||
| 330 | * or all the bytes have been compared. | ||
| 331 | */ | ||
| 332 | for (uintptr_t offset = 0; offset < len; offset += amt) { | ||
| 333 | uintptr_t ptr1 = (uintptr_t)hva + offset; | ||
| 334 | |||
| 335 | /* Determine host address for guest virtual address | ||
| 336 | * at offset. | ||
| 337 | */ | ||
| 338 | uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); | ||
| 339 | |||
| 340 | /* Determine amount to compare on this pass. | ||
| 341 | * Don't allow the comparison to cross a page boundary. | ||
| 342 | */ | ||
| 343 | amt = len - offset; | ||
| 344 | if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) | ||
| 345 | amt = vm->page_size - (ptr1 % vm->page_size); | ||
| 346 | if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) | ||
| 347 | amt = vm->page_size - (ptr2 % vm->page_size); | ||
| 348 | |||
| 349 | assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); | ||
| 350 | assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); | ||
| 351 | |||
| 352 | /* Perform the comparison. If there is a difference | ||
| 353 | * return that result to the caller, otherwise need | ||
| 354 | * to continue on looking for a mismatch. | ||
| 355 | */ | ||
| 356 | int ret = memcmp((void *)ptr1, (void *)ptr2, amt); | ||
| 357 | if (ret != 0) | ||
| 358 | return ret; | ||
| 359 | } | ||
| 360 | |||
| 361 | /* No mismatch found. Let the caller know the two memory | ||
| 362 | * areas are equal. | ||
| 363 | */ | ||
| 364 | return 0; | ||
| 365 | } | ||
| 366 | |||
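
An editorial helper sketch showing the intended use as a guest-memory content check (the helper name is made up for illustration):

#include "test_util.h"
#include "kvm_util.h"

static void assert_guest_mem_equals(struct kvm_vm *vm, vm_vaddr_t gva,
                                    const void *expected, size_t len)
{
        /* kvm_memcmp_hva_gva() returns 0 when host and guest bytes match. */
        TEST_ASSERT(!kvm_memcmp_hva_gva((void *)expected, vm, gva, len),
                    "Guest memory at 0x%lx differs from expected data",
                    (unsigned long)gva);
}
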
| 367 | /* Allocate an instance of struct kvm_cpuid2 | ||
| 368 | * | ||
| 369 | * Input Args: None | ||
| 370 | * | ||
| 371 | * Output Args: None | ||
| 372 | * | ||
| 373 | * Return: A pointer to the allocated struct. The caller is responsible | ||
| 374 | * for freeing this struct. | ||
| 375 | * | ||
| 376 | * Since kvm_cpuid2 uses a 0-length array to allow the size of the | ||
| 377 | * array to be decided at allocation time, allocation is slightly | ||
| 378 | * complicated. This function uses a reasonable default length for | ||
| 379 | * the array and performs the appropriate allocation. | ||
| 380 | */ | ||
| 381 | struct kvm_cpuid2 *allocate_kvm_cpuid2(void) | ||
| 382 | { | ||
| 383 | struct kvm_cpuid2 *cpuid; | ||
| 384 | int nent = 100; | ||
| 385 | size_t size; | ||
| 386 | |||
| 387 | size = sizeof(*cpuid); | ||
| 388 | size += nent * sizeof(struct kvm_cpuid_entry2); | ||
| 389 | cpuid = malloc(size); | ||
| 390 | if (!cpuid) { | ||
| 391 | perror("malloc"); | ||
| 392 | abort(); | ||
| 393 | } | ||
| 394 | |||
| 395 | cpuid->nent = nent; | ||
| 396 | |||
| 397 | return cpuid; | ||
| 398 | } | ||
| 399 | |||
| 400 | /* KVM Supported CPUID Get | ||
| 401 | * | ||
| 402 | * Input Args: None | ||
| 403 | * | ||
| 404 | * Output Args: | ||
| 405 | * cpuid - The supported KVM CPUID | ||
| 406 | * | ||
| 407 | * Return: void | ||
| 408 | * | ||
| 409 | * Get the guest CPUID supported by KVM. | ||
| 410 | */ | ||
| 411 | void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) | ||
| 412 | { | ||
| 413 | int ret; | ||
| 414 | int kvm_fd; | ||
| 415 | |||
| 416 | kvm_fd = open(KVM_DEV_PATH, O_RDONLY); | ||
| 417 | TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", | ||
| 418 | KVM_DEV_PATH, kvm_fd, errno); | ||
| 419 | |||
| 420 | ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); | ||
| 421 | TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", | ||
| 422 | ret, errno); | ||
| 423 | |||
| 424 | close(kvm_fd); | ||
| 425 | } | ||
| 426 | |||
| 427 | /* Locate a cpuid entry. | ||
| 428 | * | ||
| 429 | * Input Args: | ||
| 430 | * cpuid: The cpuid. | ||
| 431 | * function: The function of the cpuid entry to find. | ||
| 432 | * | ||
| 433 | * Output Args: None | ||
| 434 | * | ||
| 435 | * Return: A pointer to the cpuid entry. Never returns NULL. | ||
| 436 | */ | ||
| 437 | struct kvm_cpuid_entry2 * | ||
| 438 | find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, | ||
| 439 | uint32_t index) | ||
| 440 | { | ||
| 441 | struct kvm_cpuid_entry2 *entry = NULL; | ||
| 442 | int i; | ||
| 443 | |||
| 444 | for (i = 0; i < cpuid->nent; i++) { | ||
| 445 | if (cpuid->entries[i].function == function && | ||
| 446 | cpuid->entries[i].index == index) { | ||
| 447 | entry = &cpuid->entries[i]; | ||
| 448 | break; | ||
| 449 | } | ||
| 450 | } | ||
| 451 | |||
| 452 | TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", | ||
| 453 | function, index); | ||
| 454 | return entry; | ||
| 455 | } | ||
| 456 | |||
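
The three CPUID helpers above compose into the usual lookup pattern; an editorial sketch that checks a feature bit in leaf 1 (the specific bit is only an example):

#include "kvm_util.h"

#include <linux/kvm.h>
#include <stdbool.h>
#include <stdlib.h>

static bool kvm_cpu_has_x2apic(void)
{
        struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2();
        struct kvm_cpuid_entry2 *entry;
        bool has;

        kvm_get_supported_cpuid(cpuid);
        entry = find_cpuid_index_entry(cpuid, 1, 0);
        has = entry->ecx & (1u << 21);  /* CPUID.01H:ECX[21] = x2APIC */
        free(cpuid);
        return has;
}
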
| 457 | /* VM Userspace Memory Region Add | ||
| 458 | * | ||
| 459 | * Input Args: | ||
| 460 | * vm - Virtual Machine | ||
| 461 | * src_type - Storage source for this region | ||
| 462 | * (e.g. VM_MEM_SRC_ANONYMOUS for anonymous memory). | ||
| 463 | * guest_paddr - Starting guest physical address | ||
| 464 | * slot - KVM region slot | ||
| 465 | * npages - Number of physical pages | ||
| 466 | * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) | ||
| 467 | * | ||
| 468 | * Output Args: None | ||
| 469 | * | ||
| 470 | * Return: None | ||
| 471 | * | ||
| 472 | * Allocates a memory area of the number of pages specified by npages | ||
| 473 | * and maps it to the VM specified by vm, at a starting physical address | ||
| 474 | * given by guest_paddr. The region is created with a KVM region slot | ||
| 475 | * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The | ||
| 476 | * region is created with the flags given by flags. | ||
| 477 | */ | ||
| 478 | void vm_userspace_mem_region_add(struct kvm_vm *vm, | ||
| 479 | enum vm_mem_backing_src_type src_type, | ||
| 480 | uint64_t guest_paddr, uint32_t slot, uint64_t npages, | ||
| 481 | uint32_t flags) | ||
| 482 | { | ||
| 483 | int ret; | ||
| 484 | unsigned long pmem_size = 0; | ||
| 485 | struct userspace_mem_region *region; | ||
| 486 | size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; | ||
| 487 | |||
| 488 | TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " | ||
| 489 | "address not on a page boundary.\n" | ||
| 490 | " guest_paddr: 0x%lx vm->page_size: 0x%x", | ||
| 491 | guest_paddr, vm->page_size); | ||
| 492 | TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) | ||
| 493 | <= vm->max_gfn, "Physical range beyond maximum " | ||
| 494 | "supported physical address,\n" | ||
| 495 | " guest_paddr: 0x%lx npages: 0x%lx\n" | ||
| 496 | " vm->max_gfn: 0x%lx vm->page_size: 0x%x", | ||
| 497 | guest_paddr, npages, vm->max_gfn, vm->page_size); | ||
| 498 | |||
| 499 | /* Confirm a mem region with an overlapping address doesn't | ||
| 500 | * already exist. | ||
| 501 | */ | ||
| 502 | region = (struct userspace_mem_region *) userspace_mem_region_find( | ||
| 503 | vm, guest_paddr, guest_paddr + npages * vm->page_size); | ||
| 504 | if (region != NULL) | ||
| 505 | TEST_ASSERT(false, "overlapping userspace_mem_region already " | ||
| 506 | "exists\n" | ||
| 507 | " requested guest_paddr: 0x%lx npages: 0x%lx " | ||
| 508 | "page_size: 0x%x\n" | ||
| 509 | " existing guest_paddr: 0x%lx size: 0x%lx", | ||
| 510 | guest_paddr, npages, vm->page_size, | ||
| 511 | (uint64_t) region->region.guest_phys_addr, | ||
| 512 | (uint64_t) region->region.memory_size); | ||
| 513 | |||
| 514 | /* Confirm no region with the requested slot already exists. */ | ||
| 515 | for (region = vm->userspace_mem_region_head; region; | ||
| 516 | region = region->next) { | ||
| 517 | if (region->region.slot == slot) | ||
| 518 | break; | ||
| 519 | if ((guest_paddr <= (region->region.guest_phys_addr | ||
| 520 | + region->region.memory_size)) | ||
| 521 | && ((guest_paddr + npages * vm->page_size) | ||
| 522 | >= region->region.guest_phys_addr)) | ||
| 523 | break; | ||
| 524 | } | ||
| 525 | if (region != NULL) | ||
| 526 | TEST_ASSERT(false, "A mem region with the requested slot " | ||
| 527 | "or overlapping physical memory range already exists.\n" | ||
| 528 | " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" | ||
| 529 | " existing slot: %u paddr: 0x%lx size: 0x%lx", | ||
| 530 | slot, guest_paddr, npages, | ||
| 531 | region->region.slot, | ||
| 532 | (uint64_t) region->region.guest_phys_addr, | ||
| 533 | (uint64_t) region->region.memory_size); | ||
| 534 | |||
| 535 | /* Allocate and initialize new mem region structure. */ | ||
| 536 | region = calloc(1, sizeof(*region)); | ||
| 537 | TEST_ASSERT(region != NULL, "Insufficient Memory"); | ||
| 538 | region->mmap_size = npages * vm->page_size; | ||
| 539 | |||
| 540 | /* Enough memory to align up to a huge page. */ | ||
| 541 | if (src_type == VM_MEM_SRC_ANONYMOUS_THP) | ||
| 542 | region->mmap_size += huge_page_size; | ||
| 543 | region->mmap_start = mmap(NULL, region->mmap_size, | ||
| 544 | PROT_READ | PROT_WRITE, | ||
| 545 | MAP_PRIVATE | MAP_ANONYMOUS | ||
| 546 | | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), | ||
| 547 | -1, 0); | ||
| 548 | TEST_ASSERT(region->mmap_start != MAP_FAILED, | ||
| 549 | "test_malloc failed, mmap_start: %p errno: %i", | ||
| 550 | region->mmap_start, errno); | ||
| 551 | |||
| 552 | /* Align THP allocation up to start of a huge page. */ | ||
| 553 | region->host_mem = align(region->mmap_start, | ||
| 554 | src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); | ||
| 555 | |||
| 556 | /* As needed perform madvise */ | ||
| 557 | if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { | ||
| 558 | ret = madvise(region->host_mem, npages * vm->page_size, | ||
| 559 | src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); | ||
| 560 | TEST_ASSERT(ret == 0, "madvise failed,\n" | ||
| 561 | " addr: %p\n" | ||
| 562 | " length: 0x%lx\n" | ||
| 563 | " src_type: %x", | ||
| 564 | region->host_mem, npages * vm->page_size, src_type); | ||
| 565 | } | ||
| 566 | |||
| 567 | region->unused_phy_pages = sparsebit_alloc(); | ||
| 568 | sparsebit_set_num(region->unused_phy_pages, | ||
| 569 | guest_paddr >> vm->page_shift, npages); | ||
| 570 | region->region.slot = slot; | ||
| 571 | region->region.flags = flags; | ||
| 572 | region->region.guest_phys_addr = guest_paddr; | ||
| 573 | region->region.memory_size = npages * vm->page_size; | ||
| 574 | region->region.userspace_addr = (uintptr_t) region->host_mem; | ||
| 575 | ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); | ||
| 576 | TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" | ||
| 577 | " rc: %i errno: %i\n" | ||
| 578 | " slot: %u flags: 0x%x\n" | ||
| 579 | " guest_phys_addr: 0x%lx size: 0x%lx", | ||
| 580 | ret, errno, slot, flags, | ||
| 581 | guest_paddr, (uint64_t) region->region.memory_size); | ||
| 582 | |||
| 583 | /* Add to linked-list of memory regions. */ | ||
| 584 | if (vm->userspace_mem_region_head) | ||
| 585 | vm->userspace_mem_region_head->prev = region; | ||
| 586 | region->next = vm->userspace_mem_region_head; | ||
| 587 | vm->userspace_mem_region_head = region; | ||
| 588 | } | ||
| 589 | |||
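
An editorial example of adding a further memslot beyond the one vm_create() installs (guest address, slot number and size are illustrative):

#include "kvm_util.h"

#include <linux/kvm.h>

static void add_logged_memslot(struct kvm_vm *vm)
{
        /*
         * 64 anonymous pages at GPA 16 MiB in memslot 1, with dirty-page
         * logging enabled so the test can exercise KVM_GET_DIRTY_LOG.
         */
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0x1000000, 1,
                                    64, KVM_MEM_LOG_DIRTY_PAGES);
}
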
| 590 | /* Memslot to region | ||
| 591 | * | ||
| 592 | * Input Args: | ||
| 593 | * vm - Virtual Machine | ||
| 594 | * memslot - KVM memory slot ID | ||
| 595 | * | ||
| 596 | * Output Args: None | ||
| 597 | * | ||
| 598 | * Return: | ||
| 599 | * Pointer to memory region structure that describes the memory region | ||
| 600 | * using kvm memory slot ID given by memslot. TEST_ASSERT failure | ||
| 601 | * on error (e.g. currently no memory region using memslot as a KVM | ||
| 602 | * memory slot ID). | ||
| 603 | */ | ||
| 604 | static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, | ||
| 605 | uint32_t memslot) | ||
| 606 | { | ||
| 607 | struct userspace_mem_region *region; | ||
| 608 | |||
| 609 | for (region = vm->userspace_mem_region_head; region; | ||
| 610 | region = region->next) { | ||
| 611 | if (region->region.slot == memslot) | ||
| 612 | break; | ||
| 613 | } | ||
| 614 | if (region == NULL) { | ||
| 615 | fprintf(stderr, "No mem region with the requested slot found,\n" | ||
| 616 | " requested slot: %u\n", memslot); | ||
| 617 | fputs("---- vm dump ----\n", stderr); | ||
| 618 | vm_dump(stderr, vm, 2); | ||
| 619 | TEST_ASSERT(false, "Mem region not found"); | ||
| 620 | } | ||
| 621 | |||
| 622 | return region; | ||
| 623 | } | ||
| 624 | |||
| 625 | /* VM Memory Region Flags Set | ||
| 626 | * | ||
| 627 | * Input Args: | ||
| 628 | * vm - Virtual Machine | ||
| 629 | * slot - KVM memory slot ID, flags - New flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) | ||
| 630 | * | ||
| 631 | * Output Args: None | ||
| 632 | * | ||
| 633 | * Return: None | ||
| 634 | * | ||
| 635 | * Sets the flags of the memory region specified by the value of slot, | ||
| 636 | * to the values given by flags. | ||
| 637 | */ | ||
| 638 | void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) | ||
| 639 | { | ||
| 640 | int ret; | ||
| 641 | struct userspace_mem_region *region; | ||
| 642 | |||
| 643 | /* Locate memory region. */ | ||
| 644 | region = memslot2region(vm, slot); | ||
| 645 | |||
| 646 | region->region.flags = flags; | ||
| 647 | |||
| 648 | ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); | ||
| 649 | |||
| 650 | TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" | ||
| 651 | " rc: %i errno: %i slot: %u flags: 0x%x", | ||
| 652 | ret, errno, slot, flags); | ||
| 653 | } | ||
| 654 | |||
| 655 | /* VCPU mmap Size | ||
| 656 | * | ||
| 657 | * Input Args: None | ||
| 658 | * | ||
| 659 | * Output Args: None | ||
| 660 | * | ||
| 661 | * Return: | ||
| 662 | * Size of VCPU state | ||
| 663 | * | ||
| 664 | * Returns the size of the structure pointed to by the return value | ||
| 665 | * of vcpu_state(). | ||
| 666 | */ | ||
| 667 | static int vcpu_mmap_sz(void) | ||
| 668 | { | ||
| 669 | int dev_fd, ret; | ||
| 670 | |||
| 671 | dev_fd = open(KVM_DEV_PATH, O_RDONLY); | ||
| 672 | TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", | ||
| 673 | __func__, KVM_DEV_PATH, dev_fd, errno); | ||
| 674 | |||
| 675 | ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); | ||
| 676 | TEST_ASSERT(ret >= sizeof(struct kvm_run), | ||
| 677 | "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i", | ||
| 678 | __func__, ret, errno); | ||
| 679 | |||
| 680 | close(dev_fd); | ||
| 681 | |||
| 682 | return ret; | ||
| 683 | } | ||
| 684 | |||
| 685 | /* VM VCPU Add | ||
| 686 | * | ||
| 687 | * Input Args: | ||
| 688 | * vm - Virtual Machine | ||
| 689 | * vcpuid - VCPU ID | ||
| 690 | * | ||
| 691 | * Output Args: None | ||
| 692 | * | ||
| 693 | * Return: None | ||
| 694 | * | ||
| 695 | * Creates and adds to the VM specified by vm a virtual CPU with | ||
| 696 | * the ID given by vcpuid. | ||
| 697 | */ | ||
| 698 | void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) | ||
| 699 | { | ||
| 700 | struct vcpu *vcpu; | ||
| 701 | |||
| 702 | /* Confirm a vcpu with the specified id doesn't already exist. */ | ||
| 703 | vcpu = vcpu_find(vm, vcpuid); | ||
| 704 | if (vcpu != NULL) | ||
| 705 | TEST_ASSERT(false, "vcpu with the specified id " | ||
| 706 | "already exists,\n" | ||
| 707 | " requested vcpuid: %u\n" | ||
| 708 | " existing vcpuid: %u state: %p", | ||
| 709 | vcpuid, vcpu->id, vcpu->state); | ||
| 710 | |||
| 711 | /* Allocate and initialize new vcpu structure. */ | ||
| 712 | vcpu = calloc(1, sizeof(*vcpu)); | ||
| 713 | TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); | ||
| 714 | vcpu->id = vcpuid; | ||
| 715 | vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid); | ||
| 716 | TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i", | ||
| 717 | vcpu->fd, errno); | ||
| 718 | |||
| 719 | TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " | ||
| 720 | "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", | ||
| 721 | vcpu_mmap_sz(), sizeof(*vcpu->state)); | ||
| 722 | vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), | ||
| 723 | PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); | ||
| 724 | TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " | ||
| 725 | "vcpu id: %u errno: %i", vcpuid, errno); | ||
| 726 | |||
| 727 | /* Add to linked-list of VCPUs. */ | ||
| 728 | if (vm->vcpu_head) | ||
| 729 | vm->vcpu_head->prev = vcpu; | ||
| 730 | vcpu->next = vm->vcpu_head; | ||
| 731 | vm->vcpu_head = vcpu; | ||
| 732 | |||
| 733 | vcpu_setup(vm, vcpuid); | ||
| 734 | } | ||
| 735 | |||
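
A usage sketch (illustrative only): a test normally creates the VM first and then adds vcpu 0 before using any vcpu-scoped helpers. vm_create() and the VM_MODE_FLAT48PG mode value are assumed to be provided elsewhere in this patch (kvm_util.c / kvm_util.h):

	struct kvm_vm *vm;

	/* Sketch: VM with 64 pages of guest physical memory, one vcpu. */
	vm = vm_create(VM_MODE_FLAT48PG, 64, O_RDWR);
	vm_vcpu_add(vm, 0);
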
| 736 | /* VM Virtual Address Unused Gap | ||
| 737 | * | ||
| 738 | * Input Args: | ||
| 739 | * vm - Virtual Machine | ||
| 740 | * sz - Size (bytes) | ||
| 741 | * vaddr_min - Minimum Virtual Address | ||
| 742 | * | ||
| 743 | * Output Args: None | ||
| 744 | * | ||
| 745 | * Return: | ||
| 746 | * Lowest virtual address at or above vaddr_min, with at least | ||
| 747 | * sz unused bytes. TEST_ASSERT failure if no area of at least | ||
| 748 | * size sz is available. | ||
| 749 | * | ||
| 750 | * Within the VM specified by vm, locates the lowest starting virtual | ||
| 751 | * address >= vaddr_min, that has at least sz unallocated bytes. A | ||
| 752 | * TEST_ASSERT failure occurs for invalid input or no area of at least | ||
| 753 | * sz unallocated bytes >= vaddr_min is available. | ||
| 754 | */ | ||
| 755 | static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, | ||
| 756 | vm_vaddr_t vaddr_min) | ||
| 757 | { | ||
| 758 | uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; | ||
| 759 | |||
| 760 | /* Determine lowest permitted virtual page index. */ | ||
| 761 | uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; | ||
| 762 | if ((pgidx_start * vm->page_size) < vaddr_min) | ||
| 763 | goto no_va_found; | ||
| 764 | |||
| 765 | /* Loop over section with enough valid virtual page indexes. */ | ||
| 766 | if (!sparsebit_is_set_num(vm->vpages_valid, | ||
| 767 | pgidx_start, pages)) | ||
| 768 | pgidx_start = sparsebit_next_set_num(vm->vpages_valid, | ||
| 769 | pgidx_start, pages); | ||
| 770 | do { | ||
| 771 | /* | ||
| 772 | * Are there enough unused virtual pages available at | ||
| 773 | * the currently proposed starting virtual page index. | ||
| 774 | * If not, adjust proposed starting index to next | ||
| 775 | * possible. | ||
| 776 | */ | ||
| 777 | if (sparsebit_is_clear_num(vm->vpages_mapped, | ||
| 778 | pgidx_start, pages)) | ||
| 779 | goto va_found; | ||
| 780 | pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, | ||
| 781 | pgidx_start, pages); | ||
| 782 | if (pgidx_start == 0) | ||
| 783 | goto no_va_found; | ||
| 784 | |||
| 785 | /* | ||
| 786 | * If needed, adjust proposed starting virtual address, | ||
| 787 | * to next range of valid virtual addresses. | ||
| 788 | */ | ||
| 789 | if (!sparsebit_is_set_num(vm->vpages_valid, | ||
| 790 | pgidx_start, pages)) { | ||
| 791 | pgidx_start = sparsebit_next_set_num( | ||
| 792 | vm->vpages_valid, pgidx_start, pages); | ||
| 793 | if (pgidx_start == 0) | ||
| 794 | goto no_va_found; | ||
| 795 | } | ||
| 796 | } while (pgidx_start != 0); | ||
| 797 | |||
| 798 | no_va_found: | ||
| 799 | TEST_ASSERT(false, "No vaddr of specified pages available, " | ||
| 800 | "pages: 0x%lx", pages); | ||
| 801 | |||
| 802 | /* NOT REACHED */ | ||
| 803 | return -1; | ||
| 804 | |||
| 805 | va_found: | ||
| 806 | TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, | ||
| 807 | pgidx_start, pages), | ||
| 808 | "Unexpected, invalid virtual page index range,\n" | ||
| 809 | " pgidx_start: 0x%lx\n" | ||
| 810 | " pages: 0x%lx", | ||
| 811 | pgidx_start, pages); | ||
| 812 | TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, | ||
| 813 | pgidx_start, pages), | ||
| 814 | "Unexpected, pages already mapped,\n" | ||
| 815 | " pgidx_start: 0x%lx\n" | ||
| 816 | " pages: 0x%lx", | ||
| 817 | pgidx_start, pages); | ||
| 818 | |||
| 819 | return pgidx_start * vm->page_size; | ||
| 820 | } | ||
| 821 | |||
| 822 | /* VM Virtual Address Allocate | ||
| 823 | * | ||
| 824 | * Input Args: | ||
| 825 | * vm - Virtual Machine | ||
| 826 | * sz - Size in bytes | ||
| 827 | * vaddr_min - Minimum starting virtual address | ||
| 828 | * data_memslot - Memory region slot for data pages | ||
| 829 | * pgd_memslot - Memory region slot for new virtual translation tables | ||
| 830 | * | ||
| 831 | * Output Args: None | ||
| 832 | * | ||
| 833 | * Return: | ||
| 834 | * Starting guest virtual address | ||
| 835 | * | ||
| 836 | * Allocates at least sz bytes within the virtual address space of the vm | ||
| 837 | * given by vm. The allocated bytes are mapped to a virtual address >= | ||
| 838 | * the address given by vaddr_min. Note that each allocation uses | ||
| 839 | * a unique set of pages, with the minimum real allocation being at least | ||
| 840 | * a page. | ||
| 841 | */ | ||
| 842 | vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, | ||
| 843 | uint32_t data_memslot, uint32_t pgd_memslot) | ||
| 844 | { | ||
| 845 | uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); | ||
| 846 | |||
| 847 | virt_pgd_alloc(vm, pgd_memslot); | ||
| 848 | |||
| 849 | /* Find an unused range of virtual page addresses of at least | ||
| 850 | * pages in length. | ||
| 851 | */ | ||
| 852 | vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); | ||
| 853 | |||
| 854 | /* Map the virtual pages. */ | ||
| 855 | for (vm_vaddr_t vaddr = vaddr_start; pages > 0; | ||
| 856 | pages--, vaddr += vm->page_size) { | ||
| 857 | vm_paddr_t paddr; | ||
| 858 | |||
| 859 | paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); | ||
| 860 | |||
| 861 | virt_pg_map(vm, vaddr, paddr, pgd_memslot); | ||
| 862 | |||
| 863 | sparsebit_set(vm->vpages_mapped, | ||
| 864 | vaddr >> vm->page_shift); | ||
| 865 | } | ||
| 866 | |||
| 867 | return vaddr_start; | ||
| 868 | } | ||
| 869 | |||
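
As an illustration (assumed usage, not part of the patch), a test that wants one page of guest-virtual scratch space at or above 0x10000, with both the data page and any page-table pages carved out of memslot 0, could do:

	vm_vaddr_t scratch_gva;

	/* 0x1000 is the 4K page size used by the x86 mode of this library. */
	scratch_gva = vm_vaddr_alloc(vm, 0x1000, 0x10000, 0, 0);
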
| 870 | /* Address VM Physical to Host Virtual | ||
| 871 | * | ||
| 872 | * Input Args: | ||
| 873 | * vm - Virtual Machine | ||
| 874 | * gpa - VM physical address | ||
| 875 | * | ||
| 876 | * Output Args: None | ||
| 877 | * | ||
| 878 | * Return: | ||
| 879 | * Equivalent host virtual address | ||
| 880 | * | ||
| 881 | * Locates the memory region containing the VM physical address given | ||
| 882 | * by gpa, within the VM given by vm. When found, the host virtual | ||
| 883 | * address providing the memory to the vm physical address is returned. | ||
| 884 | * A TEST_ASSERT failure occurs if no region containing gpa exists. | ||
| 885 | */ | ||
| 886 | void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) | ||
| 887 | { | ||
| 888 | struct userspace_mem_region *region; | ||
| 889 | for (region = vm->userspace_mem_region_head; region; | ||
| 890 | region = region->next) { | ||
| 891 | if ((gpa >= region->region.guest_phys_addr) | ||
| 892 | && (gpa <= (region->region.guest_phys_addr | ||
| 893 | + region->region.memory_size - 1))) | ||
| 894 | return (void *) ((uintptr_t) region->host_mem | ||
| 895 | + (gpa - region->region.guest_phys_addr)); | ||
| 896 | } | ||
| 897 | |||
| 898 | TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa); | ||
| 899 | return NULL; | ||
| 900 | } | ||
| 901 | |||
| 902 | /* Address Host Virtual to VM Physical | ||
| 903 | * | ||
| 904 | * Input Args: | ||
| 905 | * vm - Virtual Machine | ||
| 906 | * hva - Host virtual address | ||
| 907 | * | ||
| 908 | * Output Args: None | ||
| 909 | * | ||
| 910 | * Return: | ||
| 911 | * Equivalent VM physical address | ||
| 912 | * | ||
| 913 | * Locates the memory region containing the host virtual address given | ||
| 914 | * by hva, within the VM given by vm. When found, the equivalent | ||
| 915 | * VM physical address is returned. A TEST_ASSERT failure occurs if no | ||
| 916 | * region containing hva exists. | ||
| 917 | */ | ||
| 918 | vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) | ||
| 919 | { | ||
| 920 | struct userspace_mem_region *region; | ||
| 921 | for (region = vm->userspace_mem_region_head; region; | ||
| 922 | region = region->next) { | ||
| 923 | if ((hva >= region->host_mem) | ||
| 924 | && (hva <= (region->host_mem | ||
| 925 | + region->region.memory_size - 1))) | ||
| 926 | return (vm_paddr_t) ((uintptr_t) | ||
| 927 | region->region.guest_phys_addr | ||
| 928 | + (hva - (uintptr_t) region->host_mem)); | ||
| 929 | } | ||
| 930 | |||
| 931 | TEST_ASSERT(false, "No mapping to a guest physical address, " | ||
| 932 | "hva: %p", hva); | ||
| 933 | return -1; | ||
| 934 | } | ||
| 935 | |||
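
The two translations above are inverses over any address that falls inside a registered region, which a test can use as a cheap sanity check. A sketch (the gpa variable is hypothetical):

	void *hva = addr_gpa2hva(vm, gpa);

	TEST_ASSERT(addr_hva2gpa(vm, hva) == gpa,
		    "gpa -> hva -> gpa round trip mismatch, gpa: 0x%lx", gpa);
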
| 936 | /* VM Create IRQ Chip | ||
| 937 | * | ||
| 938 | * Input Args: | ||
| 939 | * vm - Virtual Machine | ||
| 940 | * | ||
| 941 | * Output Args: None | ||
| 942 | * | ||
| 943 | * Return: None | ||
| 944 | * | ||
| 945 | * Creates an interrupt controller chip for the VM specified by vm. | ||
| 946 | */ | ||
| 947 | void vm_create_irqchip(struct kvm_vm *vm) | ||
| 948 | { | ||
| 949 | int ret; | ||
| 950 | |||
| 951 | ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); | ||
| 952 | TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " | ||
| 953 | "rc: %i errno: %i", ret, errno); | ||
| 954 | } | ||
| 955 | |||
| 956 | /* VM VCPU State | ||
| 957 | * | ||
| 958 | * Input Args: | ||
| 959 | * vm - Virtual Machine | ||
| 960 | * vcpuid - VCPU ID | ||
| 961 | * | ||
| 962 | * Output Args: None | ||
| 963 | * | ||
| 964 | * Return: | ||
| 965 | * Pointer to structure that describes the state of the VCPU. | ||
| 966 | * | ||
| 967 | * Locates and returns a pointer to a structure that describes the | ||
| 968 | * state of the VCPU with the given vcpuid. | ||
| 969 | */ | ||
| 970 | struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) | ||
| 971 | { | ||
| 972 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 973 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 974 | |||
| 975 | return vcpu->state; | ||
| 976 | } | ||
| 977 | |||
| 978 | /* VM VCPU Run | ||
| 979 | * | ||
| 980 | * Input Args: | ||
| 981 | * vm - Virtual Machine | ||
| 982 | * vcpuid - VCPU ID | ||
| 983 | * | ||
| 984 | * Output Args: None | ||
| 985 | * | ||
| 986 | * Return: None | ||
| 987 | * | ||
| 988 | * Switch to executing the code for the VCPU given by vcpuid, within the VM | ||
| 989 | * given by vm. | ||
| 990 | */ | ||
| 991 | void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) | ||
| 992 | { | ||
| 993 | int ret = _vcpu_run(vm, vcpuid); | ||
| 994 | TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " | ||
| 995 | "rc: %i errno: %i", ret, errno); | ||
| 996 | } | ||
| 997 | |||
| 998 | int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) | ||
| 999 | { | ||
| 1000 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1001 | int rc; | ||
| 1002 | |||
| 1003 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1004 | do { | ||
| 1005 | rc = ioctl(vcpu->fd, KVM_RUN, NULL); | ||
| 1006 | } while (rc == -1 && errno == EINTR); | ||
| 1007 | return rc; | ||
| 1008 | } | ||
| 1009 | |||
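
A typical caller pattern (sketch only): run the vcpu, then inspect the mmap'ed kvm_run structure returned by vcpu_state(); exit_reason_str(), defined later in this file, turns the numeric reason into something readable for the assert message:

	struct kvm_run *run = vcpu_state(vm, 0);

	vcpu_run(vm, 0);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Unexpected exit reason: %u (%s)",
		    run->exit_reason, exit_reason_str(run->exit_reason));
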
| 1010 | /* VM VCPU Set MP State | ||
| 1011 | * | ||
| 1012 | * Input Args: | ||
| 1013 | * vm - Virtual Machine | ||
| 1014 | * vcpuid - VCPU ID | ||
| 1015 | * mp_state - mp_state to be set | ||
| 1016 | * | ||
| 1017 | * Output Args: None | ||
| 1018 | * | ||
| 1019 | * Return: None | ||
| 1020 | * | ||
| 1021 | * Sets the MP state of the VCPU given by vcpuid, to the state given | ||
| 1022 | * by mp_state. | ||
| 1023 | */ | ||
| 1024 | void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, | ||
| 1025 | struct kvm_mp_state *mp_state) | ||
| 1026 | { | ||
| 1027 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1028 | int ret; | ||
| 1029 | |||
| 1030 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1031 | |||
| 1032 | ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); | ||
| 1033 | TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, " | ||
| 1034 | "rc: %i errno: %i", ret, errno); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | /* VM VCPU Regs Get | ||
| 1038 | * | ||
| 1039 | * Input Args: | ||
| 1040 | * vm - Virtual Machine | ||
| 1041 | * vcpuid - VCPU ID | ||
| 1042 | * | ||
| 1043 | * Output Args: | ||
| 1044 | * regs - current state of VCPU regs | ||
| 1045 | * | ||
| 1046 | * Return: None | ||
| 1047 | * | ||
| 1048 | * Obtains the current register state for the VCPU specified by vcpuid | ||
| 1049 | * and stores it at the location given by regs. | ||
| 1050 | */ | ||
| 1051 | void vcpu_regs_get(struct kvm_vm *vm, | ||
| 1052 | uint32_t vcpuid, struct kvm_regs *regs) | ||
| 1053 | { | ||
| 1054 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1055 | int ret; | ||
| 1056 | |||
| 1057 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1058 | |||
| 1059 | /* Get the regs. */ | ||
| 1060 | ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); | ||
| 1061 | TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", | ||
| 1062 | ret, errno); | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | /* VM VCPU Regs Set | ||
| 1066 | * | ||
| 1067 | * Input Args: | ||
| 1068 | * vm - Virtual Machine | ||
| 1069 | * vcpuid - VCPU ID | ||
| 1070 | * regs - Values to set VCPU regs to | ||
| 1071 | * | ||
| 1072 | * Output Args: None | ||
| 1073 | * | ||
| 1074 | * Return: None | ||
| 1075 | * | ||
| 1076 | * Sets the regs of the VCPU specified by vcpuid to the values | ||
| 1077 | * given by regs. | ||
| 1078 | */ | ||
| 1079 | void vcpu_regs_set(struct kvm_vm *vm, | ||
| 1080 | uint32_t vcpuid, struct kvm_regs *regs) | ||
| 1081 | { | ||
| 1082 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1083 | int ret; | ||
| 1084 | |||
| 1085 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1086 | |||
| 1087 | /* Set the regs. */ | ||
| 1088 | ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); | ||
| 1089 | TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", | ||
| 1090 | ret, errno); | ||
| 1091 | } | ||
| 1092 | |||
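
The get/set pair is usually used read-modify-write style. A sketch (illustrative only), for example advancing the guest past an instruction of known length after handling an exit:

	struct kvm_regs regs;

	vcpu_regs_get(vm, 0, &regs);
	regs.rip += 2;	/* e.g. step over a 2-byte "out" instruction */
	vcpu_regs_set(vm, 0, &regs);
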
| 1093 | void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, | ||
| 1094 | struct kvm_vcpu_events *events) | ||
| 1095 | { | ||
| 1096 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1097 | int ret; | ||
| 1098 | |||
| 1099 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1100 | |||
| 1101 | /* Get the events. */ | ||
| 1102 | ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); | ||
| 1103 | TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", | ||
| 1104 | ret, errno); | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, | ||
| 1108 | struct kvm_vcpu_events *events) | ||
| 1109 | { | ||
| 1110 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1111 | int ret; | ||
| 1112 | |||
| 1113 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1114 | |||
| 1115 | /* Set the events. */ | ||
| 1116 | ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); | ||
| 1117 | TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", | ||
| 1118 | ret, errno); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | /* VM VCPU Args Set | ||
| 1122 | * | ||
| 1123 | * Input Args: | ||
| 1124 | * vm - Virtual Machine | ||
| 1125 | * vcpuid - VCPU ID | ||
| 1126 | * num - number of arguments | ||
| 1127 | * ... - arguments, each of type uint64_t | ||
| 1128 | * | ||
| 1129 | * Output Args: None | ||
| 1130 | * | ||
| 1131 | * Return: None | ||
| 1132 | * | ||
| 1133 | * Sets the first num function input arguments to the values | ||
| 1134 | * given as variable args. Each of the variable args is expected to | ||
| 1135 | * be of type uint64_t. | ||
| 1136 | */ | ||
| 1137 | void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) | ||
| 1138 | { | ||
| 1139 | va_list ap; | ||
| 1140 | struct kvm_regs regs; | ||
| 1141 | |||
| 1142 | TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" | ||
| 1143 | " num: %u\n", | ||
| 1144 | num); | ||
| 1145 | |||
| 1146 | va_start(ap, num); | ||
| 1147 | vcpu_regs_get(vm, vcpuid, &regs); | ||
| 1148 | |||
| 1149 | if (num >= 1) | ||
| 1150 | regs.rdi = va_arg(ap, uint64_t); | ||
| 1151 | |||
| 1152 | if (num >= 2) | ||
| 1153 | regs.rsi = va_arg(ap, uint64_t); | ||
| 1154 | |||
| 1155 | if (num >= 3) | ||
| 1156 | regs.rdx = va_arg(ap, uint64_t); | ||
| 1157 | |||
| 1158 | if (num >= 4) | ||
| 1159 | regs.rcx = va_arg(ap, uint64_t); | ||
| 1160 | |||
| 1161 | if (num >= 5) | ||
| 1162 | regs.r8 = va_arg(ap, uint64_t); | ||
| 1163 | |||
| 1164 | if (num >= 6) | ||
| 1165 | regs.r9 = va_arg(ap, uint64_t); | ||
| 1166 | |||
| 1167 | vcpu_regs_set(vm, vcpuid, &regs); | ||
| 1168 | va_end(ap); | ||
| 1169 | } | ||
| 1170 | |||
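
Note the register order (rdi, rsi, rdx, rcx, r8, r9) matches the x86_64 SysV calling convention, so the values arrive as the first six parameters of the guest function the vcpu will enter. A sketch of the intended use (argument values are made up for illustration):

	/* Guest entry point is assumed to take (iterations, port). */
	vcpu_args_set(vm, 0, 2, (uint64_t) 100, (uint64_t) 0x10);
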
| 1171 | /* VM VCPU System Regs Get | ||
| 1172 | * | ||
| 1173 | * Input Args: | ||
| 1174 | * vm - Virtual Machine | ||
| 1175 | * vcpuid - VCPU ID | ||
| 1176 | * | ||
| 1177 | * Output Args: | ||
| 1178 | * sregs - current state of VCPU system regs | ||
| 1179 | * | ||
| 1180 | * Return: None | ||
| 1181 | * | ||
| 1182 | * Obtains the current system register state for the VCPU specified by | ||
| 1183 | * vcpuid and stores it at the location given by sregs. | ||
| 1184 | */ | ||
| 1185 | void vcpu_sregs_get(struct kvm_vm *vm, | ||
| 1186 | uint32_t vcpuid, struct kvm_sregs *sregs) | ||
| 1187 | { | ||
| 1188 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1189 | int ret; | ||
| 1190 | |||
| 1191 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1192 | |||
| 1193 | /* Get the sregs. */ | ||
| 1195 | ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); | ||
| 1196 | TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", | ||
| 1197 | ret, errno); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | /* VM VCPU System Regs Set | ||
| 1201 | * | ||
| 1202 | * Input Args: | ||
| 1203 | * vm - Virtual Machine | ||
| 1204 | * vcpuid - VCPU ID | ||
| 1205 | * sregs - Values to set VCPU system regs to | ||
| 1206 | * | ||
| 1207 | * Output Args: None | ||
| 1208 | * | ||
| 1209 | * Return: None | ||
| 1210 | * | ||
| 1211 | * Sets the system regs of the VCPU specified by vcpuid to the values | ||
| 1212 | * given by sregs. | ||
| 1213 | */ | ||
| 1214 | void vcpu_sregs_set(struct kvm_vm *vm, | ||
| 1215 | uint32_t vcpuid, struct kvm_sregs *sregs) | ||
| 1216 | { | ||
| 1217 | int ret = _vcpu_sregs_set(vm, vcpuid, sregs); | ||
| 1218 | TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, " | ||
| 1219 | "rc: %i errno: %i", ret, errno); | ||
| 1220 | } | ||
| 1221 | |||
| 1222 | int _vcpu_sregs_set(struct kvm_vm *vm, | ||
| 1223 | uint32_t vcpuid, struct kvm_sregs *sregs) | ||
| 1224 | { | ||
| 1225 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1226 | int ret; | ||
| 1227 | |||
| 1228 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1229 | |||
| 1230 | /* Set the sregs. */ | ||
| 1231 | return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); | ||
| 1232 | } | ||
| 1233 | |||
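
A read-modify-write sketch for the system registers (illustrative only; the bit value is hard-coded for the sketch rather than taken from a header):

	struct kvm_sregs sregs;

	vcpu_sregs_get(vm, 0, &sregs);
	sregs.cr4 |= (1ULL << 2);	/* CR4.TSD, for example */
	vcpu_sregs_set(vm, 0, &sregs);
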
| 1234 | /* VCPU Ioctl | ||
| 1235 | * | ||
| 1236 | * Input Args: | ||
| 1237 | * vm - Virtual Machine | ||
| 1238 | * vcpuid - VCPU ID | ||
| 1239 | * cmd - Ioctl number | ||
| 1240 | * arg - Argument to pass to the ioctl | ||
| 1241 | * | ||
| 1242 | * Return: None | ||
| 1243 | * | ||
| 1244 | * Issues an arbitrary ioctl on a VCPU fd. | ||
| 1245 | */ | ||
| 1246 | void vcpu_ioctl(struct kvm_vm *vm, | ||
| 1247 | uint32_t vcpuid, unsigned long cmd, void *arg) | ||
| 1248 | { | ||
| 1249 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 1250 | int ret; | ||
| 1251 | |||
| 1252 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 1253 | |||
| 1254 | ret = ioctl(vcpu->fd, cmd, arg); | ||
| 1255 | TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", | ||
| 1256 | cmd, ret, errno, strerror(errno)); | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | /* VM Ioctl | ||
| 1260 | * | ||
| 1261 | * Input Args: | ||
| 1262 | * vm - Virtual Machine | ||
| 1263 | * cmd - Ioctl number | ||
| 1264 | * arg - Argument to pass to the ioctl | ||
| 1265 | * | ||
| 1266 | * Return: None | ||
| 1267 | * | ||
| 1268 | * Issues an arbitrary ioctl on a VM fd. | ||
| 1269 | */ | ||
| 1270 | void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) | ||
| 1271 | { | ||
| 1272 | int ret; | ||
| 1273 | |||
| 1274 | ret = ioctl(vm->fd, cmd, arg); | ||
| 1275 | TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", | ||
| 1276 | cmd, ret, errno, strerror(errno)); | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | /* VM Dump | ||
| 1280 | * | ||
| 1281 | * Input Args: | ||
| 1282 | * vm - Virtual Machine | ||
| 1283 | * indent - Left margin indent amount | ||
| 1284 | * | ||
| 1285 | * Output Args: | ||
| 1286 | * stream - Output FILE stream | ||
| 1287 | * | ||
| 1288 | * Return: None | ||
| 1289 | * | ||
| 1290 | * Dumps the current state of the VM given by vm, to the FILE stream | ||
| 1291 | * given by stream. | ||
| 1292 | */ | ||
| 1293 | void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) | ||
| 1294 | { | ||
| 1295 | struct userspace_mem_region *region; | ||
| 1296 | struct vcpu *vcpu; | ||
| 1297 | |||
| 1298 | fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); | ||
| 1299 | fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); | ||
| 1300 | fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); | ||
| 1301 | fprintf(stream, "%*sMem Regions:\n", indent, ""); | ||
| 1302 | for (region = vm->userspace_mem_region_head; region; | ||
| 1303 | region = region->next) { | ||
| 1304 | fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " | ||
| 1305 | "host_virt: %p\n", indent + 2, "", | ||
| 1306 | (uint64_t) region->region.guest_phys_addr, | ||
| 1307 | (uint64_t) region->region.memory_size, | ||
| 1308 | region->host_mem); | ||
| 1309 | fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); | ||
| 1310 | sparsebit_dump(stream, region->unused_phy_pages, 0); | ||
| 1311 | } | ||
| 1312 | fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); | ||
| 1313 | sparsebit_dump(stream, vm->vpages_mapped, indent + 2); | ||
| 1314 | fprintf(stream, "%*spgd_created: %u\n", indent, "", | ||
| 1315 | vm->pgd_created); | ||
| 1316 | if (vm->pgd_created) { | ||
| 1317 | fprintf(stream, "%*sVirtual Translation Tables:\n", | ||
| 1318 | indent + 2, ""); | ||
| 1319 | virt_dump(stream, vm, indent + 4); | ||
| 1320 | } | ||
| 1321 | fprintf(stream, "%*sVCPUs:\n", indent, ""); | ||
| 1322 | for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next) | ||
| 1323 | vcpu_dump(stream, vm, vcpu->id, indent + 2); | ||
| 1324 | } | ||
| 1325 | |||
| 1326 | /* VM VCPU Dump | ||
| 1327 | * | ||
| 1328 | * Input Args: | ||
| 1329 | * vm - Virtual Machine | ||
| 1330 | * vcpuid - VCPU ID | ||
| 1331 | * indent - Left margin indent amount | ||
| 1332 | * | ||
| 1333 | * Output Args: | ||
| 1334 | * stream - Output FILE stream | ||
| 1335 | * | ||
| 1336 | * Return: None | ||
| 1337 | * | ||
| 1338 | * Dumps the current state of the VCPU specified by vcpuid, within the VM | ||
| 1339 | * given by vm, to the FILE stream given by stream. | ||
| 1340 | */ | ||
| 1341 | void vcpu_dump(FILE *stream, struct kvm_vm *vm, | ||
| 1342 | uint32_t vcpuid, uint8_t indent) | ||
| 1343 | { | ||
| 1344 | struct kvm_regs regs; | ||
| 1345 | struct kvm_sregs sregs; | ||
| 1346 | |||
| 1347 | fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); | ||
| 1348 | |||
| 1349 | fprintf(stream, "%*sregs:\n", indent + 2, ""); | ||
| 1350 | vcpu_regs_get(vm, vcpuid, &regs); | ||
| 1351 | regs_dump(stream, &regs, indent + 4); | ||
| 1352 | |||
| 1353 | fprintf(stream, "%*ssregs:\n", indent + 2, ""); | ||
| 1354 | vcpu_sregs_get(vm, vcpuid, &sregs); | ||
| 1355 | sregs_dump(stream, &sregs, indent + 4); | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | /* Known KVM exit reasons */ | ||
| 1359 | static struct exit_reason { | ||
| 1360 | unsigned int reason; | ||
| 1361 | const char *name; | ||
| 1362 | } exit_reasons_known[] = { | ||
| 1363 | {KVM_EXIT_UNKNOWN, "UNKNOWN"}, | ||
| 1364 | {KVM_EXIT_EXCEPTION, "EXCEPTION"}, | ||
| 1365 | {KVM_EXIT_IO, "IO"}, | ||
| 1366 | {KVM_EXIT_HYPERCALL, "HYPERCALL"}, | ||
| 1367 | {KVM_EXIT_DEBUG, "DEBUG"}, | ||
| 1368 | {KVM_EXIT_HLT, "HLT"}, | ||
| 1369 | {KVM_EXIT_MMIO, "MMIO"}, | ||
| 1370 | {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, | ||
| 1371 | {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, | ||
| 1372 | {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, | ||
| 1373 | {KVM_EXIT_INTR, "INTR"}, | ||
| 1374 | {KVM_EXIT_SET_TPR, "SET_TPR"}, | ||
| 1375 | {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, | ||
| 1376 | {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, | ||
| 1377 | {KVM_EXIT_S390_RESET, "S390_RESET"}, | ||
| 1378 | {KVM_EXIT_DCR, "DCR"}, | ||
| 1379 | {KVM_EXIT_NMI, "NMI"}, | ||
| 1380 | {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, | ||
| 1381 | {KVM_EXIT_OSI, "OSI"}, | ||
| 1382 | {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, | ||
| 1383 | #ifdef KVM_EXIT_MEMORY_NOT_PRESENT | ||
| 1384 | {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, | ||
| 1385 | #endif | ||
| 1386 | }; | ||
| 1387 | |||
| 1388 | /* Exit Reason String | ||
| 1389 | * | ||
| 1390 | * Input Args: | ||
| 1391 | * exit_reason - Exit reason | ||
| 1392 | * | ||
| 1393 | * Output Args: None | ||
| 1394 | * | ||
| 1395 | * Return: | ||
| 1396 | * Constant string pointer describing the exit reason. | ||
| 1397 | * | ||
| 1398 | * Locates and returns a constant string that describes the KVM exit | ||
| 1399 | * reason given by exit_reason. If no such string is found, a constant | ||
| 1400 | * string of "Unknown" is returned. | ||
| 1401 | */ | ||
| 1402 | const char *exit_reason_str(unsigned int exit_reason) | ||
| 1403 | { | ||
| 1404 | unsigned int n1; | ||
| 1405 | |||
| 1406 | for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { | ||
| 1407 | if (exit_reason == exit_reasons_known[n1].reason) | ||
| 1408 | return exit_reasons_known[n1].name; | ||
| 1409 | } | ||
| 1410 | |||
| 1411 | return "Unknown"; | ||
| 1412 | } | ||
| 1413 | |||
| 1414 | /* Physical Page Allocate | ||
| 1415 | * | ||
| 1416 | * Input Args: | ||
| 1417 | * vm - Virtual Machine | ||
| 1418 | * paddr_min - Physical address minimum | ||
| 1419 | * memslot - Memory region to allocate page from | ||
| 1420 | * | ||
| 1421 | * Output Args: None | ||
| 1422 | * | ||
| 1423 | * Return: | ||
| 1424 | * Starting physical address | ||
| 1425 | * | ||
| 1426 | * Within the VM specified by vm, locates an available physical page | ||
| 1427 | * at or above paddr_min. If found, the page is marked as in use | ||
| 1428 | * and its address is returned. A TEST_ASSERT failure occurs if no | ||
| 1429 | * page is available at or above paddr_min. | ||
| 1430 | */ | ||
| 1431 | vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, | ||
| 1432 | vm_paddr_t paddr_min, uint32_t memslot) | ||
| 1433 | { | ||
| 1434 | struct userspace_mem_region *region; | ||
| 1435 | sparsebit_idx_t pg; | ||
| 1436 | |||
| 1437 | TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " | ||
| 1438 | "not divisible by page size.\n" | ||
| 1439 | " paddr_min: 0x%lx page_size: 0x%x", | ||
| 1440 | paddr_min, vm->page_size); | ||
| 1441 | |||
| 1442 | /* Locate memory region. */ | ||
| 1443 | region = memslot2region(vm, memslot); | ||
| 1444 | |||
| 1445 | /* Locate next available physical page at or above paddr_min. */ | ||
| 1446 | pg = paddr_min >> vm->page_shift; | ||
| 1447 | |||
| 1448 | if (!sparsebit_is_set(region->unused_phy_pages, pg)) { | ||
| 1449 | pg = sparsebit_next_set(region->unused_phy_pages, pg); | ||
| 1450 | if (pg == 0) { | ||
| 1451 | fprintf(stderr, "No guest physical page available, " | ||
| 1452 | "paddr_min: 0x%lx page_size: 0x%x memslot: %u", | ||
| 1453 | "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", | ||
| 1454 | fputs("---- vm dump ----\n", stderr); | ||
| 1455 | vm_dump(stderr, vm, 2); | ||
| 1456 | abort(); | ||
| 1457 | } | ||
| 1458 | } | ||
| 1459 | |||
| 1460 | /* Specify page as in use and return its address. */ | ||
| 1461 | sparsebit_clear(region->unused_phy_pages, pg); | ||
| 1462 | |||
| 1463 | return pg * vm->page_size; | ||
| 1464 | } | ||
| 1465 | |||
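
vm_vaddr_alloc() above is essentially a loop of vm_phy_page_alloc() plus the arch-specific virt_pg_map(). A test that wants to control its own guest-physical layout can call the allocator directly; a sketch (the fixed guest-virtual address is hypothetical):

	vm_paddr_t paddr;

	paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, 0);
	virt_pg_map(vm, 0xc0000000, paddr, 0);
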
| 1466 | /* Address Guest Virtual to Host Virtual | ||
| 1467 | * | ||
| 1468 | * Input Args: | ||
| 1469 | * vm - Virtual Machine | ||
| 1470 | * gva - VM virtual address | ||
| 1471 | * | ||
| 1472 | * Output Args: None | ||
| 1473 | * | ||
| 1474 | * Return: | ||
| 1475 | * Equivalent host virtual address | ||
| 1476 | */ | ||
| 1477 | void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) | ||
| 1478 | { | ||
| 1479 | return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); | ||
| 1480 | } | ||
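
Because this walks the guest page tables that the library itself built, the host side of a test can seed guest memory through the returned pointer. A sketch (scratch_gva is assumed to come from an earlier vm_vaddr_alloc() call):

	uint64_t *host_p = addr_gva2hva(vm, scratch_gva);

	*host_p = 0xdeadbeef;	/* guest will read this at scratch_gva */
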
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h new file mode 100644 index 000000000000..a0bd1980c81c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/kvm_util_internal.h | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #ifndef KVM_UTIL_INTERNAL_H | ||
| 10 | #define KVM_UTIL_INTERNAL_H 1 | ||
| 11 | |||
| 12 | #include "sparsebit.h" | ||
| 13 | |||
| 14 | #ifndef BITS_PER_BYTE | ||
| 15 | #define BITS_PER_BYTE 8 | ||
| 16 | #endif | ||
| 17 | |||
| 18 | #ifndef BITS_PER_LONG | ||
| 19 | #define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) | ||
| 20 | #endif | ||
| 21 | |||
| 22 | #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) | ||
| 23 | #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) | ||
| 24 | |||
| 25 | /* Concrete definition of struct kvm_vm. */ | ||
| 26 | struct userspace_mem_region { | ||
| 27 | struct userspace_mem_region *next, *prev; | ||
| 28 | struct kvm_userspace_memory_region region; | ||
| 29 | struct sparsebit *unused_phy_pages; | ||
| 30 | int fd; | ||
| 31 | off_t offset; | ||
| 32 | void *host_mem; | ||
| 33 | void *mmap_start; | ||
| 34 | size_t mmap_size; | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct vcpu { | ||
| 38 | struct vcpu *next, *prev; | ||
| 39 | uint32_t id; | ||
| 40 | int fd; | ||
| 41 | struct kvm_run *state; | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct kvm_vm { | ||
| 45 | int mode; | ||
| 46 | int fd; | ||
| 47 | unsigned int page_size; | ||
| 48 | unsigned int page_shift; | ||
| 49 | uint64_t max_gfn; | ||
| 50 | struct vcpu *vcpu_head; | ||
| 51 | struct userspace_mem_region *userspace_mem_region_head; | ||
| 52 | struct sparsebit *vpages_valid; | ||
| 53 | struct sparsebit *vpages_mapped; | ||
| 54 | bool pgd_created; | ||
| 55 | vm_paddr_t pgd; | ||
| 56 | }; | ||
| 57 | |||
| 58 | struct vcpu *vcpu_find(struct kvm_vm *vm, | ||
| 59 | uint32_t vcpuid); | ||
| 60 | void vcpu_setup(struct kvm_vm *vm, int vcpuid); | ||
| 61 | void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); | ||
| 62 | void regs_dump(FILE *stream, struct kvm_regs *regs, | ||
| 63 | uint8_t indent); | ||
| 64 | void sregs_dump(FILE *stream, struct kvm_sregs *sregs, | ||
| 65 | uint8_t indent); | ||
| 66 | |||
| 67 | #endif | ||
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c new file mode 100644 index 000000000000..0c5cf3e0cb6f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/sparsebit.c | |||
| @@ -0,0 +1,2087 @@ | |||
| 1 | /* | ||
| 2 | * Sparse bit array | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver) | ||
| 6 | * | ||
| 7 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 8 | * | ||
| 9 | * This library provides functions to support a memory efficient bit array, | ||
| 10 | * with an index size of 2^64. A sparsebit array is allocated through | ||
| 11 | * the use of sparsebit_alloc() and freed via sparsebit_free(), | ||
| 12 | * such as in the following: | ||
| 13 | * | ||
| 14 | * struct sparsebit *s; | ||
| 15 | * s = sparsebit_alloc(); | ||
| 16 | * sparsebit_free(&s); | ||
| 17 | * | ||
| 18 | * Note that sparsebit_free() takes a pointer to the caller's | ||
| 19 | * sparsebit pointer. This is so that sparsebit_free() is able to | ||
| 20 | * poison that pointer (e.g. set it to NULL) after freeing the | ||
| 21 | * struct sparsebit, before returning to the caller. | ||
| 23 | * | ||
| 24 | * Between the return of sparsebit_alloc() and the call of | ||
| 25 | * sparsebit_free(), there are multiple query and modifying operations | ||
| 26 | * that can be performed on the allocated sparsebit array. All of | ||
| 27 | * these operations take as a parameter the value returned from | ||
| 28 | * sparsebit_alloc() and most also take a bit index. Frequently | ||
| 29 | * used routines include: | ||
| 30 | * | ||
| 31 | * ---- Query Operations | ||
| 32 | * sparsebit_is_set(s, idx) | ||
| 33 | * sparsebit_is_clear(s, idx) | ||
| 34 | * sparsebit_any_set(s) | ||
| 35 | * sparsebit_first_set(s) | ||
| 36 | * sparsebit_next_set(s, prev_idx) | ||
| 37 | * | ||
| 38 | * ---- Modifying Operations | ||
| 39 | * sparsebit_set(s, idx) | ||
| 40 | * sparsebit_clear(s, idx) | ||
| 41 | * sparsebit_set_num(s, idx, num); | ||
| 42 | * sparsebit_clear_num(s, idx, num); | ||
| 43 | * | ||
| 44 | * A common operation is to iterate over all the bits set in a test | ||
| 45 | * sparsebit array. This can be done via code with the following structure: | ||
| 46 | * | ||
| 47 | * sparsebit_idx_t idx; | ||
| 48 | * if (sparsebit_any_set(s)) { | ||
| 49 | * idx = sparsebit_first_set(s); | ||
| 50 | * do { | ||
| 51 | * ... | ||
| 52 | * idx = sparsebit_next_set(s, idx); | ||
| 53 | * } while (idx != 0); | ||
| 54 | * } | ||
| 55 | * | ||
| 56 | * The index of the first bit set needs to be obtained via | ||
| 57 | * sparsebit_first_set(), because sparsebit_next_set() needs | ||
| 58 | * the index of the previously set bit. The sparsebit_idx_t type is | ||
| 59 | * unsigned, so there is no previous index before 0 that is available. | ||
| 60 | * Also, the call to sparsebit_first_set() is not made unless there | ||
| 61 | * is at least 1 bit in the array set, because sparsebit_first_set() | ||
| 62 | * aborts if it is called with no bits set. | ||
| 63 | * It is the caller's responsibility to ensure that the | ||
| 64 | * sparsebit array has at least a single bit set before calling | ||
| 65 | * sparsebit_first_set(). | ||
| 66 | * | ||
| 67 | * ==== Implementation Overview ==== | ||
| 68 | * For the most part the internal implementation of sparsebit is | ||
| 69 | * opaque to the caller. One important implementation detail that the | ||
| 70 | * caller may need to be aware of is the spatial complexity of the | ||
| 71 | * implementation. This implementation of a sparsebit array is not | ||
| 72 | * only sparse, in that it uses memory proportional to the number of bits | ||
| 73 | * set. It is also efficient in memory usage when most of the bits are | ||
| 74 | * set. | ||
| 75 | * | ||
| 76 | * At a high-level the state of the bit settings are maintained through | ||
| 77 | * the use of a binary-search tree, where each node contains at least | ||
| 78 | * the following members: | ||
| 79 | * | ||
| 80 | * typedef uint64_t sparsebit_idx_t; | ||
| 81 | * typedef uint64_t sparsebit_num_t; | ||
| 82 | * | ||
| 83 | * sparsebit_idx_t idx; | ||
| 84 | * uint32_t mask; | ||
| 85 | * sparsebit_num_t num_after; | ||
| 86 | * | ||
| 87 | * The idx member contains the bit index of the first bit described by this | ||
| 88 | * node, while the mask member stores the setting of the first 32-bits. | ||
| 89 | * The setting of the bit at idx + n, where 0 <= n < 32, is located in the | ||
| 90 | * mask member at 1 << n. | ||
| 91 | * | ||
| 92 | * Nodes are sorted by idx and the bits described by two nodes will never | ||
| 93 | * overlap. The idx member is always aligned to the mask size, i.e. a | ||
| 94 | * multiple of 32. | ||
| 95 | * | ||
| 96 | * Beyond a typical implementation, the nodes in this implementation also | ||
| 97 | * contains a member named num_after. The num_after member holds the | ||
| 98 | * number of bits immediately after the mask bits that are contiguously set. | ||
| 99 | * The use of the num_after member allows this implementation to efficiently | ||
| 100 | * represent cases where most bits are set. For example, the case of all | ||
| 101 | * but the last two bits set, is represented by the following two nodes: | ||
| 102 | * | ||
| 103 | * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0 | ||
| 104 | * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0 | ||
| 105 | * | ||
| 106 | * ==== Invariants ==== | ||
| 107 | * This implementation uses the following invariants: | ||
| 108 | * | ||
| 109 | * + Nodes are only used to represent bits that are set. | ||
| 110 | * Nodes with a mask of 0 and num_after of 0 are not allowed. | ||
| 111 | * | ||
| 112 | * + Sum of bits set in all the nodes is equal to the value of | ||
| 113 | * the struct sparsebit num_set member. | ||
| 114 | * | ||
| 115 | * + The setting of at least one bit is always described in a node's | ||
| 116 | * mask (mask >= 1). | ||
| 117 | * | ||
| 118 | * + A node with all mask bits set only occurs when the last bit | ||
| 119 | * described by the previous node is not equal to this node's | ||
| 120 | * starting index - 1. All such occurrences of this condition are | ||
| 121 | * avoided by moving the setting of the node's mask bits into | ||
| 122 | * the previous node's num_after setting. | ||
| 123 | * | ||
| 124 | * + A node's starting index is evenly divisible by the number of bits | ||
| 125 | * within a node's mask member. | ||
| 126 | * | ||
| 127 | * + Nodes never represent a range of bits that wrap around the | ||
| 128 | * highest supported index. | ||
| 129 | * | ||
| 130 | * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1 | ||
| 131 | * | ||
| 132 | * As a consequence of the above, the num_after member of a node | ||
| 133 | * will always be <=: | ||
| 134 | * | ||
| 135 | * maximum_index - nodes_starting_index - number_of_mask_bits | ||
| 136 | * | ||
| 137 | * + Nodes within the binary search tree are sorted based on each | ||
| 138 | * node's starting index. | ||
| 139 | * | ||
| 140 | * + The range of bits described by any two nodes do not overlap. The | ||
| 141 | * range of bits described by a single node is: | ||
| 142 | * | ||
| 143 | * start: node->idx | ||
| 144 | * end (inclusive): node->idx + MASK_BITS + node->num_after - 1; | ||
| 145 | * | ||
| 146 | * Note, at times these invariants are temporarily violated for a | ||
| 147 | * specific portion of the code. For example, when setting a mask | ||
| 148 | * bit, there is a small delay between when the mask bit is set and the | ||
| 149 | * value in the struct sparsebit_pvt num_set member is updated. Other | ||
| 150 | * temporary violations occur when node_split() is called with a specified | ||
| 151 | * index and assures that a node where its mask represents the bit | ||
| 152 | * at the specified index exists. At times to do this node_split() | ||
| 153 | * must split an existing node into two nodes or create a node that | ||
| 154 | * has no bits set. Such temporary violations must be corrected before | ||
| 155 | * returning to the caller. These corrections are typically performed | ||
| 156 | * by the local function node_reduce(). | ||
| 157 | */ | ||
| 158 | |||
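
A short self-contained sketch of the API described above (not part of the patch; it only uses routines listed in the comment):

	struct sparsebit *s = sparsebit_alloc();

	sparsebit_set_num(s, 0, 1000);		/* set bits 0..999 */
	assert(sparsebit_is_set(s, 999));
	assert(sparsebit_is_clear(s, 1000));
	sparsebit_clear(s, 0);
	assert(sparsebit_first_set(s) == 1);
	sparsebit_free(&s);
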
| 159 | #include "test_util.h" | ||
| 160 | #include "sparsebit.h" | ||
| 161 | #include <limits.h> | ||
| 162 | #include <assert.h> | ||
| 163 | |||
| 164 | #define DUMP_LINE_MAX 100 /* Does not include indent amount */ | ||
| 165 | |||
| 166 | typedef uint32_t mask_t; | ||
| 167 | #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) | ||
| 168 | |||
| 169 | struct node { | ||
| 170 | struct node *parent; | ||
| 171 | struct node *left; | ||
| 172 | struct node *right; | ||
| 173 | sparsebit_idx_t idx; /* index of least-significant bit in mask */ | ||
| 174 | sparsebit_num_t num_after; /* num contiguously set after mask */ | ||
| 175 | mask_t mask; | ||
| 176 | }; | ||
| 177 | |||
| 178 | struct sparsebit { | ||
| 179 | /* | ||
| 180 | * Points to root node of the binary search | ||
| 181 | * tree. Equal to NULL when no bits are set in | ||
| 182 | * the entire sparsebit array. | ||
| 183 | */ | ||
| 184 | struct node *root; | ||
| 185 | |||
| 186 | /* | ||
| 187 | * A redundant count of the total number of bits set. Used for | ||
| 188 | * diagnostic purposes and to change the time complexity of | ||
| 189 | * sparsebit_num_set() from O(n) to O(1). | ||
| 190 | * Note: Due to overflow, a value of 0 means none or all set. | ||
| 191 | */ | ||
| 192 | sparsebit_num_t num_set; | ||
| 193 | }; | ||
| 194 | |||
| 195 | /* Returns the number of set bits described by the settings | ||
| 196 | * of the node pointed to by nodep. | ||
| 197 | */ | ||
| 198 | static sparsebit_num_t node_num_set(struct node *nodep) | ||
| 199 | { | ||
| 200 | return nodep->num_after + __builtin_popcount(nodep->mask); | ||
| 201 | } | ||
| 202 | |||
| 203 | /* Returns a pointer to the node that describes the | ||
| 204 | * lowest bit index. | ||
| 205 | */ | ||
| 206 | static struct node *node_first(struct sparsebit *s) | ||
| 207 | { | ||
| 208 | struct node *nodep; | ||
| 209 | |||
| 210 | for (nodep = s->root; nodep && nodep->left; nodep = nodep->left) | ||
| 211 | ; | ||
| 212 | |||
| 213 | return nodep; | ||
| 214 | } | ||
| 215 | |||
| 216 | /* Returns a pointer to the node that describes the | ||
| 217 | * lowest bit index > the index of the node pointed to by np. | ||
| 218 | * Returns NULL if no node with a higher index exists. | ||
| 219 | */ | ||
| 220 | static struct node *node_next(struct sparsebit *s, struct node *np) | ||
| 221 | { | ||
| 222 | struct node *nodep = np; | ||
| 223 | |||
| 224 | /* | ||
| 225 | * If current node has a right child, next node is the left-most | ||
| 226 | * of the right child. | ||
| 227 | */ | ||
| 228 | if (nodep->right) { | ||
| 229 | for (nodep = nodep->right; nodep->left; nodep = nodep->left) | ||
| 230 | ; | ||
| 231 | return nodep; | ||
| 232 | } | ||
| 233 | |||
| 234 | /* | ||
| 235 | * No right child. Go up until node is left child of a parent. | ||
| 236 | * That parent is then the next node. | ||
| 237 | */ | ||
| 238 | while (nodep->parent && nodep == nodep->parent->right) | ||
| 239 | nodep = nodep->parent; | ||
| 240 | |||
| 241 | return nodep->parent; | ||
| 242 | } | ||
| 243 | |||
| 244 | /* Searches for and returns a pointer to the node that describes the | ||
| 245 | * highest index < the index of the node pointed to by np. | ||
| 246 | * Returns NULL if no node with a lower index exists. | ||
| 247 | */ | ||
| 248 | static struct node *node_prev(struct sparsebit *s, struct node *np) | ||
| 249 | { | ||
| 250 | struct node *nodep = np; | ||
| 251 | |||
| 252 | /* | ||
| 253 | * If current node has a left child, next node is the right-most | ||
| 254 | * of the left child. | ||
| 255 | */ | ||
| 256 | if (nodep->left) { | ||
| 257 | for (nodep = nodep->left; nodep->right; nodep = nodep->right) | ||
| 258 | ; | ||
| 259 | return (struct node *) nodep; | ||
| 260 | } | ||
| 261 | |||
| 262 | /* | ||
| 263 | * No left child. Go up until node is right child of a parent. | ||
| 264 | * That parent is then the next node. | ||
| 265 | */ | ||
| 266 | while (nodep->parent && nodep == nodep->parent->left) | ||
| 267 | nodep = nodep->parent; | ||
| 268 | |||
| 269 | return (struct node *) nodep->parent; | ||
| 270 | } | ||
| 271 | |||
| 272 | |||
| 273 | /* Allocates space to hold a copy of the node sub-tree pointed to by | ||
| 274 | * subtree and duplicates the bit settings to the newly allocated nodes. | ||
| 275 | * Returns the newly allocated copy of subtree. | ||
| 276 | */ | ||
| 277 | static struct node *node_copy_subtree(struct node *subtree) | ||
| 278 | { | ||
| 279 | struct node *root; | ||
| 280 | |||
| 281 | /* Duplicate the node at the root of the subtree */ | ||
| 282 | root = calloc(1, sizeof(*root)); | ||
| 283 | if (!root) { | ||
| 284 | perror("calloc"); | ||
| 285 | abort(); | ||
| 286 | } | ||
| 287 | |||
| 288 | root->idx = subtree->idx; | ||
| 289 | root->mask = subtree->mask; | ||
| 290 | root->num_after = subtree->num_after; | ||
| 291 | |||
| 292 | /* As needed, recursively duplicate the left and right subtrees */ | ||
| 293 | if (subtree->left) { | ||
| 294 | root->left = node_copy_subtree(subtree->left); | ||
| 295 | root->left->parent = root; | ||
| 296 | } | ||
| 297 | |||
| 298 | if (subtree->right) { | ||
| 299 | root->right = node_copy_subtree(subtree->right); | ||
| 300 | root->right->parent = root; | ||
| 301 | } | ||
| 302 | |||
| 303 | return root; | ||
| 304 | } | ||
| 305 | |||
| 306 | /* Searches for and returns a pointer to the node that describes the setting | ||
| 307 | * of the bit given by idx. A node describes the setting of a bit if its | ||
| 308 | * index is within the bits described by the mask bits or the number of | ||
| 309 | * contiguous bits set after the mask. Returns NULL if there is no such node. | ||
| 310 | */ | ||
| 311 | static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 312 | { | ||
| 313 | struct node *nodep; | ||
| 314 | |||
| 315 | /* Find the node that describes the setting of the bit at idx */ | ||
| 316 | for (nodep = s->root; nodep; | ||
| 317 | nodep = nodep->idx > idx ? nodep->left : nodep->right) { | ||
| 318 | if (idx >= nodep->idx && | ||
| 319 | idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) | ||
| 320 | break; | ||
| 321 | } | ||
| 322 | |||
| 323 | return nodep; | ||
| 324 | } | ||
| 325 | |||
| 326 | /* Entry Requirements: | ||
| 327 | * + A node that describes the setting of idx is not already present. | ||
| 328 | * | ||
| 329 | * Adds a new node to describe the setting of the bit at the index given | ||
| 330 | * by idx. Returns a pointer to the newly added node. | ||
| 331 | * | ||
| 332 | * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced. | ||
| 333 | */ | ||
| 334 | static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 335 | { | ||
| 336 | struct node *nodep, *parentp, *prev; | ||
| 337 | |||
| 338 | /* Allocate and initialize the new node. */ | ||
| 339 | nodep = calloc(1, sizeof(*nodep)); | ||
| 340 | if (!nodep) { | ||
| 341 | perror("calloc"); | ||
| 342 | abort(); | ||
| 343 | } | ||
| 344 | |||
| 345 | nodep->idx = idx & -MASK_BITS; | ||
| 346 | |||
| 347 | /* If no nodes, set it up as the root node. */ | ||
| 348 | if (!s->root) { | ||
| 349 | s->root = nodep; | ||
| 350 | return nodep; | ||
| 351 | } | ||
| 352 | |||
| 353 | /* | ||
| 354 | * Find the parent where the new node should be attached | ||
| 355 | * and add the node there. | ||
| 356 | */ | ||
| 357 | parentp = s->root; | ||
| 358 | while (true) { | ||
| 359 | if (idx < parentp->idx) { | ||
| 360 | if (!parentp->left) { | ||
| 361 | parentp->left = nodep; | ||
| 362 | nodep->parent = parentp; | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | parentp = parentp->left; | ||
| 366 | } else { | ||
| 367 | assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1); | ||
| 368 | if (!parentp->right) { | ||
| 369 | parentp->right = nodep; | ||
| 370 | nodep->parent = parentp; | ||
| 371 | break; | ||
| 372 | } | ||
| 373 | parentp = parentp->right; | ||
| 374 | } | ||
| 375 | } | ||
| 376 | |||
| 377 | /* | ||
| 378 | * Does num_after bits of previous node overlap with the mask | ||
| 379 | * of the new node? If so set the bits in the new nodes mask | ||
| 380 | * and reduce the previous nodes num_after. | ||
| 381 | */ | ||
| 382 | prev = node_prev(s, nodep); | ||
| 383 | while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) { | ||
| 384 | unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1) | ||
| 385 | - nodep->idx; | ||
| 386 | assert(prev->num_after > 0); | ||
| 387 | assert(n1 < MASK_BITS); | ||
| 388 | assert(!(nodep->mask & (1 << n1))); | ||
| 389 | nodep->mask |= (1 << n1); | ||
| 390 | prev->num_after--; | ||
| 391 | } | ||
| 392 | |||
| 393 | return nodep; | ||
| 394 | } | ||
| 395 | |||
| 396 | /* Returns whether all the bits in the sparsebit array are set. */ | ||
| 397 | bool sparsebit_all_set(struct sparsebit *s) | ||
| 398 | { | ||
| 399 | /* | ||
| 400 | * If any nodes there must be at least one bit set. Only case | ||
| 401 | * where a bit is set and total num set is 0, is when all bits | ||
| 402 | * are set. | ||
| 403 | */ | ||
| 404 | return s->root && s->num_set == 0; | ||
| 405 | } | ||
| 406 | |||
| 407 | /* Clears all bits described by the node pointed to by nodep, then | ||
| 408 | * removes the node. | ||
| 409 | */ | ||
| 410 | static void node_rm(struct sparsebit *s, struct node *nodep) | ||
| 411 | { | ||
| 412 | struct node *tmp; | ||
| 413 | sparsebit_num_t num_set; | ||
| 414 | |||
| 415 | num_set = node_num_set(nodep); | ||
| 416 | assert(s->num_set >= num_set || sparsebit_all_set(s)); | ||
| 417 | s->num_set -= node_num_set(nodep); | ||
| 418 | |||
| 419 | /* Have both left and right child */ | ||
| 420 | if (nodep->left && nodep->right) { | ||
| 421 | /* | ||
| 422 | * Move left children to the leftmost leaf node | ||
| 423 | * of the right child. | ||
| 424 | */ | ||
| 425 | for (tmp = nodep->right; tmp->left; tmp = tmp->left) | ||
| 426 | ; | ||
| 427 | tmp->left = nodep->left; | ||
| 428 | nodep->left = NULL; | ||
| 429 | tmp->left->parent = tmp; | ||
| 430 | } | ||
| 431 | |||
| 432 | /* Left only child */ | ||
| 433 | if (nodep->left) { | ||
| 434 | if (!nodep->parent) { | ||
| 435 | s->root = nodep->left; | ||
| 436 | nodep->left->parent = NULL; | ||
| 437 | } else { | ||
| 438 | nodep->left->parent = nodep->parent; | ||
| 439 | if (nodep == nodep->parent->left) | ||
| 440 | nodep->parent->left = nodep->left; | ||
| 441 | else { | ||
| 442 | assert(nodep == nodep->parent->right); | ||
| 443 | nodep->parent->right = nodep->left; | ||
| 444 | } | ||
| 445 | } | ||
| 446 | |||
| 447 | nodep->parent = nodep->left = nodep->right = NULL; | ||
| 448 | free(nodep); | ||
| 449 | |||
| 450 | return; | ||
| 451 | } | ||
| 452 | |||
| 453 | |||
| 454 | /* Right only child */ | ||
| 455 | if (nodep->right) { | ||
| 456 | if (!nodep->parent) { | ||
| 457 | s->root = nodep->right; | ||
| 458 | nodep->right->parent = NULL; | ||
| 459 | } else { | ||
| 460 | nodep->right->parent = nodep->parent; | ||
| 461 | if (nodep == nodep->parent->left) | ||
| 462 | nodep->parent->left = nodep->right; | ||
| 463 | else { | ||
| 464 | assert(nodep == nodep->parent->right); | ||
| 465 | nodep->parent->right = nodep->right; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | |||
| 469 | nodep->parent = nodep->left = nodep->right = NULL; | ||
| 470 | free(nodep); | ||
| 471 | |||
| 472 | return; | ||
| 473 | } | ||
| 474 | |||
| 475 | /* Leaf Node */ | ||
| 476 | if (!nodep->parent) { | ||
| 477 | s->root = NULL; | ||
| 478 | } else { | ||
| 479 | if (nodep->parent->left == nodep) | ||
| 480 | nodep->parent->left = NULL; | ||
| 481 | else { | ||
| 482 | assert(nodep == nodep->parent->right); | ||
| 483 | nodep->parent->right = NULL; | ||
| 484 | } | ||
| 485 | } | ||
| 486 | |||
| 487 | nodep->parent = nodep->left = nodep->right = NULL; | ||
| 488 | free(nodep); | ||
| 489 | |||
| 490 | return; | ||
| 491 | } | ||
| 492 | |||
| 493 | /* Splits the node containing the bit at idx so that there is a node | ||
| 494 | * that starts at the specified index. If no such node exists, a new | ||
| 495 | * node at the specified index is created. Returns the new node. | ||
| 496 | * | ||
| 497 | * idx must start of a mask boundary. | ||
| 498 | */ | ||
| 499 | static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 500 | { | ||
| 501 | struct node *nodep1, *nodep2; | ||
| 502 | sparsebit_idx_t offset; | ||
| 503 | sparsebit_num_t orig_num_after; | ||
| 504 | |||
| 505 | assert(!(idx % MASK_BITS)); | ||
| 506 | |||
| 507 | /* | ||
| 508 | * Is there a node that describes the setting of idx? | ||
| 509 | * If not, add it. | ||
| 510 | */ | ||
| 511 | nodep1 = node_find(s, idx); | ||
| 512 | if (!nodep1) | ||
| 513 | return node_add(s, idx); | ||
| 514 | |||
| 515 | /* | ||
| 516 | * All done if the starting index of the node is where the | ||
| 517 | * split should occur. | ||
| 518 | */ | ||
| 519 | if (nodep1->idx == idx) | ||
| 520 | return nodep1; | ||
| 521 | |||
| 522 | /* | ||
| 523 | * Split point not at start of mask, so it must be part of | ||
| 524 | * bits described by num_after. | ||
| 525 | */ | ||
| 526 | |||
| 527 | /* | ||
| 528 | * Calculate offset within num_after for where the split is | ||
| 529 | * to occur. | ||
| 530 | */ | ||
| 531 | offset = idx - (nodep1->idx + MASK_BITS); | ||
| 532 | orig_num_after = nodep1->num_after; | ||
| 533 | |||
| 534 | /* | ||
| 535 | * Add a new node to describe the bits starting at | ||
| 536 | * the split point. | ||
| 537 | */ | ||
| 538 | nodep1->num_after = offset; | ||
| 539 | nodep2 = node_add(s, idx); | ||
| 540 | |||
| 541 | /* Move bits after the split point into the new node */ | ||
| 542 | nodep2->num_after = orig_num_after - offset; | ||
| 543 | if (nodep2->num_after >= MASK_BITS) { | ||
| 544 | nodep2->mask = ~(mask_t) 0; | ||
| 545 | nodep2->num_after -= MASK_BITS; | ||
| 546 | } else { | ||
| 547 | nodep2->mask = (1 << nodep2->num_after) - 1; | ||
| 548 | nodep2->num_after = 0; | ||
| 549 | } | ||
| 550 | |||
| 551 | return nodep2; | ||
| 552 | } | ||
| 553 | |||
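
A worked example of the arithmetic above (illustrative numbers): take a node with idx 0, mask 0xffffffff and num_after 100, i.e. bits 0..131 set, and split it at idx 64. Then offset = 64 - (0 + 32) = 32, so the original node keeps num_after = 32 (bits 0..63), and the new node at idx 64 receives 100 - 32 = 68 bits: since 68 >= MASK_BITS its mask becomes ~0 and its num_after becomes 36, which together cover bits 64..131, exactly the bits the original node described past the split point.
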
| 554 | /* Iteratively reduces the node pointed to by nodep and its adjacent | ||
| 555 | * nodes into a more compact form. For example, a node with all of its | ||
| 556 | * mask bits set that is adjacent to a previous node will get combined into a | ||
| 557 | * single node with an increased num_after setting. | ||
| 558 | * | ||
| 559 | * After each reduction, a further check is made to see if additional | ||
| 560 | * reductions are possible with the new previous and next nodes. Note, | ||
| 561 | * a search for a reduction is only done across the nodes nearest nodep | ||
| 562 | * and those that became part of a reduction. Reductions beyond nodep | ||
| 563 | * and the adjacent nodes that are reduced are not discovered. It is the | ||
| 564 | * responsibility of the caller to pass a nodep that is within one node | ||
| 565 | * of each possible reduction. | ||
| 566 | * | ||
| 567 | * This function does not fix the temporary violation of all invariants. | ||
| 568 | * For example it does not fix the case where the bit settings described | ||
| 569 | * by two or more nodes overlap. Such a violation introduces the potential | ||
| 570 | * complication of a bit setting for a specific index having different settings | ||
| 571 | * in different nodes. This would then introduce the further complication | ||
| 572 | * of which node has the correct setting of the bit and thus such conditions | ||
| 573 | * are not allowed. | ||
| 574 | * | ||
| 575 | * This function is designed to fix invariant violations that are introduced | ||
| 576 | * by node_split() and by changes to the nodes mask or num_after members. | ||
| 577 | * For example, when setting a bit within a nodes mask, the function that | ||
| 578 | * sets the bit doesn't have to worry about whether the setting of that | ||
| 579 | * bit caused the mask to have leading only or trailing only bits set. | ||
| 580 | * Instead, the function can call node_reduce(), with nodep equal to the | ||
| 581 | * node address that it set a mask bit in, and node_reduce() will notice | ||
| 582 | * the cases of leading or trailing only bits and that there is an | ||
| 583 | * adjacent node that the bit settings could be merged into. | ||
| 584 | * | ||
| 585 | * This implementation specifically detects and corrects violation of the | ||
| 586 | * following invariants: | ||
| 587 | * | ||
| 588 | * + Nodes are only used to represent bits that are set. | ||
| 589 | * Nodes with a mask of 0 and num_after of 0 are not allowed. | ||
| 590 | * | ||
| 591 | * + The setting of at least one bit is always described in a node's | ||
| 592 | * mask (mask >= 1). | ||
| 593 | * | ||
| 594 | * + A node with all mask bits set only occurs when the last bit | ||
| 595 | * described by the previous node is not equal to this node's | ||
| 596 | * starting index - 1. All such occurrences of this condition are | ||
| 597 | * avoided by moving the setting of the node's mask bits into | ||
| 598 | * the previous node's num_after setting. | ||
| 599 | */ | ||
| 600 | static void node_reduce(struct sparsebit *s, struct node *nodep) | ||
| 601 | { | ||
| 602 | bool reduction_performed; | ||
| 603 | |||
| 604 | do { | ||
| 605 | reduction_performed = false; | ||
| 606 | struct node *prev, *next, *tmp; | ||
| 607 | |||
| 608 | /* 1) Potential reductions within the current node. */ | ||
| 609 | |||
| 610 | /* Nodes with all bits cleared may be removed. */ | ||
| 611 | if (nodep->mask == 0 && nodep->num_after == 0) { | ||
| 612 | /* | ||
| 613 | * About to remove the node pointed to by | ||
| 614 | * nodep, which normally would cause a problem | ||
| 615 | * for the next pass through the reduction loop, | ||
| 616 | * because the node at the starting point no longer | ||
| 617 | * exists. This potential problem is handled | ||
| 618 | * by first remembering the location of the next | ||
| 619 | * or previous nodes. Doesn't matter which, because | ||
| 620 | * once the node at nodep is removed, there will be | ||
| 621 | * no other nodes between prev and next. | ||
| 622 | * | ||
| 623 | * Note, the checks performed on nodep against both | ||
| 624 | * prev and next check for an adjacent | ||
| 625 | * node that can be reduced into a single node. As | ||
| 626 | * such, after removing the node at nodep, it doesn't | ||
| 627 | * matter whether the nodep for the next pass | ||
| 628 | * through the loop is set to the previous pass's | ||
| 629 | * prev or next node. Either way, on the next pass | ||
| 630 | * the one not selected will become either the | ||
| 631 | * prev or next node. | ||
| 632 | */ | ||
| 633 | tmp = node_next(s, nodep); | ||
| 634 | if (!tmp) | ||
| 635 | tmp = node_prev(s, nodep); | ||
| 636 | |||
| 637 | node_rm(s, nodep); | ||
| 638 | nodep = NULL; | ||
| 639 | |||
| 640 | nodep = tmp; | ||
| 641 | reduction_performed = true; | ||
| 642 | continue; | ||
| 643 | } | ||
| 644 | |||
| 645 | /* | ||
| 646 | * When the mask is 0, can reduce the amount of num_after | ||
| 647 | * bits by moving the initial num_after bits into the mask. | ||
| 648 | */ | ||
| 649 | if (nodep->mask == 0) { | ||
| 650 | assert(nodep->num_after != 0); | ||
| 651 | assert(nodep->idx + MASK_BITS > nodep->idx); | ||
| 652 | |||
| 653 | nodep->idx += MASK_BITS; | ||
| 654 | |||
| 655 | if (nodep->num_after >= MASK_BITS) { | ||
| 656 | nodep->mask = ~0; | ||
| 657 | nodep->num_after -= MASK_BITS; | ||
| 658 | } else { | ||
| 659 | nodep->mask = (1u << nodep->num_after) - 1; | ||
| 660 | nodep->num_after = 0; | ||
| 661 | } | ||
| 662 | |||
| 663 | reduction_performed = true; | ||
| 664 | continue; | ||
| 665 | } | ||
| 666 | |||
| 667 | /* | ||
| 668 | * 2) Potential reductions between the current and | ||
| 669 | * previous nodes. | ||
| 670 | */ | ||
| 671 | prev = node_prev(s, nodep); | ||
| 672 | if (prev) { | ||
| 673 | sparsebit_idx_t prev_highest_bit; | ||
| 674 | |||
| 675 | /* Nodes with no bits set can be removed. */ | ||
| 676 | if (prev->mask == 0 && prev->num_after == 0) { | ||
| 677 | node_rm(s, prev); | ||
| 678 | |||
| 679 | reduction_performed = true; | ||
| 680 | continue; | ||
| 681 | } | ||
| 682 | |||
| 683 | /* | ||
| 684 | * All mask bits set and previous node has | ||
| 685 | * adjacent index. | ||
| 686 | */ | ||
| 687 | if (nodep->mask + 1 == 0 && | ||
| 688 | prev->idx + MASK_BITS == nodep->idx) { | ||
| 689 | prev->num_after += MASK_BITS + nodep->num_after; | ||
| 690 | nodep->mask = 0; | ||
| 691 | nodep->num_after = 0; | ||
| 692 | |||
| 693 | reduction_performed = true; | ||
| 694 | continue; | ||
| 695 | } | ||
| 696 | |||
| 697 | /* | ||
| 698 | * Is node adjacent to previous node and the node | ||
| 699 | * contains a single contiguous range of bits | ||
| 700 | * starting from the beginning of the mask? | ||
| 701 | */ | ||
| 702 | prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after; | ||
| 703 | if (prev_highest_bit + 1 == nodep->idx && | ||
| 704 | (nodep->mask | (nodep->mask >> 1)) == nodep->mask) { | ||
| 705 | /* | ||
| 706 | * How many contiguous bits are there? | ||
| 707 | * Is equal to the total number of set | ||
| 708 | * bits, due to an earlier check that | ||
| 709 | * there is a single contiguous range of | ||
| 710 | * set bits. | ||
| 711 | */ | ||
| 712 | unsigned int num_contiguous | ||
| 713 | = __builtin_popcount(nodep->mask); | ||
| 714 | assert((num_contiguous > 0) && | ||
| 715 | ((1ULL << num_contiguous) - 1) == nodep->mask); | ||
| 716 | |||
| 717 | prev->num_after += num_contiguous; | ||
| 718 | nodep->mask = 0; | ||
| 719 | |||
| 720 | /* | ||
| 721 | * For predictable performance, handle special | ||
| 722 | * case where all mask bits are set and there | ||
| 723 | * is a non-zero num_after setting. This code | ||
| 724 | * is functionally correct without the following | ||
| 725 | * conditionalized statements, but without them | ||
| 726 | * the value of num_after is only reduced by | ||
| 727 | * the number of mask bits per pass. There are | ||
| 728 | * cases where num_after can be close to 2^64. | ||
| 729 | * Without this code it could take nearly | ||
| 730 | * (2^64) / 32 passes to perform the full | ||
| 731 | * reduction. | ||
| 732 | */ | ||
| 733 | if (num_contiguous == MASK_BITS) { | ||
| 734 | prev->num_after += nodep->num_after; | ||
| 735 | nodep->num_after = 0; | ||
| 736 | } | ||
| 737 | |||
| 738 | reduction_performed = true; | ||
| 739 | continue; | ||
| 740 | } | ||
| 741 | } | ||
| 742 | |||
| 743 | /* | ||
| 744 | * 3) Potential reductions between the current and | ||
| 745 | * next nodes. | ||
| 746 | */ | ||
| 747 | next = node_next(s, nodep); | ||
| 748 | if (next) { | ||
| 749 | /* Nodes with no bits set can be removed. */ | ||
| 750 | if (next->mask == 0 && next->num_after == 0) { | ||
| 751 | node_rm(s, next); | ||
| 752 | reduction_performed = true; | ||
| 753 | continue; | ||
| 754 | } | ||
| 755 | |||
| 756 | /* | ||
| 757 | * Is next node index adjacent to current node | ||
| 758 | * and has a mask with all bits set? | ||
| 759 | */ | ||
| 760 | if (next->idx == nodep->idx + MASK_BITS + nodep->num_after && | ||
| 761 | next->mask == ~(mask_t) 0) { | ||
| 762 | nodep->num_after += MASK_BITS; | ||
| 763 | next->mask = 0; | ||
| 764 | nodep->num_after += next->num_after; | ||
| 765 | next->num_after = 0; | ||
| 766 | |||
| 767 | node_rm(s, next); | ||
| 768 | next = NULL; | ||
| 769 | |||
| 770 | reduction_performed = true; | ||
| 771 | continue; | ||
| 772 | } | ||
| 773 | } | ||
| 774 | } while (nodep && reduction_performed); | ||
| 775 | } | ||
| 776 | |||
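The (idx, mask, num_after) encoding that node_reduce() maintains can be hard to picture from the invariants alone. The following standalone sketch is not part of the patch; it simply enumerates the set bits a single node describes, assuming the 32-bit mask_t used earlier in this file, and the helper names are illustrative only.

```c
#include <stdint.h>
#include <stdio.h>

#define MASK_BITS 32	/* matches the 32-bit mask_t assumed above */

/* List every bit index that one (idx, mask, num_after) node marks as set. */
static void print_node_bits(uint64_t idx, uint32_t mask, uint64_t num_after)
{
	uint64_t i;

	/* Bits idx .. idx + MASK_BITS - 1 follow the mask bit-for-bit. */
	for (i = 0; i < MASK_BITS; i++)
		if (mask & (1u << i))
			printf("0x%llx\n", (unsigned long long)(idx + i));

	/* The next num_after bits are implicitly set, no mask needed. */
	for (i = 0; i < num_after; i++)
		printf("0x%llx\n", (unsigned long long)(idx + MASK_BITS + i));
}

int main(void)
{
	/*
	 * One node with idx = 0, all 32 mask bits set and num_after = 32
	 * describes bits 0x0 through 0x3f; this is the compact form that
	 * node_reduce() produces from two adjacent fully populated nodes.
	 */
	print_node_bits(0, ~0u, 32);
	return 0;
}
```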
| 777 | /* Returns whether the bit at the index given by idx, within the | ||
| 778 | * sparsebit array is set or not. | ||
| 779 | */ | ||
| 780 | bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 781 | { | ||
| 782 | struct node *nodep; | ||
| 783 | |||
| 784 | /* Find the node that describes the setting of the bit at idx */ | ||
| 785 | for (nodep = s->root; nodep; | ||
| 786 | nodep = nodep->idx > idx ? nodep->left : nodep->right) | ||
| 787 | if (idx >= nodep->idx && | ||
| 788 | idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) | ||
| 789 | goto have_node; | ||
| 790 | |||
| 791 | return false; | ||
| 792 | |||
| 793 | have_node: | ||
| 794 | /* Bit is set if it is any of the bits described by num_after */ | ||
| 795 | if (nodep->num_after && idx >= nodep->idx + MASK_BITS) | ||
| 796 | return true; | ||
| 797 | |||
| 798 | /* Is the corresponding mask bit set */ | ||
| 799 | assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS); | ||
| 800 | return !!(nodep->mask & (1 << (idx - nodep->idx))); | ||
| 801 | } | ||
| 802 | |||
| 803 | /* Within the sparsebit array pointed to by s, sets the bit | ||
| 804 | * at the index given by idx. | ||
| 805 | */ | ||
| 806 | static void bit_set(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 807 | { | ||
| 808 | struct node *nodep; | ||
| 809 | |||
| 810 | /* Skip bits that are already set */ | ||
| 811 | if (sparsebit_is_set(s, idx)) | ||
| 812 | return; | ||
| 813 | |||
| 814 | /* | ||
| 815 | * Get a node where the bit at idx is described by the mask. | ||
| 816 | * node_split() will also create a node if there isn't | ||
| 817 | * already a node that describes the setting of that bit. | ||
| 818 | */ | ||
| 819 | nodep = node_split(s, idx & -MASK_BITS); | ||
| 820 | |||
| 821 | /* Set the bit within the node's mask */ | ||
| 822 | assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); | ||
| 823 | assert(!(nodep->mask & (1 << (idx - nodep->idx)))); | ||
| 824 | nodep->mask |= 1 << (idx - nodep->idx); | ||
| 825 | s->num_set++; | ||
| 826 | |||
| 827 | node_reduce(s, nodep); | ||
| 828 | } | ||
| 829 | |||
| 830 | /* Within the sparsebit array pointed to by s, clears the bit | ||
| 831 | * at the index given by idx. | ||
| 832 | */ | ||
| 833 | static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 834 | { | ||
| 835 | struct node *nodep; | ||
| 836 | |||
| 837 | /* Skip bits that are already cleared */ | ||
| 838 | if (!sparsebit_is_set(s, idx)) | ||
| 839 | return; | ||
| 840 | |||
| 841 | /* Is there a node that describes the setting of this bit? */ | ||
| 842 | nodep = node_find(s, idx); | ||
| 843 | if (!nodep) | ||
| 844 | return; | ||
| 845 | |||
| 846 | /* | ||
| 847 | * If a num_after bit, split the node, so that the bit is | ||
| 848 | * part of a node mask. | ||
| 849 | */ | ||
| 850 | if (idx >= nodep->idx + MASK_BITS) | ||
| 851 | nodep = node_split(s, idx & -MASK_BITS); | ||
| 852 | |||
| 853 | /* | ||
| 854 | * After node_split above, bit at idx should be within the mask. | ||
| 855 | * Clear that bit. | ||
| 856 | */ | ||
| 857 | assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); | ||
| 858 | assert(nodep->mask & (1 << (idx - nodep->idx))); | ||
| 859 | nodep->mask &= ~(1 << (idx - nodep->idx)); | ||
| 860 | assert(s->num_set > 0 || sparsebit_all_set(s)); | ||
| 861 | s->num_set--; | ||
| 862 | |||
| 863 | node_reduce(s, nodep); | ||
| 864 | } | ||
| 865 | |||
| 866 | /* Recursively dumps to the FILE stream given by stream the contents | ||
| 867 | * of the sub-tree of nodes pointed to by nodep. Each line of output | ||
| 868 | * is prefixed by the number of spaces given by indent. On each | ||
| 869 | * recursion, the indent amount is increased by 2. This causes nodes | ||
| 870 | * at each level deeper into the binary search tree to be displayed | ||
| 871 | * with a greater indent. | ||
| 872 | */ | ||
| 873 | static void dump_nodes(FILE *stream, struct node *nodep, | ||
| 874 | unsigned int indent) | ||
| 875 | { | ||
| 876 | char *node_type; | ||
| 877 | |||
| 878 | /* Dump contents of node */ | ||
| 879 | if (!nodep->parent) | ||
| 880 | node_type = "root"; | ||
| 881 | else if (nodep == nodep->parent->left) | ||
| 882 | node_type = "left"; | ||
| 883 | else { | ||
| 884 | assert(nodep == nodep->parent->right); | ||
| 885 | node_type = "right"; | ||
| 886 | } | ||
| 887 | fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep); | ||
| 888 | fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "", | ||
| 889 | nodep->parent, nodep->left, nodep->right); | ||
| 890 | fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n", | ||
| 891 | indent, "", nodep->idx, nodep->mask, nodep->num_after); | ||
| 892 | |||
| 893 | /* If present, dump contents of left child nodes */ | ||
| 894 | if (nodep->left) | ||
| 895 | dump_nodes(stream, nodep->left, indent + 2); | ||
| 896 | |||
| 897 | /* If present, dump contents of right child nodes */ | ||
| 898 | if (nodep->right) | ||
| 899 | dump_nodes(stream, nodep->right, indent + 2); | ||
| 900 | } | ||
| 901 | |||
| 902 | static inline sparsebit_idx_t node_first_set(struct node *nodep, int start) | ||
| 903 | { | ||
| 904 | mask_t leading = (mask_t)1 << start; | ||
| 905 | int n1 = __builtin_ctz(nodep->mask & -leading); | ||
| 906 | |||
| 907 | return nodep->idx + n1; | ||
| 908 | } | ||
| 909 | |||
| 910 | static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) | ||
| 911 | { | ||
| 912 | mask_t leading = (mask_t)1 << start; | ||
| 913 | int n1 = __builtin_ctz(~nodep->mask & -leading); | ||
| 914 | |||
| 915 | return nodep->idx + n1; | ||
| 916 | } | ||
| 917 | |||
| 918 | /* Dumps to the FILE stream specified by stream, the implementation dependent | ||
| 919 | * internal state of s. Each line of output is prefixed with the number | ||
| 920 | * of spaces given by indent. The output is completely implementation | ||
| 921 | * dependent and subject to change. Output from this function should only | ||
| 922 | * be used for diagnostic purposes. For example, this function can be | ||
| 923 | * used by test cases after they detect an unexpected condition, as a means | ||
| 924 | * to capture diagnostic information. | ||
| 925 | */ | ||
| 926 | static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, | ||
| 927 | unsigned int indent) | ||
| 928 | { | ||
| 929 | /* Dump the contents of s */ | ||
| 930 | fprintf(stream, "%*sroot: %p\n", indent, "", s->root); | ||
| 931 | fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set); | ||
| 932 | |||
| 933 | if (s->root) | ||
| 934 | dump_nodes(stream, s->root, indent); | ||
| 935 | } | ||
| 936 | |||
| 937 | /* Allocates and returns a new sparsebit array. The initial state | ||
| 938 | * of the newly allocated sparsebit array has all bits cleared. | ||
| 939 | */ | ||
| 940 | struct sparsebit *sparsebit_alloc(void) | ||
| 941 | { | ||
| 942 | struct sparsebit *s; | ||
| 943 | |||
| 944 | /* Allocate top level structure. */ | ||
| 945 | s = calloc(1, sizeof(*s)); | ||
| 946 | if (!s) { | ||
| 947 | perror("calloc"); | ||
| 948 | abort(); | ||
| 949 | } | ||
| 950 | |||
| 951 | return s; | ||
| 952 | } | ||
| 953 | |||
| 954 | /* Frees the implementation dependent data for the sparsebit array | ||
| 955 | * pointed to by s and poisons the pointer to that data. | ||
| 956 | */ | ||
| 957 | void sparsebit_free(struct sparsebit **sbitp) | ||
| 958 | { | ||
| 959 | struct sparsebit *s = *sbitp; | ||
| 960 | |||
| 961 | if (!s) | ||
| 962 | return; | ||
| 963 | |||
| 964 | sparsebit_clear_all(s); | ||
| 965 | free(s); | ||
| 966 | *sbitp = NULL; | ||
| 967 | } | ||
| 968 | |||
| 969 | /* Makes a copy of the sparsebit array given by s, to the sparsebit | ||
| 970 | * array given by d. Note, d must have already been allocated via | ||
| 971 | * sparsebit_alloc(). It may already have bits set, which | ||
| 972 | * will be cleared before the copy is made. | ||
| 973 | */ | ||
| 974 | void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) | ||
| 975 | { | ||
| 976 | /* First clear any bits already set in the destination */ | ||
| 977 | sparsebit_clear_all(d); | ||
| 978 | |||
| 979 | if (s->root) { | ||
| 980 | d->root = node_copy_subtree(s->root); | ||
| 981 | d->num_set = s->num_set; | ||
| 982 | } | ||
| 983 | } | ||
| 984 | |||
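A minimal lifecycle sketch of the public functions defined above (alloc, set, copy, free). It assumes this file is linked together with its accompanying sparsebit.h header; the wrapper function name is made up for illustration.

```c
#include <assert.h>
#include "sparsebit.h"	/* declarations for the functions in this file */

static void sparsebit_usage_example(void)
{
	struct sparsebit *src = sparsebit_alloc();
	struct sparsebit *dst = sparsebit_alloc();

	sparsebit_set(src, 0x5);
	sparsebit_set_num(src, 0xa, 5);		/* sets bits 0xa through 0xe */

	sparsebit_copy(dst, src);		/* dst now mirrors src */
	assert(sparsebit_is_set(dst, 0x5));
	assert(sparsebit_is_set_num(dst, 0xa, 5));

	sparsebit_free(&src);			/* pointers are poisoned */
	sparsebit_free(&dst);
	assert(!src && !dst);
}
```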
| 985 | /* Returns whether num consecutive bits starting at idx are all set. */ | ||
| 986 | bool sparsebit_is_set_num(struct sparsebit *s, | ||
| 987 | sparsebit_idx_t idx, sparsebit_num_t num) | ||
| 988 | { | ||
| 989 | sparsebit_idx_t next_cleared; | ||
| 990 | |||
| 991 | assert(num > 0); | ||
| 992 | assert(idx + num - 1 >= idx); | ||
| 993 | |||
| 994 | /* With num > 0, the first bit must be set. */ | ||
| 995 | if (!sparsebit_is_set(s, idx)) | ||
| 996 | return false; | ||
| 997 | |||
| 998 | /* Find the next cleared bit */ | ||
| 999 | next_cleared = sparsebit_next_clear(s, idx); | ||
| 1000 | |||
| 1001 | /* | ||
| 1002 | * If no cleared bits beyond idx, then there are at least num | ||
| 1003 | * set bits. idx + num doesn't wrap. Otherwise check if | ||
| 1004 | * there are enough set bits between idx and the next cleared bit. | ||
| 1005 | */ | ||
| 1006 | return next_cleared == 0 || next_cleared - idx >= num; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | /* Returns whether the bit at the index given by idx is cleared. */ | ||
| 1010 | bool sparsebit_is_clear(struct sparsebit *s, | ||
| 1011 | sparsebit_idx_t idx) | ||
| 1012 | { | ||
| 1013 | return !sparsebit_is_set(s, idx); | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | /* Returns whether num consecutive bits starting at idx are all cleared. */ | ||
| 1017 | bool sparsebit_is_clear_num(struct sparsebit *s, | ||
| 1018 | sparsebit_idx_t idx, sparsebit_num_t num) | ||
| 1019 | { | ||
| 1020 | sparsebit_idx_t next_set; | ||
| 1021 | |||
| 1022 | assert(num > 0); | ||
| 1023 | assert(idx + num - 1 >= idx); | ||
| 1024 | |||
| 1025 | /* With num > 0, the first bit must be cleared. */ | ||
| 1026 | if (!sparsebit_is_clear(s, idx)) | ||
| 1027 | return false; | ||
| 1028 | |||
| 1029 | /* Find the next set bit */ | ||
| 1030 | next_set = sparsebit_next_set(s, idx); | ||
| 1031 | |||
| 1032 | /* | ||
| 1033 | * If no set bits beyond idx, then there are at least num | ||
| 1034 | * cleared bits. idx + num doesn't wrap. Otherwise check if | ||
| 1035 | * there are enough cleared bits between idx and the next set bit. | ||
| 1036 | */ | ||
| 1037 | return next_set == 0 || next_set - idx >= num; | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | /* Returns the total number of bits set. Note: 0 is also returned for | ||
| 1041 | * the case of all bits set. This is because with all bits set, there | ||
| 1042 | * is 1 additional bit set beyond what can be represented in the return | ||
| 1043 | * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, | ||
| 1044 | * to determine if the sparsebit array has any bits set. | ||
| 1045 | */ | ||
| 1046 | sparsebit_num_t sparsebit_num_set(struct sparsebit *s) | ||
| 1047 | { | ||
| 1048 | return s->num_set; | ||
| 1049 | } | ||
| 1050 | |||
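As the comment above warns, a return of 0 from sparsebit_num_set() is ambiguous: it occurs both when no bits are set and when all 2^64 bits are set, since that count does not fit in the return type. A short sketch of the recommended emptiness check follows; the helper name is hypothetical.

```c
#include <stdbool.h>
#include "sparsebit.h"

/* Illustrative helper: test for "no bits set" without using num_set. */
static bool sparsebit_is_empty(struct sparsebit *s)
{
	/*
	 * sparsebit_num_set(s) == 0 would also be true when all 2^64 bits
	 * are set, so the unambiguous predicate is sparsebit_any_set().
	 */
	return !sparsebit_any_set(s);
}
```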
| 1051 | /* Returns whether any bit is set in the sparsebit array. */ | ||
| 1052 | bool sparsebit_any_set(struct sparsebit *s) | ||
| 1053 | { | ||
| 1054 | /* | ||
| 1055 | * Nodes only describe set bits. If any nodes then there | ||
| 1056 | * is at least 1 bit set. | ||
| 1057 | */ | ||
| 1058 | if (!s->root) | ||
| 1059 | return false; | ||
| 1060 | |||
| 1061 | /* | ||
| 1062 | * Every node should have a non-zero mask. For now will | ||
| 1063 | * just assure that the root node has a non-zero mask, | ||
| 1064 | * which is a quick check that at least 1 bit is set. | ||
| 1065 | */ | ||
| 1066 | assert(s->root->mask != 0); | ||
| 1067 | assert(s->num_set > 0 || | ||
| 1068 | (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS && | ||
| 1069 | s->root->mask == ~(mask_t) 0)); | ||
| 1070 | |||
| 1071 | return true; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | /* Returns whether all the bits in the sparsebit array are cleared. */ | ||
| 1075 | bool sparsebit_all_clear(struct sparsebit *s) | ||
| 1076 | { | ||
| 1077 | return !sparsebit_any_set(s); | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | /* Returns whether any bit in the sparsebit array is cleared. */ | ||
| 1081 | bool sparsebit_any_clear(struct sparsebit *s) | ||
| 1082 | { | ||
| 1083 | return !sparsebit_all_set(s); | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | /* Returns the index of the first set bit. Abort if no bits are set. | ||
| 1087 | */ | ||
| 1088 | sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) | ||
| 1089 | { | ||
| 1090 | struct node *nodep; | ||
| 1091 | |||
| 1092 | /* Validate at least 1 bit is set */ | ||
| 1093 | assert(sparsebit_any_set(s)); | ||
| 1094 | |||
| 1095 | nodep = node_first(s); | ||
| 1096 | return node_first_set(nodep, 0); | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | /* Returns the index of the first cleared bit. Abort if | ||
| 1100 | * no bits are cleared. | ||
| 1101 | */ | ||
| 1102 | sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) | ||
| 1103 | { | ||
| 1104 | struct node *nodep1, *nodep2; | ||
| 1105 | |||
| 1106 | /* Validate at least 1 bit is cleared. */ | ||
| 1107 | assert(sparsebit_any_clear(s)); | ||
| 1108 | |||
| 1109 | /* If no nodes or first node index > 0 then lowest cleared is 0 */ | ||
| 1110 | nodep1 = node_first(s); | ||
| 1111 | if (!nodep1 || nodep1->idx > 0) | ||
| 1112 | return 0; | ||
| 1113 | |||
| 1114 | /* Does the mask in the first node contain any cleared bits. */ | ||
| 1115 | if (nodep1->mask != ~(mask_t) 0) | ||
| 1116 | return node_first_clear(nodep1, 0); | ||
| 1117 | |||
| 1118 | /* | ||
| 1119 | * All mask bits set in first node. If there isn't a second node | ||
| 1120 | * then the first cleared bit is the first bit after the bits | ||
| 1121 | * described by the first node. | ||
| 1122 | */ | ||
| 1123 | nodep2 = node_next(s, nodep1); | ||
| 1124 | if (!nodep2) { | ||
| 1125 | /* | ||
| 1126 | * No second node. First cleared bit is first bit beyond | ||
| 1127 | * bits described by first node. | ||
| 1128 | */ | ||
| 1129 | assert(nodep1->mask == ~(mask_t) 0); | ||
| 1130 | assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0); | ||
| 1131 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | /* | ||
| 1135 | * There is a second node. | ||
| 1136 | * If it is not adjacent to the first node, then there is a gap | ||
| 1137 | * of cleared bits between the nodes, and the first cleared bit | ||
| 1138 | * is the first bit within the gap. | ||
| 1139 | */ | ||
| 1140 | if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) | ||
| 1141 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
| 1142 | |||
| 1143 | /* | ||
| 1144 | * Second node is adjacent to the first node. | ||
| 1145 | * Because it is adjacent, its mask should be non-zero. If all | ||
| 1146 | * its mask bits are set, then with it being adjacent, it should | ||
| 1147 | * have had the mask bits moved into the num_after setting of the | ||
| 1148 | * previous node. | ||
| 1149 | */ | ||
| 1150 | return node_first_clear(nodep2, 0); | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | /* Returns index of next bit set within s after the index given by prev. | ||
| 1154 | * Returns 0 if there are no bits after prev that are set. | ||
| 1155 | */ | ||
| 1156 | sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, | ||
| 1157 | sparsebit_idx_t prev) | ||
| 1158 | { | ||
| 1159 | sparsebit_idx_t lowest_possible = prev + 1; | ||
| 1160 | sparsebit_idx_t start; | ||
| 1161 | struct node *nodep; | ||
| 1162 | |||
| 1163 | /* A bit after the highest index can't be set. */ | ||
| 1164 | if (lowest_possible == 0) | ||
| 1165 | return 0; | ||
| 1166 | |||
| 1167 | /* | ||
| 1168 | * Find the leftmost 'candidate' overlapping or to the right | ||
| 1169 | * of lowest_possible. | ||
| 1170 | */ | ||
| 1171 | struct node *candidate = NULL; | ||
| 1172 | |||
| 1173 | /* True iff lowest_possible is within candidate */ | ||
| 1174 | bool contains = false; | ||
| 1175 | |||
| 1176 | /* | ||
| 1177 | * Find node that describes setting of bit at lowest_possible. | ||
| 1178 | * If such a node doesn't exist, find the node with the lowest | ||
| 1179 | * starting index that is > lowest_possible. | ||
| 1180 | */ | ||
| 1181 | for (nodep = s->root; nodep;) { | ||
| 1182 | if ((nodep->idx + MASK_BITS + nodep->num_after - 1) | ||
| 1183 | >= lowest_possible) { | ||
| 1184 | candidate = nodep; | ||
| 1185 | if (candidate->idx <= lowest_possible) { | ||
| 1186 | contains = true; | ||
| 1187 | break; | ||
| 1188 | } | ||
| 1189 | nodep = nodep->left; | ||
| 1190 | } else { | ||
| 1191 | nodep = nodep->right; | ||
| 1192 | } | ||
| 1193 | } | ||
| 1194 | if (!candidate) | ||
| 1195 | return 0; | ||
| 1196 | |||
| 1197 | assert(candidate->mask != 0); | ||
| 1198 | |||
| 1199 | /* Does the candidate node describe the setting of lowest_possible? */ | ||
| 1200 | if (!contains) { | ||
| 1201 | /* | ||
| 1202 | * Candidate doesn't describe setting of bit at lowest_possible. | ||
| 1203 | * Candidate points to the first node with a starting index | ||
| 1204 | * > lowest_possible. | ||
| 1205 | */ | ||
| 1206 | assert(candidate->idx > lowest_possible); | ||
| 1207 | |||
| 1208 | return node_first_set(candidate, 0); | ||
| 1209 | } | ||
| 1210 | |||
| 1211 | /* | ||
| 1212 | * Candidate describes setting of bit at lowest_possible. | ||
| 1213 | * Note: although the node describes the setting of the bit | ||
| 1214 | * at lowest_possible, it's possible that its setting and the | ||
| 1215 | * setting of all latter bits described by this node are 0. | ||
| 1216 | * For now, just handle the cases where this node describes | ||
| 1217 | * a bit at or after an index of lowest_possible that is set. | ||
| 1218 | */ | ||
| 1219 | start = lowest_possible - candidate->idx; | ||
| 1220 | |||
| 1221 | if (start < MASK_BITS && candidate->mask >= (1 << start)) | ||
| 1222 | return node_first_set(candidate, start); | ||
| 1223 | |||
| 1224 | if (candidate->num_after) { | ||
| 1225 | sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS; | ||
| 1226 | |||
| 1227 | return lowest_possible < first_num_after_idx | ||
| 1228 | ? first_num_after_idx : lowest_possible; | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | /* | ||
| 1232 | * Although candidate node describes setting of bit at | ||
| 1233 | * the index of lowest_possible, all bits at that index and | ||
| 1234 | * latter that are described by candidate are cleared. With | ||
| 1235 | * this, the next bit is the first bit in the next node, if | ||
| 1236 | * such a node exists. If a next node doesn't exist, then | ||
| 1237 | * there is no next set bit. | ||
| 1238 | */ | ||
| 1239 | candidate = node_next(s, candidate); | ||
| 1240 | if (!candidate) | ||
| 1241 | return 0; | ||
| 1242 | |||
| 1243 | return node_first_set(candidate, 0); | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | /* Returns index of next bit cleared within s after the index given by prev. | ||
| 1247 | * Returns 0 if there are no bits after prev that are cleared. | ||
| 1248 | */ | ||
| 1249 | sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, | ||
| 1250 | sparsebit_idx_t prev) | ||
| 1251 | { | ||
| 1252 | sparsebit_idx_t lowest_possible = prev + 1; | ||
| 1253 | sparsebit_idx_t idx; | ||
| 1254 | struct node *nodep1, *nodep2; | ||
| 1255 | |||
| 1256 | /* A bit after the highest index can't be set. */ | ||
| 1257 | if (lowest_possible == 0) | ||
| 1258 | return 0; | ||
| 1259 | |||
| 1260 | /* | ||
| 1261 | * Does a node describing the setting of lowest_possible exist? | ||
| 1262 | * If not, the bit at lowest_possible is cleared. | ||
| 1263 | */ | ||
| 1264 | nodep1 = node_find(s, lowest_possible); | ||
| 1265 | if (!nodep1) | ||
| 1266 | return lowest_possible; | ||
| 1267 | |||
| 1268 | /* Does a mask bit in node 1 describe the next cleared bit. */ | ||
| 1269 | for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++) | ||
| 1270 | if (!(nodep1->mask & (1 << idx))) | ||
| 1271 | return nodep1->idx + idx; | ||
| 1272 | |||
| 1273 | /* | ||
| 1274 | * Next cleared bit is not described by node 1. If there | ||
| 1275 | * isn't a next node, then next cleared bit is described | ||
| 1276 | * by bit after the bits described by the first node. | ||
| 1277 | */ | ||
| 1278 | nodep2 = node_next(s, nodep1); | ||
| 1279 | if (!nodep2) | ||
| 1280 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
| 1281 | |||
| 1282 | /* | ||
| 1283 | * There is a second node. | ||
| 1284 | * If it is not adjacent to the first node, then there is a gap | ||
| 1285 | * of cleared bits between the nodes, and the next cleared bit | ||
| 1286 | * is the first bit within the gap. | ||
| 1287 | */ | ||
| 1288 | if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) | ||
| 1289 | return nodep1->idx + MASK_BITS + nodep1->num_after; | ||
| 1290 | |||
| 1291 | /* | ||
| 1292 | * Second node is adjacent to the first node. | ||
| 1293 | * Because it is adjacent, its mask should be non-zero. If all | ||
| 1294 | * its mask bits are set, then with it being adjacent, it should | ||
| 1295 | * have had the mask bits moved into the num_after setting of the | ||
| 1296 | * previous node. | ||
| 1297 | */ | ||
| 1298 | return node_first_clear(nodep2, 0); | ||
| 1299 | } | ||
| 1300 | |||
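Because sparsebit_next_set() and sparsebit_next_clear() search strictly after the given index and use 0 as their "nothing found" return, a caller that wants to walk every set bit has to treat index 0 separately. A hedged sketch of such a walk (the helper name is invented for illustration):

```c
#include <stdio.h>
#include "sparsebit.h"

/* Print every set bit; index 0 needs an explicit check because a return
 * value of 0 from sparsebit_next_set() means "no further set bits". */
static void walk_set_bits(struct sparsebit *s)
{
	sparsebit_idx_t idx;

	if (sparsebit_is_set(s, 0))
		printf("0x0\n");

	for (idx = sparsebit_next_set(s, 0); idx;
	     idx = sparsebit_next_set(s, idx))
		printf("0x%llx\n", (unsigned long long)idx);
}
```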
| 1301 | /* Starting with the index 1 greater than the index given by start, finds | ||
| 1302 | * and returns the index of the first sequence of num consecutively set | ||
| 1303 | * bits. Returns a value of 0 if no such sequence exists. | ||
| 1304 | */ | ||
| 1305 | sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, | ||
| 1306 | sparsebit_idx_t start, sparsebit_num_t num) | ||
| 1307 | { | ||
| 1308 | sparsebit_idx_t idx; | ||
| 1309 | |||
| 1310 | assert(num >= 1); | ||
| 1311 | |||
| 1312 | for (idx = sparsebit_next_set(s, start); | ||
| 1313 | idx != 0 && idx + num - 1 >= idx; | ||
| 1314 | idx = sparsebit_next_set(s, idx)) { | ||
| 1315 | assert(sparsebit_is_set(s, idx)); | ||
| 1316 | |||
| 1317 | /* | ||
| 1318 | * Does the sequence of bits starting at idx consist of | ||
| 1319 | * num set bits? | ||
| 1320 | */ | ||
| 1321 | if (sparsebit_is_set_num(s, idx, num)) | ||
| 1322 | return idx; | ||
| 1323 | |||
| 1324 | /* | ||
| 1325 | * Sequence of set bits at idx isn't large enough. | ||
| 1326 | * Skip this entire sequence of set bits. | ||
| 1327 | */ | ||
| 1328 | idx = sparsebit_next_clear(s, idx); | ||
| 1329 | if (idx == 0) | ||
| 1330 | return 0; | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | return 0; | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | /* Starting with the index 1 greater than the index given by start, finds | ||
| 1337 | * and returns the index of the first sequence of num consecutively cleared | ||
| 1338 | * bits. Returns a value of 0 if no such sequence exists. | ||
| 1339 | */ | ||
| 1340 | sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, | ||
| 1341 | sparsebit_idx_t start, sparsebit_num_t num) | ||
| 1342 | { | ||
| 1343 | sparsebit_idx_t idx; | ||
| 1344 | |||
| 1345 | assert(num >= 1); | ||
| 1346 | |||
| 1347 | for (idx = sparsebit_next_clear(s, start); | ||
| 1348 | idx != 0 && idx + num - 1 >= idx; | ||
| 1349 | idx = sparsebit_next_clear(s, idx)) { | ||
| 1350 | assert(sparsebit_is_clear(s, idx)); | ||
| 1351 | |||
| 1352 | /* | ||
| 1353 | * Does the sequence of bits starting at idx consist of | ||
| 1354 | * num cleared bits? | ||
| 1355 | */ | ||
| 1356 | if (sparsebit_is_clear_num(s, idx, num)) | ||
| 1357 | return idx; | ||
| 1358 | |||
| 1359 | /* | ||
| 1360 | * Sequence of cleared bits at idx isn't large enough. | ||
| 1361 | * Skip this entire sequence of cleared bits. | ||
| 1362 | */ | ||
| 1363 | idx = sparsebit_next_set(s, idx); | ||
| 1364 | if (idx == 0) | ||
| 1365 | return 0; | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | return 0; | ||
| 1369 | } | ||
| 1370 | |||
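One natural use of the *_num search routines is carving out a contiguous run of "free" bits, similar in spirit to how the KVM selftest library tracks guest pages. The sketch below is illustrative only: alloc_range(), free_pages, min and npages are invented names, and 0 doubles as the "not found" return, just as in the functions above.

```c
#include "sparsebit.h"

static sparsebit_idx_t alloc_range(struct sparsebit *free_pages,
				   sparsebit_idx_t min, sparsebit_num_t npages)
{
	sparsebit_idx_t base;

	/* Either 'min' itself starts a large enough run of set bits ... */
	if (sparsebit_is_set_num(free_pages, min, npages)) {
		base = min;
	} else {
		/* ... or search for the next such run above 'min'. */
		base = sparsebit_next_set_num(free_pages, min, npages);
		if (base == 0)		/* no run found anywhere past min */
			return 0;
	}

	sparsebit_clear_num(free_pages, base, npages);	/* mark allocated */
	return base;
}
```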
| 1371 | /* Sets the bits in the inclusive range start through start + num - 1. */ | ||
| 1372 | void sparsebit_set_num(struct sparsebit *s, | ||
| 1373 | sparsebit_idx_t start, sparsebit_num_t num) | ||
| 1374 | { | ||
| 1375 | struct node *nodep, *next; | ||
| 1376 | unsigned int n1; | ||
| 1377 | sparsebit_idx_t idx; | ||
| 1378 | sparsebit_num_t n; | ||
| 1379 | sparsebit_idx_t middle_start, middle_end; | ||
| 1380 | |||
| 1381 | assert(num > 0); | ||
| 1382 | assert(start + num - 1 >= start); | ||
| 1383 | |||
| 1384 | /* | ||
| 1385 | * Leading - bits before first mask boundary. | ||
| 1386 | * | ||
| 1387 | * TODO(lhuemill): With some effort it may be possible to | ||
| 1388 | * replace the following loop with a sequential sequence | ||
| 1389 | * of statements. High level sequence would be: | ||
| 1390 | * | ||
| 1391 | * 1. Use node_split() to force node that describes setting | ||
| 1392 | * of idx to be within the mask portion of a node. | ||
| 1393 | * 2. Form mask of bits to be set. | ||
| 1394 | * 3. Determine number of mask bits already set in the node | ||
| 1395 | * and store in a local variable named num_already_set. | ||
| 1396 | * 4. Set the appropriate mask bits within the node. | ||
| 1397 | * 5. Increment struct sparsebit_pvt num_set member | ||
| 1398 | * by the number of bits that were actually set. | ||
| 1399 | * Exclude from the counts bits that were already set. | ||
| 1400 | * 6. Before returning to the caller, use node_reduce() to | ||
| 1401 | * handle the multiple corner cases that this method | ||
| 1402 | * introduces. | ||
| 1403 | */ | ||
| 1404 | for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) | ||
| 1405 | bit_set(s, idx); | ||
| 1406 | |||
| 1407 | /* Middle - bits spanning one or more entire mask */ | ||
| 1408 | middle_start = idx; | ||
| 1409 | middle_end = middle_start + (n & -MASK_BITS) - 1; | ||
| 1410 | if (n >= MASK_BITS) { | ||
| 1411 | nodep = node_split(s, middle_start); | ||
| 1412 | |||
| 1413 | /* | ||
| 1414 | * As needed, split just after end of middle bits. | ||
| 1415 | * No split needed if end of middle bits is at highest | ||
| 1416 | * supported bit index. | ||
| 1417 | */ | ||
| 1418 | if (middle_end + 1 > middle_end) | ||
| 1419 | (void) node_split(s, middle_end + 1); | ||
| 1420 | |||
| 1421 | /* Delete nodes that only describe bits within the middle. */ | ||
| 1422 | for (next = node_next(s, nodep); | ||
| 1423 | next && (next->idx < middle_end); | ||
| 1424 | next = node_next(s, nodep)) { | ||
| 1425 | assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); | ||
| 1426 | node_rm(s, next); | ||
| 1427 | next = NULL; | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | /* As needed set each of the mask bits */ | ||
| 1431 | for (n1 = 0; n1 < MASK_BITS; n1++) { | ||
| 1432 | if (!(nodep->mask & (1 << n1))) { | ||
| 1433 | nodep->mask |= 1 << n1; | ||
| 1434 | s->num_set++; | ||
| 1435 | } | ||
| 1436 | } | ||
| 1437 | |||
| 1438 | s->num_set -= nodep->num_after; | ||
| 1439 | nodep->num_after = middle_end - middle_start + 1 - MASK_BITS; | ||
| 1440 | s->num_set += nodep->num_after; | ||
| 1441 | |||
| 1442 | node_reduce(s, nodep); | ||
| 1443 | } | ||
| 1444 | idx = middle_end + 1; | ||
| 1445 | n -= middle_end - middle_start + 1; | ||
| 1446 | |||
| 1447 | /* Trailing - bits at and beyond last mask boundary */ | ||
| 1448 | assert(n < MASK_BITS); | ||
| 1449 | for (; n > 0; idx++, n--) | ||
| 1450 | bit_set(s, idx); | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | /* Clears the bits in the inclusive range start through start + num - 1. */ | ||
| 1454 | void sparsebit_clear_num(struct sparsebit *s, | ||
| 1455 | sparsebit_idx_t start, sparsebit_num_t num) | ||
| 1456 | { | ||
| 1457 | struct node *nodep, *next; | ||
| 1458 | unsigned int n1; | ||
| 1459 | sparsebit_idx_t idx; | ||
| 1460 | sparsebit_num_t n; | ||
| 1461 | sparsebit_idx_t middle_start, middle_end; | ||
| 1462 | |||
| 1463 | assert(num > 0); | ||
| 1464 | assert(start + num - 1 >= start); | ||
| 1465 | |||
| 1466 | /* Leading - bits before first mask boundary */ | ||
| 1467 | for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) | ||
| 1468 | bit_clear(s, idx); | ||
| 1469 | |||
| 1470 | /* Middle - bits spanning one or more entire mask */ | ||
| 1471 | middle_start = idx; | ||
| 1472 | middle_end = middle_start + (n & -MASK_BITS) - 1; | ||
| 1473 | if (n >= MASK_BITS) { | ||
| 1474 | nodep = node_split(s, middle_start); | ||
| 1475 | |||
| 1476 | /* | ||
| 1477 | * As needed, split just after end of middle bits. | ||
| 1478 | * No split needed if end of middle bits is at highest | ||
| 1479 | * supported bit index. | ||
| 1480 | */ | ||
| 1481 | if (middle_end + 1 > middle_end) | ||
| 1482 | (void) node_split(s, middle_end + 1); | ||
| 1483 | |||
| 1484 | /* Delete nodes that only describe bits within the middle. */ | ||
| 1485 | for (next = node_next(s, nodep); | ||
| 1486 | next && (next->idx < middle_end); | ||
| 1487 | next = node_next(s, nodep)) { | ||
| 1488 | assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); | ||
| 1489 | node_rm(s, next); | ||
| 1490 | next = NULL; | ||
| 1491 | } | ||
| 1492 | |||
| 1493 | /* As needed clear each of the mask bits */ | ||
| 1494 | for (n1 = 0; n1 < MASK_BITS; n1++) { | ||
| 1495 | if (nodep->mask & (1 << n1)) { | ||
| 1496 | nodep->mask &= ~(1 << n1); | ||
| 1497 | s->num_set--; | ||
| 1498 | } | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | /* Clear any bits described by num_after */ | ||
| 1502 | s->num_set -= nodep->num_after; | ||
| 1503 | nodep->num_after = 0; | ||
| 1504 | |||
| 1505 | /* | ||
| 1506 | * Delete the node that describes the beginning of | ||
| 1507 | * the middle bits and perform any allowed reductions | ||
| 1508 | * with the nodes adjacent to nodep. | ||
| 1509 | */ | ||
| 1510 | node_reduce(s, nodep); | ||
| 1511 | nodep = NULL; | ||
| 1512 | } | ||
| 1513 | idx = middle_end + 1; | ||
| 1514 | n -= middle_end - middle_start + 1; | ||
| 1515 | |||
| 1516 | /* Trailing - bits at and beyond last mask boundary */ | ||
| 1517 | assert(n < MASK_BITS); | ||
| 1518 | for (; n > 0; idx++, n--) | ||
| 1519 | bit_clear(s, idx); | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | /* Sets the bit at the index given by idx. */ | ||
| 1523 | void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 1524 | { | ||
| 1525 | sparsebit_set_num(s, idx, 1); | ||
| 1526 | } | ||
| 1527 | |||
| 1528 | /* Clears the bit at the index given by idx. */ | ||
| 1529 | void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx) | ||
| 1530 | { | ||
| 1531 | sparsebit_clear_num(s, idx, 1); | ||
| 1532 | } | ||
| 1533 | |||
| 1534 | /* Sets the bits in the entire addressable range of the sparsebit array. */ | ||
| 1535 | void sparsebit_set_all(struct sparsebit *s) | ||
| 1536 | { | ||
| 1537 | sparsebit_set(s, 0); | ||
| 1538 | sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0); | ||
| 1539 | assert(sparsebit_all_set(s)); | ||
| 1540 | } | ||
| 1541 | |||
| 1542 | /* Clears the bits in the entire addressable range of the sparsebit array. */ | ||
| 1543 | void sparsebit_clear_all(struct sparsebit *s) | ||
| 1544 | { | ||
| 1545 | sparsebit_clear(s, 0); | ||
| 1546 | sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0); | ||
| 1547 | assert(!sparsebit_any_set(s)); | ||
| 1548 | } | ||
| 1549 | |||
| 1550 | static size_t display_range(FILE *stream, sparsebit_idx_t low, | ||
| 1551 | sparsebit_idx_t high, bool prepend_comma_space) | ||
| 1552 | { | ||
| 1553 | char *fmt_str; | ||
| 1554 | size_t sz; | ||
| 1555 | |||
| 1556 | /* Determine the printf format string */ | ||
| 1557 | if (low == high) | ||
| 1558 | fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx"; | ||
| 1559 | else | ||
| 1560 | fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx"; | ||
| 1561 | |||
| 1562 | /* | ||
| 1563 | * When stream is NULL, just determine the size of what would | ||
| 1564 | * have been printed, else print the range. | ||
| 1565 | */ | ||
| 1566 | if (!stream) | ||
| 1567 | sz = snprintf(NULL, 0, fmt_str, low, high); | ||
| 1568 | else | ||
| 1569 | sz = fprintf(stream, fmt_str, low, high); | ||
| 1570 | |||
| 1571 | return sz; | ||
| 1572 | } | ||
| 1573 | |||
| 1574 | |||
| 1575 | /* Dumps to the FILE stream given by stream, the bit settings | ||
| 1576 | * of s. Each line of output is prefixed with the number of | ||
| 1577 | * spaces given by indent. The length of each line is implementation | ||
| 1578 | * dependent and does not depend on the indent amount. The following | ||
| 1579 | * is an example output of a sparsebit array that has bits: | ||
| 1580 | * | ||
| 1581 | * 0x5, 0x8, 0xa:0xe, 0x12 | ||
| 1582 | * | ||
| 1583 | * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18 | ||
| 1584 | * are set. Note that a ':', instead of a '-' is used to specify a range of | ||
| 1585 | * contiguous bits. This is done because '-' is used to specify command-line | ||
| 1586 | * options, and sometimes ranges are specified as command-line arguments. | ||
| 1587 | */ | ||
| 1588 | void sparsebit_dump(FILE *stream, struct sparsebit *s, | ||
| 1589 | unsigned int indent) | ||
| 1590 | { | ||
| 1591 | size_t current_line_len = 0; | ||
| 1592 | size_t sz; | ||
| 1593 | struct node *nodep; | ||
| 1594 | |||
| 1595 | if (!sparsebit_any_set(s)) | ||
| 1596 | return; | ||
| 1597 | |||
| 1598 | /* Display initial indent */ | ||
| 1599 | fprintf(stream, "%*s", indent, ""); | ||
| 1600 | |||
| 1601 | /* For each node */ | ||
| 1602 | for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) { | ||
| 1603 | unsigned int n1; | ||
| 1604 | sparsebit_idx_t low, high; | ||
| 1605 | |||
| 1606 | /* For each group of bits in the mask */ | ||
| 1607 | for (n1 = 0; n1 < MASK_BITS; n1++) { | ||
| 1608 | if (nodep->mask & (1 << n1)) { | ||
| 1609 | low = high = nodep->idx + n1; | ||
| 1610 | |||
| 1611 | for (; n1 < MASK_BITS; n1++) { | ||
| 1612 | if (nodep->mask & (1 << n1)) | ||
| 1613 | high = nodep->idx + n1; | ||
| 1614 | else | ||
| 1615 | break; | ||
| 1616 | } | ||
| 1617 | |||
| 1618 | if ((n1 == MASK_BITS) && nodep->num_after) | ||
| 1619 | high += nodep->num_after; | ||
| 1620 | |||
| 1621 | /* | ||
| 1622 | * How much room will it take to display | ||
| 1623 | * this range. | ||
| 1624 | */ | ||
| 1625 | sz = display_range(NULL, low, high, | ||
| 1626 | current_line_len != 0); | ||
| 1627 | |||
| 1628 | /* | ||
| 1629 | * If there is not enough room, display | ||
| 1630 | * a newline plus the indent of the next | ||
| 1631 | * line. | ||
| 1632 | */ | ||
| 1633 | if (current_line_len + sz > DUMP_LINE_MAX) { | ||
| 1634 | fputs("\n", stream); | ||
| 1635 | fprintf(stream, "%*s", indent, ""); | ||
| 1636 | current_line_len = 0; | ||
| 1637 | } | ||
| 1638 | |||
| 1639 | /* Display the range */ | ||
| 1640 | sz = display_range(stream, low, high, | ||
| 1641 | current_line_len != 0); | ||
| 1642 | current_line_len += sz; | ||
| 1643 | } | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | /* | ||
| 1647 | * If num_after and most significant-bit of mask is not | ||
| 1648 | * set, then still need to display a range for the bits | ||
| 1649 | * described by num_after. | ||
| 1650 | */ | ||
| 1651 | if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) { | ||
| 1652 | low = nodep->idx + MASK_BITS; | ||
| 1653 | high = nodep->idx + MASK_BITS + nodep->num_after - 1; | ||
| 1654 | |||
| 1655 | /* | ||
| 1656 | * How much room will it take to display | ||
| 1657 | * this range. | ||
| 1658 | */ | ||
| 1659 | sz = display_range(NULL, low, high, | ||
| 1660 | current_line_len != 0); | ||
| 1661 | |||
| 1662 | /* | ||
| 1663 | * If there is not enough room, display | ||
| 1664 | * a newline plus the indent of the next | ||
| 1665 | * line. | ||
| 1666 | */ | ||
| 1667 | if (current_line_len + sz > DUMP_LINE_MAX) { | ||
| 1668 | fputs("\n", stream); | ||
| 1669 | fprintf(stream, "%*s", indent, ""); | ||
| 1670 | current_line_len = 0; | ||
| 1671 | } | ||
| 1672 | |||
| 1673 | /* Display the range */ | ||
| 1674 | sz = display_range(stream, low, high, | ||
| 1675 | current_line_len != 0); | ||
| 1676 | current_line_len += sz; | ||
| 1677 | } | ||
| 1678 | } | ||
| 1679 | fputs("\n", stream); | ||
| 1680 | } | ||
| 1681 | |||
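The example output quoted in the comment above can be reproduced with a few calls. This is a usage sketch only, assuming the file is linked with its accompanying sparsebit.h header.

```c
#include <stdio.h>
#include "sparsebit.h"

int main(void)
{
	struct sparsebit *s = sparsebit_alloc();

	sparsebit_set(s, 0x5);
	sparsebit_set(s, 0x8);
	sparsebit_set_num(s, 0xa, 5);	/* bits 0xa through 0xe */
	sparsebit_set(s, 0x12);

	/* Prints: 0x5, 0x8, 0xa:0xe, 0x12 */
	sparsebit_dump(stdout, s, 0);

	sparsebit_free(&s);
	return 0;
}
```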
| 1682 | /* Validates the internal state of the sparsebit array given by | ||
| 1683 | * s. On error, diagnostic information is printed to stderr and | ||
| 1684 | * abort is called. | ||
| 1685 | */ | ||
| 1686 | void sparsebit_validate_internal(struct sparsebit *s) | ||
| 1687 | { | ||
| 1688 | bool error_detected = false; | ||
| 1689 | struct node *nodep, *prev = NULL; | ||
| 1690 | sparsebit_num_t total_bits_set = 0; | ||
| 1691 | unsigned int n1; | ||
| 1692 | |||
| 1693 | /* For each node */ | ||
| 1694 | for (nodep = node_first(s); nodep; | ||
| 1695 | prev = nodep, nodep = node_next(s, nodep)) { | ||
| 1696 | |||
| 1697 | /* | ||
| 1698 | * Increase total bits set by the number of bits set | ||
| 1699 | * in this node. | ||
| 1700 | */ | ||
| 1701 | for (n1 = 0; n1 < MASK_BITS; n1++) | ||
| 1702 | if (nodep->mask & (1 << n1)) | ||
| 1703 | total_bits_set++; | ||
| 1704 | |||
| 1705 | total_bits_set += nodep->num_after; | ||
| 1706 | |||
| 1707 | /* | ||
| 1708 | * Arbitrary choice as to whether a mask of 0 is allowed | ||
| 1709 | * or not. For diagnostic purposes it is beneficial to | ||
| 1710 | * have only one valid means to represent a set of bits. | ||
| 1711 | * To support this an arbitrary choice has been made | ||
| 1712 | * to not allow a mask of zero. | ||
| 1713 | */ | ||
| 1714 | if (nodep->mask == 0) { | ||
| 1715 | fprintf(stderr, "Node mask of zero, " | ||
| 1716 | "nodep: %p nodep->mask: 0x%x", | ||
| 1717 | nodep, nodep->mask); | ||
| 1718 | error_detected = true; | ||
| 1719 | break; | ||
| 1720 | } | ||
| 1721 | |||
| 1722 | /* | ||
| 1723 | * Validate num_after is not greater than the max index | ||
| 1724 | * - the number of mask bits. The num_after member | ||
| 1725 | * uses 0-based indexing and thus has no value that | ||
| 1726 | * represents all bits set. This limitation is handled | ||
| 1727 | * by requiring a non-zero mask. With a non-zero mask, | ||
| 1728 | * MASK_BITS worth of bits are described by the mask, | ||
| 1729 | * which makes the largest needed num_after equal to: | ||
| 1730 | * | ||
| 1731 | * (~(sparsebit_num_t) 0) - MASK_BITS + 1 | ||
| 1732 | */ | ||
| 1733 | if (nodep->num_after | ||
| 1734 | > (~(sparsebit_num_t) 0) - MASK_BITS + 1) { | ||
| 1735 | fprintf(stderr, "num_after too large, " | ||
| 1736 | "nodep: %p nodep->num_after: 0x%lx", | ||
| 1737 | nodep, nodep->num_after); | ||
| 1738 | error_detected = true; | ||
| 1739 | break; | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | /* Validate node index is divisible by the mask size */ | ||
| 1743 | if (nodep->idx % MASK_BITS) { | ||
| 1744 | fprintf(stderr, "Node index not divisable by " | ||
| 1745 | "mask size,\n" | ||
| 1746 | " nodep: %p nodep->idx: 0x%lx " | ||
| 1747 | "MASK_BITS: %lu\n", | ||
| 1748 | nodep, nodep->idx, MASK_BITS); | ||
| 1749 | error_detected = true; | ||
| 1750 | break; | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | /* | ||
| 1754 | * Validate bits described by node don't wrap beyond the | ||
| 1755 | * highest supported index. | ||
| 1756 | */ | ||
| 1757 | if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) { | ||
| 1758 | fprintf(stderr, "Bits described by node wrap " | ||
| 1759 | "beyond highest supported index,\n" | ||
| 1760 | " nodep: %p nodep->idx: 0x%lx\n" | ||
| 1761 | " MASK_BITS: %lu nodep->num_after: 0x%lx", | ||
| 1762 | nodep, nodep->idx, MASK_BITS, nodep->num_after); | ||
| 1763 | error_detected = true; | ||
| 1764 | break; | ||
| 1765 | } | ||
| 1766 | |||
| 1767 | /* Check parent pointers. */ | ||
| 1768 | if (nodep->left) { | ||
| 1769 | if (nodep->left->parent != nodep) { | ||
| 1770 | fprintf(stderr, "Left child parent pointer " | ||
| 1771 | "doesn't point to this node,\n" | ||
| 1772 | " nodep: %p nodep->left: %p " | ||
| 1773 | "nodep->left->parent: %p", | ||
| 1774 | nodep, nodep->left, | ||
| 1775 | nodep->left->parent); | ||
| 1776 | error_detected = true; | ||
| 1777 | break; | ||
| 1778 | } | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | if (nodep->right) { | ||
| 1782 | if (nodep->right->parent != nodep) { | ||
| 1783 | fprintf(stderr, "Right child parent pointer " | ||
| 1784 | "doesn't point to this node,\n" | ||
| 1785 | " nodep: %p nodep->right: %p " | ||
| 1786 | "nodep->right->parent: %p", | ||
| 1787 | nodep, nodep->right, | ||
| 1788 | nodep->right->parent); | ||
| 1789 | error_detected = true; | ||
| 1790 | break; | ||
| 1791 | } | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | if (!nodep->parent) { | ||
| 1795 | if (s->root != nodep) { | ||
| 1796 | fprintf(stderr, "Unexpected root node, " | ||
| 1797 | "s->root: %p nodep: %p", | ||
| 1798 | s->root, nodep); | ||
| 1799 | error_detected = true; | ||
| 1800 | break; | ||
| 1801 | } | ||
| 1802 | } | ||
| 1803 | |||
| 1804 | if (prev) { | ||
| 1805 | /* | ||
| 1806 | * Is index of previous node before index of | ||
| 1807 | * current node? | ||
| 1808 | */ | ||
| 1809 | if (prev->idx >= nodep->idx) { | ||
| 1810 | fprintf(stderr, "Previous node index " | ||
| 1811 | ">= current node index,\n" | ||
| 1812 | " prev: %p prev->idx: 0x%lx\n" | ||
| 1813 | " nodep: %p nodep->idx: 0x%lx", | ||
| 1814 | prev, prev->idx, nodep, nodep->idx); | ||
| 1815 | error_detected = true; | ||
| 1816 | break; | ||
| 1817 | } | ||
| 1818 | |||
| 1819 | /* | ||
| 1820 | * Nodes occur in ascending order, based on each | ||
| 1821 | * node's starting index. | ||
| 1822 | */ | ||
| 1823 | if ((prev->idx + MASK_BITS + prev->num_after - 1) | ||
| 1824 | >= nodep->idx) { | ||
| 1825 | fprintf(stderr, "Previous node bit range " | ||
| 1826 | "overlap with current node bit range,\n" | ||
| 1827 | " prev: %p prev->idx: 0x%lx " | ||
| 1828 | "prev->num_after: 0x%lx\n" | ||
| 1829 | " nodep: %p nodep->idx: 0x%lx " | ||
| 1830 | "nodep->num_after: 0x%lx\n" | ||
| 1831 | " MASK_BITS: %lu", | ||
| 1832 | prev, prev->idx, prev->num_after, | ||
| 1833 | nodep, nodep->idx, nodep->num_after, | ||
| 1834 | MASK_BITS); | ||
| 1835 | error_detected = true; | ||
| 1836 | break; | ||
| 1837 | } | ||
| 1838 | |||
| 1839 | /* | ||
| 1840 | * When the node has all mask bits set, it shouldn't | ||
| 1841 | * be adjacent to the last bit described by the | ||
| 1842 | * previous node. | ||
| 1843 | */ | ||
| 1844 | if (nodep->mask == ~(mask_t) 0 && | ||
| 1845 | prev->idx + MASK_BITS + prev->num_after == nodep->idx) { | ||
| 1846 | fprintf(stderr, "Current node has mask with " | ||
| 1847 | "all bits set and is adjacent to the " | ||
| 1848 | "previous node,\n" | ||
| 1849 | " prev: %p prev->idx: 0x%lx " | ||
| 1850 | "prev->num_after: 0x%lx\n" | ||
| 1851 | " nodep: %p nodep->idx: 0x%lx " | ||
| 1852 | "nodep->num_after: 0x%lx\n" | ||
| 1853 | " MASK_BITS: %lu", | ||
| 1854 | prev, prev->idx, prev->num_after, | ||
| 1855 | nodep, nodep->idx, nodep->num_after, | ||
| 1856 | MASK_BITS); | ||
| 1857 | |||
| 1858 | error_detected = true; | ||
| 1859 | break; | ||
| 1860 | } | ||
| 1861 | } | ||
| 1862 | } | ||
| 1863 | |||
| 1864 | if (!error_detected) { | ||
| 1865 | /* | ||
| 1866 | * Is sum of bits set in each node equal to the count | ||
| 1867 | * of total bits set. | ||
| 1868 | */ | ||
| 1869 | if (s->num_set != total_bits_set) { | ||
| 1870 | fprintf(stderr, "Number of bits set missmatch,\n" | ||
| 1871 | " s->num_set: 0x%lx total_bits_set: 0x%lx", | ||
| 1872 | s->num_set, total_bits_set); | ||
| 1873 | |||
| 1874 | error_detected = true; | ||
| 1875 | } | ||
| 1876 | } | ||
| 1877 | |||
| 1878 | if (error_detected) { | ||
| 1879 | fputs(" dump_internal:\n", stderr); | ||
| 1880 | sparsebit_dump_internal(stderr, s, 4); | ||
| 1881 | abort(); | ||
| 1882 | } | ||
| 1883 | } | ||
| 1884 | |||
| 1885 | |||
| 1886 | #ifdef FUZZ | ||
| 1887 | /* A simple but effective fuzzing driver. Look for bugs with the help | ||
| 1888 | * of some invariants and of a trivial representation of sparsebit. | ||
| 1889 | * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let | ||
| 1890 | * afl-fuzz do the magic. :) | ||
| 1891 | */ | ||
| 1892 | |||
| 1893 | #include <stdlib.h> | ||
| 1894 | #include <assert.h> | ||
| 1895 | |||
| 1896 | struct range { | ||
| 1897 | sparsebit_idx_t first, last; | ||
| 1898 | bool set; | ||
| 1899 | }; | ||
| 1900 | |||
| 1901 | struct sparsebit *s; | ||
| 1902 | struct range ranges[1000]; | ||
| 1903 | int num_ranges; | ||
| 1904 | |||
| 1905 | static bool get_value(sparsebit_idx_t idx) | ||
| 1906 | { | ||
| 1907 | int i; | ||
| 1908 | |||
| 1909 | for (i = num_ranges; --i >= 0; ) | ||
| 1910 | if (ranges[i].first <= idx && idx <= ranges[i].last) | ||
| 1911 | return ranges[i].set; | ||
| 1912 | |||
| 1913 | return false; | ||
| 1914 | } | ||
| 1915 | |||
| 1916 | static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last) | ||
| 1917 | { | ||
| 1918 | sparsebit_num_t num; | ||
| 1919 | sparsebit_idx_t next; | ||
| 1920 | |||
| 1921 | if (first < last) { | ||
| 1922 | num = last - first + 1; | ||
| 1923 | } else { | ||
| 1924 | num = first - last + 1; | ||
| 1925 | first = last; | ||
| 1926 | last = first + num - 1; | ||
| 1927 | } | ||
| 1928 | |||
| 1929 | switch (code) { | ||
| 1930 | case 0: | ||
| 1931 | sparsebit_set(s, first); | ||
| 1932 | assert(sparsebit_is_set(s, first)); | ||
| 1933 | assert(!sparsebit_is_clear(s, first)); | ||
| 1934 | assert(sparsebit_any_set(s)); | ||
| 1935 | assert(!sparsebit_all_clear(s)); | ||
| 1936 | if (get_value(first)) | ||
| 1937 | return; | ||
| 1938 | if (num_ranges == 1000) | ||
| 1939 | exit(0); | ||
| 1940 | ranges[num_ranges++] = (struct range) | ||
| 1941 | { .first = first, .last = first, .set = true }; | ||
| 1942 | break; | ||
| 1943 | case 1: | ||
| 1944 | sparsebit_clear(s, first); | ||
| 1945 | assert(!sparsebit_is_set(s, first)); | ||
| 1946 | assert(sparsebit_is_clear(s, first)); | ||
| 1947 | assert(sparsebit_any_clear(s)); | ||
| 1948 | assert(!sparsebit_all_set(s)); | ||
| 1949 | if (!get_value(first)) | ||
| 1950 | return; | ||
| 1951 | if (num_ranges == 1000) | ||
| 1952 | exit(0); | ||
| 1953 | ranges[num_ranges++] = (struct range) | ||
| 1954 | { .first = first, .last = first, .set = false }; | ||
| 1955 | break; | ||
| 1956 | case 2: | ||
| 1957 | assert(sparsebit_is_set(s, first) == get_value(first)); | ||
| 1958 | assert(sparsebit_is_clear(s, first) == !get_value(first)); | ||
| 1959 | break; | ||
| 1960 | case 3: | ||
| 1961 | if (sparsebit_any_set(s)) | ||
| 1962 | assert(get_value(sparsebit_first_set(s))); | ||
| 1963 | if (sparsebit_any_clear(s)) | ||
| 1964 | assert(!get_value(sparsebit_first_clear(s))); | ||
| 1965 | sparsebit_set_all(s); | ||
| 1966 | assert(!sparsebit_any_clear(s)); | ||
| 1967 | assert(sparsebit_all_set(s)); | ||
| 1968 | num_ranges = 0; | ||
| 1969 | ranges[num_ranges++] = (struct range) | ||
| 1970 | { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true }; | ||
| 1971 | break; | ||
| 1972 | case 4: | ||
| 1973 | if (sparsebit_any_set(s)) | ||
| 1974 | assert(get_value(sparsebit_first_set(s))); | ||
| 1975 | if (sparsebit_any_clear(s)) | ||
| 1976 | assert(!get_value(sparsebit_first_clear(s))); | ||
| 1977 | sparsebit_clear_all(s); | ||
| 1978 | assert(!sparsebit_any_set(s)); | ||
| 1979 | assert(sparsebit_all_clear(s)); | ||
| 1980 | num_ranges = 0; | ||
| 1981 | break; | ||
| 1982 | case 5: | ||
| 1983 | next = sparsebit_next_set(s, first); | ||
| 1984 | assert(next == 0 || next > first); | ||
| 1985 | assert(next == 0 || get_value(next)); | ||
| 1986 | break; | ||
| 1987 | case 6: | ||
| 1988 | next = sparsebit_next_clear(s, first); | ||
| 1989 | assert(next == 0 || next > first); | ||
| 1990 | assert(next == 0 || !get_value(next)); | ||
| 1991 | break; | ||
| 1992 | case 7: | ||
| 1993 | next = sparsebit_next_clear(s, first); | ||
| 1994 | if (sparsebit_is_set_num(s, first, num)) { | ||
| 1995 | assert(next == 0 || next > last); | ||
| 1996 | if (first) | ||
| 1997 | next = sparsebit_next_set(s, first - 1); | ||
| 1998 | else if (sparsebit_any_set(s)) | ||
| 1999 | next = sparsebit_first_set(s); | ||
| 2000 | else | ||
| 2001 | return; | ||
| 2002 | assert(next == first); | ||
| 2003 | } else { | ||
| 2004 | assert(sparsebit_is_clear(s, first) || next <= last); | ||
| 2005 | } | ||
| 2006 | break; | ||
| 2007 | case 8: | ||
| 2008 | next = sparsebit_next_set(s, first); | ||
| 2009 | if (sparsebit_is_clear_num(s, first, num)) { | ||
| 2010 | assert(next == 0 || next > last); | ||
| 2011 | if (first) | ||
| 2012 | next = sparsebit_next_clear(s, first - 1); | ||
| 2013 | else if (sparsebit_any_clear(s)) | ||
| 2014 | next = sparsebit_first_clear(s); | ||
| 2015 | else | ||
| 2016 | return; | ||
| 2017 | assert(next == first); | ||
| 2018 | } else { | ||
| 2019 | assert(sparsebit_is_set(s, first) || next <= last); | ||
| 2020 | } | ||
| 2021 | break; | ||
| 2022 | case 9: | ||
| 2023 | sparsebit_set_num(s, first, num); | ||
| 2024 | assert(sparsebit_is_set_num(s, first, num)); | ||
| 2025 | assert(!sparsebit_is_clear_num(s, first, num)); | ||
| 2026 | assert(sparsebit_any_set(s)); | ||
| 2027 | assert(!sparsebit_all_clear(s)); | ||
| 2028 | if (num_ranges == 1000) | ||
| 2029 | exit(0); | ||
| 2030 | ranges[num_ranges++] = (struct range) | ||
| 2031 | { .first = first, .last = last, .set = true }; | ||
| 2032 | break; | ||
| 2033 | case 10: | ||
| 2034 | sparsebit_clear_num(s, first, num); | ||
| 2035 | assert(!sparsebit_is_set_num(s, first, num)); | ||
| 2036 | assert(sparsebit_is_clear_num(s, first, num)); | ||
| 2037 | assert(sparsebit_any_clear(s)); | ||
| 2038 | assert(!sparsebit_all_set(s)); | ||
| 2039 | if (num_ranges == 1000) | ||
| 2040 | exit(0); | ||
| 2041 | ranges[num_ranges++] = (struct range) | ||
| 2042 | { .first = first, .last = last, .set = false }; | ||
| 2043 | break; | ||
| 2044 | case 11: | ||
| 2045 | sparsebit_validate_internal(s); | ||
| 2046 | break; | ||
| 2047 | default: | ||
| 2048 | break; | ||
| 2049 | } | ||
| 2050 | } | ||
| 2051 | |||
| 2052 | unsigned char get8(void) | ||
| 2053 | { | ||
| 2054 | int ch; | ||
| 2055 | |||
| 2056 | ch = getchar(); | ||
| 2057 | if (ch == EOF) | ||
| 2058 | exit(0); | ||
| 2059 | return ch; | ||
| 2060 | } | ||
| 2061 | |||
| 2062 | uint64_t get64(void) | ||
| 2063 | { | ||
| 2064 | uint64_t x; | ||
| 2065 | |||
| 2066 | x = get8(); | ||
| 2067 | x = (x << 8) | get8(); | ||
| 2068 | x = (x << 8) | get8(); | ||
| 2069 | x = (x << 8) | get8(); | ||
| 2070 | x = (x << 8) | get8(); | ||
| 2071 | x = (x << 8) | get8(); | ||
| 2072 | x = (x << 8) | get8(); | ||
| 2073 | return (x << 8) | get8(); | ||
| 2074 | } | ||
| 2075 | |||
| 2076 | int main(void) | ||
| 2077 | { | ||
| 2078 | s = sparsebit_alloc(); | ||
| 2079 | for (;;) { | ||
| 2080 | uint8_t op = get8() & 0xf; | ||
| 2081 | uint64_t first = get64(); | ||
| 2082 | uint64_t last = get64(); | ||
| 2083 | |||
| 2084 | operate(op, first, last); | ||
| 2085 | } | ||
| 2086 | } | ||
| 2087 | #endif | ||
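
The harness above consumes a raw byte stream on stdin: each iteration of main() reads one opcode byte (only the low four bits are used) plus two big-endian 64-bit values via get8() and get64(). A minimal sketch of a matching input generator, suitable for piping into a build of this harness, might look like the following; the seed, record count, and program names are illustrative assumptions, not part of the patch.

/* Hypothetical generator for the stdin format consumed by main() above. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static void put64(uint64_t x)
{
	/* Most-significant byte first, mirroring get64(). */
	for (int shift = 56; shift >= 0; shift -= 8)
		putchar((x >> shift) & 0xff);
}

static uint64_t rand64(void)
{
	return ((uint64_t)random() << 32) ^ random();
}

int main(void)
{
	srandom(1);	/* fixed seed so a failing input can be replayed */
	for (int i = 0; i < 100000; i++) {
		putchar(random() & 0xff);	/* op; only bits 3:0 matter */
		put64(rand64());		/* first */
		put64(rand64());		/* last  */
	}
	return 0;
}

Piped into a binary built from the fuzz-test section, each record drives one call to operate(); the binary name depends on how the harness is built, so none is assumed here.
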
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c new file mode 100644 index 000000000000..2f17675f4275 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86.c | |||
| @@ -0,0 +1,700 @@ | |||
| 1 | /* | ||
| 2 | * tools/testing/selftests/kvm/lib/x86.c | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #define _GNU_SOURCE /* for program_invocation_name */ | ||
| 10 | |||
| 11 | #include "test_util.h" | ||
| 12 | #include "kvm_util.h" | ||
| 13 | #include "kvm_util_internal.h" | ||
| 14 | #include "x86.h" | ||
| 15 | |||
| 16 | /* Minimum physical address used for virtual translation tables. */ | ||
| 17 | #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 | ||
| 18 | |||
| 19 | /* Virtual translation table structure declarations */ | ||
| 20 | struct pageMapL4Entry { | ||
| 21 | uint64_t present:1; | ||
| 22 | uint64_t writable:1; | ||
| 23 | uint64_t user:1; | ||
| 24 | uint64_t write_through:1; | ||
| 25 | uint64_t cache_disable:1; | ||
| 26 | uint64_t accessed:1; | ||
| 27 | uint64_t ignored_06:1; | ||
| 28 | uint64_t page_size:1; | ||
| 29 | uint64_t ignored_11_08:4; | ||
| 30 | uint64_t address:40; | ||
| 31 | uint64_t ignored_62_52:11; | ||
| 32 | uint64_t execute_disable:1; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct pageDirectoryPointerEntry { | ||
| 36 | uint64_t present:1; | ||
| 37 | uint64_t writable:1; | ||
| 38 | uint64_t user:1; | ||
| 39 | uint64_t write_through:1; | ||
| 40 | uint64_t cache_disable:1; | ||
| 41 | uint64_t accessed:1; | ||
| 42 | uint64_t ignored_06:1; | ||
| 43 | uint64_t page_size:1; | ||
| 44 | uint64_t ignored_11_08:4; | ||
| 45 | uint64_t address:40; | ||
| 46 | uint64_t ignored_62_52:11; | ||
| 47 | uint64_t execute_disable:1; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct pageDirectoryEntry { | ||
| 51 | uint64_t present:1; | ||
| 52 | uint64_t writable:1; | ||
| 53 | uint64_t user:1; | ||
| 54 | uint64_t write_through:1; | ||
| 55 | uint64_t cache_disable:1; | ||
| 56 | uint64_t accessed:1; | ||
| 57 | uint64_t ignored_06:1; | ||
| 58 | uint64_t page_size:1; | ||
| 59 | uint64_t ignored_11_08:4; | ||
| 60 | uint64_t address:40; | ||
| 61 | uint64_t ignored_62_52:11; | ||
| 62 | uint64_t execute_disable:1; | ||
| 63 | }; | ||
| 64 | |||
| 65 | struct pageTableEntry { | ||
| 66 | uint64_t present:1; | ||
| 67 | uint64_t writable:1; | ||
| 68 | uint64_t user:1; | ||
| 69 | uint64_t write_through:1; | ||
| 70 | uint64_t cache_disable:1; | ||
| 71 | uint64_t accessed:1; | ||
| 72 | uint64_t dirty:1; | ||
| 73 | uint64_t reserved_07:1; | ||
| 74 | uint64_t global:1; | ||
| 75 | uint64_t ignored_11_09:3; | ||
| 76 | uint64_t address:40; | ||
| 77 | uint64_t ignored_62_52:11; | ||
| 78 | uint64_t execute_disable:1; | ||
| 79 | }; | ||
| 80 | |||
| 81 | /* Register Dump | ||
| 82 | * | ||
| 83 | * Input Args: | ||
| 84 | * indent - Left margin indent amount | ||
| 85 | * regs - Register state to dump | ||
| 86 | * | ||
| 87 | * Output Args: | ||
| 88 | * stream - Output FILE stream | ||
| 89 | * | ||
| 90 | * Return: None | ||
| 91 | * | ||
| 92 | * Dumps the state of the registers given by regs, to the FILE stream | ||
| 93 | * given by stream. | ||
| 94 | */ | ||
| 95 | void regs_dump(FILE *stream, struct kvm_regs *regs, | ||
| 96 | uint8_t indent) | ||
| 97 | { | ||
| 98 | fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " | ||
| 99 | "rcx: 0x%.16llx rdx: 0x%.16llx\n", | ||
| 100 | indent, "", | ||
| 101 | regs->rax, regs->rbx, regs->rcx, regs->rdx); | ||
| 102 | fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " | ||
| 103 | "rsp: 0x%.16llx rbp: 0x%.16llx\n", | ||
| 104 | indent, "", | ||
| 105 | regs->rsi, regs->rdi, regs->rsp, regs->rbp); | ||
| 106 | fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " | ||
| 107 | "r10: 0x%.16llx r11: 0x%.16llx\n", | ||
| 108 | indent, "", | ||
| 109 | regs->r8, regs->r9, regs->r10, regs->r11); | ||
| 110 | fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " | ||
| 111 | "r14: 0x%.16llx r15: 0x%.16llx\n", | ||
| 112 | indent, "", | ||
| 113 | regs->r12, regs->r13, regs->r14, regs->r15); | ||
| 114 | fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", | ||
| 115 | indent, "", | ||
| 116 | regs->rip, regs->rflags); | ||
| 117 | } | ||
| 118 | |||
| 119 | /* Segment Dump | ||
| 120 | * | ||
| 121 | * Input Args: | ||
| 122 | * indent - Left margin indent amount | ||
| 123 | * segment - KVM segment | ||
| 124 | * | ||
| 125 | * Output Args: | ||
| 126 | * stream - Output FILE stream | ||
| 127 | * | ||
| 128 | * Return: None | ||
| 129 | * | ||
| 130 | * Dumps the state of the KVM segment given by segment, to the FILE stream | ||
| 131 | * given by stream. | ||
| 132 | */ | ||
| 133 | static void segment_dump(FILE *stream, struct kvm_segment *segment, | ||
| 134 | uint8_t indent) | ||
| 135 | { | ||
| 136 | fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " | ||
| 137 | "selector: 0x%.4x type: 0x%.2x\n", | ||
| 138 | indent, "", segment->base, segment->limit, | ||
| 139 | segment->selector, segment->type); | ||
| 140 | fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " | ||
| 141 | "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", | ||
| 142 | indent, "", segment->present, segment->dpl, | ||
| 143 | segment->db, segment->s, segment->l); | ||
| 144 | fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " | ||
| 145 | "unusable: 0x%.2x padding: 0x%.2x\n", | ||
| 146 | indent, "", segment->g, segment->avl, | ||
| 147 | segment->unusable, segment->padding); | ||
| 148 | } | ||
| 149 | |||
| 150 | /* dtable Dump | ||
| 151 | * | ||
| 152 | * Input Args: | ||
| 153 | * indent - Left margin indent amount | ||
| 154 | * dtable - KVM dtable | ||
| 155 | * | ||
| 156 | * Output Args: | ||
| 157 | * stream - Output FILE stream | ||
| 158 | * | ||
| 159 | * Return: None | ||
| 160 | * | ||
| 161 | * Dumps the state of the KVM dtable given by dtable, to the FILE stream | ||
| 162 | * given by stream. | ||
| 163 | */ | ||
| 164 | static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, | ||
| 165 | uint8_t indent) | ||
| 166 | { | ||
| 167 | fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " | ||
| 168 | "padding: 0x%.4x 0x%.4x 0x%.4x\n", | ||
| 169 | indent, "", dtable->base, dtable->limit, | ||
| 170 | dtable->padding[0], dtable->padding[1], dtable->padding[2]); | ||
| 171 | } | ||
| 172 | |||
| 173 | /* System Register Dump | ||
| 174 | * | ||
| 175 | * Input Args: | ||
| 176 | * indent - Left margin indent amount | ||
| 177 | * sregs - System registers | ||
| 178 | * | ||
| 179 | * Output Args: | ||
| 180 | * stream - Output FILE stream | ||
| 181 | * | ||
| 182 | * Return: None | ||
| 183 | * | ||
| 184 | * Dumps the state of the system registers given by sregs, to the FILE stream | ||
| 185 | * given by stream. | ||
| 186 | */ | ||
| 187 | void sregs_dump(FILE *stream, struct kvm_sregs *sregs, | ||
| 188 | uint8_t indent) | ||
| 189 | { | ||
| 190 | unsigned int i; | ||
| 191 | |||
| 192 | fprintf(stream, "%*scs:\n", indent, ""); | ||
| 193 | segment_dump(stream, &sregs->cs, indent + 2); | ||
| 194 | fprintf(stream, "%*sds:\n", indent, ""); | ||
| 195 | segment_dump(stream, &sregs->ds, indent + 2); | ||
| 196 | fprintf(stream, "%*ses:\n", indent, ""); | ||
| 197 | segment_dump(stream, &sregs->es, indent + 2); | ||
| 198 | fprintf(stream, "%*sfs:\n", indent, ""); | ||
| 199 | segment_dump(stream, &sregs->fs, indent + 2); | ||
| 200 | fprintf(stream, "%*sgs:\n", indent, ""); | ||
| 201 | segment_dump(stream, &sregs->gs, indent + 2); | ||
| 202 | fprintf(stream, "%*sss:\n", indent, ""); | ||
| 203 | segment_dump(stream, &sregs->ss, indent + 2); | ||
| 204 | fprintf(stream, "%*str:\n", indent, ""); | ||
| 205 | segment_dump(stream, &sregs->tr, indent + 2); | ||
| 206 | fprintf(stream, "%*sldt:\n", indent, ""); | ||
| 207 | segment_dump(stream, &sregs->ldt, indent + 2); | ||
| 208 | |||
| 209 | fprintf(stream, "%*sgdt:\n", indent, ""); | ||
| 210 | dtable_dump(stream, &sregs->gdt, indent + 2); | ||
| 211 | fprintf(stream, "%*sidt:\n", indent, ""); | ||
| 212 | dtable_dump(stream, &sregs->idt, indent + 2); | ||
| 213 | |||
| 214 | fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " | ||
| 215 | "cr3: 0x%.16llx cr4: 0x%.16llx\n", | ||
| 216 | indent, "", | ||
| 217 | sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); | ||
| 218 | fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " | ||
| 219 | "apic_base: 0x%.16llx\n", | ||
| 220 | indent, "", | ||
| 221 | sregs->cr8, sregs->efer, sregs->apic_base); | ||
| 222 | |||
| 223 | fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); | ||
| 224 | for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { | ||
| 225 | fprintf(stream, "%*s%.16llx\n", indent + 2, "", | ||
| 226 | sregs->interrupt_bitmap[i]); | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) | ||
| 231 | { | ||
| 232 | int rc; | ||
| 233 | |||
| 234 | TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " | ||
| 235 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | ||
| 236 | |||
| 237 | /* If needed, create page map l4 table. */ | ||
| 238 | if (!vm->pgd_created) { | ||
| 239 | vm_paddr_t paddr = vm_phy_page_alloc(vm, | ||
| 240 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); | ||
| 241 | vm->pgd = paddr; | ||
| 242 | |||
| 243 | /* Set pointer to pgd tables in all the VCPUs that | ||
| 244 | * have already been created. Future VCPUs will have | ||
| 245 | * the value set as each one is created. | ||
| 246 | */ | ||
| 247 | for (struct vcpu *vcpu = vm->vcpu_head; vcpu; | ||
| 248 | vcpu = vcpu->next) { | ||
| 249 | struct kvm_sregs sregs; | ||
| 250 | |||
| 251 | /* Obtain the current system register settings */ | ||
| 252 | vcpu_sregs_get(vm, vcpu->id, &sregs); | ||
| 253 | |||
| 254 | /* Set and store the pointer to the start of the | ||
| 255 | * pgd tables. | ||
| 256 | */ | ||
| 257 | sregs.cr3 = vm->pgd; | ||
| 258 | vcpu_sregs_set(vm, vcpu->id, &sregs); | ||
| 259 | } | ||
| 260 | |||
| 261 | vm->pgd_created = true; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | /* VM Virtual Page Map | ||
| 266 | * | ||
| 267 | * Input Args: | ||
| 268 | * vm - Virtual Machine | ||
| 269 | * vaddr - VM Virtual Address | ||
| 270 | * paddr - VM Physical Address | ||
| 271 | * pgd_memslot - Memory region slot for new virtual translation tables | ||
| 272 | * | ||
| 273 | * Output Args: None | ||
| 274 | * | ||
| 275 | * Return: None | ||
| 276 | * | ||
| 277 | * Within the VM given by vm, creates a virtual translation for the page | ||
| 278 | * starting at vaddr to the page starting at paddr. | ||
| 279 | */ | ||
| 280 | void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, | ||
| 281 | uint32_t pgd_memslot) | ||
| 282 | { | ||
| 283 | uint16_t index[4]; | ||
| 284 | struct pageMapL4Entry *pml4e; | ||
| 285 | |||
| 286 | TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " | ||
| 287 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | ||
| 288 | |||
| 289 | TEST_ASSERT((vaddr % vm->page_size) == 0, | ||
| 290 | "Virtual address not on page boundary,\n" | ||
| 291 | " vaddr: 0x%lx vm->page_size: 0x%x", | ||
| 292 | vaddr, vm->page_size); | ||
| 293 | TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, | ||
| 294 | (vaddr >> vm->page_shift)), | ||
| 295 | "Invalid virtual address, vaddr: 0x%lx", | ||
| 296 | vaddr); | ||
| 297 | TEST_ASSERT((paddr % vm->page_size) == 0, | ||
| 298 | "Physical address not on page boundary,\n" | ||
| 299 | " paddr: 0x%lx vm->page_size: 0x%x", | ||
| 300 | paddr, vm->page_size); | ||
| 301 | TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, | ||
| 302 | "Physical address beyond beyond maximum supported,\n" | ||
| 303 | " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", | ||
| 304 | paddr, vm->max_gfn, vm->page_size); | ||
| 305 | |||
| 306 | index[0] = (vaddr >> 12) & 0x1ffu; | ||
| 307 | index[1] = (vaddr >> 21) & 0x1ffu; | ||
| 308 | index[2] = (vaddr >> 30) & 0x1ffu; | ||
| 309 | index[3] = (vaddr >> 39) & 0x1ffu; | ||
| 310 | |||
| 311 | /* Allocate page directory pointer table if not present. */ | ||
| 312 | pml4e = addr_gpa2hva(vm, vm->pgd); | ||
| 313 | if (!pml4e[index[3]].present) { | ||
| 314 | pml4e[index[3]].address = vm_phy_page_alloc(vm, | ||
| 315 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) | ||
| 316 | >> vm->page_shift; | ||
| 317 | pml4e[index[3]].writable = true; | ||
| 318 | pml4e[index[3]].present = true; | ||
| 319 | } | ||
| 320 | |||
| 321 | /* Allocate page directory table if not present. */ | ||
| 322 | struct pageDirectoryPointerEntry *pdpe; | ||
| 323 | pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); | ||
| 324 | if (!pdpe[index[2]].present) { | ||
| 325 | pdpe[index[2]].address = vm_phy_page_alloc(vm, | ||
| 326 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) | ||
| 327 | >> vm->page_shift; | ||
| 328 | pdpe[index[2]].writable = true; | ||
| 329 | pdpe[index[2]].present = true; | ||
| 330 | } | ||
| 331 | |||
| 332 | /* Allocate page table if not present. */ | ||
| 333 | struct pageDirectoryEntry *pde; | ||
| 334 | pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); | ||
| 335 | if (!pde[index[1]].present) { | ||
| 336 | pde[index[1]].address = vm_phy_page_alloc(vm, | ||
| 337 | KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) | ||
| 338 | >> vm->page_shift; | ||
| 339 | pde[index[1]].writable = true; | ||
| 340 | pde[index[1]].present = true; | ||
| 341 | } | ||
| 342 | |||
| 343 | /* Fill in page table entry. */ | ||
| 344 | struct pageTableEntry *pte; | ||
| 345 | pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); | ||
| 346 | pte[index[0]].address = paddr >> vm->page_shift; | ||
| 347 | pte[index[0]].writable = true; | ||
| 348 | pte[index[0]].present = 1; | ||
| 349 | } | ||
| 350 | |||
| 351 | /* Virtual Translation Tables Dump | ||
| 352 | * | ||
| 353 | * Input Args: | ||
| 354 | * vm - Virtual Machine | ||
| 355 | * indent - Left margin indent amount | ||
| 356 | * | ||
| 357 | * Output Args: | ||
| 358 | * stream - Output FILE stream | ||
| 359 | * | ||
| 360 | * Return: None | ||
| 361 | * | ||
| 362 | * Dumps to the FILE stream given by stream, the contents of all the | ||
| 363 | * virtual translation tables for the VM given by vm. | ||
| 364 | */ | ||
| 365 | void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) | ||
| 366 | { | ||
| 367 | struct pageMapL4Entry *pml4e, *pml4e_start; | ||
| 368 | struct pageDirectoryPointerEntry *pdpe, *pdpe_start; | ||
| 369 | struct pageDirectoryEntry *pde, *pde_start; | ||
| 370 | struct pageTableEntry *pte, *pte_start; | ||
| 371 | |||
| 372 | if (!vm->pgd_created) | ||
| 373 | return; | ||
| 374 | |||
| 375 | fprintf(stream, "%*s " | ||
| 376 | " no\n", indent, ""); | ||
| 377 | fprintf(stream, "%*s index hvaddr gpaddr " | ||
| 378 | "addr w exec dirty\n", | ||
| 379 | indent, ""); | ||
| 380 | pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, | ||
| 381 | vm->pgd); | ||
| 382 | for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { | ||
| 383 | pml4e = &pml4e_start[n1]; | ||
| 384 | if (!pml4e->present) | ||
| 385 | continue; | ||
| 386 | fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " | ||
| 387 | " %u\n", | ||
| 388 | indent, "", | ||
| 389 | pml4e - pml4e_start, pml4e, | ||
| 390 | addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, | ||
| 391 | pml4e->writable, pml4e->execute_disable); | ||
| 392 | |||
| 393 | pdpe_start = addr_gpa2hva(vm, pml4e->address | ||
| 394 | * vm->page_size); | ||
| 395 | for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { | ||
| 396 | pdpe = &pdpe_start[n2]; | ||
| 397 | if (!pdpe->present) | ||
| 398 | continue; | ||
| 399 | fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " | ||
| 400 | "%u %u\n", | ||
| 401 | indent, "", | ||
| 402 | pdpe - pdpe_start, pdpe, | ||
| 403 | addr_hva2gpa(vm, pdpe), | ||
| 404 | (uint64_t) pdpe->address, pdpe->writable, | ||
| 405 | pdpe->execute_disable); | ||
| 406 | |||
| 407 | pde_start = addr_gpa2hva(vm, | ||
| 408 | pdpe->address * vm->page_size); | ||
| 409 | for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { | ||
| 410 | pde = &pde_start[n3]; | ||
| 411 | if (!pde->present) | ||
| 412 | continue; | ||
| 413 | fprintf(stream, "%*spde 0x%-3zx %p " | ||
| 414 | "0x%-12lx 0x%-10lx %u %u\n", | ||
| 415 | indent, "", pde - pde_start, pde, | ||
| 416 | addr_hva2gpa(vm, pde), | ||
| 417 | (uint64_t) pde->address, pde->writable, | ||
| 418 | pde->execute_disable); | ||
| 419 | |||
| 420 | pte_start = addr_gpa2hva(vm, | ||
| 421 | pde->address * vm->page_size); | ||
| 422 | for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { | ||
| 423 | pte = &pte_start[n4]; | ||
| 424 | if (!pte->present) | ||
| 425 | continue; | ||
| 426 | fprintf(stream, "%*spte 0x%-3zx %p " | ||
| 427 | "0x%-12lx 0x%-10lx %u %u " | ||
| 428 | " %u 0x%-10lx\n", | ||
| 429 | indent, "", | ||
| 430 | pte - pte_start, pte, | ||
| 431 | addr_hva2gpa(vm, pte), | ||
| 432 | (uint64_t) pte->address, | ||
| 433 | pte->writable, | ||
| 434 | pte->execute_disable, | ||
| 435 | pte->dirty, | ||
| 436 | ((uint64_t) n1 << 27) | ||
| 437 | | ((uint64_t) n2 << 18) | ||
| 438 | | ((uint64_t) n3 << 9) | ||
| 439 | | ((uint64_t) n4)); | ||
| 440 | } | ||
| 441 | } | ||
| 442 | } | ||
| 443 | } | ||
| 444 | } | ||
| 445 | |||
| 446 | /* Set Unusable Segment | ||
| 447 | * | ||
| 448 | * Input Args: None | ||
| 449 | * | ||
| 450 | * Output Args: | ||
| 451 | * segp - Pointer to segment register | ||
| 452 | * | ||
| 453 | * Return: None | ||
| 454 | * | ||
| 455 | * Sets the segment register pointed to by segp to an unusable state. | ||
| 456 | */ | ||
| 457 | static void kvm_seg_set_unusable(struct kvm_segment *segp) | ||
| 458 | { | ||
| 459 | memset(segp, 0, sizeof(*segp)); | ||
| 460 | segp->unusable = true; | ||
| 461 | } | ||
| 462 | |||
| 463 | /* Set Long Mode Flat Kernel Code Segment | ||
| 464 | * | ||
| 465 | * Input Args: | ||
| 466 | * selector - selector value | ||
| 467 | * | ||
| 468 | * Output Args: | ||
| 469 | * segp - Pointer to KVM segment | ||
| 470 | * | ||
| 471 | * Return: None | ||
| 472 | * | ||
| 473 | * Sets up the KVM segment pointed to by segp, to be a code segment | ||
| 474 | * with the selector value given by selector. | ||
| 475 | */ | ||
| 476 | static void kvm_seg_set_kernel_code_64bit(uint16_t selector, | ||
| 477 | struct kvm_segment *segp) | ||
| 478 | { | ||
| 479 | memset(segp, 0, sizeof(*segp)); | ||
| 480 | segp->selector = selector; | ||
| 481 | segp->limit = 0xFFFFFFFFu; | ||
| 482 | segp->s = 0x1; /* kTypeCodeData */ | ||
| 483 | segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed | ||
| 484 | * | kFlagCodeReadable | ||
| 485 | */ | ||
| 486 | segp->g = true; | ||
| 487 | segp->l = true; | ||
| 488 | segp->present = 1; | ||
| 489 | } | ||
| 490 | |||
| 491 | /* Set Long Mode Flat Kernel Data Segment | ||
| 492 | * | ||
| 493 | * Input Args: | ||
| 494 | * selector - selector value | ||
| 495 | * | ||
| 496 | * Output Args: | ||
| 497 | * segp - Pointer to KVM segment | ||
| 498 | * | ||
| 499 | * Return: None | ||
| 500 | * | ||
| 501 | * Sets up the KVM segment pointed to by segp, to be a data segment | ||
| 502 | * with the selector value given by selector. | ||
| 503 | */ | ||
| 504 | static void kvm_seg_set_kernel_data_64bit(uint16_t selector, | ||
| 505 | struct kvm_segment *segp) | ||
| 506 | { | ||
| 507 | memset(segp, 0, sizeof(*segp)); | ||
| 508 | segp->selector = selector; | ||
| 509 | segp->limit = 0xFFFFFFFFu; | ||
| 510 | segp->s = 0x1; /* kTypeCodeData */ | ||
| 511 | segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed | ||
| 512 | * | kFlagDataWritable | ||
| 513 | */ | ||
| 514 | segp->g = true; | ||
| 515 | segp->present = true; | ||
| 516 | } | ||
| 517 | |||
| 518 | /* Address Guest Virtual to Guest Physical | ||
| 519 | * | ||
| 520 | * Input Args: | ||
| 521 | * vm - Virtual Machine | ||
| 522 | * gva - VM virtual address | ||
| 523 | * | ||
| 524 | * Output Args: None | ||
| 525 | * | ||
| 526 | * Return: | ||
| 527 | * Equivalent VM physical address | ||
| 528 | * | ||
| 529 | * Translates the VM virtual address given by gva to a VM physical | ||
| 530 | * address, within the VM given by vm, by walking the x86 page tables | ||
| 531 | * set up by virt_pg_map().  The equivalent VM physical address is | ||
| 532 | * returned. | ||
| 533 | * A TEST_ASSERT failure occurs if no mapping for the given | ||
| 534 | * VM virtual address exists. | ||
| 535 | */ | ||
| 536 | vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) | ||
| 537 | { | ||
| 538 | uint16_t index[4]; | ||
| 539 | struct pageMapL4Entry *pml4e; | ||
| 540 | struct pageDirectoryPointerEntry *pdpe; | ||
| 541 | struct pageDirectoryEntry *pde; | ||
| 542 | struct pageTableEntry *pte; | ||
| 543 | void *hva; | ||
| 544 | |||
| 545 | TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " | ||
| 546 | "unknown or unsupported guest mode, mode: 0x%x", vm->mode); | ||
| 547 | |||
| 548 | index[0] = (gva >> 12) & 0x1ffu; | ||
| 549 | index[1] = (gva >> 21) & 0x1ffu; | ||
| 550 | index[2] = (gva >> 30) & 0x1ffu; | ||
| 551 | index[3] = (gva >> 39) & 0x1ffu; | ||
| 552 | |||
| 553 | if (!vm->pgd_created) | ||
| 554 | goto unmapped_gva; | ||
| 555 | pml4e = addr_gpa2hva(vm, vm->pgd); | ||
| 556 | if (!pml4e[index[3]].present) | ||
| 557 | goto unmapped_gva; | ||
| 558 | |||
| 559 | pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); | ||
| 560 | if (!pdpe[index[2]].present) | ||
| 561 | goto unmapped_gva; | ||
| 562 | |||
| 563 | pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); | ||
| 564 | if (!pde[index[1]].present) | ||
| 565 | goto unmapped_gva; | ||
| 566 | |||
| 567 | pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); | ||
| 568 | if (!pte[index[0]].present) | ||
| 569 | goto unmapped_gva; | ||
| 570 | |||
| 571 | return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); | ||
| 572 | |||
| 573 | unmapped_gva: | ||
| 574 | TEST_ASSERT(false, "No mapping for vm virtual address, " | ||
| 575 | "gva: 0x%lx", gva); | ||
| 576 | } | ||
| 577 | |||
| 578 | void vcpu_setup(struct kvm_vm *vm, int vcpuid) | ||
| 579 | { | ||
| 580 | struct kvm_sregs sregs; | ||
| 581 | |||
| 582 | /* Set mode specific system register values. */ | ||
| 583 | vcpu_sregs_get(vm, vcpuid, &sregs); | ||
| 584 | |||
| 585 | switch (vm->mode) { | ||
| 586 | case VM_MODE_FLAT48PG: | ||
| 587 | sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; | ||
| 588 | sregs.cr4 |= X86_CR4_PAE; | ||
| 589 | sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); | ||
| 590 | |||
| 591 | kvm_seg_set_unusable(&sregs.ldt); | ||
| 592 | kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs); | ||
| 593 | kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds); | ||
| 594 | kvm_seg_set_kernel_data_64bit(0x10, &sregs.es); | ||
| 595 | break; | ||
| 596 | |||
| 597 | default: | ||
| 598 | TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); | ||
| 599 | } | ||
| 600 | vcpu_sregs_set(vm, vcpuid, &sregs); | ||
| 601 | |||
| 602 | /* If virtual translation tables have been set up, point the system | ||
| 603 | * register at them.  It's okay if they haven't been set up yet; | ||
| 604 | * the code that sets up the virtual translation tables will go | ||
| 605 | * back through any VCPUs that have already been created and set | ||
| 606 | * their values. | ||
| 607 | */ | ||
| 608 | if (vm->pgd_created) { | ||
| 609 | struct kvm_sregs sregs; | ||
| 610 | |||
| 611 | vcpu_sregs_get(vm, vcpuid, &sregs); | ||
| 612 | |||
| 613 | sregs.cr3 = vm->pgd; | ||
| 614 | vcpu_sregs_set(vm, vcpuid, &sregs); | ||
| 615 | } | ||
| 616 | } | ||
| 617 | /* Adds a vCPU with reasonable defaults (a stack, rip set to guest_code, runnable MP state) | ||
| 618 | * | ||
| 619 | * Input Args: | ||
| 620 | * vcpuid - The id of the VCPU to add to the VM. | ||
| 621 | * guest_code - The vCPU's entry point | ||
| 622 | */ | ||
| 623 | void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) | ||
| 624 | { | ||
| 625 | struct kvm_mp_state mp_state; | ||
| 626 | struct kvm_regs regs; | ||
| 627 | vm_vaddr_t stack_vaddr; | ||
| 628 | stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), | ||
| 629 | DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); | ||
| 630 | |||
| 631 | /* Create VCPU */ | ||
| 632 | vm_vcpu_add(vm, vcpuid); | ||
| 633 | |||
| 634 | /* Setup guest general purpose registers */ | ||
| 635 | vcpu_regs_get(vm, vcpuid, ®s); | ||
| 636 | regs.rflags = regs.rflags | 0x2; | ||
| 637 | regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); | ||
| 638 | regs.rip = (unsigned long) guest_code; | ||
| 639 | vcpu_regs_set(vm, vcpuid, ®s); | ||
| 640 | |||
| 641 | /* Setup the MP state */ | ||
| 642 | mp_state.mp_state = 0; | ||
| 643 | vcpu_set_mp_state(vm, vcpuid, &mp_state); | ||
| 644 | } | ||
| 645 | |||
| 646 | /* VM VCPU CPUID Set | ||
| 647 | * | ||
| 648 | * Input Args: | ||
| 649 | * vm - Virtual Machine | ||
| 650 | * vcpuid - VCPU id | ||
| 651 | * cpuid - The CPUID values to set. | ||
| 652 | * | ||
| 653 | * Output Args: None | ||
| 654 | * | ||
| 655 | * Return: void | ||
| 656 | * | ||
| 657 | * Set the VCPU's CPUID. | ||
| 658 | */ | ||
| 659 | void vcpu_set_cpuid(struct kvm_vm *vm, | ||
| 660 | uint32_t vcpuid, struct kvm_cpuid2 *cpuid) | ||
| 661 | { | ||
| 662 | struct vcpu *vcpu = vcpu_find(vm, vcpuid); | ||
| 663 | int rc; | ||
| 664 | |||
| 665 | TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); | ||
| 666 | |||
| 667 | rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); | ||
| 668 | TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", | ||
| 669 | rc, errno); | ||
| 670 | |||
| 671 | } | ||
| 672 | /* Create a VM with reasonable defaults | ||
| 673 | * | ||
| 674 | * Input Args: | ||
| 675 | * vcpuid - The id of the single VCPU to add to the VM. | ||
| 676 | * guest_code - The vCPU's entry point | ||
| 677 | * | ||
| 678 | * Output Args: None | ||
| 679 | * | ||
| 680 | * Return: | ||
| 681 | * Pointer to opaque structure that describes the created VM. | ||
| 682 | */ | ||
| 683 | struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code) | ||
| 684 | { | ||
| 685 | struct kvm_vm *vm; | ||
| 686 | |||
| 687 | /* Create VM */ | ||
| 688 | vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR); | ||
| 689 | |||
| 690 | /* Setup guest code */ | ||
| 691 | kvm_vm_elf_load(vm, program_invocation_name, 0, 0); | ||
| 692 | |||
| 693 | /* Setup IRQ Chip */ | ||
| 694 | vm_create_irqchip(vm); | ||
| 695 | |||
| 696 | /* Add the first vCPU. */ | ||
| 697 | vm_vcpu_add_default(vm, vcpuid, guest_code); | ||
| 698 | |||
| 699 | return vm; | ||
| 700 | } | ||
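
Taken together, the functions above leave each test with very little boilerplate: create a VM around the test binary itself, run the vCPU, and inspect the exit. A hedged sketch of that minimal pattern, mirroring the tests added below, follows; the port number and VCPU id are arbitrary and the sketch is not part of the patch.

/* Minimal usage sketch of the library above; port and VCPU id are arbitrary. */
#define _GNU_SOURCE
#include "test_util.h"
#include "kvm_util.h"
#include "x86.h"

#define VCPU_ID 0

static void guest_code(void)
{
	for (;;)	/* a port read exits to the host with KVM_EXIT_IO */
		__asm__ __volatile__("in %[port], %%al"
				     : : [port]"d"((uint16_t)0x1000) : "rax");
}

int main(void)
{
	struct kvm_vm *vm = vm_create_default(VCPU_ID, guest_code);
	struct kvm_run *run = vcpu_state(vm, VCPU_ID);

	_vcpu_run(vm, VCPU_ID);
	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
		    "Unexpected exit reason: %u (%s)",
		    run->exit_reason, exit_reason_str(run->exit_reason));

	kvm_vm_free(vm);
	return 0;
}
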
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c new file mode 100644 index 000000000000..090fd3f19352 --- /dev/null +++ b/tools/testing/selftests/kvm/set_sregs_test.c | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | /* | ||
| 2 | * KVM_SET_SREGS tests | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | * This is a regression test for the bug fixed by the following commit: | ||
| 9 | * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") | ||
| 10 | * | ||
| 11 | * That bug allowed a user-mode program that called the KVM_SET_SREGS | ||
| 12 | * ioctl to put a VCPU's local APIC into an invalid state. | ||
| 13 | * | ||
| 14 | */ | ||
| 15 | #define _GNU_SOURCE /* for program_invocation_short_name */ | ||
| 16 | #include <fcntl.h> | ||
| 17 | #include <stdio.h> | ||
| 18 | #include <stdlib.h> | ||
| 19 | #include <string.h> | ||
| 20 | #include <sys/ioctl.h> | ||
| 21 | |||
| 22 | #include "test_util.h" | ||
| 23 | |||
| 24 | #include "kvm_util.h" | ||
| 25 | #include "x86.h" | ||
| 26 | |||
| 27 | #define VCPU_ID 5 | ||
| 28 | |||
| 29 | int main(int argc, char *argv[]) | ||
| 30 | { | ||
| 31 | struct kvm_sregs sregs; | ||
| 32 | struct kvm_vm *vm; | ||
| 33 | int rc; | ||
| 34 | |||
| 35 | /* Tell stdout not to buffer its content */ | ||
| 36 | setbuf(stdout, NULL); | ||
| 37 | |||
| 38 | /* Create VM */ | ||
| 39 | vm = vm_create_default(VCPU_ID, NULL); | ||
| 40 | |||
| 41 | vcpu_sregs_get(vm, VCPU_ID, &sregs); | ||
| 42 | sregs.apic_base = 1 << 10; | ||
| 43 | rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); | ||
| 44 | TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", | ||
| 45 | sregs.apic_base); | ||
| 46 | sregs.apic_base = 1 << 11; | ||
| 47 | rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); | ||
| 48 | TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", | ||
| 49 | sregs.apic_base); | ||
| 50 | |||
| 51 | kvm_vm_free(vm); | ||
| 52 | |||
| 53 | return 0; | ||
| 54 | } | ||
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c new file mode 100644 index 000000000000..428e9473f5e2 --- /dev/null +++ b/tools/testing/selftests/kvm/sync_regs_test.c | |||
| @@ -0,0 +1,232 @@ | |||
| 1 | /* | ||
| 2 | * Test for x86 KVM_CAP_SYNC_REGS | ||
| 3 | * | ||
| 4 | * Copyright (C) 2018, Google LLC. | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
| 7 | * | ||
| 8 | * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, | ||
| 9 | * including requesting an invalid register set, updates to/from values | ||
| 10 | * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. | ||
| 11 | */ | ||
| 12 | |||
| 13 | #define _GNU_SOURCE /* for program_invocation_short_name */ | ||
| 14 | #include <fcntl.h> | ||
| 15 | #include <stdio.h> | ||
| 16 | #include <stdlib.h> | ||
| 17 | #include <string.h> | ||
| 18 | #include <sys/ioctl.h> | ||
| 19 | |||
| 20 | #include "test_util.h" | ||
| 21 | #include "kvm_util.h" | ||
| 22 | #include "x86.h" | ||
| 23 | |||
| 24 | #define VCPU_ID 5 | ||
| 25 | #define PORT_HOST_SYNC 0x1000 | ||
| 26 | |||
| 27 | static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1) | ||
| 28 | { | ||
| 29 | __asm__ __volatile__("in %[port], %%al" | ||
| 30 | : | ||
| 31 | : [port]"d"(port), "D"(arg0), "S"(arg1) | ||
| 32 | : "rax"); | ||
| 33 | } | ||
| 34 | |||
| 35 | #define exit_to_l0(_port, _arg0, _arg1) \ | ||
| 36 | __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1)) | ||
| 37 | |||
| 38 | #define GUEST_ASSERT(_condition) do { \ | ||
| 39 | if (!(_condition)) \ | ||
| 40 | exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\ | ||
| 41 | } while (0) | ||
| 42 | |||
| 43 | void guest_code(void) | ||
| 44 | { | ||
| 45 | for (;;) { | ||
| 46 | exit_to_l0(PORT_HOST_SYNC, "hello", 0); | ||
| 47 | asm volatile ("inc %r11"); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) | ||
| 52 | { | ||
| 53 | #define REG_COMPARE(reg) \ | ||
| 54 | TEST_ASSERT(left->reg == right->reg, \ | ||
| 55 | "Register " #reg \ | ||
| 56 | " values did not match: 0x%llx, 0x%llx\n", \ | ||
| 57 | left->reg, right->reg) | ||
| 58 | REG_COMPARE(rax); | ||
| 59 | REG_COMPARE(rbx); | ||
| 60 | REG_COMPARE(rcx); | ||
| 61 | REG_COMPARE(rdx); | ||
| 62 | REG_COMPARE(rsi); | ||
| 63 | REG_COMPARE(rdi); | ||
| 64 | REG_COMPARE(rsp); | ||
| 65 | REG_COMPARE(rbp); | ||
| 66 | REG_COMPARE(r8); | ||
| 67 | REG_COMPARE(r9); | ||
| 68 | REG_COMPARE(r10); | ||
| 69 | REG_COMPARE(r11); | ||
| 70 | REG_COMPARE(r12); | ||
| 71 | REG_COMPARE(r13); | ||
| 72 | REG_COMPARE(r14); | ||
| 73 | REG_COMPARE(r15); | ||
| 74 | REG_COMPARE(rip); | ||
| 75 | REG_COMPARE(rflags); | ||
| 76 | #undef REG_COMPARE | ||
| 77 | } | ||
| 78 | |||
| 79 | static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) | ||
| 80 | { | ||
| 81 | } | ||
| 82 | |||
| 83 | static void compare_vcpu_events(struct kvm_vcpu_events *left, | ||
| 84 | struct kvm_vcpu_events *right) | ||
| 85 | { | ||
| 86 | } | ||
| 87 | |||
| 88 | int main(int argc, char *argv[]) | ||
| 89 | { | ||
| 90 | struct kvm_vm *vm; | ||
| 91 | struct kvm_run *run; | ||
| 92 | struct kvm_regs regs; | ||
| 93 | struct kvm_sregs sregs; | ||
| 94 | struct kvm_vcpu_events events; | ||
| 95 | int rv, cap; | ||
| 96 | |||
| 97 | /* Tell stdout not to buffer its content */ | ||
| 98 | setbuf(stdout, NULL); | ||
| 99 | |||
| 100 | cap = kvm_check_cap(KVM_CAP_SYNC_REGS); | ||
| 101 | TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, | ||
| 102 | "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", | ||
| 103 | cap, KVM_SYNC_X86_VALID_FIELDS); | ||
| 104 | |||
| 105 | /* Create VM */ | ||
| 106 | vm = vm_create_default(VCPU_ID, guest_code); | ||
| 107 | |||
| 108 | run = vcpu_state(vm, VCPU_ID); | ||
| 109 | |||
| 110 | /* Request reading invalid register set from VCPU. */ | ||
| 111 | run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; | ||
| 112 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 113 | TEST_ASSERT(rv < 0 && errno == EINVAL, | ||
| 114 | "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", | ||
| 115 | rv); | ||
| 116 | vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; | ||
| 117 | |||
| 118 | /* Request setting invalid register set into VCPU. */ | ||
| 119 | run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; | ||
| 120 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 121 | TEST_ASSERT(rv < 0 && errno == EINVAL, | ||
| 122 | "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", | ||
| 123 | rv); | ||
| 124 | vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; | ||
| 125 | |||
| 126 | /* Request and verify all valid register sets. */ | ||
| 127 | /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ | ||
| 128 | run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; | ||
| 129 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 130 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 131 | "Unexpected exit reason: %u (%s),\n", | ||
| 132 | run->exit_reason, | ||
| 133 | exit_reason_str(run->exit_reason)); | ||
| 134 | |||
| 135 | vcpu_regs_get(vm, VCPU_ID, ®s); | ||
| 136 | compare_regs(®s, &run->s.regs.regs); | ||
| 137 | |||
| 138 | vcpu_sregs_get(vm, VCPU_ID, &sregs); | ||
| 139 | compare_sregs(&sregs, &run->s.regs.sregs); | ||
| 140 | |||
| 141 | vcpu_events_get(vm, VCPU_ID, &events); | ||
| 142 | compare_vcpu_events(&events, &run->s.regs.events); | ||
| 143 | |||
| 144 | /* Set and verify various register values. */ | ||
| 145 | run->s.regs.regs.r11 = 0xBAD1DEA; | ||
| 146 | run->s.regs.sregs.apic_base = 1 << 11; | ||
| 147 | /* TODO run->s.regs.events.XYZ = ABC; */ | ||
| 148 | |||
| 149 | run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; | ||
| 150 | run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; | ||
| 151 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 152 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 153 | "Unexpected exit reason: %u (%s),\n", | ||
| 154 | run->exit_reason, | ||
| 155 | exit_reason_str(run->exit_reason)); | ||
| 156 | TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, | ||
| 157 | "r11 sync regs value incorrect 0x%llx.", | ||
| 158 | run->s.regs.regs.r11); | ||
| 159 | TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, | ||
| 160 | "apic_base sync regs value incorrect 0x%llx.", | ||
| 161 | run->s.regs.sregs.apic_base); | ||
| 162 | |||
| 163 | vcpu_regs_get(vm, VCPU_ID, ®s); | ||
| 164 | compare_regs(®s, &run->s.regs.regs); | ||
| 165 | |||
| 166 | vcpu_sregs_get(vm, VCPU_ID, &sregs); | ||
| 167 | compare_sregs(&sregs, &run->s.regs.sregs); | ||
| 168 | |||
| 169 | vcpu_events_get(vm, VCPU_ID, &events); | ||
| 170 | compare_vcpu_events(&events, &run->s.regs.events); | ||
| 171 | |||
| 172 | /* Clear kvm_dirty_regs bits, verify new s.regs values are | ||
| 173 | * overwritten with existing guest values. | ||
| 174 | */ | ||
| 175 | run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; | ||
| 176 | run->kvm_dirty_regs = 0; | ||
| 177 | run->s.regs.regs.r11 = 0xDEADBEEF; | ||
| 178 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 179 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 180 | "Unexpected exit reason: %u (%s),\n", | ||
| 181 | run->exit_reason, | ||
| 182 | exit_reason_str(run->exit_reason)); | ||
| 183 | TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, | ||
| 184 | "r11 sync regs value incorrect 0x%llx.", | ||
| 185 | run->s.regs.regs.r11); | ||
| 186 | |||
| 187 | /* Clear kvm_valid_regs bits and kvm_dirty_bits. | ||
| 188 | * Verify s.regs values are not overwritten with existing guest values | ||
| 189 | * and that guest values are not overwritten with kvm_sync_regs values. | ||
| 190 | */ | ||
| 191 | run->kvm_valid_regs = 0; | ||
| 192 | run->kvm_dirty_regs = 0; | ||
| 193 | run->s.regs.regs.r11 = 0xAAAA; | ||
| 194 | regs.r11 = 0xBAC0; | ||
| 195 | vcpu_regs_set(vm, VCPU_ID, ®s); | ||
| 196 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 197 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 198 | "Unexpected exit reason: %u (%s),\n", | ||
| 199 | run->exit_reason, | ||
| 200 | exit_reason_str(run->exit_reason)); | ||
| 201 | TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, | ||
| 202 | "r11 sync regs value incorrect 0x%llx.", | ||
| 203 | run->s.regs.regs.r11); | ||
| 204 | vcpu_regs_get(vm, VCPU_ID, ®s); | ||
| 205 | TEST_ASSERT(regs.r11 == 0xBAC0 + 1, | ||
| 206 | "r11 guest value incorrect 0x%llx.", | ||
| 207 | regs.r11); | ||
| 208 | |||
| 209 | /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten | ||
| 210 | * with existing guest values but that guest values are overwritten | ||
| 211 | * with kvm_sync_regs values. | ||
| 212 | */ | ||
| 213 | run->kvm_valid_regs = 0; | ||
| 214 | run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; | ||
| 215 | run->s.regs.regs.r11 = 0xBBBB; | ||
| 216 | rv = _vcpu_run(vm, VCPU_ID); | ||
| 217 | TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, | ||
| 218 | "Unexpected exit reason: %u (%s),\n", | ||
| 219 | run->exit_reason, | ||
| 220 | exit_reason_str(run->exit_reason)); | ||
| 221 | TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, | ||
| 222 | "r11 sync regs value incorrect 0x%llx.", | ||
| 223 | run->s.regs.regs.r11); | ||
| 224 | vcpu_regs_get(vm, VCPU_ID, ®s); | ||
| 225 | TEST_ASSERT(regs.r11 == 0xBBBB + 1, | ||
| 226 | "r11 guest value incorrect 0x%llx.", | ||
| 227 | regs.r11); | ||
| 228 | |||
| 229 | kvm_vm_free(vm); | ||
| 230 | |||
| 231 | return 0; | ||
| 232 | } | ||
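
Outside the selftest wrappers, the same capability is driven directly through the kvm_run structure: kvm_valid_regs asks KVM to fill s.regs on the next exit, and kvm_dirty_regs asks it to load the (possibly modified) s.regs contents before re-entering the guest. A rough sketch against the raw ioctl interface, assuming the caller has already created the vCPU with KVM_CREATE_VCPU and mmap()ed its kvm_run area:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Assumes vcpu_fd came from KVM_CREATE_VCPU and run is its mmap()ed kvm_run. */
static void run_with_sync_regs(int vcpu_fd, struct kvm_run *run)
{
	run->kvm_valid_regs = KVM_SYNC_X86_REGS;	/* fill s.regs.regs on exit      */
	ioctl(vcpu_fd, KVM_RUN, 0);
	printf("guest r11: 0x%llx\n", run->s.regs.regs.r11);

	run->s.regs.regs.r11 = 0xBAD1DEA;		/* tweak it in place ...         */
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;	/* ... and load it on next entry */
	ioctl(vcpu_fd, KVM_RUN, 0);
}

The test above checks exactly these semantics, including that stale s.regs contents are ignored whenever kvm_dirty_regs is left clear.
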
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 8bc479fa37e6..efc84cbe8277 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c | |||
| @@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) | |||
| 178 | *vcpu_cpsr(vcpu) = cpsr; | 178 | *vcpu_cpsr(vcpu) = cpsr; |
| 179 | 179 | ||
| 180 | /* Note: These now point to the banked copies */ | 180 | /* Note: These now point to the banked copies */ |
| 181 | *vcpu_spsr(vcpu) = new_spsr_value; | 181 | vcpu_write_spsr(vcpu, new_spsr_value); |
| 182 | *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; | 182 | *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; |
| 183 | 183 | ||
| 184 | /* Branch to exception vector */ | 184 | /* Branch to exception vector */ |
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 282389eb204f..bd3d57f40f1b 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c | |||
| @@ -545,9 +545,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 545 | * The kernel may decide to run userspace after calling vcpu_put, so | 545 | * The kernel may decide to run userspace after calling vcpu_put, so |
| 546 | * we reset cntvoff to 0 to ensure a consistent read between user | 546 | * we reset cntvoff to 0 to ensure a consistent read between user |
| 547 | * accesses to the virtual counter and kernel access to the physical | 547 | * accesses to the virtual counter and kernel access to the physical |
| 548 | * counter. | 548 | * counter in the non-VHE case. For VHE, the virtual counter uses a fixed |
| 549 | * virtual offset of zero, so there is no need to zero the CNTVOFF_EL2 register. | ||
| 549 | */ | 550 | */ |
| 550 | set_cntvoff(0); | 551 | if (!has_vhe()) |
| 552 | set_cntvoff(0); | ||
| 551 | } | 553 | } |
| 552 | 554 | ||
| 553 | /* | 555 | /* |
| @@ -856,11 +858,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) | |||
| 856 | return ret; | 858 | return ret; |
| 857 | 859 | ||
| 858 | no_vgic: | 860 | no_vgic: |
| 859 | preempt_disable(); | ||
| 860 | timer->enabled = 1; | 861 | timer->enabled = 1; |
| 861 | kvm_timer_vcpu_load(vcpu); | ||
| 862 | preempt_enable(); | ||
| 863 | |||
| 864 | return 0; | 862 | return 0; |
| 865 | } | 863 | } |
| 866 | 864 | ||
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 53572304843b..dba629c5f8ac 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c | |||
| @@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 362 | kvm_arm_set_running_vcpu(vcpu); | 362 | kvm_arm_set_running_vcpu(vcpu); |
| 363 | kvm_vgic_load(vcpu); | 363 | kvm_vgic_load(vcpu); |
| 364 | kvm_timer_vcpu_load(vcpu); | 364 | kvm_timer_vcpu_load(vcpu); |
| 365 | kvm_vcpu_load_sysregs(vcpu); | ||
| 365 | } | 366 | } |
| 366 | 367 | ||
| 367 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 368 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
| 368 | { | 369 | { |
| 370 | kvm_vcpu_put_sysregs(vcpu); | ||
| 369 | kvm_timer_vcpu_put(vcpu); | 371 | kvm_timer_vcpu_put(vcpu); |
| 370 | kvm_vgic_put(vcpu); | 372 | kvm_vgic_put(vcpu); |
| 371 | 373 | ||
| @@ -420,7 +422,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
| 420 | */ | 422 | */ |
| 421 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 423 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
| 422 | { | 424 | { |
| 423 | return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v)) | 425 | bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); |
| 426 | return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) | ||
| 424 | && !v->arch.power_off && !v->arch.pause); | 427 | && !v->arch.power_off && !v->arch.pause); |
| 425 | } | 428 | } |
| 426 | 429 | ||
| @@ -632,27 +635,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 632 | if (unlikely(!kvm_vcpu_initialized(vcpu))) | 635 | if (unlikely(!kvm_vcpu_initialized(vcpu))) |
| 633 | return -ENOEXEC; | 636 | return -ENOEXEC; |
| 634 | 637 | ||
| 635 | vcpu_load(vcpu); | ||
| 636 | |||
| 637 | ret = kvm_vcpu_first_run_init(vcpu); | 638 | ret = kvm_vcpu_first_run_init(vcpu); |
| 638 | if (ret) | 639 | if (ret) |
| 639 | goto out; | 640 | return ret; |
| 640 | 641 | ||
| 641 | if (run->exit_reason == KVM_EXIT_MMIO) { | 642 | if (run->exit_reason == KVM_EXIT_MMIO) { |
| 642 | ret = kvm_handle_mmio_return(vcpu, vcpu->run); | 643 | ret = kvm_handle_mmio_return(vcpu, vcpu->run); |
| 643 | if (ret) | 644 | if (ret) |
| 644 | goto out; | 645 | return ret; |
| 645 | if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) { | 646 | if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) |
| 646 | ret = 0; | 647 | return 0; |
| 647 | goto out; | ||
| 648 | } | ||
| 649 | |||
| 650 | } | 648 | } |
| 651 | 649 | ||
| 652 | if (run->immediate_exit) { | 650 | if (run->immediate_exit) |
| 653 | ret = -EINTR; | 651 | return -EINTR; |
| 654 | goto out; | 652 | |
| 655 | } | 653 | vcpu_load(vcpu); |
| 656 | 654 | ||
| 657 | kvm_sigset_activate(vcpu); | 655 | kvm_sigset_activate(vcpu); |
| 658 | 656 | ||
| @@ -719,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 719 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || | 717 | if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || |
| 720 | kvm_request_pending(vcpu)) { | 718 | kvm_request_pending(vcpu)) { |
| 721 | vcpu->mode = OUTSIDE_GUEST_MODE; | 719 | vcpu->mode = OUTSIDE_GUEST_MODE; |
| 720 | isb(); /* Ensure work in x_flush_hwstate is committed */ | ||
| 722 | kvm_pmu_sync_hwstate(vcpu); | 721 | kvm_pmu_sync_hwstate(vcpu); |
| 723 | if (static_branch_unlikely(&userspace_irqchip_in_use)) | 722 | if (static_branch_unlikely(&userspace_irqchip_in_use)) |
| 724 | kvm_timer_sync_hwstate(vcpu); | 723 | kvm_timer_sync_hwstate(vcpu); |
| @@ -735,13 +734,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 735 | */ | 734 | */ |
| 736 | trace_kvm_entry(*vcpu_pc(vcpu)); | 735 | trace_kvm_entry(*vcpu_pc(vcpu)); |
| 737 | guest_enter_irqoff(); | 736 | guest_enter_irqoff(); |
| 738 | if (has_vhe()) | ||
| 739 | kvm_arm_vhe_guest_enter(); | ||
| 740 | |||
| 741 | ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); | ||
| 742 | 737 | ||
| 743 | if (has_vhe()) | 738 | if (has_vhe()) { |
| 739 | kvm_arm_vhe_guest_enter(); | ||
| 740 | ret = kvm_vcpu_run_vhe(vcpu); | ||
| 744 | kvm_arm_vhe_guest_exit(); | 741 | kvm_arm_vhe_guest_exit(); |
| 742 | } else { | ||
| 743 | ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); | ||
| 744 | } | ||
| 745 | |||
| 745 | vcpu->mode = OUTSIDE_GUEST_MODE; | 746 | vcpu->mode = OUTSIDE_GUEST_MODE; |
| 746 | vcpu->stat.exits++; | 747 | vcpu->stat.exits++; |
| 747 | /* | 748 | /* |
| @@ -811,7 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
| 811 | 812 | ||
| 812 | kvm_sigset_deactivate(vcpu); | 813 | kvm_sigset_deactivate(vcpu); |
| 813 | 814 | ||
| 814 | out: | ||
| 815 | vcpu_put(vcpu); | 815 | vcpu_put(vcpu); |
| 816 | return ret; | 816 | return ret; |
| 817 | } | 817 | } |
| @@ -820,18 +820,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) | |||
| 820 | { | 820 | { |
| 821 | int bit_index; | 821 | int bit_index; |
| 822 | bool set; | 822 | bool set; |
| 823 | unsigned long *ptr; | 823 | unsigned long *hcr; |
| 824 | 824 | ||
| 825 | if (number == KVM_ARM_IRQ_CPU_IRQ) | 825 | if (number == KVM_ARM_IRQ_CPU_IRQ) |
| 826 | bit_index = __ffs(HCR_VI); | 826 | bit_index = __ffs(HCR_VI); |
| 827 | else /* KVM_ARM_IRQ_CPU_FIQ */ | 827 | else /* KVM_ARM_IRQ_CPU_FIQ */ |
| 828 | bit_index = __ffs(HCR_VF); | 828 | bit_index = __ffs(HCR_VF); |
| 829 | 829 | ||
| 830 | ptr = (unsigned long *)&vcpu->arch.irq_lines; | 830 | hcr = vcpu_hcr(vcpu); |
| 831 | if (level) | 831 | if (level) |
| 832 | set = test_and_set_bit(bit_index, ptr); | 832 | set = test_and_set_bit(bit_index, hcr); |
| 833 | else | 833 | else |
| 834 | set = test_and_clear_bit(bit_index, ptr); | 834 | set = test_and_clear_bit(bit_index, hcr); |
| 835 | 835 | ||
| 836 | /* | 836 | /* |
| 837 | * If we didn't change anything, no need to wake up or kick other CPUs | 837 | * If we didn't change anything, no need to wake up or kick other CPUs |
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index f24404b3c8df..77754a62eb0c 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c | |||
| @@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) | |||
| 27 | write_sysreg(cntvoff, cntvoff_el2); | 27 | write_sysreg(cntvoff, cntvoff_el2); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | /* | ||
| 31 | * Should only be called on non-VHE systems. | ||
| 32 | * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). | ||
| 33 | */ | ||
| 30 | void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) | 34 | void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) |
| 31 | { | 35 | { |
| 32 | /* | 36 | u64 val; |
| 33 | * We don't need to do this for VHE since the host kernel runs in EL2 | ||
| 34 | * with HCR_EL2.TGE ==1, which makes those bits have no impact. | ||
| 35 | */ | ||
| 36 | if (!has_vhe()) { | ||
| 37 | u64 val; | ||
| 38 | 37 | ||
| 39 | /* Allow physical timer/counter access for the host */ | 38 | /* Allow physical timer/counter access for the host */ |
| 40 | val = read_sysreg(cnthctl_el2); | 39 | val = read_sysreg(cnthctl_el2); |
| 41 | val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; | 40 | val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; |
| 42 | write_sysreg(val, cnthctl_el2); | 41 | write_sysreg(val, cnthctl_el2); |
| 43 | } | ||
| 44 | } | 42 | } |
| 45 | 43 | ||
| 44 | /* | ||
| 45 | * Should only be called on non-VHE systems. | ||
| 46 | * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). | ||
| 47 | */ | ||
| 46 | void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) | 48 | void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) |
| 47 | { | 49 | { |
| 48 | if (!has_vhe()) { | 50 | u64 val; |
| 49 | u64 val; | ||
| 50 | 51 | ||
| 51 | /* | 52 | /* |
| 52 | * Disallow physical timer access for the guest | 53 | * Disallow physical timer access for the guest |
| 53 | * Physical counter access is allowed | 54 | * Physical counter access is allowed |
| 54 | */ | 55 | */ |
| 55 | val = read_sysreg(cnthctl_el2); | 56 | val = read_sysreg(cnthctl_el2); |
| 56 | val &= ~CNTHCTL_EL1PCEN; | 57 | val &= ~CNTHCTL_EL1PCEN; |
| 57 | val |= CNTHCTL_EL1PCTEN; | 58 | val |= CNTHCTL_EL1PCTEN; |
| 58 | write_sysreg(val, cnthctl_el2); | 59 | write_sysreg(val, cnthctl_el2); |
| 59 | } | ||
| 60 | } | 60 | } |
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c deleted file mode 100644 index 4fe6e797e8b3..000000000000 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ /dev/null | |||
| @@ -1,159 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 2012-2015 - ARM Ltd | ||
| 3 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify | ||
| 6 | * it under the terms of the GNU General Public License version 2 as | ||
| 7 | * published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/compiler.h> | ||
| 19 | #include <linux/irqchip/arm-gic.h> | ||
| 20 | #include <linux/kvm_host.h> | ||
| 21 | |||
| 22 | #include <asm/kvm_emulate.h> | ||
| 23 | #include <asm/kvm_hyp.h> | ||
| 24 | #include <asm/kvm_mmu.h> | ||
| 25 | |||
| 26 | static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) | ||
| 27 | { | ||
| 28 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
| 29 | int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; | ||
| 30 | u32 elrsr0, elrsr1; | ||
| 31 | |||
| 32 | elrsr0 = readl_relaxed(base + GICH_ELRSR0); | ||
| 33 | if (unlikely(nr_lr > 32)) | ||
| 34 | elrsr1 = readl_relaxed(base + GICH_ELRSR1); | ||
| 35 | else | ||
| 36 | elrsr1 = 0; | ||
| 37 | |||
| 38 | cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; | ||
| 39 | } | ||
| 40 | |||
| 41 | static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) | ||
| 42 | { | ||
| 43 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
| 44 | int i; | ||
| 45 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 46 | |||
| 47 | for (i = 0; i < used_lrs; i++) { | ||
| 48 | if (cpu_if->vgic_elrsr & (1UL << i)) | ||
| 49 | cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; | ||
| 50 | else | ||
| 51 | cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); | ||
| 52 | |||
| 53 | writel_relaxed(0, base + GICH_LR0 + (i * 4)); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | /* vcpu is already in the HYP VA space */ | ||
| 58 | void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) | ||
| 59 | { | ||
| 60 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
| 61 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
| 62 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
| 63 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | ||
| 64 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 65 | |||
| 66 | if (!base) | ||
| 67 | return; | ||
| 68 | |||
| 69 | if (used_lrs) { | ||
| 70 | cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); | ||
| 71 | |||
| 72 | save_elrsr(vcpu, base); | ||
| 73 | save_lrs(vcpu, base); | ||
| 74 | |||
| 75 | writel_relaxed(0, base + GICH_HCR); | ||
| 76 | } else { | ||
| 77 | cpu_if->vgic_elrsr = ~0UL; | ||
| 78 | cpu_if->vgic_apr = 0; | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 | /* vcpu is already in the HYP VA space */ | ||
| 83 | void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) | ||
| 84 | { | ||
| 85 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
| 86 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
| 87 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
| 88 | void __iomem *base = kern_hyp_va(vgic->vctrl_base); | ||
| 89 | int i; | ||
| 90 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 91 | |||
| 92 | if (!base) | ||
| 93 | return; | ||
| 94 | |||
| 95 | if (used_lrs) { | ||
| 96 | writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); | ||
| 97 | writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); | ||
| 98 | for (i = 0; i < used_lrs; i++) { | ||
| 99 | writel_relaxed(cpu_if->vgic_lr[i], | ||
| 100 | base + GICH_LR0 + (i * 4)); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | #ifdef CONFIG_ARM64 | ||
| 106 | /* | ||
| 107 | * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the | ||
| 108 | * guest. | ||
| 109 | * | ||
| 110 | * @vcpu: the offending vcpu | ||
| 111 | * | ||
| 112 | * Returns: | ||
| 113 | * 1: GICV access successfully performed | ||
| 114 | * 0: Not a GICV access | ||
| 115 | * -1: Illegal GICV access | ||
| 116 | */ | ||
| 117 | int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) | ||
| 118 | { | ||
| 119 | struct kvm *kvm = kern_hyp_va(vcpu->kvm); | ||
| 120 | struct vgic_dist *vgic = &kvm->arch.vgic; | ||
| 121 | phys_addr_t fault_ipa; | ||
| 122 | void __iomem *addr; | ||
| 123 | int rd; | ||
| 124 | |||
| 125 | /* Build the full address */ | ||
| 126 | fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); | ||
| 127 | fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); | ||
| 128 | |||
| 129 | /* If not for GICV, move on */ | ||
| 130 | if (fault_ipa < vgic->vgic_cpu_base || | ||
| 131 | fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) | ||
| 132 | return 0; | ||
| 133 | |||
| 134 | /* Reject anything but a 32bit access */ | ||
| 135 | if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) | ||
| 136 | return -1; | ||
| 137 | |||
| 138 | /* Not aligned? Don't bother */ | ||
| 139 | if (fault_ipa & 3) | ||
| 140 | return -1; | ||
| 141 | |||
| 142 | rd = kvm_vcpu_dabt_get_rd(vcpu); | ||
| 143 | addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); | ||
| 144 | addr += fault_ipa - vgic->vgic_cpu_base; | ||
| 145 | |||
| 146 | if (kvm_vcpu_dabt_iswrite(vcpu)) { | ||
| 147 | u32 data = vcpu_data_guest_to_host(vcpu, | ||
| 148 | vcpu_get_reg(vcpu, rd), | ||
| 149 | sizeof(u32)); | ||
| 150 | writel_relaxed(data, addr); | ||
| 151 | } else { | ||
| 152 | u32 data = readl_relaxed(addr); | ||
| 153 | vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, | ||
| 154 | sizeof(u32))); | ||
| 155 | } | ||
| 156 | |||
| 157 | return 1; | ||
| 158 | } | ||
| 159 | #endif | ||
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index b89ce5432214..616e5a433ab0 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | 21 | ||
| 22 | #include <asm/kvm_emulate.h> | 22 | #include <asm/kvm_emulate.h> |
| 23 | #include <asm/kvm_hyp.h> | 23 | #include <asm/kvm_hyp.h> |
| 24 | #include <asm/kvm_mmu.h> | ||
| 24 | 25 | ||
| 25 | #define vtr_to_max_lr_idx(v) ((v) & 0xf) | 26 | #define vtr_to_max_lr_idx(v) ((v) & 0xf) |
| 26 | #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) | 27 | #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) |
| @@ -208,89 +209,68 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) | |||
| 208 | { | 209 | { |
| 209 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 210 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
| 210 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | 211 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; |
| 211 | u64 val; | ||
| 212 | 212 | ||
| 213 | /* | 213 | /* |
| 214 | * Make sure stores to the GIC via the memory mapped interface | 214 | * Make sure stores to the GIC via the memory mapped interface |
| 215 | * are now visible to the system register interface. | 215 | * are now visible to the system register interface when reading the |
| 216 | * LRs, and when reading back the VMCR on non-VHE systems. | ||
| 216 | */ | 217 | */ |
| 217 | if (!cpu_if->vgic_sre) { | 218 | if (used_lrs || !has_vhe()) { |
| 218 | dsb(sy); | 219 | if (!cpu_if->vgic_sre) { |
| 219 | isb(); | 220 | dsb(sy); |
| 220 | cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); | 221 | isb(); |
| 222 | } | ||
| 221 | } | 223 | } |
| 222 | 224 | ||
| 223 | if (used_lrs) { | 225 | if (used_lrs) { |
| 224 | int i; | 226 | int i; |
| 225 | u32 nr_pre_bits; | 227 | u32 elrsr; |
| 226 | 228 | ||
| 227 | cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); | 229 | elrsr = read_gicreg(ICH_ELSR_EL2); |
| 228 | 230 | ||
| 229 | write_gicreg(0, ICH_HCR_EL2); | 231 | write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); |
| 230 | val = read_gicreg(ICH_VTR_EL2); | ||
| 231 | nr_pre_bits = vtr_to_nr_pre_bits(val); | ||
| 232 | 232 | ||
| 233 | for (i = 0; i < used_lrs; i++) { | 233 | for (i = 0; i < used_lrs; i++) { |
| 234 | if (cpu_if->vgic_elrsr & (1 << i)) | 234 | if (elrsr & (1 << i)) |
| 235 | cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; | 235 | cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; |
| 236 | else | 236 | else |
| 237 | cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); | 237 | cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); |
| 238 | 238 | ||
| 239 | __gic_v3_set_lr(0, i); | 239 | __gic_v3_set_lr(0, i); |
| 240 | } | 240 | } |
| 241 | } | ||
| 242 | } | ||
| 241 | 243 | ||
| 242 | switch (nr_pre_bits) { | 244 | void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) |
| 243 | case 7: | 245 | { |
| 244 | cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); | 246 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
| 245 | cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); | 247 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; |
| 246 | case 6: | 248 | int i; |
| 247 | cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); | ||
| 248 | default: | ||
| 249 | cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); | ||
| 250 | } | ||
| 251 | 249 | ||
| 252 | switch (nr_pre_bits) { | 250 | if (used_lrs) { |
| 253 | case 7: | 251 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); |
| 254 | cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); | ||
| 255 | cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); | ||
| 256 | case 6: | ||
| 257 | cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); | ||
| 258 | default: | ||
| 259 | cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); | ||
| 260 | } | ||
| 261 | } else { | ||
| 262 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) || | ||
| 263 | cpu_if->its_vpe.its_vm) | ||
| 264 | write_gicreg(0, ICH_HCR_EL2); | ||
| 265 | |||
| 266 | cpu_if->vgic_elrsr = 0xffff; | ||
| 267 | cpu_if->vgic_ap0r[0] = 0; | ||
| 268 | cpu_if->vgic_ap0r[1] = 0; | ||
| 269 | cpu_if->vgic_ap0r[2] = 0; | ||
| 270 | cpu_if->vgic_ap0r[3] = 0; | ||
| 271 | cpu_if->vgic_ap1r[0] = 0; | ||
| 272 | cpu_if->vgic_ap1r[1] = 0; | ||
| 273 | cpu_if->vgic_ap1r[2] = 0; | ||
| 274 | cpu_if->vgic_ap1r[3] = 0; | ||
| 275 | } | ||
| 276 | 252 | ||
| 277 | val = read_gicreg(ICC_SRE_EL2); | 253 | for (i = 0; i < used_lrs; i++) |
| 278 | write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); | 254 | __gic_v3_set_lr(cpu_if->vgic_lr[i], i); |
| 255 | } | ||
| 279 | 256 | ||
| 280 | if (!cpu_if->vgic_sre) { | 257 | /* |
| 281 | /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ | 258 | * Ensure that writes to the LRs, and on non-VHE systems ensure that |
| 282 | isb(); | 259 | * the write to the VMCR in __vgic_v3_activate_traps(), will have |
| 283 | write_gicreg(1, ICC_SRE_EL1); | 260 | * reached the (re)distributors. This ensures the guest will read the |
| 261 | * correct values from the memory-mapped interface. | ||
| 262 | */ | ||
| 263 | if (used_lrs || !has_vhe()) { | ||
| 264 | if (!cpu_if->vgic_sre) { | ||
| 265 | isb(); | ||
| 266 | dsb(sy); | ||
| 267 | } | ||
| 284 | } | 268 | } |
| 285 | } | 269 | } |
| 286 | 270 | ||
| 287 | void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | 271 | void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu) |
| 288 | { | 272 | { |
| 289 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | 273 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
| 290 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 291 | u64 val; | ||
| 292 | u32 nr_pre_bits; | ||
| 293 | int i; | ||
| 294 | 274 | ||
| 295 | /* | 275 | /* |
| 296 | * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a | 276 | * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a |
| @@ -299,70 +279,135 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) | |||
| 299 | * consequences. So we must make sure that ICC_SRE_EL1 has | 279 | * consequences. So we must make sure that ICC_SRE_EL1 has |
| 300 | * been actually programmed with the value we want before | 280 | * been actually programmed with the value we want before |
| 301 | * starting to mess with the rest of the GIC, and VMCR_EL2 in | 281 | * starting to mess with the rest of the GIC, and VMCR_EL2 in |
| 302 | * particular. | 282 | * particular. This logic must be called before |
| 283 | * __vgic_v3_restore_state(). | ||
| 303 | */ | 284 | */ |
| 304 | if (!cpu_if->vgic_sre) { | 285 | if (!cpu_if->vgic_sre) { |
| 305 | write_gicreg(0, ICC_SRE_EL1); | 286 | write_gicreg(0, ICC_SRE_EL1); |
| 306 | isb(); | 287 | isb(); |
| 307 | write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); | 288 | write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); |
| 289 | |||
| 290 | |||
| 291 | if (has_vhe()) { | ||
| 292 | /* | ||
| 293 | * Ensure that the write to the VMCR will have reached | ||
| 294 | * the (re)distributors. This ensures the guest will | ||
| 295 | * read the correct values from the memory-mapped | ||
| 296 | * interface. | ||
| 297 | */ | ||
| 298 | isb(); | ||
| 299 | dsb(sy); | ||
| 300 | } | ||
| 308 | } | 301 | } |
| 309 | 302 | ||
| 310 | val = read_gicreg(ICH_VTR_EL2); | 303 | /* |
| 311 | nr_pre_bits = vtr_to_nr_pre_bits(val); | 304 | * Prevent the guest from touching the GIC system registers if |
| 305 | * SRE isn't enabled for GICv3 emulation. | ||
| 306 | */ | ||
| 307 | write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, | ||
| 308 | ICC_SRE_EL2); | ||
| 312 | 309 | ||
| 313 | if (used_lrs) { | 310 | /* |
| 311 | * If we need to trap system registers, we must write | ||
| 312 | * ICH_HCR_EL2 anyway, even if no interrupts are being | ||
| 313 | * injected. | ||
| 314 | */ | ||
| 315 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) || | ||
| 316 | cpu_if->its_vpe.its_vm) | ||
| 314 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); | 317 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); |
| 318 | } | ||
| 315 | 319 | ||
| 316 | switch (nr_pre_bits) { | 320 | void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu) |
| 317 | case 7: | 321 | { |
| 318 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); | 322 | struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; |
| 319 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); | 323 | u64 val; |
| 320 | case 6: | ||
| 321 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); | ||
| 322 | default: | ||
| 323 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); | ||
| 324 | } | ||
| 325 | |||
| 326 | switch (nr_pre_bits) { | ||
| 327 | case 7: | ||
| 328 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); | ||
| 329 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); | ||
| 330 | case 6: | ||
| 331 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); | ||
| 332 | default: | ||
| 333 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); | ||
| 334 | } | ||
| 335 | 324 | ||
| 336 | for (i = 0; i < used_lrs; i++) | 325 | if (!cpu_if->vgic_sre) { |
| 337 | __gic_v3_set_lr(cpu_if->vgic_lr[i], i); | 326 | cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); |
| 338 | } else { | ||
| 339 | /* | ||
| 340 | * If we need to trap system registers, we must write | ||
| 341 | * ICH_HCR_EL2 anyway, even if no interrupts are being | ||
| 342 | * injected. Same thing if GICv4 is used, as VLPI | ||
| 343 | * delivery is gated by ICH_HCR_EL2.En. | ||
| 344 | */ | ||
| 345 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) || | ||
| 346 | cpu_if->its_vpe.its_vm) | ||
| 347 | write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); | ||
| 348 | } | 327 | } |
| 349 | 328 | ||
| 350 | /* | 329 | val = read_gicreg(ICC_SRE_EL2); |
| 351 | * Ensures that the above will have reached the | 330 | write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); |
| 352 | * (re)distributors. This ensure the guest will read the | 331 | |
| 353 | * correct values from the memory-mapped interface. | ||
| 354 | */ | ||
| 355 | if (!cpu_if->vgic_sre) { | 332 | if (!cpu_if->vgic_sre) { |
| 333 | /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ | ||
| 356 | isb(); | 334 | isb(); |
| 357 | dsb(sy); | 335 | write_gicreg(1, ICC_SRE_EL1); |
| 358 | } | 336 | } |
| 359 | 337 | ||
| 360 | /* | 338 | /* |
| 361 | * Prevent the guest from touching the GIC system registers if | 339 | * If we were trapping system registers, we enabled the VGIC even if |
| 362 | * SRE isn't enabled for GICv3 emulation. | 340 | * no interrupts were being injected, and we disable it again here. |
| 363 | */ | 341 | */ |
| 364 | write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, | 342 | if (static_branch_unlikely(&vgic_v3_cpuif_trap) || |
| 365 | ICC_SRE_EL2); | 343 | cpu_if->its_vpe.its_vm) |
| 344 | write_gicreg(0, ICH_HCR_EL2); | ||
| 345 | } | ||
| 346 | |||
| 347 | void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) | ||
| 348 | { | ||
| 349 | struct vgic_v3_cpu_if *cpu_if; | ||
| 350 | u64 val; | ||
| 351 | u32 nr_pre_bits; | ||
| 352 | |||
| 353 | vcpu = kern_hyp_va(vcpu); | ||
| 354 | cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | ||
| 355 | |||
| 356 | val = read_gicreg(ICH_VTR_EL2); | ||
| 357 | nr_pre_bits = vtr_to_nr_pre_bits(val); | ||
| 358 | |||
| 359 | switch (nr_pre_bits) { | ||
| 360 | case 7: | ||
| 361 | cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); | ||
| 362 | cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); | ||
| 363 | case 6: | ||
| 364 | cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); | ||
| 365 | default: | ||
| 366 | cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); | ||
| 367 | } | ||
| 368 | |||
| 369 | switch (nr_pre_bits) { | ||
| 370 | case 7: | ||
| 371 | cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); | ||
| 372 | cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); | ||
| 373 | case 6: | ||
| 374 | cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); | ||
| 375 | default: | ||
| 376 | cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) | ||
| 381 | { | ||
| 382 | struct vgic_v3_cpu_if *cpu_if; | ||
| 383 | u64 val; | ||
| 384 | u32 nr_pre_bits; | ||
| 385 | |||
| 386 | vcpu = kern_hyp_va(vcpu); | ||
| 387 | cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; | ||
| 388 | |||
| 389 | val = read_gicreg(ICH_VTR_EL2); | ||
| 390 | nr_pre_bits = vtr_to_nr_pre_bits(val); | ||
| 391 | |||
| 392 | switch (nr_pre_bits) { | ||
| 393 | case 7: | ||
| 394 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); | ||
| 395 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); | ||
| 396 | case 6: | ||
| 397 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); | ||
| 398 | default: | ||
| 399 | __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); | ||
| 400 | } | ||
| 401 | |||
| 402 | switch (nr_pre_bits) { | ||
| 403 | case 7: | ||
| 404 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); | ||
| 405 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); | ||
| 406 | case 6: | ||
| 407 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); | ||
| 408 | default: | ||
| 409 | __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); | ||
| 410 | } | ||
| 366 | } | 411 | } |
| 367 | 412 | ||
| 368 | void __hyp_text __vgic_v3_init_lrs(void) | 413 | void __hyp_text __vgic_v3_init_lrs(void) |
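The fall-through switch statements in __vgic_v3_save_aprs() and __vgic_v3_restore_aprs() above are driven by ICH_VTR_EL2.PREbits via vtr_to_nr_pre_bits(). As a compact restatement (a sketch added for clarity, not patch code; the helper name is assumed), the number of active-priority registers touched per group works out as follows:

	/* Illustrative helper, assumed for explanation only. */
	static inline int nr_apr_regs(u64 ich_vtr_el2)
	{
		/* vtr_to_nr_pre_bits(): bits [28:26] of ICH_VTR_EL2, plus one */
		u32 nr_pre_bits = (((u32)ich_vtr_el2 >> 26) & 7) + 1;

		switch (nr_pre_bits) {
		case 7:		/* 7 preemption bits: AP0R0..3 and AP1R0..3 are live */
			return 4;
		case 6:		/* 6 preemption bits: AP0R0..1 and AP1R0..1 are live */
			return 2;
		default:	/* 5 or fewer preemption bits: only AP0R0 and AP1R0 */
			return 1;
		}
	}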
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b960acdd0c05..7f6a944db23d 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c | |||
| @@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start; | |||
| 43 | static unsigned long hyp_idmap_end; | 43 | static unsigned long hyp_idmap_end; |
| 44 | static phys_addr_t hyp_idmap_vector; | 44 | static phys_addr_t hyp_idmap_vector; |
| 45 | 45 | ||
| 46 | static unsigned long io_map_base; | ||
| 47 | |||
| 46 | #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) | 48 | #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) |
| 47 | #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) | 49 | #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) |
| 48 | 50 | ||
| @@ -479,7 +481,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) | |||
| 479 | clear_hyp_pgd_entry(pgd); | 481 | clear_hyp_pgd_entry(pgd); |
| 480 | } | 482 | } |
| 481 | 483 | ||
| 482 | static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) | 484 | static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd) |
| 485 | { | ||
| 486 | return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1); | ||
| 487 | } | ||
| 488 | |||
| 489 | static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd, | ||
| 490 | phys_addr_t start, u64 size) | ||
| 483 | { | 491 | { |
| 484 | pgd_t *pgd; | 492 | pgd_t *pgd; |
| 485 | phys_addr_t addr = start, end = start + size; | 493 | phys_addr_t addr = start, end = start + size; |
| @@ -489,7 +497,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) | |||
| 489 | * We don't unmap anything from HYP, except at the hyp tear down. | 497 | * We don't unmap anything from HYP, except at the hyp tear down. |
| 490 | * Hence, we don't have to invalidate the TLBs here. | 498 | * Hence, we don't have to invalidate the TLBs here. |
| 491 | */ | 499 | */ |
| 492 | pgd = pgdp + pgd_index(addr); | 500 | pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); |
| 493 | do { | 501 | do { |
| 494 | next = pgd_addr_end(addr, end); | 502 | next = pgd_addr_end(addr, end); |
| 495 | if (!pgd_none(*pgd)) | 503 | if (!pgd_none(*pgd)) |
| @@ -497,32 +505,50 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) | |||
| 497 | } while (pgd++, addr = next, addr != end); | 505 | } while (pgd++, addr = next, addr != end); |
| 498 | } | 506 | } |
| 499 | 507 | ||
| 508 | static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) | ||
| 509 | { | ||
| 510 | __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size); | ||
| 511 | } | ||
| 512 | |||
| 513 | static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) | ||
| 514 | { | ||
| 515 | __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size); | ||
| 516 | } | ||
| 517 | |||
| 500 | /** | 518 | /** |
| 501 | * free_hyp_pgds - free Hyp-mode page tables | 519 | * free_hyp_pgds - free Hyp-mode page tables |
| 502 | * | 520 | * |
| 503 | * Assumes hyp_pgd is a page table used strictly in Hyp-mode and | 521 | * Assumes hyp_pgd is a page table used strictly in Hyp-mode and |
| 504 | * therefore contains either mappings in the kernel memory area (above | 522 | * therefore contains either mappings in the kernel memory area (above |
| 505 | * PAGE_OFFSET), or device mappings in the vmalloc range (from | 523 | * PAGE_OFFSET), or device mappings in the idmap range. |
| 506 | * VMALLOC_START to VMALLOC_END). | ||
| 507 | * | 524 | * |
| 508 | * boot_hyp_pgd should only map two pages for the init code. | 525 | * boot_hyp_pgd should only map the idmap range, and is only used in |
| 526 | * the extended idmap case. | ||
| 509 | */ | 527 | */ |
| 510 | void free_hyp_pgds(void) | 528 | void free_hyp_pgds(void) |
| 511 | { | 529 | { |
| 530 | pgd_t *id_pgd; | ||
| 531 | |||
| 512 | mutex_lock(&kvm_hyp_pgd_mutex); | 532 | mutex_lock(&kvm_hyp_pgd_mutex); |
| 513 | 533 | ||
| 534 | id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd; | ||
| 535 | |||
| 536 | if (id_pgd) { | ||
| 537 | /* In case we never called hyp_mmu_init() */ | ||
| 538 | if (!io_map_base) | ||
| 539 | io_map_base = hyp_idmap_start; | ||
| 540 | unmap_hyp_idmap_range(id_pgd, io_map_base, | ||
| 541 | hyp_idmap_start + PAGE_SIZE - io_map_base); | ||
| 542 | } | ||
| 543 | |||
| 514 | if (boot_hyp_pgd) { | 544 | if (boot_hyp_pgd) { |
| 515 | unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); | ||
| 516 | free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); | 545 | free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); |
| 517 | boot_hyp_pgd = NULL; | 546 | boot_hyp_pgd = NULL; |
| 518 | } | 547 | } |
| 519 | 548 | ||
| 520 | if (hyp_pgd) { | 549 | if (hyp_pgd) { |
| 521 | unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); | ||
| 522 | unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), | 550 | unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), |
| 523 | (uintptr_t)high_memory - PAGE_OFFSET); | 551 | (uintptr_t)high_memory - PAGE_OFFSET); |
| 524 | unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), | ||
| 525 | VMALLOC_END - VMALLOC_START); | ||
| 526 | 552 | ||
| 527 | free_pages((unsigned long)hyp_pgd, hyp_pgd_order); | 553 | free_pages((unsigned long)hyp_pgd, hyp_pgd_order); |
| 528 | hyp_pgd = NULL; | 554 | hyp_pgd = NULL; |
| @@ -634,7 +660,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, | |||
| 634 | addr = start & PAGE_MASK; | 660 | addr = start & PAGE_MASK; |
| 635 | end = PAGE_ALIGN(end); | 661 | end = PAGE_ALIGN(end); |
| 636 | do { | 662 | do { |
| 637 | pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); | 663 | pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); |
| 638 | 664 | ||
| 639 | if (pgd_none(*pgd)) { | 665 | if (pgd_none(*pgd)) { |
| 640 | pud = pud_alloc_one(NULL, addr); | 666 | pud = pud_alloc_one(NULL, addr); |
| @@ -708,29 +734,115 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) | |||
| 708 | return 0; | 734 | return 0; |
| 709 | } | 735 | } |
| 710 | 736 | ||
| 737 | static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, | ||
| 738 | unsigned long *haddr, pgprot_t prot) | ||
| 739 | { | ||
| 740 | pgd_t *pgd = hyp_pgd; | ||
| 741 | unsigned long base; | ||
| 742 | int ret = 0; | ||
| 743 | |||
| 744 | mutex_lock(&kvm_hyp_pgd_mutex); | ||
| 745 | |||
| 746 | /* | ||
| 747 | * This assumes that we have enough space below the idmap | ||
| 748 | * page to allocate our VAs. If not, the check below will | ||
| 749 | * kick in. A potential alternative would be to detect that | ||
| 750 | * overflow and switch to an allocation above the idmap. | ||
| 751 | * | ||
| 752 | * The allocated size is always a multiple of PAGE_SIZE. | ||
| 753 | */ | ||
| 754 | size = PAGE_ALIGN(size + offset_in_page(phys_addr)); | ||
| 755 | base = io_map_base - size; | ||
| 756 | |||
| 757 | /* | ||
| 758 | * Verify that BIT(VA_BITS - 1) hasn't been flipped by | ||
| 759 | * allocating the new area, as it would indicate we've | ||
| 760 | * overflowed the idmap/IO address range. | ||
| 761 | */ | ||
| 762 | if ((base ^ io_map_base) & BIT(VA_BITS - 1)) | ||
| 763 | ret = -ENOMEM; | ||
| 764 | else | ||
| 765 | io_map_base = base; | ||
| 766 | |||
| 767 | mutex_unlock(&kvm_hyp_pgd_mutex); | ||
| 768 | |||
| 769 | if (ret) | ||
| 770 | goto out; | ||
| 771 | |||
| 772 | if (__kvm_cpu_uses_extended_idmap()) | ||
| 773 | pgd = boot_hyp_pgd; | ||
| 774 | |||
| 775 | ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), | ||
| 776 | base, base + size, | ||
| 777 | __phys_to_pfn(phys_addr), prot); | ||
| 778 | if (ret) | ||
| 779 | goto out; | ||
| 780 | |||
| 781 | *haddr = base + offset_in_page(phys_addr); | ||
| 782 | |||
| 783 | out: | ||
| 784 | return ret; | ||
| 785 | } | ||
| 786 | |||
| 711 | /** | 787 | /** |
| 712 | * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode | 788 | * create_hyp_io_mappings - Map IO into both kernel and HYP |
| 713 | * @from: The kernel start VA of the range | ||
| 714 | * @to: The kernel end VA of the range (exclusive) | ||
| 715 | * @phys_addr: The physical start address which gets mapped | 789 | * @phys_addr: The physical start address which gets mapped |
| 716 | * | 790 | * @size: Size of the region being mapped |
| 717 | * The resulting HYP VA is the same as the kernel VA, modulo | 791 | * @kaddr: Kernel VA for this mapping |
| 718 | * HYP_PAGE_OFFSET. | 792 | * @haddr: HYP VA for this mapping |
| 719 | */ | 793 | */ |
| 720 | int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) | 794 | int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, |
| 795 | void __iomem **kaddr, | ||
| 796 | void __iomem **haddr) | ||
| 721 | { | 797 | { |
| 722 | unsigned long start = kern_hyp_va((unsigned long)from); | 798 | unsigned long addr; |
| 723 | unsigned long end = kern_hyp_va((unsigned long)to); | 799 | int ret; |
| 724 | 800 | ||
| 725 | if (is_kernel_in_hyp_mode()) | 801 | *kaddr = ioremap(phys_addr, size); |
| 802 | if (!*kaddr) | ||
| 803 | return -ENOMEM; | ||
| 804 | |||
| 805 | if (is_kernel_in_hyp_mode()) { | ||
| 806 | *haddr = *kaddr; | ||
| 726 | return 0; | 807 | return 0; |
| 808 | } | ||
| 727 | 809 | ||
| 728 | /* Check for a valid kernel IO mapping */ | 810 | ret = __create_hyp_private_mapping(phys_addr, size, |
| 729 | if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) | 811 | &addr, PAGE_HYP_DEVICE); |
| 730 | return -EINVAL; | 812 | if (ret) { |
| 813 | iounmap(*kaddr); | ||
| 814 | *kaddr = NULL; | ||
| 815 | *haddr = NULL; | ||
| 816 | return ret; | ||
| 817 | } | ||
| 818 | |||
| 819 | *haddr = (void __iomem *)addr; | ||
| 820 | return 0; | ||
| 821 | } | ||
| 731 | 822 | ||
| 732 | return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, | 823 | /** |
| 733 | __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); | 824 | * create_hyp_exec_mappings - Map an executable range into HYP |
| 825 | * @phys_addr: The physical start address which gets mapped | ||
| 826 | * @size: Size of the region being mapped | ||
| 827 | * @haddr: HYP VA for this mapping | ||
| 828 | */ | ||
| 829 | int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, | ||
| 830 | void **haddr) | ||
| 831 | { | ||
| 832 | unsigned long addr; | ||
| 833 | int ret; | ||
| 834 | |||
| 835 | BUG_ON(is_kernel_in_hyp_mode()); | ||
| 836 | |||
| 837 | ret = __create_hyp_private_mapping(phys_addr, size, | ||
| 838 | &addr, PAGE_HYP_EXEC); | ||
| 839 | if (ret) { | ||
| 840 | *haddr = NULL; | ||
| 841 | return ret; | ||
| 842 | } | ||
| 843 | |||
| 844 | *haddr = (void *)addr; | ||
| 845 | return 0; | ||
| 734 | } | 846 | } |
| 735 | 847 | ||
| 736 | /** | 848 | /** |
| @@ -1801,7 +1913,9 @@ int kvm_mmu_init(void) | |||
| 1801 | int err; | 1913 | int err; |
| 1802 | 1914 | ||
| 1803 | hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); | 1915 | hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); |
| 1916 | hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); | ||
| 1804 | hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); | 1917 | hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); |
| 1918 | hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); | ||
| 1805 | hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); | 1919 | hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); |
| 1806 | 1920 | ||
| 1807 | /* | 1921 | /* |
| @@ -1812,10 +1926,11 @@ int kvm_mmu_init(void) | |||
| 1812 | 1926 | ||
| 1813 | kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); | 1927 | kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); |
| 1814 | kvm_debug("HYP VA range: %lx:%lx\n", | 1928 | kvm_debug("HYP VA range: %lx:%lx\n", |
| 1815 | kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); | 1929 | kern_hyp_va(PAGE_OFFSET), |
| 1930 | kern_hyp_va((unsigned long)high_memory - 1)); | ||
| 1816 | 1931 | ||
| 1817 | if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && | 1932 | if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && |
| 1818 | hyp_idmap_start < kern_hyp_va(~0UL) && | 1933 | hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) && |
| 1819 | hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { | 1934 | hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { |
| 1820 | /* | 1935 | /* |
| 1821 | * The idmap page is intersecting with the VA space, | 1936 | * The idmap page is intersecting with the VA space, |
| @@ -1859,6 +1974,7 @@ int kvm_mmu_init(void) | |||
| 1859 | goto out; | 1974 | goto out; |
| 1860 | } | 1975 | } |
| 1861 | 1976 | ||
| 1977 | io_map_base = hyp_idmap_start; | ||
| 1862 | return 0; | 1978 | return 0; |
| 1863 | out: | 1979 | out: |
| 1864 | free_hyp_pgds(); | 1980 | free_hyp_pgds(); |
| @@ -2035,7 +2151,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | |||
| 2035 | */ | 2151 | */ |
| 2036 | void kvm_set_way_flush(struct kvm_vcpu *vcpu) | 2152 | void kvm_set_way_flush(struct kvm_vcpu *vcpu) |
| 2037 | { | 2153 | { |
| 2038 | unsigned long hcr = vcpu_get_hcr(vcpu); | 2154 | unsigned long hcr = *vcpu_hcr(vcpu); |
| 2039 | 2155 | ||
| 2040 | /* | 2156 | /* |
| 2041 | * If this is the first time we do a S/W operation | 2157 | * If this is the first time we do a S/W operation |
| @@ -2050,7 +2166,7 @@ void kvm_set_way_flush(struct kvm_vcpu *vcpu) | |||
| 2050 | trace_kvm_set_way_flush(*vcpu_pc(vcpu), | 2166 | trace_kvm_set_way_flush(*vcpu_pc(vcpu), |
| 2051 | vcpu_has_cache_enabled(vcpu)); | 2167 | vcpu_has_cache_enabled(vcpu)); |
| 2052 | stage2_flush_vm(vcpu->kvm); | 2168 | stage2_flush_vm(vcpu->kvm); |
| 2053 | vcpu_set_hcr(vcpu, hcr | HCR_TVM); | 2169 | *vcpu_hcr(vcpu) = hcr | HCR_TVM; |
| 2054 | } | 2170 | } |
| 2055 | } | 2171 | } |
| 2056 | 2172 | ||
| @@ -2068,7 +2184,7 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) | |||
| 2068 | 2184 | ||
| 2069 | /* Caches are now on, stop trapping VM ops (until a S/W op) */ | 2185 | /* Caches are now on, stop trapping VM ops (until a S/W op) */ |
| 2070 | if (now_enabled) | 2186 | if (now_enabled) |
| 2071 | vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); | 2187 | *vcpu_hcr(vcpu) &= ~HCR_TVM; |
| 2072 | 2188 | ||
| 2073 | trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); | 2189 | trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); |
| 2074 | } | 2190 | } |
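With the reworked create_hyp_io_mappings() above, callers no longer ioremap() a region themselves and pass kernel VAs; they hand over the physical range and receive both the kernel and the HYP mapping. A minimal usage sketch of the new calling convention (the wrapper and its error message are assumptions for illustration; the real conversions are in vgic-v2.c further down):

	/* Illustrative only: wrapper name and error policy are assumed. */
	static int map_device_into_hyp(phys_addr_t base, size_t size,
				       void __iomem **kva, void __iomem **hva)
	{
		/* One call now ioremaps for the kernel and maps the range into HYP. */
		int ret = create_hyp_io_mappings(base, size, kva, hva);

		if (ret) {
			kvm_err("Cannot map device into hyp\n");
			return ret;
		}

		/* Kernel code uses *kva; code running at EL2 uses the VA in *hva. */
		return 0;
	}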
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8a9c42366db7..1c5b76c46e26 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c | |||
| @@ -37,7 +37,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) | |||
| 37 | 37 | ||
| 38 | reg = (select_idx == ARMV8_PMU_CYCLE_IDX) | 38 | reg = (select_idx == ARMV8_PMU_CYCLE_IDX) |
| 39 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; | 39 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; |
| 40 | counter = vcpu_sys_reg(vcpu, reg); | 40 | counter = __vcpu_sys_reg(vcpu, reg); |
| 41 | 41 | ||
| 42 | /* The real counter value is equal to the value of counter register plus | 42 | /* The real counter value is equal to the value of counter register plus |
| 43 | * the value perf event counts. | 43 | * the value perf event counts. |
| @@ -61,7 +61,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) | |||
| 61 | 61 | ||
| 62 | reg = (select_idx == ARMV8_PMU_CYCLE_IDX) | 62 | reg = (select_idx == ARMV8_PMU_CYCLE_IDX) |
| 63 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; | 63 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; |
| 64 | vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); | 64 | __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | /** | 67 | /** |
| @@ -78,7 +78,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) | |||
| 78 | counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); | 78 | counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); |
| 79 | reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) | 79 | reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) |
| 80 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; | 80 | ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; |
| 81 | vcpu_sys_reg(vcpu, reg) = counter; | 81 | __vcpu_sys_reg(vcpu, reg) = counter; |
| 82 | perf_event_disable(pmc->perf_event); | 82 | perf_event_disable(pmc->perf_event); |
| 83 | perf_event_release_kernel(pmc->perf_event); | 83 | perf_event_release_kernel(pmc->perf_event); |
| 84 | pmc->perf_event = NULL; | 84 | pmc->perf_event = NULL; |
| @@ -125,7 +125,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
| 125 | 125 | ||
| 126 | u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) | 126 | u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) |
| 127 | { | 127 | { |
| 128 | u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; | 128 | u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; |
| 129 | 129 | ||
| 130 | val &= ARMV8_PMU_PMCR_N_MASK; | 130 | val &= ARMV8_PMU_PMCR_N_MASK; |
| 131 | if (val == 0) | 131 | if (val == 0) |
| @@ -147,7 +147,7 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) | |||
| 147 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 147 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
| 148 | struct kvm_pmc *pmc; | 148 | struct kvm_pmc *pmc; |
| 149 | 149 | ||
| 150 | if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) | 150 | if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) |
| 151 | return; | 151 | return; |
| 152 | 152 | ||
| 153 | for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { | 153 | for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { |
| @@ -193,10 +193,10 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) | |||
| 193 | { | 193 | { |
| 194 | u64 reg = 0; | 194 | u64 reg = 0; |
| 195 | 195 | ||
| 196 | if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { | 196 | if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { |
| 197 | reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); | 197 | reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); |
| 198 | reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); | 198 | reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); |
| 199 | reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); | 199 | reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); |
| 200 | reg &= kvm_pmu_valid_counter_mask(vcpu); | 200 | reg &= kvm_pmu_valid_counter_mask(vcpu); |
| 201 | } | 201 | } |
| 202 | 202 | ||
| @@ -295,7 +295,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, | |||
| 295 | struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); | 295 | struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); |
| 296 | int idx = pmc->idx; | 296 | int idx = pmc->idx; |
| 297 | 297 | ||
| 298 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); | 298 | __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); |
| 299 | 299 | ||
| 300 | if (kvm_pmu_overflow_status(vcpu)) { | 300 | if (kvm_pmu_overflow_status(vcpu)) { |
| 301 | kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); | 301 | kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); |
| @@ -316,19 +316,19 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) | |||
| 316 | if (val == 0) | 316 | if (val == 0) |
| 317 | return; | 317 | return; |
| 318 | 318 | ||
| 319 | enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0); | 319 | enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); |
| 320 | for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { | 320 | for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { |
| 321 | if (!(val & BIT(i))) | 321 | if (!(val & BIT(i))) |
| 322 | continue; | 322 | continue; |
| 323 | type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) | 323 | type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) |
| 324 | & ARMV8_PMU_EVTYPE_EVENT; | 324 | & ARMV8_PMU_EVTYPE_EVENT; |
| 325 | if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) | 325 | if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) |
| 326 | && (enable & BIT(i))) { | 326 | && (enable & BIT(i))) { |
| 327 | reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; | 327 | reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; |
| 328 | reg = lower_32_bits(reg); | 328 | reg = lower_32_bits(reg); |
| 329 | vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; | 329 | __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; |
| 330 | if (!reg) | 330 | if (!reg) |
| 331 | vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); | 331 | __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); |
| 332 | } | 332 | } |
| 333 | } | 333 | } |
| 334 | } | 334 | } |
| @@ -348,7 +348,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) | |||
| 348 | mask = kvm_pmu_valid_counter_mask(vcpu); | 348 | mask = kvm_pmu_valid_counter_mask(vcpu); |
| 349 | if (val & ARMV8_PMU_PMCR_E) { | 349 | if (val & ARMV8_PMU_PMCR_E) { |
| 350 | kvm_pmu_enable_counter(vcpu, | 350 | kvm_pmu_enable_counter(vcpu, |
| 351 | vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); | 351 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); |
| 352 | } else { | 352 | } else { |
| 353 | kvm_pmu_disable_counter(vcpu, mask); | 353 | kvm_pmu_disable_counter(vcpu, mask); |
| 354 | } | 354 | } |
| @@ -369,8 +369,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) | |||
| 369 | 369 | ||
| 370 | static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) | 370 | static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) |
| 371 | { | 371 | { |
| 372 | return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && | 372 | return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && |
| 373 | (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); | 373 | (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); |
| 374 | } | 374 | } |
| 375 | 375 | ||
| 376 | /** | 376 | /** |
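For clarity, a small worked example of the masking performed in kvm_pmu_overflow_status() above; the register values are hypothetical and chosen only to illustrate the computation:

	/*
	 * Hypothetical values, for illustration only:
	 *   PMCR_EL0.E     = 1        (counting globally enabled)
	 *   PMOVSSET_EL0   = 0b0101   (counters 0 and 2 have overflowed)
	 *   PMCNTENSET_EL0 = 0b0001   (only counter 0 is enabled)
	 *   PMINTENSET_EL1 = 0b0011   (counters 0 and 1 may raise an interrupt)
	 *   valid mask     = 0b1111
	 *
	 * reg = 0b0101 & 0b0001 & 0b0011 & 0b1111 = 0b0001, which is non-zero,
	 * so an overflow interrupt is made pending for the guest.
	 */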
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 743ca5cb05ef..68378fe17a0e 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c | |||
| @@ -166,12 +166,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) | |||
| 166 | kvm->arch.vgic.in_kernel = true; | 166 | kvm->arch.vgic.in_kernel = true; |
| 167 | kvm->arch.vgic.vgic_model = type; | 167 | kvm->arch.vgic.vgic_model = type; |
| 168 | 168 | ||
| 169 | /* | ||
| 170 | * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init) | ||
| 171 | * it is stored in distributor struct for asm save/restore purpose | ||
| 172 | */ | ||
| 173 | kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base; | ||
| 174 | |||
| 175 | kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; | 169 | kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; |
| 176 | kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; | 170 | kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; |
| 177 | kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; | 171 | kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; |
| @@ -302,17 +296,6 @@ int vgic_init(struct kvm *kvm) | |||
| 302 | 296 | ||
| 303 | dist->initialized = true; | 297 | dist->initialized = true; |
| 304 | 298 | ||
| 305 | /* | ||
| 306 | * If we're initializing GICv2 on-demand when first running the VCPU | ||
| 307 | * then we need to load the VGIC state onto the CPU. We can detect | ||
| 308 | * this easily by checking if we are in between vcpu_load and vcpu_put | ||
| 309 | * when we just initialized the VGIC. | ||
| 310 | */ | ||
| 311 | preempt_disable(); | ||
| 312 | vcpu = kvm_arm_get_running_vcpu(); | ||
| 313 | if (vcpu) | ||
| 314 | kvm_vgic_load(vcpu); | ||
| 315 | preempt_enable(); | ||
| 316 | out: | 299 | out: |
| 317 | return ret; | 300 | return ret; |
| 318 | } | 301 | } |
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 465095355666..a8f07243aa9f 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c | |||
| @@ -316,21 +316,24 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) | |||
| 316 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 316 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
| 317 | struct vgic_irq *irq; | 317 | struct vgic_irq *irq; |
| 318 | u32 *intids; | 318 | u32 *intids; |
| 319 | int irq_count = dist->lpi_list_count, i = 0; | 319 | int irq_count, i = 0; |
| 320 | 320 | ||
| 321 | /* | 321 | /* |
| 322 | * We use the current value of the list length, which may change | 322 | * There is an obvious race between allocating the array and LPIs |
| 323 | * after the kmalloc. We don't care, because the guest shouldn't | 323 | * being mapped/unmapped. If we ended up here as a result of a |
| 324 | * change anything while the command handling is still running, | 324 | * command, we're safe (locks are held, preventing another |
| 325 | * and in the worst case we would miss a new IRQ, which one wouldn't | 325 | * command). If coming from another path (such as enabling LPIs), |
| 326 | * expect to be covered by this command anyway. | 326 | * we must be careful not to overrun the array. |
| 327 | */ | 327 | */ |
| 328 | irq_count = READ_ONCE(dist->lpi_list_count); | ||
| 328 | intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); | 329 | intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); |
| 329 | if (!intids) | 330 | if (!intids) |
| 330 | return -ENOMEM; | 331 | return -ENOMEM; |
| 331 | 332 | ||
| 332 | spin_lock(&dist->lpi_list_lock); | 333 | spin_lock(&dist->lpi_list_lock); |
| 333 | list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { | 334 | list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { |
| 335 | if (i == irq_count) | ||
| 336 | break; | ||
| 334 | /* We don't need to "get" the IRQ, as we hold the list lock. */ | 337 | /* We don't need to "get" the IRQ, as we hold the list lock. */ |
| 335 | if (irq->target_vcpu != vcpu) | 338 | if (irq->target_vcpu != vcpu) |
| 336 | continue; | 339 | continue; |
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 29556f71b691..45aa433f018f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c | |||
| @@ -105,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) | |||
| 105 | 105 | ||
| 106 | /* | 106 | /* |
| 107 | * Clear soft pending state when level irqs have been acked. | 107 | * Clear soft pending state when level irqs have been acked. |
| 108 | * Always regenerate the pending state. | ||
| 109 | */ | 108 | */ |
| 110 | if (irq->config == VGIC_CONFIG_LEVEL) { | 109 | if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) |
| 111 | if (!(val & GICH_LR_PENDING_BIT)) | 110 | irq->pending_latch = false; |
| 112 | irq->pending_latch = false; | ||
| 113 | } | ||
| 114 | 111 | ||
| 115 | /* | 112 | /* |
| 116 | * Level-triggered mapped IRQs are special because we only | 113 | * Level-triggered mapped IRQs are special because we only |
| @@ -153,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) | |||
| 153 | void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | 150 | void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) |
| 154 | { | 151 | { |
| 155 | u32 val = irq->intid; | 152 | u32 val = irq->intid; |
| 153 | bool allow_pending = true; | ||
| 154 | |||
| 155 | if (irq->active) | ||
| 156 | val |= GICH_LR_ACTIVE_BIT; | ||
| 157 | |||
| 158 | if (irq->hw) { | ||
| 159 | val |= GICH_LR_HW; | ||
| 160 | val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; | ||
| 161 | /* | ||
| 162 | * Never set pending+active on a HW interrupt, as the | ||
| 163 | * pending state is kept at the physical distributor | ||
| 164 | * level. | ||
| 165 | */ | ||
| 166 | if (irq->active) | ||
| 167 | allow_pending = false; | ||
| 168 | } else { | ||
| 169 | if (irq->config == VGIC_CONFIG_LEVEL) { | ||
| 170 | val |= GICH_LR_EOI; | ||
| 156 | 171 | ||
| 157 | if (irq_is_pending(irq)) { | 172 | /* |
| 173 | * Software resampling doesn't work very well | ||
| 174 | * if we allow P+A, so let's not do that. | ||
| 175 | */ | ||
| 176 | if (irq->active) | ||
| 177 | allow_pending = false; | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | if (allow_pending && irq_is_pending(irq)) { | ||
| 158 | val |= GICH_LR_PENDING_BIT; | 182 | val |= GICH_LR_PENDING_BIT; |
| 159 | 183 | ||
| 160 | if (irq->config == VGIC_CONFIG_EDGE) | 184 | if (irq->config == VGIC_CONFIG_EDGE) |
| @@ -171,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
| 171 | } | 195 | } |
| 172 | } | 196 | } |
| 173 | 197 | ||
| 174 | if (irq->active) | ||
| 175 | val |= GICH_LR_ACTIVE_BIT; | ||
| 176 | |||
| 177 | if (irq->hw) { | ||
| 178 | val |= GICH_LR_HW; | ||
| 179 | val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; | ||
| 180 | /* | ||
| 181 | * Never set pending+active on a HW interrupt, as the | ||
| 182 | * pending state is kept at the physical distributor | ||
| 183 | * level. | ||
| 184 | */ | ||
| 185 | if (irq->active && irq_is_pending(irq)) | ||
| 186 | val &= ~GICH_LR_PENDING_BIT; | ||
| 187 | } else { | ||
| 188 | if (irq->config == VGIC_CONFIG_LEVEL) | ||
| 189 | val |= GICH_LR_EOI; | ||
| 190 | } | ||
| 191 | |||
| 192 | /* | 198 | /* |
| 193 | * Level-triggered mapped IRQs are special because we only observe | 199 | * Level-triggered mapped IRQs are special because we only observe |
| 194 | * rising edges as input to the VGIC. We therefore lower the line | 200 | * rising edges as input to the VGIC. We therefore lower the line |
| @@ -272,7 +278,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu) | |||
| 272 | * anyway. | 278 | * anyway. |
| 273 | */ | 279 | */ |
| 274 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; | 280 | vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; |
| 275 | vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; | ||
| 276 | 281 | ||
| 277 | /* Get the show on the road... */ | 282 | /* Get the show on the road... */ |
| 278 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; | 283 | vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; |
| @@ -368,16 +373,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info) | |||
| 368 | if (!PAGE_ALIGNED(info->vcpu.start) || | 373 | if (!PAGE_ALIGNED(info->vcpu.start) || |
| 369 | !PAGE_ALIGNED(resource_size(&info->vcpu))) { | 374 | !PAGE_ALIGNED(resource_size(&info->vcpu))) { |
| 370 | kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); | 375 | kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); |
| 371 | kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start, | ||
| 372 | resource_size(&info->vcpu)); | ||
| 373 | if (!kvm_vgic_global_state.vcpu_base_va) { | ||
| 374 | kvm_err("Cannot ioremap GICV\n"); | ||
| 375 | return -ENOMEM; | ||
| 376 | } | ||
| 377 | 376 | ||
| 378 | ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va, | 377 | ret = create_hyp_io_mappings(info->vcpu.start, |
| 379 | kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu), | 378 | resource_size(&info->vcpu), |
| 380 | info->vcpu.start); | 379 | &kvm_vgic_global_state.vcpu_base_va, |
| 380 | &kvm_vgic_global_state.vcpu_hyp_va); | ||
| 381 | if (ret) { | 381 | if (ret) { |
| 382 | kvm_err("Cannot map GICV into hyp\n"); | 382 | kvm_err("Cannot map GICV into hyp\n"); |
| 383 | goto out; | 383 | goto out; |
| @@ -386,26 +386,18 @@ int vgic_v2_probe(const struct gic_kvm_info *info) | |||
| 386 | static_branch_enable(&vgic_v2_cpuif_trap); | 386 | static_branch_enable(&vgic_v2_cpuif_trap); |
| 387 | } | 387 | } |
| 388 | 388 | ||
| 389 | kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, | 389 | ret = create_hyp_io_mappings(info->vctrl.start, |
| 390 | resource_size(&info->vctrl)); | 390 | resource_size(&info->vctrl), |
| 391 | if (!kvm_vgic_global_state.vctrl_base) { | 391 | &kvm_vgic_global_state.vctrl_base, |
| 392 | kvm_err("Cannot ioremap GICH\n"); | 392 | &kvm_vgic_global_state.vctrl_hyp); |
| 393 | ret = -ENOMEM; | 393 | if (ret) { |
| 394 | kvm_err("Cannot map VCTRL into hyp\n"); | ||
| 394 | goto out; | 395 | goto out; |
| 395 | } | 396 | } |
| 396 | 397 | ||
| 397 | vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); | 398 | vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); |
| 398 | kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; | 399 | kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; |
| 399 | 400 | ||
| 400 | ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, | ||
| 401 | kvm_vgic_global_state.vctrl_base + | ||
| 402 | resource_size(&info->vctrl), | ||
| 403 | info->vctrl.start); | ||
| 404 | if (ret) { | ||
| 405 | kvm_err("Cannot map VCTRL into hyp\n"); | ||
| 406 | goto out; | ||
| 407 | } | ||
| 408 | |||
| 409 | ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); | 401 | ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); |
| 410 | if (ret) { | 402 | if (ret) { |
| 411 | kvm_err("Cannot register GICv2 KVM device\n"); | 403 | kvm_err("Cannot register GICv2 KVM device\n"); |
| @@ -429,18 +421,74 @@ out: | |||
| 429 | return ret; | 421 | return ret; |
| 430 | } | 422 | } |
| 431 | 423 | ||
| 424 | static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) | ||
| 425 | { | ||
| 426 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
| 427 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 428 | u64 elrsr; | ||
| 429 | int i; | ||
| 430 | |||
| 431 | elrsr = readl_relaxed(base + GICH_ELRSR0); | ||
| 432 | if (unlikely(used_lrs > 32)) | ||
| 433 | elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; | ||
| 434 | |||
| 435 | for (i = 0; i < used_lrs; i++) { | ||
| 436 | if (elrsr & (1UL << i)) | ||
| 437 | cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; | ||
| 438 | else | ||
| 439 | cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); | ||
| 440 | |||
| 441 | writel_relaxed(0, base + GICH_LR0 + (i * 4)); | ||
| 442 | } | ||
| 443 | } | ||
| 444 | |||
| 445 | void vgic_v2_save_state(struct kvm_vcpu *vcpu) | ||
| 446 | { | ||
| 447 | void __iomem *base = kvm_vgic_global_state.vctrl_base; | ||
| 448 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 449 | |||
| 450 | if (!base) | ||
| 451 | return; | ||
| 452 | |||
| 453 | if (used_lrs) { | ||
| 454 | save_lrs(vcpu, base); | ||
| 455 | writel_relaxed(0, base + GICH_HCR); | ||
| 456 | } | ||
| 457 | } | ||
| 458 | |||
| 459 | void vgic_v2_restore_state(struct kvm_vcpu *vcpu) | ||
| 460 | { | ||
| 461 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | ||
| 462 | void __iomem *base = kvm_vgic_global_state.vctrl_base; | ||
| 463 | u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; | ||
| 464 | int i; | ||
| 465 | |||
| 466 | if (!base) | ||
| 467 | return; | ||
| 468 | |||
| 469 | if (used_lrs) { | ||
| 470 | writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); | ||
| 471 | for (i = 0; i < used_lrs; i++) { | ||
| 472 | writel_relaxed(cpu_if->vgic_lr[i], | ||
| 473 | base + GICH_LR0 + (i * 4)); | ||
| 474 | } | ||
| 475 | } | ||
| 476 | } | ||
| 477 | |||
| 432 | void vgic_v2_load(struct kvm_vcpu *vcpu) | 478 | void vgic_v2_load(struct kvm_vcpu *vcpu) |
| 433 | { | 479 | { |
| 434 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | 480 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; |
| 435 | struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; | ||
| 436 | 481 | ||
| 437 | writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); | 482 | writel_relaxed(cpu_if->vgic_vmcr, |
| 483 | kvm_vgic_global_state.vctrl_base + GICH_VMCR); | ||
| 484 | writel_relaxed(cpu_if->vgic_apr, | ||
| 485 | kvm_vgic_global_state.vctrl_base + GICH_APR); | ||
| 438 | } | 486 | } |
| 439 | 487 | ||
| 440 | void vgic_v2_put(struct kvm_vcpu *vcpu) | 488 | void vgic_v2_put(struct kvm_vcpu *vcpu) |
| 441 | { | 489 | { |
| 442 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; | 490 | struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; |
| 443 | struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; | ||
| 444 | 491 | ||
| 445 | cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); | 492 | cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); |
| 493 | cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); | ||
| 446 | } | 494 | } |
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 0ff2006f3781..8195f52ae6f0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/kvm.h> | 16 | #include <linux/kvm.h> |
| 17 | #include <linux/kvm_host.h> | 17 | #include <linux/kvm_host.h> |
| 18 | #include <kvm/arm_vgic.h> | 18 | #include <kvm/arm_vgic.h> |
| 19 | #include <asm/kvm_hyp.h> | ||
| 19 | #include <asm/kvm_mmu.h> | 20 | #include <asm/kvm_mmu.h> |
| 20 | #include <asm/kvm_asm.h> | 21 | #include <asm/kvm_asm.h> |
| 21 | 22 | ||
| @@ -96,12 +97,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) | |||
| 96 | 97 | ||
| 97 | /* | 98 | /* |
| 98 | * Clear soft pending state when level irqs have been acked. | 99 | * Clear soft pending state when level irqs have been acked. |
| 99 | * Always regenerate the pending state. | ||
| 100 | */ | 100 | */ |
| 101 | if (irq->config == VGIC_CONFIG_LEVEL) { | 101 | if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) |
| 102 | if (!(val & ICH_LR_PENDING_BIT)) | 102 | irq->pending_latch = false; |
| 103 | irq->pending_latch = false; | ||
| 104 | } | ||
| 105 | 103 | ||
| 106 | /* | 104 | /* |
| 107 | * Level-triggered mapped IRQs are special because we only | 105 | * Level-triggered mapped IRQs are special because we only |
| @@ -135,8 +133,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
| 135 | { | 133 | { |
| 136 | u32 model = vcpu->kvm->arch.vgic.vgic_model; | 134 | u32 model = vcpu->kvm->arch.vgic.vgic_model; |
| 137 | u64 val = irq->intid; | 135 | u64 val = irq->intid; |
| 136 | bool allow_pending = true; | ||
| 137 | |||
| 138 | if (irq->active) | ||
| 139 | val |= ICH_LR_ACTIVE_BIT; | ||
| 140 | |||
| 141 | if (irq->hw) { | ||
| 142 | val |= ICH_LR_HW; | ||
| 143 | val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; | ||
| 144 | /* | ||
| 145 | * Never set pending+active on a HW interrupt, as the | ||
| 146 | * pending state is kept at the physical distributor | ||
| 147 | * level. | ||
| 148 | */ | ||
| 149 | if (irq->active) | ||
| 150 | allow_pending = false; | ||
| 151 | } else { | ||
| 152 | if (irq->config == VGIC_CONFIG_LEVEL) { | ||
| 153 | val |= ICH_LR_EOI; | ||
| 154 | |||
| 155 | /* | ||
| 156 | * Software resampling doesn't work very well | ||
| 157 | * if we allow P+A, so let's not do that. | ||
| 158 | */ | ||
| 159 | if (irq->active) | ||
| 160 | allow_pending = false; | ||
| 161 | } | ||
| 162 | } | ||
| 138 | 163 | ||
| 139 | if (irq_is_pending(irq)) { | 164 | if (allow_pending && irq_is_pending(irq)) { |
| 140 | val |= ICH_LR_PENDING_BIT; | 165 | val |= ICH_LR_PENDING_BIT; |
| 141 | 166 | ||
| 142 | if (irq->config == VGIC_CONFIG_EDGE) | 167 | if (irq->config == VGIC_CONFIG_EDGE) |
| @@ -154,24 +179,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) | |||
| 154 | } | 179 | } |
| 155 | } | 180 | } |
| 156 | 181 | ||
| 157 | if (irq->active) | ||
| 158 | val |= ICH_LR_ACTIVE_BIT; | ||
| 159 | |||
| 160 | if (irq->hw) { | ||
| 161 | val |= ICH_LR_HW; | ||
| 162 | val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; | ||
| 163 | /* | ||
| 164 | * Never set pending+active on a HW interrupt, as the | ||
| 165 | * pending state is kept at the physical distributor | ||
| 166 | * level. | ||
| 167 | */ | ||
| 168 | if (irq->active && irq_is_pending(irq)) | ||
| 169 | val &= ~ICH_LR_PENDING_BIT; | ||
| 170 | } else { | ||
| 171 | if (irq->config == VGIC_CONFIG_LEVEL) | ||
| 172 | val |= ICH_LR_EOI; | ||
| 173 | } | ||
| 174 | |||
| 175 | /* | 182 | /* |
| 176 | * Level-triggered mapped IRQs are special because we only observe | 183 | * Level-triggered mapped IRQs are special because we only observe |
| 177 | * rising edges as input to the VGIC. We therefore lower the line | 184 | * rising edges as input to the VGIC. We therefore lower the line |
| @@ -274,7 +281,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) | |||
| 274 | * anyway. | 281 | * anyway. |
| 275 | */ | 282 | */ |
| 276 | vgic_v3->vgic_vmcr = 0; | 283 | vgic_v3->vgic_vmcr = 0; |
| 277 | vgic_v3->vgic_elrsr = ~0; | ||
| 278 | 284 | ||
| 279 | /* | 285 | /* |
| 280 | * If we are emulating a GICv3, we do it in an non-GICv2-compatible | 286 | * If we are emulating a GICv3, we do it in an non-GICv2-compatible |
| @@ -595,6 +601,11 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) | |||
| 595 | */ | 601 | */ |
| 596 | if (likely(cpu_if->vgic_sre)) | 602 | if (likely(cpu_if->vgic_sre)) |
| 597 | kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); | 603 | kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); |
| 604 | |||
| 605 | kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); | ||
| 606 | |||
| 607 | if (has_vhe()) | ||
| 608 | __vgic_v3_activate_traps(vcpu); | ||
| 598 | } | 609 | } |
| 599 | 610 | ||
| 600 | void vgic_v3_put(struct kvm_vcpu *vcpu) | 611 | void vgic_v3_put(struct kvm_vcpu *vcpu) |
| @@ -603,4 +614,9 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) | |||
| 603 | 614 | ||
| 604 | if (likely(cpu_if->vgic_sre)) | 615 | if (likely(cpu_if->vgic_sre)) |
| 605 | cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); | 616 | cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); |
| 617 | |||
| 618 | kvm_call_hyp(__vgic_v3_save_aprs, vcpu); | ||
| 619 | |||
| 620 | if (has_vhe()) | ||
| 621 | __vgic_v3_deactivate_traps(vcpu); | ||
| 606 | } | 622 | } |
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 8201899126f6..e74baec76361 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/list_sort.h> | 19 | #include <linux/list_sort.h> |
| 20 | #include <linux/interrupt.h> | 20 | #include <linux/interrupt.h> |
| 21 | #include <linux/irq.h> | 21 | #include <linux/irq.h> |
| 22 | #include <asm/kvm_hyp.h> | ||
| 22 | 23 | ||
| 23 | #include "vgic.h" | 24 | #include "vgic.h" |
| 24 | 25 | ||
| @@ -808,6 +809,24 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) | |||
| 808 | vgic_clear_lr(vcpu, count); | 809 | vgic_clear_lr(vcpu, count); |
| 809 | } | 810 | } |
| 810 | 811 | ||
| 812 | static inline bool can_access_vgic_from_kernel(void) | ||
| 813 | { | ||
| 814 | /* | ||
| 815 | * GICv2 can always be accessed from the kernel because it is | ||
| 816 | * memory-mapped, and VHE systems can access GICv3 EL2 system | ||
| 817 | * registers. | ||
| 818 | */ | ||
| 819 | return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); | ||
| 820 | } | ||
| 821 | |||
| 822 | static inline void vgic_save_state(struct kvm_vcpu *vcpu) | ||
| 823 | { | ||
| 824 | if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) | ||
| 825 | vgic_v2_save_state(vcpu); | ||
| 826 | else | ||
| 827 | __vgic_v3_save_state(vcpu); | ||
| 828 | } | ||
| 829 | |||
| 811 | /* Sync back the hardware VGIC state into our emulation after a guest's run. */ | 830 | /* Sync back the hardware VGIC state into our emulation after a guest's run. */ |
| 812 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | 831 | void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) |
| 813 | { | 832 | { |
| @@ -819,11 +838,22 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | |||
| 819 | if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) | 838 | if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) |
| 820 | return; | 839 | return; |
| 821 | 840 | ||
| 841 | if (can_access_vgic_from_kernel()) | ||
| 842 | vgic_save_state(vcpu); | ||
| 843 | |||
| 822 | if (vgic_cpu->used_lrs) | 844 | if (vgic_cpu->used_lrs) |
| 823 | vgic_fold_lr_state(vcpu); | 845 | vgic_fold_lr_state(vcpu); |
| 824 | vgic_prune_ap_list(vcpu); | 846 | vgic_prune_ap_list(vcpu); |
| 825 | } | 847 | } |
| 826 | 848 | ||
| 849 | static inline void vgic_restore_state(struct kvm_vcpu *vcpu) | ||
| 850 | { | ||
| 851 | if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) | ||
| 852 | vgic_v2_restore_state(vcpu); | ||
| 853 | else | ||
| 854 | __vgic_v3_restore_state(vcpu); | ||
| 855 | } | ||
| 856 | |||
| 827 | /* Flush our emulation state into the GIC hardware before entering the guest. */ | 857 | /* Flush our emulation state into the GIC hardware before entering the guest. */ |
| 828 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | 858 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) |
| 829 | { | 859 | { |
| @@ -846,6 +876,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | |||
| 846 | spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); | 876 | spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); |
| 847 | vgic_flush_lr_state(vcpu); | 877 | vgic_flush_lr_state(vcpu); |
| 848 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); | 878 | spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); |
| 879 | |||
| 880 | if (can_access_vgic_from_kernel()) | ||
| 881 | vgic_restore_state(vcpu); | ||
| 849 | } | 882 | } |
| 850 | 883 | ||
| 851 | void kvm_vgic_load(struct kvm_vcpu *vcpu) | 884 | void kvm_vgic_load(struct kvm_vcpu *vcpu) |
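Note on the hunk above: vgic_save_state()/vgic_restore_state() pick the GICv2 or GICv3 path via kvm_vgic_global_state.gicv3_cpuif, a static key, and can_access_vgic_from_kernel() gates whether those paths run from the kernel at all (GICv2 is memory-mapped; GICv3 needs EL2 system registers, which only VHE hosts can reach). A minimal stand-alone sketch of that static-key dispatch pattern follows; the names gicv3_cpuif_key, handle_v2(), handle_v3() and probe() are illustrative placeholders, not the vgic symbols.

#include <linux/jump_label.h>

/*
 * Patched at runtime: false (GICv2) by default, enabled once at probe
 * time if a GICv3 CPU interface is detected.
 */
static DEFINE_STATIC_KEY_FALSE(gicv3_cpuif_key);

static void handle_v2(void) { /* GICv2: memory-mapped, reachable from EL1 */ }
static void handle_v3(void) { /* GICv3: EL2 system register accessors     */ }

static void dispatch(void)
{
	/*
	 * static_branch_unlikely() compiles to a patchable jump, so the
	 * default (GICv2) case costs a no-op rather than a load and test.
	 */
	if (static_branch_unlikely(&gicv3_cpuif_key))
		handle_v3();
	else
		handle_v2();
}

static void probe(bool have_gicv3)
{
	if (have_gicv3)
		static_branch_enable(&gicv3_cpuif_key);
}

The real code keeps the key inside kvm_vgic_global_state rather than as a standalone DEFINE_STATIC_KEY_FALSE(), but the branch-patching mechanism is the same.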
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index f5b8519e5546..830e815748a0 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h | |||
| @@ -178,6 +178,9 @@ void vgic_v2_init_lrs(void); | |||
| 178 | void vgic_v2_load(struct kvm_vcpu *vcpu); | 178 | void vgic_v2_load(struct kvm_vcpu *vcpu); |
| 179 | void vgic_v2_put(struct kvm_vcpu *vcpu); | 179 | void vgic_v2_put(struct kvm_vcpu *vcpu); |
| 180 | 180 | ||
| 181 | void vgic_v2_save_state(struct kvm_vcpu *vcpu); | ||
| 182 | void vgic_v2_restore_state(struct kvm_vcpu *vcpu); | ||
| 183 | |||
| 181 | static inline void vgic_get_irq_kref(struct vgic_irq *irq) | 184 | static inline void vgic_get_irq_kref(struct vgic_irq *irq) |
| 182 | { | 185 | { |
| 183 | if (irq->intid < VGIC_MIN_LPI) | 186 | if (irq->intid < VGIC_MIN_LPI) |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 65dea3ffef68..c7b2e927f699 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -3398,21 +3398,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | |||
| 3398 | return kvm_io_bus_cmp(p1, p2); | 3398 | return kvm_io_bus_cmp(p1, p2); |
| 3399 | } | 3399 | } |
| 3400 | 3400 | ||
| 3401 | static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | ||
| 3402 | gpa_t addr, int len) | ||
| 3403 | { | ||
| 3404 | bus->range[bus->dev_count++] = (struct kvm_io_range) { | ||
| 3405 | .addr = addr, | ||
| 3406 | .len = len, | ||
| 3407 | .dev = dev, | ||
| 3408 | }; | ||
| 3409 | |||
| 3410 | sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), | ||
| 3411 | kvm_io_bus_sort_cmp, NULL); | ||
| 3412 | |||
| 3413 | return 0; | ||
| 3414 | } | ||
| 3415 | |||
| 3416 | static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, | 3401 | static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, |
| 3417 | gpa_t addr, int len) | 3402 | gpa_t addr, int len) |
| 3418 | { | 3403 | { |
| @@ -3553,7 +3538,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, | |||
| 3553 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | 3538 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
| 3554 | int len, struct kvm_io_device *dev) | 3539 | int len, struct kvm_io_device *dev) |
| 3555 | { | 3540 | { |
| 3541 | int i; | ||
| 3556 | struct kvm_io_bus *new_bus, *bus; | 3542 | struct kvm_io_bus *new_bus, *bus; |
| 3543 | struct kvm_io_range range; | ||
| 3557 | 3544 | ||
| 3558 | bus = kvm_get_bus(kvm, bus_idx); | 3545 | bus = kvm_get_bus(kvm, bus_idx); |
| 3559 | if (!bus) | 3546 | if (!bus) |
| @@ -3567,9 +3554,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |||
| 3567 | sizeof(struct kvm_io_range)), GFP_KERNEL); | 3554 | sizeof(struct kvm_io_range)), GFP_KERNEL); |
| 3568 | if (!new_bus) | 3555 | if (!new_bus) |
| 3569 | return -ENOMEM; | 3556 | return -ENOMEM; |
| 3570 | memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count * | 3557 | |
| 3571 | sizeof(struct kvm_io_range))); | 3558 | range = (struct kvm_io_range) { |
| 3572 | kvm_io_bus_insert_dev(new_bus, dev, addr, len); | 3559 | .addr = addr, |
| 3560 | .len = len, | ||
| 3561 | .dev = dev, | ||
| 3562 | }; | ||
| 3563 | |||
| 3564 | for (i = 0; i < bus->dev_count; i++) | ||
| 3565 | if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) | ||
| 3566 | break; | ||
| 3567 | |||
| 3568 | memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); | ||
| 3569 | new_bus->dev_count++; | ||
| 3570 | new_bus->range[i] = range; | ||
| 3571 | memcpy(new_bus->range + i + 1, bus->range + i, | ||
| 3572 | (bus->dev_count - i) * sizeof(struct kvm_io_range)); | ||
| 3573 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | 3573 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); |
| 3574 | synchronize_srcu_expedited(&kvm->srcu); | 3574 | synchronize_srcu_expedited(&kvm->srcu); |
| 3575 | kfree(bus); | 3575 | kfree(bus); |
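The rewritten kvm_io_bus_register_dev() above no longer appends and re-sorts; it finds the new entry's sorted slot, copies the prefix of the old array into the new one, places the entry, then copies the suffix, so the old bus stays intact until rcu_assign_pointer() publishes the replacement. A minimal stand-alone sketch of that copy-and-insert pattern, with plain ints standing in for struct kvm_io_range and malloc() standing in for the kernel allocation:

#include <stdlib.h>
#include <string.h>

/* Build a new sorted array containing the old entries plus val. */
static int *insert_sorted_copy(const int *old, size_t count, int val)
{
	size_t i;
	int *new = malloc((count + 1) * sizeof(*new));

	if (!new)
		return NULL;

	/* Find the first element that sorts after the new value. */
	for (i = 0; i < count; i++)
		if (old[i] > val)
			break;

	memcpy(new, old, i * sizeof(*new));        /* entries before the slot */
	new[i] = val;                              /* the new entry           */
	memcpy(new + i + 1, old + i,               /* entries after the slot  */
	       (count - i) * sizeof(*new));

	return new;
}

In the kernel the comparator is kvm_io_bus_cmp() and publication is rcu_assign_pointer() followed by synchronize_srcu_expedited(), after which the old array can be freed, exactly as the hunk shows.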
