author     Linus Torvalds <torvalds@linux-foundation.org>	2019-05-17 13:33:30 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2019-05-17 13:33:30 -0400
commit     0ef0fd351550130129bbdb77362488befd7b69d2
tree       23186172f5f85c06e18e3ee1a9619879df03c5df /arch
parent     4489da7183099f569a7d3dd819c975073c04bc72
parent     c011d23ba046826ccf8c4a4a6c1d01c9ccaa1403
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"ARM:
- support for SVE and Pointer Authentication in guests
- PMU improvements
POWER:
- support for direct access to the POWER9 XIVE interrupt controller
- memory and performance optimizations
x86:
- support for accessing memory not backed by struct page
- fixes and refactoring
Generic:
- dirty page tracking improvements"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (155 commits)
kvm: fix compilation on aarch64
Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU"
kvm: x86: Fix L1TF mitigation for shadow MMU
KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible
KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device
KVM: PPC: Book3S HV: XIVE: Fix spelling mistake "acessing" -> "accessing"
KVM: PPC: Book3S HV: Make sure to load LPID for radix VCPUs
kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete
tests: kvm: Add tests for KVM_SET_NESTED_STATE
KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state
tests: kvm: Add tests for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_CPU_ID
tests: kvm: Add tests to .gitignore
KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one
KVM: Fix the bitmap range to copy during clear dirty
KVM: arm64: Fix ptrauth ID register masking logic
KVM: x86: use direct accessors for RIP and RSP
KVM: VMX: Use accessors for GPRs outside of dedicated caching logic
KVM: x86: Omit caching logic for always-available GPRs
kvm, x86: Properly check whether a pfn is an MMIO or not
...
Diffstat (limited to 'arch')
72 files changed, 4481 insertions, 911 deletions
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 8927cae7c966..efb0e2c0d84c 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -343,4 +343,6 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 	}
 }
 
+static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 770d73257ad9..075e1921fdd9 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,7 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/cputype.h>
@@ -53,6 +54,8 @@
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
+static inline int kvm_arm_init_sve(void) { return 0; }
+
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -150,9 +153,13 @@ struct kvm_cpu_context {
 	u32 cp15[NR_CP15_REGS];
 };
 
-typedef struct kvm_cpu_context kvm_cpu_context_t;
+struct kvm_host_data {
+	struct kvm_cpu_context host_ctxt;
+};
+
+typedef struct kvm_host_data kvm_host_data_t;
 
-static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
 					     int cpu)
 {
 	/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -182,7 +189,7 @@ struct kvm_vcpu_arch {
 	struct kvm_vcpu_fault_info fault;
 
 	/* Host FP context */
-	kvm_cpu_context_t *host_cpu_context;
+	struct kvm_cpu_context *host_cpu_context;
 
 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
@@ -361,6 +368,9 @@ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
 
+static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
+
 static inline void kvm_arm_vhe_guest_enter(void) {}
 static inline void kvm_arm_vhe_guest_exit(void) {}
 
@@ -409,4 +419,14 @@ static inline int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
 	return 0;
 }
 
+static inline int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
+{
+	return -EINVAL;
+}
+
+static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
+{
+	return true;
+}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 69a59a5d1143..4780eb7af842 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1341,6 +1341,7 @@ menu "ARMv8.3 architectural features"
 config ARM64_PTR_AUTH
 	bool "Enable support for pointer authentication"
 	default y
+	depends on !KVM || ARM64_VHE
 	help
 	  Pointer authentication (part of the ARMv8.3 Extensions) provides
 	  instructions for signing and authenticating pointers against secret
@@ -1354,8 +1355,9 @@ config ARM64_PTR_AUTH
 	  context-switched along with the process.
 
 	  The feature is detected at runtime. If the feature is not present in
-	  hardware it will not be advertised to userspace nor will it be
-	  enabled.
+	  hardware it will not be advertised to userspace/KVM guest nor will it
+	  be enabled. However, KVM guest also require VHE mode and hence
+	  CONFIG_ARM64_VHE=y option to use this feature.
 
 endmenu
 
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index dd1ad3950ef5..df62bbd33a9a 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -24,10 +24,13 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitmap.h>
 #include <linux/build_bug.h>
+#include <linux/bug.h>
 #include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/stddef.h>
+#include <linux/types.h>
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /* Masks for extracting the FPSR and FPCR from the FPSCR */
@@ -56,7 +59,8 @@ extern void fpsimd_restore_current_state(void);
 extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
 
 extern void fpsimd_bind_task_to_cpu(void);
-extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state);
+extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
+				     void *sve_state, unsigned int sve_vl);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void fpsimd_flush_cpu_state(void);
@@ -87,6 +91,29 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
 extern u64 read_zcr_features(void);
 
 extern int __ro_after_init sve_max_vl;
+extern int __ro_after_init sve_max_virtualisable_vl;
+extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
+
+/*
+ * Helpers to translate bit indices in sve_vq_map to VQ values (and
+ * vice versa).  This allows find_next_bit() to be used to find the
+ * _maximum_ VQ not exceeding a certain value.
+ */
+static inline unsigned int __vq_to_bit(unsigned int vq)
+{
+	return SVE_VQ_MAX - vq;
+}
+
+static inline unsigned int __bit_to_vq(unsigned int bit)
+{
+	return SVE_VQ_MAX - bit;
+}
+
+/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */
+static inline bool sve_vq_available(unsigned int vq)
+{
+	return test_bit(__vq_to_bit(vq), sve_vq_map);
+}
 
 #ifdef CONFIG_ARM64_SVE
 
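A note on the reversed bitmap encoding added above: storing each vector quantum (VQ) at bit SVE_VQ_MAX - vq means a forward scan with find_next_bit() starting at __vq_to_bit(vq) lands on the largest supported VQ that does not exceed vq, which is exactly what find_supported_vector_length() in the fpsimd.c part of this diff relies on. The standalone userspace sketch below only illustrates that arithmetic; the *_demo names and the plain char array standing in for the kernel bitmap are hypothetical, not part of the patch.

/*
 * Userspace illustration of the sve_vq_map bit layout (not kernel code).
 * The kernel's SVE_VQ_MAX is 512; VQ n corresponds to a 128*n-bit vector.
 */
#include <stdio.h>

#define SVE_VQ_MAX 512

static unsigned int vq_to_bit_demo(unsigned int vq)  { return SVE_VQ_MAX - vq; }
static unsigned int bit_to_vq_demo(unsigned int bit) { return SVE_VQ_MAX - bit; }

/* stand-in for find_next_bit() over a char-per-bit "bitmap" */
static unsigned int find_next_set_demo(const unsigned char *map,
				       unsigned int size, unsigned int start)
{
	for (unsigned int b = start; b < size; b++)
		if (map[b])
			return b;
	return size;
}

int main(void)
{
	unsigned char map[SVE_VQ_MAX] = { 0 };

	/* pretend the system supports VQ 1, 2 and 4 (128/256/512-bit VLs) */
	map[vq_to_bit_demo(1)] = 1;
	map[vq_to_bit_demo(2)] = 1;
	map[vq_to_bit_demo(4)] = 1;

	/* ask for VQ 3: the scan starts at bit 509 and stops at VQ 2's bit */
	unsigned int bit = find_next_set_demo(map, SVE_VQ_MAX, vq_to_bit_demo(3));
	printf("largest supported VQ <= 3 is %u\n", bit_to_vq_demo(bit));
	return 0;
}

With VQs 1, 2 and 4 marked supported, a request for VQ 3 resolves to VQ 2, i.e. the next shorter vector length actually available.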
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index f5b79e995f40..ff73f5462aca 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -108,7 +108,8 @@ extern u32 __kvm_get_mdcr_el2(void);
 .endm
 
 .macro get_host_ctxt reg, tmp
-	hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
+	hyp_adr_this_cpu \reg, kvm_host_data, \tmp
+	add	\reg, \reg, #HOST_DATA_CONTEXT
 .endm
 
 .macro get_vcpu_ptr vcpu, ctxt
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d3842791e1c4..613427fafff9 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -98,6 +98,22 @@ static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
 	vcpu->arch.hcr_el2 |= HCR_TWE;
 }
 
+static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+}
+
+static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
+}
+
+static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_has_ptrauth(vcpu))
+		vcpu_ptrauth_disable(vcpu);
+}
+
 static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.vsesr_el2;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a01fe087e022..2a8d3f8ca22c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,9 +22,13 @@
 #ifndef __ARM64_KVM_HOST_H__
 #define __ARM64_KVM_HOST_H__
 
+#include <linux/bitmap.h>
 #include <linux/types.h>
+#include <linux/jump_label.h>
 #include <linux/kvm_types.h>
+#include <linux/percpu.h>
 #include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
 #include <asm/cpufeature.h>
 #include <asm/daifflags.h>
 #include <asm/fpsimd.h>
@@ -45,7 +49,7 @@
 
 #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
 
-#define KVM_VCPU_MAX_FEATURES 4
+#define KVM_VCPU_MAX_FEATURES 7
 
 #define KVM_REQ_SLEEP \
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -54,8 +58,12 @@
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
+extern unsigned int kvm_sve_max_vl;
+int kvm_arm_init_sve(void);
+
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
@@ -117,6 +125,7 @@ enum vcpu_sysreg {
 	SCTLR_EL1,	/* System Control Register */
 	ACTLR_EL1,	/* Auxiliary Control Register */
 	CPACR_EL1,	/* Coprocessor Access Control */
+	ZCR_EL1,	/* SVE Control */
 	TTBR0_EL1,	/* Translation Table Base Register 0 */
 	TTBR1_EL1,	/* Translation Table Base Register 1 */
 	TCR_EL1,	/* Translation Control Register */
@@ -152,6 +161,18 @@ enum vcpu_sysreg {
 	PMSWINC_EL0,	/* Software Increment Register */
 	PMUSERENR_EL0,	/* User Enable Register */
 
+	/* Pointer Authentication Registers in a strict increasing order. */
+	APIAKEYLO_EL1,
+	APIAKEYHI_EL1,
+	APIBKEYLO_EL1,
+	APIBKEYHI_EL1,
+	APDAKEYLO_EL1,
+	APDAKEYHI_EL1,
+	APDBKEYLO_EL1,
+	APDBKEYHI_EL1,
+	APGAKEYLO_EL1,
+	APGAKEYHI_EL1,
+
 	/* 32bit specific registers. Keep them at the end of the range */
 	DACR32_EL2,	/* Domain Access Control Register */
 	IFSR32_EL2,	/* Instruction Fault Status Register */
@@ -212,7 +233,17 @@ struct kvm_cpu_context {
 	struct kvm_vcpu *__hyp_running_vcpu;
 };
 
-typedef struct kvm_cpu_context kvm_cpu_context_t;
+struct kvm_pmu_events {
+	u32 events_host;
+	u32 events_guest;
+};
+
+struct kvm_host_data {
+	struct kvm_cpu_context host_ctxt;
+	struct kvm_pmu_events pmu_events;
+};
+
+typedef struct kvm_host_data kvm_host_data_t;
 
 struct vcpu_reset_state {
 	unsigned long pc;
@@ -223,6 +254,8 @@ struct vcpu_reset_state {
 
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
+	void *sve_state;
+	unsigned int sve_max_vl;
 
 	/* HYP configuration */
 	u64 hcr_el2;
@@ -255,7 +288,7 @@ struct kvm_vcpu_arch {
 	struct kvm_guest_debug_arch external_debug_state;
 
 	/* Pointer to host CPU context */
-	kvm_cpu_context_t *host_cpu_context;
+	struct kvm_cpu_context *host_cpu_context;
 
 	struct thread_info *host_thread_info;	/* hyp VA */
 	struct user_fpsimd_state *host_fpsimd_state;	/* hyp VA */
@@ -318,12 +351,40 @@ struct kvm_vcpu_arch {
 	bool sysregs_loaded_on_cpu;
 };
 
+/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
+#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
+			      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
+
+#define vcpu_sve_state_size(vcpu) ({					\
+	size_t __size_ret;						\
+	unsigned int __vcpu_vq;						\
+									\
+	if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) {		\
+		__size_ret = 0;						\
+	} else {							\
+		__vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl);	\
+		__size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq);		\
+	}								\
+									\
+	__size_ret;							\
+})
+
 /* vcpu_arch flags field values: */
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
 #define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
 #define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
+#define KVM_ARM64_GUEST_HAS_SVE		(1 << 5) /* SVE exposed to guest */
+#define KVM_ARM64_VCPU_SVE_FINALIZED	(1 << 6) /* SVE config completed */
+#define KVM_ARM64_GUEST_HAS_PTRAUTH	(1 << 7) /* PTRAUTH exposed to guest */
+
+#define vcpu_has_sve(vcpu) (system_supports_sve() && \
+			    ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE))
+
+#define vcpu_has_ptrauth(vcpu)	((system_supports_address_auth() || \
+				  system_supports_generic_auth()) && \
+				 ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_PTRAUTH))
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 
@@ -432,9 +493,9 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
 
-static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt,
+static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
 					     int cpu)
 {
 	/* The host's MPIDR is immutable, so let's set it up at boot time */
@@ -452,8 +513,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
	 * so that we can use adr_l to access per-cpu variables in EL2.
	 */
-	u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
-			 (u64)kvm_ksym_ref(kvm_host_cpu_state));
+	u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
+			 (u64)kvm_ksym_ref(kvm_host_data));
 
 	/*
	 * Call initialization code, and switch to the full blown HYP code.
@@ -491,9 +552,10 @@ static inline bool kvm_arch_requires_vhe(void)
 	return false;
 }
 
+void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
+
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
@@ -516,11 +578,28 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
 
+static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
+{
+	return (!has_vhe() && attr->exclude_host);
+}
+
 #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
 static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 {
	return kvm_arch_vcpu_run_map_fp(vcpu);
 }
+
+void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
+void kvm_clr_pmu_events(u32 clr);
+
+void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt);
+bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt);
+
+void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
+void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+#else
+static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
+static inline void kvm_clr_pmu_events(u32 clr) {}
 #endif
 
 static inline void kvm_arm_vhe_guest_enter(void)
@@ -594,4 +673,10 @@ void kvm_arch_free_vm(struct kvm *kvm);
 
 int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
 
+int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
+bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
+
+#define kvm_arm_vcpu_sve_finalized(vcpu) \
+	((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c3060833b7a5..09fe8bd15f6e 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -149,7 +149,6 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-bool __fpsimd_enabled(void);
 
 void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
 void deactivate_traps_vhe_put(void);
diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h
new file mode 100644
index 000000000000..6301813dcace
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_ptrauth.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* arch/arm64/include/asm/kvm_ptrauth.h: Guest/host ptrauth save/restore
+ * Copyright 2019 Arm Limited
+ * Authors: Mark Rutland <mark.rutland@arm.com>
+ *          Amit Daniel Kachhap <amit.kachhap@arm.com>
+ */
+
+#ifndef __ASM_KVM_PTRAUTH_H
+#define __ASM_KVM_PTRAUTH_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+
+#define PTRAUTH_REG_OFFSET(x)	(x - CPU_APIAKEYLO_EL1)
+
+/*
+ * CPU_AP*_EL1 values exceed immediate offset range (512) for stp
+ * instruction so below macros takes CPU_APIAKEYLO_EL1 as base and
+ * calculates the offset of the keys from this base to avoid an extra add
+ * instruction. These macros assumes the keys offsets follow the order of
+ * the sysreg enum in kvm_host.h.
+ */
+.macro	ptrauth_save_state base, reg1, reg2
+	mrs_s	\reg1, SYS_APIAKEYLO_EL1
+	mrs_s	\reg2, SYS_APIAKEYHI_EL1
+	stp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
+	mrs_s	\reg1, SYS_APIBKEYLO_EL1
+	mrs_s	\reg2, SYS_APIBKEYHI_EL1
+	stp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
+	mrs_s	\reg1, SYS_APDAKEYLO_EL1
+	mrs_s	\reg2, SYS_APDAKEYHI_EL1
+	stp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
+	mrs_s	\reg1, SYS_APDBKEYLO_EL1
+	mrs_s	\reg2, SYS_APDBKEYHI_EL1
+	stp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
+	mrs_s	\reg1, SYS_APGAKEYLO_EL1
+	mrs_s	\reg2, SYS_APGAKEYHI_EL1
+	stp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
+.endm
+
+.macro	ptrauth_restore_state base, reg1, reg2
+	ldp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIAKEYLO_EL1)]
+	msr_s	SYS_APIAKEYLO_EL1, \reg1
+	msr_s	SYS_APIAKEYHI_EL1, \reg2
+	ldp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
+	msr_s	SYS_APIBKEYLO_EL1, \reg1
+	msr_s	SYS_APIBKEYHI_EL1, \reg2
+	ldp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDAKEYLO_EL1)]
+	msr_s	SYS_APDAKEYLO_EL1, \reg1
+	msr_s	SYS_APDAKEYHI_EL1, \reg2
+	ldp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APDBKEYLO_EL1)]
+	msr_s	SYS_APDBKEYLO_EL1, \reg1
+	msr_s	SYS_APDBKEYHI_EL1, \reg2
+	ldp	\reg1, \reg2, [\base, #PTRAUTH_REG_OFFSET(CPU_APGAKEYLO_EL1)]
+	msr_s	SYS_APGAKEYLO_EL1, \reg1
+	msr_s	SYS_APGAKEYHI_EL1, \reg2
+.endm
+
+/*
+ * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will
+ * check for the presence of one of the cpufeature flag
+ * ARM64_HAS_ADDRESS_AUTH_ARCH or ARM64_HAS_ADDRESS_AUTH_IMP_DEF and
+ * then proceed ahead with the save/restore of Pointer Authentication
+ * key registers.
+ */
+.macro	ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
+alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
+	b	1000f
+alternative_else_nop_endif
+alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
+	b	1001f
+alternative_else_nop_endif
+1000:
+	ldr	\reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+	and	\reg1, \reg1, #(HCR_API | HCR_APK)
+	cbz	\reg1, 1001f
+	add	\reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
+	ptrauth_restore_state	\reg1, \reg2, \reg3
+1001:
+.endm
+
+.macro	ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
+alternative_if ARM64_HAS_ADDRESS_AUTH_ARCH
+	b	2000f
+alternative_else_nop_endif
+alternative_if_not ARM64_HAS_ADDRESS_AUTH_IMP_DEF
+	b	2001f
+alternative_else_nop_endif
+2000:
+	ldr	\reg1, [\g_ctxt, #(VCPU_HCR_EL2 - VCPU_CONTEXT)]
+	and	\reg1, \reg1, #(HCR_API | HCR_APK)
+	cbz	\reg1, 2001f
+	add	\reg1, \g_ctxt, #CPU_APIAKEYLO_EL1
+	ptrauth_save_state	\reg1, \reg2, \reg3
+	add	\reg1, \h_ctxt, #CPU_APIAKEYLO_EL1
+	ptrauth_restore_state	\reg1, \reg2, \reg3
+	isb
+2001:
+.endm
+
+#else /* !CONFIG_ARM64_PTR_AUTH */
+.macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3
+.endm
+.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3
+.endm
+#endif /* CONFIG_ARM64_PTR_AUTH */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_PTRAUTH_H */
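The PTRAUTH_REG_OFFSET() rebasing above matters because stp/ldp take a 7-bit signed, 8-byte-scaled immediate (offsets of -512 to +504 bytes), while the CPU_AP*KEYLO_EL1 asm-offsets can lie deeper than that inside struct kvm_cpu_context. Since the ten key sysregs are consecutive u64 slots, rebasing on CPU_APIAKEYLO_EL1 keeps every pair within 64 bytes of the base register. The standalone sketch below only shows that arithmetic; the absolute base offset of 1040 and the *_DEMO names are hypothetical, not taken from the generated asm-offsets.

/*
 * Userspace illustration (not kernel code): relative offsets of the
 * ptrauth key pairs when rebased on the first key.  Each LO/HI pair is
 * two u64 registers, i.e. 16 bytes apart, so all offsets fit the
 * -512..504 byte immediate range of stp/ldp.
 */
#include <stdint.h>
#include <stdio.h>

#define CPU_APIAKEYLO_EL1_DEMO	1040U	/* hypothetical asm-offsets value */
#define PTRAUTH_REG_OFFSET_DEMO(x)	((x) - CPU_APIAKEYLO_EL1_DEMO)

int main(void)
{
	const char *pairs[] = { "APIA", "APIB", "APDA", "APDB", "APGA" };

	for (unsigned int i = 0; i < 5; i++) {
		uint32_t keylo = CPU_APIAKEYLO_EL1_DEMO +
				 i * 2 * (uint32_t)sizeof(uint64_t);

		printf("%sKEYLO_EL1: stp offset #%u\n", pairs[i],
		       PTRAUTH_REG_OFFSET_DEMO(keylo));
	}
	return 0;
}

The printed offsets are 0, 16, 32, 48 and 64, all encodable without a separate add, which is the point of taking the first key as the base.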
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 3f7b917e8f3a..902d75b60914 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -454,6 +454,9 @@
 #define SYS_ICH_LR14_EL2		__SYS__LR8_EL2(6)
 #define SYS_ICH_LR15_EL2		__SYS__LR8_EL2(7)
 
+/* VHE encodings for architectural EL0/1 system registers */
+#define SYS_ZCR_EL12			sys_reg(3, 5, 1, 2, 0)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_DSSBS	(_BITUL(44))
 #define SCTLR_ELx_ENIA	(_BITUL(31))
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 97c3478ee6e7..7b7ac0f6cec9 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -35,6 +35,7 @@
 #include <linux/psci.h>
 #include <linux/types.h>
 #include <asm/ptrace.h>
+#include <asm/sve_context.h>
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
@@ -102,6 +103,9 @@ struct kvm_regs {
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
 #define KVM_ARM_VCPU_PMU_V3		3 /* Support guest PMUv3 */
+#define KVM_ARM_VCPU_SVE		4 /* enable SVE for this CPU */
+#define KVM_ARM_VCPU_PTRAUTH_ADDRESS	5 /* VCPU uses address authentication */
+#define KVM_ARM_VCPU_PTRAUTH_GENERIC	6 /* VCPU uses generic authentication */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -226,6 +230,45 @@ struct kvm_vcpu_events {
 					 KVM_REG_ARM_FW | ((r) & 0xffff))
 #define KVM_REG_ARM_PSCI_VERSION	KVM_REG_ARM_FW_REG(0)
 
+/* SVE registers */
+#define KVM_REG_ARM64_SVE		(0x15 << KVM_REG_ARM_COPROC_SHIFT)
+
+/* Z- and P-regs occupy blocks at the following offsets within this range: */
+#define KVM_REG_ARM64_SVE_ZREG_BASE	0
+#define KVM_REG_ARM64_SVE_PREG_BASE	0x400
+#define KVM_REG_ARM64_SVE_FFR_BASE	0x600
+
+#define KVM_ARM64_SVE_NUM_ZREGS		__SVE_NUM_ZREGS
+#define KVM_ARM64_SVE_NUM_PREGS		__SVE_NUM_PREGS
+
+#define KVM_ARM64_SVE_MAX_SLICES	32
+
+#define KVM_REG_ARM64_SVE_ZREG(n, i)	\
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_ZREG_BASE | \
+	 KVM_REG_SIZE_U2048 |		\
+	 (((n) & (KVM_ARM64_SVE_NUM_ZREGS - 1)) << 5) |	\
+	 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
+
+#define KVM_REG_ARM64_SVE_PREG(n, i)	\
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_PREG_BASE | \
+	 KVM_REG_SIZE_U256 |		\
+	 (((n) & (KVM_ARM64_SVE_NUM_PREGS - 1)) << 5) |	\
+	 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
+
+#define KVM_REG_ARM64_SVE_FFR(i)	\
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_FFR_BASE | \
+	 KVM_REG_SIZE_U256 |		\
+	 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
+
+#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
+#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
+
+/* Vector lengths pseudo-register: */
+#define KVM_REG_ARM64_SVE_VLS		(KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \
+					 KVM_REG_SIZE_U512 | 0xffff)
+#define KVM_ARM64_SVE_VLS_WORDS	\
+	((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1)
+
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
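The uapi macros above are what a VMM uses to drive SVE through the one-reg interface. A hedged userspace sketch follows: it assumes a vcpu fd from KVM_CREATE_VCPU, a kvm_vcpu_init already filled in (e.g. via KVM_ARM_PREFERRED_TARGET), headers new enough to carry these defines, and the generic KVM_ARM_VCPU_FINALIZE ioctl added elsewhere in this series (it is not part of this arch-only diffstat). Error handling and any narrowing of the vector-length set are elided.

/*
 * Sketch only: enable SVE on a vcpu, accept the default vector-length
 * set, finalize the configuration, then write slice 0 of Z0.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_sve_demo(int vcpu_fd, struct kvm_vcpu_init *init)
{
	uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
	uint8_t zreg[256];		/* KVM_REG_SIZE_U2048 payload */
	struct kvm_one_reg reg;
	int feature = KVM_ARM_VCPU_SVE;

	init->features[KVM_ARM_VCPU_SVE / 32] |= 1u << (KVM_ARM_VCPU_SVE % 32);
	if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, init))
		return -1;

	/* Read the supported-vector-lengths pseudo-register... */
	reg.id = KVM_REG_ARM64_SVE_VLS;
	reg.addr = (uint64_t)(uintptr_t)vls;
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg))
		return -1;
	/* ...and write it back unchanged (a real VMM might narrow it here). */
	if (ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg))
		return -1;

	/* Lock the SVE configuration before touching Z/P/FFR registers. */
	if (ioctl(vcpu_fd, KVM_ARM_VCPU_FINALIZE, &feature))
		return -1;

	/* Slice 0 of Z0; only the vcpu's configured VL worth of bytes is used. */
	memset(zreg, 0, sizeof(zreg));
	reg.id = KVM_REG_ARM64_SVE_ZREG(0, 0);
	reg.addr = (uint64_t)(uintptr_t)zreg;
	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}

The ordering mirrors the finalization rule encoded above: KVM_REG_ARM64_SVE_VLS may only be written before the vcpu is finalized, while the Z-, P- and FFR registers are only accessible afterwards.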
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index e10e2a5d9ddc..947e39896e28 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -125,9 +125,16 @@ int main(void)
   DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
   DEFINE(VCPU_FAULT_DISR,	offsetof(struct kvm_vcpu, arch.fault.disr_el1));
   DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
+  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
+  DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
+  DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
+  DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
+  DEFINE(CPU_APDBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDBKEYLO_EL1]));
+  DEFINE(CPU_APGAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1]));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
   DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
+  DEFINE(HOST_DATA_CONTEXT,	offsetof(struct kvm_host_data, host_ctxt));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2b807f129e60..ca27e08e3d8a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1913,7 +1913,7 @@ static void verify_sve_features(void)
 	unsigned int len = zcr & ZCR_ELx_LEN_MASK;
 
 	if (len < safe_len || sve_verify_vq_map()) {
-		pr_crit("CPU%d: SVE: required vector length(s) missing\n",
+		pr_crit("CPU%d: SVE: vector length support mismatch\n",
 			smp_processor_id());
 		cpu_die_early();
 	}
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 735cf1f8b109..a38bf74bcca8 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c | |||
@@ -18,6 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/bitmap.h> | 20 | #include <linux/bitmap.h> |
21 | #include <linux/bitops.h> | ||
21 | #include <linux/bottom_half.h> | 22 | #include <linux/bottom_half.h> |
22 | #include <linux/bug.h> | 23 | #include <linux/bug.h> |
23 | #include <linux/cache.h> | 24 | #include <linux/cache.h> |
@@ -48,6 +49,7 @@ | |||
48 | #include <asm/sigcontext.h> | 49 | #include <asm/sigcontext.h> |
49 | #include <asm/sysreg.h> | 50 | #include <asm/sysreg.h> |
50 | #include <asm/traps.h> | 51 | #include <asm/traps.h> |
52 | #include <asm/virt.h> | ||
51 | 53 | ||
52 | #define FPEXC_IOF (1 << 0) | 54 | #define FPEXC_IOF (1 << 0) |
53 | #define FPEXC_DZF (1 << 1) | 55 | #define FPEXC_DZF (1 << 1) |
@@ -119,6 +121,8 @@ | |||
119 | */ | 121 | */ |
120 | struct fpsimd_last_state_struct { | 122 | struct fpsimd_last_state_struct { |
121 | struct user_fpsimd_state *st; | 123 | struct user_fpsimd_state *st; |
124 | void *sve_state; | ||
125 | unsigned int sve_vl; | ||
122 | }; | 126 | }; |
123 | 127 | ||
124 | static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); | 128 | static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); |
@@ -130,14 +134,23 @@ static int sve_default_vl = -1; | |||
130 | 134 | ||
131 | /* Maximum supported vector length across all CPUs (initially poisoned) */ | 135 | /* Maximum supported vector length across all CPUs (initially poisoned) */ |
132 | int __ro_after_init sve_max_vl = SVE_VL_MIN; | 136 | int __ro_after_init sve_max_vl = SVE_VL_MIN; |
133 | /* Set of available vector lengths, as vq_to_bit(vq): */ | 137 | int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; |
134 | static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); | 138 | |
139 | /* | ||
140 | * Set of available vector lengths, | ||
141 | * where length vq encoded as bit __vq_to_bit(vq): | ||
142 | */ | ||
143 | __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); | ||
144 | /* Set of vector lengths present on at least one cpu: */ | ||
145 | static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); | ||
146 | |||
135 | static void __percpu *efi_sve_state; | 147 | static void __percpu *efi_sve_state; |
136 | 148 | ||
137 | #else /* ! CONFIG_ARM64_SVE */ | 149 | #else /* ! CONFIG_ARM64_SVE */ |
138 | 150 | ||
139 | /* Dummy declaration for code that will be optimised out: */ | 151 | /* Dummy declaration for code that will be optimised out: */ |
140 | extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); | 152 | extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); |
153 | extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); | ||
141 | extern void __percpu *efi_sve_state; | 154 | extern void __percpu *efi_sve_state; |
142 | 155 | ||
143 | #endif /* ! CONFIG_ARM64_SVE */ | 156 | #endif /* ! CONFIG_ARM64_SVE */ |
@@ -235,14 +248,15 @@ static void task_fpsimd_load(void) | |||
235 | */ | 248 | */ |
236 | void fpsimd_save(void) | 249 | void fpsimd_save(void) |
237 | { | 250 | { |
238 | struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st); | 251 | struct fpsimd_last_state_struct const *last = |
252 | this_cpu_ptr(&fpsimd_last_state); | ||
239 | /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ | 253 | /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ |
240 | 254 | ||
241 | WARN_ON(!in_softirq() && !irqs_disabled()); | 255 | WARN_ON(!in_softirq() && !irqs_disabled()); |
242 | 256 | ||
243 | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { | 257 | if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { |
244 | if (system_supports_sve() && test_thread_flag(TIF_SVE)) { | 258 | if (system_supports_sve() && test_thread_flag(TIF_SVE)) { |
245 | if (WARN_ON(sve_get_vl() != current->thread.sve_vl)) { | 259 | if (WARN_ON(sve_get_vl() != last->sve_vl)) { |
246 | /* | 260 | /* |
247 | * Can't save the user regs, so current would | 261 | * Can't save the user regs, so current would |
248 | * re-enter user with corrupt state. | 262 | * re-enter user with corrupt state. |
@@ -252,32 +266,15 @@ void fpsimd_save(void) | |||
252 | return; | 266 | return; |
253 | } | 267 | } |
254 | 268 | ||
255 | sve_save_state(sve_pffr(¤t->thread), &st->fpsr); | 269 | sve_save_state((char *)last->sve_state + |
270 | sve_ffr_offset(last->sve_vl), | ||
271 | &last->st->fpsr); | ||
256 | } else | 272 | } else |
257 | fpsimd_save_state(st); | 273 | fpsimd_save_state(last->st); |
258 | } | 274 | } |
259 | } | 275 | } |
260 | 276 | ||
261 | /* | 277 | /* |
262 | * Helpers to translate bit indices in sve_vq_map to VQ values (and | ||
263 | * vice versa). This allows find_next_bit() to be used to find the | ||
264 | * _maximum_ VQ not exceeding a certain value. | ||
265 | */ | ||
266 | |||
267 | static unsigned int vq_to_bit(unsigned int vq) | ||
268 | { | ||
269 | return SVE_VQ_MAX - vq; | ||
270 | } | ||
271 | |||
272 | static unsigned int bit_to_vq(unsigned int bit) | ||
273 | { | ||
274 | if (WARN_ON(bit >= SVE_VQ_MAX)) | ||
275 | bit = SVE_VQ_MAX - 1; | ||
276 | |||
277 | return SVE_VQ_MAX - bit; | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * All vector length selection from userspace comes through here. | 278 | * All vector length selection from userspace comes through here. |
282 | * We're on a slow path, so some sanity-checks are included. | 279 | * We're on a slow path, so some sanity-checks are included. |
283 | * If things go wrong there's a bug somewhere, but try to fall back to a | 280 | * If things go wrong there's a bug somewhere, but try to fall back to a |
@@ -298,8 +295,8 @@ static unsigned int find_supported_vector_length(unsigned int vl) | |||
298 | vl = max_vl; | 295 | vl = max_vl; |
299 | 296 | ||
300 | bit = find_next_bit(sve_vq_map, SVE_VQ_MAX, | 297 | bit = find_next_bit(sve_vq_map, SVE_VQ_MAX, |
301 | vq_to_bit(sve_vq_from_vl(vl))); | 298 | __vq_to_bit(sve_vq_from_vl(vl))); |
302 | return sve_vl_from_vq(bit_to_vq(bit)); | 299 | return sve_vl_from_vq(__bit_to_vq(bit)); |
303 | } | 300 | } |
304 | 301 | ||
305 | #ifdef CONFIG_SYSCTL | 302 | #ifdef CONFIG_SYSCTL |
@@ -550,7 +547,6 @@ int sve_set_vector_length(struct task_struct *task, | |||
550 | local_bh_disable(); | 547 | local_bh_disable(); |
551 | 548 | ||
552 | fpsimd_save(); | 549 | fpsimd_save(); |
553 | set_thread_flag(TIF_FOREIGN_FPSTATE); | ||
554 | } | 550 | } |
555 | 551 | ||
556 | fpsimd_flush_task_state(task); | 552 | fpsimd_flush_task_state(task); |
@@ -624,12 +620,6 @@ int sve_get_current_vl(void) | |||
624 | return sve_prctl_status(0); | 620 | return sve_prctl_status(0); |
625 | } | 621 | } |
626 | 622 | ||
627 | /* | ||
628 | * Bitmap for temporary storage of the per-CPU set of supported vector lengths | ||
629 | * during secondary boot. | ||
630 | */ | ||
631 | static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX); | ||
632 | |||
633 | static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) | 623 | static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) |
634 | { | 624 | { |
635 | unsigned int vq, vl; | 625 | unsigned int vq, vl; |
@@ -644,40 +634,82 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) | |||
644 | write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */ | 634 | write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */ |
645 | vl = sve_get_vl(); | 635 | vl = sve_get_vl(); |
646 | vq = sve_vq_from_vl(vl); /* skip intervening lengths */ | 636 | vq = sve_vq_from_vl(vl); /* skip intervening lengths */ |
647 | set_bit(vq_to_bit(vq), map); | 637 | set_bit(__vq_to_bit(vq), map); |
648 | } | 638 | } |
649 | } | 639 | } |
650 | 640 | ||
641 | /* | ||
642 | * Initialise the set of known supported VQs for the boot CPU. | ||
643 | * This is called during kernel boot, before secondary CPUs are brought up. | ||
644 | */ | ||
651 | void __init sve_init_vq_map(void) | 645 | void __init sve_init_vq_map(void) |
652 | { | 646 | { |
653 | sve_probe_vqs(sve_vq_map); | 647 | sve_probe_vqs(sve_vq_map); |
648 | bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); | ||
654 | } | 649 | } |
655 | 650 | ||
656 | /* | 651 | /* |
657 | * If we haven't committed to the set of supported VQs yet, filter out | 652 | * If we haven't committed to the set of supported VQs yet, filter out |
658 | * those not supported by the current CPU. | 653 | * those not supported by the current CPU. |
654 | * This function is called during the bring-up of early secondary CPUs only. | ||
659 | */ | 655 | */ |
660 | void sve_update_vq_map(void) | 656 | void sve_update_vq_map(void) |
661 | { | 657 | { |
662 | sve_probe_vqs(sve_secondary_vq_map); | 658 | DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); |
663 | bitmap_and(sve_vq_map, sve_vq_map, sve_secondary_vq_map, SVE_VQ_MAX); | 659 | |
660 | sve_probe_vqs(tmp_map); | ||
661 | bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX); | ||
662 | bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX); | ||
664 | } | 663 | } |
665 | 664 | ||
666 | /* Check whether the current CPU supports all VQs in the committed set */ | 665 | /* |
666 | * Check whether the current CPU supports all VQs in the committed set. | ||
667 | * This function is called during the bring-up of late secondary CPUs only. | ||
668 | */ | ||
667 | int sve_verify_vq_map(void) | 669 | int sve_verify_vq_map(void) |
668 | { | 670 | { |
669 | int ret = 0; | 671 | DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); |
672 | unsigned long b; | ||
670 | 673 | ||
671 | sve_probe_vqs(sve_secondary_vq_map); | 674 | sve_probe_vqs(tmp_map); |
672 | bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map, | 675 | |
673 | SVE_VQ_MAX); | 676 | bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); |
674 | if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) { | 677 | if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) { |
675 | pr_warn("SVE: cpu%d: Required vector length(s) missing\n", | 678 | pr_warn("SVE: cpu%d: Required vector length(s) missing\n", |
676 | smp_processor_id()); | 679 | smp_processor_id()); |
677 | ret = -EINVAL; | 680 | return -EINVAL; |
678 | } | 681 | } |
679 | 682 | ||
680 | return ret; | 683 | if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available()) |
684 | return 0; | ||
685 | |||
686 | /* | ||
687 | * For KVM, it is necessary to ensure that this CPU doesn't | ||
688 | * support any vector length that guests may have probed as | ||
689 | * unsupported. | ||
690 | */ | ||
691 | |||
692 | /* Recover the set of supported VQs: */ | ||
693 | bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); | ||
694 | /* Find VQs supported that are not globally supported: */ | ||
695 | bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX); | ||
696 | |||
697 | /* Find the lowest such VQ, if any: */ | ||
698 | b = find_last_bit(tmp_map, SVE_VQ_MAX); | ||
699 | if (b >= SVE_VQ_MAX) | ||
700 | return 0; /* no mismatches */ | ||
701 | |||
702 | /* | ||
703 | * Mismatches above sve_max_virtualisable_vl are fine, since | ||
704 | * no guest is allowed to configure ZCR_EL2.LEN to exceed this: | ||
705 | */ | ||
706 | if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) { | ||
707 | pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n", | ||
708 | smp_processor_id()); | ||
709 | return -EINVAL; | ||
710 | } | ||
711 | |||
712 | return 0; | ||
681 | } | 713 | } |
682 | 714 | ||
683 | static void __init sve_efi_setup(void) | 715 | static void __init sve_efi_setup(void) |
@@ -744,6 +776,8 @@ u64 read_zcr_features(void) | |||
744 | void __init sve_setup(void) | 776 | void __init sve_setup(void) |
745 | { | 777 | { |
746 | u64 zcr; | 778 | u64 zcr; |
779 | DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); | ||
780 | unsigned long b; | ||
747 | 781 | ||
748 | if (!system_supports_sve()) | 782 | if (!system_supports_sve()) |
749 | return; | 783 | return; |
@@ -753,8 +787,8 @@ void __init sve_setup(void) | |||
753 | * so sve_vq_map must have at least SVE_VQ_MIN set. | 787 | * so sve_vq_map must have at least SVE_VQ_MIN set. |
754 | * If something went wrong, at least try to patch it up: | 788 | * If something went wrong, at least try to patch it up: |
755 | */ | 789 | */ |
756 | if (WARN_ON(!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map))) | 790 | if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map))) |
757 | set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map); | 791 | set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map); |
758 | 792 | ||
759 | zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); | 793 | zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); |
760 | sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); | 794 | sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); |
@@ -772,11 +806,31 @@ void __init sve_setup(void) | |||
772 | */ | 806 | */ |
773 | sve_default_vl = find_supported_vector_length(64); | 807 | sve_default_vl = find_supported_vector_length(64); |
774 | 808 | ||
809 | bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, | ||
810 | SVE_VQ_MAX); | ||
811 | |||
812 | b = find_last_bit(tmp_map, SVE_VQ_MAX); | ||
813 | if (b >= SVE_VQ_MAX) | ||
814 | /* No non-virtualisable VLs found */ | ||
815 | sve_max_virtualisable_vl = SVE_VQ_MAX; | ||
816 | else if (WARN_ON(b == SVE_VQ_MAX - 1)) | ||
817 | /* No virtualisable VLs? This is architecturally forbidden. */ | ||
818 | sve_max_virtualisable_vl = SVE_VQ_MIN; | ||
819 | else /* b + 1 < SVE_VQ_MAX */ | ||
820 | sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); | ||
821 | |||
822 | if (sve_max_virtualisable_vl > sve_max_vl) | ||
823 | sve_max_virtualisable_vl = sve_max_vl; | ||
824 | |||
775 | pr_info("SVE: maximum available vector length %u bytes per vector\n", | 825 | pr_info("SVE: maximum available vector length %u bytes per vector\n", |
776 | sve_max_vl); | 826 | sve_max_vl); |
777 | pr_info("SVE: default vector length %u bytes per vector\n", | 827 | pr_info("SVE: default vector length %u bytes per vector\n", |
778 | sve_default_vl); | 828 | sve_default_vl); |
779 | 829 | ||
830 | /* KVM decides whether to support mismatched systems. Just warn here: */ | ||
831 | if (sve_max_virtualisable_vl < sve_max_vl) | ||
832 | pr_warn("SVE: unvirtualisable vector lengths present\n"); | ||
833 | |||
780 | sve_efi_setup(); | 834 | sve_efi_setup(); |
781 | } | 835 | } |
782 | 836 | ||
@@ -816,12 +870,11 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) | |||
816 | local_bh_disable(); | 870 | local_bh_disable(); |
817 | 871 | ||
818 | fpsimd_save(); | 872 | fpsimd_save(); |
819 | fpsimd_to_sve(current); | ||
820 | 873 | ||
821 | /* Force ret_to_user to reload the registers: */ | 874 | /* Force ret_to_user to reload the registers: */ |
822 | fpsimd_flush_task_state(current); | 875 | fpsimd_flush_task_state(current); |
823 | set_thread_flag(TIF_FOREIGN_FPSTATE); | ||
824 | 876 | ||
877 | fpsimd_to_sve(current); | ||
825 | if (test_and_set_thread_flag(TIF_SVE)) | 878 | if (test_and_set_thread_flag(TIF_SVE)) |
826 | WARN_ON(1); /* SVE access shouldn't have trapped */ | 879 | WARN_ON(1); /* SVE access shouldn't have trapped */ |
827 | 880 | ||
@@ -894,9 +947,9 @@ void fpsimd_flush_thread(void) | |||
894 | 947 | ||
895 | local_bh_disable(); | 948 | local_bh_disable(); |
896 | 949 | ||
950 | fpsimd_flush_task_state(current); | ||
897 | memset(¤t->thread.uw.fpsimd_state, 0, | 951 | memset(¤t->thread.uw.fpsimd_state, 0, |
898 | sizeof(current->thread.uw.fpsimd_state)); | 952 | sizeof(current->thread.uw.fpsimd_state)); |
899 | fpsimd_flush_task_state(current); | ||
900 | 953 | ||
901 | if (system_supports_sve()) { | 954 | if (system_supports_sve()) { |
902 | clear_thread_flag(TIF_SVE); | 955 | clear_thread_flag(TIF_SVE); |
@@ -933,8 +986,6 @@ void fpsimd_flush_thread(void) | |||
933 | current->thread.sve_vl_onexec = 0; | 986 | current->thread.sve_vl_onexec = 0; |
934 | } | 987 | } |
935 | 988 | ||
936 | set_thread_flag(TIF_FOREIGN_FPSTATE); | ||
937 | |||
938 | local_bh_enable(); | 989 | local_bh_enable(); |
939 | } | 990 | } |
940 | 991 | ||
@@ -974,6 +1025,8 @@ void fpsimd_bind_task_to_cpu(void) | |||
974 | this_cpu_ptr(&fpsimd_last_state); | 1025 | this_cpu_ptr(&fpsimd_last_state); |
975 | 1026 | ||
976 | last->st = ¤t->thread.uw.fpsimd_state; | 1027 | last->st = ¤t->thread.uw.fpsimd_state; |
1028 | last->sve_state = current->thread.sve_state; | ||
1029 | last->sve_vl = current->thread.sve_vl; | ||
977 | current->thread.fpsimd_cpu = smp_processor_id(); | 1030 | current->thread.fpsimd_cpu = smp_processor_id(); |
978 | 1031 | ||
979 | if (system_supports_sve()) { | 1032 | if (system_supports_sve()) { |
@@ -987,7 +1040,8 @@ void fpsimd_bind_task_to_cpu(void) | |||
987 | } | 1040 | } |
988 | } | 1041 | } |
989 | 1042 | ||
990 | void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st) | 1043 | void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, |
1044 | unsigned int sve_vl) | ||
991 | { | 1045 | { |
992 | struct fpsimd_last_state_struct *last = | 1046 | struct fpsimd_last_state_struct *last = |
993 | this_cpu_ptr(&fpsimd_last_state); | 1047 | this_cpu_ptr(&fpsimd_last_state); |
@@ -995,6 +1049,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st) | |||
995 | WARN_ON(!in_softirq() && !irqs_disabled()); | 1049 | WARN_ON(!in_softirq() && !irqs_disabled()); |
996 | 1050 | ||
997 | last->st = st; | 1051 | last->st = st; |
1052 | last->sve_state = sve_state; | ||
1053 | last->sve_vl = sve_vl; | ||
998 | } | 1054 | } |
999 | 1055 | ||
1000 | /* | 1056 | /* |
@@ -1043,12 +1099,29 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) | |||
1043 | 1099 | ||
1044 | /* | 1100 | /* |
1045 | * Invalidate live CPU copies of task t's FPSIMD state | 1101 | * Invalidate live CPU copies of task t's FPSIMD state |
1102 | * | ||
1103 | * This function may be called with preemption enabled. The barrier() | ||
1104 | * ensures that the assignment to fpsimd_cpu is visible to any | ||
1105 | * preemption/softirq that could race with set_tsk_thread_flag(), so | ||
1106 | * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared. | ||
1107 | * | ||
1108 | * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any | ||
1109 | * subsequent code. | ||
1046 | */ | 1110 | */ |
1047 | void fpsimd_flush_task_state(struct task_struct *t) | 1111 | void fpsimd_flush_task_state(struct task_struct *t) |
1048 | { | 1112 | { |
1049 | t->thread.fpsimd_cpu = NR_CPUS; | 1113 | t->thread.fpsimd_cpu = NR_CPUS; |
1114 | |||
1115 | barrier(); | ||
1116 | set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE); | ||
1117 | |||
1118 | barrier(); | ||
1050 | } | 1119 | } |
1051 | 1120 | ||
1121 | /* | ||
1122 | * Invalidate any task's FPSIMD state that is present on this cpu. | ||
1123 | * This function must be called with softirqs disabled. | ||
1124 | */ | ||
1052 | void fpsimd_flush_cpu_state(void) | 1125 | void fpsimd_flush_cpu_state(void) |
1053 | { | 1126 | { |
1054 | __this_cpu_write(fpsimd_last_state.st, NULL); | 1127 | __this_cpu_write(fpsimd_last_state.st, NULL); |
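The fpsimd.c hunks above move the ownership invalidation and the TIF_FOREIGN_FPSTATE flag into fpsimd_flush_task_state() itself, with compiler barriers between the two steps as the new comment explains. Below is a minimal standalone sketch of that ordering pattern, not kernel code: the names flush_task_state/foreign_fpstate and the NR_CPUS value are illustrative stand-ins, and barrier() is modelled as a plain compiler barrier.

```c
#include <stdio.h>

#define NR_CPUS   256                 /* illustrative stand-in */
#define barrier() __asm__ __volatile__("" ::: "memory")

struct task {
	unsigned int fpsimd_cpu;      /* which CPU owns the live FP regs */
	unsigned int foreign_fpstate; /* models TIF_FOREIGN_FPSTATE */
};

static void flush_task_state(struct task *t)
{
	t->fpsimd_cpu = NR_CPUS;      /* step 1: no CPU owns the state */
	barrier();                    /* step 1 visible before the flag is set */
	t->foreign_fpstate = 1;       /* step 2: force a reload on return */
	barrier();                    /* flag visible to any subsequent code */
}

int main(void)
{
	struct task t = { .fpsimd_cpu = 3, .foreign_fpstate = 0 };

	flush_task_state(&t);
	printf("fpsimd_cpu=%u foreign=%u\n", t.fpsimd_cpu, t.foreign_fpstate);
	return 0;
}
```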
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 6164d389eed6..348d12eec566 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <linux/acpi.h> | 27 | #include <linux/acpi.h> |
28 | #include <linux/clocksource.h> | 28 | #include <linux/clocksource.h> |
29 | #include <linux/kvm_host.h> | ||
29 | #include <linux/of.h> | 30 | #include <linux/of.h> |
30 | #include <linux/perf/arm_pmu.h> | 31 | #include <linux/perf/arm_pmu.h> |
31 | #include <linux/platform_device.h> | 32 | #include <linux/platform_device.h> |
@@ -528,12 +529,21 @@ static inline int armv8pmu_enable_counter(int idx) | |||
528 | 529 | ||
529 | static inline void armv8pmu_enable_event_counter(struct perf_event *event) | 530 | static inline void armv8pmu_enable_event_counter(struct perf_event *event) |
530 | { | 531 | { |
532 | struct perf_event_attr *attr = &event->attr; | ||
531 | int idx = event->hw.idx; | 533 | int idx = event->hw.idx; |
534 | u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx)); | ||
532 | 535 | ||
533 | armv8pmu_enable_counter(idx); | ||
534 | if (armv8pmu_event_is_chained(event)) | 536 | if (armv8pmu_event_is_chained(event)) |
535 | armv8pmu_enable_counter(idx - 1); | 537 | counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1)); |
536 | isb(); | 538 | |
539 | kvm_set_pmu_events(counter_bits, attr); | ||
540 | |||
541 | /* We rely on the hypervisor switch code to enable guest counters */ | ||
542 | if (!kvm_pmu_counter_deferred(attr)) { | ||
543 | armv8pmu_enable_counter(idx); | ||
544 | if (armv8pmu_event_is_chained(event)) | ||
545 | armv8pmu_enable_counter(idx - 1); | ||
546 | } | ||
537 | } | 547 | } |
538 | 548 | ||
539 | static inline int armv8pmu_disable_counter(int idx) | 549 | static inline int armv8pmu_disable_counter(int idx) |
@@ -546,11 +556,21 @@ static inline int armv8pmu_disable_counter(int idx) | |||
546 | static inline void armv8pmu_disable_event_counter(struct perf_event *event) | 556 | static inline void armv8pmu_disable_event_counter(struct perf_event *event) |
547 | { | 557 | { |
548 | struct hw_perf_event *hwc = &event->hw; | 558 | struct hw_perf_event *hwc = &event->hw; |
559 | struct perf_event_attr *attr = &event->attr; | ||
549 | int idx = hwc->idx; | 560 | int idx = hwc->idx; |
561 | u32 counter_bits = BIT(ARMV8_IDX_TO_COUNTER(idx)); | ||
550 | 562 | ||
551 | if (armv8pmu_event_is_chained(event)) | 563 | if (armv8pmu_event_is_chained(event)) |
552 | armv8pmu_disable_counter(idx - 1); | 564 | counter_bits |= BIT(ARMV8_IDX_TO_COUNTER(idx - 1)); |
553 | armv8pmu_disable_counter(idx); | 565 | |
566 | kvm_clr_pmu_events(counter_bits); | ||
567 | |||
568 | /* We rely on the hypervisor switch code to disable guest counters */ | ||
569 | if (!kvm_pmu_counter_deferred(attr)) { | ||
570 | if (armv8pmu_event_is_chained(event)) | ||
571 | armv8pmu_disable_counter(idx - 1); | ||
572 | armv8pmu_disable_counter(idx); | ||
573 | } | ||
554 | } | 574 | } |
555 | 575 | ||
556 | static inline int armv8pmu_enable_intens(int idx) | 576 | static inline int armv8pmu_enable_intens(int idx) |
@@ -827,14 +847,23 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, | |||
827 | * with other architectures (x86 and Power). | 847 | * with other architectures (x86 and Power). |
828 | */ | 848 | */ |
829 | if (is_kernel_in_hyp_mode()) { | 849 | if (is_kernel_in_hyp_mode()) { |
830 | if (!attr->exclude_kernel) | 850 | if (!attr->exclude_kernel && !attr->exclude_host) |
831 | config_base |= ARMV8_PMU_INCLUDE_EL2; | 851 | config_base |= ARMV8_PMU_INCLUDE_EL2; |
832 | } else { | 852 | if (attr->exclude_guest) |
833 | if (attr->exclude_kernel) | ||
834 | config_base |= ARMV8_PMU_EXCLUDE_EL1; | 853 | config_base |= ARMV8_PMU_EXCLUDE_EL1; |
835 | if (!attr->exclude_hv) | 854 | if (attr->exclude_host) |
855 | config_base |= ARMV8_PMU_EXCLUDE_EL0; | ||
856 | } else { | ||
857 | if (!attr->exclude_hv && !attr->exclude_host) | ||
836 | config_base |= ARMV8_PMU_INCLUDE_EL2; | 858 | config_base |= ARMV8_PMU_INCLUDE_EL2; |
837 | } | 859 | } |
860 | |||
861 | /* | ||
862 | * Filter out !VHE kernels and guest kernels | ||
863 | */ | ||
864 | if (attr->exclude_kernel) | ||
865 | config_base |= ARMV8_PMU_EXCLUDE_EL1; | ||
866 | |||
838 | if (attr->exclude_user) | 867 | if (attr->exclude_user) |
839 | config_base |= ARMV8_PMU_EXCLUDE_EL0; | 868 | config_base |= ARMV8_PMU_EXCLUDE_EL0; |
840 | 869 | ||
@@ -864,6 +893,9 @@ static void armv8pmu_reset(void *info) | |||
864 | armv8pmu_disable_intens(idx); | 893 | armv8pmu_disable_intens(idx); |
865 | } | 894 | } |
866 | 895 | ||
896 | /* Clear the counters we flip at guest entry/exit */ | ||
897 | kvm_clr_pmu_events(U32_MAX); | ||
898 | |||
867 | /* | 899 | /* |
868 | * Initialize & Reset PMNC. Request overflow interrupt for | 900 | * Initialize & Reset PMNC. Request overflow interrupt for |
869 | * 64 bit cycle counter but cheat in armv8pmu_write_counter(). | 901 | * 64 bit cycle counter but cheat in armv8pmu_write_counter(). |
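The perf_event.c changes above stop programming the counters unconditionally: they first build a bitmask covering the event counter (plus the low half of a chained pair), record it for the KVM switch code, and only write the enable registers when the event is not deferred to the hypervisor. A hedged standalone sketch of that flow follows; idx_to_counter(), tracked_bits and the printf are illustrative stand-ins for ARMV8_IDX_TO_COUNTER(), kvm_set_pmu_events() and the real PMCNTENSET_EL0 writes.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

/* Illustrative mapping only: assume event idx N uses hardware counter N - 1 */
static unsigned int idx_to_counter(int idx) { return (unsigned int)(idx - 1); }

static uint32_t tracked_bits;  /* models the kvm_set_pmu_events() bookkeeping */

static void enable_event(int idx, bool chained, bool deferred_to_hyp)
{
	uint32_t counter_bits = BIT(idx_to_counter(idx));

	if (chained)                  /* a chained event owns idx and idx - 1 */
		counter_bits |= BIT(idx_to_counter(idx - 1));

	tracked_bits |= counter_bits; /* the hyp switch code flips these bits */

	if (!deferred_to_hyp)         /* only touch the PMU when not deferred */
		printf("write PMCNTENSET_EL0 mask %#x\n", counter_bits);
}

int main(void)
{
	enable_event(4, true, false); /* host-visible event: enabled now */
	enable_event(2, false, true); /* guest-only event: left to the hyp code */
	printf("tracked bits = %#x\n", tracked_bits);
	return 0;
}
```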
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 867a7cea70e5..a9b0485df074 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c | |||
@@ -296,11 +296,6 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) | |||
296 | */ | 296 | */ |
297 | 297 | ||
298 | fpsimd_flush_task_state(current); | 298 | fpsimd_flush_task_state(current); |
299 | barrier(); | ||
300 | /* From now, fpsimd_thread_switch() won't clear TIF_FOREIGN_FPSTATE */ | ||
301 | |||
302 | set_thread_flag(TIF_FOREIGN_FPSTATE); | ||
303 | barrier(); | ||
304 | /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ | 299 | /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ |
305 | 300 | ||
306 | sve_alloc(current); | 301 | sve_alloc(current); |
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 690e033a91c0..3ac1a64d2fb9 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile | |||
@@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o | |||
17 | kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o | 17 | kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o |
18 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o | 18 | kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o |
19 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o | 19 | kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o |
20 | kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o | 20 | kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o |
21 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o | 21 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o |
22 | 22 | ||
23 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o | 23 | kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o |
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index aac7808ce216..6e3c9c8b2df9 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
10 | #include <linux/thread_info.h> | 10 | #include <linux/thread_info.h> |
11 | #include <linux/kvm_host.h> | 11 | #include <linux/kvm_host.h> |
12 | #include <asm/fpsimd.h> | ||
12 | #include <asm/kvm_asm.h> | 13 | #include <asm/kvm_asm.h> |
13 | #include <asm/kvm_host.h> | 14 | #include <asm/kvm_host.h> |
14 | #include <asm/kvm_mmu.h> | 15 | #include <asm/kvm_mmu.h> |
@@ -85,9 +86,12 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) | |||
85 | WARN_ON_ONCE(!irqs_disabled()); | 86 | WARN_ON_ONCE(!irqs_disabled()); |
86 | 87 | ||
87 | if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { | 88 | if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { |
88 | fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs); | 89 | fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs, |
90 | vcpu->arch.sve_state, | ||
91 | vcpu->arch.sve_max_vl); | ||
92 | |||
89 | clear_thread_flag(TIF_FOREIGN_FPSTATE); | 93 | clear_thread_flag(TIF_FOREIGN_FPSTATE); |
90 | clear_thread_flag(TIF_SVE); | 94 | update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu)); |
91 | } | 95 | } |
92 | } | 96 | } |
93 | 97 | ||
@@ -100,14 +104,21 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) | |||
100 | void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) | 104 | void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) |
101 | { | 105 | { |
102 | unsigned long flags; | 106 | unsigned long flags; |
107 | bool host_has_sve = system_supports_sve(); | ||
108 | bool guest_has_sve = vcpu_has_sve(vcpu); | ||
103 | 109 | ||
104 | local_irq_save(flags); | 110 | local_irq_save(flags); |
105 | 111 | ||
106 | if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { | 112 | if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { |
113 | u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; | ||
114 | |||
107 | /* Clean guest FP state to memory and invalidate cpu view */ | 115 | /* Clean guest FP state to memory and invalidate cpu view */ |
108 | fpsimd_save(); | 116 | fpsimd_save(); |
109 | fpsimd_flush_cpu_state(); | 117 | fpsimd_flush_cpu_state(); |
110 | } else if (system_supports_sve()) { | 118 | |
119 | if (guest_has_sve) | ||
120 | *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); | ||
121 | } else if (host_has_sve) { | ||
111 | /* | 122 | /* |
112 | * The FPSIMD/SVE state in the CPU has not been touched, and we | 123 | * The FPSIMD/SVE state in the CPU has not been touched, and we |
113 | * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been | 124 | * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been |
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dd436a50fce7..3ae2f82fca46 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c | |||
@@ -19,18 +19,25 @@ | |||
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | 19 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/bits.h> | ||
22 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
23 | #include <linux/err.h> | 24 | #include <linux/err.h> |
25 | #include <linux/nospec.h> | ||
24 | #include <linux/kvm_host.h> | 26 | #include <linux/kvm_host.h> |
25 | #include <linux/module.h> | 27 | #include <linux/module.h> |
28 | #include <linux/stddef.h> | ||
29 | #include <linux/string.h> | ||
26 | #include <linux/vmalloc.h> | 30 | #include <linux/vmalloc.h> |
27 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
28 | #include <kvm/arm_psci.h> | 32 | #include <kvm/arm_psci.h> |
29 | #include <asm/cputype.h> | 33 | #include <asm/cputype.h> |
30 | #include <linux/uaccess.h> | 34 | #include <linux/uaccess.h> |
35 | #include <asm/fpsimd.h> | ||
31 | #include <asm/kvm.h> | 36 | #include <asm/kvm.h> |
32 | #include <asm/kvm_emulate.h> | 37 | #include <asm/kvm_emulate.h> |
33 | #include <asm/kvm_coproc.h> | 38 | #include <asm/kvm_coproc.h> |
39 | #include <asm/kvm_host.h> | ||
40 | #include <asm/sigcontext.h> | ||
34 | 41 | ||
35 | #include "trace.h" | 42 | #include "trace.h" |
36 | 43 | ||
@@ -52,12 +59,19 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
52 | return 0; | 59 | return 0; |
53 | } | 60 | } |
54 | 61 | ||
62 | static bool core_reg_offset_is_vreg(u64 off) | ||
63 | { | ||
64 | return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) && | ||
65 | off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr); | ||
66 | } | ||
67 | |||
55 | static u64 core_reg_offset_from_id(u64 id) | 68 | static u64 core_reg_offset_from_id(u64 id) |
56 | { | 69 | { |
57 | return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); | 70 | return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); |
58 | } | 71 | } |
59 | 72 | ||
60 | static int validate_core_offset(const struct kvm_one_reg *reg) | 73 | static int validate_core_offset(const struct kvm_vcpu *vcpu, |
74 | const struct kvm_one_reg *reg) | ||
61 | { | 75 | { |
62 | u64 off = core_reg_offset_from_id(reg->id); | 76 | u64 off = core_reg_offset_from_id(reg->id); |
63 | int size; | 77 | int size; |
@@ -89,11 +103,19 @@ static int validate_core_offset(const struct kvm_one_reg *reg) | |||
89 | return -EINVAL; | 103 | return -EINVAL; |
90 | } | 104 | } |
91 | 105 | ||
92 | if (KVM_REG_SIZE(reg->id) == size && | 106 | if (KVM_REG_SIZE(reg->id) != size || |
93 | IS_ALIGNED(off, size / sizeof(__u32))) | 107 | !IS_ALIGNED(off, size / sizeof(__u32))) |
94 | return 0; | 108 | return -EINVAL; |
95 | 109 | ||
96 | return -EINVAL; | 110 | /* |
111 | * The KVM_REG_ARM64_SVE regs must be used instead of | ||
112 | * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on | ||
113 | * SVE-enabled vcpus: | ||
114 | */ | ||
115 | if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off)) | ||
116 | return -EINVAL; | ||
117 | |||
118 | return 0; | ||
97 | } | 119 | } |
98 | 120 | ||
99 | static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | 121 | static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) |
@@ -115,7 +137,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
115 | (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) | 137 | (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) |
116 | return -ENOENT; | 138 | return -ENOENT; |
117 | 139 | ||
118 | if (validate_core_offset(reg)) | 140 | if (validate_core_offset(vcpu, reg)) |
119 | return -EINVAL; | 141 | return -EINVAL; |
120 | 142 | ||
121 | if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) | 143 | if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) |
@@ -140,7 +162,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
140 | (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) | 162 | (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) |
141 | return -ENOENT; | 163 | return -ENOENT; |
142 | 164 | ||
143 | if (validate_core_offset(reg)) | 165 | if (validate_core_offset(vcpu, reg)) |
144 | return -EINVAL; | 166 | return -EINVAL; |
145 | 167 | ||
146 | if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) | 168 | if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) |
@@ -183,6 +205,239 @@ out: | |||
183 | return err; | 205 | return err; |
184 | } | 206 | } |
185 | 207 | ||
208 | #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64) | ||
209 | #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) | ||
210 | |||
211 | static bool vq_present( | ||
212 | const u64 (*const vqs)[KVM_ARM64_SVE_VLS_WORDS], | ||
213 | unsigned int vq) | ||
214 | { | ||
215 | return (*vqs)[vq_word(vq)] & vq_mask(vq); | ||
216 | } | ||
217 | |||
218 | static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | ||
219 | { | ||
220 | unsigned int max_vq, vq; | ||
221 | u64 vqs[KVM_ARM64_SVE_VLS_WORDS]; | ||
222 | |||
223 | if (!vcpu_has_sve(vcpu)) | ||
224 | return -ENOENT; | ||
225 | |||
226 | if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl))) | ||
227 | return -EINVAL; | ||
228 | |||
229 | memset(vqs, 0, sizeof(vqs)); | ||
230 | |||
231 | max_vq = sve_vq_from_vl(vcpu->arch.sve_max_vl); | ||
232 | for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq) | ||
233 | if (sve_vq_available(vq)) | ||
234 | vqs[vq_word(vq)] |= vq_mask(vq); | ||
235 | |||
236 | if (copy_to_user((void __user *)reg->addr, vqs, sizeof(vqs))) | ||
237 | return -EFAULT; | ||
238 | |||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | ||
243 | { | ||
244 | unsigned int max_vq, vq; | ||
245 | u64 vqs[KVM_ARM64_SVE_VLS_WORDS]; | ||
246 | |||
247 | if (!vcpu_has_sve(vcpu)) | ||
248 | return -ENOENT; | ||
249 | |||
250 | if (kvm_arm_vcpu_sve_finalized(vcpu)) | ||
251 | return -EPERM; /* too late! */ | ||
252 | |||
253 | if (WARN_ON(vcpu->arch.sve_state)) | ||
254 | return -EINVAL; | ||
255 | |||
256 | if (copy_from_user(vqs, (const void __user *)reg->addr, sizeof(vqs))) | ||
257 | return -EFAULT; | ||
258 | |||
259 | max_vq = 0; | ||
260 | for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq) | ||
261 | if (vq_present(&vqs, vq)) | ||
262 | max_vq = vq; | ||
263 | |||
264 | if (max_vq > sve_vq_from_vl(kvm_sve_max_vl)) | ||
265 | return -EINVAL; | ||
266 | |||
267 | /* | ||
268 | * Vector lengths supported by the host can't currently be | ||
269 | * hidden from the guest individually: instead we can only set a | ||
270 | * maximum via ZCR_EL2.LEN. So, make sure the available vector | ||
271 | * lengths match the set requested exactly up to the requested | ||
272 | * maximum: | ||
273 | */ | ||
274 | for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq) | ||
275 | if (vq_present(&vqs, vq) != sve_vq_available(vq)) | ||
276 | return -EINVAL; | ||
277 | |||
278 | /* Can't run with no vector lengths at all: */ | ||
279 | if (max_vq < SVE_VQ_MIN) | ||
280 | return -EINVAL; | ||
281 | |||
282 | /* vcpu->arch.sve_state will be alloc'd by kvm_vcpu_finalize_sve() */ | ||
283 | vcpu->arch.sve_max_vl = sve_vl_from_vq(max_vq); | ||
284 | |||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | #define SVE_REG_SLICE_SHIFT 0 | ||
289 | #define SVE_REG_SLICE_BITS 5 | ||
290 | #define SVE_REG_ID_SHIFT (SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS) | ||
291 | #define SVE_REG_ID_BITS 5 | ||
292 | |||
293 | #define SVE_REG_SLICE_MASK \ | ||
294 | GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1, \ | ||
295 | SVE_REG_SLICE_SHIFT) | ||
296 | #define SVE_REG_ID_MASK \ | ||
297 | GENMASK(SVE_REG_ID_SHIFT + SVE_REG_ID_BITS - 1, SVE_REG_ID_SHIFT) | ||
298 | |||
299 | #define SVE_NUM_SLICES (1 << SVE_REG_SLICE_BITS) | ||
300 | |||
301 | #define KVM_SVE_ZREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0)) | ||
302 | #define KVM_SVE_PREG_SIZE KVM_REG_SIZE(KVM_REG_ARM64_SVE_PREG(0, 0)) | ||
303 | |||
304 | /* | ||
305 | * Number of register slices required to cover each whole SVE register. | ||
306 | * NOTE: Only the first slice ever exists, for now. | ||
307 | * If you are tempted to modify this, you must also rework sve_reg_to_region() | ||
308 | * to match: | ||
309 | */ | ||
310 | #define vcpu_sve_slices(vcpu) 1 | ||
311 | |||
312 | /* Bounds of a single SVE register slice within vcpu->arch.sve_state */ | ||
313 | struct sve_state_reg_region { | ||
314 | unsigned int koffset; /* offset into sve_state in kernel memory */ | ||
315 | unsigned int klen; /* length in kernel memory */ | ||
316 | unsigned int upad; /* extra trailing padding in user memory */ | ||
317 | }; | ||
318 | |||
319 | /* | ||
320 | * Validate SVE register ID and get sanitised bounds for user/kernel SVE | ||
321 | * register copy | ||
322 | */ | ||
323 | static int sve_reg_to_region(struct sve_state_reg_region *region, | ||
324 | struct kvm_vcpu *vcpu, | ||
325 | const struct kvm_one_reg *reg) | ||
326 | { | ||
327 | /* reg ID ranges for Z- registers */ | ||
328 | const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0); | ||
329 | const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1, | ||
330 | SVE_NUM_SLICES - 1); | ||
331 | |||
332 | /* reg ID ranges for P- registers and FFR (which are contiguous) */ | ||
333 | const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0); | ||
334 | const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1); | ||
335 | |||
336 | unsigned int vq; | ||
337 | unsigned int reg_num; | ||
338 | |||
339 | unsigned int reqoffset, reqlen; /* User-requested offset and length */ | ||
340 | unsigned int maxlen; /* Maximum permitted length */ | ||
341 | |||
342 | size_t sve_state_size; | ||
343 | |||
344 | const u64 last_preg_id = KVM_REG_ARM64_SVE_PREG(SVE_NUM_PREGS - 1, | ||
345 | SVE_NUM_SLICES - 1); | ||
346 | |||
347 | /* Verify that the P-regs and FFR really do have contiguous IDs: */ | ||
348 | BUILD_BUG_ON(KVM_REG_ARM64_SVE_FFR(0) != last_preg_id + 1); | ||
349 | |||
350 | /* Verify that we match the UAPI header: */ | ||
351 | BUILD_BUG_ON(SVE_NUM_SLICES != KVM_ARM64_SVE_MAX_SLICES); | ||
352 | |||
353 | reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT; | ||
354 | |||
355 | if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) { | ||
356 | if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0) | ||
357 | return -ENOENT; | ||
358 | |||
359 | vq = sve_vq_from_vl(vcpu->arch.sve_max_vl); | ||
360 | |||
361 | reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) - | ||
362 | SVE_SIG_REGS_OFFSET; | ||
363 | reqlen = KVM_SVE_ZREG_SIZE; | ||
364 | maxlen = SVE_SIG_ZREG_SIZE(vq); | ||
365 | } else if (reg->id >= preg_id_min && reg->id <= preg_id_max) { | ||
366 | if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0) | ||
367 | return -ENOENT; | ||
368 | |||
369 | vq = sve_vq_from_vl(vcpu->arch.sve_max_vl); | ||
370 | |||
371 | reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) - | ||
372 | SVE_SIG_REGS_OFFSET; | ||
373 | reqlen = KVM_SVE_PREG_SIZE; | ||
374 | maxlen = SVE_SIG_PREG_SIZE(vq); | ||
375 | } else { | ||
376 | return -EINVAL; | ||
377 | } | ||
378 | |||
379 | sve_state_size = vcpu_sve_state_size(vcpu); | ||
380 | if (WARN_ON(!sve_state_size)) | ||
381 | return -EINVAL; | ||
382 | |||
383 | region->koffset = array_index_nospec(reqoffset, sve_state_size); | ||
384 | region->klen = min(maxlen, reqlen); | ||
385 | region->upad = reqlen - region->klen; | ||
386 | |||
387 | return 0; | ||
388 | } | ||
389 | |||
390 | static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | ||
391 | { | ||
392 | int ret; | ||
393 | struct sve_state_reg_region region; | ||
394 | char __user *uptr = (char __user *)reg->addr; | ||
395 | |||
396 | /* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */ | ||
397 | if (reg->id == KVM_REG_ARM64_SVE_VLS) | ||
398 | return get_sve_vls(vcpu, reg); | ||
399 | |||
400 | /* Try to interpret reg ID as an architectural SVE register... */ | ||
401 | ret = sve_reg_to_region(®ion, vcpu, reg); | ||
402 | if (ret) | ||
403 | return ret; | ||
404 | |||
405 | if (!kvm_arm_vcpu_sve_finalized(vcpu)) | ||
406 | return -EPERM; | ||
407 | |||
408 | if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset, | ||
409 | region.klen) || | ||
410 | clear_user(uptr + region.klen, region.upad)) | ||
411 | return -EFAULT; | ||
412 | |||
413 | return 0; | ||
414 | } | ||
415 | |||
416 | static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | ||
417 | { | ||
418 | int ret; | ||
419 | struct sve_state_reg_region region; | ||
420 | const char __user *uptr = (const char __user *)reg->addr; | ||
421 | |||
422 | /* Handle the KVM_REG_ARM64_SVE_VLS pseudo-reg as a special case: */ | ||
423 | if (reg->id == KVM_REG_ARM64_SVE_VLS) | ||
424 | return set_sve_vls(vcpu, reg); | ||
425 | |||
426 | /* Try to interpret reg ID as an architectural SVE register... */ | ||
427 | ret = sve_reg_to_region(®ion, vcpu, reg); | ||
428 | if (ret) | ||
429 | return ret; | ||
430 | |||
431 | if (!kvm_arm_vcpu_sve_finalized(vcpu)) | ||
432 | return -EPERM; | ||
433 | |||
434 | if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr, | ||
435 | region.klen)) | ||
436 | return -EFAULT; | ||
437 | |||
438 | return 0; | ||
439 | } | ||
440 | |||
186 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | 441 | int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) |
187 | { | 442 | { |
188 | return -EINVAL; | 443 | return -EINVAL; |
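The new get_sve_vls()/set_sve_vls() code above encodes the supported vector lengths as a bitmap with one bit per vector-quadword count, packed into 64-bit words via vq_word()/vq_mask()/vq_present(). Below is a standalone sketch of that encoding; the SVE_VQ_MIN/SVE_VQ_MAX values are restated here as assumptions, and vq_set() is an illustrative helper that is not part of the patch.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SVE_VQ_MIN 1    /* assumed: smallest vector-quadword count (128 bits) */
#define SVE_VQ_MAX 512  /* assumed: architectural maximum */
#define VLS_WORDS  ((SVE_VQ_MAX - SVE_VQ_MIN) / 64 + 1)

#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
#define vq_mask(vq) ((uint64_t)1 << (((vq) - SVE_VQ_MIN) % 64))

static void vq_set(uint64_t vqs[VLS_WORDS], unsigned int vq)
{
	vqs[vq_word(vq)] |= vq_mask(vq);
}

static bool vq_present(const uint64_t vqs[VLS_WORDS], unsigned int vq)
{
	return vqs[vq_word(vq)] & vq_mask(vq);
}

int main(void)
{
	uint64_t vqs[VLS_WORDS] = { 0 };

	/* advertise 128-, 256- and 512-bit vectors (VQ 1, 2 and 4) */
	vq_set(vqs, 1);
	vq_set(vqs, 2);
	vq_set(vqs, 4);

	printf("VQ2 present: %d, VQ3 present: %d, word0 = %#llx\n",
	       vq_present(vqs, 2), vq_present(vqs, 3),
	       (unsigned long long)vqs[0]);
	return 0;
}
```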
@@ -193,9 +448,37 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
193 | return -EINVAL; | 448 | return -EINVAL; |
194 | } | 449 | } |
195 | 450 | ||
196 | static unsigned long num_core_regs(void) | 451 | static int copy_core_reg_indices(const struct kvm_vcpu *vcpu, |
452 | u64 __user *uindices) | ||
453 | { | ||
454 | unsigned int i; | ||
455 | int n = 0; | ||
456 | const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; | ||
457 | |||
458 | for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { | ||
459 | /* | ||
460 | * The KVM_REG_ARM64_SVE regs must be used instead of | ||
461 | * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on | ||
462 | * SVE-enabled vcpus: | ||
463 | */ | ||
464 | if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(i)) | ||
465 | continue; | ||
466 | |||
467 | if (uindices) { | ||
468 | if (put_user(core_reg | i, uindices)) | ||
469 | return -EFAULT; | ||
470 | uindices++; | ||
471 | } | ||
472 | |||
473 | n++; | ||
474 | } | ||
475 | |||
476 | return n; | ||
477 | } | ||
478 | |||
479 | static unsigned long num_core_regs(const struct kvm_vcpu *vcpu) | ||
197 | { | 480 | { |
198 | return sizeof(struct kvm_regs) / sizeof(__u32); | 481 | return copy_core_reg_indices(vcpu, NULL); |
199 | } | 482 | } |
200 | 483 | ||
201 | /** | 484 | /** |
@@ -251,6 +534,67 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
251 | return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; | 534 | return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; |
252 | } | 535 | } |
253 | 536 | ||
537 | static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu) | ||
538 | { | ||
539 | const unsigned int slices = vcpu_sve_slices(vcpu); | ||
540 | |||
541 | if (!vcpu_has_sve(vcpu)) | ||
542 | return 0; | ||
543 | |||
544 | /* Policed by KVM_GET_REG_LIST: */ | ||
545 | WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); | ||
546 | |||
547 | return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */) | ||
548 | + 1; /* KVM_REG_ARM64_SVE_VLS */ | ||
549 | } | ||
550 | |||
551 | static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, | ||
552 | u64 __user *uindices) | ||
553 | { | ||
554 | const unsigned int slices = vcpu_sve_slices(vcpu); | ||
555 | u64 reg; | ||
556 | unsigned int i, n; | ||
557 | int num_regs = 0; | ||
558 | |||
559 | if (!vcpu_has_sve(vcpu)) | ||
560 | return 0; | ||
561 | |||
562 | /* Policed by KVM_GET_REG_LIST: */ | ||
563 | WARN_ON(!kvm_arm_vcpu_sve_finalized(vcpu)); | ||
564 | |||
565 | /* | ||
566 | * Enumerate this first, so that userspace can save/restore in | ||
567 | * the order reported by KVM_GET_REG_LIST: | ||
568 | */ | ||
569 | reg = KVM_REG_ARM64_SVE_VLS; | ||
570 | if (put_user(reg, uindices++)) | ||
571 | return -EFAULT; | ||
572 | ++num_regs; | ||
573 | |||
574 | for (i = 0; i < slices; i++) { | ||
575 | for (n = 0; n < SVE_NUM_ZREGS; n++) { | ||
576 | reg = KVM_REG_ARM64_SVE_ZREG(n, i); | ||
577 | if (put_user(reg, uindices++)) | ||
578 | return -EFAULT; | ||
579 | num_regs++; | ||
580 | } | ||
581 | |||
582 | for (n = 0; n < SVE_NUM_PREGS; n++) { | ||
583 | reg = KVM_REG_ARM64_SVE_PREG(n, i); | ||
584 | if (put_user(reg, uindices++)) | ||
585 | return -EFAULT; | ||
586 | num_regs++; | ||
587 | } | ||
588 | |||
589 | reg = KVM_REG_ARM64_SVE_FFR(i); | ||
590 | if (put_user(reg, uindices++)) | ||
591 | return -EFAULT; | ||
592 | num_regs++; | ||
593 | } | ||
594 | |||
595 | return num_regs; | ||
596 | } | ||
597 | |||
254 | /** | 598 | /** |
255 | * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG | 599 | * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG |
256 | * | 600 | * |
@@ -258,8 +602,15 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
258 | */ | 602 | */ |
259 | unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) | 603 | unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) |
260 | { | 604 | { |
261 | return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu) | 605 | unsigned long res = 0; |
262 | + kvm_arm_get_fw_num_regs(vcpu) + NUM_TIMER_REGS; | 606 | |
607 | res += num_core_regs(vcpu); | ||
608 | res += num_sve_regs(vcpu); | ||
609 | res += kvm_arm_num_sys_reg_descs(vcpu); | ||
610 | res += kvm_arm_get_fw_num_regs(vcpu); | ||
611 | res += NUM_TIMER_REGS; | ||
612 | |||
613 | return res; | ||
263 | } | 614 | } |
264 | 615 | ||
265 | /** | 616 | /** |
@@ -269,23 +620,25 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) | |||
269 | */ | 620 | */ |
270 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) | 621 | int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) |
271 | { | 622 | { |
272 | unsigned int i; | ||
273 | const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE; | ||
274 | int ret; | 623 | int ret; |
275 | 624 | ||
276 | for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) { | 625 | ret = copy_core_reg_indices(vcpu, uindices); |
277 | if (put_user(core_reg | i, uindices)) | 626 | if (ret < 0) |
278 | return -EFAULT; | 627 | return ret; |
279 | uindices++; | 628 | uindices += ret; |
280 | } | 629 | |
630 | ret = copy_sve_reg_indices(vcpu, uindices); | ||
631 | if (ret < 0) | ||
632 | return ret; | ||
633 | uindices += ret; | ||
281 | 634 | ||
282 | ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); | 635 | ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices); |
283 | if (ret) | 636 | if (ret < 0) |
284 | return ret; | 637 | return ret; |
285 | uindices += kvm_arm_get_fw_num_regs(vcpu); | 638 | uindices += kvm_arm_get_fw_num_regs(vcpu); |
286 | 639 | ||
287 | ret = copy_timer_indices(vcpu, uindices); | 640 | ret = copy_timer_indices(vcpu, uindices); |
288 | if (ret) | 641 | if (ret < 0) |
289 | return ret; | 642 | return ret; |
290 | uindices += NUM_TIMER_REGS; | 643 | uindices += NUM_TIMER_REGS; |
291 | 644 | ||
@@ -298,12 +651,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
298 | if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) | 651 | if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) |
299 | return -EINVAL; | 652 | return -EINVAL; |
300 | 653 | ||
301 | /* Register group 16 means we want a core register. */ | 654 | switch (reg->id & KVM_REG_ARM_COPROC_MASK) { |
302 | if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) | 655 | case KVM_REG_ARM_CORE: return get_core_reg(vcpu, reg); |
303 | return get_core_reg(vcpu, reg); | 656 | case KVM_REG_ARM_FW: return kvm_arm_get_fw_reg(vcpu, reg); |
304 | 657 | case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg); | |
305 | if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) | 658 | } |
306 | return kvm_arm_get_fw_reg(vcpu, reg); | ||
307 | 659 | ||
308 | if (is_timer_reg(reg->id)) | 660 | if (is_timer_reg(reg->id)) |
309 | return get_timer_reg(vcpu, reg); | 661 | return get_timer_reg(vcpu, reg); |
@@ -317,12 +669,11 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) | |||
317 | if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) | 669 | if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32) |
318 | return -EINVAL; | 670 | return -EINVAL; |
319 | 671 | ||
320 | /* Register group 16 means we set a core register. */ | 672 | switch (reg->id & KVM_REG_ARM_COPROC_MASK) { |
321 | if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) | 673 | case KVM_REG_ARM_CORE: return set_core_reg(vcpu, reg); |
322 | return set_core_reg(vcpu, reg); | 674 | case KVM_REG_ARM_FW: return kvm_arm_set_fw_reg(vcpu, reg); |
323 | 675 | case KVM_REG_ARM64_SVE: return set_sve_reg(vcpu, reg); | |
324 | if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW) | 676 | } |
325 | return kvm_arm_set_fw_reg(vcpu, reg); | ||
326 | 677 | ||
327 | if (is_timer_reg(reg->id)) | 678 | if (is_timer_reg(reg->id)) |
328 | return set_timer_reg(vcpu, reg); | 679 | return set_timer_reg(vcpu, reg); |
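For reference, the enumeration order that copy_sve_reg_indices() reports through KVM_GET_REG_LIST can be sketched as plain iteration: the VLS pseudo-register first so userspace restores it first, then the Z-, P- and FFR registers for each slice. The counts used below (32 Z-regs, 16 P-regs, a single slice) are assumptions restated for illustration; the real code emits encoded register IDs rather than strings.

```c
#include <stdio.h>

#define NUM_ZREGS  32  /* assumed SVE_NUM_ZREGS */
#define NUM_PREGS  16  /* assumed SVE_NUM_PREGS */
#define NUM_SLICES 1   /* only the first slice exists for now */

int main(void)
{
	int n = 0, i, r;

	printf("%3d: SVE_VLS\n", n++);  /* enumerated before any SVE data regs */

	for (i = 0; i < NUM_SLICES; i++) {
		for (r = 0; r < NUM_ZREGS; r++)
			printf("%3d: Z%d slice %d\n", n++, r, i);
		for (r = 0; r < NUM_PREGS; r++)
			printf("%3d: P%d slice %d\n", n++, r, i);
		printf("%3d: FFR slice %d\n", n++, i);
	}

	printf("total = %d (= slices * (32 + 16 + 1) + 1)\n", n);
	return 0;
}
```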
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 0b7983442071..516aead3c2a9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c | |||
@@ -173,20 +173,40 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
173 | return 1; | 173 | return 1; |
174 | } | 174 | } |
175 | 175 | ||
176 | #define __ptrauth_save_key(regs, key) \ | ||
177 | ({ \ | ||
178 | regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ | ||
179 | regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ | ||
180 | }) | ||
181 | |||
182 | /* | ||
183 | * Handle the guest trying to use a ptrauth instruction, or trying to access a | ||
184 | * ptrauth register. | ||
185 | */ | ||
186 | void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu) | ||
187 | { | ||
188 | struct kvm_cpu_context *ctxt; | ||
189 | |||
190 | if (vcpu_has_ptrauth(vcpu)) { | ||
191 | vcpu_ptrauth_enable(vcpu); | ||
192 | ctxt = vcpu->arch.host_cpu_context; | ||
193 | __ptrauth_save_key(ctxt->sys_regs, APIA); | ||
194 | __ptrauth_save_key(ctxt->sys_regs, APIB); | ||
195 | __ptrauth_save_key(ctxt->sys_regs, APDA); | ||
196 | __ptrauth_save_key(ctxt->sys_regs, APDB); | ||
197 | __ptrauth_save_key(ctxt->sys_regs, APGA); | ||
198 | } else { | ||
199 | kvm_inject_undefined(vcpu); | ||
200 | } | ||
201 | } | ||
202 | |||
176 | /* | 203 | /* |
177 | * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into | 204 | * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into |
178 | * a NOP). | 205 | * a NOP). |
179 | */ | 206 | */ |
180 | static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run) | 207 | static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run) |
181 | { | 208 | { |
182 | /* | 209 | kvm_arm_vcpu_ptrauth_trap(vcpu); |
183 | * We don't currently support ptrauth in a guest, and we mask the ID | ||
184 | * registers to prevent well-behaved guests from trying to make use of | ||
185 | * it. | ||
186 | * | ||
187 | * Inject an UNDEF, as if the feature really isn't present. | ||
188 | */ | ||
189 | kvm_inject_undefined(vcpu); | ||
190 | return 1; | 210 | return 1; |
191 | } | 211 | } |
192 | 212 | ||
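kvm_arm_vcpu_ptrauth_trap() above relies on token pasting in __ptrauth_save_key() to read both halves of each key into the host context. The standalone sketch below shows that expansion for APIA only; read_key() is a stub standing in for read_sysreg_s(), and the enum indices are illustrative rather than the kernel's sys_regs[] layout.

```c
#include <stdint.h>
#include <stdio.h>

enum { APIAKEYLO_EL1, APIAKEYHI_EL1, NR_REGS };

static uint64_t read_key(int which)        /* stub for read_sysreg_s() */
{
	return 0x1111111100000000ull | (unsigned)which;
}

#define __ptrauth_save_key(regs, key)                                  \
({                                                                     \
	regs[key ## KEYLO_EL1] = read_key(key ## KEYLO_EL1);           \
	regs[key ## KEYHI_EL1] = read_key(key ## KEYHI_EL1);           \
})

int main(void)
{
	uint64_t sys_regs[NR_REGS] = { 0 };

	__ptrauth_save_key(sys_regs, APIA); /* expands to the two loads above */

	printf("APIAKEYLO=%#llx APIAKEYHI=%#llx\n",
	       (unsigned long long)sys_regs[APIAKEYLO_EL1],
	       (unsigned long long)sys_regs[APIAKEYHI_EL1]);
	return 0;
}
```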
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 675fdc186e3b..93ba3d7ef027 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/kvm_arm.h> | 24 | #include <asm/kvm_arm.h> |
25 | #include <asm/kvm_asm.h> | 25 | #include <asm/kvm_asm.h> |
26 | #include <asm/kvm_mmu.h> | 26 | #include <asm/kvm_mmu.h> |
27 | #include <asm/kvm_ptrauth.h> | ||
27 | 28 | ||
28 | #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) | 29 | #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) |
29 | #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) | 30 | #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) |
@@ -64,6 +65,13 @@ ENTRY(__guest_enter) | |||
64 | 65 | ||
65 | add x18, x0, #VCPU_CONTEXT | 66 | add x18, x0, #VCPU_CONTEXT |
66 | 67 | ||
68 | // Macro ptrauth_switch_to_guest format: | ||
69 | // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3) | ||
70 | // The below macro to restore guest keys is not implemented in C code | ||
71 | // as it may cause Pointer Authentication key signing mismatch errors | ||
72 | // when this feature is enabled for kernel code. | ||
73 | ptrauth_switch_to_guest x18, x0, x1, x2 | ||
74 | |||
67 | // Restore guest regs x0-x17 | 75 | // Restore guest regs x0-x17 |
68 | ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] | 76 | ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] |
69 | ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] | 77 | ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] |
@@ -118,6 +126,13 @@ ENTRY(__guest_exit) | |||
118 | 126 | ||
119 | get_host_ctxt x2, x3 | 127 | get_host_ctxt x2, x3 |
120 | 128 | ||
129 | // Macro ptrauth_switch_to_host format: | ||
130 | // ptrauth_switch_to_host(guest cxt, host cxt, tmp1, tmp2, tmp3) | ||
131 | // The below macro to save/restore keys is not implemented in C code | ||
132 | // as it may cause Pointer Authentication key signing mismatch errors | ||
133 | // when this feature is enabled for kernel code. | ||
134 | ptrauth_switch_to_host x1, x2, x3, x4, x5 | ||
135 | |||
121 | // Now restore the host regs | 136 | // Now restore the host regs |
122 | restore_callee_saved_regs x2 | 137 | restore_callee_saved_regs x2 |
123 | 138 | ||
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3563fe655cd5..22b4c335e0b2 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c | |||
@@ -100,7 +100,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu) | |||
100 | val = read_sysreg(cpacr_el1); | 100 | val = read_sysreg(cpacr_el1); |
101 | val |= CPACR_EL1_TTA; | 101 | val |= CPACR_EL1_TTA; |
102 | val &= ~CPACR_EL1_ZEN; | 102 | val &= ~CPACR_EL1_ZEN; |
103 | if (!update_fp_enabled(vcpu)) { | 103 | if (update_fp_enabled(vcpu)) { |
104 | if (vcpu_has_sve(vcpu)) | ||
105 | val |= CPACR_EL1_ZEN; | ||
106 | } else { | ||
104 | val &= ~CPACR_EL1_FPEN; | 107 | val &= ~CPACR_EL1_FPEN; |
105 | __activate_traps_fpsimd32(vcpu); | 108 | __activate_traps_fpsimd32(vcpu); |
106 | } | 109 | } |
@@ -317,16 +320,48 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) | |||
317 | return true; | 320 | return true; |
318 | } | 321 | } |
319 | 322 | ||
320 | static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu) | 323 | /* Check for an FPSIMD/SVE trap and handle as appropriate */ |
324 | static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) | ||
321 | { | 325 | { |
322 | struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state; | 326 | bool vhe, sve_guest, sve_host; |
327 | u8 hsr_ec; | ||
323 | 328 | ||
324 | if (has_vhe()) | 329 | if (!system_supports_fpsimd()) |
325 | write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, | 330 | return false; |
326 | cpacr_el1); | 331 | |
327 | else | 332 | if (system_supports_sve()) { |
333 | sve_guest = vcpu_has_sve(vcpu); | ||
334 | sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; | ||
335 | vhe = true; | ||
336 | } else { | ||
337 | sve_guest = false; | ||
338 | sve_host = false; | ||
339 | vhe = has_vhe(); | ||
340 | } | ||
341 | |||
342 | hsr_ec = kvm_vcpu_trap_get_class(vcpu); | ||
343 | if (hsr_ec != ESR_ELx_EC_FP_ASIMD && | ||
344 | hsr_ec != ESR_ELx_EC_SVE) | ||
345 | return false; | ||
346 | |||
347 | /* Don't handle SVE traps for non-SVE vcpus here: */ | ||
348 | if (!sve_guest) | ||
349 | if (hsr_ec != ESR_ELx_EC_FP_ASIMD) | ||
350 | return false; | ||
351 | |||
352 | /* Valid trap. Switch the context: */ | ||
353 | |||
354 | if (vhe) { | ||
355 | u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN; | ||
356 | |||
357 | if (sve_guest) | ||
358 | reg |= CPACR_EL1_ZEN; | ||
359 | |||
360 | write_sysreg(reg, cpacr_el1); | ||
361 | } else { | ||
328 | write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, | 362 | write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, |
329 | cptr_el2); | 363 | cptr_el2); |
364 | } | ||
330 | 365 | ||
331 | isb(); | 366 | isb(); |
332 | 367 | ||
@@ -335,21 +370,28 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu) | |||
335 | * In the SVE case, VHE is assumed: it is enforced by | 370 | * In the SVE case, VHE is assumed: it is enforced by |
336 | * Kconfig and kvm_arch_init(). | 371 | * Kconfig and kvm_arch_init(). |
337 | */ | 372 | */ |
338 | if (system_supports_sve() && | 373 | if (sve_host) { |
339 | (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) { | ||
340 | struct thread_struct *thread = container_of( | 374 | struct thread_struct *thread = container_of( |
341 | host_fpsimd, | 375 | vcpu->arch.host_fpsimd_state, |
342 | struct thread_struct, uw.fpsimd_state); | 376 | struct thread_struct, uw.fpsimd_state); |
343 | 377 | ||
344 | sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr); | 378 | sve_save_state(sve_pffr(thread), |
379 | &vcpu->arch.host_fpsimd_state->fpsr); | ||
345 | } else { | 380 | } else { |
346 | __fpsimd_save_state(host_fpsimd); | 381 | __fpsimd_save_state(vcpu->arch.host_fpsimd_state); |
347 | } | 382 | } |
348 | 383 | ||
349 | vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; | 384 | vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; |
350 | } | 385 | } |
351 | 386 | ||
352 | __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); | 387 | if (sve_guest) { |
388 | sve_load_state(vcpu_sve_pffr(vcpu), | ||
389 | &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr, | ||
390 | sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1); | ||
391 | write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12); | ||
392 | } else { | ||
393 | __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); | ||
394 | } | ||
353 | 395 | ||
354 | /* Skip restoring fpexc32 for AArch64 guests */ | 396 | /* Skip restoring fpexc32 for AArch64 guests */ |
355 | if (!(read_sysreg(hcr_el2) & HCR_RW)) | 397 | if (!(read_sysreg(hcr_el2) & HCR_RW)) |
@@ -385,10 +427,10 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) | |||
385 | * and restore the guest context lazily. | 427 | * and restore the guest context lazily. |
386 | * If FP/SIMD is not implemented, handle the trap and inject an | 428 | * If FP/SIMD is not implemented, handle the trap and inject an |
387 | * undefined instruction exception to the guest. | 429 | * undefined instruction exception to the guest. |
430 | * Similarly for trapped SVE accesses. | ||
388 | */ | 431 | */ |
389 | if (system_supports_fpsimd() && | 432 | if (__hyp_handle_fpsimd(vcpu)) |
390 | kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD) | 433 | return true; |
391 | return __hyp_switch_fpsimd(vcpu); | ||
392 | 434 | ||
393 | if (!__populate_fault_info(vcpu)) | 435 | if (!__populate_fault_info(vcpu)) |
394 | return true; | 436 | return true; |
@@ -524,6 +566,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) | |||
524 | { | 566 | { |
525 | struct kvm_cpu_context *host_ctxt; | 567 | struct kvm_cpu_context *host_ctxt; |
526 | struct kvm_cpu_context *guest_ctxt; | 568 | struct kvm_cpu_context *guest_ctxt; |
569 | bool pmu_switch_needed; | ||
527 | u64 exit_code; | 570 | u64 exit_code; |
528 | 571 | ||
529 | /* | 572 | /* |
@@ -543,6 +586,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) | |||
543 | host_ctxt->__hyp_running_vcpu = vcpu; | 586 | host_ctxt->__hyp_running_vcpu = vcpu; |
544 | guest_ctxt = &vcpu->arch.ctxt; | 587 | guest_ctxt = &vcpu->arch.ctxt; |
545 | 588 | ||
589 | pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); | ||
590 | |||
546 | __sysreg_save_state_nvhe(host_ctxt); | 591 | __sysreg_save_state_nvhe(host_ctxt); |
547 | 592 | ||
548 | __activate_vm(kern_hyp_va(vcpu->kvm)); | 593 | __activate_vm(kern_hyp_va(vcpu->kvm)); |
@@ -589,6 +634,9 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) | |||
589 | */ | 634 | */ |
590 | __debug_switch_to_host(vcpu); | 635 | __debug_switch_to_host(vcpu); |
591 | 636 | ||
637 | if (pmu_switch_needed) | ||
638 | __pmu_switch_to_host(host_ctxt); | ||
639 | |||
592 | /* Returning to host will clear PSR.I, remask PMR if needed */ | 640 | /* Returning to host will clear PSR.I, remask PMR if needed */ |
593 | if (system_uses_irq_prio_masking()) | 641 | if (system_uses_irq_prio_masking()) |
594 | gic_write_pmr(GIC_PRIO_IRQOFF); | 642 | gic_write_pmr(GIC_PRIO_IRQOFF); |
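The reworked __hyp_handle_fpsimd() above first filters on the trap class before switching any state: only the FP/ASIMD and SVE trap classes are eligible, and SVE traps are handled only for SVE-enabled vcpus. A standalone sketch of that filter is below; the EC names are illustrative stand-ins for ESR_ELx_EC_FP_ASIMD and ESR_ELx_EC_SVE.

```c
#include <stdbool.h>
#include <stdio.h>

enum ec { EC_FP_ASIMD, EC_SVE, EC_OTHER };

static bool should_handle(enum ec hsr_ec, bool fpsimd_supported, bool sve_guest)
{
	if (!fpsimd_supported)
		return false;
	if (hsr_ec != EC_FP_ASIMD && hsr_ec != EC_SVE)
		return false;
	/* SVE traps from a non-SVE vcpu fall through to the injection path */
	if (!sve_guest && hsr_ec != EC_FP_ASIMD)
		return false;
	return true;
}

int main(void)
{
	printf("FP trap, non-SVE vcpu : %d\n", should_handle(EC_FP_ASIMD, true, false));
	printf("SVE trap, non-SVE vcpu: %d\n", should_handle(EC_SVE, true, false));
	printf("SVE trap, SVE vcpu    : %d\n", should_handle(EC_SVE, true, true));
	return 0;
}
```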
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c new file mode 100644 index 000000000000..3da94a5bb6b7 --- /dev/null +++ b/arch/arm64/kvm/pmu.c | |||
@@ -0,0 +1,239 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Copyright 2019 Arm Limited | ||
4 | * Author: Andrew Murray <Andrew.Murray@arm.com> | ||
5 | */ | ||
6 | #include <linux/kvm_host.h> | ||
7 | #include <linux/perf_event.h> | ||
8 | #include <asm/kvm_hyp.h> | ||
9 | |||
10 | /* | ||
11 | * Given the perf event attributes and system type, determine | ||
12 | * if we are going to need to switch counters at guest entry/exit. | ||
13 | */ | ||
14 | static bool kvm_pmu_switch_needed(struct perf_event_attr *attr) | ||
15 | { | ||
16 | /** | ||
17 | * With VHE the guest kernel runs at EL1 and the host at EL2; | ||
18 | * if userspace (EL0) is excluded from counting, there is no | ||
19 | * reason to switch counters. | ||
20 | */ | ||
21 | if (has_vhe() && attr->exclude_user) | ||
22 | return false; | ||
23 | |||
24 | /* Only switch if attributes are different */ | ||
25 | return (attr->exclude_host != attr->exclude_guest); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Add events to track that we may want to switch at guest entry/exit | ||
30 | * time. | ||
31 | */ | ||
32 | void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) | ||
33 | { | ||
34 | struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data); | ||
35 | |||
36 | if (!kvm_pmu_switch_needed(attr)) | ||
37 | return; | ||
38 | |||
39 | if (!attr->exclude_host) | ||
40 | ctx->pmu_events.events_host |= set; | ||
41 | if (!attr->exclude_guest) | ||
42 | ctx->pmu_events.events_guest |= set; | ||
43 | } | ||
44 | |||
45 | /* | ||
46 | * Stop tracking events | ||
47 | */ | ||
48 | void kvm_clr_pmu_events(u32 clr) | ||
49 | { | ||
50 | struct kvm_host_data *ctx = this_cpu_ptr(&kvm_host_data); | ||
51 | |||
52 | ctx->pmu_events.events_host &= ~clr; | ||
53 | ctx->pmu_events.events_guest &= ~clr; | ||
54 | } | ||
55 | |||
56 | /** | ||
57 | * Disable host events, enable guest events | ||
58 | */ | ||
59 | bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) | ||
60 | { | ||
61 | struct kvm_host_data *host; | ||
62 | struct kvm_pmu_events *pmu; | ||
63 | |||
64 | host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); | ||
65 | pmu = &host->pmu_events; | ||
66 | |||
67 | if (pmu->events_host) | ||
68 | write_sysreg(pmu->events_host, pmcntenclr_el0); | ||
69 | |||
70 | if (pmu->events_guest) | ||
71 | write_sysreg(pmu->events_guest, pmcntenset_el0); | ||
72 | |||
73 | return (pmu->events_host || pmu->events_guest); | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * Disable guest events, enable host events | ||
78 | */ | ||
79 | void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) | ||
80 | { | ||
81 | struct kvm_host_data *host; | ||
82 | struct kvm_pmu_events *pmu; | ||
83 | |||
84 | host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); | ||
85 | pmu = &host->pmu_events; | ||
86 | |||
87 | if (pmu->events_guest) | ||
88 | write_sysreg(pmu->events_guest, pmcntenclr_el0); | ||
89 | |||
90 | if (pmu->events_host) | ||
91 | write_sysreg(pmu->events_host, pmcntenset_el0); | ||
92 | } | ||
93 | |||
94 | #define PMEVTYPER_READ_CASE(idx) \ | ||
95 | case idx: \ | ||
96 | return read_sysreg(pmevtyper##idx##_el0) | ||
97 | |||
98 | #define PMEVTYPER_WRITE_CASE(idx) \ | ||
99 | case idx: \ | ||
100 | write_sysreg(val, pmevtyper##idx##_el0); \ | ||
101 | break | ||
102 | |||
103 | #define PMEVTYPER_CASES(readwrite) \ | ||
104 | PMEVTYPER_##readwrite##_CASE(0); \ | ||
105 | PMEVTYPER_##readwrite##_CASE(1); \ | ||
106 | PMEVTYPER_##readwrite##_CASE(2); \ | ||
107 | PMEVTYPER_##readwrite##_CASE(3); \ | ||
108 | PMEVTYPER_##readwrite##_CASE(4); \ | ||
109 | PMEVTYPER_##readwrite##_CASE(5); \ | ||
110 | PMEVTYPER_##readwrite##_CASE(6); \ | ||
111 | PMEVTYPER_##readwrite##_CASE(7); \ | ||
112 | PMEVTYPER_##readwrite##_CASE(8); \ | ||
113 | PMEVTYPER_##readwrite##_CASE(9); \ | ||
114 | PMEVTYPER_##readwrite##_CASE(10); \ | ||
115 | PMEVTYPER_##readwrite##_CASE(11); \ | ||
116 | PMEVTYPER_##readwrite##_CASE(12); \ | ||
117 | PMEVTYPER_##readwrite##_CASE(13); \ | ||
118 | PMEVTYPER_##readwrite##_CASE(14); \ | ||
119 | PMEVTYPER_##readwrite##_CASE(15); \ | ||
120 | PMEVTYPER_##readwrite##_CASE(16); \ | ||
121 | PMEVTYPER_##readwrite##_CASE(17); \ | ||
122 | PMEVTYPER_##readwrite##_CASE(18); \ | ||
123 | PMEVTYPER_##readwrite##_CASE(19); \ | ||
124 | PMEVTYPER_##readwrite##_CASE(20); \ | ||
125 | PMEVTYPER_##readwrite##_CASE(21); \ | ||
126 | PMEVTYPER_##readwrite##_CASE(22); \ | ||
127 | PMEVTYPER_##readwrite##_CASE(23); \ | ||
128 | PMEVTYPER_##readwrite##_CASE(24); \ | ||
129 | PMEVTYPER_##readwrite##_CASE(25); \ | ||
130 | PMEVTYPER_##readwrite##_CASE(26); \ | ||
131 | PMEVTYPER_##readwrite##_CASE(27); \ | ||
132 | PMEVTYPER_##readwrite##_CASE(28); \ | ||
133 | PMEVTYPER_##readwrite##_CASE(29); \ | ||
134 | PMEVTYPER_##readwrite##_CASE(30) | ||
135 | |||
136 | /* | ||
137 | * Read a value directly from PMEVTYPER<idx> where idx is 0-30 | ||
138 | * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31). | ||
139 | */ | ||
140 | static u64 kvm_vcpu_pmu_read_evtype_direct(int idx) | ||
141 | { | ||
142 | switch (idx) { | ||
143 | PMEVTYPER_CASES(READ); | ||
144 | case ARMV8_PMU_CYCLE_IDX: | ||
145 | return read_sysreg(pmccfiltr_el0); | ||
146 | default: | ||
147 | WARN_ON(1); | ||
148 | } | ||
149 | |||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * Write a value directly to PMEVTYPER<idx> where idx is 0-30 | ||
155 | * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31). | ||
156 | */ | ||
157 | static void kvm_vcpu_pmu_write_evtype_direct(int idx, u32 val) | ||
158 | { | ||
159 | switch (idx) { | ||
160 | PMEVTYPER_CASES(WRITE); | ||
161 | case ARMV8_PMU_CYCLE_IDX: | ||
162 | write_sysreg(val, pmccfiltr_el0); | ||
163 | break; | ||
164 | default: | ||
165 | WARN_ON(1); | ||
166 | } | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Modify ARMv8 PMU events to include EL0 counting | ||
171 | */ | ||
172 | static void kvm_vcpu_pmu_enable_el0(unsigned long events) | ||
173 | { | ||
174 | u64 typer; | ||
175 | u32 counter; | ||
176 | |||
177 | for_each_set_bit(counter, &events, 32) { | ||
178 | typer = kvm_vcpu_pmu_read_evtype_direct(counter); | ||
179 | typer &= ~ARMV8_PMU_EXCLUDE_EL0; | ||
180 | kvm_vcpu_pmu_write_evtype_direct(counter, typer); | ||
181 | } | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Modify ARMv8 PMU events to exclude EL0 counting | ||
186 | */ | ||
187 | static void kvm_vcpu_pmu_disable_el0(unsigned long events) | ||
188 | { | ||
189 | u64 typer; | ||
190 | u32 counter; | ||
191 | |||
192 | for_each_set_bit(counter, &events, 32) { | ||
193 | typer = kvm_vcpu_pmu_read_evtype_direct(counter); | ||
194 | typer |= ARMV8_PMU_EXCLUDE_EL0; | ||
195 | kvm_vcpu_pmu_write_evtype_direct(counter, typer); | ||
196 | } | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * On VHE ensure that only guest events have EL0 counting enabled | ||
201 | */ | ||
202 | void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) | ||
203 | { | ||
204 | struct kvm_cpu_context *host_ctxt; | ||
205 | struct kvm_host_data *host; | ||
206 | u32 events_guest, events_host; | ||
207 | |||
208 | if (!has_vhe()) | ||
209 | return; | ||
210 | |||
211 | host_ctxt = vcpu->arch.host_cpu_context; | ||
212 | host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); | ||
213 | events_guest = host->pmu_events.events_guest; | ||
214 | events_host = host->pmu_events.events_host; | ||
215 | |||
216 | kvm_vcpu_pmu_enable_el0(events_guest); | ||
217 | kvm_vcpu_pmu_disable_el0(events_host); | ||
218 | } | ||
219 | |||
220 | /* | ||
221 | * On VHE ensure that only host events have EL0 counting enabled | ||
222 | */ | ||
223 | void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) | ||
224 | { | ||
225 | struct kvm_cpu_context *host_ctxt; | ||
226 | struct kvm_host_data *host; | ||
227 | u32 events_guest, events_host; | ||
228 | |||
229 | if (!has_vhe()) | ||
230 | return; | ||
231 | |||
232 | host_ctxt = vcpu->arch.host_cpu_context; | ||
233 | host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); | ||
234 | events_guest = host->pmu_events.events_guest; | ||
235 | events_host = host->pmu_events.events_host; | ||
236 | |||
237 | kvm_vcpu_pmu_enable_el0(events_host); | ||
238 | kvm_vcpu_pmu_disable_el0(events_guest); | ||
239 | } | ||
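The core of kvm_pmu_switch_needed() above is a small predicate: under VHE an event that already excludes EL0 needs no entry/exit switching, and otherwise switching is needed exactly when exclude_host and exclude_guest differ. A standalone sketch with a stand-in attr struct:

```c
#include <stdbool.h>
#include <stdio.h>

struct attr { bool exclude_user, exclude_host, exclude_guest; };

static bool switch_needed(bool vhe, const struct attr *a)
{
	if (vhe && a->exclude_user)
		return false;                       /* nothing to switch on VHE */
	return a->exclude_host != a->exclude_guest; /* only if filters differ */
}

int main(void)
{
	struct attr host_only  = { .exclude_guest = true };
	struct attr guest_only = { .exclude_host = true };
	struct attr both       = { 0 };

	printf("host-only : %d\n", switch_needed(true, &host_only));
	printf("guest-only: %d\n", switch_needed(true, &guest_only));
	printf("both      : %d\n", switch_needed(true, &both));
	return 0;
}
```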
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index e2a0500cd7a2..1140b4485575 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c | |||
@@ -20,20 +20,26 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/kernel.h> | ||
23 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
24 | #include <linux/kvm.h> | 25 | #include <linux/kvm.h> |
25 | #include <linux/hw_breakpoint.h> | 26 | #include <linux/hw_breakpoint.h> |
27 | #include <linux/slab.h> | ||
28 | #include <linux/string.h> | ||
29 | #include <linux/types.h> | ||
26 | 30 | ||
27 | #include <kvm/arm_arch_timer.h> | 31 | #include <kvm/arm_arch_timer.h> |
28 | 32 | ||
29 | #include <asm/cpufeature.h> | 33 | #include <asm/cpufeature.h> |
30 | #include <asm/cputype.h> | 34 | #include <asm/cputype.h> |
35 | #include <asm/fpsimd.h> | ||
31 | #include <asm/ptrace.h> | 36 | #include <asm/ptrace.h> |
32 | #include <asm/kvm_arm.h> | 37 | #include <asm/kvm_arm.h> |
33 | #include <asm/kvm_asm.h> | 38 | #include <asm/kvm_asm.h> |
34 | #include <asm/kvm_coproc.h> | 39 | #include <asm/kvm_coproc.h> |
35 | #include <asm/kvm_emulate.h> | 40 | #include <asm/kvm_emulate.h> |
36 | #include <asm/kvm_mmu.h> | 41 | #include <asm/kvm_mmu.h> |
42 | #include <asm/virt.h> | ||
37 | 43 | ||
38 | /* Maximum phys_shift supported for any VM on this host */ | 44 | /* Maximum phys_shift supported for any VM on this host */ |
39 | static u32 kvm_ipa_limit; | 45 | static u32 kvm_ipa_limit; |
@@ -92,6 +98,14 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
92 | case KVM_CAP_ARM_VM_IPA_SIZE: | 98 | case KVM_CAP_ARM_VM_IPA_SIZE: |
93 | r = kvm_ipa_limit; | 99 | r = kvm_ipa_limit; |
94 | break; | 100 | break; |
101 | case KVM_CAP_ARM_SVE: | ||
102 | r = system_supports_sve(); | ||
103 | break; | ||
104 | case KVM_CAP_ARM_PTRAUTH_ADDRESS: | ||
105 | case KVM_CAP_ARM_PTRAUTH_GENERIC: | ||
106 | r = has_vhe() && system_supports_address_auth() && | ||
107 | system_supports_generic_auth(); | ||
108 | break; | ||
95 | default: | 109 | default: |
96 | r = 0; | 110 | r = 0; |
97 | } | 111 | } |
@@ -99,13 +113,148 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
99 | return r; | 113 | return r; |
100 | } | 114 | } |
101 | 115 | ||
116 | unsigned int kvm_sve_max_vl; | ||
117 | |||
118 | int kvm_arm_init_sve(void) | ||
119 | { | ||
120 | if (system_supports_sve()) { | ||
121 | kvm_sve_max_vl = sve_max_virtualisable_vl; | ||
122 | |||
123 | /* | ||
124 | * The get_sve_reg()/set_sve_reg() ioctl interface will need | ||
125 | * to be extended with multiple register slice support in | ||
126 | * order to support vector lengths greater than | ||
127 | * SVE_VL_ARCH_MAX: | ||
128 | */ | ||
129 | if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX)) | ||
130 | kvm_sve_max_vl = SVE_VL_ARCH_MAX; | ||
131 | |||
132 | /* | ||
133 | * Don't even try to make use of vector lengths that | ||
134 | * aren't available on all CPUs, for now: | ||
135 | */ | ||
136 | if (kvm_sve_max_vl < sve_max_vl) | ||
137 | pr_warn("KVM: SVE vector length for guests limited to %u bytes\n", | ||
138 | kvm_sve_max_vl); | ||
139 | } | ||
140 | |||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu) | ||
145 | { | ||
146 | if (!system_supports_sve()) | ||
147 | return -EINVAL; | ||
148 | |||
149 | /* Verify that KVM startup enforced this when SVE was detected: */ | ||
150 | if (WARN_ON(!has_vhe())) | ||
151 | return -EINVAL; | ||
152 | |||
153 | vcpu->arch.sve_max_vl = kvm_sve_max_vl; | ||
154 | |||
155 | /* | ||
156 | * Userspace can still customize the vector lengths by writing | ||
157 | * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until | ||
158 | * kvm_arm_vcpu_finalize(), which freezes the configuration. | ||
159 | */ | ||
160 | vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE; | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Finalize vcpu's maximum SVE vector length, allocating | ||
167 | * vcpu->arch.sve_state as necessary. | ||
168 | */ | ||
169 | static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) | ||
170 | { | ||
171 | void *buf; | ||
172 | unsigned int vl; | ||
173 | |||
174 | vl = vcpu->arch.sve_max_vl; | ||
175 | |||
176 | /* | ||
177 | * Responsibility for these properties is shared between | ||
178 | * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and | ||
179 | * set_sve_vls(). Double-check here just to be sure: | ||
180 | */ | ||
181 | if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl || | ||
182 | vl > SVE_VL_ARCH_MAX)) | ||
183 | return -EIO; | ||
184 | |||
185 | buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL); | ||
186 | if (!buf) | ||
187 | return -ENOMEM; | ||
188 | |||
189 | vcpu->arch.sve_state = buf; | ||
190 | vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED; | ||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature) | ||
195 | { | ||
196 | switch (feature) { | ||
197 | case KVM_ARM_VCPU_SVE: | ||
198 | if (!vcpu_has_sve(vcpu)) | ||
199 | return -EINVAL; | ||
200 | |||
201 | if (kvm_arm_vcpu_sve_finalized(vcpu)) | ||
202 | return -EPERM; | ||
203 | |||
204 | return kvm_vcpu_finalize_sve(vcpu); | ||
205 | } | ||
206 | |||
207 | return -EINVAL; | ||
208 | } | ||
209 | |||
210 | bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) | ||
211 | { | ||
212 | if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu)) | ||
213 | return false; | ||
214 | |||
215 | return true; | ||
216 | } | ||
217 | |||
218 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | ||
219 | { | ||
220 | kfree(vcpu->arch.sve_state); | ||
221 | } | ||
222 | |||
223 | static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) | ||
224 | { | ||
225 | if (vcpu_has_sve(vcpu)) | ||
226 | memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu)); | ||
227 | } | ||
228 | |||
229 | static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) | ||
230 | { | ||
231 | /* Support ptrauth only if the system supports these capabilities. */ | ||
232 | if (!has_vhe()) | ||
233 | return -EINVAL; | ||
234 | |||
235 | if (!system_supports_address_auth() || | ||
236 | !system_supports_generic_auth()) | ||
237 | return -EINVAL; | ||
238 | /* | ||
239 | * For now make sure that both address/generic pointer authentication | ||
240 | * features are requested by userspace together. | ||
241 | */ | ||
242 | if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || | ||
243 | !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) | ||
244 | return -EINVAL; | ||
245 | |||
246 | vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; | ||
247 | return 0; | ||
248 | } | ||
249 | |||
102 | /** | 250 | /** |
103 | * kvm_reset_vcpu - sets core registers and sys_regs to reset value | 251 | * kvm_reset_vcpu - sets core registers and sys_regs to reset value |
104 | * @vcpu: The VCPU pointer | 252 | * @vcpu: The VCPU pointer |
105 | * | 253 | * |
106 | * This function finds the right table above and sets the registers on | 254 | * This function finds the right table above and sets the registers on |
107 | * the virtual CPU struct to their architecturally defined reset | 255 | * the virtual CPU struct to their architecturally defined reset |
108 | * values. | 256 | * values, except for registers whose reset is deferred until |
257 | * kvm_arm_vcpu_finalize(). | ||
109 | * | 258 | * |
110 | * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT | 259 | * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT |
111 | * ioctl or as part of handling a request issued by another VCPU in the PSCI | 260 | * ioctl or as part of handling a request issued by another VCPU in the PSCI |
@@ -131,6 +280,22 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) | |||
131 | if (loaded) | 280 | if (loaded) |
132 | kvm_arch_vcpu_put(vcpu); | 281 | kvm_arch_vcpu_put(vcpu); |
133 | 282 | ||
283 | if (!kvm_arm_vcpu_sve_finalized(vcpu)) { | ||
284 | if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) { | ||
285 | ret = kvm_vcpu_enable_sve(vcpu); | ||
286 | if (ret) | ||
287 | goto out; | ||
288 | } | ||
289 | } else { | ||
290 | kvm_vcpu_reset_sve(vcpu); | ||
291 | } | ||
292 | |||
293 | if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || | ||
294 | test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { | ||
295 | if (kvm_vcpu_enable_ptrauth(vcpu)) | ||
296 | goto out; | ||
297 | } | ||
298 | |||
134 | switch (vcpu->arch.target) { | 299 | switch (vcpu->arch.target) { |
135 | default: | 300 | default: |
136 | if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { | 301 | if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { |
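
Taken together, the reset.c hunks above define the userspace flow for the new features: the capability checks advertise what the host can virtualise, KVM_ARM_VCPU_INIT carries the per-vcpu feature bits, and SVE additionally has to be frozen with KVM_ARM_VCPU_FINALIZE before the first KVM_RUN. A minimal userspace sketch of that flow, assuming the vm/vcpu file descriptors already exist and with error handling trimmed (illustrative only, not part of this series):

        #include <linux/kvm.h>
        #include <sys/ioctl.h>

        /* Illustrative only: enable SVE on a vcpu and freeze its configuration. */
        static int vcpu_enable_sve(int vm_fd, int vcpu_fd)
        {
                struct kvm_vcpu_init init;
                int what = KVM_ARM_VCPU_SVE;

                if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) <= 0)
                        return -1;              /* host cannot virtualise SVE */

                /* Start from the preferred target and add the SVE feature bit. */
                if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
                        return -1;
                init.features[KVM_ARM_VCPU_SVE / 32] |= 1u << (KVM_ARM_VCPU_SVE % 32);
                if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init) < 0)
                        return -1;

                /*
                 * Optionally restrict the vector lengths by writing
                 * KVM_REG_ARM64_SVE_VLS here, then finalize: after this point
                 * kvm_arm_vcpu_finalize() refuses further changes (-EPERM).
                 */
                return ioctl(vcpu_fd, KVM_ARM_VCPU_FINALIZE, &what);
        }

The two ptrauth feature bits would be set the same way in init.features; they need no finalize step, but as kvm_vcpu_enable_ptrauth() above shows, both bits must be requested together.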
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 539feecda5b8..857b226bcdde 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c | |||
@@ -695,6 +695,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
695 | val |= p->regval & ARMV8_PMU_PMCR_MASK; | 695 | val |= p->regval & ARMV8_PMU_PMCR_MASK; |
696 | __vcpu_sys_reg(vcpu, PMCR_EL0) = val; | 696 | __vcpu_sys_reg(vcpu, PMCR_EL0) = val; |
697 | kvm_pmu_handle_pmcr(vcpu, val); | 697 | kvm_pmu_handle_pmcr(vcpu, val); |
698 | kvm_vcpu_pmu_restore_guest(vcpu); | ||
698 | } else { | 699 | } else { |
699 | /* PMCR.P & PMCR.C are RAZ */ | 700 | /* PMCR.P & PMCR.C are RAZ */ |
700 | val = __vcpu_sys_reg(vcpu, PMCR_EL0) | 701 | val = __vcpu_sys_reg(vcpu, PMCR_EL0) |
@@ -850,6 +851,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
850 | if (p->is_write) { | 851 | if (p->is_write) { |
851 | kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); | 852 | kvm_pmu_set_counter_event_type(vcpu, p->regval, idx); |
852 | __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; | 853 | __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK; |
854 | kvm_vcpu_pmu_restore_guest(vcpu); | ||
853 | } else { | 855 | } else { |
854 | p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; | 856 | p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK; |
855 | } | 857 | } |
@@ -875,6 +877,7 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
875 | /* accessing PMCNTENSET_EL0 */ | 877 | /* accessing PMCNTENSET_EL0 */ |
876 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; | 878 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; |
877 | kvm_pmu_enable_counter(vcpu, val); | 879 | kvm_pmu_enable_counter(vcpu, val); |
880 | kvm_vcpu_pmu_restore_guest(vcpu); | ||
878 | } else { | 881 | } else { |
879 | /* accessing PMCNTENCLR_EL0 */ | 882 | /* accessing PMCNTENCLR_EL0 */ |
880 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; | 883 | __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; |
@@ -1007,6 +1010,37 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | |||
1007 | { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ | 1010 | { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ |
1008 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } | 1011 | access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } |
1009 | 1012 | ||
1013 | static bool trap_ptrauth(struct kvm_vcpu *vcpu, | ||
1014 | struct sys_reg_params *p, | ||
1015 | const struct sys_reg_desc *rd) | ||
1016 | { | ||
1017 | kvm_arm_vcpu_ptrauth_trap(vcpu); | ||
1018 | |||
1019 | /* | ||
1020 | * Return false for both cases as we never skip the trapped | ||
1021 | * instruction: | ||
1022 | * | ||
1023 | * - Either we re-execute the same key register access instruction | ||
1024 | * after enabling ptrauth. | ||
1025 | * - Or an UNDEF is injected as ptrauth is not supported/enabled. | ||
1026 | */ | ||
1027 | return false; | ||
1028 | } | ||
1029 | |||
1030 | static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, | ||
1031 | const struct sys_reg_desc *rd) | ||
1032 | { | ||
1033 | return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST; | ||
1034 | } | ||
1035 | |||
1036 | #define __PTRAUTH_KEY(k) \ | ||
1037 | { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \ | ||
1038 | .visibility = ptrauth_visibility} | ||
1039 | |||
1040 | #define PTRAUTH_KEY(k) \ | ||
1041 | __PTRAUTH_KEY(k ## KEYLO_EL1), \ | ||
1042 | __PTRAUTH_KEY(k ## KEYHI_EL1) | ||
1043 | |||
1010 | static bool access_arch_timer(struct kvm_vcpu *vcpu, | 1044 | static bool access_arch_timer(struct kvm_vcpu *vcpu, |
1011 | struct sys_reg_params *p, | 1045 | struct sys_reg_params *p, |
1012 | const struct sys_reg_desc *r) | 1046 | const struct sys_reg_desc *r) |
@@ -1044,25 +1078,20 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, | |||
1044 | } | 1078 | } |
1045 | 1079 | ||
1046 | /* Read a sanitised cpufeature ID register by sys_reg_desc */ | 1080 | /* Read a sanitised cpufeature ID register by sys_reg_desc */ |
1047 | static u64 read_id_reg(struct sys_reg_desc const *r, bool raz) | 1081 | static u64 read_id_reg(const struct kvm_vcpu *vcpu, |
1082 | struct sys_reg_desc const *r, bool raz) | ||
1048 | { | 1083 | { |
1049 | u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, | 1084 | u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, |
1050 | (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); | 1085 | (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); |
1051 | u64 val = raz ? 0 : read_sanitised_ftr_reg(id); | 1086 | u64 val = raz ? 0 : read_sanitised_ftr_reg(id); |
1052 | 1087 | ||
1053 | if (id == SYS_ID_AA64PFR0_EL1) { | 1088 | if (id == SYS_ID_AA64PFR0_EL1 && !vcpu_has_sve(vcpu)) { |
1054 | if (val & (0xfUL << ID_AA64PFR0_SVE_SHIFT)) | ||
1055 | kvm_debug("SVE unsupported for guests, suppressing\n"); | ||
1056 | |||
1057 | val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); | 1089 | val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); |
1058 | } else if (id == SYS_ID_AA64ISAR1_EL1) { | 1090 | } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { |
1059 | const u64 ptrauth_mask = (0xfUL << ID_AA64ISAR1_APA_SHIFT) | | 1091 | val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) | |
1060 | (0xfUL << ID_AA64ISAR1_API_SHIFT) | | 1092 | (0xfUL << ID_AA64ISAR1_API_SHIFT) | |
1061 | (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | | 1093 | (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | |
1062 | (0xfUL << ID_AA64ISAR1_GPI_SHIFT); | 1094 | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); |
1063 | if (val & ptrauth_mask) | ||
1064 | kvm_debug("ptrauth unsupported for guests, suppressing\n"); | ||
1065 | val &= ~ptrauth_mask; | ||
1066 | } | 1095 | } |
1067 | 1096 | ||
1068 | return val; | 1097 | return val; |
@@ -1078,7 +1107,7 @@ static bool __access_id_reg(struct kvm_vcpu *vcpu, | |||
1078 | if (p->is_write) | 1107 | if (p->is_write) |
1079 | return write_to_read_only(vcpu, p, r); | 1108 | return write_to_read_only(vcpu, p, r); |
1080 | 1109 | ||
1081 | p->regval = read_id_reg(r, raz); | 1110 | p->regval = read_id_reg(vcpu, r, raz); |
1082 | return true; | 1111 | return true; |
1083 | } | 1112 | } |
1084 | 1113 | ||
@@ -1100,6 +1129,81 @@ static int reg_from_user(u64 *val, const void __user *uaddr, u64 id); | |||
1100 | static int reg_to_user(void __user *uaddr, const u64 *val, u64 id); | 1129 | static int reg_to_user(void __user *uaddr, const u64 *val, u64 id); |
1101 | static u64 sys_reg_to_index(const struct sys_reg_desc *reg); | 1130 | static u64 sys_reg_to_index(const struct sys_reg_desc *reg); |
1102 | 1131 | ||
1132 | /* Visibility overrides for SVE-specific control registers */ | ||
1133 | static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, | ||
1134 | const struct sys_reg_desc *rd) | ||
1135 | { | ||
1136 | if (vcpu_has_sve(vcpu)) | ||
1137 | return 0; | ||
1138 | |||
1139 | return REG_HIDDEN_USER | REG_HIDDEN_GUEST; | ||
1140 | } | ||
1141 | |||
1142 | /* Visibility overrides for SVE-specific ID registers */ | ||
1143 | static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu, | ||
1144 | const struct sys_reg_desc *rd) | ||
1145 | { | ||
1146 | if (vcpu_has_sve(vcpu)) | ||
1147 | return 0; | ||
1148 | |||
1149 | return REG_HIDDEN_USER; | ||
1150 | } | ||
1151 | |||
1152 | /* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ | ||
1153 | static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) | ||
1154 | { | ||
1155 | if (!vcpu_has_sve(vcpu)) | ||
1156 | return 0; | ||
1157 | |||
1158 | return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1); | ||
1159 | } | ||
1160 | |||
1161 | static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, | ||
1162 | struct sys_reg_params *p, | ||
1163 | const struct sys_reg_desc *rd) | ||
1164 | { | ||
1165 | if (p->is_write) | ||
1166 | return write_to_read_only(vcpu, p, rd); | ||
1167 | |||
1168 | p->regval = guest_id_aa64zfr0_el1(vcpu); | ||
1169 | return true; | ||
1170 | } | ||
1171 | |||
1172 | static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, | ||
1173 | const struct sys_reg_desc *rd, | ||
1174 | const struct kvm_one_reg *reg, void __user *uaddr) | ||
1175 | { | ||
1176 | u64 val; | ||
1177 | |||
1178 | if (WARN_ON(!vcpu_has_sve(vcpu))) | ||
1179 | return -ENOENT; | ||
1180 | |||
1181 | val = guest_id_aa64zfr0_el1(vcpu); | ||
1182 | return reg_to_user(uaddr, &val, reg->id); | ||
1183 | } | ||
1184 | |||
1185 | static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, | ||
1186 | const struct sys_reg_desc *rd, | ||
1187 | const struct kvm_one_reg *reg, void __user *uaddr) | ||
1188 | { | ||
1189 | const u64 id = sys_reg_to_index(rd); | ||
1190 | int err; | ||
1191 | u64 val; | ||
1192 | |||
1193 | if (WARN_ON(!vcpu_has_sve(vcpu))) | ||
1194 | return -ENOENT; | ||
1195 | |||
1196 | err = reg_from_user(&val, uaddr, id); | ||
1197 | if (err) | ||
1198 | return err; | ||
1199 | |||
1200 | /* This is what we mean by invariant: you can't change it. */ | ||
1201 | if (val != guest_id_aa64zfr0_el1(vcpu)) | ||
1202 | return -EINVAL; | ||
1203 | |||
1204 | return 0; | ||
1205 | } | ||
1206 | |||
1103 | /* | 1207 | /* |
1104 | * cpufeature ID register user accessors | 1208 | * cpufeature ID register user accessors |
1105 | * | 1209 | * |
@@ -1107,16 +1211,18 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg); | |||
1107 | * are stored, and for set_id_reg() we don't allow the effective value | 1211 | * are stored, and for set_id_reg() we don't allow the effective value |
1108 | * to be changed. | 1212 | * to be changed. |
1109 | */ | 1213 | */ |
1110 | static int __get_id_reg(const struct sys_reg_desc *rd, void __user *uaddr, | 1214 | static int __get_id_reg(const struct kvm_vcpu *vcpu, |
1215 | const struct sys_reg_desc *rd, void __user *uaddr, | ||
1111 | bool raz) | 1216 | bool raz) |
1112 | { | 1217 | { |
1113 | const u64 id = sys_reg_to_index(rd); | 1218 | const u64 id = sys_reg_to_index(rd); |
1114 | const u64 val = read_id_reg(rd, raz); | 1219 | const u64 val = read_id_reg(vcpu, rd, raz); |
1115 | 1220 | ||
1116 | return reg_to_user(uaddr, &val, id); | 1221 | return reg_to_user(uaddr, &val, id); |
1117 | } | 1222 | } |
1118 | 1223 | ||
1119 | static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr, | 1224 | static int __set_id_reg(const struct kvm_vcpu *vcpu, |
1225 | const struct sys_reg_desc *rd, void __user *uaddr, | ||
1120 | bool raz) | 1226 | bool raz) |
1121 | { | 1227 | { |
1122 | const u64 id = sys_reg_to_index(rd); | 1228 | const u64 id = sys_reg_to_index(rd); |
@@ -1128,7 +1234,7 @@ static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr, | |||
1128 | return err; | 1234 | return err; |
1129 | 1235 | ||
1130 | /* This is what we mean by invariant: you can't change it. */ | 1236 | /* This is what we mean by invariant: you can't change it. */ |
1131 | if (val != read_id_reg(rd, raz)) | 1237 | if (val != read_id_reg(vcpu, rd, raz)) |
1132 | return -EINVAL; | 1238 | return -EINVAL; |
1133 | 1239 | ||
1134 | return 0; | 1240 | return 0; |
@@ -1137,25 +1243,25 @@ static int __set_id_reg(const struct sys_reg_desc *rd, void __user *uaddr, | |||
1137 | static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | 1243 | static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, |
1138 | const struct kvm_one_reg *reg, void __user *uaddr) | 1244 | const struct kvm_one_reg *reg, void __user *uaddr) |
1139 | { | 1245 | { |
1140 | return __get_id_reg(rd, uaddr, false); | 1246 | return __get_id_reg(vcpu, rd, uaddr, false); |
1141 | } | 1247 | } |
1142 | 1248 | ||
1143 | static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | 1249 | static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, |
1144 | const struct kvm_one_reg *reg, void __user *uaddr) | 1250 | const struct kvm_one_reg *reg, void __user *uaddr) |
1145 | { | 1251 | { |
1146 | return __set_id_reg(rd, uaddr, false); | 1252 | return __set_id_reg(vcpu, rd, uaddr, false); |
1147 | } | 1253 | } |
1148 | 1254 | ||
1149 | static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | 1255 | static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, |
1150 | const struct kvm_one_reg *reg, void __user *uaddr) | 1256 | const struct kvm_one_reg *reg, void __user *uaddr) |
1151 | { | 1257 | { |
1152 | return __get_id_reg(rd, uaddr, true); | 1258 | return __get_id_reg(vcpu, rd, uaddr, true); |
1153 | } | 1259 | } |
1154 | 1260 | ||
1155 | static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | 1261 | static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, |
1156 | const struct kvm_one_reg *reg, void __user *uaddr) | 1262 | const struct kvm_one_reg *reg, void __user *uaddr) |
1157 | { | 1263 | { |
1158 | return __set_id_reg(rd, uaddr, true); | 1264 | return __set_id_reg(vcpu, rd, uaddr, true); |
1159 | } | 1265 | } |
1160 | 1266 | ||
1161 | static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, | 1267 | static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, |
@@ -1343,7 +1449,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
1343 | ID_SANITISED(ID_AA64PFR1_EL1), | 1449 | ID_SANITISED(ID_AA64PFR1_EL1), |
1344 | ID_UNALLOCATED(4,2), | 1450 | ID_UNALLOCATED(4,2), |
1345 | ID_UNALLOCATED(4,3), | 1451 | ID_UNALLOCATED(4,3), |
1346 | ID_UNALLOCATED(4,4), | 1452 | { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility }, |
1347 | ID_UNALLOCATED(4,5), | 1453 | ID_UNALLOCATED(4,5), |
1348 | ID_UNALLOCATED(4,6), | 1454 | ID_UNALLOCATED(4,6), |
1349 | ID_UNALLOCATED(4,7), | 1455 | ID_UNALLOCATED(4,7), |
@@ -1380,10 +1486,17 @@ static const struct sys_reg_desc sys_reg_descs[] = { | |||
1380 | 1486 | ||
1381 | { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, | 1487 | { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, |
1382 | { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, | 1488 | { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, |
1489 | { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, | ||
1383 | { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, | 1490 | { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, |
1384 | { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, | 1491 | { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, |
1385 | { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, | 1492 | { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, |
1386 | 1493 | ||
1494 | PTRAUTH_KEY(APIA), | ||
1495 | PTRAUTH_KEY(APIB), | ||
1496 | PTRAUTH_KEY(APDA), | ||
1497 | PTRAUTH_KEY(APDB), | ||
1498 | PTRAUTH_KEY(APGA), | ||
1499 | |||
1387 | { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, | 1500 | { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, |
1388 | { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, | 1501 | { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, |
1389 | { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, | 1502 | { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, |
@@ -1924,6 +2037,12 @@ static void perform_access(struct kvm_vcpu *vcpu, | |||
1924 | { | 2037 | { |
1925 | trace_kvm_sys_access(*vcpu_pc(vcpu), params, r); | 2038 | trace_kvm_sys_access(*vcpu_pc(vcpu), params, r); |
1926 | 2039 | ||
2040 | /* Check for regs disabled by runtime config */ | ||
2041 | if (sysreg_hidden_from_guest(vcpu, r)) { | ||
2042 | kvm_inject_undefined(vcpu); | ||
2043 | return; | ||
2044 | } | ||
2045 | |||
1927 | /* | 2046 | /* |
1928 | * Not having an accessor means that we have configured a trap | 2047 | * Not having an accessor means that we have configured a trap |
1929 | * that we don't know how to handle. This certainly qualifies | 2048 | * that we don't know how to handle. This certainly qualifies |
@@ -2435,6 +2554,10 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg | |||
2435 | if (!r) | 2554 | if (!r) |
2436 | return get_invariant_sys_reg(reg->id, uaddr); | 2555 | return get_invariant_sys_reg(reg->id, uaddr); |
2437 | 2556 | ||
2557 | /* Check for regs disabled by runtime config */ | ||
2558 | if (sysreg_hidden_from_user(vcpu, r)) | ||
2559 | return -ENOENT; | ||
2560 | |||
2438 | if (r->get_user) | 2561 | if (r->get_user) |
2439 | return (r->get_user)(vcpu, r, reg, uaddr); | 2562 | return (r->get_user)(vcpu, r, reg, uaddr); |
2440 | 2563 | ||
@@ -2456,6 +2579,10 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg | |||
2456 | if (!r) | 2579 | if (!r) |
2457 | return set_invariant_sys_reg(reg->id, uaddr); | 2580 | return set_invariant_sys_reg(reg->id, uaddr); |
2458 | 2581 | ||
2582 | /* Check for regs disabled by runtime config */ | ||
2583 | if (sysreg_hidden_from_user(vcpu, r)) | ||
2584 | return -ENOENT; | ||
2585 | |||
2459 | if (r->set_user) | 2586 | if (r->set_user) |
2460 | return (r->set_user)(vcpu, r, reg, uaddr); | 2587 | return (r->set_user)(vcpu, r, reg, uaddr); |
2461 | 2588 | ||
@@ -2512,7 +2639,8 @@ static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind) | |||
2512 | return true; | 2639 | return true; |
2513 | } | 2640 | } |
2514 | 2641 | ||
2515 | static int walk_one_sys_reg(const struct sys_reg_desc *rd, | 2642 | static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, |
2643 | const struct sys_reg_desc *rd, | ||
2516 | u64 __user **uind, | 2644 | u64 __user **uind, |
2517 | unsigned int *total) | 2645 | unsigned int *total) |
2518 | { | 2646 | { |
@@ -2523,6 +2651,9 @@ static int walk_one_sys_reg(const struct sys_reg_desc *rd, | |||
2523 | if (!(rd->reg || rd->get_user)) | 2651 | if (!(rd->reg || rd->get_user)) |
2524 | return 0; | 2652 | return 0; |
2525 | 2653 | ||
2654 | if (sysreg_hidden_from_user(vcpu, rd)) | ||
2655 | return 0; | ||
2656 | |||
2526 | if (!copy_reg_to_user(rd, uind)) | 2657 | if (!copy_reg_to_user(rd, uind)) |
2527 | return -EFAULT; | 2658 | return -EFAULT; |
2528 | 2659 | ||
@@ -2551,9 +2682,9 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind) | |||
2551 | int cmp = cmp_sys_reg(i1, i2); | 2682 | int cmp = cmp_sys_reg(i1, i2); |
2552 | /* target-specific overrides generic entry. */ | 2683 | /* target-specific overrides generic entry. */ |
2553 | if (cmp <= 0) | 2684 | if (cmp <= 0) |
2554 | err = walk_one_sys_reg(i1, &uind, &total); | 2685 | err = walk_one_sys_reg(vcpu, i1, &uind, &total); |
2555 | else | 2686 | else |
2556 | err = walk_one_sys_reg(i2, &uind, &total); | 2687 | err = walk_one_sys_reg(vcpu, i2, &uind, &total); |
2557 | 2688 | ||
2558 | if (err) | 2689 | if (err) |
2559 | return err; | 2690 | return err; |
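
read_id_reg() now masks the SVE and ptrauth ID fields only when the vcpu lacks the corresponding feature, instead of always suppressing them. The masking itself is plain field clearing; a small standalone illustration follows, where the shift values are the architectural positions of the ID_AA64ISAR1_EL1 ptrauth fields (assumed here, not taken from this diff):

        #include <stdint.h>
        #include <stdio.h>

        /* Assumed architectural field positions within ID_AA64ISAR1_EL1. */
        #define APA_SHIFT        4
        #define API_SHIFT        8
        #define GPA_SHIFT       24
        #define GPI_SHIFT       28

        /* Clear the four ptrauth fields, as read_id_reg() does for a vcpu
         * without the ptrauth feature enabled. */
        static uint64_t hide_ptrauth(uint64_t isar1)
        {
                return isar1 & ~((0xfULL << APA_SHIFT) | (0xfULL << API_SHIFT) |
                                 (0xfULL << GPA_SHIFT) | (0xfULL << GPI_SHIFT));
        }

        int main(void)
        {
                /* Example value advertising address and generic authentication */
                uint64_t isar1 = (1ULL << APA_SHIFT) | (1ULL << GPA_SHIFT);

                printf("host advertises %#llx, guest sees %#llx\n",
                       (unsigned long long)isar1,
                       (unsigned long long)hide_ptrauth(isar1));
                return 0;
        }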
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 3b1bc7f01d0b..2be99508dcb9 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h | |||
@@ -64,8 +64,15 @@ struct sys_reg_desc { | |||
64 | const struct kvm_one_reg *reg, void __user *uaddr); | 64 | const struct kvm_one_reg *reg, void __user *uaddr); |
65 | int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, | 65 | int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, |
66 | const struct kvm_one_reg *reg, void __user *uaddr); | 66 | const struct kvm_one_reg *reg, void __user *uaddr); |
67 | |||
68 | /* Return mask of REG_* runtime visibility overrides */ | ||
69 | unsigned int (*visibility)(const struct kvm_vcpu *vcpu, | ||
70 | const struct sys_reg_desc *rd); | ||
67 | }; | 71 | }; |
68 | 72 | ||
73 | #define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */ | ||
74 | #define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */ | ||
75 | |||
69 | static inline void print_sys_reg_instr(const struct sys_reg_params *p) | 76 | static inline void print_sys_reg_instr(const struct sys_reg_params *p) |
70 | { | 77 | { |
71 | /* Look, we even formatted it for you to paste into the table! */ | 78 | /* Look, we even formatted it for you to paste into the table! */ |
@@ -102,6 +109,24 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r | |||
102 | __vcpu_sys_reg(vcpu, r->reg) = r->val; | 109 | __vcpu_sys_reg(vcpu, r->reg) = r->val; |
103 | } | 110 | } |
104 | 111 | ||
112 | static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu, | ||
113 | const struct sys_reg_desc *r) | ||
114 | { | ||
115 | if (likely(!r->visibility)) | ||
116 | return false; | ||
117 | |||
118 | return r->visibility(vcpu, r) & REG_HIDDEN_GUEST; | ||
119 | } | ||
120 | |||
121 | static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu, | ||
122 | const struct sys_reg_desc *r) | ||
123 | { | ||
124 | if (likely(!r->visibility)) | ||
125 | return false; | ||
126 | |||
127 | return r->visibility(vcpu, r) & REG_HIDDEN_USER; | ||
128 | } | ||
129 | |||
105 | static inline int cmp_sys_reg(const struct sys_reg_desc *i1, | 130 | static inline int cmp_sys_reg(const struct sys_reg_desc *i1, |
106 | const struct sys_reg_desc *i2) | 131 | const struct sys_reg_desc *i2) |
107 | { | 132 | { |
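
The new .visibility hook is deliberately optional: descriptors without it behave as before, while the SVE- and ptrauth-specific registers can be hidden both from the guest (a trap injects UNDEF) and from the KVM_GET/SET_ONE_REG ioctls. A toy userspace model of the mechanism, using made-up types, to show how the optional callback composes with the two REG_HIDDEN_* bits (illustrative only):

        #include <stdio.h>
        #include <stdbool.h>

        #define REG_HIDDEN_USER         (1 << 0)
        #define REG_HIDDEN_GUEST        (1 << 1)

        struct vcpu { bool has_sve; };          /* toy stand-in for kvm_vcpu */

        struct desc {                           /* toy stand-in for sys_reg_desc */
                const char *name;
                unsigned int (*visibility)(const struct vcpu *vcpu,
                                           const struct desc *rd);
        };

        static unsigned int sve_visibility(const struct vcpu *vcpu,
                                           const struct desc *rd)
        {
                return vcpu->has_sve ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST;
        }

        static bool hidden_from_user(const struct vcpu *vcpu, const struct desc *rd)
        {
                if (!rd->visibility)            /* most registers: always visible */
                        return false;
                return rd->visibility(vcpu, rd) & REG_HIDDEN_USER;
        }

        int main(void)
        {
                const struct desc regs[] = {
                        { "SCTLR_EL1", NULL },
                        { "ZCR_EL1",   sve_visibility },
                };
                struct vcpu v = { .has_sve = false };

                for (unsigned i = 0; i < sizeof(regs) / sizeof(regs[0]); i++)
                        printf("%s: %s\n", regs[i].name,
                               hidden_from_user(&v, &regs[i]) ? "hidden" : "listed");
                return 0;
        }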
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e6b5bb012ccb..013c76a0a03e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table { | |||
201 | struct kref kref; | 201 | struct kref kref; |
202 | }; | 202 | }; |
203 | 203 | ||
204 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | ||
205 | |||
204 | struct kvmppc_spapr_tce_table { | 206 | struct kvmppc_spapr_tce_table { |
205 | struct list_head list; | 207 | struct list_head list; |
206 | struct kvm *kvm; | 208 | struct kvm *kvm; |
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table { | |||
210 | u64 offset; /* in pages */ | 212 | u64 offset; /* in pages */ |
211 | u64 size; /* window size in pages */ | 213 | u64 size; /* window size in pages */ |
212 | struct list_head iommu_tables; | 214 | struct list_head iommu_tables; |
215 | struct mutex alloc_lock; | ||
213 | struct page *pages[0]; | 216 | struct page *pages[0]; |
214 | }; | 217 | }; |
215 | 218 | ||
@@ -222,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops; | |||
222 | struct kvmppc_xive; | 225 | struct kvmppc_xive; |
223 | struct kvmppc_xive_vcpu; | 226 | struct kvmppc_xive_vcpu; |
224 | extern struct kvm_device_ops kvm_xive_ops; | 227 | extern struct kvm_device_ops kvm_xive_ops; |
228 | extern struct kvm_device_ops kvm_xive_native_ops; | ||
225 | 229 | ||
226 | struct kvmppc_passthru_irqmap; | 230 | struct kvmppc_passthru_irqmap; |
227 | 231 | ||
@@ -312,7 +316,11 @@ struct kvm_arch { | |||
312 | #endif | 316 | #endif |
313 | #ifdef CONFIG_KVM_XICS | 317 | #ifdef CONFIG_KVM_XICS |
314 | struct kvmppc_xics *xics; | 318 | struct kvmppc_xics *xics; |
315 | struct kvmppc_xive *xive; | 319 | struct kvmppc_xive *xive; /* Current XIVE device in use */ |
320 | struct { | ||
321 | struct kvmppc_xive *native; | ||
322 | struct kvmppc_xive *xics_on_xive; | ||
323 | } xive_devices; | ||
316 | struct kvmppc_passthru_irqmap *pimap; | 324 | struct kvmppc_passthru_irqmap *pimap; |
317 | #endif | 325 | #endif |
318 | struct kvmppc_ops *kvm_ops; | 326 | struct kvmppc_ops *kvm_ops; |
@@ -449,6 +457,7 @@ struct kvmppc_passthru_irqmap { | |||
449 | #define KVMPPC_IRQ_DEFAULT 0 | 457 | #define KVMPPC_IRQ_DEFAULT 0 |
450 | #define KVMPPC_IRQ_MPIC 1 | 458 | #define KVMPPC_IRQ_MPIC 1 |
451 | #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ | 459 | #define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */ |
460 | #define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */ | ||
452 | 461 | ||
453 | #define MMIO_HPTE_CACHE_SIZE 4 | 462 | #define MMIO_HPTE_CACHE_SIZE 4 |
454 | 463 | ||
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index ac22b28ae78d..bc892380e6cd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -197,10 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table( | |||
197 | (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ | 197 | (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ |
198 | (stt)->size, (ioba), (npages)) ? \ | 198 | (stt)->size, (ioba), (npages)) ? \ |
199 | H_PARAMETER : H_SUCCESS) | 199 | H_PARAMETER : H_SUCCESS) |
200 | extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, | ||
201 | unsigned long *ua, unsigned long **prmap); | ||
202 | extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt, | ||
203 | unsigned long idx, unsigned long tce); | ||
204 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 200 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
205 | unsigned long ioba, unsigned long tce); | 201 | unsigned long ioba, unsigned long tce); |
206 | extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | 202 | extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, |
@@ -273,6 +269,7 @@ union kvmppc_one_reg { | |||
273 | u64 addr; | 269 | u64 addr; |
274 | u64 length; | 270 | u64 length; |
275 | } vpaval; | 271 | } vpaval; |
272 | u64 xive_timaval[2]; | ||
276 | }; | 273 | }; |
277 | 274 | ||
278 | struct kvmppc_ops { | 275 | struct kvmppc_ops { |
@@ -480,6 +477,9 @@ extern void kvm_hv_vm_activated(void); | |||
480 | extern void kvm_hv_vm_deactivated(void); | 477 | extern void kvm_hv_vm_deactivated(void); |
481 | extern bool kvm_hv_mode_active(void); | 478 | extern bool kvm_hv_mode_active(void); |
482 | 479 | ||
480 | extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, | ||
481 | struct kvm_nested_guest *nested); | ||
482 | |||
483 | #else | 483 | #else |
484 | static inline void __init kvm_cma_reserve(void) | 484 | static inline void __init kvm_cma_reserve(void) |
485 | {} | 485 | {} |
@@ -594,6 +594,22 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval); | |||
594 | extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, | 594 | extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, |
595 | int level, bool line_status); | 595 | int level, bool line_status); |
596 | extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); | 596 | extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); |
597 | |||
598 | static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) | ||
599 | { | ||
600 | return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE; | ||
601 | } | ||
602 | |||
603 | extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||
604 | struct kvm_vcpu *vcpu, u32 cpu); | ||
605 | extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); | ||
606 | extern void kvmppc_xive_native_init_module(void); | ||
607 | extern void kvmppc_xive_native_exit_module(void); | ||
608 | extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, | ||
609 | union kvmppc_one_reg *val); | ||
610 | extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, | ||
611 | union kvmppc_one_reg *val); | ||
612 | |||
597 | #else | 613 | #else |
598 | static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, | 614 | static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, |
599 | u32 priority) { return -1; } | 615 | u32 priority) { return -1; } |
@@ -617,6 +633,21 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur | |||
617 | static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, | 633 | static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, |
618 | int level, bool line_status) { return -ENODEV; } | 634 | int level, bool line_status) { return -ENODEV; } |
619 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } | 635 | static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } |
636 | |||
637 | static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) | ||
638 | { return 0; } | ||
639 | static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||
640 | struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; } | ||
641 | static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { } | ||
642 | static inline void kvmppc_xive_native_init_module(void) { } | ||
643 | static inline void kvmppc_xive_native_exit_module(void) { } | ||
644 | static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, | ||
645 | union kvmppc_one_reg *val) | ||
646 | { return 0; } | ||
647 | static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, | ||
648 | union kvmppc_one_reg *val) | ||
649 | { return -ENOENT; } | ||
650 | |||
620 | #endif /* CONFIG_KVM_XIVE */ | 651 | #endif /* CONFIG_KVM_XIVE */ |
621 | 652 | ||
622 | #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) | 653 | #if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) |
@@ -665,6 +696,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, | |||
665 | unsigned long pte_index); | 696 | unsigned long pte_index); |
666 | long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, | 697 | long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, |
667 | unsigned long pte_index); | 698 | unsigned long pte_index); |
699 | long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, | ||
700 | unsigned long dest, unsigned long src); | ||
668 | long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, | 701 | long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, |
669 | unsigned long slb_v, unsigned int status, bool data); | 702 | unsigned long slb_v, unsigned int status, bool data); |
670 | unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); | 703 | unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); |
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index b579a943407b..eaf76f57023a 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h | |||
@@ -23,6 +23,7 @@ | |||
23 | * same offset regardless of where the code is executing | 23 | * same offset regardless of where the code is executing |
24 | */ | 24 | */ |
25 | extern void __iomem *xive_tima; | 25 | extern void __iomem *xive_tima; |
26 | extern unsigned long xive_tima_os; | ||
26 | 27 | ||
27 | /* | 28 | /* |
28 | * Offset in the TM area of our current execution level (provided by | 29 | * Offset in the TM area of our current execution level (provided by |
@@ -73,6 +74,8 @@ struct xive_q { | |||
73 | u32 esc_irq; | 74 | u32 esc_irq; |
74 | atomic_t count; | 75 | atomic_t count; |
75 | atomic_t pending_count; | 76 | atomic_t pending_count; |
77 | u64 guest_qaddr; | ||
78 | u32 guest_qshift; | ||
76 | }; | 79 | }; |
77 | 80 | ||
78 | /* Global enable flags for the XIVE support */ | 81 | /* Global enable flags for the XIVE support */ |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 26ca425f4c2c..b0f72dea8b11 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char { | |||
482 | #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ | 482 | #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ |
483 | #define KVM_REG_PPC_ICP_PPRI_MASK 0xff | 483 | #define KVM_REG_PPC_ICP_PPRI_MASK 0xff |
484 | 484 | ||
485 | #define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d) | ||
486 | |||
485 | /* Device control API: PPC-specific devices */ | 487 | /* Device control API: PPC-specific devices */ |
486 | #define KVM_DEV_MPIC_GRP_MISC 1 | 488 | #define KVM_DEV_MPIC_GRP_MISC 1 |
487 | #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ | 489 | #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ |
@@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char { | |||
677 | #define KVM_XICS_PRESENTED (1ULL << 43) | 679 | #define KVM_XICS_PRESENTED (1ULL << 43) |
678 | #define KVM_XICS_QUEUED (1ULL << 44) | 680 | #define KVM_XICS_QUEUED (1ULL << 44) |
679 | 681 | ||
682 | /* POWER9 XIVE Native Interrupt Controller */ | ||
683 | #define KVM_DEV_XIVE_GRP_CTRL 1 | ||
684 | #define KVM_DEV_XIVE_RESET 1 | ||
685 | #define KVM_DEV_XIVE_EQ_SYNC 2 | ||
686 | #define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ | ||
687 | #define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ | ||
688 | #define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ | ||
689 | #define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */ | ||
690 | |||
691 | /* Layout of 64-bit XIVE source attribute values */ | ||
692 | #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) | ||
693 | #define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) | ||
694 | |||
695 | /* Layout of 64-bit XIVE source configuration attribute values */ | ||
696 | #define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0 | ||
697 | #define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7 | ||
698 | #define KVM_XIVE_SOURCE_SERVER_SHIFT 3 | ||
699 | #define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL | ||
700 | #define KVM_XIVE_SOURCE_MASKED_SHIFT 32 | ||
701 | #define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL | ||
702 | #define KVM_XIVE_SOURCE_EISN_SHIFT 33 | ||
703 | #define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL | ||
704 | |||
705 | /* Layout of 64-bit EQ identifier */ | ||
706 | #define KVM_XIVE_EQ_PRIORITY_SHIFT 0 | ||
707 | #define KVM_XIVE_EQ_PRIORITY_MASK 0x7 | ||
708 | #define KVM_XIVE_EQ_SERVER_SHIFT 3 | ||
709 | #define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL | ||
710 | |||
711 | /* Layout of EQ configuration values (64 bytes) */ | ||
712 | struct kvm_ppc_xive_eq { | ||
713 | __u32 flags; | ||
714 | __u32 qshift; | ||
715 | __u64 qaddr; | ||
716 | __u32 qtoggle; | ||
717 | __u32 qindex; | ||
718 | __u8 pad[40]; | ||
719 | }; | ||
720 | |||
721 | #define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 | ||
722 | |||
723 | #define KVM_XIVE_TIMA_PAGE_OFFSET 0 | ||
724 | #define KVM_XIVE_ESB_PAGE_OFFSET 4 | ||
725 | |||
680 | #endif /* __LINUX_KVM_POWERPC_H */ | 726 | #endif /* __LINUX_KVM_POWERPC_H */ |
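
These uAPI additions are driven through the generic KVM device-attribute ioctls. A hedged sketch of how a VMM might pack a source configuration with the defines above and hand it to the XIVE native device; the device fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_XIVE (registered later in this series), and the convention that attr carries the 64-bit source identifier while addr points at the packed value is an assumption based on the group comments above:

        #include <linux/kvm.h>
        #include <sys/ioctl.h>
        #include <stdint.h>

        /* Illustrative only: configure one interrupt source on the XIVE device. */
        static int xive_configure_source(int xive_fd, uint64_t src, uint64_t prio,
                                         uint64_t server, uint64_t eisn)
        {
                uint64_t val =
                        ((prio << KVM_XIVE_SOURCE_PRIORITY_SHIFT) &
                         KVM_XIVE_SOURCE_PRIORITY_MASK) |
                        ((server << KVM_XIVE_SOURCE_SERVER_SHIFT) &
                         KVM_XIVE_SOURCE_SERVER_MASK) |
                        ((eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
                         KVM_XIVE_SOURCE_EISN_MASK);
                struct kvm_device_attr attr = {
                        .group = KVM_DEV_XIVE_GRP_SOURCE_CONFIG,
                        .attr  = src,                   /* 64-bit source identifier */
                        .addr  = (uint64_t)(uintptr_t)&val,
                };

                return ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
        }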
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3223aec88b2c..4c67cc79de7c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -94,7 +94,7 @@ endif | |||
94 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | 94 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ |
95 | book3s_xics.o | 95 | book3s_xics.o |
96 | 96 | ||
97 | kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o | 97 | kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o |
98 | kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o | 98 | kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o |
99 | 99 | ||
100 | kvm-book3s_64-module-objs := \ | 100 | kvm-book3s_64-module-objs := \ |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 10c5579d20ce..61a212d0daf0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
651 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); | 651 | *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); |
652 | break; | 652 | break; |
653 | #endif /* CONFIG_KVM_XICS */ | 653 | #endif /* CONFIG_KVM_XICS */ |
654 | #ifdef CONFIG_KVM_XIVE | ||
655 | case KVM_REG_PPC_VP_STATE: | ||
656 | if (!vcpu->arch.xive_vcpu) { | ||
657 | r = -ENXIO; | ||
658 | break; | ||
659 | } | ||
660 | if (xive_enabled()) | ||
661 | r = kvmppc_xive_native_get_vp(vcpu, val); | ||
662 | else | ||
663 | r = -ENXIO; | ||
664 | break; | ||
665 | #endif /* CONFIG_KVM_XIVE */ | ||
654 | case KVM_REG_PPC_FSCR: | 666 | case KVM_REG_PPC_FSCR: |
655 | *val = get_reg_val(id, vcpu->arch.fscr); | 667 | *val = get_reg_val(id, vcpu->arch.fscr); |
656 | break; | 668 | break; |
@@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | |||
724 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); | 736 | r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); |
725 | break; | 737 | break; |
726 | #endif /* CONFIG_KVM_XICS */ | 738 | #endif /* CONFIG_KVM_XICS */ |
739 | #ifdef CONFIG_KVM_XIVE | ||
740 | case KVM_REG_PPC_VP_STATE: | ||
741 | if (!vcpu->arch.xive_vcpu) { | ||
742 | r = -ENXIO; | ||
743 | break; | ||
744 | } | ||
745 | if (xive_enabled()) | ||
746 | r = kvmppc_xive_native_set_vp(vcpu, val); | ||
747 | else | ||
748 | r = -ENXIO; | ||
749 | break; | ||
750 | #endif /* CONFIG_KVM_XIVE */ | ||
727 | case KVM_REG_PPC_FSCR: | 751 | case KVM_REG_PPC_FSCR: |
728 | vcpu->arch.fscr = set_reg_val(id, *val); | 752 | vcpu->arch.fscr = set_reg_val(id, *val); |
729 | break; | 753 | break; |
@@ -891,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) | |||
891 | kvmppc_rtas_tokens_free(kvm); | 915 | kvmppc_rtas_tokens_free(kvm); |
892 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); | 916 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); |
893 | #endif | 917 | #endif |
918 | |||
919 | #ifdef CONFIG_KVM_XICS | ||
920 | /* | ||
921 | * Free the XIVE devices which are not directly freed by the | ||
922 | * device 'release' method | ||
923 | */ | ||
924 | kfree(kvm->arch.xive_devices.native); | ||
925 | kvm->arch.xive_devices.native = NULL; | ||
926 | kfree(kvm->arch.xive_devices.xics_on_xive); | ||
927 | kvm->arch.xive_devices.xics_on_xive = NULL; | ||
928 | #endif /* CONFIG_KVM_XICS */ | ||
894 | } | 929 | } |
895 | 930 | ||
896 | int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) | 931 | int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu) |
@@ -1050,6 +1085,9 @@ static int kvmppc_book3s_init(void) | |||
1050 | if (xics_on_xive()) { | 1085 | if (xics_on_xive()) { |
1051 | kvmppc_xive_init_module(); | 1086 | kvmppc_xive_init_module(); |
1052 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); | 1087 | kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); |
1088 | kvmppc_xive_native_init_module(); | ||
1089 | kvm_register_device_ops(&kvm_xive_native_ops, | ||
1090 | KVM_DEV_TYPE_XIVE); | ||
1053 | } else | 1091 | } else |
1054 | #endif | 1092 | #endif |
1055 | kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); | 1093 | kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); |
@@ -1060,8 +1098,10 @@ static int kvmppc_book3s_init(void) | |||
1060 | static void kvmppc_book3s_exit(void) | 1098 | static void kvmppc_book3s_exit(void) |
1061 | { | 1099 | { |
1062 | #ifdef CONFIG_KVM_XICS | 1100 | #ifdef CONFIG_KVM_XICS |
1063 | if (xics_on_xive()) | 1101 | if (xics_on_xive()) { |
1064 | kvmppc_xive_exit_module(); | 1102 | kvmppc_xive_exit_module(); |
1103 | kvmppc_xive_native_exit_module(); | ||
1104 | } | ||
1065 | #endif | 1105 | #endif |
1066 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER | 1106 | #ifdef CONFIG_KVM_BOOK3S_32_HANDLER |
1067 | kvmppc_book3s_exit_pr(); | 1107 | kvmppc_book3s_exit_pr(); |
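
KVM_REG_PPC_VP_STATE is a 128-bit ONE_REG, so the two timaval words are transferred through the usual kvm_one_reg indirection. A minimal userspace sketch (illustrative only); as the hunks above show, the ioctl fails with ENXIO unless the vcpu is connected to the XIVE native device:

        #include <linux/kvm.h>
        #include <sys/ioctl.h>
        #include <stdint.h>

        /* Illustrative only: save/restore the XIVE VP state of one vcpu. */
        static int get_vp_state(int vcpu_fd, uint64_t timaval[2])
        {
                struct kvm_one_reg reg = {
                        .id   = KVM_REG_PPC_VP_STATE,   /* KVM_REG_SIZE_U128 */
                        .addr = (uint64_t)(uintptr_t)timaval,
                };

                return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
        }

        static int set_vp_state(int vcpu_fd, const uint64_t timaval[2])
        {
                struct kvm_one_reg reg = {
                        .id   = KVM_REG_PPC_VP_STATE,
                        .addr = (uint64_t)(uintptr_t)timaval,
                };

                return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
        }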
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index f100e331e69b..66270e07449a 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c | |||
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head) | |||
228 | unsigned long i, npages = kvmppc_tce_pages(stt->size); | 228 | unsigned long i, npages = kvmppc_tce_pages(stt->size); |
229 | 229 | ||
230 | for (i = 0; i < npages; i++) | 230 | for (i = 0; i < npages; i++) |
231 | __free_page(stt->pages[i]); | 231 | if (stt->pages[i]) |
232 | __free_page(stt->pages[i]); | ||
232 | 233 | ||
233 | kfree(stt); | 234 | kfree(stt); |
234 | } | 235 | } |
235 | 236 | ||
237 | static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt, | ||
238 | unsigned long sttpage) | ||
239 | { | ||
240 | struct page *page = stt->pages[sttpage]; | ||
241 | |||
242 | if (page) | ||
243 | return page; | ||
244 | |||
245 | mutex_lock(&stt->alloc_lock); | ||
246 | page = stt->pages[sttpage]; | ||
247 | if (!page) { | ||
248 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
249 | WARN_ON_ONCE(!page); | ||
250 | if (page) | ||
251 | stt->pages[sttpage] = page; | ||
252 | } | ||
253 | mutex_unlock(&stt->alloc_lock); | ||
254 | |||
255 | return page; | ||
256 | } | ||
257 | |||
236 | static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) | 258 | static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) |
237 | { | 259 | { |
238 | struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; | 260 | struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; |
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) | |||
241 | if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) | 263 | if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) |
242 | return VM_FAULT_SIGBUS; | 264 | return VM_FAULT_SIGBUS; |
243 | 265 | ||
244 | page = stt->pages[vmf->pgoff]; | 266 | page = kvm_spapr_get_tce_page(stt, vmf->pgoff); |
267 | if (!page) | ||
268 | return VM_FAULT_OOM; | ||
269 | |||
245 | get_page(page); | 270 | get_page(page); |
246 | vmf->page = page; | 271 | vmf->page = page; |
247 | return 0; | 272 | return 0; |
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
296 | struct kvmppc_spapr_tce_table *siter; | 321 | struct kvmppc_spapr_tce_table *siter; |
297 | unsigned long npages, size = args->size; | 322 | unsigned long npages, size = args->size; |
298 | int ret = -ENOMEM; | 323 | int ret = -ENOMEM; |
299 | int i; | ||
300 | 324 | ||
301 | if (!args->size || args->page_shift < 12 || args->page_shift > 34 || | 325 | if (!args->size || args->page_shift < 12 || args->page_shift > 34 || |
302 | (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) | 326 | (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) |
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
318 | stt->offset = args->offset; | 342 | stt->offset = args->offset; |
319 | stt->size = size; | 343 | stt->size = size; |
320 | stt->kvm = kvm; | 344 | stt->kvm = kvm; |
345 | mutex_init(&stt->alloc_lock); | ||
321 | INIT_LIST_HEAD_RCU(&stt->iommu_tables); | 346 | INIT_LIST_HEAD_RCU(&stt->iommu_tables); |
322 | 347 | ||
323 | for (i = 0; i < npages; i++) { | ||
324 | stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
325 | if (!stt->pages[i]) | ||
326 | goto fail; | ||
327 | } | ||
328 | |||
329 | mutex_lock(&kvm->lock); | 348 | mutex_lock(&kvm->lock); |
330 | 349 | ||
331 | /* Check this LIOBN hasn't been previously allocated */ | 350 | /* Check this LIOBN hasn't been previously allocated */ |
@@ -352,17 +371,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | |||
352 | if (ret >= 0) | 371 | if (ret >= 0) |
353 | return ret; | 372 | return ret; |
354 | 373 | ||
355 | fail: | ||
356 | for (i = 0; i < npages; i++) | ||
357 | if (stt->pages[i]) | ||
358 | __free_page(stt->pages[i]); | ||
359 | |||
360 | kfree(stt); | 374 | kfree(stt); |
361 | fail_acct: | 375 | fail_acct: |
362 | kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); | 376 | kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); |
363 | return ret; | 377 | return ret; |
364 | } | 378 | } |
365 | 379 | ||
380 | static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, | ||
381 | unsigned long *ua) | ||
382 | { | ||
383 | unsigned long gfn = tce >> PAGE_SHIFT; | ||
384 | struct kvm_memory_slot *memslot; | ||
385 | |||
386 | memslot = search_memslots(kvm_memslots(kvm), gfn); | ||
387 | if (!memslot) | ||
388 | return -EINVAL; | ||
389 | |||
390 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | ||
391 | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | ||
392 | |||
393 | return 0; | ||
394 | } | ||
395 | |||
366 | static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, | 396 | static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, |
367 | unsigned long tce) | 397 | unsigned long tce) |
368 | { | 398 | { |
@@ -378,7 +408,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
378 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) | 408 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) |
379 | return H_TOO_HARD; | 409 | return H_TOO_HARD; |
380 | 410 | ||
381 | if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL)) | 411 | if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) |
382 | return H_TOO_HARD; | 412 | return H_TOO_HARD; |
383 | 413 | ||
384 | list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { | 414 | list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { |
@@ -397,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
397 | return H_SUCCESS; | 427 | return H_SUCCESS; |
398 | } | 428 | } |
399 | 429 | ||
430 | /* | ||
431 | * Handles TCE requests for emulated devices. | ||
432 | * Puts guest TCE values to the table and expects user space to convert them. | ||
433 | * Cannot fail so kvmppc_tce_validate must be called before it. | ||
434 | */ | ||
435 | static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | ||
436 | unsigned long idx, unsigned long tce) | ||
437 | { | ||
438 | struct page *page; | ||
439 | u64 *tbl; | ||
440 | unsigned long sttpage; | ||
441 | |||
442 | idx -= stt->offset; | ||
443 | sttpage = idx / TCES_PER_PAGE; | ||
444 | page = stt->pages[sttpage]; | ||
445 | |||
446 | if (!page) { | ||
447 | /* We allow any TCE, not just with read|write permissions */ | ||
448 | if (!tce) | ||
449 | return; | ||
450 | |||
451 | page = kvm_spapr_get_tce_page(stt, sttpage); | ||
452 | if (!page) | ||
453 | return; | ||
454 | } | ||
455 | tbl = page_to_virt(page); | ||
456 | |||
457 | tbl[idx % TCES_PER_PAGE] = tce; | ||
458 | } | ||
459 | |||
400 | static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, | 460 | static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, |
401 | unsigned long entry) | 461 | unsigned long entry) |
402 | { | 462 | { |
@@ -551,7 +611,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
551 | 611 | ||
552 | dir = iommu_tce_direction(tce); | 612 | dir = iommu_tce_direction(tce); |
553 | 613 | ||
554 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { | 614 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { |
555 | ret = H_PARAMETER; | 615 | ret = H_PARAMETER; |
556 | goto unlock_exit; | 616 | goto unlock_exit; |
557 | } | 617 | } |
@@ -612,7 +672,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
612 | return ret; | 672 | return ret; |
613 | 673 | ||
614 | idx = srcu_read_lock(&vcpu->kvm->srcu); | 674 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
615 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) { | 675 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) { |
616 | ret = H_TOO_HARD; | 676 | ret = H_TOO_HARD; |
617 | goto unlock_exit; | 677 | goto unlock_exit; |
618 | } | 678 | } |
@@ -647,7 +707,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
647 | } | 707 | } |
648 | tce = be64_to_cpu(tce); | 708 | tce = be64_to_cpu(tce); |
649 | 709 | ||
650 | if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) | 710 | if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) |
651 | return H_PARAMETER; | 711 | return H_PARAMETER; |
652 | 712 | ||
653 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | 713 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { |
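
With the backing pages of the TCE table now allocated on demand, each 64-bit TCE still lives at a fixed slot: TCES_PER_PAGE entries per backing page, indexed from stt->offset. The same arithmetic also drives the real-mode range check in the book3s_64_vio_hv.c hunk that follows. A small standalone illustration, assuming 64K host pages (so TCES_PER_PAGE is 8192):

        #include <stdio.h>

        #define EXAMPLE_PAGE_SIZE       65536UL         /* assumed 64K host page size */
        #define TCES_PER_PAGE           (EXAMPLE_PAGE_SIZE / sizeof(unsigned long long))
        #define ALIGN_UP(x, a)          ((((x) + (a) - 1) / (a)) * (a))

        int main(void)
        {
                /* A hypothetical request touching 4 TCEs near a page boundary */
                unsigned long idx = 8190;       /* first TCE index, offset-adjusted */
                unsigned long npages = 4;       /* number of TCEs in the request */

                unsigned long sttpage = idx / TCES_PER_PAGE;
                unsigned long sttpages = ALIGN_UP(idx % TCES_PER_PAGE + npages,
                                                  TCES_PER_PAGE) / TCES_PER_PAGE;

                printf("TCE %lu lands in backing page %lu, slot %lu\n",
                       idx, (unsigned long)(idx / TCES_PER_PAGE),
                       (unsigned long)(idx % TCES_PER_PAGE));
                printf("the request needs backing pages %lu..%lu to be populated\n",
                       sttpage, sttpage + sttpages - 1);
                return 0;
        }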
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 2206bc729b9a..484b47fa3960 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c | |||
@@ -66,8 +66,6 @@ | |||
66 | 66 | ||
67 | #endif | 67 | #endif |
68 | 68 | ||
69 | #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) | ||
70 | |||
71 | /* | 69 | /* |
72 | * Finds a TCE table descriptor by LIOBN. | 70 | * Finds a TCE table descriptor by LIOBN. |
73 | * | 71 | * |
@@ -88,6 +86,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, | |||
88 | EXPORT_SYMBOL_GPL(kvmppc_find_table); | 86 | EXPORT_SYMBOL_GPL(kvmppc_find_table); |
89 | 87 | ||
90 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 88 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
89 | static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, | ||
90 | unsigned long *ua, unsigned long **prmap) | ||
91 | { | ||
92 | unsigned long gfn = tce >> PAGE_SHIFT; | ||
93 | struct kvm_memory_slot *memslot; | ||
94 | |||
95 | memslot = search_memslots(kvm_memslots_raw(kvm), gfn); | ||
96 | if (!memslot) | ||
97 | return -EINVAL; | ||
98 | |||
99 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | ||
100 | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | ||
101 | |||
102 | if (prmap) | ||
103 | *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
104 | |||
105 | return 0; | ||
106 | } | ||
107 | |||
91 | /* | 108 | /* |
92 | * Validates TCE address. | 109 | * Validates TCE address. |
93 | * At the moment flags and page mask are validated. | 110 | * At the moment flags and page mask are validated. |
@@ -111,7 +128,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
111 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) | 128 | if (iommu_tce_check_gpa(stt->page_shift, gpa)) |
112 | return H_PARAMETER; | 129 | return H_PARAMETER; |
113 | 130 | ||
114 | if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL)) | 131 | if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) |
115 | return H_TOO_HARD; | 132 | return H_TOO_HARD; |
116 | 133 | ||
117 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | 134 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { |
@@ -129,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, | |||
129 | 146 | ||
130 | return H_SUCCESS; | 147 | return H_SUCCESS; |
131 | } | 148 | } |
132 | #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ | ||
133 | 149 | ||
134 | /* Note on the use of page_address() in real mode, | 150 | /* Note on the use of page_address() in real mode, |
135 | * | 151 | * |
@@ -161,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page) | |||
161 | /* | 177 | /* |
162 | * Handles TCE requests for emulated devices. | 178 | * Handles TCE requests for emulated devices. |
163 | * Puts guest TCE values to the table and expects user space to convert them. | 179 | * Puts guest TCE values to the table and expects user space to convert them. |
164 | * Called in both real and virtual modes. | 180 | * Cannot fail so kvmppc_rm_tce_validate must be called before it. |
165 | * Cannot fail so kvmppc_tce_validate must be called before it. | ||
166 | * | ||
167 | * WARNING: This will be called in real-mode on HV KVM and virtual | ||
168 | * mode on PR KVM | ||
169 | */ | 181 | */ |
170 | void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | 182 | static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, |
171 | unsigned long idx, unsigned long tce) | 183 | unsigned long idx, unsigned long tce) |
172 | { | 184 | { |
173 | struct page *page; | 185 | struct page *page; |
@@ -175,35 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, | |||
175 | 187 | ||
176 | idx -= stt->offset; | 188 | idx -= stt->offset; |
177 | page = stt->pages[idx / TCES_PER_PAGE]; | 189 | page = stt->pages[idx / TCES_PER_PAGE]; |
190 | /* | ||
191 | * page must not be NULL in real mode, | ||
192 | * kvmppc_rm_ioba_validate() must have taken care of this. | ||
193 | */ | ||
194 | WARN_ON_ONCE_RM(!page); | ||
178 | tbl = kvmppc_page_address(page); | 195 | tbl = kvmppc_page_address(page); |
179 | 196 | ||
180 | tbl[idx % TCES_PER_PAGE] = tce; | 197 | tbl[idx % TCES_PER_PAGE] = tce; |
181 | } | 198 | } |
182 | EXPORT_SYMBOL_GPL(kvmppc_tce_put); | ||
183 | 199 | ||
184 | long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce, | 200 | /* |
185 | unsigned long *ua, unsigned long **prmap) | 201 | * TCE pages are allocated in kvmppc_rm_tce_put() which won't be able to do so |
202 | * in real mode. | ||
203 | * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCE page is |
204 | * allocated or not required (when clearing a tce entry). | ||
205 | */ | ||
206 | static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, | ||
207 | unsigned long ioba, unsigned long npages, bool clearing) | ||
186 | { | 208 | { |
187 | unsigned long gfn = tce >> PAGE_SHIFT; | 209 | unsigned long i, idx, sttpage, sttpages; |
188 | struct kvm_memory_slot *memslot; | 210 | unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages); |
189 | 211 | ||
190 | memslot = search_memslots(kvm_memslots(kvm), gfn); | 212 | if (ret) |
191 | if (!memslot) | 213 | return ret; |
192 | return -EINVAL; | 214 | /* |
193 | 215 | * clearing==true says kvmppc_rm_tce_put won't be allocating pages | |
194 | *ua = __gfn_to_hva_memslot(memslot, gfn) | | 216 | * for empty tces. |
195 | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); | 217 | */ |
218 | if (clearing) | ||
219 | return H_SUCCESS; | ||
196 | 220 | ||
197 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 221 | idx = (ioba >> stt->page_shift) - stt->offset; |
198 | if (prmap) | 222 | sttpage = idx / TCES_PER_PAGE; |
199 | *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | 223 | sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / |
200 | #endif | 224 | TCES_PER_PAGE; |
225 | for (i = sttpage; i < sttpage + sttpages; ++i) | ||
226 | if (!stt->pages[i]) | ||
227 | return H_TOO_HARD; | ||
201 | 228 | ||
202 | return 0; | 229 | return H_SUCCESS; |
203 | } | 230 | } |
204 | EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua); | ||
205 | 231 | ||
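The new kvmppc_rm_ioba_validate() above makes the real-mode paths bail out with H_TOO_HARD unless every guest-table page the request touches has already been allocated by the virtual-mode handler (or none is needed because the entries are being cleared). A minimal standalone sketch of the same span arithmetic; TCES_PER_PAGE is PAGE_SIZE / sizeof(u64) in the kernel (512 with 4K pages, 8192 with 64K), and the helper name here is illustrative:

    #include <stdbool.h>

    #define TCES_PER_PAGE  512UL   /* PAGE_SIZE / sizeof(u64); assumes 4K kernel pages */
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    /* true when TCE entries [idx, idx + npages) all land on allocated table pages */
    static bool tce_span_backed(void *pages[], unsigned long idx, unsigned long npages)
    {
            unsigned long sttpage  = idx / TCES_PER_PAGE;
            unsigned long sttpages = ALIGN_UP(idx % TCES_PER_PAGE + npages,
                                              TCES_PER_PAGE) / TCES_PER_PAGE;
            unsigned long i;

            for (i = sttpage; i < sttpage + sttpages; i++)
                    if (!pages[i])
                            return false;   /* real mode would return H_TOO_HARD */
            return true;
    }

For example, idx = 510 with npages = 4 spans entries 510 to 513, i.e. two table pages, so both stt->pages[0] and stt->pages[1] must be present.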
206 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | ||
207 | static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, | 232 | static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, |
208 | unsigned long entry, unsigned long *hpa, | 233 | unsigned long entry, unsigned long *hpa, |
209 | enum dma_data_direction *direction) | 234 | enum dma_data_direction *direction) |
@@ -381,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
381 | if (!stt) | 406 | if (!stt) |
382 | return H_TOO_HARD; | 407 | return H_TOO_HARD; |
383 | 408 | ||
384 | ret = kvmppc_ioba_validate(stt, ioba, 1); | 409 | ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0); |
385 | if (ret != H_SUCCESS) | 410 | if (ret != H_SUCCESS) |
386 | return ret; | 411 | return ret; |
387 | 412 | ||
@@ -390,7 +415,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
390 | return ret; | 415 | return ret; |
391 | 416 | ||
392 | dir = iommu_tce_direction(tce); | 417 | dir = iommu_tce_direction(tce); |
393 | if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) | 418 | if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) |
394 | return H_PARAMETER; | 419 | return H_PARAMETER; |
395 | 420 | ||
396 | entry = ioba >> stt->page_shift; | 421 | entry = ioba >> stt->page_shift; |
@@ -409,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
409 | } | 434 | } |
410 | } | 435 | } |
411 | 436 | ||
412 | kvmppc_tce_put(stt, entry, tce); | 437 | kvmppc_rm_tce_put(stt, entry, tce); |
413 | 438 | ||
414 | return H_SUCCESS; | 439 | return H_SUCCESS; |
415 | } | 440 | } |
@@ -480,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
480 | if (tce_list & (SZ_4K - 1)) | 505 | if (tce_list & (SZ_4K - 1)) |
481 | return H_PARAMETER; | 506 | return H_PARAMETER; |
482 | 507 | ||
483 | ret = kvmppc_ioba_validate(stt, ioba, npages); | 508 | ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false); |
484 | if (ret != H_SUCCESS) | 509 | if (ret != H_SUCCESS) |
485 | return ret; | 510 | return ret; |
486 | 511 | ||
@@ -492,7 +517,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
492 | */ | 517 | */ |
493 | struct mm_iommu_table_group_mem_t *mem; | 518 | struct mm_iommu_table_group_mem_t *mem; |
494 | 519 | ||
495 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) | 520 | if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) |
496 | return H_TOO_HARD; | 521 | return H_TOO_HARD; |
497 | 522 | ||
498 | mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); | 523 | mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); |
@@ -508,7 +533,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
508 | * We do not require memory to be preregistered in this case | 533 | * We do not require memory to be preregistered in this case |
509 | * so lock rmap and do __find_linux_pte_or_hugepte(). | 534 | * so lock rmap and do __find_linux_pte_or_hugepte(). |
510 | */ | 535 | */ |
511 | if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) | 536 | if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) |
512 | return H_TOO_HARD; | 537 | return H_TOO_HARD; |
513 | 538 | ||
514 | rmap = (void *) vmalloc_to_phys(rmap); | 539 | rmap = (void *) vmalloc_to_phys(rmap); |
@@ -542,7 +567,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
542 | unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); | 567 | unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); |
543 | 568 | ||
544 | ua = 0; | 569 | ua = 0; |
545 | if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) | 570 | if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) |
546 | return H_PARAMETER; | 571 | return H_PARAMETER; |
547 | 572 | ||
548 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { | 573 | list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { |
@@ -557,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, | |||
557 | } | 582 | } |
558 | } | 583 | } |
559 | 584 | ||
560 | kvmppc_tce_put(stt, entry + i, tce); | 585 | kvmppc_rm_tce_put(stt, entry + i, tce); |
561 | } | 586 | } |
562 | 587 | ||
563 | unlock_exit: | 588 | unlock_exit: |
@@ -583,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
583 | if (!stt) | 608 | if (!stt) |
584 | return H_TOO_HARD; | 609 | return H_TOO_HARD; |
585 | 610 | ||
586 | ret = kvmppc_ioba_validate(stt, ioba, npages); | 611 | ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0); |
587 | if (ret != H_SUCCESS) | 612 | if (ret != H_SUCCESS) |
588 | return ret; | 613 | return ret; |
589 | 614 | ||
@@ -610,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, | |||
610 | } | 635 | } |
611 | 636 | ||
612 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) | 637 | for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) |
613 | kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); | 638 | kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); |
614 | 639 | ||
615 | return H_SUCCESS; | 640 | return H_SUCCESS; |
616 | } | 641 | } |
@@ -635,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | |||
635 | 660 | ||
636 | idx = (ioba >> stt->page_shift) - stt->offset; | 661 | idx = (ioba >> stt->page_shift) - stt->offset; |
637 | page = stt->pages[idx / TCES_PER_PAGE]; | 662 | page = stt->pages[idx / TCES_PER_PAGE]; |
663 | if (!page) { | ||
664 | vcpu->arch.regs.gpr[4] = 0; | ||
665 | return H_SUCCESS; | ||
666 | } | ||
638 | tbl = (u64 *)page_address(page); | 667 | tbl = (u64 *)page_address(page); |
639 | 668 | ||
640 | vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; | 669 | vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE]; |
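The H_GET_TCE change above returns a zero TCE when the backing table page was never allocated, instead of dereferencing a NULL page pointer. The indexing itself is a simple split of the IO bus address; a worked example with illustrative numbers, again assuming 4K kernel pages (512 TCEs per table page) and stt->offset == 0:

    unsigned long ioba = 0x23000;        /* offset into the guest DMA window       */
    unsigned long idx  = ioba >> 12;     /* page_shift == 12  ->  idx = 0x23       */
    /* table page:   idx / 512 = 0   ->  stt->pages[0]                             */
    /* slot on page: idx % 512 = 35  ->  tbl[35], returned to the guest in gpr[4]  */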
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7bdcd4d7a9f0..d5fc624e0655 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -750,7 +750,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) | |||
750 | /* | 750 | /* |
751 | * Ensure that the read of vcore->dpdes comes after the read | 751 | * Ensure that the read of vcore->dpdes comes after the read |
752 | * of vcpu->doorbell_request. This barrier matches the | 752 | * of vcpu->doorbell_request. This barrier matches the |
753 | * smb_wmb() in kvmppc_guest_entry_inject(). | 753 | * smp_wmb() in kvmppc_guest_entry_inject(). |
754 | */ | 754 | */ |
755 | smp_rmb(); | 755 | smp_rmb(); |
756 | vc = vcpu->arch.vcore; | 756 | vc = vcpu->arch.vcore; |
@@ -802,6 +802,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, | |||
802 | } | 802 | } |
803 | } | 803 | } |
804 | 804 | ||
805 | /* Copy guest memory in place - must reside within a single memslot */ | ||
806 | static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from, | ||
807 | unsigned long len) | ||
808 | { | ||
809 | struct kvm_memory_slot *to_memslot = NULL; | ||
810 | struct kvm_memory_slot *from_memslot = NULL; | ||
811 | unsigned long to_addr, from_addr; | ||
812 | int r; | ||
813 | |||
814 | /* Get HPA for from address */ | ||
815 | from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT); | ||
816 | if (!from_memslot) | ||
817 | return -EFAULT; | ||
818 | if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages) | ||
819 | << PAGE_SHIFT)) | ||
820 | return -EINVAL; | ||
821 | from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT); | ||
822 | if (kvm_is_error_hva(from_addr)) | ||
823 | return -EFAULT; | ||
824 | from_addr |= (from & (PAGE_SIZE - 1)); | ||
825 | |||
826 | /* Get HPA for to address */ | ||
827 | to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT); | ||
828 | if (!to_memslot) | ||
829 | return -EFAULT; | ||
830 | if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages) | ||
831 | << PAGE_SHIFT)) | ||
832 | return -EINVAL; | ||
833 | to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT); | ||
834 | if (kvm_is_error_hva(to_addr)) | ||
835 | return -EFAULT; | ||
836 | to_addr |= (to & (PAGE_SIZE - 1)); | ||
837 | |||
838 | /* Perform copy */ | ||
839 | r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr, | ||
840 | len); | ||
841 | if (r) | ||
842 | return -EFAULT; | ||
843 | mark_page_dirty(kvm, to >> PAGE_SHIFT); | ||
844 | return 0; | ||
845 | } | ||
846 | |||
847 | static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, | ||
848 | unsigned long dest, unsigned long src) | ||
849 | { | ||
850 | u64 pg_sz = SZ_4K; /* 4K page size */ | ||
851 | u64 pg_mask = SZ_4K - 1; | ||
852 | int ret; | ||
853 | |||
854 | /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ | ||
855 | if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | | ||
856 | H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) | ||
857 | return H_PARAMETER; | ||
858 | |||
859 | /* dest (and src if copy_page flag set) must be page aligned */ | ||
860 | if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) | ||
861 | return H_PARAMETER; | ||
862 | |||
863 | /* zero and/or copy the page as determined by the flags */ | ||
864 | if (flags & H_COPY_PAGE) { | ||
865 | ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz); | ||
866 | if (ret < 0) | ||
867 | return H_PARAMETER; | ||
868 | } else if (flags & H_ZERO_PAGE) { | ||
869 | ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz); | ||
870 | if (ret < 0) | ||
871 | return H_PARAMETER; | ||
872 | } | ||
873 | |||
874 | /* We can ignore the remaining flags */ | ||
875 | |||
876 | return H_SUCCESS; | ||
877 | } | ||
878 | |||
805 | static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) | 879 | static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) |
806 | { | 880 | { |
807 | struct kvmppc_vcore *vcore = target->arch.vcore; | 881 | struct kvmppc_vcore *vcore = target->arch.vcore; |
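kvmppc_h_page_init() above implements the PAPR H_PAGE_INIT hypercall in virtual mode: gpr4 carries the flags, gpr5 the destination and gpr6 the source, and only H_ZERO_PAGE and H_COPY_PAGE do any work. A hedged sketch of what the guest side of the call could look like through the standard plpar_hcall_norets() wrapper (the opcode and flag names are the ones from asm/hvcall.h; the wrapper functions themselves are illustrative):

    #include <asm/hvcall.h>

    /* Ask the hypervisor to zero the 4K page at guest real address 'dst'. */
    static inline long h_zero_page(unsigned long dst)
    {
            return plpar_hcall_norets(H_PAGE_INIT, H_ZERO_PAGE, dst, 0);
    }

    /* Ask the hypervisor to copy the 4K page at 'src' to 'dst'. */
    static inline long h_copy_page(unsigned long dst, unsigned long src)
    {
            return plpar_hcall_norets(H_PAGE_INIT, H_COPY_PAGE, dst, src);
    }

Both addresses must be 4K aligned or the handler returns H_PARAMETER, matching the pg_mask check above.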
@@ -1004,6 +1078,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
1004 | if (nesting_enabled(vcpu->kvm)) | 1078 | if (nesting_enabled(vcpu->kvm)) |
1005 | ret = kvmhv_copy_tofrom_guest_nested(vcpu); | 1079 | ret = kvmhv_copy_tofrom_guest_nested(vcpu); |
1006 | break; | 1080 | break; |
1081 | case H_PAGE_INIT: | ||
1082 | ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
1083 | kvmppc_get_gpr(vcpu, 5), | ||
1084 | kvmppc_get_gpr(vcpu, 6)); | ||
1085 | break; | ||
1007 | default: | 1086 | default: |
1008 | return RESUME_HOST; | 1087 | return RESUME_HOST; |
1009 | } | 1088 | } |
@@ -1048,6 +1127,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) | |||
1048 | case H_IPOLL: | 1127 | case H_IPOLL: |
1049 | case H_XIRR_X: | 1128 | case H_XIRR_X: |
1050 | #endif | 1129 | #endif |
1130 | case H_PAGE_INIT: | ||
1051 | return 1; | 1131 | return 1; |
1052 | } | 1132 | } |
1053 | 1133 | ||
@@ -2505,37 +2585,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) | |||
2505 | } | 2585 | } |
2506 | } | 2586 | } |
2507 | 2587 | ||
2508 | static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu, | ||
2509 | struct kvm_nested_guest *nested) | ||
2510 | { | ||
2511 | cpumask_t *need_tlb_flush; | ||
2512 | int lpid; | ||
2513 | |||
2514 | if (!cpu_has_feature(CPU_FTR_HVMODE)) | ||
2515 | return; | ||
2516 | |||
2517 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
2518 | pcpu &= ~0x3UL; | ||
2519 | |||
2520 | if (nested) { | ||
2521 | lpid = nested->shadow_lpid; | ||
2522 | need_tlb_flush = &nested->need_tlb_flush; | ||
2523 | } else { | ||
2524 | lpid = kvm->arch.lpid; | ||
2525 | need_tlb_flush = &kvm->arch.need_tlb_flush; | ||
2526 | } | ||
2527 | |||
2528 | mtspr(SPRN_LPID, lpid); | ||
2529 | isync(); | ||
2530 | smp_mb(); | ||
2531 | |||
2532 | if (cpumask_test_cpu(pcpu, need_tlb_flush)) { | ||
2533 | radix__local_flush_tlb_lpid_guest(lpid); | ||
2534 | /* Clear the bit after the TLB flush */ | ||
2535 | cpumask_clear_cpu(pcpu, need_tlb_flush); | ||
2536 | } | ||
2537 | } | ||
2538 | |||
2539 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) | 2588 | static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) |
2540 | { | 2589 | { |
2541 | int cpu; | 2590 | int cpu; |
@@ -3229,19 +3278,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
3229 | for (sub = 0; sub < core_info.n_subcores; ++sub) | 3278 | for (sub = 0; sub < core_info.n_subcores; ++sub) |
3230 | spin_unlock(&core_info.vc[sub]->lock); | 3279 | spin_unlock(&core_info.vc[sub]->lock); |
3231 | 3280 | ||
3232 | if (kvm_is_radix(vc->kvm)) { | 3281 | guest_enter_irqoff(); |
3233 | /* | 3282 | |
3234 | * Do we need to flush the process scoped TLB for the LPAR? | 3283 | srcu_idx = srcu_read_lock(&vc->kvm->srcu); |
3235 | * | 3284 | |
3236 | * On POWER9, individual threads can come in here, but the | 3285 | this_cpu_disable_ftrace(); |
3237 | * TLB is shared between the 4 threads in a core, hence | ||
3238 | * invalidating on one thread invalidates for all. | ||
3239 | * Thus we make all 4 threads use the same bit here. | ||
3240 | * | ||
3241 | * Hash must be flushed in realmode in order to use tlbiel. | ||
3242 | */ | ||
3243 | kvmppc_radix_check_need_tlb_flush(vc->kvm, pcpu, NULL); | ||
3244 | } | ||
3245 | 3286 | ||
3246 | /* | 3287 | /* |
3247 | * Interrupts will be enabled once we get into the guest, | 3288 | * Interrupts will be enabled once we get into the guest, |
@@ -3249,19 +3290,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) | |||
3249 | */ | 3290 | */ |
3250 | trace_hardirqs_on(); | 3291 | trace_hardirqs_on(); |
3251 | 3292 | ||
3252 | guest_enter_irqoff(); | ||
3253 | |||
3254 | srcu_idx = srcu_read_lock(&vc->kvm->srcu); | ||
3255 | |||
3256 | this_cpu_disable_ftrace(); | ||
3257 | |||
3258 | trap = __kvmppc_vcore_entry(); | 3293 | trap = __kvmppc_vcore_entry(); |
3259 | 3294 | ||
3295 | trace_hardirqs_off(); | ||
3296 | |||
3260 | this_cpu_enable_ftrace(); | 3297 | this_cpu_enable_ftrace(); |
3261 | 3298 | ||
3262 | srcu_read_unlock(&vc->kvm->srcu, srcu_idx); | 3299 | srcu_read_unlock(&vc->kvm->srcu, srcu_idx); |
3263 | 3300 | ||
3264 | trace_hardirqs_off(); | ||
3265 | set_irq_happened(trap); | 3301 | set_irq_happened(trap); |
3266 | 3302 | ||
3267 | spin_lock(&vc->lock); | 3303 | spin_lock(&vc->lock); |
@@ -3514,6 +3550,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, | |||
3514 | #ifdef CONFIG_ALTIVEC | 3550 | #ifdef CONFIG_ALTIVEC |
3515 | load_vr_state(&vcpu->arch.vr); | 3551 | load_vr_state(&vcpu->arch.vr); |
3516 | #endif | 3552 | #endif |
3553 | mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); | ||
3517 | 3554 | ||
3518 | mtspr(SPRN_DSCR, vcpu->arch.dscr); | 3555 | mtspr(SPRN_DSCR, vcpu->arch.dscr); |
3519 | mtspr(SPRN_IAMR, vcpu->arch.iamr); | 3556 | mtspr(SPRN_IAMR, vcpu->arch.iamr); |
@@ -3605,6 +3642,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, | |||
3605 | #ifdef CONFIG_ALTIVEC | 3642 | #ifdef CONFIG_ALTIVEC |
3606 | store_vr_state(&vcpu->arch.vr); | 3643 | store_vr_state(&vcpu->arch.vr); |
3607 | #endif | 3644 | #endif |
3645 | vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); | ||
3608 | 3646 | ||
3609 | if (cpu_has_feature(CPU_FTR_TM) || | 3647 | if (cpu_has_feature(CPU_FTR_TM) || |
3610 | cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) | 3648 | cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) |
@@ -3970,7 +4008,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, | |||
3970 | unsigned long lpcr) | 4008 | unsigned long lpcr) |
3971 | { | 4009 | { |
3972 | int trap, r, pcpu; | 4010 | int trap, r, pcpu; |
3973 | int srcu_idx; | 4011 | int srcu_idx, lpid; |
3974 | struct kvmppc_vcore *vc; | 4012 | struct kvmppc_vcore *vc; |
3975 | struct kvm *kvm = vcpu->kvm; | 4013 | struct kvm *kvm = vcpu->kvm; |
3976 | struct kvm_nested_guest *nested = vcpu->arch.nested; | 4014 | struct kvm_nested_guest *nested = vcpu->arch.nested; |
@@ -4046,8 +4084,12 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, | |||
4046 | vc->vcore_state = VCORE_RUNNING; | 4084 | vc->vcore_state = VCORE_RUNNING; |
4047 | trace_kvmppc_run_core(vc, 0); | 4085 | trace_kvmppc_run_core(vc, 0); |
4048 | 4086 | ||
4049 | if (cpu_has_feature(CPU_FTR_HVMODE)) | 4087 | if (cpu_has_feature(CPU_FTR_HVMODE)) { |
4050 | kvmppc_radix_check_need_tlb_flush(kvm, pcpu, nested); | 4088 | lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; |
4089 | mtspr(SPRN_LPID, lpid); | ||
4090 | isync(); | ||
4091 | kvmppc_check_need_tlb_flush(kvm, pcpu, nested); | ||
4092 | } | ||
4051 | 4093 | ||
4052 | trace_hardirqs_on(); | 4094 | trace_hardirqs_on(); |
4053 | guest_enter_irqoff(); | 4095 | guest_enter_irqoff(); |
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b0cf22477e87..6035d24f1d1d 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c | |||
@@ -805,3 +805,60 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) | |||
805 | vcpu->arch.doorbell_request = 0; | 805 | vcpu->arch.doorbell_request = 0; |
806 | } | 806 | } |
807 | } | 807 | } |
808 | |||
809 | static void flush_guest_tlb(struct kvm *kvm) | ||
810 | { | ||
811 | unsigned long rb, set; | ||
812 | |||
813 | rb = PPC_BIT(52); /* IS = 2 */ | ||
814 | if (kvm_is_radix(kvm)) { | ||
815 | /* R=1 PRS=1 RIC=2 */ | ||
816 | asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) | ||
817 | : : "r" (rb), "i" (1), "i" (1), "i" (2), | ||
818 | "r" (0) : "memory"); | ||
819 | for (set = 1; set < kvm->arch.tlb_sets; ++set) { | ||
820 | rb += PPC_BIT(51); /* increment set number */ | ||
821 | /* R=1 PRS=1 RIC=0 */ | ||
822 | asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) | ||
823 | : : "r" (rb), "i" (1), "i" (1), "i" (0), | ||
824 | "r" (0) : "memory"); | ||
825 | } | ||
826 | } else { | ||
827 | for (set = 0; set < kvm->arch.tlb_sets; ++set) { | ||
828 | /* R=0 PRS=0 RIC=0 */ | ||
829 | asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) | ||
830 | : : "r" (rb), "i" (0), "i" (0), "i" (0), | ||
831 | "r" (0) : "memory"); | ||
832 | rb += PPC_BIT(51); /* increment set number */ | ||
833 | } | ||
834 | } | ||
835 | asm volatile("ptesync": : :"memory"); | ||
836 | } | ||
837 | |||
838 | void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, | ||
839 | struct kvm_nested_guest *nested) | ||
840 | { | ||
841 | cpumask_t *need_tlb_flush; | ||
842 | |||
843 | /* | ||
844 | * On POWER9, individual threads can come in here, but the | ||
845 | * TLB is shared between the 4 threads in a core, hence | ||
846 | * invalidating on one thread invalidates for all. | ||
847 | * Thus we make all 4 threads use the same bit. | ||
848 | */ | ||
849 | if (cpu_has_feature(CPU_FTR_ARCH_300)) | ||
850 | pcpu = cpu_first_thread_sibling(pcpu); | ||
851 | |||
852 | if (nested) | ||
853 | need_tlb_flush = &nested->need_tlb_flush; | ||
854 | else | ||
855 | need_tlb_flush = &kvm->arch.need_tlb_flush; | ||
856 | |||
857 | if (cpumask_test_cpu(pcpu, need_tlb_flush)) { | ||
858 | flush_guest_tlb(kvm); | ||
859 | |||
860 | /* Clear the bit after the TLB flush */ | ||
861 | cpumask_clear_cpu(pcpu, need_tlb_flush); | ||
862 | } | ||
863 | } | ||
864 | EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush); | ||
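flush_guest_tlb() builds the tlbiel RB operand using IBM bit numbering, where bit 0 is the most significant bit of the 64-bit register. Assuming the usual PPC_BIT(x) = 1UL << (63 - x) definition, the constants line up exactly with the literals used by the assembly loop removed from book3s_hv_rmhandlers.S further down (li r7,0x800 and addi r7,r7,0x1000):

    #define PPC_BIT(x)  (1UL << (63 - (x)))     /* IBM bit 0 = MSB (assumed definition) */

    /* PPC_BIT(52) = 1UL << 11 = 0x800   : IS field = 0b10, as in "li r7,0x800"         */
    /* PPC_BIT(51) = 1UL << 12 = 0x1000  : step to the next set, as in "addi r7,r7,0x1000" */
    /* so for set n the operand is rb = 0x800 + n * 0x1000, with n < kvm->arch.tlb_sets  */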
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 3b3791ed74a6..8431ad1e8391 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/hugetlb.h> | 13 | #include <linux/hugetlb.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/log2.h> | 15 | #include <linux/log2.h> |
16 | #include <linux/sizes.h> | ||
16 | 17 | ||
17 | #include <asm/trace.h> | 18 | #include <asm/trace.h> |
18 | #include <asm/kvm_ppc.h> | 19 | #include <asm/kvm_ppc.h> |
@@ -867,6 +868,149 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, | |||
867 | return ret; | 868 | return ret; |
868 | } | 869 | } |
869 | 870 | ||
871 | static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, | ||
872 | int writing, unsigned long *hpa, | ||
873 | struct kvm_memory_slot **memslot_p) | ||
874 | { | ||
875 | struct kvm *kvm = vcpu->kvm; | ||
876 | struct kvm_memory_slot *memslot; | ||
877 | unsigned long gfn, hva, pa, psize = PAGE_SHIFT; | ||
878 | unsigned int shift; | ||
879 | pte_t *ptep, pte; | ||
880 | |||
881 | /* Find the memslot for this address */ | ||
882 | gfn = gpa >> PAGE_SHIFT; | ||
883 | memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); | ||
884 | if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) | ||
885 | return H_PARAMETER; | ||
886 | |||
887 | /* Translate to host virtual address */ | ||
888 | hva = __gfn_to_hva_memslot(memslot, gfn); | ||
889 | |||
890 | /* Try to find the host pte for that virtual address */ | ||
891 | ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); | ||
892 | if (!ptep) | ||
893 | return H_TOO_HARD; | ||
894 | pte = kvmppc_read_update_linux_pte(ptep, writing); | ||
895 | if (!pte_present(pte)) | ||
896 | return H_TOO_HARD; | ||
897 | |||
898 | /* Convert to a physical address */ | ||
899 | if (shift) | ||
900 | psize = 1UL << shift; | ||
901 | pa = pte_pfn(pte) << PAGE_SHIFT; | ||
902 | pa |= hva & (psize - 1); | ||
903 | pa |= gpa & ~PAGE_MASK; | ||
904 | |||
905 | if (hpa) | ||
906 | *hpa = pa; | ||
907 | if (memslot_p) | ||
908 | *memslot_p = memslot; | ||
909 | |||
910 | return H_SUCCESS; | ||
911 | } | ||
912 | |||
913 | static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, | ||
914 | unsigned long dest) | ||
915 | { | ||
916 | struct kvm_memory_slot *memslot; | ||
917 | struct kvm *kvm = vcpu->kvm; | ||
918 | unsigned long pa, mmu_seq; | ||
919 | long ret = H_SUCCESS; | ||
920 | int i; | ||
921 | |||
922 | /* Used later to detect if we might have been invalidated */ | ||
923 | mmu_seq = kvm->mmu_notifier_seq; | ||
924 | smp_rmb(); | ||
925 | |||
926 | ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); | ||
927 | if (ret != H_SUCCESS) | ||
928 | return ret; | ||
929 | |||
930 | /* Check if we've been invalidated */ | ||
931 | raw_spin_lock(&kvm->mmu_lock.rlock); | ||
932 | if (mmu_notifier_retry(kvm, mmu_seq)) { | ||
933 | ret = H_TOO_HARD; | ||
934 | goto out_unlock; | ||
935 | } | ||
936 | |||
937 | /* Zero the page */ | ||
938 | for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) | ||
939 | dcbz((void *)pa); | ||
940 | kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); | ||
941 | |||
942 | out_unlock: | ||
943 | raw_spin_unlock(&kvm->mmu_lock.rlock); | ||
944 | return ret; | ||
945 | } | ||
946 | |||
947 | static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, | ||
948 | unsigned long dest, unsigned long src) | ||
949 | { | ||
950 | unsigned long dest_pa, src_pa, mmu_seq; | ||
951 | struct kvm_memory_slot *dest_memslot; | ||
952 | struct kvm *kvm = vcpu->kvm; | ||
953 | long ret = H_SUCCESS; | ||
954 | |||
955 | /* Used later to detect if we might have been invalidated */ | ||
956 | mmu_seq = kvm->mmu_notifier_seq; | ||
957 | smp_rmb(); | ||
958 | |||
959 | ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); | ||
960 | if (ret != H_SUCCESS) | ||
961 | return ret; | ||
962 | ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); | ||
963 | if (ret != H_SUCCESS) | ||
964 | return ret; | ||
965 | |||
966 | /* Check if we've been invalidated */ | ||
967 | raw_spin_lock(&kvm->mmu_lock.rlock); | ||
968 | if (mmu_notifier_retry(kvm, mmu_seq)) { | ||
969 | ret = H_TOO_HARD; | ||
970 | goto out_unlock; | ||
971 | } | ||
972 | |||
973 | /* Copy the page */ | ||
974 | memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); | ||
975 | |||
976 | kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); | ||
977 | |||
978 | out_unlock: | ||
979 | raw_spin_unlock(&kvm->mmu_lock.rlock); | ||
980 | return ret; | ||
981 | } | ||
982 | |||
983 | long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, | ||
984 | unsigned long dest, unsigned long src) | ||
985 | { | ||
986 | struct kvm *kvm = vcpu->kvm; | ||
987 | u64 pg_mask = SZ_4K - 1; /* 4K page size */ | ||
988 | long ret = H_SUCCESS; | ||
989 | |||
990 | /* Don't handle radix mode here, go up to the virtual mode handler */ | ||
991 | if (kvm_is_radix(kvm)) | ||
992 | return H_TOO_HARD; | ||
993 | |||
994 | /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ | ||
995 | if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | | ||
996 | H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) | ||
997 | return H_PARAMETER; | ||
998 | |||
999 | /* dest (and src if copy_page flag set) must be page aligned */ | ||
1000 | if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) | ||
1001 | return H_PARAMETER; | ||
1002 | |||
1003 | /* zero and/or copy the page as determined by the flags */ | ||
1004 | if (flags & H_COPY_PAGE) | ||
1005 | ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); | ||
1006 | else if (flags & H_ZERO_PAGE) | ||
1007 | ret = kvmppc_do_h_page_init_zero(vcpu, dest); | ||
1008 | |||
1009 | /* We can ignore the other flags */ | ||
1010 | |||
1011 | return ret; | ||
1012 | } | ||
1013 | |||
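Both real-mode H_PAGE_INIT helpers above rely on the standard KVM mmu-notifier retry idiom: sample kvm->mmu_notifier_seq before translating the guest address, then re-check it under the (raw, real-mode safe) mmu_lock, and if an invalidation raced in, give up with H_TOO_HARD so the hcall is retried by the virtual-mode handler. Condensed, the shape of the pattern is as follows (page work elided, not a drop-in implementation):

    mmu_seq = kvm->mmu_notifier_seq;            /* snapshot before translating          */
    smp_rmb();

    ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot);   /* may itself return H_TOO_HARD */
    if (ret != H_SUCCESS)
            return ret;

    raw_spin_lock(&kvm->mmu_lock.rlock);        /* only the raw lock in real mode       */
    if (mmu_notifier_retry(kvm, mmu_seq))
            ret = H_TOO_HARD;                   /* invalidation raced: punt to virtual mode */
    else
            /* ... zero or copy through 'pa', then mark the page dirty ... */ ;
    raw_spin_unlock(&kvm->mmu_lock.rlock);
    return ret;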
870 | void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, | 1014 | void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, |
871 | unsigned long pte_index) | 1015 | unsigned long pte_index) |
872 | { | 1016 | { |
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index dd014308f065..f9b2620fbecd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -589,11 +589,8 @@ kvmppc_hv_entry: | |||
589 | 1: | 589 | 1: |
590 | #endif | 590 | #endif |
591 | 591 | ||
592 | /* Use cr7 as an indication of radix mode */ | ||
593 | ld r5, HSTATE_KVM_VCORE(r13) | 592 | ld r5, HSTATE_KVM_VCORE(r13) |
594 | ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ | 593 | ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ |
595 | lbz r0, KVM_RADIX(r9) | ||
596 | cmpwi cr7, r0, 0 | ||
597 | 594 | ||
598 | /* | 595 | /* |
599 | * POWER7/POWER8 host -> guest partition switch code. | 596 | * POWER7/POWER8 host -> guest partition switch code. |
@@ -616,9 +613,6 @@ kvmppc_hv_entry: | |||
616 | cmpwi r6,0 | 613 | cmpwi r6,0 |
617 | bne 10f | 614 | bne 10f |
618 | 615 | ||
619 | /* Radix has already switched LPID and flushed core TLB */ | ||
620 | bne cr7, 22f | ||
621 | |||
622 | lwz r7,KVM_LPID(r9) | 616 | lwz r7,KVM_LPID(r9) |
623 | BEGIN_FTR_SECTION | 617 | BEGIN_FTR_SECTION |
624 | ld r6,KVM_SDR1(r9) | 618 | ld r6,KVM_SDR1(r9) |
@@ -630,41 +624,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) | |||
630 | mtspr SPRN_LPID,r7 | 624 | mtspr SPRN_LPID,r7 |
631 | isync | 625 | isync |
632 | 626 | ||
633 | /* See if we need to flush the TLB. Hash has to be done in RM */ | 627 | /* See if we need to flush the TLB. */ |
634 | lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ | 628 | mr r3, r9 /* kvm pointer */ |
635 | BEGIN_FTR_SECTION | 629 | lhz r4, PACAPACAINDEX(r13) /* physical cpu number */ |
636 | /* | 630 | li r5, 0 /* nested vcpu pointer */ |
637 | * On POWER9, individual threads can come in here, but the | 631 | bl kvmppc_check_need_tlb_flush |
638 | * TLB is shared between the 4 threads in a core, hence | 632 | nop |
639 | * invalidating on one thread invalidates for all. | 633 | ld r5, HSTATE_KVM_VCORE(r13) |
640 | * Thus we make all 4 threads use the same bit here. | ||
641 | */ | ||
642 | clrrdi r6,r6,2 | ||
643 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) | ||
644 | clrldi r7,r6,64-6 /* extract bit number (6 bits) */ | ||
645 | srdi r6,r6,6 /* doubleword number */ | ||
646 | sldi r6,r6,3 /* address offset */ | ||
647 | add r6,r6,r9 | ||
648 | addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ | ||
649 | li r8,1 | ||
650 | sld r8,r8,r7 | ||
651 | ld r7,0(r6) | ||
652 | and. r7,r7,r8 | ||
653 | beq 22f | ||
654 | /* Flush the TLB of any entries for this LPID */ | ||
655 | lwz r0,KVM_TLB_SETS(r9) | ||
656 | mtctr r0 | ||
657 | li r7,0x800 /* IS field = 0b10 */ | ||
658 | ptesync | ||
659 | li r0,0 /* RS for P9 version of tlbiel */ | ||
660 | 28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ | ||
661 | addi r7,r7,0x1000 | ||
662 | bdnz 28b | ||
663 | ptesync | ||
664 | 23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ | ||
665 | andc r7,r7,r8 | ||
666 | stdcx. r7,0,r6 | ||
667 | bne 23b | ||
668 | 634 | ||
669 | /* Add timebase offset onto timebase */ | 635 | /* Add timebase offset onto timebase */ |
670 | 22: ld r8,VCORE_TB_OFFSET(r5) | 636 | 22: ld r8,VCORE_TB_OFFSET(r5) |
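The replacement code above calls C from real-mode assembly; under the ELF ABI the first three integer arguments travel in r3, r4 and r5, so the loads before the bl correspond one-to-one to the parameters of the helper added in book3s_hv_builtin.c:

    /* r3 = kvm, r4 = physical cpu number, r5 = nested guest (NULL here) */
    void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
                                     struct kvm_nested_guest *nested);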
@@ -980,17 +946,27 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | |||
980 | 946 | ||
981 | #ifdef CONFIG_KVM_XICS | 947 | #ifdef CONFIG_KVM_XICS |
982 | /* We are entering the guest on that thread, push VCPU to XIVE */ | 948 | /* We are entering the guest on that thread, push VCPU to XIVE */ |
983 | ld r10, HSTATE_XIVE_TIMA_PHYS(r13) | ||
984 | cmpldi cr0, r10, 0 | ||
985 | beq no_xive | ||
986 | ld r11, VCPU_XIVE_SAVED_STATE(r4) | 949 | ld r11, VCPU_XIVE_SAVED_STATE(r4) |
987 | li r9, TM_QW1_OS | 950 | li r9, TM_QW1_OS |
951 | lwz r8, VCPU_XIVE_CAM_WORD(r4) | ||
952 | li r7, TM_QW1_OS + TM_WORD2 | ||
953 | mfmsr r0 | ||
954 | andi. r0, r0, MSR_DR /* in real mode? */ | ||
955 | beq 2f | ||
956 | ld r10, HSTATE_XIVE_TIMA_VIRT(r13) | ||
957 | cmpldi cr1, r10, 0 | ||
958 | beq cr1, no_xive | ||
959 | eieio | ||
960 | stdx r11,r9,r10 | ||
961 | stwx r8,r7,r10 | ||
962 | b 3f | ||
963 | 2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13) | ||
964 | cmpldi cr1, r10, 0 | ||
965 | beq cr1, no_xive | ||
988 | eieio | 966 | eieio |
989 | stdcix r11,r9,r10 | 967 | stdcix r11,r9,r10 |
990 | lwz r11, VCPU_XIVE_CAM_WORD(r4) | 968 | stwcix r8,r7,r10 |
991 | li r9, TM_QW1_OS + TM_WORD2 | 969 | 3: li r9, 1 |
992 | stwcix r11,r9,r10 | ||
993 | li r9, 1 | ||
994 | stb r9, VCPU_XIVE_PUSHED(r4) | 970 | stb r9, VCPU_XIVE_PUSHED(r4) |
995 | eieio | 971 | eieio |
996 | 972 | ||
@@ -1009,12 +985,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) | |||
1009 | * on, we mask it. | 985 | * on, we mask it. |
1010 | */ | 986 | */ |
1011 | lbz r0, VCPU_XIVE_ESC_ON(r4) | 987 | lbz r0, VCPU_XIVE_ESC_ON(r4) |
1012 | cmpwi r0,0 | 988 | cmpwi cr1, r0,0 |
1013 | beq 1f | 989 | beq cr1, 1f |
1014 | ld r10, VCPU_XIVE_ESC_RADDR(r4) | ||
1015 | li r9, XIVE_ESB_SET_PQ_01 | 990 | li r9, XIVE_ESB_SET_PQ_01 |
991 | beq 4f /* in real mode? */ | ||
992 | ld r10, VCPU_XIVE_ESC_VADDR(r4) | ||
993 | ldx r0, r10, r9 | ||
994 | b 5f | ||
995 | 4: ld r10, VCPU_XIVE_ESC_RADDR(r4) | ||
1016 | ldcix r0, r10, r9 | 996 | ldcix r0, r10, r9 |
1017 | sync | 997 | 5: sync |
1018 | 998 | ||
1019 | /* We have a possible subtle race here: The escalation interrupt might | 999 | /* We have a possible subtle race here: The escalation interrupt might |
1020 | * have fired and be on its way to the host queue while we mask it, | 1000 | * have fired and be on its way to the host queue while we mask it, |
@@ -2292,7 +2272,7 @@ hcall_real_table: | |||
2292 | #endif | 2272 | #endif |
2293 | .long 0 /* 0x24 - H_SET_SPRG0 */ | 2273 | .long 0 /* 0x24 - H_SET_SPRG0 */ |
2294 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table | 2274 | .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table |
2295 | .long 0 /* 0x2c */ | 2275 | .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table |
2296 | .long 0 /* 0x30 */ | 2276 | .long 0 /* 0x30 */ |
2297 | .long 0 /* 0x34 */ | 2277 | .long 0 /* 0x34 */ |
2298 | .long 0 /* 0x38 */ | 2278 | .long 0 /* 0x38 */ |
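hcall_real_table packs one 32-bit entry per hcall number, and PAPR hcall numbers advance in steps of 4, so (judging by the 0x24/0x2c/0x30 comments) the hcall number doubles as the byte offset of its entry. H_PAGE_INIT is 0x2c, which is why the slot right after H_SET_DABR (0x28) now points at kvmppc_rm_h_page_init:

    /* offset of an entry = hcall number (entries are 4 bytes, numbers step by 4) */
    /* H_PAGE_INIT (0x2c) : byte offset 0x2c = 32-bit slot 0x2c / 4 = 11          */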
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index f78d002f0fe0..4953957333b7 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c | |||
@@ -166,7 +166,8 @@ static irqreturn_t xive_esc_irq(int irq, void *data) | |||
166 | return IRQ_HANDLED; | 166 | return IRQ_HANDLED; |
167 | } | 167 | } |
168 | 168 | ||
169 | static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | 169 | int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, |
170 | bool single_escalation) | ||
170 | { | 171 | { |
171 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 172 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
172 | struct xive_q *q = &xc->queues[prio]; | 173 | struct xive_q *q = &xc->queues[prio]; |
@@ -185,7 +186,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | |||
185 | return -EIO; | 186 | return -EIO; |
186 | } | 187 | } |
187 | 188 | ||
188 | if (xc->xive->single_escalation) | 189 | if (single_escalation) |
189 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d", | 190 | name = kasprintf(GFP_KERNEL, "kvm-%d-%d", |
190 | vcpu->kvm->arch.lpid, xc->server_num); | 191 | vcpu->kvm->arch.lpid, xc->server_num); |
191 | else | 192 | else |
@@ -217,7 +218,7 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio) | |||
217 | * interrupt, thus leaving it effectively masked after | 218 | * interrupt, thus leaving it effectively masked after |
218 | * it fires once. | 219 | * it fires once. |
219 | */ | 220 | */ |
220 | if (xc->xive->single_escalation) { | 221 | if (single_escalation) { |
221 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); | 222 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]); |
222 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); | 223 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); |
223 | 224 | ||
@@ -291,7 +292,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio) | |||
291 | continue; | 292 | continue; |
292 | rc = xive_provision_queue(vcpu, prio); | 293 | rc = xive_provision_queue(vcpu, prio); |
293 | if (rc == 0 && !xive->single_escalation) | 294 | if (rc == 0 && !xive->single_escalation) |
294 | xive_attach_escalation(vcpu, prio); | 295 | kvmppc_xive_attach_escalation(vcpu, prio, |
296 | xive->single_escalation); | ||
295 | if (rc) | 297 | if (rc) |
296 | return rc; | 298 | return rc; |
297 | } | 299 | } |
@@ -342,7 +344,7 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio) | |||
342 | return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY; | 344 | return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY; |
343 | } | 345 | } |
344 | 346 | ||
345 | static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) | 347 | int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio) |
346 | { | 348 | { |
347 | struct kvm_vcpu *vcpu; | 349 | struct kvm_vcpu *vcpu; |
348 | int i, rc; | 350 | int i, rc; |
@@ -380,11 +382,6 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio) | |||
380 | return -EBUSY; | 382 | return -EBUSY; |
381 | } | 383 | } |
382 | 384 | ||
383 | static u32 xive_vp(struct kvmppc_xive *xive, u32 server) | ||
384 | { | ||
385 | return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); | ||
386 | } | ||
387 | |||
388 | static u8 xive_lock_and_mask(struct kvmppc_xive *xive, | 385 | static u8 xive_lock_and_mask(struct kvmppc_xive *xive, |
389 | struct kvmppc_xive_src_block *sb, | 386 | struct kvmppc_xive_src_block *sb, |
390 | struct kvmppc_xive_irq_state *state) | 387 | struct kvmppc_xive_irq_state *state) |
@@ -430,8 +427,8 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive, | |||
430 | */ | 427 | */ |
431 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { | 428 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { |
432 | xive_native_configure_irq(hw_num, | 429 | xive_native_configure_irq(hw_num, |
433 | xive_vp(xive, state->act_server), | 430 | kvmppc_xive_vp(xive, state->act_server), |
434 | MASKED, state->number); | 431 | MASKED, state->number); |
435 | /* set old_p so we can track if an H_EOI was done */ | 432 | /* set old_p so we can track if an H_EOI was done */ |
436 | state->old_p = true; | 433 | state->old_p = true; |
437 | state->old_q = false; | 434 | state->old_q = false; |
@@ -486,8 +483,8 @@ static void xive_finish_unmask(struct kvmppc_xive *xive, | |||
486 | */ | 483 | */ |
487 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { | 484 | if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) { |
488 | xive_native_configure_irq(hw_num, | 485 | xive_native_configure_irq(hw_num, |
489 | xive_vp(xive, state->act_server), | 486 | kvmppc_xive_vp(xive, state->act_server), |
490 | state->act_priority, state->number); | 487 | state->act_priority, state->number); |
491 | /* If an EOI is needed, do it here */ | 488 | /* If an EOI is needed, do it here */ |
492 | if (!state->old_p) | 489 | if (!state->old_p) |
493 | xive_vm_source_eoi(hw_num, xd); | 490 | xive_vm_source_eoi(hw_num, xd); |
@@ -535,7 +532,7 @@ static int xive_target_interrupt(struct kvm *kvm, | |||
535 | * priority. The count for that new target will have | 532 | * priority. The count for that new target will have |
536 | * already been incremented. | 533 | * already been incremented. |
537 | */ | 534 | */ |
538 | rc = xive_select_target(kvm, &server, prio); | 535 | rc = kvmppc_xive_select_target(kvm, &server, prio); |
539 | 536 | ||
540 | /* | 537 | /* |
541 | * We failed to find a target ? Not much we can do | 538 | * We failed to find a target ? Not much we can do |
@@ -563,7 +560,7 @@ static int xive_target_interrupt(struct kvm *kvm, | |||
563 | kvmppc_xive_select_irq(state, &hw_num, NULL); | 560 | kvmppc_xive_select_irq(state, &hw_num, NULL); |
564 | 561 | ||
565 | return xive_native_configure_irq(hw_num, | 562 | return xive_native_configure_irq(hw_num, |
566 | xive_vp(xive, server), | 563 | kvmppc_xive_vp(xive, server), |
567 | prio, state->number); | 564 | prio, state->number); |
568 | } | 565 | } |
569 | 566 | ||
@@ -849,7 +846,8 @@ int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) | |||
849 | 846 | ||
850 | /* | 847 | /* |
851 | * We can't update the state of a "pushed" VCPU, but that | 848 | * We can't update the state of a "pushed" VCPU, but that |
852 | * shouldn't happen. | 849 | * shouldn't happen because the vcpu->mutex makes running a |
850 | * vcpu mutually exclusive with doing one_reg get/set on it. | ||
853 | */ | 851 | */ |
854 | if (WARN_ON(vcpu->arch.xive_pushed)) | 852 | if (WARN_ON(vcpu->arch.xive_pushed)) |
855 | return -EIO; | 853 | return -EIO; |
@@ -940,6 +938,13 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
940 | /* Turn the IPI hard off */ | 938 | /* Turn the IPI hard off */ |
941 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); | 939 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); |
942 | 940 | ||
941 | /* | ||
942 | * Reset ESB guest mapping. Needed when ESB pages are exposed | ||
943 | * to the guest in XIVE native mode | ||
944 | */ | ||
945 | if (xive->ops && xive->ops->reset_mapped) | ||
946 | xive->ops->reset_mapped(kvm, guest_irq); | ||
947 | |||
943 | /* Grab info about irq */ | 948 | /* Grab info about irq */ |
944 | state->pt_number = hw_irq; | 949 | state->pt_number = hw_irq; |
945 | state->pt_data = irq_data_get_irq_handler_data(host_data); | 950 | state->pt_data = irq_data_get_irq_handler_data(host_data); |
@@ -951,7 +956,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
951 | * which is fine for a never started interrupt. | 956 | * which is fine for a never started interrupt. |
952 | */ | 957 | */ |
953 | xive_native_configure_irq(hw_irq, | 958 | xive_native_configure_irq(hw_irq, |
954 | xive_vp(xive, state->act_server), | 959 | kvmppc_xive_vp(xive, state->act_server), |
955 | state->act_priority, state->number); | 960 | state->act_priority, state->number); |
956 | 961 | ||
957 | /* | 962 | /* |
@@ -1025,9 +1030,17 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
1025 | state->pt_number = 0; | 1030 | state->pt_number = 0; |
1026 | state->pt_data = NULL; | 1031 | state->pt_data = NULL; |
1027 | 1032 | ||
1033 | /* | ||
1034 | * Reset ESB guest mapping. Needed when ESB pages are exposed | ||
1035 | * to the guest in XIVE native mode | ||
1036 | */ | ||
1037 | if (xive->ops && xive->ops->reset_mapped) { | ||
1038 | xive->ops->reset_mapped(kvm, guest_irq); | ||
1039 | } | ||
1040 | |||
1028 | /* Reconfigure the IPI */ | 1041 | /* Reconfigure the IPI */ |
1029 | xive_native_configure_irq(state->ipi_number, | 1042 | xive_native_configure_irq(state->ipi_number, |
1030 | xive_vp(xive, state->act_server), | 1043 | kvmppc_xive_vp(xive, state->act_server), |
1031 | state->act_priority, state->number); | 1044 | state->act_priority, state->number); |
1032 | 1045 | ||
1033 | /* | 1046 | /* |
@@ -1049,7 +1062,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, | |||
1049 | } | 1062 | } |
1050 | EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); | 1063 | EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); |
1051 | 1064 | ||
1052 | static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) | 1065 | void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) |
1053 | { | 1066 | { |
1054 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 1067 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1055 | struct kvm *kvm = vcpu->kvm; | 1068 | struct kvm *kvm = vcpu->kvm; |
@@ -1083,14 +1096,35 @@ static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) | |||
1083 | arch_spin_unlock(&sb->lock); | 1096 | arch_spin_unlock(&sb->lock); |
1084 | } | 1097 | } |
1085 | } | 1098 | } |
1099 | |||
1100 | /* Disable vcpu's escalation interrupt */ | ||
1101 | if (vcpu->arch.xive_esc_on) { | ||
1102 | __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr + | ||
1103 | XIVE_ESB_SET_PQ_01)); | ||
1104 | vcpu->arch.xive_esc_on = false; | ||
1105 | } | ||
1106 | |||
1107 | /* | ||
1108 | * Clear pointers to escalation interrupt ESB. | ||
1109 | * This is safe because the vcpu->mutex is held, preventing | ||
1110 | * any other CPU from concurrently executing a KVM_RUN ioctl. | ||
1111 | */ | ||
1112 | vcpu->arch.xive_esc_vaddr = 0; | ||
1113 | vcpu->arch.xive_esc_raddr = 0; | ||
1086 | } | 1114 | } |
1087 | 1115 | ||
1088 | void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) | 1116 | void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) |
1089 | { | 1117 | { |
1090 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 1118 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1091 | struct kvmppc_xive *xive = xc->xive; | 1119 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; |
1092 | int i; | 1120 | int i; |
1093 | 1121 | ||
1122 | if (!kvmppc_xics_enabled(vcpu)) | ||
1123 | return; | ||
1124 | |||
1125 | if (!xc) | ||
1126 | return; | ||
1127 | |||
1094 | pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); | 1128 | pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); |
1095 | 1129 | ||
1096 | /* Ensure no interrupt is still routed to that VP */ | 1130 | /* Ensure no interrupt is still routed to that VP */ |
@@ -1129,6 +1163,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) | |||
1129 | } | 1163 | } |
1130 | /* Free the VP */ | 1164 | /* Free the VP */ |
1131 | kfree(xc); | 1165 | kfree(xc); |
1166 | |||
1167 | /* Cleanup the vcpu */ | ||
1168 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; | ||
1169 | vcpu->arch.xive_vcpu = NULL; | ||
1132 | } | 1170 | } |
1133 | 1171 | ||
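With the two early returns added at the top and the reset of irq_type/xive_vcpu added at the bottom, kvmppc_xive_cleanup_vcpu() is now safe to call for a vcpu that never connected to the XICS-on-XIVE device, and safe to call repeatedly, which is what the new release path below relies on when it walks every vcpu. An illustration of the now-tolerated pattern (not an actual call site):

    mutex_lock(&vcpu->mutex);           /* serialise against KVM_RUN and one_reg access   */
    kvmppc_xive_cleanup_vcpu(vcpu);     /* frees xc, sets irq_type = KVMPPC_IRQ_DEFAULT    */
    kvmppc_xive_cleanup_vcpu(vcpu);     /* returns early: vcpu is no longer XICS-enabled   */
    mutex_unlock(&vcpu->mutex);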
1134 | int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | 1172 | int kvmppc_xive_connect_vcpu(struct kvm_device *dev, |
@@ -1146,7 +1184,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1146 | } | 1184 | } |
1147 | if (xive->kvm != vcpu->kvm) | 1185 | if (xive->kvm != vcpu->kvm) |
1148 | return -EPERM; | 1186 | return -EPERM; |
1149 | if (vcpu->arch.irq_type) | 1187 | if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) |
1150 | return -EBUSY; | 1188 | return -EBUSY; |
1151 | if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { | 1189 | if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { |
1152 | pr_devel("Duplicate !\n"); | 1190 | pr_devel("Duplicate !\n"); |
@@ -1166,7 +1204,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1166 | xc->xive = xive; | 1204 | xc->xive = xive; |
1167 | xc->vcpu = vcpu; | 1205 | xc->vcpu = vcpu; |
1168 | xc->server_num = cpu; | 1206 | xc->server_num = cpu; |
1169 | xc->vp_id = xive_vp(xive, cpu); | 1207 | xc->vp_id = kvmppc_xive_vp(xive, cpu); |
1170 | xc->mfrr = 0xff; | 1208 | xc->mfrr = 0xff; |
1171 | xc->valid = true; | 1209 | xc->valid = true; |
1172 | 1210 | ||
@@ -1219,7 +1257,8 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1219 | if (xive->qmap & (1 << i)) { | 1257 | if (xive->qmap & (1 << i)) { |
1220 | r = xive_provision_queue(vcpu, i); | 1258 | r = xive_provision_queue(vcpu, i); |
1221 | if (r == 0 && !xive->single_escalation) | 1259 | if (r == 0 && !xive->single_escalation) |
1222 | xive_attach_escalation(vcpu, i); | 1260 | kvmppc_xive_attach_escalation( |
1261 | vcpu, i, xive->single_escalation); | ||
1223 | if (r) | 1262 | if (r) |
1224 | goto bail; | 1263 | goto bail; |
1225 | } else { | 1264 | } else { |
@@ -1234,7 +1273,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, | |||
1234 | } | 1273 | } |
1235 | 1274 | ||
1236 | /* If not done above, attach priority 0 escalation */ | 1275 | /* If not done above, attach priority 0 escalation */ |
1237 | r = xive_attach_escalation(vcpu, 0); | 1276 | r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation); |
1238 | if (r) | 1277 | if (r) |
1239 | goto bail; | 1278 | goto bail; |
1240 | 1279 | ||
@@ -1485,8 +1524,8 @@ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) | |||
1485 | return 0; | 1524 | return 0; |
1486 | } | 1525 | } |
1487 | 1526 | ||
1488 | static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive, | 1527 | struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( |
1489 | int irq) | 1528 | struct kvmppc_xive *xive, int irq) |
1490 | { | 1529 | { |
1491 | struct kvm *kvm = xive->kvm; | 1530 | struct kvm *kvm = xive->kvm; |
1492 | struct kvmppc_xive_src_block *sb; | 1531 | struct kvmppc_xive_src_block *sb; |
@@ -1509,6 +1548,7 @@ static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *x | |||
1509 | 1548 | ||
1510 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | 1549 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
1511 | sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; | 1550 | sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; |
1551 | sb->irq_state[i].eisn = 0; | ||
1512 | sb->irq_state[i].guest_priority = MASKED; | 1552 | sb->irq_state[i].guest_priority = MASKED; |
1513 | sb->irq_state[i].saved_priority = MASKED; | 1553 | sb->irq_state[i].saved_priority = MASKED; |
1514 | sb->irq_state[i].act_priority = MASKED; | 1554 | sb->irq_state[i].act_priority = MASKED; |
@@ -1565,7 +1605,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) | |||
1565 | sb = kvmppc_xive_find_source(xive, irq, &idx); | 1605 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
1566 | if (!sb) { | 1606 | if (!sb) { |
1567 | pr_devel("No source, creating source block...\n"); | 1607 | pr_devel("No source, creating source block...\n"); |
1568 | sb = xive_create_src_block(xive, irq); | 1608 | sb = kvmppc_xive_create_src_block(xive, irq); |
1569 | if (!sb) { | 1609 | if (!sb) { |
1570 | pr_devel("Failed to create block...\n"); | 1610 | pr_devel("Failed to create block...\n"); |
1571 | return -ENOMEM; | 1611 | return -ENOMEM; |
@@ -1789,7 +1829,7 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) | |||
1789 | xive_cleanup_irq_data(xd); | 1829 | xive_cleanup_irq_data(xd); |
1790 | } | 1830 | } |
1791 | 1831 | ||
1792 | static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) | 1832 | void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) |
1793 | { | 1833 | { |
1794 | int i; | 1834 | int i; |
1795 | 1835 | ||
@@ -1810,16 +1850,55 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) | |||
1810 | } | 1850 | } |
1811 | } | 1851 | } |
1812 | 1852 | ||
1813 | static void kvmppc_xive_free(struct kvm_device *dev) | 1853 | /* |
1854 | * Called when device fd is closed. kvm->lock is held. | ||
1855 | */ | ||
1856 | static void kvmppc_xive_release(struct kvm_device *dev) | ||
1814 | { | 1857 | { |
1815 | struct kvmppc_xive *xive = dev->private; | 1858 | struct kvmppc_xive *xive = dev->private; |
1816 | struct kvm *kvm = xive->kvm; | 1859 | struct kvm *kvm = xive->kvm; |
1860 | struct kvm_vcpu *vcpu; | ||
1817 | int i; | 1861 | int i; |
1862 | int was_ready; | ||
1863 | |||
1864 | pr_devel("Releasing xive device\n"); | ||
1818 | 1865 | ||
1819 | debugfs_remove(xive->dentry); | 1866 | debugfs_remove(xive->dentry); |
1820 | 1867 | ||
1821 | if (kvm) | 1868 | /* |
1822 | kvm->arch.xive = NULL; | 1869 | * Clearing mmu_ready temporarily while holding kvm->lock |
1870 | * is a way of ensuring that no vcpus can enter the guest | ||
1871 | * until we drop kvm->lock. Doing kick_all_cpus_sync() | ||
1872 | * ensures that any vcpu executing inside the guest has | ||
1873 | * exited the guest. Once kick_all_cpus_sync() has finished, | ||
1874 | * we know that no vcpu can be executing the XIVE push or | ||
1875 | * pull code, or executing a XICS hcall. | ||
1876 | * | ||
1877 | * Since this is the device release function, we know that | ||
1878 | * userspace does not have any open fd referring to the | ||
1879 | * device. Therefore there can not be any of the device | ||
1880 | * attribute set/get functions being executed concurrently, | ||
1881 | * and similarly, the connect_vcpu and set/clr_mapped | ||
1882 | * functions also cannot be being executed. | ||
1883 | */ | ||
1884 | was_ready = kvm->arch.mmu_ready; | ||
1885 | kvm->arch.mmu_ready = 0; | ||
1886 | kick_all_cpus_sync(); | ||
1887 | |||
1888 | /* | ||
1889 | * We should clean up the vCPU interrupt presenters first. | ||
1890 | */ | ||
1891 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1892 | /* | ||
1893 | * Take vcpu->mutex to ensure that no one_reg get/set ioctl | ||
1894 | * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently. | ||
1895 | */ | ||
1896 | mutex_lock(&vcpu->mutex); | ||
1897 | kvmppc_xive_cleanup_vcpu(vcpu); | ||
1898 | mutex_unlock(&vcpu->mutex); | ||
1899 | } | ||
1900 | |||
1901 | kvm->arch.xive = NULL; | ||
1823 | 1902 | ||
1824 | /* Mask and free interrupts */ | 1903 | /* Mask and free interrupts */ |
1825 | for (i = 0; i <= xive->max_sbid; i++) { | 1904 | for (i = 0; i <= xive->max_sbid; i++) { |
@@ -1832,11 +1911,47 @@ static void kvmppc_xive_free(struct kvm_device *dev) | |||
1832 | if (xive->vp_base != XIVE_INVALID_VP) | 1911 | if (xive->vp_base != XIVE_INVALID_VP) |
1833 | xive_native_free_vp_block(xive->vp_base); | 1912 | xive_native_free_vp_block(xive->vp_base); |
1834 | 1913 | ||
1914 | kvm->arch.mmu_ready = was_ready; | ||
1915 | |||
1916 | /* | ||
1917 | * A reference of the kvmppc_xive pointer is now kept under | ||
1918 | * the xive_devices struct of the machine for reuse. It is | ||
1919 | * freed when the VM is destroyed for now until we fix all the | ||
1920 | * execution paths. | ||
1921 | */ | ||
1835 | 1922 | ||
1836 | kfree(xive); | ||
1837 | kfree(dev); | 1923 | kfree(dev); |
1838 | } | 1924 | } |
1839 | 1925 | ||
1926 | /* | ||
1927 | * When the guest chooses the interrupt mode (XICS legacy or XIVE | ||
1928 | * native), the VM will switch KVM devices. The previous device will ||
1929 | * be "released" before the new one is created. | ||
1930 | * | ||
1931 | * Until we are sure all execution paths are well protected, provide a | ||
1932 | * fail safe (transitional) method for device destruction, in which | ||
1933 | * the XIVE device pointer is recycled and not directly freed. | ||
1934 | */ | ||
1935 | struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type) | ||
1936 | { | ||
1937 | struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ? | ||
1938 | &kvm->arch.xive_devices.native : | ||
1939 | &kvm->arch.xive_devices.xics_on_xive; | ||
1940 | struct kvmppc_xive *xive = *kvm_xive_device; | ||
1941 | |||
1942 | if (!xive) { | ||
1943 | xive = kzalloc(sizeof(*xive), GFP_KERNEL); | ||
1944 | *kvm_xive_device = xive; | ||
1945 | } else { | ||
1946 | memset(xive, 0, sizeof(*xive)); | ||
1947 | } | ||
1948 | |||
1949 | return xive; | ||
1950 | } | ||
1951 | |||
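kvmppc_xive_get_device() keeps one cached kvmppc_xive per interrupt mode in kvm->arch.xive_devices: the first create allocates it, and a create that follows a release gets the same structure back, zeroed, instead of a fresh allocation. That keeps the pointer stable across the guest's XICS/XIVE mode switches until the remaining execution paths are audited. A sketch of the resulting behaviour (not an actual call site):

    struct kvmppc_xive *a = kvmppc_xive_get_device(kvm, KVM_DEV_TYPE_XICS);
    /* ... device fd closed, kvmppc_xive_release() runs, the pointer is retained ... */
    struct kvmppc_xive *b = kvmppc_xive_get_device(kvm, KVM_DEV_TYPE_XICS);
    /* a == b: the cached structure is memset() to zero and reused, not kfree()d     */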
1952 | /* | ||
1953 | * Create a XICS device with XIVE backend. kvm->lock is held. | ||
1954 | */ | ||
1840 | static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | 1955 | static int kvmppc_xive_create(struct kvm_device *dev, u32 type) |
1841 | { | 1956 | { |
1842 | struct kvmppc_xive *xive; | 1957 | struct kvmppc_xive *xive; |
@@ -1845,7 +1960,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | |||
1845 | 1960 | ||
1846 | pr_devel("Creating xive for partition\n"); | 1961 | pr_devel("Creating xive for partition\n"); |
1847 | 1962 | ||
1848 | xive = kzalloc(sizeof(*xive), GFP_KERNEL); | 1963 | xive = kvmppc_xive_get_device(kvm, type); |
1849 | if (!xive) | 1964 | if (!xive) |
1850 | return -ENOMEM; | 1965 | return -ENOMEM; |
1851 | 1966 | ||
@@ -1883,6 +1998,43 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) | |||
1883 | return 0; | 1998 | return 0; |
1884 | } | 1999 | } |
1885 | 2000 | ||
2001 | int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) | ||
2002 | { | ||
2003 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
2004 | unsigned int i; | ||
2005 | |||
2006 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
2007 | struct xive_q *q = &xc->queues[i]; | ||
2008 | u32 i0, i1, idx; | ||
2009 | |||
2010 | if (!q->qpage && !xc->esc_virq[i]) | ||
2011 | continue; | ||
2012 | |||
2013 | seq_printf(m, " [q%d]: ", i); | ||
2014 | |||
2015 | if (q->qpage) { | ||
2016 | idx = q->idx; | ||
2017 | i0 = be32_to_cpup(q->qpage + idx); | ||
2018 | idx = (idx + 1) & q->msk; | ||
2019 | i1 = be32_to_cpup(q->qpage + idx); | ||
2020 | seq_printf(m, "T=%d %08x %08x...\n", q->toggle, | ||
2021 | i0, i1); | ||
2022 | } | ||
2023 | if (xc->esc_virq[i]) { | ||
2024 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); | ||
2025 | struct xive_irq_data *xd = | ||
2026 | irq_data_get_irq_handler_data(d); | ||
2027 | u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); | ||
2028 | |||
2029 | seq_printf(m, "E:%c%c I(%d:%llx:%llx)", | ||
2030 | (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', | ||
2031 | (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', | ||
2032 | xc->esc_virq[i], pq, xd->eoi_page); | ||
2033 | seq_puts(m, "\n"); | ||
2034 | } | ||
2035 | } | ||
2036 | return 0; | ||
2037 | } | ||
1886 | 2038 | ||
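kvmppc_xive_debug_show_queues() is split out of xive_debug_show() so the XIVE-native device added later in this series can reuse it. Going by the format strings, a vcpu with one populated queue and an escalation interrupt would print roughly the following (all values invented for illustration):

     [q7]: T=0 00000023 00000000...
    E:Pq I(21:2:3fff7e0000)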
1887 | static int xive_debug_show(struct seq_file *m, void *private) | 2039 | static int xive_debug_show(struct seq_file *m, void *private) |
1888 | { | 2040 | { |
@@ -1908,7 +2060,6 @@ static int xive_debug_show(struct seq_file *m, void *private) | |||
1908 | 2060 | ||
1909 | kvm_for_each_vcpu(i, vcpu, kvm) { | 2061 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1910 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | 2062 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1911 | unsigned int i; | ||
1912 | 2063 | ||
1913 | if (!xc) | 2064 | if (!xc) |
1914 | continue; | 2065 | continue; |
@@ -1918,33 +2069,8 @@ static int xive_debug_show(struct seq_file *m, void *private) | |||
1918 | xc->server_num, xc->cppr, xc->hw_cppr, | 2069 | xc->server_num, xc->cppr, xc->hw_cppr, |
1919 | xc->mfrr, xc->pending, | 2070 | xc->mfrr, xc->pending, |
1920 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); | 2071 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); |
1921 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
1922 | struct xive_q *q = &xc->queues[i]; | ||
1923 | u32 i0, i1, idx; | ||
1924 | |||
1925 | if (!q->qpage && !xc->esc_virq[i]) | ||
1926 | continue; | ||
1927 | 2072 | ||
1928 | seq_printf(m, " [q%d]: ", i); | 2073 | kvmppc_xive_debug_show_queues(m, vcpu); |
1929 | |||
1930 | if (q->qpage) { | ||
1931 | idx = q->idx; | ||
1932 | i0 = be32_to_cpup(q->qpage + idx); | ||
1933 | idx = (idx + 1) & q->msk; | ||
1934 | i1 = be32_to_cpup(q->qpage + idx); | ||
1935 | seq_printf(m, "T=%d %08x %08x... \n", q->toggle, i0, i1); | ||
1936 | } | ||
1937 | if (xc->esc_virq[i]) { | ||
1938 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); | ||
1939 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); | ||
1940 | u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); | ||
1941 | seq_printf(m, "E:%c%c I(%d:%llx:%llx)", | ||
1942 | (pq & XIVE_ESB_VAL_P) ? 'P' : 'p', | ||
1943 | (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q', | ||
1944 | xc->esc_virq[i], pq, xd->eoi_page); | ||
1945 | seq_printf(m, "\n"); | ||
1946 | } | ||
1947 | } | ||
1948 | 2074 | ||
1949 | t_rm_h_xirr += xc->stat_rm_h_xirr; | 2075 | t_rm_h_xirr += xc->stat_rm_h_xirr; |
1950 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; | 2076 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; |
@@ -1999,7 +2125,7 @@ struct kvm_device_ops kvm_xive_ops = { | |||
1999 | .name = "kvm-xive", | 2125 | .name = "kvm-xive", |
2000 | .create = kvmppc_xive_create, | 2126 | .create = kvmppc_xive_create, |
2001 | .init = kvmppc_xive_init, | 2127 | .init = kvmppc_xive_init, |
2002 | .destroy = kvmppc_xive_free, | 2128 | .release = kvmppc_xive_release, |
2003 | .set_attr = xive_set_attr, | 2129 | .set_attr = xive_set_attr, |
2004 | .get_attr = xive_get_attr, | 2130 | .get_attr = xive_get_attr, |
2005 | .has_attr = xive_has_attr, | 2131 | .has_attr = xive_has_attr, |
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index a08ae6fd4c51..426146332984 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h | |||
@@ -13,6 +13,13 @@ | |||
13 | #include "book3s_xics.h" | 13 | #include "book3s_xics.h" |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * The XIVE Interrupt source numbers are within the range 0 to | ||
17 | * KVMPPC_XICS_NR_IRQS. | ||
18 | */ | ||
19 | #define KVMPPC_XIVE_FIRST_IRQ 0 | ||
20 | #define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS | ||
21 | |||
22 | /* | ||
16 | * State for one guest irq source. | 23 | * State for one guest irq source. |
17 | * | 24 | * |
18 | * For each guest source we allocate a HW interrupt in the XIVE | 25 | * For each guest source we allocate a HW interrupt in the XIVE |
@@ -54,6 +61,9 @@ struct kvmppc_xive_irq_state { | |||
54 | bool saved_p; | 61 | bool saved_p; |
55 | bool saved_q; | 62 | bool saved_q; |
56 | u8 saved_scan_prio; | 63 | u8 saved_scan_prio; |
64 | |||
65 | /* Xive native */ | ||
66 | u32 eisn; /* Guest Effective IRQ number */ | ||
57 | }; | 67 | }; |
58 | 68 | ||
59 | /* Select the "right" interrupt (IPI vs. passthrough) */ | 69 | /* Select the "right" interrupt (IPI vs. passthrough) */ |
@@ -84,6 +94,11 @@ struct kvmppc_xive_src_block { | |||
84 | struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; | 94 | struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; |
85 | }; | 95 | }; |
86 | 96 | ||
97 | struct kvmppc_xive; | ||
98 | |||
99 | struct kvmppc_xive_ops { | ||
100 | int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); | ||
101 | }; | ||
87 | 102 | ||
88 | struct kvmppc_xive { | 103 | struct kvmppc_xive { |
89 | struct kvm *kvm; | 104 | struct kvm *kvm; |
@@ -122,6 +137,10 @@ struct kvmppc_xive { | |||
122 | 137 | ||
123 | /* Flags */ | 138 | /* Flags */ |
124 | u8 single_escalation; | 139 | u8 single_escalation; |
140 | |||
141 | struct kvmppc_xive_ops *ops; | ||
142 | struct address_space *mapping; | ||
143 | struct mutex mapping_lock; | ||
125 | }; | 144 | }; |
126 | 145 | ||
127 | #define KVMPPC_XIVE_Q_COUNT 8 | 146 | #define KVMPPC_XIVE_Q_COUNT 8 |
@@ -198,6 +217,11 @@ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmpp | |||
198 | return xive->src_blocks[bid]; | 217 | return xive->src_blocks[bid]; |
199 | } | 218 | } |
200 | 219 | ||
220 | static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) | ||
221 | { | ||
222 | return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); | ||
223 | } | ||
224 | |||
201 | /* | 225 | /* |
202 | * Mapping between guest priorities and host priorities | 226 | * Mapping between guest priorities and host priorities |
203 | * is as follow. | 227 | * is as follow. |
@@ -248,5 +272,18 @@ extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, | |||
248 | extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); | 272 | extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr); |
249 | extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); | 273 | extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr); |
250 | 274 | ||
275 | /* | ||
276 | * Common Xive routines for XICS-over-XIVE and XIVE native | ||
277 | */ | ||
278 | void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu); | ||
279 | int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu); | ||
280 | struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( | ||
281 | struct kvmppc_xive *xive, int irq); | ||
282 | void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb); | ||
283 | int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); | ||
284 | int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, | ||
285 | bool single_escalation); | ||
286 | struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); | ||
287 | |||
251 | #endif /* CONFIG_KVM_XICS */ | 288 | #endif /* CONFIG_KVM_XICS */ |
252 | #endif /* _KVM_PPC_BOOK3S_XICS_H */ | 289 | #endif /* _KVM_PPC_BOOK3S_XICS_H */ |
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c new file mode 100644 index 000000000000..6a8e698c4b6e --- /dev/null +++ b/arch/powerpc/kvm/book3s_xive_native.c | |||
@@ -0,0 +1,1249 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Copyright (c) 2017-2019, IBM Corporation. | ||
4 | */ | ||
5 | |||
6 | #define pr_fmt(fmt) "xive-kvm: " fmt | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/kvm_host.h> | ||
10 | #include <linux/err.h> | ||
11 | #include <linux/gfp.h> | ||
12 | #include <linux/spinlock.h> | ||
13 | #include <linux/delay.h> | ||
14 | #include <linux/file.h> | ||
15 | #include <asm/uaccess.h> | ||
16 | #include <asm/kvm_book3s.h> | ||
17 | #include <asm/kvm_ppc.h> | ||
18 | #include <asm/hvcall.h> | ||
19 | #include <asm/xive.h> | ||
20 | #include <asm/xive-regs.h> | ||
21 | #include <asm/debug.h> | ||
22 | #include <asm/debugfs.h> | ||
23 | #include <asm/opal.h> | ||
24 | |||
25 | #include <linux/debugfs.h> | ||
26 | #include <linux/seq_file.h> | ||
27 | |||
28 | #include "book3s_xive.h" | ||
29 | |||
30 | static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) | ||
31 | { | ||
32 | u64 val; | ||
33 | |||
34 | if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) | ||
35 | offset |= offset << 4; | ||
36 | |||
37 | val = in_be64(xd->eoi_mmio + offset); | ||
38 | return (u8)val; | ||
39 | } | ||
40 | |||
41 | static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) | ||
42 | { | ||
43 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
44 | struct xive_q *q = &xc->queues[prio]; | ||
45 | |||
46 | xive_native_disable_queue(xc->vp_id, q, prio); | ||
47 | if (q->qpage) { | ||
48 | put_page(virt_to_page(q->qpage)); | ||
49 | q->qpage = NULL; | ||
50 | } | ||
51 | } | ||
52 | |||
53 | void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) | ||
54 | { | ||
55 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
56 | int i; | ||
57 | |||
58 | if (!kvmppc_xive_enabled(vcpu)) | ||
59 | return; | ||
60 | |||
61 | if (!xc) | ||
62 | return; | ||
63 | |||
64 | pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); | ||
65 | |||
66 | /* Ensure no interrupt is still routed to that VP */ | ||
67 | xc->valid = false; | ||
68 | kvmppc_xive_disable_vcpu_interrupts(vcpu); | ||
69 | |||
70 | /* Disable the VP */ | ||
71 | xive_native_disable_vp(xc->vp_id); | ||
72 | |||
73 | /* Free the queues & associated interrupts */ | ||
74 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { | ||
75 | /* Free the escalation irq */ | ||
76 | if (xc->esc_virq[i]) { | ||
77 | free_irq(xc->esc_virq[i], vcpu); | ||
78 | irq_dispose_mapping(xc->esc_virq[i]); | ||
79 | kfree(xc->esc_virq_names[i]); | ||
80 | xc->esc_virq[i] = 0; | ||
81 | } | ||
82 | |||
83 | /* Free the queue */ | ||
84 | kvmppc_xive_native_cleanup_queue(vcpu, i); | ||
85 | } | ||
86 | |||
87 | /* Free the VP */ | ||
88 | kfree(xc); | ||
89 | |||
90 | /* Cleanup the vcpu */ | ||
91 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; | ||
92 | vcpu->arch.xive_vcpu = NULL; | ||
93 | } | ||
94 | |||
95 | int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, | ||
96 | struct kvm_vcpu *vcpu, u32 server_num) | ||
97 | { | ||
98 | struct kvmppc_xive *xive = dev->private; | ||
99 | struct kvmppc_xive_vcpu *xc = NULL; | ||
100 | int rc; | ||
101 | |||
102 | pr_devel("native_connect_vcpu(server=%d)\n", server_num); | ||
103 | |||
104 | if (dev->ops != &kvm_xive_native_ops) { | ||
105 | pr_devel("Wrong ops !\n"); | ||
106 | return -EPERM; | ||
107 | } | ||
108 | if (xive->kvm != vcpu->kvm) | ||
109 | return -EPERM; | ||
110 | if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) | ||
111 | return -EBUSY; | ||
112 | if (server_num >= KVM_MAX_VCPUS) { | ||
113 | pr_devel("Out of bounds !\n"); | ||
114 | return -EINVAL; | ||
115 | } | ||
116 | |||
117 | mutex_lock(&vcpu->kvm->lock); | ||
118 | |||
119 | if (kvmppc_xive_find_server(vcpu->kvm, server_num)) { | ||
120 | pr_devel("Duplicate !\n"); | ||
121 | rc = -EEXIST; | ||
122 | goto bail; | ||
123 | } | ||
124 | |||
125 | xc = kzalloc(sizeof(*xc), GFP_KERNEL); | ||
126 | if (!xc) { | ||
127 | rc = -ENOMEM; | ||
128 | goto bail; | ||
129 | } | ||
130 | |||
131 | vcpu->arch.xive_vcpu = xc; | ||
132 | xc->xive = xive; | ||
133 | xc->vcpu = vcpu; | ||
134 | xc->server_num = server_num; | ||
135 | |||
136 | xc->vp_id = kvmppc_xive_vp(xive, server_num); | ||
137 | xc->valid = true; | ||
138 | vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; | ||
139 | |||
140 | rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); | ||
141 | if (rc) { | ||
142 | pr_err("Failed to get VP info from OPAL: %d\n", rc); | ||
143 | goto bail; | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * Enable the VP first as the single escalation mode will | ||
149 | * affect escalation interrupt numbering | ||
149 | */ | ||
150 | rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); | ||
151 | if (rc) { | ||
152 | pr_err("Failed to enable VP in OPAL: %d\n", rc); | ||
153 | goto bail; | ||
154 | } | ||
155 | |||
156 | /* Configure VCPU fields for use by assembly push/pull */ | ||
157 | vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); | ||
158 | vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); | ||
159 | |||
160 | /* TODO: reset all queues to a clean state ? */ | ||
161 | bail: | ||
162 | mutex_unlock(&vcpu->kvm->lock); | ||
163 | if (rc) | ||
164 | kvmppc_xive_native_cleanup_vcpu(vcpu); | ||
165 | |||
166 | return rc; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Device passthrough support | ||
171 | */ | ||
172 | static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) | ||
173 | { | ||
174 | struct kvmppc_xive *xive = kvm->arch.xive; | ||
175 | |||
176 | if (irq >= KVMPPC_XIVE_NR_IRQS) | ||
177 | return -EINVAL; | ||
178 | |||
179 | /* | ||
180 | * Clear the ESB pages of the IRQ number being mapped (or | ||
181 | * unmapped) into the guest and let the VM fault handler | ||
182 | * repopulate with the appropriate ESB pages (device or IC) | ||
183 | */ | ||
184 | pr_debug("clearing esb pages for girq 0x%lx\n", irq); | ||
185 | mutex_lock(&xive->mapping_lock); | ||
186 | if (xive->mapping) | ||
187 | unmap_mapping_range(xive->mapping, | ||
188 | irq * (2ull << PAGE_SHIFT), | ||
189 | 2ull << PAGE_SHIFT, 1); | ||
190 | mutex_unlock(&xive->mapping_lock); | ||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | static struct kvmppc_xive_ops kvmppc_xive_native_ops = { | ||
195 | .reset_mapped = kvmppc_xive_native_reset_mapped, | ||
196 | }; | ||
197 | |||
198 | static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) | ||
199 | { | ||
200 | struct vm_area_struct *vma = vmf->vma; | ||
201 | struct kvm_device *dev = vma->vm_file->private_data; | ||
202 | struct kvmppc_xive *xive = dev->private; | ||
203 | struct kvmppc_xive_src_block *sb; | ||
204 | struct kvmppc_xive_irq_state *state; | ||
205 | struct xive_irq_data *xd; | ||
206 | u32 hw_num; | ||
207 | u16 src; | ||
208 | u64 page; | ||
209 | unsigned long irq; | ||
210 | u64 page_offset; | ||
211 | |||
212 | /* | ||
213 | * Linux/KVM uses a two-page ESB setting, one for trigger and | ||
214 | * one for EOI | ||
215 | */ | ||
216 | page_offset = vmf->pgoff - vma->vm_pgoff; | ||
217 | irq = page_offset / 2; | ||
218 | |||
219 | sb = kvmppc_xive_find_source(xive, irq, &src); | ||
220 | if (!sb) { | ||
221 | pr_devel("%s: source %lx not found !\n", __func__, irq); | ||
222 | return VM_FAULT_SIGBUS; | ||
223 | } | ||
224 | |||
225 | state = &sb->irq_state[src]; | ||
226 | kvmppc_xive_select_irq(state, &hw_num, &xd); | ||
227 | |||
228 | arch_spin_lock(&sb->lock); | ||
229 | |||
230 | /* | ||
231 | * first/even page is for trigger | ||
232 | * second/odd page is for EOI and management. | ||
233 | */ | ||
234 | page = page_offset % 2 ? xd->eoi_page : xd->trig_page; | ||
235 | arch_spin_unlock(&sb->lock); | ||
236 | |||
237 | if (WARN_ON(!page)) { | ||
238 | pr_err("%s: accessing invalid ESB page for source %lx !\n", | ||
239 | __func__, irq); | ||
240 | return VM_FAULT_SIGBUS; | ||
241 | } | ||
242 | |||
243 | vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT); | ||
244 | return VM_FAULT_NOPAGE; | ||
245 | } | ||
246 | |||
247 | static const struct vm_operations_struct xive_native_esb_vmops = { | ||
248 | .fault = xive_native_esb_fault, | ||
249 | }; | ||
250 | |||
251 | static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf) | ||
252 | { | ||
253 | struct vm_area_struct *vma = vmf->vma; | ||
254 | |||
255 | switch (vmf->pgoff - vma->vm_pgoff) { | ||
256 | case 0: /* HW - forbid access */ | ||
257 | case 1: /* HV - forbid access */ | ||
258 | return VM_FAULT_SIGBUS; | ||
259 | case 2: /* OS */ | ||
260 | vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT); | ||
261 | return VM_FAULT_NOPAGE; | ||
262 | case 3: /* USER - TODO */ | ||
263 | default: | ||
264 | return VM_FAULT_SIGBUS; | ||
265 | } | ||
266 | } | ||
267 | |||
268 | static const struct vm_operations_struct xive_native_tima_vmops = { | ||
269 | .fault = xive_native_tima_fault, | ||
270 | }; | ||
271 | |||
272 | static int kvmppc_xive_native_mmap(struct kvm_device *dev, | ||
273 | struct vm_area_struct *vma) | ||
274 | { | ||
275 | struct kvmppc_xive *xive = dev->private; | ||
276 | |||
277 | /* We only allow mappings at fixed offset for now */ | ||
278 | if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { | ||
279 | if (vma_pages(vma) > 4) | ||
280 | return -EINVAL; | ||
281 | vma->vm_ops = &xive_native_tima_vmops; | ||
282 | } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) { | ||
283 | if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2) | ||
284 | return -EINVAL; | ||
285 | vma->vm_ops = &xive_native_esb_vmops; | ||
286 | } else { | ||
287 | return -EINVAL; | ||
288 | } | ||
289 | |||
290 | vma->vm_flags |= VM_IO | VM_PFNMAP; | ||
291 | vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); | ||
292 | |||
293 | /* | ||
294 | * Grab the KVM device file address_space to be able to clear | ||
295 | * the ESB pages mapping when a device is passed-through into | ||
296 | * the guest. | ||
297 | */ | ||
298 | xive->mapping = vma->vm_file->f_mapping; | ||
299 | return 0; | ||
300 | } | ||
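For context, a minimal userspace sketch (not part of the patch) of how a VMM might consume the two mmap windows handled above, assuming a 64K host page size. The names xive_fd, nr_irqs and XIVE_PAGE_SIZE are illustrative; KVM_XIVE_TIMA_PAGE_OFFSET and KVM_XIVE_ESB_PAGE_OFFSET come from the uapi header added by this series. Both windows must be mapped at their fixed page offset, as enforced by kvmppc_xive_native_mmap():

#include <stdint.h>
#include <sys/mman.h>

#define XIVE_PAGE_SIZE	0x10000UL	/* 64K host pages, assumed */

/* 4-page TIMA window: pages 0/1 (HW/HV) fault with SIGBUS, page 2 is the OS ring */
static void *xive_map_tima(int xive_fd)
{
	return mmap(NULL, 4 * XIVE_PAGE_SIZE, PROT_READ | PROT_WRITE,
		    MAP_SHARED, xive_fd,
		    KVM_XIVE_TIMA_PAGE_OFFSET * XIVE_PAGE_SIZE);
}

/* ESB window: two pages per guest IRQ, even = trigger, odd = EOI/management */
static void *xive_map_esb(int xive_fd, unsigned long nr_irqs)
{
	return mmap(NULL, nr_irqs * 2 * XIVE_PAGE_SIZE,
		    PROT_READ | PROT_WRITE, MAP_SHARED, xive_fd,
		    KVM_XIVE_ESB_PAGE_OFFSET * XIVE_PAGE_SIZE);
}

The ESB pages of guest IRQ girq are then found at base + 2 * girq * XIVE_PAGE_SIZE (trigger) and the following page (EOI), matching the page_offset / 2 arithmetic in the fault handler above.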
301 | |||
302 | static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, | ||
303 | u64 addr) | ||
304 | { | ||
305 | struct kvmppc_xive_src_block *sb; | ||
306 | struct kvmppc_xive_irq_state *state; | ||
307 | u64 __user *ubufp = (u64 __user *) addr; | ||
308 | u64 val; | ||
309 | u16 idx; | ||
310 | int rc; | ||
311 | |||
312 | pr_devel("%s irq=0x%lx\n", __func__, irq); | ||
313 | |||
314 | if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) | ||
315 | return -E2BIG; | ||
316 | |||
317 | sb = kvmppc_xive_find_source(xive, irq, &idx); | ||
318 | if (!sb) { | ||
319 | pr_debug("No source, creating source block...\n"); | ||
320 | sb = kvmppc_xive_create_src_block(xive, irq); | ||
321 | if (!sb) { | ||
322 | pr_err("Failed to create block...\n"); | ||
323 | return -ENOMEM; | ||
324 | } | ||
325 | } | ||
326 | state = &sb->irq_state[idx]; | ||
327 | |||
328 | if (get_user(val, ubufp)) { | ||
329 | pr_err("fault getting user info !\n"); | ||
330 | return -EFAULT; | ||
331 | } | ||
332 | |||
333 | arch_spin_lock(&sb->lock); | ||
334 | |||
335 | /* | ||
336 | * If the source doesn't already have an IPI, allocate | ||
337 | * one and get the corresponding data | ||
338 | */ | ||
339 | if (!state->ipi_number) { | ||
340 | state->ipi_number = xive_native_alloc_irq(); | ||
341 | if (state->ipi_number == 0) { | ||
342 | pr_err("Failed to allocate IRQ !\n"); | ||
343 | rc = -ENXIO; | ||
344 | goto unlock; | ||
345 | } | ||
346 | xive_native_populate_irq_data(state->ipi_number, | ||
347 | &state->ipi_data); | ||
348 | pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__, | ||
349 | state->ipi_number, irq); | ||
350 | } | ||
351 | |||
352 | /* Restore LSI state */ | ||
353 | if (val & KVM_XIVE_LEVEL_SENSITIVE) { | ||
354 | state->lsi = true; | ||
355 | if (val & KVM_XIVE_LEVEL_ASSERTED) | ||
356 | state->asserted = true; | ||
357 | pr_devel(" LSI ! Asserted=%d\n", state->asserted); | ||
358 | } | ||
359 | |||
360 | /* Mask IRQ to start with */ | ||
361 | state->act_server = 0; | ||
362 | state->act_priority = MASKED; | ||
363 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); | ||
364 | xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); | ||
365 | |||
366 | /* Increment the number of valid sources and mark this one valid */ | ||
367 | if (!state->valid) | ||
368 | xive->src_count++; | ||
369 | state->valid = true; | ||
370 | |||
371 | rc = 0; | ||
372 | |||
373 | unlock: | ||
374 | arch_spin_unlock(&sb->lock); | ||
375 | |||
376 | return rc; | ||
377 | } | ||
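As an illustration only, a hedged sketch of the userspace side of this control: KVM_DEV_XIVE_GRP_SOURCE takes the guest IRQ number in attr and a pointer to a 64-bit state word in addr. The KVM_DEV_XIVE_* and KVM_XIVE_LEVEL_* constants come from the powerpc uapi header added by the series; xive_fd and girq are assumed names.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int xive_set_source(int xive_fd, uint64_t girq, int lsi, int asserted)
{
	uint64_t state = 0;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_XIVE_GRP_SOURCE,
		.attr  = girq,
		.addr  = (uintptr_t)&state,
	};

	if (lsi)
		state |= KVM_XIVE_LEVEL_SENSITIVE;	/* level-sensitive source */
	if (asserted)
		state |= KVM_XIVE_LEVEL_ASSERTED;	/* restore an asserted LSI */

	return ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
}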
378 | |||
379 | static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive, | ||
380 | struct kvmppc_xive_src_block *sb, | ||
381 | struct kvmppc_xive_irq_state *state, | ||
382 | u32 server, u8 priority, bool masked, | ||
383 | u32 eisn) | ||
384 | { | ||
385 | struct kvm *kvm = xive->kvm; | ||
386 | u32 hw_num; | ||
387 | int rc = 0; | ||
388 | |||
389 | arch_spin_lock(&sb->lock); | ||
390 | |||
391 | if (state->act_server == server && state->act_priority == priority && | ||
392 | state->eisn == eisn) | ||
393 | goto unlock; | ||
394 | |||
395 | pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n", | ||
396 | priority, server, masked, state->act_server, | ||
397 | state->act_priority); | ||
398 | |||
399 | kvmppc_xive_select_irq(state, &hw_num, NULL); | ||
400 | |||
401 | if (priority != MASKED && !masked) { | ||
402 | rc = kvmppc_xive_select_target(kvm, &server, priority); | ||
403 | if (rc) | ||
404 | goto unlock; | ||
405 | |||
406 | state->act_priority = priority; | ||
407 | state->act_server = server; | ||
408 | state->eisn = eisn; | ||
409 | |||
410 | rc = xive_native_configure_irq(hw_num, | ||
411 | kvmppc_xive_vp(xive, server), | ||
412 | priority, eisn); | ||
413 | } else { | ||
414 | state->act_priority = MASKED; | ||
415 | state->act_server = 0; | ||
416 | state->eisn = 0; | ||
417 | |||
418 | rc = xive_native_configure_irq(hw_num, 0, MASKED, 0); | ||
419 | } | ||
420 | |||
421 | unlock: | ||
422 | arch_spin_unlock(&sb->lock); | ||
423 | return rc; | ||
424 | } | ||
425 | |||
426 | static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, | ||
427 | long irq, u64 addr) | ||
428 | { | ||
429 | struct kvmppc_xive_src_block *sb; | ||
430 | struct kvmppc_xive_irq_state *state; | ||
431 | u64 __user *ubufp = (u64 __user *) addr; | ||
432 | u16 src; | ||
433 | u64 kvm_cfg; | ||
434 | u32 server; | ||
435 | u8 priority; | ||
436 | bool masked; | ||
437 | u32 eisn; | ||
438 | |||
439 | sb = kvmppc_xive_find_source(xive, irq, &src); | ||
440 | if (!sb) | ||
441 | return -ENOENT; | ||
442 | |||
443 | state = &sb->irq_state[src]; | ||
444 | |||
445 | if (!state->valid) | ||
446 | return -EINVAL; | ||
447 | |||
448 | if (get_user(kvm_cfg, ubufp)) | ||
449 | return -EFAULT; | ||
450 | |||
451 | pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg); | ||
452 | |||
453 | priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >> | ||
454 | KVM_XIVE_SOURCE_PRIORITY_SHIFT; | ||
455 | server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >> | ||
456 | KVM_XIVE_SOURCE_SERVER_SHIFT; | ||
457 | masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >> | ||
458 | KVM_XIVE_SOURCE_MASKED_SHIFT; | ||
459 | eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >> | ||
460 | KVM_XIVE_SOURCE_EISN_SHIFT; | ||
461 | |||
462 | if (priority != xive_prio_from_guest(priority)) { | ||
463 | pr_err("invalid priority for queue %d for VCPU %d\n", | ||
464 | priority, server); | ||
465 | return -EINVAL; | ||
466 | } | ||
467 | |||
468 | return kvmppc_xive_native_update_source_config(xive, sb, state, server, | ||
469 | priority, masked, eisn); | ||
470 | } | ||
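Again purely as a sketch, under the assumption that the KVM_XIVE_SOURCE_* shifts match the new uapi header, the 64-bit configuration word decoded above would be packed by userspace as follows; leaving the masked bit clear routes the source, the masked case being handled by the kernel as shown.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int xive_set_source_config(int xive_fd, uint64_t girq,
				  uint32_t server, uint8_t prio, uint32_t eisn)
{
	uint64_t cfg = ((uint64_t)prio << KVM_XIVE_SOURCE_PRIORITY_SHIFT) |
		       ((uint64_t)server << KVM_XIVE_SOURCE_SERVER_SHIFT) |
		       ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT);
	struct kvm_device_attr attr = {
		.group = KVM_DEV_XIVE_GRP_SOURCE_CONFIG,
		.attr  = girq,
		.addr  = (uintptr_t)&cfg,
	};

	return ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
}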
471 | |||
472 | static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive, | ||
473 | long irq, u64 addr) | ||
474 | { | ||
475 | struct kvmppc_xive_src_block *sb; | ||
476 | struct kvmppc_xive_irq_state *state; | ||
477 | struct xive_irq_data *xd; | ||
478 | u32 hw_num; | ||
479 | u16 src; | ||
480 | int rc = 0; | ||
481 | |||
482 | pr_devel("%s irq=0x%lx", __func__, irq); | ||
483 | |||
484 | sb = kvmppc_xive_find_source(xive, irq, &src); | ||
485 | if (!sb) | ||
486 | return -ENOENT; | ||
487 | |||
488 | state = &sb->irq_state[src]; | ||
489 | |||
490 | rc = -EINVAL; | ||
491 | |||
492 | arch_spin_lock(&sb->lock); | ||
493 | |||
494 | if (state->valid) { | ||
495 | kvmppc_xive_select_irq(state, &hw_num, &xd); | ||
496 | xive_native_sync_source(hw_num); | ||
497 | rc = 0; | ||
498 | } | ||
499 | |||
500 | arch_spin_unlock(&sb->lock); | ||
501 | return rc; | ||
502 | } | ||
503 | |||
504 | static int xive_native_validate_queue_size(u32 qshift) | ||
505 | { | ||
506 | /* | ||
507 | * We only support 64K pages for the moment. This is also | ||
508 | * advertised in the DT property "ibm,xive-eq-sizes" | ||
509 | */ | ||
510 | switch (qshift) { | ||
511 | case 0: /* EQ reset */ | ||
512 | case 16: | ||
513 | return 0; | ||
514 | case 12: | ||
515 | case 21: | ||
516 | case 24: | ||
517 | default: | ||
518 | return -EINVAL; | ||
519 | } | ||
520 | } | ||
521 | |||
522 | static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, | ||
523 | long eq_idx, u64 addr) | ||
524 | { | ||
525 | struct kvm *kvm = xive->kvm; | ||
526 | struct kvm_vcpu *vcpu; | ||
527 | struct kvmppc_xive_vcpu *xc; | ||
528 | void __user *ubufp = (void __user *) addr; | ||
529 | u32 server; | ||
530 | u8 priority; | ||
531 | struct kvm_ppc_xive_eq kvm_eq; | ||
532 | int rc; | ||
533 | __be32 *qaddr = 0; | ||
534 | struct page *page; | ||
535 | struct xive_q *q; | ||
536 | gfn_t gfn; | ||
537 | unsigned long page_size; | ||
538 | |||
539 | /* | ||
540 | * Demangle priority/server tuple from the EQ identifier | ||
541 | */ | ||
542 | priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> | ||
543 | KVM_XIVE_EQ_PRIORITY_SHIFT; | ||
544 | server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> | ||
545 | KVM_XIVE_EQ_SERVER_SHIFT; | ||
546 | |||
547 | if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq))) | ||
548 | return -EFAULT; | ||
549 | |||
550 | vcpu = kvmppc_xive_find_server(kvm, server); | ||
551 | if (!vcpu) { | ||
552 | pr_err("Can't find server %d\n", server); | ||
553 | return -ENOENT; | ||
554 | } | ||
555 | xc = vcpu->arch.xive_vcpu; | ||
556 | |||
557 | if (priority != xive_prio_from_guest(priority)) { | ||
558 | pr_err("Trying to restore invalid queue %d for VCPU %d\n", | ||
559 | priority, server); | ||
560 | return -EINVAL; | ||
561 | } | ||
562 | q = &xc->queues[priority]; | ||
563 | |||
564 | pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", | ||
565 | __func__, server, priority, kvm_eq.flags, | ||
566 | kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); | ||
567 | |||
568 | /* | ||
569 | * sPAPR specifies an "Unconditional Notify (n)" flag for the | ||
570 | * H_INT_SET_QUEUE_CONFIG hcall which forces notification | ||
571 | * without using the coalescing mechanisms provided by the | ||
572 | * XIVE END ESBs. This is required on KVM as notification | ||
573 | * using the END ESBs is not supported. | ||
574 | */ | ||
575 | if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { | ||
576 | pr_err("invalid flags %d\n", kvm_eq.flags); | ||
577 | return -EINVAL; | ||
578 | } | ||
579 | |||
580 | rc = xive_native_validate_queue_size(kvm_eq.qshift); | ||
581 | if (rc) { | ||
582 | pr_err("invalid queue size %d\n", kvm_eq.qshift); | ||
583 | return rc; | ||
584 | } | ||
585 | |||
586 | /* reset queue and disable queueing */ | ||
587 | if (!kvm_eq.qshift) { | ||
588 | q->guest_qaddr = 0; | ||
589 | q->guest_qshift = 0; | ||
590 | |||
591 | rc = xive_native_configure_queue(xc->vp_id, q, priority, | ||
592 | NULL, 0, true); | ||
593 | if (rc) { | ||
594 | pr_err("Failed to reset queue %d for VCPU %d: %d\n", | ||
595 | priority, xc->server_num, rc); | ||
596 | return rc; | ||
597 | } | ||
598 | |||
599 | if (q->qpage) { | ||
600 | put_page(virt_to_page(q->qpage)); | ||
601 | q->qpage = NULL; | ||
602 | } | ||
603 | |||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { | ||
608 | pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr, | ||
609 | 1ull << kvm_eq.qshift); | ||
610 | return -EINVAL; | ||
611 | } | ||
612 | |||
613 | gfn = gpa_to_gfn(kvm_eq.qaddr); | ||
614 | page = gfn_to_page(kvm, gfn); | ||
615 | if (is_error_page(page)) { | ||
616 | pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); | ||
617 | return -EINVAL; | ||
618 | } | ||
619 | |||
620 | page_size = kvm_host_page_size(kvm, gfn); | ||
621 | if (1ull << kvm_eq.qshift > page_size) { | ||
622 | pr_warn("Incompatible host page size %lx!\n", page_size); | ||
623 | return -EINVAL; | ||
624 | } | ||
625 | |||
626 | qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); | ||
627 | |||
628 | /* | ||
629 | * Back up the queue page guest address so the EQ page can be | ||
630 | * marked dirty for migration. | ||
631 | */ | ||
632 | q->guest_qaddr = kvm_eq.qaddr; | ||
633 | q->guest_qshift = kvm_eq.qshift; | ||
634 | |||
635 | /* | ||
636 | * Unconditional Notification is forced by default at the | ||
637 | * OPAL level because the use of END ESBs is not supported by | ||
638 | * Linux. | ||
639 | */ | ||
640 | rc = xive_native_configure_queue(xc->vp_id, q, priority, | ||
641 | (__be32 *) qaddr, kvm_eq.qshift, true); | ||
642 | if (rc) { | ||
643 | pr_err("Failed to configure queue %d for VCPU %d: %d\n", | ||
644 | priority, xc->server_num, rc); | ||
645 | put_page(page); | ||
646 | return rc; | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | * Only restore the queue state when needed. When doing the | ||
651 | * H_INT_SET_QUEUE_CONFIG hcall, it should not. | ||
652 | */ | ||
653 | if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) { | ||
654 | rc = xive_native_set_queue_state(xc->vp_id, priority, | ||
655 | kvm_eq.qtoggle, | ||
656 | kvm_eq.qindex); | ||
657 | if (rc) | ||
658 | goto error; | ||
659 | } | ||
660 | |||
661 | rc = kvmppc_xive_attach_escalation(vcpu, priority, | ||
662 | xive->single_escalation); | ||
663 | error: | ||
664 | if (rc) | ||
665 | kvmppc_xive_native_cleanup_queue(vcpu, priority); | ||
666 | return rc; | ||
667 | } | ||
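For illustration, a hedged userspace sketch of configuring one event queue through this control, assuming the KVM_XIVE_EQ_* constants and struct kvm_ppc_xive_eq layout from the new uapi header; xive_fd and guest_qaddr are illustrative. Only 64K queues (qshift = 16) and the always-notify mode are accepted, as enforced above, and the default qtoggle/qindex values skip the state restore path.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int xive_set_queue(int xive_fd, uint32_t server, uint8_t prio,
			  uint64_t guest_qaddr)
{
	struct kvm_ppc_xive_eq eq = {
		.flags   = KVM_XIVE_EQ_ALWAYS_NOTIFY,	/* mandatory, see above */
		.qshift  = 16,				/* only 64K queues are valid */
		.qaddr   = guest_qaddr,			/* 64K-aligned guest address */
		.qtoggle = 1,				/* fresh queue: no state restore */
		.qindex  = 0,
	};
	struct kvm_device_attr attr = {
		.group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
		.attr  = ((uint64_t)server << KVM_XIVE_EQ_SERVER_SHIFT) |
			 ((uint64_t)prio << KVM_XIVE_EQ_PRIORITY_SHIFT),
		.addr  = (uintptr_t)&eq,
	};

	return ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
}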
668 | |||
669 | static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, | ||
670 | long eq_idx, u64 addr) | ||
671 | { | ||
672 | struct kvm *kvm = xive->kvm; | ||
673 | struct kvm_vcpu *vcpu; | ||
674 | struct kvmppc_xive_vcpu *xc; | ||
675 | struct xive_q *q; | ||
676 | void __user *ubufp = (u64 __user *) addr; | ||
677 | u32 server; | ||
678 | u8 priority; | ||
679 | struct kvm_ppc_xive_eq kvm_eq; | ||
680 | u64 qaddr; | ||
681 | u64 qshift; | ||
682 | u64 qeoi_page; | ||
683 | u32 escalate_irq; | ||
684 | u64 qflags; | ||
685 | int rc; | ||
686 | |||
687 | /* | ||
688 | * Demangle priority/server tuple from the EQ identifier | ||
689 | */ | ||
690 | priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> | ||
691 | KVM_XIVE_EQ_PRIORITY_SHIFT; | ||
692 | server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> | ||
693 | KVM_XIVE_EQ_SERVER_SHIFT; | ||
694 | |||
695 | vcpu = kvmppc_xive_find_server(kvm, server); | ||
696 | if (!vcpu) { | ||
697 | pr_err("Can't find server %d\n", server); | ||
698 | return -ENOENT; | ||
699 | } | ||
700 | xc = vcpu->arch.xive_vcpu; | ||
701 | |||
702 | if (priority != xive_prio_from_guest(priority)) { | ||
703 | pr_err("invalid priority for queue %d for VCPU %d\n", | ||
704 | priority, server); | ||
705 | return -EINVAL; | ||
706 | } | ||
707 | q = &xc->queues[priority]; | ||
708 | |||
709 | memset(&kvm_eq, 0, sizeof(kvm_eq)); | ||
710 | |||
711 | if (!q->qpage) | ||
712 | return 0; | ||
713 | |||
714 | rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift, | ||
715 | &qeoi_page, &escalate_irq, &qflags); | ||
716 | if (rc) | ||
717 | return rc; | ||
718 | |||
719 | kvm_eq.flags = 0; | ||
720 | if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) | ||
721 | kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; | ||
722 | |||
723 | kvm_eq.qshift = q->guest_qshift; | ||
724 | kvm_eq.qaddr = q->guest_qaddr; | ||
725 | |||
726 | rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle, | ||
727 | &kvm_eq.qindex); | ||
728 | if (rc) | ||
729 | return rc; | ||
730 | |||
731 | pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", | ||
732 | __func__, server, priority, kvm_eq.flags, | ||
733 | kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); | ||
734 | |||
735 | if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq))) | ||
736 | return -EFAULT; | ||
737 | |||
738 | return 0; | ||
739 | } | ||
740 | |||
741 | static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb) | ||
742 | { | ||
743 | int i; | ||
744 | |||
745 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | ||
746 | struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; | ||
747 | |||
748 | if (!state->valid) | ||
749 | continue; | ||
750 | |||
751 | if (state->act_priority == MASKED) | ||
752 | continue; | ||
753 | |||
754 | state->eisn = 0; | ||
755 | state->act_server = 0; | ||
756 | state->act_priority = MASKED; | ||
757 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); | ||
758 | xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); | ||
759 | if (state->pt_number) { | ||
760 | xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01); | ||
761 | xive_native_configure_irq(state->pt_number, | ||
762 | 0, MASKED, 0); | ||
763 | } | ||
764 | } | ||
765 | } | ||
766 | |||
767 | static int kvmppc_xive_reset(struct kvmppc_xive *xive) | ||
768 | { | ||
769 | struct kvm *kvm = xive->kvm; | ||
770 | struct kvm_vcpu *vcpu; | ||
771 | unsigned int i; | ||
772 | |||
773 | pr_devel("%s\n", __func__); | ||
774 | |||
775 | mutex_lock(&kvm->lock); | ||
776 | |||
777 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
778 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
779 | unsigned int prio; | ||
780 | |||
781 | if (!xc) | ||
782 | continue; | ||
783 | |||
784 | kvmppc_xive_disable_vcpu_interrupts(vcpu); | ||
785 | |||
786 | for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { | ||
787 | |||
788 | /* Single escalation, no queue 7 */ | ||
789 | if (prio == 7 && xive->single_escalation) | ||
790 | break; | ||
791 | |||
792 | if (xc->esc_virq[prio]) { | ||
793 | free_irq(xc->esc_virq[prio], vcpu); | ||
794 | irq_dispose_mapping(xc->esc_virq[prio]); | ||
795 | kfree(xc->esc_virq_names[prio]); | ||
796 | xc->esc_virq[prio] = 0; | ||
797 | } | ||
798 | |||
799 | kvmppc_xive_native_cleanup_queue(vcpu, prio); | ||
800 | } | ||
801 | } | ||
802 | |||
803 | for (i = 0; i <= xive->max_sbid; i++) { | ||
804 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; | ||
805 | |||
806 | if (sb) { | ||
807 | arch_spin_lock(&sb->lock); | ||
808 | kvmppc_xive_reset_sources(sb); | ||
809 | arch_spin_unlock(&sb->lock); | ||
810 | } | ||
811 | } | ||
812 | |||
813 | mutex_unlock(&kvm->lock); | ||
814 | |||
815 | return 0; | ||
816 | } | ||
817 | |||
818 | static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb) | ||
819 | { | ||
820 | int j; | ||
821 | |||
822 | for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) { | ||
823 | struct kvmppc_xive_irq_state *state = &sb->irq_state[j]; | ||
824 | struct xive_irq_data *xd; | ||
825 | u32 hw_num; | ||
826 | |||
827 | if (!state->valid) | ||
828 | continue; | ||
829 | |||
830 | /* | ||
831 | * The struct kvmppc_xive_irq_state reflects the state | ||
832 | * of the EAS configuration and not the state of the | ||
833 | * source. The source is masked by setting the PQ bits to | ||
834 | * '-Q', which is what is being done before calling | ||
835 | * the KVM_DEV_XIVE_EQ_SYNC control. | ||
836 | * | ||
837 | * If a source EAS is configured, OPAL syncs the XIVE | ||
838 | * IC of the source and the XIVE IC of the previous | ||
839 | * target if any. | ||
840 | * | ||
841 | * So it should be fine ignoring MASKED sources as | ||
842 | * they have been synced already. | ||
843 | */ | ||
844 | if (state->act_priority == MASKED) | ||
845 | continue; | ||
846 | |||
847 | kvmppc_xive_select_irq(state, &hw_num, &xd); | ||
848 | xive_native_sync_source(hw_num); | ||
849 | xive_native_sync_queue(hw_num); | ||
850 | } | ||
851 | } | ||
852 | |||
853 | static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu) | ||
854 | { | ||
855 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
856 | unsigned int prio; | ||
857 | |||
858 | if (!xc) | ||
859 | return -ENOENT; | ||
860 | |||
861 | for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { | ||
862 | struct xive_q *q = &xc->queues[prio]; | ||
863 | |||
864 | if (!q->qpage) | ||
865 | continue; | ||
866 | |||
867 | /* Mark EQ page dirty for migration */ | ||
868 | mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr)); | ||
869 | } | ||
870 | return 0; | ||
871 | } | ||
872 | |||
873 | static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive) | ||
874 | { | ||
875 | struct kvm *kvm = xive->kvm; | ||
876 | struct kvm_vcpu *vcpu; | ||
877 | unsigned int i; | ||
878 | |||
879 | pr_devel("%s\n", __func__); | ||
880 | |||
881 | mutex_lock(&kvm->lock); | ||
882 | for (i = 0; i <= xive->max_sbid; i++) { | ||
883 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; | ||
884 | |||
885 | if (sb) { | ||
886 | arch_spin_lock(&sb->lock); | ||
887 | kvmppc_xive_native_sync_sources(sb); | ||
888 | arch_spin_unlock(&sb->lock); | ||
889 | } | ||
890 | } | ||
891 | |||
892 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
893 | kvmppc_xive_native_vcpu_eq_sync(vcpu); | ||
894 | } | ||
895 | mutex_unlock(&kvm->lock); | ||
896 | |||
897 | return 0; | ||
898 | } | ||
899 | |||
900 | static int kvmppc_xive_native_set_attr(struct kvm_device *dev, | ||
901 | struct kvm_device_attr *attr) | ||
902 | { | ||
903 | struct kvmppc_xive *xive = dev->private; | ||
904 | |||
905 | switch (attr->group) { | ||
906 | case KVM_DEV_XIVE_GRP_CTRL: | ||
907 | switch (attr->attr) { | ||
908 | case KVM_DEV_XIVE_RESET: | ||
909 | return kvmppc_xive_reset(xive); | ||
910 | case KVM_DEV_XIVE_EQ_SYNC: | ||
911 | return kvmppc_xive_native_eq_sync(xive); | ||
912 | } | ||
913 | break; | ||
914 | case KVM_DEV_XIVE_GRP_SOURCE: | ||
915 | return kvmppc_xive_native_set_source(xive, attr->attr, | ||
916 | attr->addr); | ||
917 | case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: | ||
918 | return kvmppc_xive_native_set_source_config(xive, attr->attr, | ||
919 | attr->addr); | ||
920 | case KVM_DEV_XIVE_GRP_EQ_CONFIG: | ||
921 | return kvmppc_xive_native_set_queue_config(xive, attr->attr, | ||
922 | attr->addr); | ||
923 | case KVM_DEV_XIVE_GRP_SOURCE_SYNC: | ||
924 | return kvmppc_xive_native_sync_source(xive, attr->attr, | ||
925 | attr->addr); | ||
926 | } | ||
927 | return -ENXIO; | ||
928 | } | ||
929 | |||
930 | static int kvmppc_xive_native_get_attr(struct kvm_device *dev, | ||
931 | struct kvm_device_attr *attr) | ||
932 | { | ||
933 | struct kvmppc_xive *xive = dev->private; | ||
934 | |||
935 | switch (attr->group) { | ||
936 | case KVM_DEV_XIVE_GRP_EQ_CONFIG: | ||
937 | return kvmppc_xive_native_get_queue_config(xive, attr->attr, | ||
938 | attr->addr); | ||
939 | } | ||
940 | return -ENXIO; | ||
941 | } | ||
942 | |||
943 | static int kvmppc_xive_native_has_attr(struct kvm_device *dev, | ||
944 | struct kvm_device_attr *attr) | ||
945 | { | ||
946 | switch (attr->group) { | ||
947 | case KVM_DEV_XIVE_GRP_CTRL: | ||
948 | switch (attr->attr) { | ||
949 | case KVM_DEV_XIVE_RESET: | ||
950 | case KVM_DEV_XIVE_EQ_SYNC: | ||
951 | return 0; | ||
952 | } | ||
953 | break; | ||
954 | case KVM_DEV_XIVE_GRP_SOURCE: | ||
955 | case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: | ||
956 | case KVM_DEV_XIVE_GRP_SOURCE_SYNC: | ||
957 | if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && | ||
958 | attr->attr < KVMPPC_XIVE_NR_IRQS) | ||
959 | return 0; | ||
960 | break; | ||
961 | case KVM_DEV_XIVE_GRP_EQ_CONFIG: | ||
962 | return 0; | ||
963 | } | ||
964 | return -ENXIO; | ||
965 | } | ||
966 | |||
967 | /* | ||
968 | * Called when device fd is closed | ||
969 | */ | ||
970 | static void kvmppc_xive_native_release(struct kvm_device *dev) | ||
971 | { | ||
972 | struct kvmppc_xive *xive = dev->private; | ||
973 | struct kvm *kvm = xive->kvm; | ||
974 | struct kvm_vcpu *vcpu; | ||
975 | int i; | ||
976 | int was_ready; | ||
977 | |||
978 | debugfs_remove(xive->dentry); | ||
979 | |||
980 | pr_devel("Releasing xive native device\n"); | ||
981 | |||
982 | /* | ||
983 | * Clearing mmu_ready temporarily while holding kvm->lock | ||
984 | * is a way of ensuring that no vcpus can enter the guest | ||
985 | * until we drop kvm->lock. Doing kick_all_cpus_sync() | ||
986 | * ensures that any vcpu executing inside the guest has | ||
987 | * exited the guest. Once kick_all_cpus_sync() has finished, | ||
988 | * we know that no vcpu can be executing the XIVE push or | ||
989 | * pull code or accessing the XIVE MMIO regions. | ||
990 | * | ||
991 | * Since this is the device release function, we know that | ||
992 | * userspace does not have any open fd or mmap referring to | ||
993 | * the device. Therefore there cannot be any of the | ||
994 | * device attribute set/get, mmap, or page fault functions | ||
995 | * being executed concurrently, and similarly, the | ||
996 | * connect_vcpu and set/clr_mapped functions also cannot | ||
997 | * be executing. | ||
998 | */ | ||
999 | was_ready = kvm->arch.mmu_ready; | ||
1000 | kvm->arch.mmu_ready = 0; | ||
1001 | kick_all_cpus_sync(); | ||
1002 | |||
1003 | /* | ||
1004 | * We should clean up the vCPU interrupt presenters first. | ||
1005 | */ | ||
1006 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1007 | /* | ||
1008 | * Take vcpu->mutex to ensure that no one_reg get/set ioctl | ||
1009 | * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done. | ||
1010 | */ | ||
1011 | mutex_lock(&vcpu->mutex); | ||
1012 | kvmppc_xive_native_cleanup_vcpu(vcpu); | ||
1013 | mutex_unlock(&vcpu->mutex); | ||
1014 | } | ||
1015 | |||
1016 | kvm->arch.xive = NULL; | ||
1017 | |||
1018 | for (i = 0; i <= xive->max_sbid; i++) { | ||
1019 | if (xive->src_blocks[i]) | ||
1020 | kvmppc_xive_free_sources(xive->src_blocks[i]); | ||
1021 | kfree(xive->src_blocks[i]); | ||
1022 | xive->src_blocks[i] = NULL; | ||
1023 | } | ||
1024 | |||
1025 | if (xive->vp_base != XIVE_INVALID_VP) | ||
1026 | xive_native_free_vp_block(xive->vp_base); | ||
1027 | |||
1028 | kvm->arch.mmu_ready = was_ready; | ||
1029 | |||
1030 | /* | ||
1031 | * A reference to the kvmppc_xive pointer is now kept under | ||
1032 | * the xive_devices struct of the machine for reuse. For now, | ||
1033 | * it is only freed when the VM is destroyed, until we fix all | ||
1034 | * the execution paths. | ||
1035 | */ | ||
1036 | |||
1037 | kfree(dev); | ||
1038 | } | ||
1039 | |||
1040 | /* | ||
1041 | * Create a XIVE device. kvm->lock is held. | ||
1042 | */ | ||
1043 | static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) | ||
1044 | { | ||
1045 | struct kvmppc_xive *xive; | ||
1046 | struct kvm *kvm = dev->kvm; | ||
1047 | int ret = 0; | ||
1048 | |||
1049 | pr_devel("Creating xive native device\n"); | ||
1050 | |||
1051 | if (kvm->arch.xive) | ||
1052 | return -EEXIST; | ||
1053 | |||
1054 | xive = kvmppc_xive_get_device(kvm, type); | ||
1055 | if (!xive) | ||
1056 | return -ENOMEM; | ||
1057 | |||
1058 | dev->private = xive; | ||
1059 | xive->dev = dev; | ||
1060 | xive->kvm = kvm; | ||
1061 | kvm->arch.xive = xive; | ||
1062 | mutex_init(&xive->mapping_lock); | ||
1063 | |||
1064 | /* | ||
1065 | * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for | ||
1066 | * a default. Getting the max number of CPUs the VM was | ||
1067 | * configured with would improve our usage of the XIVE VP space. | ||
1068 | */ | ||
1069 | xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS); | ||
1070 | pr_devel("VP_Base=%x\n", xive->vp_base); | ||
1071 | |||
1072 | if (xive->vp_base == XIVE_INVALID_VP) | ||
1073 | ret = -ENXIO; | ||
1074 | |||
1075 | xive->single_escalation = xive_native_has_single_escalation(); | ||
1076 | xive->ops = &kvmppc_xive_native_ops; | ||
1077 | |||
1078 | if (ret) | ||
1079 | kfree(xive); | ||
1080 | |||
1081 | return ret; | ||
1082 | } | ||
1083 | |||
1084 | /* | ||
1085 | * Interrupt Pending Buffer (IPB) offset | ||
1086 | */ | ||
1087 | #define TM_IPB_SHIFT 40 | ||
1088 | #define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT) | ||
1089 | |||
1090 | int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) | ||
1091 | { | ||
1092 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
1093 | u64 opal_state; | ||
1094 | int rc; | ||
1095 | |||
1096 | if (!kvmppc_xive_enabled(vcpu)) | ||
1097 | return -EPERM; | ||
1098 | |||
1099 | if (!xc) | ||
1100 | return -ENOENT; | ||
1101 | |||
1102 | /* Thread context registers. We only care about IPB and CPPR */ | ||
1103 | val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01; | ||
1104 | |||
1105 | /* Get the VP state from OPAL */ | ||
1106 | rc = xive_native_get_vp_state(xc->vp_id, &opal_state); | ||
1107 | if (rc) | ||
1108 | return rc; | ||
1109 | |||
1110 | /* | ||
1111 | * Capture the backup of the IPB register in the NVT structure | ||
1112 | * and merge it into our KVM VP state. | ||
1113 | */ | ||
1114 | val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK); | ||
1115 | |||
1116 | pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n", | ||
1117 | __func__, | ||
1118 | vcpu->arch.xive_saved_state.nsr, | ||
1119 | vcpu->arch.xive_saved_state.cppr, | ||
1120 | vcpu->arch.xive_saved_state.ipb, | ||
1121 | vcpu->arch.xive_saved_state.pipr, | ||
1122 | vcpu->arch.xive_saved_state.w01, | ||
1123 | (u32) vcpu->arch.xive_cam_word, opal_state); | ||
1124 | |||
1125 | return 0; | ||
1126 | } | ||
1127 | |||
1128 | int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) | ||
1129 | { | ||
1130 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
1131 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; | ||
1132 | |||
1133 | pr_devel("%s w01=%016llx vp=%016llx\n", __func__, | ||
1134 | val->xive_timaval[0], val->xive_timaval[1]); | ||
1135 | |||
1136 | if (!kvmppc_xive_enabled(vcpu)) | ||
1137 | return -EPERM; | ||
1138 | |||
1139 | if (!xc || !xive) | ||
1140 | return -ENOENT; | ||
1141 | |||
1142 | /* We can't update the state of a "pushed" VCPU */ | ||
1143 | if (WARN_ON(vcpu->arch.xive_pushed)) | ||
1144 | return -EBUSY; | ||
1145 | |||
1146 | /* | ||
1147 | * Restore the thread context registers. IPB and CPPR should | ||
1148 | * be the only ones that matter. | ||
1149 | */ | ||
1150 | vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0]; | ||
1151 | |||
1152 | /* | ||
1153 | * There is no need to restore the XIVE internal state (IPB | ||
1154 | * stored in the NVT) as the IPB register was merged in KVM VP | ||
1155 | * state when captured. | ||
1156 | */ | ||
1157 | return 0; | ||
1158 | } | ||
1159 | |||
1160 | static int xive_native_debug_show(struct seq_file *m, void *private) | ||
1161 | { | ||
1162 | struct kvmppc_xive *xive = m->private; | ||
1163 | struct kvm *kvm = xive->kvm; | ||
1164 | struct kvm_vcpu *vcpu; | ||
1165 | unsigned int i; | ||
1166 | |||
1167 | if (!kvm) | ||
1168 | return 0; | ||
1169 | |||
1170 | seq_puts(m, "=========\nVCPU state\n=========\n"); | ||
1171 | |||
1172 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1173 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; | ||
1174 | |||
1175 | if (!xc) | ||
1176 | continue; | ||
1177 | |||
1178 | seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", | ||
1179 | xc->server_num, | ||
1180 | vcpu->arch.xive_saved_state.nsr, | ||
1181 | vcpu->arch.xive_saved_state.cppr, | ||
1182 | vcpu->arch.xive_saved_state.ipb, | ||
1183 | vcpu->arch.xive_saved_state.pipr, | ||
1184 | vcpu->arch.xive_saved_state.w01, | ||
1185 | (u32) vcpu->arch.xive_cam_word); | ||
1186 | |||
1187 | kvmppc_xive_debug_show_queues(m, vcpu); | ||
1188 | } | ||
1189 | |||
1190 | return 0; | ||
1191 | } | ||
1192 | |||
1193 | static int xive_native_debug_open(struct inode *inode, struct file *file) | ||
1194 | { | ||
1195 | return single_open(file, xive_native_debug_show, inode->i_private); | ||
1196 | } | ||
1197 | |||
1198 | static const struct file_operations xive_native_debug_fops = { | ||
1199 | .open = xive_native_debug_open, | ||
1200 | .read = seq_read, | ||
1201 | .llseek = seq_lseek, | ||
1202 | .release = single_release, | ||
1203 | }; | ||
1204 | |||
1205 | static void xive_native_debugfs_init(struct kvmppc_xive *xive) | ||
1206 | { | ||
1207 | char *name; | ||
1208 | |||
1209 | name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); | ||
1210 | if (!name) { | ||
1211 | pr_err("%s: no memory for name\n", __func__); | ||
1212 | return; | ||
1213 | } | ||
1214 | |||
1215 | xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, | ||
1216 | xive, &xive_native_debug_fops); | ||
1217 | |||
1218 | pr_debug("%s: created %s\n", __func__, name); | ||
1219 | kfree(name); | ||
1220 | } | ||
1221 | |||
1222 | static void kvmppc_xive_native_init(struct kvm_device *dev) | ||
1223 | { | ||
1224 | struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; | ||
1225 | |||
1226 | /* Register some debug interfaces */ | ||
1227 | xive_native_debugfs_init(xive); | ||
1228 | } | ||
1229 | |||
1230 | struct kvm_device_ops kvm_xive_native_ops = { | ||
1231 | .name = "kvm-xive-native", | ||
1232 | .create = kvmppc_xive_native_create, | ||
1233 | .init = kvmppc_xive_native_init, | ||
1234 | .release = kvmppc_xive_native_release, | ||
1235 | .set_attr = kvmppc_xive_native_set_attr, | ||
1236 | .get_attr = kvmppc_xive_native_get_attr, | ||
1237 | .has_attr = kvmppc_xive_native_has_attr, | ||
1238 | .mmap = kvmppc_xive_native_mmap, | ||
1239 | }; | ||
1240 | |||
1241 | void kvmppc_xive_native_init_module(void) | ||
1242 | { | ||
1243 | ; | ||
1244 | } | ||
1245 | |||
1246 | void kvmppc_xive_native_exit_module(void) | ||
1247 | { | ||
1248 | ; | ||
1249 | } | ||
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 033363d6e764..0737acfd17f1 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c | |||
@@ -130,24 +130,14 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc, | |||
130 | */ | 130 | */ |
131 | prio = ffs(pending) - 1; | 131 | prio = ffs(pending) - 1; |
132 | 132 | ||
133 | /* | ||
134 | * If the most favoured prio we found pending is less | ||
135 | * favored (or equal) than a pending IPI, we return | ||
136 | * the IPI instead. | ||
137 | * | ||
138 | * Note: If pending was 0 and mfrr is 0xff, we will | ||
139 | * not spurriously take an IPI because mfrr cannot | ||
140 | * then be smaller than cppr. | ||
141 | */ | ||
142 | if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { | ||
143 | prio = xc->mfrr; | ||
144 | hirq = XICS_IPI; | ||
145 | break; | ||
146 | } | ||
147 | |||
148 | /* Don't scan past the guest cppr */ | 133 | /* Don't scan past the guest cppr */ |
149 | if (prio >= xc->cppr || prio > 7) | 134 | if (prio >= xc->cppr || prio > 7) { |
135 | if (xc->mfrr < xc->cppr) { | ||
136 | prio = xc->mfrr; | ||
137 | hirq = XICS_IPI; | ||
138 | } | ||
150 | break; | 139 | break; |
140 | } | ||
151 | 141 | ||
152 | /* Grab queue and pointers */ | 142 | /* Grab queue and pointers */ |
153 | q = &xc->queues[prio]; | 143 | q = &xc->queues[prio]; |
@@ -184,9 +174,12 @@ skip_ipi: | |||
184 | * been set and another occurrence of the IPI will trigger. | 174 | * been set and another occurrence of the IPI will trigger. |
185 | */ | 175 | */ |
186 | if (hirq == XICS_IPI || (prio == 0 && !qpage)) { | 176 | if (hirq == XICS_IPI || (prio == 0 && !qpage)) { |
187 | if (scan_type == scan_fetch) | 177 | if (scan_type == scan_fetch) { |
188 | GLUE(X_PFX,source_eoi)(xc->vp_ipi, | 178 | GLUE(X_PFX,source_eoi)(xc->vp_ipi, |
189 | &xc->vp_ipi_data); | 179 | &xc->vp_ipi_data); |
180 | q->idx = idx; | ||
181 | q->toggle = toggle; | ||
182 | } | ||
190 | /* Loop back on same queue with updated idx/toggle */ | 183 | /* Loop back on same queue with updated idx/toggle */ |
191 | #ifdef XIVE_RUNTIME_CHECKS | 184 | #ifdef XIVE_RUNTIME_CHECKS |
192 | WARN_ON(hirq && hirq != XICS_IPI); | 185 | WARN_ON(hirq && hirq != XICS_IPI); |
@@ -199,32 +192,41 @@ skip_ipi: | |||
199 | if (hirq == XICS_DUMMY) | 192 | if (hirq == XICS_DUMMY) |
200 | goto skip_ipi; | 193 | goto skip_ipi; |
201 | 194 | ||
202 | /* If fetching, update queue pointers */ | 195 | /* Clear the pending bit if the queue is now empty */ |
203 | if (scan_type == scan_fetch) { | 196 | if (!hirq) { |
204 | q->idx = idx; | 197 | pending &= ~(1 << prio); |
205 | q->toggle = toggle; | ||
206 | } | ||
207 | |||
208 | /* Something found, stop searching */ | ||
209 | if (hirq) | ||
210 | break; | ||
211 | |||
212 | /* Clear the pending bit on the now empty queue */ | ||
213 | pending &= ~(1 << prio); | ||
214 | 198 | ||
215 | /* | 199 | /* |
216 | * Check if the queue count needs adjusting due to | 200 | * Check if the queue count needs adjusting due to |
217 | * interrupts being moved away. | 201 | * interrupts being moved away. |
218 | */ | 202 | */ |
219 | if (atomic_read(&q->pending_count)) { | 203 | if (atomic_read(&q->pending_count)) { |
220 | int p = atomic_xchg(&q->pending_count, 0); | 204 | int p = atomic_xchg(&q->pending_count, 0); |
221 | if (p) { | 205 | if (p) { |
222 | #ifdef XIVE_RUNTIME_CHECKS | 206 | #ifdef XIVE_RUNTIME_CHECKS |
223 | WARN_ON(p > atomic_read(&q->count)); | 207 | WARN_ON(p > atomic_read(&q->count)); |
224 | #endif | 208 | #endif |
225 | atomic_sub(p, &q->count); | 209 | atomic_sub(p, &q->count); |
210 | } | ||
226 | } | 211 | } |
227 | } | 212 | } |
213 | |||
214 | /* | ||
215 | * If the most favoured prio we found pending is less | ||
216 | * favoured than (or equal to) a pending IPI, we return | ||
217 | * the IPI instead. | ||
218 | */ | ||
219 | if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { | ||
220 | prio = xc->mfrr; | ||
221 | hirq = XICS_IPI; | ||
222 | break; | ||
223 | } | ||
224 | |||
225 | /* If fetching, update queue pointers */ | ||
226 | if (scan_type == scan_fetch) { | ||
227 | q->idx = idx; | ||
228 | q->toggle = toggle; | ||
229 | } | ||
228 | } | 230 | } |
229 | 231 | ||
230 | /* If we are just taking a "peek", do nothing else */ | 232 | /* If we are just taking a "peek", do nothing else */ |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8885377ec3e0..3393b166817a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -570,6 +570,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
570 | case KVM_CAP_PPC_GET_CPU_CHAR: | 570 | case KVM_CAP_PPC_GET_CPU_CHAR: |
571 | r = 1; | 571 | r = 1; |
572 | break; | 572 | break; |
573 | #ifdef CONFIG_KVM_XIVE | ||
574 | case KVM_CAP_PPC_IRQ_XIVE: | ||
575 | /* | ||
576 | * We need XIVE to be enabled on the platform (which implies | ||
577 | * a POWER9 processor) and to be running on the PowerNV | ||
578 | * platform, as nested is not yet supported. | ||
579 | */ | ||
580 | r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE); | ||
581 | break; | ||
582 | #endif | ||
573 | 583 | ||
574 | case KVM_CAP_PPC_ALLOC_HTAB: | 584 | case KVM_CAP_PPC_ALLOC_HTAB: |
575 | r = hv_enabled; | 585 | r = hv_enabled; |
@@ -644,9 +654,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
644 | else | 654 | else |
645 | r = num_online_cpus(); | 655 | r = num_online_cpus(); |
646 | break; | 656 | break; |
647 | case KVM_CAP_NR_MEMSLOTS: | ||
648 | r = KVM_USER_MEM_SLOTS; | ||
649 | break; | ||
650 | case KVM_CAP_MAX_VCPUS: | 657 | case KVM_CAP_MAX_VCPUS: |
651 | r = KVM_MAX_VCPUS; | 658 | r = KVM_MAX_VCPUS; |
652 | break; | 659 | break; |
@@ -753,6 +760,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
753 | else | 760 | else |
754 | kvmppc_xics_free_icp(vcpu); | 761 | kvmppc_xics_free_icp(vcpu); |
755 | break; | 762 | break; |
763 | case KVMPPC_IRQ_XIVE: | ||
764 | kvmppc_xive_native_cleanup_vcpu(vcpu); | ||
765 | break; | ||
756 | } | 766 | } |
757 | 767 | ||
758 | kvmppc_core_vcpu_free(vcpu); | 768 | kvmppc_core_vcpu_free(vcpu); |
@@ -1941,6 +1951,30 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
1941 | break; | 1951 | break; |
1942 | } | 1952 | } |
1943 | #endif /* CONFIG_KVM_XICS */ | 1953 | #endif /* CONFIG_KVM_XICS */ |
1954 | #ifdef CONFIG_KVM_XIVE | ||
1955 | case KVM_CAP_PPC_IRQ_XIVE: { | ||
1956 | struct fd f; | ||
1957 | struct kvm_device *dev; | ||
1958 | |||
1959 | r = -EBADF; | ||
1960 | f = fdget(cap->args[0]); | ||
1961 | if (!f.file) | ||
1962 | break; | ||
1963 | |||
1964 | r = -ENXIO; | ||
1965 | if (!xive_enabled()) | ||
1966 | break; | ||
1967 | |||
1968 | r = -EPERM; | ||
1969 | dev = kvm_device_from_filp(f.file); | ||
1970 | if (dev) | ||
1971 | r = kvmppc_xive_native_connect_vcpu(dev, vcpu, | ||
1972 | cap->args[1]); | ||
1973 | |||
1974 | fdput(f); | ||
1975 | break; | ||
1976 | } | ||
1977 | #endif /* CONFIG_KVM_XIVE */ | ||
1944 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE | 1978 | #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE |
1945 | case KVM_CAP_PPC_FWNMI: | 1979 | case KVM_CAP_PPC_FWNMI: |
1946 | r = -EINVAL; | 1980 | r = -EINVAL; |
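To show how the enable-cap handler above is reached, a hedged userspace sketch (not part of the patch): after creating the device with KVM_CREATE_DEVICE using the new XIVE native device type, each vcpu is connected by enabling KVM_CAP_PPC_IRQ_XIVE on its own fd, with args[0] holding the device fd and args[1] the vcpu server number. The fd variables and the server value are assumed names.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int xive_connect_vcpu(int vcpu_fd, int xive_fd, uint32_t server)
{
	struct kvm_enable_cap cap = {
		.cap  = KVM_CAP_PPC_IRQ_XIVE,
		.args = { xive_fd, server },	/* device fd, vcpu server number */
	};

	return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}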
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0c037e933e55..7782201e5fe8 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c | |||
@@ -521,6 +521,9 @@ u32 xive_native_default_eq_shift(void) | |||
521 | } | 521 | } |
522 | EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); | 522 | EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); |
523 | 523 | ||
524 | unsigned long xive_tima_os; | ||
525 | EXPORT_SYMBOL_GPL(xive_tima_os); | ||
526 | |||
524 | bool __init xive_native_init(void) | 527 | bool __init xive_native_init(void) |
525 | { | 528 | { |
526 | struct device_node *np; | 529 | struct device_node *np; |
@@ -573,6 +576,14 @@ bool __init xive_native_init(void) | |||
573 | for_each_possible_cpu(cpu) | 576 | for_each_possible_cpu(cpu) |
574 | kvmppc_set_xive_tima(cpu, r.start, tima); | 577 | kvmppc_set_xive_tima(cpu, r.start, tima); |
575 | 578 | ||
579 | /* Resource 2 is OS window */ | ||
580 | if (of_address_to_resource(np, 2, &r)) { | ||
581 | pr_err("Failed to get thread mgmnt area resource\n"); | ||
582 | return false; | ||
583 | } | ||
584 | |||
585 | xive_tima_os = r.start; | ||
586 | |||
576 | /* Grab size of provisionning pages */ | 587 | /* Grab size of provisionning pages */ |
577 | xive_parse_provisioning(np); | 588 | xive_parse_provisioning(np); |
578 | 589 | ||
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index f316de40e51b..27696755daa9 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #define CPACF_KMCTR 0xb92d /* MSA4 */ | 28 | #define CPACF_KMCTR 0xb92d /* MSA4 */ |
29 | #define CPACF_PRNO 0xb93c /* MSA5 */ | 29 | #define CPACF_PRNO 0xb93c /* MSA5 */ |
30 | #define CPACF_KMA 0xb929 /* MSA8 */ | 30 | #define CPACF_KMA 0xb929 /* MSA8 */ |
31 | #define CPACF_KDSA 0xb93a /* MSA9 */ | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * En/decryption modifier bits | 34 | * En/decryption modifier bits |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c47e22bba87f..bdbc81b5bc91 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h | |||
@@ -278,6 +278,7 @@ struct kvm_s390_sie_block { | |||
278 | #define ECD_HOSTREGMGMT 0x20000000 | 278 | #define ECD_HOSTREGMGMT 0x20000000 |
279 | #define ECD_MEF 0x08000000 | 279 | #define ECD_MEF 0x08000000 |
280 | #define ECD_ETOKENF 0x02000000 | 280 | #define ECD_ETOKENF 0x02000000 |
281 | #define ECD_ECC 0x00200000 | ||
281 | __u32 ecd; /* 0x01c8 */ | 282 | __u32 ecd; /* 0x01c8 */ |
282 | __u8 reserved1cc[18]; /* 0x01cc */ | 283 | __u8 reserved1cc[18]; /* 0x01cc */ |
283 | __u64 pp; /* 0x01de */ | 284 | __u64 pp; /* 0x01de */ |
@@ -312,6 +313,7 @@ struct kvm_vcpu_stat { | |||
312 | u64 halt_successful_poll; | 313 | u64 halt_successful_poll; |
313 | u64 halt_attempted_poll; | 314 | u64 halt_attempted_poll; |
314 | u64 halt_poll_invalid; | 315 | u64 halt_poll_invalid; |
316 | u64 halt_no_poll_steal; | ||
315 | u64 halt_wakeup; | 317 | u64 halt_wakeup; |
316 | u64 instruction_lctl; | 318 | u64 instruction_lctl; |
317 | u64 instruction_lctlg; | 319 | u64 instruction_lctlg; |
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 16511d97e8dc..47104e5b47fd 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h | |||
@@ -152,7 +152,10 @@ struct kvm_s390_vm_cpu_subfunc { | |||
152 | __u8 pcc[16]; /* with MSA4 */ | 152 | __u8 pcc[16]; /* with MSA4 */ |
153 | __u8 ppno[16]; /* with MSA5 */ | 153 | __u8 ppno[16]; /* with MSA5 */ |
154 | __u8 kma[16]; /* with MSA8 */ | 154 | __u8 kma[16]; /* with MSA8 */ |
155 | __u8 reserved[1808]; | 155 | __u8 kdsa[16]; /* with MSA9 */ |
156 | __u8 sortl[32]; /* with STFLE.150 */ | ||
157 | __u8 dfltcc[32]; /* with STFLE.151 */ | ||
158 | __u8 reserved[1728]; | ||
156 | }; | 159 | }; |
157 | 160 | ||
158 | /* kvm attributes for crypto */ | 161 | /* kvm attributes for crypto */ |
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 1816ee48eadd..d3db3d7ed077 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig | |||
@@ -30,6 +30,7 @@ config KVM | |||
30 | select HAVE_KVM_IRQFD | 30 | select HAVE_KVM_IRQFD |
31 | select HAVE_KVM_IRQ_ROUTING | 31 | select HAVE_KVM_IRQ_ROUTING |
32 | select HAVE_KVM_INVALID_WAKEUPS | 32 | select HAVE_KVM_INVALID_WAKEUPS |
33 | select HAVE_KVM_NO_POLL | ||
33 | select SRCU | 34 | select SRCU |
34 | select KVM_VFIO | 35 | select KVM_VFIO |
35 | ---help--- | 36 | ---help--- |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1fd706f6206c..9dde4d7d8704 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/kvm_host.h> | 14 | #include <linux/kvm_host.h> |
15 | #include <linux/hrtimer.h> | 15 | #include <linux/hrtimer.h> |
16 | #include <linux/mmu_context.h> | 16 | #include <linux/mmu_context.h> |
17 | #include <linux/nospec.h> | ||
17 | #include <linux/signal.h> | 18 | #include <linux/signal.h> |
18 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
19 | #include <linux/bitmap.h> | 20 | #include <linux/bitmap.h> |
@@ -2307,6 +2308,7 @@ static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id) | |||
2307 | { | 2308 | { |
2308 | if (id >= MAX_S390_IO_ADAPTERS) | 2309 | if (id >= MAX_S390_IO_ADAPTERS) |
2309 | return NULL; | 2310 | return NULL; |
2311 | id = array_index_nospec(id, MAX_S390_IO_ADAPTERS); | ||
2310 | return kvm->arch.adapters[id]; | 2312 | return kvm->arch.adapters[id]; |
2311 | } | 2313 | } |
2312 | 2314 | ||
@@ -2320,8 +2322,13 @@ static int register_io_adapter(struct kvm_device *dev, | |||
2320 | (void __user *)attr->addr, sizeof(adapter_info))) | 2322 | (void __user *)attr->addr, sizeof(adapter_info))) |
2321 | return -EFAULT; | 2323 | return -EFAULT; |
2322 | 2324 | ||
2323 | if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) || | 2325 | if (adapter_info.id >= MAX_S390_IO_ADAPTERS) |
2324 | (dev->kvm->arch.adapters[adapter_info.id] != NULL)) | 2326 | return -EINVAL; |
2327 | |||
2328 | adapter_info.id = array_index_nospec(adapter_info.id, | ||
2329 | MAX_S390_IO_ADAPTERS); | ||
2330 | |||
2331 | if (dev->kvm->arch.adapters[adapter_info.id] != NULL) | ||
2325 | return -EINVAL; | 2332 | return -EINVAL; |
2326 | 2333 | ||
2327 | adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); | 2334 | adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); |
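The two hunks above apply the standard Spectre-v1 hardening pattern from <linux/nospec.h>: bounds-check first, then clamp the index with array_index_nospec() before it is used, so that even a mispredicted branch cannot load out-of-bounds data. A generic sketch of the pattern (hypothetical table, not code from this patch):

	#include <linux/errno.h>
	#include <linux/nospec.h>

	static int read_entry(const int *table, unsigned int nr_entries, unsigned int idx)
	{
		if (idx >= nr_entries)
			return -EINVAL;
		/* returns idx when idx < nr_entries, 0 otherwise, with no speculation window */
		idx = array_index_nospec(idx, nr_entries);
		return table[idx];
	}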
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4638303ba6a8..8d6d75db8de6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -75,6 +75,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
75 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, | 75 | { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, |
76 | { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, | 76 | { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, |
77 | { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, | 77 | { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, |
78 | { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) }, | ||
78 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 79 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
79 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, | 80 | { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, |
80 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, | 81 | { "instruction_lctl", VCPU_STAT(instruction_lctl) }, |
@@ -177,6 +178,11 @@ static int hpage; | |||
177 | module_param(hpage, int, 0444); | 178 | module_param(hpage, int, 0444); |
178 | MODULE_PARM_DESC(hpage, "1m huge page backing support"); | 179 | MODULE_PARM_DESC(hpage, "1m huge page backing support"); |
179 | 180 | ||
181 | /* maximum percentage of steal time for polling. >100 is treated like 100 */ | ||
182 | static u8 halt_poll_max_steal = 10; | ||
183 | module_param(halt_poll_max_steal, byte, 0644); | ||
184 | MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); | ||
185 | |||
180 | /* | 186 | /* |
181 | * For now we handle at most 16 double words as this is what the s390 base | 187 | * For now we handle at most 16 double words as this is what the s390 base |
182 | * kernel handles and stores in the prefix page. If we ever need to go beyond | 188 | * kernel handles and stores in the prefix page. If we ever need to go beyond |
@@ -321,6 +327,22 @@ static inline int plo_test_bit(unsigned char nr) | |||
321 | return cc == 0; | 327 | return cc == 0; |
322 | } | 328 | } |
323 | 329 | ||
330 | static inline void __insn32_query(unsigned int opcode, u8 query[32]) | ||
331 | { | ||
332 | register unsigned long r0 asm("0") = 0; /* query function */ | ||
333 | register unsigned long r1 asm("1") = (unsigned long) query; | ||
334 | |||
335 | asm volatile( | ||
336 | /* Parameter regs are ignored */ | ||
337 | " .insn rrf,%[opc] << 16,2,4,6,0\n" | ||
338 | : "=m" (*query) | ||
339 | : "d" (r0), "a" (r1), [opc] "i" (opcode) | ||
340 | : "cc"); | ||
341 | } | ||
342 | |||
343 | #define INSN_SORTL 0xb938 | ||
344 | #define INSN_DFLTCC 0xb939 | ||
345 | |||
324 | static void kvm_s390_cpu_feat_init(void) | 346 | static void kvm_s390_cpu_feat_init(void) |
325 | { | 347 | { |
326 | int i; | 348 | int i; |
@@ -368,6 +390,16 @@ static void kvm_s390_cpu_feat_init(void) | |||
368 | __cpacf_query(CPACF_KMA, (cpacf_mask_t *) | 390 | __cpacf_query(CPACF_KMA, (cpacf_mask_t *) |
369 | kvm_s390_available_subfunc.kma); | 391 | kvm_s390_available_subfunc.kma); |
370 | 392 | ||
393 | if (test_facility(155)) /* MSA9 */ | ||
394 | __cpacf_query(CPACF_KDSA, (cpacf_mask_t *) | ||
395 | kvm_s390_available_subfunc.kdsa); | ||
396 | |||
397 | if (test_facility(150)) /* SORTL */ | ||
398 | __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl); | ||
399 | |||
400 | if (test_facility(151)) /* DFLTCC */ | ||
401 | __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc); | ||
402 | |||
371 | if (MACHINE_HAS_ESOP) | 403 | if (MACHINE_HAS_ESOP) |
372 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); | 404 | allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); |
373 | /* | 405 | /* |
@@ -513,9 +545,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
513 | else if (sclp.has_esca && sclp.has_64bscao) | 545 | else if (sclp.has_esca && sclp.has_64bscao) |
514 | r = KVM_S390_ESCA_CPU_SLOTS; | 546 | r = KVM_S390_ESCA_CPU_SLOTS; |
515 | break; | 547 | break; |
516 | case KVM_CAP_NR_MEMSLOTS: | ||
517 | r = KVM_USER_MEM_SLOTS; | ||
518 | break; | ||
519 | case KVM_CAP_S390_COW: | 548 | case KVM_CAP_S390_COW: |
520 | r = MACHINE_HAS_ESOP; | 549 | r = MACHINE_HAS_ESOP; |
521 | break; | 550 | break; |
@@ -657,6 +686,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) | |||
657 | set_kvm_facility(kvm->arch.model.fac_mask, 135); | 686 | set_kvm_facility(kvm->arch.model.fac_mask, 135); |
658 | set_kvm_facility(kvm->arch.model.fac_list, 135); | 687 | set_kvm_facility(kvm->arch.model.fac_list, 135); |
659 | } | 688 | } |
689 | if (test_facility(148)) { | ||
690 | set_kvm_facility(kvm->arch.model.fac_mask, 148); | ||
691 | set_kvm_facility(kvm->arch.model.fac_list, 148); | ||
692 | } | ||
693 | if (test_facility(152)) { | ||
694 | set_kvm_facility(kvm->arch.model.fac_mask, 152); | ||
695 | set_kvm_facility(kvm->arch.model.fac_list, 152); | ||
696 | } | ||
660 | r = 0; | 697 | r = 0; |
661 | } else | 698 | } else |
662 | r = -EINVAL; | 699 | r = -EINVAL; |
@@ -1323,6 +1360,19 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm, | |||
1323 | VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", | 1360 | VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", |
1324 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], | 1361 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], |
1325 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); | 1362 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); |
1363 | VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx", | ||
1364 | ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], | ||
1365 | ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); | ||
1366 | VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1367 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], | ||
1368 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], | ||
1369 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], | ||
1370 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); | ||
1371 | VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1372 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], | ||
1373 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], | ||
1374 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], | ||
1375 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); | ||
1326 | 1376 | ||
1327 | return 0; | 1377 | return 0; |
1328 | } | 1378 | } |
@@ -1491,6 +1541,19 @@ static int kvm_s390_get_processor_subfunc(struct kvm *kvm, | |||
1491 | VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", | 1541 | VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", |
1492 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], | 1542 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], |
1493 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); | 1543 | ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); |
1544 | VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx", | ||
1545 | ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0], | ||
1546 | ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]); | ||
1547 | VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1548 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0], | ||
1549 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1], | ||
1550 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2], | ||
1551 | ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]); | ||
1552 | VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1553 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0], | ||
1554 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1], | ||
1555 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2], | ||
1556 | ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]); | ||
1494 | 1557 | ||
1495 | return 0; | 1558 | return 0; |
1496 | } | 1559 | } |
@@ -1546,6 +1609,19 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, | |||
1546 | VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", | 1609 | VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", |
1547 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], | 1610 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], |
1548 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); | 1611 | ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); |
1612 | VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx", | ||
1613 | ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0], | ||
1614 | ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]); | ||
1615 | VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1616 | ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0], | ||
1617 | ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1], | ||
1618 | ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2], | ||
1619 | ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]); | ||
1620 | VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", | ||
1621 | ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0], | ||
1622 | ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1], | ||
1623 | ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2], | ||
1624 | ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]); | ||
1549 | 1625 | ||
1550 | return 0; | 1626 | return 0; |
1551 | } | 1627 | } |
@@ -2817,6 +2893,25 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
2817 | vcpu->arch.enabled_gmap = vcpu->arch.gmap; | 2893 | vcpu->arch.enabled_gmap = vcpu->arch.gmap; |
2818 | } | 2894 | } |
2819 | 2895 | ||
2896 | static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr) | ||
2897 | { | ||
2898 | if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) && | ||
2899 | test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo)) | ||
2900 | return true; | ||
2901 | return false; | ||
2902 | } | ||
2903 | |||
2904 | static bool kvm_has_pckmo_ecc(struct kvm *kvm) | ||
2905 | { | ||
2906 | /* At least one ECC subfunction must be present */ | ||
2907 | return kvm_has_pckmo_subfunc(kvm, 32) || | ||
2908 | kvm_has_pckmo_subfunc(kvm, 33) || | ||
2909 | kvm_has_pckmo_subfunc(kvm, 34) || | ||
2910 | kvm_has_pckmo_subfunc(kvm, 40) || | ||
2911 | kvm_has_pckmo_subfunc(kvm, 41); | ||
2912 | |||
2913 | } | ||
2914 | |||
2820 | static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) | 2915 | static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) |
2821 | { | 2916 | { |
2822 | /* | 2917 | /* |
@@ -2829,13 +2924,19 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) | |||
2829 | vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; | 2924 | vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; |
2830 | vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); | 2925 | vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA); |
2831 | vcpu->arch.sie_block->eca &= ~ECA_APIE; | 2926 | vcpu->arch.sie_block->eca &= ~ECA_APIE; |
2927 | vcpu->arch.sie_block->ecd &= ~ECD_ECC; | ||
2832 | 2928 | ||
2833 | if (vcpu->kvm->arch.crypto.apie) | 2929 | if (vcpu->kvm->arch.crypto.apie) |
2834 | vcpu->arch.sie_block->eca |= ECA_APIE; | 2930 | vcpu->arch.sie_block->eca |= ECA_APIE; |
2835 | 2931 | ||
2836 | /* Set up protected key support */ | 2932 | /* Set up protected key support */ |
2837 | if (vcpu->kvm->arch.crypto.aes_kw) | 2933 | if (vcpu->kvm->arch.crypto.aes_kw) { |
2838 | vcpu->arch.sie_block->ecb3 |= ECB3_AES; | 2934 | vcpu->arch.sie_block->ecb3 |= ECB3_AES; |
2935 | /* ecc is also wrapped with AES key */ | ||
2936 | if (kvm_has_pckmo_ecc(vcpu->kvm)) | ||
2937 | vcpu->arch.sie_block->ecd |= ECD_ECC; | ||
2938 | } | ||
2939 | |||
2839 | if (vcpu->kvm->arch.crypto.dea_kw) | 2940 | if (vcpu->kvm->arch.crypto.dea_kw) |
2840 | vcpu->arch.sie_block->ecb3 |= ECB3_DEA; | 2941 | vcpu->arch.sie_block->ecb3 |= ECB3_DEA; |
2841 | } | 2942 | } |
@@ -3068,6 +3169,17 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, | |||
3068 | } | 3169 | } |
3069 | } | 3170 | } |
3070 | 3171 | ||
3172 | bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) | ||
3173 | { | ||
3174 | /* do not poll with more than halt_poll_max_steal percent of steal time */ | ||
3175 | if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >= | ||
3176 | halt_poll_max_steal) { | ||
3177 | vcpu->stat.halt_no_poll_steal++; | ||
3178 | return true; | ||
3179 | } | ||
3180 | return false; | ||
3181 | } | ||
3182 | |||
3071 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) | 3183 | int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) |
3072 | { | 3184 | { |
3073 | /* kvm common code refers to this, but never calls it */ | 3185 | /* kvm common code refers to this, but never calls it */ |
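kvm_arch_no_poll() is a hook consulted by the generic halt path before it busy-polls; selecting HAVE_KVM_NO_POLL in the Kconfig hunk above lets s390 override the default. A simplified sketch of how the common code uses it, assuming the shape introduced alongside this series (not a verbatim copy of virt/kvm/kvm_main.c; whether the non-overriding case is a static inline stub or a weak symbol is a common-code detail not shown in this diff):

	#include <linux/kvm_host.h>

	static void vcpu_block_sketch(struct kvm_vcpu *vcpu)
	{
		if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
			/* busy-poll briefly for a wakeup before scheduling out */
		}
		/* otherwise (or after an unsuccessful poll) block the vCPU */
	}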
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d62fa148558b..076090f9e666 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c | |||
@@ -288,7 +288,9 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
288 | const u32 crycb_addr = crycbd_o & 0x7ffffff8U; | 288 | const u32 crycb_addr = crycbd_o & 0x7ffffff8U; |
289 | unsigned long *b1, *b2; | 289 | unsigned long *b1, *b2; |
290 | u8 ecb3_flags; | 290 | u8 ecb3_flags; |
291 | u32 ecd_flags; | ||
291 | int apie_h; | 292 | int apie_h; |
293 | int apie_s; | ||
292 | int key_msk = test_kvm_facility(vcpu->kvm, 76); | 294 | int key_msk = test_kvm_facility(vcpu->kvm, 76); |
293 | int fmt_o = crycbd_o & CRYCB_FORMAT_MASK; | 295 | int fmt_o = crycbd_o & CRYCB_FORMAT_MASK; |
294 | int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK; | 296 | int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK; |
@@ -297,7 +299,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
297 | scb_s->crycbd = 0; | 299 | scb_s->crycbd = 0; |
298 | 300 | ||
299 | apie_h = vcpu->arch.sie_block->eca & ECA_APIE; | 301 | apie_h = vcpu->arch.sie_block->eca & ECA_APIE; |
300 | if (!apie_h && (!key_msk || fmt_o == CRYCB_FORMAT0)) | 302 | apie_s = apie_h & scb_o->eca; |
303 | if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0))) | ||
301 | return 0; | 304 | return 0; |
302 | 305 | ||
303 | if (!crycb_addr) | 306 | if (!crycb_addr) |
@@ -308,7 +311,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
308 | ((crycb_addr + 128) & PAGE_MASK)) | 311 | ((crycb_addr + 128) & PAGE_MASK)) |
309 | return set_validity_icpt(scb_s, 0x003CU); | 312 | return set_validity_icpt(scb_s, 0x003CU); |
310 | 313 | ||
311 | if (apie_h && (scb_o->eca & ECA_APIE)) { | 314 | if (apie_s) { |
312 | ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr, | 315 | ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr, |
313 | vcpu->kvm->arch.crypto.crycb, | 316 | vcpu->kvm->arch.crypto.crycb, |
314 | fmt_o, fmt_h); | 317 | fmt_o, fmt_h); |
@@ -320,7 +323,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
320 | /* we may only allow it if enabled for guest 2 */ | 323 | /* we may only allow it if enabled for guest 2 */ |
321 | ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 & | 324 | ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 & |
322 | (ECB3_AES | ECB3_DEA); | 325 | (ECB3_AES | ECB3_DEA); |
323 | if (!ecb3_flags) | 326 | ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC; |
327 | if (!ecb3_flags && !ecd_flags) | ||
324 | goto end; | 328 | goto end; |
325 | 329 | ||
326 | /* copy only the wrapping keys */ | 330 | /* copy only the wrapping keys */ |
@@ -329,6 +333,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
329 | return set_validity_icpt(scb_s, 0x0035U); | 333 | return set_validity_icpt(scb_s, 0x0035U); |
330 | 334 | ||
331 | scb_s->ecb3 |= ecb3_flags; | 335 | scb_s->ecb3 |= ecb3_flags; |
336 | scb_s->ecd |= ecd_flags; | ||
332 | 337 | ||
333 | /* xor both blocks in one run */ | 338 | /* xor both blocks in one run */ |
334 | b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask; | 339 | b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask; |
@@ -339,7 +344,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) | |||
339 | end: | 344 | end: |
340 | switch (ret) { | 345 | switch (ret) { |
341 | case -EINVAL: | 346 | case -EINVAL: |
342 | return set_validity_icpt(scb_s, 0x0020U); | 347 | return set_validity_icpt(scb_s, 0x0022U); |
343 | case -EFAULT: | 348 | case -EFAULT: |
344 | return set_validity_icpt(scb_s, 0x0035U); | 349 | return set_validity_icpt(scb_s, 0x0035U); |
345 | case -EACCES: | 350 | case -EACCES: |
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index fd788e0f2e5b..cead9e0dcffb 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c | |||
@@ -93,6 +93,9 @@ static struct facility_def facility_defs[] = { | |||
93 | 131, /* enhanced-SOP 2 and side-effect */ | 93 | 131, /* enhanced-SOP 2 and side-effect */ |
94 | 139, /* multiple epoch facility */ | 94 | 139, /* multiple epoch facility */ |
95 | 146, /* msa extension 8 */ | 95 | 146, /* msa extension 8 */ |
96 | 150, /* enhanced sort */ | ||
97 | 151, /* deflate conversion */ | ||
98 | 155, /* msa extension 9 */ | ||
96 | -1 /* END */ | 99 | -1 /* END */ |
97 | } | 100 | } |
98 | }, | 101 | }, |
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 12ec402f4114..546d13e436aa 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c | |||
@@ -2384,7 +2384,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) | |||
2384 | */ | 2384 | */ |
2385 | if (__test_and_clear_bit(55, (unsigned long *)&status)) { | 2385 | if (__test_and_clear_bit(55, (unsigned long *)&status)) { |
2386 | handled++; | 2386 | handled++; |
2387 | intel_pt_interrupt(); | 2387 | if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && |
2388 | perf_guest_cbs->handle_intel_pt_intr)) | ||
2389 | perf_guest_cbs->handle_intel_pt_intr(); | ||
2390 | else | ||
2391 | intel_pt_interrupt(); | ||
2388 | } | 2392 | } |
2389 | 2393 | ||
2390 | /* | 2394 | /* |
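The PMI handler now defers a Processor Trace ToPA PMI to a hypervisor callback while a guest owns PT, via the new handle_intel_pt_intr member of struct perf_guest_info_callbacks. A hedged sketch of how a consumer such as KVM would wire it up; the stub names here are illustrative and not part of this diff:

	#include <linux/perf_event.h>

	static int demo_is_in_guest(void)		{ return 0; }	/* stub: no guest running */
	static int demo_is_user_mode(void)		{ return 0; }
	static unsigned long demo_get_guest_ip(void)	{ return 0; }
	static void demo_handle_intel_pt_intr(void)	{ /* would forward the PMI to the vCPU */ }

	static struct perf_guest_info_callbacks demo_guest_cbs = {
		.is_in_guest		= demo_is_in_guest,
		.is_user_mode		= demo_is_user_mode,
		.get_guest_ip		= demo_get_guest_ip,
		.handle_intel_pt_intr	= demo_handle_intel_pt_intr,
	};

	/* registered once, e.g. at module init:
	 *	perf_register_guest_info_callbacks(&demo_guest_cbs);
	 */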
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h index 62be73b23d5c..e8f58ddd06d9 100644 --- a/arch/x86/include/asm/e820/api.h +++ b/arch/x86/include/asm/e820/api.h | |||
@@ -10,6 +10,7 @@ extern struct e820_table *e820_table_firmware; | |||
10 | 10 | ||
11 | extern unsigned long pci_mem_start; | 11 | extern unsigned long pci_mem_start; |
12 | 12 | ||
13 | extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type); | ||
13 | extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type); | 14 | extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type); |
14 | extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); | 15 | extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); |
15 | 16 | ||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c79abe7ca093..450d69a1e6fa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -470,6 +470,7 @@ struct kvm_pmu { | |||
470 | u64 global_ovf_ctrl; | 470 | u64 global_ovf_ctrl; |
471 | u64 counter_bitmask[2]; | 471 | u64 counter_bitmask[2]; |
472 | u64 global_ctrl_mask; | 472 | u64 global_ctrl_mask; |
473 | u64 global_ovf_ctrl_mask; | ||
473 | u64 reserved_bits; | 474 | u64 reserved_bits; |
474 | u8 version; | 475 | u8 version; |
475 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; | 476 | struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; |
@@ -781,6 +782,9 @@ struct kvm_vcpu_arch { | |||
781 | 782 | ||
782 | /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ | 783 | /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ |
783 | bool l1tf_flush_l1d; | 784 | bool l1tf_flush_l1d; |
785 | |||
786 | /* AMD MSRC001_0015 Hardware Configuration */ | ||
787 | u64 msr_hwcr; | ||
784 | }; | 788 | }; |
785 | 789 | ||
786 | struct kvm_lpage_info { | 790 | struct kvm_lpage_info { |
@@ -1168,7 +1172,8 @@ struct kvm_x86_ops { | |||
1168 | uint32_t guest_irq, bool set); | 1172 | uint32_t guest_irq, bool set); |
1169 | void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); | 1173 | void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); |
1170 | 1174 | ||
1171 | int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc); | 1175 | int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, |
1176 | bool *expired); | ||
1172 | void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); | 1177 | void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); |
1173 | 1178 | ||
1174 | void (*setup_mce)(struct kvm_vcpu *vcpu); | 1179 | void (*setup_mce)(struct kvm_vcpu *vcpu); |
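The ->set_hv_timer() signature change replaces the old tri-state return value with an explicit bool *expired out-parameter: the backend returns non-zero only on failure, and reports via *expired whether the requested deadline already lies in the past. A hedged sketch of a conforming backend (hypothetical, not the VMX implementation; relies on svm/vmx-side includes already pulling in kvm_host.h and x86.h):

	static int example_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
					bool *expired)
	{
		u64 guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

		/* ... convert (guest_deadline_tsc - guest_tsc) to hardware timer
		 * units and program the preemption timer here ... */

		*expired = guest_deadline_tsc <= guest_tsc;	/* deadline already passed */
		return 0;
	}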
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 88dd202c8b00..979ef971cc78 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -789,6 +789,14 @@ | |||
789 | #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f | 789 | #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f |
790 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 | 790 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 |
791 | 791 | ||
792 | /* PERF_GLOBAL_OVF_CTL bits */ | ||
793 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 | ||
794 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) | ||
795 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62 | ||
796 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT) | ||
797 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63 | ||
798 | #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT) | ||
799 | |||
792 | /* Geode defined MSRs */ | 800 | /* Geode defined MSRs */ |
793 | #define MSR_GEODE_BUSCONT_CONF0 0x00001900 | 801 | #define MSR_GEODE_BUSCONT_CONF0 0x00001900 |
794 | 802 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 2879e234e193..76dd605ee2a3 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -73,12 +73,13 @@ EXPORT_SYMBOL(pci_mem_start); | |||
73 | * This function checks if any part of the range <start,end> is mapped | 73 | * This function checks if any part of the range <start,end> is mapped |
74 | * with type. | 74 | * with type. |
75 | */ | 75 | */ |
76 | bool e820__mapped_any(u64 start, u64 end, enum e820_type type) | 76 | static bool _e820__mapped_any(struct e820_table *table, |
77 | u64 start, u64 end, enum e820_type type) | ||
77 | { | 78 | { |
78 | int i; | 79 | int i; |
79 | 80 | ||
80 | for (i = 0; i < e820_table->nr_entries; i++) { | 81 | for (i = 0; i < table->nr_entries; i++) { |
81 | struct e820_entry *entry = &e820_table->entries[i]; | 82 | struct e820_entry *entry = &table->entries[i]; |
82 | 83 | ||
83 | if (type && entry->type != type) | 84 | if (type && entry->type != type) |
84 | continue; | 85 | continue; |
@@ -88,6 +89,17 @@ bool e820__mapped_any(u64 start, u64 end, enum e820_type type) | |||
88 | } | 89 | } |
89 | return 0; | 90 | return 0; |
90 | } | 91 | } |
92 | |||
93 | bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type) | ||
94 | { | ||
95 | return _e820__mapped_any(e820_table_firmware, start, end, type); | ||
96 | } | ||
97 | EXPORT_SYMBOL_GPL(e820__mapped_raw_any); | ||
98 | |||
99 | bool e820__mapped_any(u64 start, u64 end, enum e820_type type) | ||
100 | { | ||
101 | return _e820__mapped_any(e820_table, start, end, type); | ||
102 | } | ||
91 | EXPORT_SYMBOL_GPL(e820__mapped_any); | 103 | EXPORT_SYMBOL_GPL(e820__mapped_any); |
92 | 104 | ||
93 | /* | 105 | /* |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index bbbe611f0c49..80a642a0143d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -963,13 +963,13 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
963 | if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) | 963 | if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) |
964 | return 1; | 964 | return 1; |
965 | 965 | ||
966 | eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 966 | eax = kvm_rax_read(vcpu); |
967 | ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 967 | ecx = kvm_rcx_read(vcpu); |
968 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); | 968 | kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true); |
969 | kvm_register_write(vcpu, VCPU_REGS_RAX, eax); | 969 | kvm_rax_write(vcpu, eax); |
970 | kvm_register_write(vcpu, VCPU_REGS_RBX, ebx); | 970 | kvm_rbx_write(vcpu, ebx); |
971 | kvm_register_write(vcpu, VCPU_REGS_RCX, ecx); | 971 | kvm_rcx_write(vcpu, ecx); |
972 | kvm_register_write(vcpu, VCPU_REGS_RDX, edx); | 972 | kvm_rdx_write(vcpu, edx); |
973 | return kvm_skip_emulated_instruction(vcpu); | 973 | return kvm_skip_emulated_instruction(vcpu); |
974 | } | 974 | } |
975 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 975 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index cc24b3a32c44..8ca4b39918e0 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c | |||
@@ -1535,10 +1535,10 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) | |||
1535 | 1535 | ||
1536 | longmode = is_64_bit_mode(vcpu); | 1536 | longmode = is_64_bit_mode(vcpu); |
1537 | if (longmode) | 1537 | if (longmode) |
1538 | kvm_register_write(vcpu, VCPU_REGS_RAX, result); | 1538 | kvm_rax_write(vcpu, result); |
1539 | else { | 1539 | else { |
1540 | kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32); | 1540 | kvm_rdx_write(vcpu, result >> 32); |
1541 | kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff); | 1541 | kvm_rax_write(vcpu, result & 0xffffffff); |
1542 | } | 1542 | } |
1543 | } | 1543 | } |
1544 | 1544 | ||
@@ -1611,18 +1611,18 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
1611 | longmode = is_64_bit_mode(vcpu); | 1611 | longmode = is_64_bit_mode(vcpu); |
1612 | 1612 | ||
1613 | if (!longmode) { | 1613 | if (!longmode) { |
1614 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | | 1614 | param = ((u64)kvm_rdx_read(vcpu) << 32) | |
1615 | (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); | 1615 | (kvm_rax_read(vcpu) & 0xffffffff); |
1616 | ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | | 1616 | ingpa = ((u64)kvm_rbx_read(vcpu) << 32) | |
1617 | (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); | 1617 | (kvm_rcx_read(vcpu) & 0xffffffff); |
1618 | outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | | 1618 | outgpa = ((u64)kvm_rdi_read(vcpu) << 32) | |
1619 | (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); | 1619 | (kvm_rsi_read(vcpu) & 0xffffffff); |
1620 | } | 1620 | } |
1621 | #ifdef CONFIG_X86_64 | 1621 | #ifdef CONFIG_X86_64 |
1622 | else { | 1622 | else { |
1623 | param = kvm_register_read(vcpu, VCPU_REGS_RCX); | 1623 | param = kvm_rcx_read(vcpu); |
1624 | ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); | 1624 | ingpa = kvm_rdx_read(vcpu); |
1625 | outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); | 1625 | outgpa = kvm_r8_read(vcpu); |
1626 | } | 1626 | } |
1627 | #endif | 1627 | #endif |
1628 | 1628 | ||
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f8f56a93358b..1cc6c47dc77e 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -9,6 +9,34 @@ | |||
9 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | 9 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ |
10 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) | 10 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE) |
11 | 11 | ||
12 | #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ | ||
13 | static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ | ||
14 | { \ | ||
15 | return vcpu->arch.regs[VCPU_REGS_##uname]; \ | ||
16 | } \ | ||
17 | static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ | ||
18 | unsigned long val) \ | ||
19 | { \ | ||
20 | vcpu->arch.regs[VCPU_REGS_##uname] = val; \ | ||
21 | } | ||
22 | BUILD_KVM_GPR_ACCESSORS(rax, RAX) | ||
23 | BUILD_KVM_GPR_ACCESSORS(rbx, RBX) | ||
24 | BUILD_KVM_GPR_ACCESSORS(rcx, RCX) | ||
25 | BUILD_KVM_GPR_ACCESSORS(rdx, RDX) | ||
26 | BUILD_KVM_GPR_ACCESSORS(rbp, RBP) | ||
27 | BUILD_KVM_GPR_ACCESSORS(rsi, RSI) | ||
28 | BUILD_KVM_GPR_ACCESSORS(rdi, RDI) | ||
29 | #ifdef CONFIG_X86_64 | ||
30 | BUILD_KVM_GPR_ACCESSORS(r8, R8) | ||
31 | BUILD_KVM_GPR_ACCESSORS(r9, R9) | ||
32 | BUILD_KVM_GPR_ACCESSORS(r10, R10) | ||
33 | BUILD_KVM_GPR_ACCESSORS(r11, R11) | ||
34 | BUILD_KVM_GPR_ACCESSORS(r12, R12) | ||
35 | BUILD_KVM_GPR_ACCESSORS(r13, R13) | ||
36 | BUILD_KVM_GPR_ACCESSORS(r14, R14) | ||
37 | BUILD_KVM_GPR_ACCESSORS(r15, R15) | ||
38 | #endif | ||
39 | |||
12 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | 40 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, |
13 | enum kvm_reg reg) | 41 | enum kvm_reg reg) |
14 | { | 42 | { |
@@ -37,6 +65,16 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) | |||
37 | kvm_register_write(vcpu, VCPU_REGS_RIP, val); | 65 | kvm_register_write(vcpu, VCPU_REGS_RIP, val); |
38 | } | 66 | } |
39 | 67 | ||
68 | static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) | ||
69 | { | ||
70 | return kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
71 | } | ||
72 | |||
73 | static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val) | ||
74 | { | ||
75 | kvm_register_write(vcpu, VCPU_REGS_RSP, val); | ||
76 | } | ||
77 | |||
40 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | 78 | static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) |
41 | { | 79 | { |
42 | might_sleep(); /* on svm */ | 80 | might_sleep(); /* on svm */ |
@@ -83,8 +121,8 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | |||
83 | 121 | ||
84 | static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | 122 | static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) |
85 | { | 123 | { |
86 | return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u) | 124 | return (kvm_rax_read(vcpu) & -1u) |
87 | | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); | 125 | | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32); |
88 | } | 126 | } |
89 | 127 | ||
90 | static inline void enter_guest_mode(struct kvm_vcpu *vcpu) | 128 | static inline void enter_guest_mode(struct kvm_vcpu *vcpu) |
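For reference, each BUILD_KVM_GPR_ACCESSORS() invocation above stamps out a trivial read/write pair that goes straight to vcpu->arch.regs[], skipping the register-availability caching done by kvm_register_read()/kvm_register_write() for GPRs that are always available. The expansion for BUILD_KVM_GPR_ACCESSORS(rax, RAX), written out by hand:

	static __always_inline unsigned long kvm_rax_read(struct kvm_vcpu *vcpu)
	{
		return vcpu->arch.regs[VCPU_REGS_RAX];
	}

	static __always_inline void kvm_rax_write(struct kvm_vcpu *vcpu, unsigned long val)
	{
		vcpu->arch.regs[VCPU_REGS_RAX] = val;
	}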
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bd13fdddbdc4..4924f83ed4f3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -1454,7 +1454,7 @@ static void apic_timer_expired(struct kvm_lapic *apic) | |||
1454 | if (swait_active(q)) | 1454 | if (swait_active(q)) |
1455 | swake_up_one(q); | 1455 | swake_up_one(q); |
1456 | 1456 | ||
1457 | if (apic_lvtt_tscdeadline(apic)) | 1457 | if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) |
1458 | ktimer->expired_tscdeadline = ktimer->tscdeadline; | 1458 | ktimer->expired_tscdeadline = ktimer->tscdeadline; |
1459 | } | 1459 | } |
1460 | 1460 | ||
@@ -1696,37 +1696,42 @@ static void cancel_hv_timer(struct kvm_lapic *apic) | |||
1696 | static bool start_hv_timer(struct kvm_lapic *apic) | 1696 | static bool start_hv_timer(struct kvm_lapic *apic) |
1697 | { | 1697 | { |
1698 | struct kvm_timer *ktimer = &apic->lapic_timer; | 1698 | struct kvm_timer *ktimer = &apic->lapic_timer; |
1699 | int r; | 1699 | struct kvm_vcpu *vcpu = apic->vcpu; |
1700 | bool expired; | ||
1700 | 1701 | ||
1701 | WARN_ON(preemptible()); | 1702 | WARN_ON(preemptible()); |
1702 | if (!kvm_x86_ops->set_hv_timer) | 1703 | if (!kvm_x86_ops->set_hv_timer) |
1703 | return false; | 1704 | return false; |
1704 | 1705 | ||
1705 | if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending)) | ||
1706 | return false; | ||
1707 | |||
1708 | if (!ktimer->tscdeadline) | 1706 | if (!ktimer->tscdeadline) |
1709 | return false; | 1707 | return false; |
1710 | 1708 | ||
1711 | r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline); | 1709 | if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired)) |
1712 | if (r < 0) | ||
1713 | return false; | 1710 | return false; |
1714 | 1711 | ||
1715 | ktimer->hv_timer_in_use = true; | 1712 | ktimer->hv_timer_in_use = true; |
1716 | hrtimer_cancel(&ktimer->timer); | 1713 | hrtimer_cancel(&ktimer->timer); |
1717 | 1714 | ||
1718 | /* | 1715 | /* |
1719 | * Also recheck ktimer->pending, in case the sw timer triggered in | 1716 | * To simplify handling the periodic timer, leave the hv timer running |
1720 | * the window. For periodic timer, leave the hv timer running for | 1717 | * even if the deadline timer has expired, i.e. rely on the resulting |
1721 | * simplicity, and the deadline will be recomputed on the next vmexit. | 1718 | * VM-Exit to recompute the periodic timer's target expiration. |
1722 | */ | 1719 | */ |
1723 | if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) { | 1720 | if (!apic_lvtt_period(apic)) { |
1724 | if (r) | 1721 | /* |
1722 | * Cancel the hv timer if the sw timer fired while the hv timer | ||
1723 | * was being programmed, or if the hv timer itself expired. | ||
1724 | */ | ||
1725 | if (atomic_read(&ktimer->pending)) { | ||
1726 | cancel_hv_timer(apic); | ||
1727 | } else if (expired) { | ||
1725 | apic_timer_expired(apic); | 1728 | apic_timer_expired(apic); |
1726 | return false; | 1729 | cancel_hv_timer(apic); |
1730 | } | ||
1727 | } | 1731 | } |
1728 | 1732 | ||
1729 | trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true); | 1733 | trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use); |
1734 | |||
1730 | return true; | 1735 | return true; |
1731 | } | 1736 | } |
1732 | 1737 | ||
@@ -1750,8 +1755,13 @@ static void start_sw_timer(struct kvm_lapic *apic) | |||
1750 | static void restart_apic_timer(struct kvm_lapic *apic) | 1755 | static void restart_apic_timer(struct kvm_lapic *apic) |
1751 | { | 1756 | { |
1752 | preempt_disable(); | 1757 | preempt_disable(); |
1758 | |||
1759 | if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending)) | ||
1760 | goto out; | ||
1761 | |||
1753 | if (!start_hv_timer(apic)) | 1762 | if (!start_hv_timer(apic)) |
1754 | start_sw_timer(apic); | 1763 | start_sw_timer(apic); |
1764 | out: | ||
1755 | preempt_enable(); | 1765 | preempt_enable(); |
1756 | } | 1766 | } |
1757 | 1767 | ||
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d9c7b45d231f..1e9ba81accba 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <asm/page.h> | 44 | #include <asm/page.h> |
45 | #include <asm/pat.h> | 45 | #include <asm/pat.h> |
46 | #include <asm/cmpxchg.h> | 46 | #include <asm/cmpxchg.h> |
47 | #include <asm/e820/api.h> | ||
47 | #include <asm/io.h> | 48 | #include <asm/io.h> |
48 | #include <asm/vmx.h> | 49 | #include <asm/vmx.h> |
49 | #include <asm/kvm_page_track.h> | 50 | #include <asm/kvm_page_track.h> |
@@ -487,16 +488,24 @@ static void kvm_mmu_reset_all_pte_masks(void) | |||
487 | * If the CPU has 46 or less physical address bits, then set an | 488 | * If the CPU has 46 or less physical address bits, then set an |
488 | * appropriate mask to guard against L1TF attacks. Otherwise, it is | 489 | * appropriate mask to guard against L1TF attacks. Otherwise, it is |
489 | * assumed that the CPU is not vulnerable to L1TF. | 490 | * assumed that the CPU is not vulnerable to L1TF. |
491 | * | ||
492 | * Some Intel CPUs address the L1 cache using more PA bits than are | ||
493 | * reported by CPUID. Use the PA width of the L1 cache when possible | ||
494 | * to achieve more effective mitigation, e.g. if system RAM overlaps | ||
495 | * the most significant bits of legal physical address space. | ||
490 | */ | 496 | */ |
491 | low_phys_bits = boot_cpu_data.x86_phys_bits; | 497 | shadow_nonpresent_or_rsvd_mask = 0; |
492 | if (boot_cpu_data.x86_phys_bits < | 498 | low_phys_bits = boot_cpu_data.x86_cache_bits; |
499 | if (boot_cpu_data.x86_cache_bits < | ||
493 | 52 - shadow_nonpresent_or_rsvd_mask_len) { | 500 | 52 - shadow_nonpresent_or_rsvd_mask_len) { |
494 | shadow_nonpresent_or_rsvd_mask = | 501 | shadow_nonpresent_or_rsvd_mask = |
495 | rsvd_bits(boot_cpu_data.x86_phys_bits - | 502 | rsvd_bits(boot_cpu_data.x86_cache_bits - |
496 | shadow_nonpresent_or_rsvd_mask_len, | 503 | shadow_nonpresent_or_rsvd_mask_len, |
497 | boot_cpu_data.x86_phys_bits - 1); | 504 | boot_cpu_data.x86_cache_bits - 1); |
498 | low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; | 505 | low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; |
499 | } | 506 | } else |
507 | WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF)); | ||
508 | |||
500 | shadow_nonpresent_or_rsvd_lower_gfn_mask = | 509 | shadow_nonpresent_or_rsvd_lower_gfn_mask = |
501 | GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); | 510 | GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); |
502 | } | 511 | } |
@@ -2892,7 +2901,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn) | |||
2892 | */ | 2901 | */ |
2893 | (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn)); | 2902 | (!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn)); |
2894 | 2903 | ||
2895 | return true; | 2904 | return !e820__mapped_raw_any(pfn_to_hpa(pfn), |
2905 | pfn_to_hpa(pfn + 1) - 1, | ||
2906 | E820_TYPE_RAM); | ||
2896 | } | 2907 | } |
2897 | 2908 | ||
2898 | /* Bits which may be returned by set_spte() */ | 2909 | /* Bits which may be returned by set_spte() */ |
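Worked example of the reworked L1TF mask (illustrative, assuming the existing shadow_nonpresent_or_rsvd_mask_len value of 5 elsewhere in this file): on a CPU reporting x86_cache_bits == 46, the condition 46 < 52 - 5 holds, so the mask becomes rsvd_bits(41, 45), i.e. physical-address bits 41..45 are stuffed into not-present SPTEs, and low_phys_bits drops from 46 to 41 for shadow_nonpresent_or_rsvd_lower_gfn_mask. On a CPU with 52 cache/physical bits the mask stays 0, and the new WARN_ON_ONCE fires only if that CPU is nevertheless marked vulnerable to L1TF.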
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index e9ea2d45ae66..9f72cc427158 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c | |||
@@ -48,11 +48,6 @@ static bool msr_mtrr_valid(unsigned msr) | |||
48 | return false; | 48 | return false; |
49 | } | 49 | } |
50 | 50 | ||
51 | static bool valid_pat_type(unsigned t) | ||
52 | { | ||
53 | return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */ | ||
54 | } | ||
55 | |||
56 | static bool valid_mtrr_type(unsigned t) | 51 | static bool valid_mtrr_type(unsigned t) |
57 | { | 52 | { |
58 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ | 53 | return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ |
@@ -67,10 +62,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
67 | return false; | 62 | return false; |
68 | 63 | ||
69 | if (msr == MSR_IA32_CR_PAT) { | 64 | if (msr == MSR_IA32_CR_PAT) { |
70 | for (i = 0; i < 8; i++) | 65 | return kvm_pat_valid(data); |
71 | if (!valid_pat_type((data >> (i * 8)) & 0xff)) | ||
72 | return false; | ||
73 | return true; | ||
74 | } else if (msr == MSR_MTRRdefType) { | 66 | } else if (msr == MSR_MTRRdefType) { |
75 | if (data & ~0xcff) | 67 | if (data & ~0xcff) |
76 | return false; | 68 | return false; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 08715034e315..367a47df4ba0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -141,15 +141,35 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, | |||
141 | struct page *page; | 141 | struct page *page; |
142 | 142 | ||
143 | npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); | 143 | npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); |
144 | /* Check if the user is doing something meaningless. */ | 144 | if (likely(npages == 1)) { |
145 | if (unlikely(npages != 1)) | 145 | table = kmap_atomic(page); |
146 | return -EFAULT; | 146 | ret = CMPXCHG(&table[index], orig_pte, new_pte); |
147 | 147 | kunmap_atomic(table); | |
148 | table = kmap_atomic(page); | 148 | |
149 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | 149 | kvm_release_page_dirty(page); |
150 | kunmap_atomic(table); | 150 | } else { |
151 | 151 | struct vm_area_struct *vma; | |
152 | kvm_release_page_dirty(page); | 152 | unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; |
153 | unsigned long pfn; | ||
154 | unsigned long paddr; | ||
155 | |||
156 | down_read(¤t->mm->mmap_sem); | ||
157 | vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); | ||
158 | if (!vma || !(vma->vm_flags & VM_PFNMAP)) { | ||
159 | up_read(¤t->mm->mmap_sem); | ||
160 | return -EFAULT; | ||
161 | } | ||
162 | pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | ||
163 | paddr = pfn << PAGE_SHIFT; | ||
164 | table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); | ||
165 | if (!table) { | ||
166 | up_read(¤t->mm->mmap_sem); | ||
167 | return -EFAULT; | ||
168 | } | ||
169 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | ||
170 | memunmap(table); | ||
171 | up_read(¤t->mm->mmap_sem); | ||
172 | } | ||
153 | 173 | ||
154 | return (ret != orig_pte); | 174 | return (ret != orig_pte); |
155 | } | 175 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6b92eaf4a3b1..a849dcb7fbc5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -2091,7 +2091,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | |||
2091 | init_vmcb(svm); | 2091 | init_vmcb(svm); |
2092 | 2092 | ||
2093 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); | 2093 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true); |
2094 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 2094 | kvm_rdx_write(vcpu, eax); |
2095 | 2095 | ||
2096 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) | 2096 | if (kvm_vcpu_apicv_active(vcpu) && !init_event) |
2097 | avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); | 2097 | avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE); |
@@ -3071,32 +3071,6 @@ static inline bool nested_svm_nmi(struct vcpu_svm *svm) | |||
3071 | return false; | 3071 | return false; |
3072 | } | 3072 | } |
3073 | 3073 | ||
3074 | static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) | ||
3075 | { | ||
3076 | struct page *page; | ||
3077 | |||
3078 | might_sleep(); | ||
3079 | |||
3080 | page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT); | ||
3081 | if (is_error_page(page)) | ||
3082 | goto error; | ||
3083 | |||
3084 | *_page = page; | ||
3085 | |||
3086 | return kmap(page); | ||
3087 | |||
3088 | error: | ||
3089 | kvm_inject_gp(&svm->vcpu, 0); | ||
3090 | |||
3091 | return NULL; | ||
3092 | } | ||
3093 | |||
3094 | static void nested_svm_unmap(struct page *page) | ||
3095 | { | ||
3096 | kunmap(page); | ||
3097 | kvm_release_page_dirty(page); | ||
3098 | } | ||
3099 | |||
3100 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) | 3074 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
3101 | { | 3075 | { |
3102 | unsigned port, size, iopm_len; | 3076 | unsigned port, size, iopm_len; |
@@ -3299,10 +3273,11 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr | |||
3299 | 3273 | ||
3300 | static int nested_svm_vmexit(struct vcpu_svm *svm) | 3274 | static int nested_svm_vmexit(struct vcpu_svm *svm) |
3301 | { | 3275 | { |
3276 | int rc; | ||
3302 | struct vmcb *nested_vmcb; | 3277 | struct vmcb *nested_vmcb; |
3303 | struct vmcb *hsave = svm->nested.hsave; | 3278 | struct vmcb *hsave = svm->nested.hsave; |
3304 | struct vmcb *vmcb = svm->vmcb; | 3279 | struct vmcb *vmcb = svm->vmcb; |
3305 | struct page *page; | 3280 | struct kvm_host_map map; |
3306 | 3281 | ||
3307 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | 3282 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, |
3308 | vmcb->control.exit_info_1, | 3283 | vmcb->control.exit_info_1, |
@@ -3311,9 +3286,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
3311 | vmcb->control.exit_int_info_err, | 3286 | vmcb->control.exit_int_info_err, |
3312 | KVM_ISA_SVM); | 3287 | KVM_ISA_SVM); |
3313 | 3288 | ||
3314 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); | 3289 | rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); |
3315 | if (!nested_vmcb) | 3290 | if (rc) { |
3291 | if (rc == -EINVAL) | ||
3292 | kvm_inject_gp(&svm->vcpu, 0); | ||
3316 | return 1; | 3293 | return 1; |
3294 | } | ||
3295 | |||
3296 | nested_vmcb = map.hva; | ||
3317 | 3297 | ||
3318 | /* Exit Guest-Mode */ | 3298 | /* Exit Guest-Mode */ |
3319 | leave_guest_mode(&svm->vcpu); | 3299 | leave_guest_mode(&svm->vcpu); |
@@ -3408,16 +3388,16 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
3408 | } else { | 3388 | } else { |
3409 | (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); | 3389 | (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); |
3410 | } | 3390 | } |
3411 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); | 3391 | kvm_rax_write(&svm->vcpu, hsave->save.rax); |
3412 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); | 3392 | kvm_rsp_write(&svm->vcpu, hsave->save.rsp); |
3413 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); | 3393 | kvm_rip_write(&svm->vcpu, hsave->save.rip); |
3414 | svm->vmcb->save.dr7 = 0; | 3394 | svm->vmcb->save.dr7 = 0; |
3415 | svm->vmcb->save.cpl = 0; | 3395 | svm->vmcb->save.cpl = 0; |
3416 | svm->vmcb->control.exit_int_info = 0; | 3396 | svm->vmcb->control.exit_int_info = 0; |
3417 | 3397 | ||
3418 | mark_all_dirty(svm->vmcb); | 3398 | mark_all_dirty(svm->vmcb); |
3419 | 3399 | ||
3420 | nested_svm_unmap(page); | 3400 | kvm_vcpu_unmap(&svm->vcpu, &map, true); |
3421 | 3401 | ||
3422 | nested_svm_uninit_mmu_context(&svm->vcpu); | 3402 | nested_svm_uninit_mmu_context(&svm->vcpu); |
3423 | kvm_mmu_reset_context(&svm->vcpu); | 3403 | kvm_mmu_reset_context(&svm->vcpu); |
@@ -3483,7 +3463,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb) | |||
3483 | } | 3463 | } |
3484 | 3464 | ||
3485 | static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, | 3465 | static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, |
3486 | struct vmcb *nested_vmcb, struct page *page) | 3466 | struct vmcb *nested_vmcb, struct kvm_host_map *map) |
3487 | { | 3467 | { |
3488 | if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) | 3468 | if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) |
3489 | svm->vcpu.arch.hflags |= HF_HIF_MASK; | 3469 | svm->vcpu.arch.hflags |= HF_HIF_MASK; |
@@ -3516,9 +3496,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, | |||
3516 | kvm_mmu_reset_context(&svm->vcpu); | 3496 | kvm_mmu_reset_context(&svm->vcpu); |
3517 | 3497 | ||
3518 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; | 3498 | svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; |
3519 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); | 3499 | kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax); |
3520 | kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); | 3500 | kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp); |
3521 | kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); | 3501 | kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip); |
3522 | 3502 | ||
3523 | /* In case we don't even reach vcpu_run, the fields are not updated */ | 3503 | /* In case we don't even reach vcpu_run, the fields are not updated */ |
3524 | svm->vmcb->save.rax = nested_vmcb->save.rax; | 3504 | svm->vmcb->save.rax = nested_vmcb->save.rax; |
@@ -3567,7 +3547,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, | |||
3567 | svm->vmcb->control.pause_filter_thresh = | 3547 | svm->vmcb->control.pause_filter_thresh = |
3568 | nested_vmcb->control.pause_filter_thresh; | 3548 | nested_vmcb->control.pause_filter_thresh; |
3569 | 3549 | ||
3570 | nested_svm_unmap(page); | 3550 | kvm_vcpu_unmap(&svm->vcpu, map, true); |
3571 | 3551 | ||
3572 | /* Enter Guest-Mode */ | 3552 | /* Enter Guest-Mode */ |
3573 | enter_guest_mode(&svm->vcpu); | 3553 | enter_guest_mode(&svm->vcpu); |
@@ -3587,17 +3567,23 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, | |||
3587 | 3567 | ||
3588 | static bool nested_svm_vmrun(struct vcpu_svm *svm) | 3568 | static bool nested_svm_vmrun(struct vcpu_svm *svm) |
3589 | { | 3569 | { |
3570 | int rc; | ||
3590 | struct vmcb *nested_vmcb; | 3571 | struct vmcb *nested_vmcb; |
3591 | struct vmcb *hsave = svm->nested.hsave; | 3572 | struct vmcb *hsave = svm->nested.hsave; |
3592 | struct vmcb *vmcb = svm->vmcb; | 3573 | struct vmcb *vmcb = svm->vmcb; |
3593 | struct page *page; | 3574 | struct kvm_host_map map; |
3594 | u64 vmcb_gpa; | 3575 | u64 vmcb_gpa; |
3595 | 3576 | ||
3596 | vmcb_gpa = svm->vmcb->save.rax; | 3577 | vmcb_gpa = svm->vmcb->save.rax; |
3597 | 3578 | ||
3598 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 3579 | rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); |
3599 | if (!nested_vmcb) | 3580 | if (rc) { |
3581 | if (rc == -EINVAL) | ||
3582 | kvm_inject_gp(&svm->vcpu, 0); | ||
3600 | return false; | 3583 | return false; |
3584 | } | ||
3585 | |||
3586 | nested_vmcb = map.hva; | ||
3601 | 3587 | ||
3602 | if (!nested_vmcb_checks(nested_vmcb)) { | 3588 | if (!nested_vmcb_checks(nested_vmcb)) { |
3603 | nested_vmcb->control.exit_code = SVM_EXIT_ERR; | 3589 | nested_vmcb->control.exit_code = SVM_EXIT_ERR; |
@@ -3605,7 +3591,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
3605 | nested_vmcb->control.exit_info_1 = 0; | 3591 | nested_vmcb->control.exit_info_1 = 0; |
3606 | nested_vmcb->control.exit_info_2 = 0; | 3592 | nested_vmcb->control.exit_info_2 = 0; |
3607 | 3593 | ||
3608 | nested_svm_unmap(page); | 3594 | kvm_vcpu_unmap(&svm->vcpu, &map, true); |
3609 | 3595 | ||
3610 | return false; | 3596 | return false; |
3611 | } | 3597 | } |
@@ -3649,7 +3635,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
3649 | 3635 | ||
3650 | copy_vmcb_control_area(hsave, vmcb); | 3636 | copy_vmcb_control_area(hsave, vmcb); |
3651 | 3637 | ||
3652 | enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page); | 3638 | enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map); |
3653 | 3639 | ||
3654 | return true; | 3640 | return true; |
3655 | } | 3641 | } |
@@ -3673,21 +3659,26 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
3673 | static int vmload_interception(struct vcpu_svm *svm) | 3659 | static int vmload_interception(struct vcpu_svm *svm) |
3674 | { | 3660 | { |
3675 | struct vmcb *nested_vmcb; | 3661 | struct vmcb *nested_vmcb; |
3676 | struct page *page; | 3662 | struct kvm_host_map map; |
3677 | int ret; | 3663 | int ret; |
3678 | 3664 | ||
3679 | if (nested_svm_check_permissions(svm)) | 3665 | if (nested_svm_check_permissions(svm)) |
3680 | return 1; | 3666 | return 1; |
3681 | 3667 | ||
3682 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 3668 | ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map); |
3683 | if (!nested_vmcb) | 3669 | if (ret) { |
3670 | if (ret == -EINVAL) | ||
3671 | kvm_inject_gp(&svm->vcpu, 0); | ||
3684 | return 1; | 3672 | return 1; |
3673 | } | ||
3674 | |||
3675 | nested_vmcb = map.hva; | ||
3685 | 3676 | ||
3686 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 3677 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
3687 | ret = kvm_skip_emulated_instruction(&svm->vcpu); | 3678 | ret = kvm_skip_emulated_instruction(&svm->vcpu); |
3688 | 3679 | ||
3689 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); | 3680 | nested_svm_vmloadsave(nested_vmcb, svm->vmcb); |
3690 | nested_svm_unmap(page); | 3681 | kvm_vcpu_unmap(&svm->vcpu, &map, true); |
3691 | 3682 | ||
3692 | return ret; | 3683 | return ret; |
3693 | } | 3684 | } |
@@ -3695,21 +3686,26 @@ static int vmload_interception(struct vcpu_svm *svm) | |||
3695 | static int vmsave_interception(struct vcpu_svm *svm) | 3686 | static int vmsave_interception(struct vcpu_svm *svm) |
3696 | { | 3687 | { |
3697 | struct vmcb *nested_vmcb; | 3688 | struct vmcb *nested_vmcb; |
3698 | struct page *page; | 3689 | struct kvm_host_map map; |
3699 | int ret; | 3690 | int ret; |
3700 | 3691 | ||
3701 | if (nested_svm_check_permissions(svm)) | 3692 | if (nested_svm_check_permissions(svm)) |
3702 | return 1; | 3693 | return 1; |
3703 | 3694 | ||
3704 | nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); | 3695 | ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map); |
3705 | if (!nested_vmcb) | 3696 | if (ret) { |
3697 | if (ret == -EINVAL) | ||
3698 | kvm_inject_gp(&svm->vcpu, 0); | ||
3706 | return 1; | 3699 | return 1; |
3700 | } | ||
3701 | |||
3702 | nested_vmcb = map.hva; | ||
3707 | 3703 | ||
3708 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 3704 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
3709 | ret = kvm_skip_emulated_instruction(&svm->vcpu); | 3705 | ret = kvm_skip_emulated_instruction(&svm->vcpu); |
3710 | 3706 | ||
3711 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); | 3707 | nested_svm_vmloadsave(svm->vmcb, nested_vmcb); |
3712 | nested_svm_unmap(page); | 3708 | kvm_vcpu_unmap(&svm->vcpu, &map, true); |
3713 | 3709 | ||
3714 | return ret; | 3710 | return ret; |
3715 | } | 3711 | } |
@@ -3791,11 +3787,11 @@ static int invlpga_interception(struct vcpu_svm *svm) | |||
3791 | { | 3787 | { |
3792 | struct kvm_vcpu *vcpu = &svm->vcpu; | 3788 | struct kvm_vcpu *vcpu = &svm->vcpu; |
3793 | 3789 | ||
3794 | trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), | 3790 | trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu), |
3795 | kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); | 3791 | kvm_rax_read(&svm->vcpu)); |
3796 | 3792 | ||
3797 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ | 3793 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ |
3798 | kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); | 3794 | kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu)); |
3799 | 3795 | ||
3800 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 3796 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
3801 | return kvm_skip_emulated_instruction(&svm->vcpu); | 3797 | return kvm_skip_emulated_instruction(&svm->vcpu); |
@@ -3803,7 +3799,7 @@ static int invlpga_interception(struct vcpu_svm *svm) | |||
3803 | 3799 | ||
3804 | static int skinit_interception(struct vcpu_svm *svm) | 3800 | static int skinit_interception(struct vcpu_svm *svm) |
3805 | { | 3801 | { |
3806 | trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); | 3802 | trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu)); |
3807 | 3803 | ||
3808 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 3804 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
3809 | return 1; | 3805 | return 1; |
@@ -3817,7 +3813,7 @@ static int wbinvd_interception(struct vcpu_svm *svm) | |||
3817 | static int xsetbv_interception(struct vcpu_svm *svm) | 3813 | static int xsetbv_interception(struct vcpu_svm *svm) |
3818 | { | 3814 | { |
3819 | u64 new_bv = kvm_read_edx_eax(&svm->vcpu); | 3815 | u64 new_bv = kvm_read_edx_eax(&svm->vcpu); |
3820 | u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); | 3816 | u32 index = kvm_rcx_read(&svm->vcpu); |
3821 | 3817 | ||
3822 | if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { | 3818 | if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { |
3823 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 3819 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
@@ -4213,7 +4209,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
4213 | 4209 | ||
4214 | static int rdmsr_interception(struct vcpu_svm *svm) | 4210 | static int rdmsr_interception(struct vcpu_svm *svm) |
4215 | { | 4211 | { |
4216 | u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); | 4212 | u32 ecx = kvm_rcx_read(&svm->vcpu); |
4217 | struct msr_data msr_info; | 4213 | struct msr_data msr_info; |
4218 | 4214 | ||
4219 | msr_info.index = ecx; | 4215 | msr_info.index = ecx; |
@@ -4225,10 +4221,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
4225 | } else { | 4221 | } else { |
4226 | trace_kvm_msr_read(ecx, msr_info.data); | 4222 | trace_kvm_msr_read(ecx, msr_info.data); |
4227 | 4223 | ||
4228 | kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, | 4224 | kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff); |
4229 | msr_info.data & 0xffffffff); | 4225 | kvm_rdx_write(&svm->vcpu, msr_info.data >> 32); |
4230 | kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, | ||
4231 | msr_info.data >> 32); | ||
4232 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 4226 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
4233 | return kvm_skip_emulated_instruction(&svm->vcpu); | 4227 | return kvm_skip_emulated_instruction(&svm->vcpu); |
4234 | } | 4228 | } |
@@ -4422,7 +4416,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
4422 | static int wrmsr_interception(struct vcpu_svm *svm) | 4416 | static int wrmsr_interception(struct vcpu_svm *svm) |
4423 | { | 4417 | { |
4424 | struct msr_data msr; | 4418 | struct msr_data msr; |
4425 | u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); | 4419 | u32 ecx = kvm_rcx_read(&svm->vcpu); |
4426 | u64 data = kvm_read_edx_eax(&svm->vcpu); | 4420 | u64 data = kvm_read_edx_eax(&svm->vcpu); |
4427 | 4421 | ||
4428 | msr.data = data; | 4422 | msr.data = data; |
@@ -6236,7 +6230,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) | |||
6236 | { | 6230 | { |
6237 | struct vcpu_svm *svm = to_svm(vcpu); | 6231 | struct vcpu_svm *svm = to_svm(vcpu); |
6238 | struct vmcb *nested_vmcb; | 6232 | struct vmcb *nested_vmcb; |
6239 | struct page *page; | 6233 | struct kvm_host_map map; |
6240 | u64 guest; | 6234 | u64 guest; |
6241 | u64 vmcb; | 6235 | u64 vmcb; |
6242 | 6236 | ||
@@ -6244,10 +6238,10 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) | |||
6244 | vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); | 6238 | vmcb = GET_SMSTATE(u64, smstate, 0x7ee0); |
6245 | 6239 | ||
6246 | if (guest) { | 6240 | if (guest) { |
6247 | nested_vmcb = nested_svm_map(svm, vmcb, &page); | 6241 | if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) |
6248 | if (!nested_vmcb) | ||
6249 | return 1; | 6242 | return 1; |
6250 | enter_svm_guest_mode(svm, vmcb, nested_vmcb, page); | 6243 | nested_vmcb = map.hva; |
6244 | enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map); | ||
6251 | } | 6245 | } |
6252 | return 0; | 6246 | return 0; |
6253 | } | 6247 | } |
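[Editor's note] The svm.c hunks above all perform the same conversion: the old nested_svm_map()/nested_svm_unmap() helpers, which handed back a struct page for kmap(), are replaced by kvm_vcpu_map()/kvm_vcpu_unmap() operating on a struct kvm_host_map, so the guest VMCB can be reached even when the backing memory has no struct page. A minimal sketch of the caller pattern these hunks converge on — the function name example_vmcb_access is not in the patch, and the real handlers also skip the emulated instruction and update nested-SVM state:

	static int example_vmcb_access(struct vcpu_svm *svm, gpa_t vmcb_gpa)
	{
		struct kvm_host_map map;
		struct vmcb *nested_vmcb;
		int ret;

		/* Resolve the guest frame to a host mapping. */
		ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
		if (ret) {
			/*
			 * Only a failed translation (-EINVAL) injects #GP,
			 * matching the converted vmload/vmsave handlers above.
			 */
			if (ret == -EINVAL)
				kvm_inject_gp(&svm->vcpu, 0);
			return 1;
		}

		nested_vmcb = map.hva;
		/* ... read or update the guest's VMCB image ... */

		/* Drop the mapping; 'true' marks the backing page dirty. */
		kvm_vcpu_unmap(&svm->vcpu, &map, true);
		return 1;
	}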
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 854e144131c6..d6664ee3d127 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h | |||
@@ -2,6 +2,8 @@ | |||
2 | #ifndef __KVM_X86_VMX_CAPS_H | 2 | #ifndef __KVM_X86_VMX_CAPS_H |
3 | #define __KVM_X86_VMX_CAPS_H | 3 | #define __KVM_X86_VMX_CAPS_H |
4 | 4 | ||
5 | #include <asm/vmx.h> | ||
6 | |||
5 | #include "lapic.h" | 7 | #include "lapic.h" |
6 | 8 | ||
7 | extern bool __read_mostly enable_vpid; | 9 | extern bool __read_mostly enable_vpid; |
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c601d079cd2..f1a69117ac0f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c | |||
@@ -193,10 +193,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) | |||
193 | if (!vmx->nested.hv_evmcs) | 193 | if (!vmx->nested.hv_evmcs) |
194 | return; | 194 | return; |
195 | 195 | ||
196 | kunmap(vmx->nested.hv_evmcs_page); | 196 | kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); |
197 | kvm_release_page_dirty(vmx->nested.hv_evmcs_page); | ||
198 | vmx->nested.hv_evmcs_vmptr = -1ull; | 197 | vmx->nested.hv_evmcs_vmptr = -1ull; |
199 | vmx->nested.hv_evmcs_page = NULL; | ||
200 | vmx->nested.hv_evmcs = NULL; | 198 | vmx->nested.hv_evmcs = NULL; |
201 | } | 199 | } |
202 | 200 | ||
@@ -229,16 +227,9 @@ static void free_nested(struct kvm_vcpu *vcpu) | |||
229 | kvm_release_page_dirty(vmx->nested.apic_access_page); | 227 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
230 | vmx->nested.apic_access_page = NULL; | 228 | vmx->nested.apic_access_page = NULL; |
231 | } | 229 | } |
232 | if (vmx->nested.virtual_apic_page) { | 230 | kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); |
233 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); | 231 | kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); |
234 | vmx->nested.virtual_apic_page = NULL; | 232 | vmx->nested.pi_desc = NULL; |
235 | } | ||
236 | if (vmx->nested.pi_desc_page) { | ||
237 | kunmap(vmx->nested.pi_desc_page); | ||
238 | kvm_release_page_dirty(vmx->nested.pi_desc_page); | ||
239 | vmx->nested.pi_desc_page = NULL; | ||
240 | vmx->nested.pi_desc = NULL; | ||
241 | } | ||
242 | 233 | ||
243 | kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); | 234 | kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); |
244 | 235 | ||
@@ -519,39 +510,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, | |||
519 | struct vmcs12 *vmcs12) | 510 | struct vmcs12 *vmcs12) |
520 | { | 511 | { |
521 | int msr; | 512 | int msr; |
522 | struct page *page; | ||
523 | unsigned long *msr_bitmap_l1; | 513 | unsigned long *msr_bitmap_l1; |
524 | unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; | 514 | unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; |
525 | /* | 515 | struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; |
526 | * pred_cmd & spec_ctrl are trying to verify two things: | ||
527 | * | ||
528 | * 1. L0 gave a permission to L1 to actually passthrough the MSR. This | ||
529 | * ensures that we do not accidentally generate an L02 MSR bitmap | ||
530 | * from the L12 MSR bitmap that is too permissive. | ||
531 | * 2. That L1 or L2s have actually used the MSR. This avoids | ||
532 | * unnecessarily merging of the bitmap if the MSR is unused. This | ||
533 | * works properly because we only update the L01 MSR bitmap lazily. | ||
534 | * So even if L0 should pass L1 these MSRs, the L01 bitmap is only | ||
535 | * updated to reflect this when L1 (or its L2s) actually write to | ||
536 | * the MSR. | ||
537 | */ | ||
538 | bool pred_cmd = !msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); | ||
539 | bool spec_ctrl = !msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); | ||
540 | 516 | ||
541 | /* Nothing to do if the MSR bitmap is not in use. */ | 517 | /* Nothing to do if the MSR bitmap is not in use. */ |
542 | if (!cpu_has_vmx_msr_bitmap() || | 518 | if (!cpu_has_vmx_msr_bitmap() || |
543 | !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) | 519 | !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) |
544 | return false; | 520 | return false; |
545 | 521 | ||
546 | if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && | 522 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) |
547 | !pred_cmd && !spec_ctrl) | ||
548 | return false; | ||
549 | |||
550 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); | ||
551 | if (is_error_page(page)) | ||
552 | return false; | 523 | return false; |
553 | 524 | ||
554 | msr_bitmap_l1 = (unsigned long *)kmap(page); | 525 | msr_bitmap_l1 = (unsigned long *)map->hva; |
555 | 526 | ||
556 | /* | 527 | /* |
557 | * To keep the control flow simple, pay eight 8-byte writes (sixteen | 528 | * To keep the control flow simple, pay eight 8-byte writes (sixteen |
@@ -592,20 +563,42 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, | |||
592 | } | 563 | } |
593 | } | 564 | } |
594 | 565 | ||
595 | if (spec_ctrl) | 566 | /* KVM unconditionally exposes the FS/GS base MSRs to L1. */ |
567 | nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, | ||
568 | MSR_FS_BASE, MSR_TYPE_RW); | ||
569 | |||
570 | nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, | ||
571 | MSR_GS_BASE, MSR_TYPE_RW); | ||
572 | |||
573 | nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, | ||
574 | MSR_KERNEL_GS_BASE, MSR_TYPE_RW); | ||
575 | |||
576 | /* | ||
577 | * Checking the L0->L1 bitmap is trying to verify two things: | ||
578 | * | ||
579 | * 1. L0 gave a permission to L1 to actually passthrough the MSR. This | ||
580 | * ensures that we do not accidentally generate an L02 MSR bitmap | ||
581 | * from the L12 MSR bitmap that is too permissive. | ||
582 | * 2. That L1 or L2s have actually used the MSR. This avoids | ||
583 | * unnecessarily merging of the bitmap if the MSR is unused. This | ||
584 | * works properly because we only update the L01 MSR bitmap lazily. | ||
585 | * So even if L0 should pass L1 these MSRs, the L01 bitmap is only | ||
586 | * updated to reflect this when L1 (or its L2s) actually write to | ||
587 | * the MSR. | ||
588 | */ | ||
589 | if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL)) | ||
596 | nested_vmx_disable_intercept_for_msr( | 590 | nested_vmx_disable_intercept_for_msr( |
597 | msr_bitmap_l1, msr_bitmap_l0, | 591 | msr_bitmap_l1, msr_bitmap_l0, |
598 | MSR_IA32_SPEC_CTRL, | 592 | MSR_IA32_SPEC_CTRL, |
599 | MSR_TYPE_R | MSR_TYPE_W); | 593 | MSR_TYPE_R | MSR_TYPE_W); |
600 | 594 | ||
601 | if (pred_cmd) | 595 | if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD)) |
602 | nested_vmx_disable_intercept_for_msr( | 596 | nested_vmx_disable_intercept_for_msr( |
603 | msr_bitmap_l1, msr_bitmap_l0, | 597 | msr_bitmap_l1, msr_bitmap_l0, |
604 | MSR_IA32_PRED_CMD, | 598 | MSR_IA32_PRED_CMD, |
605 | MSR_TYPE_W); | 599 | MSR_TYPE_W); |
606 | 600 | ||
607 | kunmap(page); | 601 | kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); |
608 | kvm_release_page_clean(page); | ||
609 | 602 | ||
610 | return true; | 603 | return true; |
611 | } | 604 | } |
@@ -613,20 +606,20 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, | |||
613 | static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, | 606 | static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, |
614 | struct vmcs12 *vmcs12) | 607 | struct vmcs12 *vmcs12) |
615 | { | 608 | { |
609 | struct kvm_host_map map; | ||
616 | struct vmcs12 *shadow; | 610 | struct vmcs12 *shadow; |
617 | struct page *page; | ||
618 | 611 | ||
619 | if (!nested_cpu_has_shadow_vmcs(vmcs12) || | 612 | if (!nested_cpu_has_shadow_vmcs(vmcs12) || |
620 | vmcs12->vmcs_link_pointer == -1ull) | 613 | vmcs12->vmcs_link_pointer == -1ull) |
621 | return; | 614 | return; |
622 | 615 | ||
623 | shadow = get_shadow_vmcs12(vcpu); | 616 | shadow = get_shadow_vmcs12(vcpu); |
624 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); | ||
625 | 617 | ||
626 | memcpy(shadow, kmap(page), VMCS12_SIZE); | 618 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) |
619 | return; | ||
627 | 620 | ||
628 | kunmap(page); | 621 | memcpy(shadow, map.hva, VMCS12_SIZE); |
629 | kvm_release_page_clean(page); | 622 | kvm_vcpu_unmap(vcpu, &map, false); |
630 | } | 623 | } |
631 | 624 | ||
632 | static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, | 625 | static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, |
@@ -930,7 +923,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | |||
930 | if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { | 923 | if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { |
931 | if (!nested_cr3_valid(vcpu, cr3)) { | 924 | if (!nested_cr3_valid(vcpu, cr3)) { |
932 | *entry_failure_code = ENTRY_FAIL_DEFAULT; | 925 | *entry_failure_code = ENTRY_FAIL_DEFAULT; |
933 | return 1; | 926 | return -EINVAL; |
934 | } | 927 | } |
935 | 928 | ||
936 | /* | 929 | /* |
@@ -941,7 +934,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne | |||
941 | !nested_ept) { | 934 | !nested_ept) { |
942 | if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { | 935 | if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { |
943 | *entry_failure_code = ENTRY_FAIL_PDPTE; | 936 | *entry_failure_code = ENTRY_FAIL_PDPTE; |
944 | return 1; | 937 | return -EINVAL; |
945 | } | 938 | } |
946 | } | 939 | } |
947 | } | 940 | } |
@@ -1794,13 +1787,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, | |||
1794 | 1787 | ||
1795 | nested_release_evmcs(vcpu); | 1788 | nested_release_evmcs(vcpu); |
1796 | 1789 | ||
1797 | vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page( | 1790 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs), |
1798 | vcpu, assist_page.current_nested_vmcs); | 1791 | &vmx->nested.hv_evmcs_map)) |
1799 | |||
1800 | if (unlikely(is_error_page(vmx->nested.hv_evmcs_page))) | ||
1801 | return 0; | 1792 | return 0; |
1802 | 1793 | ||
1803 | vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page); | 1794 | vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva; |
1804 | 1795 | ||
1805 | /* | 1796 | /* |
1806 | * Currently, KVM only supports eVMCS version 1 | 1797 | * Currently, KVM only supports eVMCS version 1 |
@@ -2373,19 +2364,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
2373 | */ | 2364 | */ |
2374 | if (vmx->emulation_required) { | 2365 | if (vmx->emulation_required) { |
2375 | *entry_failure_code = ENTRY_FAIL_DEFAULT; | 2366 | *entry_failure_code = ENTRY_FAIL_DEFAULT; |
2376 | return 1; | 2367 | return -EINVAL; |
2377 | } | 2368 | } |
2378 | 2369 | ||
2379 | /* Shadow page tables on either EPT or shadow page tables. */ | 2370 | /* Shadow page tables on either EPT or shadow page tables. */ |
2380 | if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), | 2371 | if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), |
2381 | entry_failure_code)) | 2372 | entry_failure_code)) |
2382 | return 1; | 2373 | return -EINVAL; |
2383 | 2374 | ||
2384 | if (!enable_ept) | 2375 | if (!enable_ept) |
2385 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; | 2376 | vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; |
2386 | 2377 | ||
2387 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp); | 2378 | kvm_rsp_write(vcpu, vmcs12->guest_rsp); |
2388 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip); | 2379 | kvm_rip_write(vcpu, vmcs12->guest_rip); |
2389 | return 0; | 2380 | return 0; |
2390 | } | 2381 | } |
2391 | 2382 | ||
@@ -2589,11 +2580,19 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, | |||
2589 | return 0; | 2580 | return 0; |
2590 | } | 2581 | } |
2591 | 2582 | ||
2592 | /* | 2583 | static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, |
2593 | * Checks related to Host Control Registers and MSRs | 2584 | struct vmcs12 *vmcs12) |
2594 | */ | 2585 | { |
2595 | static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, | 2586 | if (nested_check_vm_execution_controls(vcpu, vmcs12) || |
2596 | struct vmcs12 *vmcs12) | 2587 | nested_check_vm_exit_controls(vcpu, vmcs12) || |
2588 | nested_check_vm_entry_controls(vcpu, vmcs12)) | ||
2589 | return -EINVAL; | ||
2590 | |||
2591 | return 0; | ||
2592 | } | ||
2593 | |||
2594 | static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, | ||
2595 | struct vmcs12 *vmcs12) | ||
2597 | { | 2596 | { |
2598 | bool ia32e; | 2597 | bool ia32e; |
2599 | 2598 | ||
@@ -2606,6 +2605,10 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, | |||
2606 | is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) | 2605 | is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) |
2607 | return -EINVAL; | 2606 | return -EINVAL; |
2608 | 2607 | ||
2608 | if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && | ||
2609 | !kvm_pat_valid(vmcs12->host_ia32_pat)) | ||
2610 | return -EINVAL; | ||
2611 | |||
2609 | /* | 2612 | /* |
2610 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the | 2613 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the |
2611 | * IA32_EFER MSR must be 0 in the field for that register. In addition, | 2614 | * IA32_EFER MSR must be 0 in the field for that register. In addition, |
@@ -2624,41 +2627,12 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, | |||
2624 | return 0; | 2627 | return 0; |
2625 | } | 2628 | } |
2626 | 2629 | ||
2627 | /* | ||
2628 | * Checks related to Guest Non-register State | ||
2629 | */ | ||
2630 | static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) | ||
2631 | { | ||
2632 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && | ||
2633 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) | ||
2634 | return -EINVAL; | ||
2635 | |||
2636 | return 0; | ||
2637 | } | ||
2638 | |||
2639 | static int nested_vmx_check_vmentry_prereqs(struct kvm_vcpu *vcpu, | ||
2640 | struct vmcs12 *vmcs12) | ||
2641 | { | ||
2642 | if (nested_check_vm_execution_controls(vcpu, vmcs12) || | ||
2643 | nested_check_vm_exit_controls(vcpu, vmcs12) || | ||
2644 | nested_check_vm_entry_controls(vcpu, vmcs12)) | ||
2645 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
2646 | |||
2647 | if (nested_check_host_control_regs(vcpu, vmcs12)) | ||
2648 | return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; | ||
2649 | |||
2650 | if (nested_check_guest_non_reg_state(vmcs12)) | ||
2651 | return VMXERR_ENTRY_INVALID_CONTROL_FIELD; | ||
2652 | |||
2653 | return 0; | ||
2654 | } | ||
2655 | |||
2656 | static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, | 2630 | static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, |
2657 | struct vmcs12 *vmcs12) | 2631 | struct vmcs12 *vmcs12) |
2658 | { | 2632 | { |
2659 | int r; | 2633 | int r = 0; |
2660 | struct page *page; | ||
2661 | struct vmcs12 *shadow; | 2634 | struct vmcs12 *shadow; |
2635 | struct kvm_host_map map; | ||
2662 | 2636 | ||
2663 | if (vmcs12->vmcs_link_pointer == -1ull) | 2637 | if (vmcs12->vmcs_link_pointer == -1ull) |
2664 | return 0; | 2638 | return 0; |
@@ -2666,23 +2640,34 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, | |||
2666 | if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) | 2640 | if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) |
2667 | return -EINVAL; | 2641 | return -EINVAL; |
2668 | 2642 | ||
2669 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->vmcs_link_pointer); | 2643 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) |
2670 | if (is_error_page(page)) | ||
2671 | return -EINVAL; | 2644 | return -EINVAL; |
2672 | 2645 | ||
2673 | r = 0; | 2646 | shadow = map.hva; |
2674 | shadow = kmap(page); | 2647 | |
2675 | if (shadow->hdr.revision_id != VMCS12_REVISION || | 2648 | if (shadow->hdr.revision_id != VMCS12_REVISION || |
2676 | shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) | 2649 | shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) |
2677 | r = -EINVAL; | 2650 | r = -EINVAL; |
2678 | kunmap(page); | 2651 | |
2679 | kvm_release_page_clean(page); | 2652 | kvm_vcpu_unmap(vcpu, &map, false); |
2680 | return r; | 2653 | return r; |
2681 | } | 2654 | } |
2682 | 2655 | ||
2683 | static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, | 2656 | /* |
2684 | struct vmcs12 *vmcs12, | 2657 | * Checks related to Guest Non-register State |
2685 | u32 *exit_qual) | 2658 | */ |
2659 | static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) | ||
2660 | { | ||
2661 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && | ||
2662 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) | ||
2663 | return -EINVAL; | ||
2664 | |||
2665 | return 0; | ||
2666 | } | ||
2667 | |||
2668 | static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, | ||
2669 | struct vmcs12 *vmcs12, | ||
2670 | u32 *exit_qual) | ||
2686 | { | 2671 | { |
2687 | bool ia32e; | 2672 | bool ia32e; |
2688 | 2673 | ||
@@ -2690,11 +2675,15 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, | |||
2690 | 2675 | ||
2691 | if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || | 2676 | if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || |
2692 | !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) | 2677 | !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) |
2693 | return 1; | 2678 | return -EINVAL; |
2679 | |||
2680 | if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && | ||
2681 | !kvm_pat_valid(vmcs12->guest_ia32_pat)) | ||
2682 | return -EINVAL; | ||
2694 | 2683 | ||
2695 | if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { | 2684 | if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { |
2696 | *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; | 2685 | *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; |
2697 | return 1; | 2686 | return -EINVAL; |
2698 | } | 2687 | } |
2699 | 2688 | ||
2700 | /* | 2689 | /* |
@@ -2713,13 +2702,16 @@ static int nested_vmx_check_vmentry_postreqs(struct kvm_vcpu *vcpu, | |||
2713 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || | 2702 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || |
2714 | ((vmcs12->guest_cr0 & X86_CR0_PG) && | 2703 | ((vmcs12->guest_cr0 & X86_CR0_PG) && |
2715 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) | 2704 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) |
2716 | return 1; | 2705 | return -EINVAL; |
2717 | } | 2706 | } |
2718 | 2707 | ||
2719 | if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && | 2708 | if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && |
2720 | (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || | 2709 | (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || |
2721 | (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) | 2710 | (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) |
2722 | return 1; | 2711 | return -EINVAL; |
2712 | |||
2713 | if (nested_check_guest_non_reg_state(vmcs12)) | ||
2714 | return -EINVAL; | ||
2723 | 2715 | ||
2724 | return 0; | 2716 | return 0; |
2725 | } | 2717 | } |
@@ -2832,6 +2824,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | |||
2832 | { | 2824 | { |
2833 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 2825 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
2834 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2826 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2827 | struct kvm_host_map *map; | ||
2835 | struct page *page; | 2828 | struct page *page; |
2836 | u64 hpa; | 2829 | u64 hpa; |
2837 | 2830 | ||
@@ -2864,20 +2857,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | |||
2864 | } | 2857 | } |
2865 | 2858 | ||
2866 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | 2859 | if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
2867 | if (vmx->nested.virtual_apic_page) { /* shouldn't happen */ | 2860 | map = &vmx->nested.virtual_apic_map; |
2868 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); | ||
2869 | vmx->nested.virtual_apic_page = NULL; | ||
2870 | } | ||
2871 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->virtual_apic_page_addr); | ||
2872 | 2861 | ||
2873 | /* | 2862 | /* |
2874 | * If translation failed, VM entry will fail because | 2863 | * If translation failed, VM entry will fail because |
2875 | * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. | 2864 | * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. |
2876 | */ | 2865 | */ |
2877 | if (!is_error_page(page)) { | 2866 | if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { |
2878 | vmx->nested.virtual_apic_page = page; | 2867 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); |
2879 | hpa = page_to_phys(vmx->nested.virtual_apic_page); | ||
2880 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); | ||
2881 | } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && | 2868 | } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && |
2882 | nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && | 2869 | nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) && |
2883 | !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { | 2870 | !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { |
@@ -2898,26 +2885,15 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) | |||
2898 | } | 2885 | } |
2899 | 2886 | ||
2900 | if (nested_cpu_has_posted_intr(vmcs12)) { | 2887 | if (nested_cpu_has_posted_intr(vmcs12)) { |
2901 | if (vmx->nested.pi_desc_page) { /* shouldn't happen */ | 2888 | map = &vmx->nested.pi_desc_map; |
2902 | kunmap(vmx->nested.pi_desc_page); | 2889 | |
2903 | kvm_release_page_dirty(vmx->nested.pi_desc_page); | 2890 | if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) { |
2904 | vmx->nested.pi_desc_page = NULL; | 2891 | vmx->nested.pi_desc = |
2905 | vmx->nested.pi_desc = NULL; | 2892 | (struct pi_desc *)(((void *)map->hva) + |
2906 | vmcs_write64(POSTED_INTR_DESC_ADDR, -1ull); | 2893 | offset_in_page(vmcs12->posted_intr_desc_addr)); |
2894 | vmcs_write64(POSTED_INTR_DESC_ADDR, | ||
2895 | pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr)); | ||
2907 | } | 2896 | } |
2908 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->posted_intr_desc_addr); | ||
2909 | if (is_error_page(page)) | ||
2910 | return; | ||
2911 | vmx->nested.pi_desc_page = page; | ||
2912 | vmx->nested.pi_desc = kmap(vmx->nested.pi_desc_page); | ||
2913 | vmx->nested.pi_desc = | ||
2914 | (struct pi_desc *)((void *)vmx->nested.pi_desc + | ||
2915 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
2916 | (PAGE_SIZE - 1))); | ||
2917 | vmcs_write64(POSTED_INTR_DESC_ADDR, | ||
2918 | page_to_phys(vmx->nested.pi_desc_page) + | ||
2919 | (unsigned long)(vmcs12->posted_intr_desc_addr & | ||
2920 | (PAGE_SIZE - 1))); | ||
2921 | } | 2897 | } |
2922 | if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) | 2898 | if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) |
2923 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, | 2899 | vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, |
@@ -3000,7 +2976,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | |||
3000 | return -1; | 2976 | return -1; |
3001 | } | 2977 | } |
3002 | 2978 | ||
3003 | if (nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) | 2979 | if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) |
3004 | goto vmentry_fail_vmexit; | 2980 | goto vmentry_fail_vmexit; |
3005 | } | 2981 | } |
3006 | 2982 | ||
@@ -3145,9 +3121,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
3145 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS | 3121 | launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS |
3146 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); | 3122 | : VMXERR_VMRESUME_NONLAUNCHED_VMCS); |
3147 | 3123 | ||
3148 | ret = nested_vmx_check_vmentry_prereqs(vcpu, vmcs12); | 3124 | if (nested_vmx_check_controls(vcpu, vmcs12)) |
3149 | if (ret) | 3125 | return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
3150 | return nested_vmx_failValid(vcpu, ret); | 3126 | |
3127 | if (nested_vmx_check_host_state(vcpu, vmcs12)) | ||
3128 | return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); | ||
3151 | 3129 | ||
3152 | /* | 3130 | /* |
3153 | * We're finally done with prerequisite checking, and can start with | 3131 | * We're finally done with prerequisite checking, and can start with |
@@ -3310,11 +3288,12 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | |||
3310 | 3288 | ||
3311 | max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); | 3289 | max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); |
3312 | if (max_irr != 256) { | 3290 | if (max_irr != 256) { |
3313 | vapic_page = kmap(vmx->nested.virtual_apic_page); | 3291 | vapic_page = vmx->nested.virtual_apic_map.hva; |
3292 | if (!vapic_page) | ||
3293 | return; | ||
3294 | |||
3314 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, | 3295 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, |
3315 | vapic_page, &max_irr); | 3296 | vapic_page, &max_irr); |
3316 | kunmap(vmx->nested.virtual_apic_page); | ||
3317 | |||
3318 | status = vmcs_read16(GUEST_INTR_STATUS); | 3297 | status = vmcs_read16(GUEST_INTR_STATUS); |
3319 | if ((u8)max_irr > ((u8)status & 0xff)) { | 3298 | if ((u8)max_irr > ((u8)status & 0xff)) { |
3320 | status &= ~0xff; | 3299 | status &= ~0xff; |
@@ -3425,8 +3404,8 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
3425 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 3404 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
3426 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | 3405 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); |
3427 | 3406 | ||
3428 | vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); | 3407 | vmcs12->guest_rsp = kvm_rsp_read(vcpu); |
3429 | vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); | 3408 | vmcs12->guest_rip = kvm_rip_read(vcpu); |
3430 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); | 3409 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); |
3431 | 3410 | ||
3432 | vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); | 3411 | vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); |
@@ -3609,8 +3588,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
3609 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 3588 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
3610 | vmx_set_efer(vcpu, vcpu->arch.efer); | 3589 | vmx_set_efer(vcpu, vcpu->arch.efer); |
3611 | 3590 | ||
3612 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); | 3591 | kvm_rsp_write(vcpu, vmcs12->host_rsp); |
3613 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); | 3592 | kvm_rip_write(vcpu, vmcs12->host_rip); |
3614 | vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); | 3593 | vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); |
3615 | vmx_set_interrupt_shadow(vcpu, 0); | 3594 | vmx_set_interrupt_shadow(vcpu, 0); |
3616 | 3595 | ||
@@ -3955,16 +3934,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
3955 | kvm_release_page_dirty(vmx->nested.apic_access_page); | 3934 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
3956 | vmx->nested.apic_access_page = NULL; | 3935 | vmx->nested.apic_access_page = NULL; |
3957 | } | 3936 | } |
3958 | if (vmx->nested.virtual_apic_page) { | 3937 | kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); |
3959 | kvm_release_page_dirty(vmx->nested.virtual_apic_page); | 3938 | kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); |
3960 | vmx->nested.virtual_apic_page = NULL; | 3939 | vmx->nested.pi_desc = NULL; |
3961 | } | ||
3962 | if (vmx->nested.pi_desc_page) { | ||
3963 | kunmap(vmx->nested.pi_desc_page); | ||
3964 | kvm_release_page_dirty(vmx->nested.pi_desc_page); | ||
3965 | vmx->nested.pi_desc_page = NULL; | ||
3966 | vmx->nested.pi_desc = NULL; | ||
3967 | } | ||
3968 | 3940 | ||
3969 | /* | 3941 | /* |
3970 | * We are now running in L2, mmu_notifier will force to reload the | 3942 | * We are now running in L2, mmu_notifier will force to reload the |
@@ -4260,7 +4232,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
4260 | { | 4232 | { |
4261 | int ret; | 4233 | int ret; |
4262 | gpa_t vmptr; | 4234 | gpa_t vmptr; |
4263 | struct page *page; | 4235 | uint32_t revision; |
4264 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4236 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4265 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | 4237 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED |
4266 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | 4238 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
@@ -4306,20 +4278,12 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
4306 | * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; | 4278 | * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; |
4307 | * which replaces physical address width with 32 | 4279 | * which replaces physical address width with 32 |
4308 | */ | 4280 | */ |
4309 | if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) | 4281 | if (!page_address_valid(vcpu, vmptr)) |
4310 | return nested_vmx_failInvalid(vcpu); | ||
4311 | |||
4312 | page = kvm_vcpu_gpa_to_page(vcpu, vmptr); | ||
4313 | if (is_error_page(page)) | ||
4314 | return nested_vmx_failInvalid(vcpu); | 4282 | return nested_vmx_failInvalid(vcpu); |
4315 | 4283 | ||
4316 | if (*(u32 *)kmap(page) != VMCS12_REVISION) { | 4284 | if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) || |
4317 | kunmap(page); | 4285 | revision != VMCS12_REVISION) |
4318 | kvm_release_page_clean(page); | ||
4319 | return nested_vmx_failInvalid(vcpu); | 4286 | return nested_vmx_failInvalid(vcpu); |
4320 | } | ||
4321 | kunmap(page); | ||
4322 | kvm_release_page_clean(page); | ||
4323 | 4287 | ||
4324 | vmx->nested.vmxon_ptr = vmptr; | 4288 | vmx->nested.vmxon_ptr = vmptr; |
4325 | ret = enter_vmx_operation(vcpu); | 4289 | ret = enter_vmx_operation(vcpu); |
@@ -4377,7 +4341,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
4377 | if (nested_vmx_get_vmptr(vcpu, &vmptr)) | 4341 | if (nested_vmx_get_vmptr(vcpu, &vmptr)) |
4378 | return 1; | 4342 | return 1; |
4379 | 4343 | ||
4380 | if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) | 4344 | if (!page_address_valid(vcpu, vmptr)) |
4381 | return nested_vmx_failValid(vcpu, | 4345 | return nested_vmx_failValid(vcpu, |
4382 | VMXERR_VMCLEAR_INVALID_ADDRESS); | 4346 | VMXERR_VMCLEAR_INVALID_ADDRESS); |
4383 | 4347 | ||
@@ -4385,7 +4349,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
4385 | return nested_vmx_failValid(vcpu, | 4349 | return nested_vmx_failValid(vcpu, |
4386 | VMXERR_VMCLEAR_VMXON_POINTER); | 4350 | VMXERR_VMCLEAR_VMXON_POINTER); |
4387 | 4351 | ||
4388 | if (vmx->nested.hv_evmcs_page) { | 4352 | if (vmx->nested.hv_evmcs_map.hva) { |
4389 | if (vmptr == vmx->nested.hv_evmcs_vmptr) | 4353 | if (vmptr == vmx->nested.hv_evmcs_vmptr) |
4390 | nested_release_evmcs(vcpu); | 4354 | nested_release_evmcs(vcpu); |
4391 | } else { | 4355 | } else { |
@@ -4584,7 +4548,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
4584 | if (nested_vmx_get_vmptr(vcpu, &vmptr)) | 4548 | if (nested_vmx_get_vmptr(vcpu, &vmptr)) |
4585 | return 1; | 4549 | return 1; |
4586 | 4550 | ||
4587 | if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) | 4551 | if (!page_address_valid(vcpu, vmptr)) |
4588 | return nested_vmx_failValid(vcpu, | 4552 | return nested_vmx_failValid(vcpu, |
4589 | VMXERR_VMPTRLD_INVALID_ADDRESS); | 4553 | VMXERR_VMPTRLD_INVALID_ADDRESS); |
4590 | 4554 | ||
@@ -4597,11 +4561,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
4597 | return 1; | 4561 | return 1; |
4598 | 4562 | ||
4599 | if (vmx->nested.current_vmptr != vmptr) { | 4563 | if (vmx->nested.current_vmptr != vmptr) { |
4564 | struct kvm_host_map map; | ||
4600 | struct vmcs12 *new_vmcs12; | 4565 | struct vmcs12 *new_vmcs12; |
4601 | struct page *page; | ||
4602 | 4566 | ||
4603 | page = kvm_vcpu_gpa_to_page(vcpu, vmptr); | 4567 | if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) { |
4604 | if (is_error_page(page)) { | ||
4605 | /* | 4568 | /* |
4606 | * Reads from an unbacked page return all 1s, | 4569 | * Reads from an unbacked page return all 1s, |
4607 | * which means that the 32 bits located at the | 4570 | * which means that the 32 bits located at the |
@@ -4611,12 +4574,13 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
4611 | return nested_vmx_failValid(vcpu, | 4574 | return nested_vmx_failValid(vcpu, |
4612 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); | 4575 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); |
4613 | } | 4576 | } |
4614 | new_vmcs12 = kmap(page); | 4577 | |
4578 | new_vmcs12 = map.hva; | ||
4579 | |||
4615 | if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || | 4580 | if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || |
4616 | (new_vmcs12->hdr.shadow_vmcs && | 4581 | (new_vmcs12->hdr.shadow_vmcs && |
4617 | !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { | 4582 | !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { |
4618 | kunmap(page); | 4583 | kvm_vcpu_unmap(vcpu, &map, false); |
4619 | kvm_release_page_clean(page); | ||
4620 | return nested_vmx_failValid(vcpu, | 4584 | return nested_vmx_failValid(vcpu, |
4621 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); | 4585 | VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); |
4622 | } | 4586 | } |
@@ -4628,8 +4592,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
4628 | * cached. | 4592 | * cached. |
4629 | */ | 4593 | */ |
4630 | memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); | 4594 | memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE); |
4631 | kunmap(page); | 4595 | kvm_vcpu_unmap(vcpu, &map, false); |
4632 | kvm_release_page_clean(page); | ||
4633 | 4596 | ||
4634 | set_current_vmptr(vmx, vmptr); | 4597 | set_current_vmptr(vmx, vmptr); |
4635 | } | 4598 | } |
@@ -4804,7 +4767,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) | |||
4804 | static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, | 4767 | static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, |
4805 | struct vmcs12 *vmcs12) | 4768 | struct vmcs12 *vmcs12) |
4806 | { | 4769 | { |
4807 | u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; | 4770 | u32 index = kvm_rcx_read(vcpu); |
4808 | u64 address; | 4771 | u64 address; |
4809 | bool accessed_dirty; | 4772 | bool accessed_dirty; |
4810 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; | 4773 | struct kvm_mmu *mmu = vcpu->arch.walk_mmu; |
@@ -4850,7 +4813,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) | |||
4850 | { | 4813 | { |
4851 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4814 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4852 | struct vmcs12 *vmcs12; | 4815 | struct vmcs12 *vmcs12; |
4853 | u32 function = vcpu->arch.regs[VCPU_REGS_RAX]; | 4816 | u32 function = kvm_rax_read(vcpu); |
4854 | 4817 | ||
4855 | /* | 4818 | /* |
4856 | * VMFUNC is only supported for nested guests, but we always enable the | 4819 | * VMFUNC is only supported for nested guests, but we always enable the |
@@ -4936,7 +4899,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | |||
4936 | static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | 4899 | static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, |
4937 | struct vmcs12 *vmcs12, u32 exit_reason) | 4900 | struct vmcs12 *vmcs12, u32 exit_reason) |
4938 | { | 4901 | { |
4939 | u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; | 4902 | u32 msr_index = kvm_rcx_read(vcpu); |
4940 | gpa_t bitmap; | 4903 | gpa_t bitmap; |
4941 | 4904 | ||
4942 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) | 4905 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) |
@@ -5373,9 +5336,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5373 | if (kvm_state->format != 0) | 5336 | if (kvm_state->format != 0) |
5374 | return -EINVAL; | 5337 | return -EINVAL; |
5375 | 5338 | ||
5376 | if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) | ||
5377 | nested_enable_evmcs(vcpu, NULL); | ||
5378 | |||
5379 | if (!nested_vmx_allowed(vcpu)) | 5339 | if (!nested_vmx_allowed(vcpu)) |
5380 | return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; | 5340 | return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; |
5381 | 5341 | ||
@@ -5417,6 +5377,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5417 | if (kvm_state->vmx.vmxon_pa == -1ull) | 5377 | if (kvm_state->vmx.vmxon_pa == -1ull) |
5418 | return 0; | 5378 | return 0; |
5419 | 5379 | ||
5380 | if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) | ||
5381 | nested_enable_evmcs(vcpu, NULL); | ||
5382 | |||
5420 | vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; | 5383 | vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa; |
5421 | ret = enter_vmx_operation(vcpu); | 5384 | ret = enter_vmx_operation(vcpu); |
5422 | if (ret) | 5385 | if (ret) |
@@ -5460,9 +5423,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5460 | if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) | 5423 | if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) |
5461 | return 0; | 5424 | return 0; |
5462 | 5425 | ||
5463 | vmx->nested.nested_run_pending = | ||
5464 | !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); | ||
5465 | |||
5466 | if (nested_cpu_has_shadow_vmcs(vmcs12) && | 5426 | if (nested_cpu_has_shadow_vmcs(vmcs12) && |
5467 | vmcs12->vmcs_link_pointer != -1ull) { | 5427 | vmcs12->vmcs_link_pointer != -1ull) { |
5468 | struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); | 5428 | struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); |
@@ -5480,14 +5440,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, | |||
5480 | return -EINVAL; | 5440 | return -EINVAL; |
5481 | } | 5441 | } |
5482 | 5442 | ||
5483 | if (nested_vmx_check_vmentry_prereqs(vcpu, vmcs12) || | 5443 | if (nested_vmx_check_controls(vcpu, vmcs12) || |
5484 | nested_vmx_check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) | 5444 | nested_vmx_check_host_state(vcpu, vmcs12) || |
5445 | nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) | ||
5485 | return -EINVAL; | 5446 | return -EINVAL; |
5486 | 5447 | ||
5487 | vmx->nested.dirty_vmcs12 = true; | 5448 | vmx->nested.dirty_vmcs12 = true; |
5449 | vmx->nested.nested_run_pending = | ||
5450 | !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); | ||
5451 | |||
5488 | ret = nested_vmx_enter_non_root_mode(vcpu, false); | 5452 | ret = nested_vmx_enter_non_root_mode(vcpu, false); |
5489 | if (ret) | 5453 | if (ret) { |
5454 | vmx->nested.nested_run_pending = 0; | ||
5490 | return -EINVAL; | 5455 | return -EINVAL; |
5456 | } | ||
5491 | 5457 | ||
5492 | return 0; | 5458 | return 0; |
5493 | } | 5459 | } |
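[Editor's note] Several hunks in nested.c (and earlier in svm.c) also switch from kvm_register_read()/kvm_register_write() with an explicit VCPU_REGS_* index to dedicated per-register helpers such as kvm_rcx_read(), kvm_rax_write() and kvm_rsp_write(). A sketch of their shape under the assumption that, unlike RIP/RSP, these GPRs are always valid in vcpu->arch.regs and need no register-cache bookkeeping; the real helpers are generated per register by a macro in arch/x86/kvm/kvm_cache_regs.h:

	/* Sketch only: direct access, no availability/dirty tracking needed. */
	static inline unsigned long kvm_rcx_read(struct kvm_vcpu *vcpu)
	{
		return vcpu->arch.regs[VCPU_REGS_RCX];
	}

	static inline void kvm_rax_write(struct kvm_vcpu *vcpu, unsigned long val)
	{
		vcpu->arch.regs[VCPU_REGS_RAX] = val;
	}

Besides being shorter at the call sites, the dedicated names make it harder to pass a mismatched register index, which is the main point of the conversion visible above.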
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 5ab4a364348e..f8502c376b37 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c | |||
@@ -227,7 +227,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
227 | } | 227 | } |
228 | break; | 228 | break; |
229 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | 229 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
230 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | 230 | if (!(data & pmu->global_ovf_ctrl_mask)) { |
231 | if (!msr_info->host_initiated) | 231 | if (!msr_info->host_initiated) |
232 | pmu->global_status &= ~data; | 232 | pmu->global_status &= ~data; |
233 | pmu->global_ovf_ctrl = data; | 233 | pmu->global_ovf_ctrl = data; |
@@ -297,6 +297,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) | |||
297 | pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) | | 297 | pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) | |
298 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); | 298 | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); |
299 | pmu->global_ctrl_mask = ~pmu->global_ctrl; | 299 | pmu->global_ctrl_mask = ~pmu->global_ctrl; |
300 | pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask | ||
301 | & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF | | ||
302 | MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD); | ||
303 | if (kvm_x86_ops->pt_supported()) | ||
304 | pmu->global_ovf_ctrl_mask &= | ||
305 | ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI; | ||
300 | 306 | ||
301 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); | 307 | entry = kvm_find_cpuid_entry(vcpu, 7, 0); |
302 | if (entry && | 308 | if (entry && |
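[Editor's note] The pmu_intel.c change replaces the open-coded ~(3ull<<62) test in intel_pmu_set_msr() with a precomputed global_ovf_ctrl_mask of the bits that are reserved in GLOBAL_OVF_CTRL. A worked example, assuming for illustration a guest PMU with 4 general-purpose and 3 fixed counters (INTEL_PMC_IDX_FIXED = 32) and no Processor Trace:

	global_ctrl          = ((1ull << 4) - 1) | (((1ull << 3) - 1) << 32)
	                     = 0x000000070000000f            /* bits 0-3, 32-34 */
	global_ctrl_mask     = ~global_ctrl
	                     = 0xfffffff8fffffff0
	global_ovf_ctrl_mask = global_ctrl_mask & ~((1ull << 62) | (1ull << 63))
	                     = 0x3ffffff8fffffff0             /* OVF_BUF, COND_CHGD allowed */

With Processor Trace exposed, bit 55 (TRACE_TOPA_PMI) is additionally cleared from the mask. A guest write to MSR_CORE_PERF_GLOBAL_OVF_CTRL now faults only if it sets a bit that remains in global_ovf_ctrl_mask.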
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e1fa935a545f..1ac167614032 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c | |||
@@ -1692,6 +1692,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1692 | case MSR_IA32_SYSENTER_ESP: | 1692 | case MSR_IA32_SYSENTER_ESP: |
1693 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); | 1693 | msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); |
1694 | break; | 1694 | break; |
1695 | case MSR_IA32_POWER_CTL: | ||
1696 | msr_info->data = vmx->msr_ia32_power_ctl; | ||
1697 | break; | ||
1695 | case MSR_IA32_BNDCFGS: | 1698 | case MSR_IA32_BNDCFGS: |
1696 | if (!kvm_mpx_supported() || | 1699 | if (!kvm_mpx_supported() || |
1697 | (!msr_info->host_initiated && | 1700 | (!msr_info->host_initiated && |
@@ -1822,6 +1825,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1822 | case MSR_IA32_SYSENTER_ESP: | 1825 | case MSR_IA32_SYSENTER_ESP: |
1823 | vmcs_writel(GUEST_SYSENTER_ESP, data); | 1826 | vmcs_writel(GUEST_SYSENTER_ESP, data); |
1824 | break; | 1827 | break; |
1828 | case MSR_IA32_POWER_CTL: | ||
1829 | vmx->msr_ia32_power_ctl = data; | ||
1830 | break; | ||
1825 | case MSR_IA32_BNDCFGS: | 1831 | case MSR_IA32_BNDCFGS: |
1826 | if (!kvm_mpx_supported() || | 1832 | if (!kvm_mpx_supported() || |
1827 | (!msr_info->host_initiated && | 1833 | (!msr_info->host_initiated && |
@@ -1891,7 +1897,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
1891 | break; | 1897 | break; |
1892 | case MSR_IA32_CR_PAT: | 1898 | case MSR_IA32_CR_PAT: |
1893 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | 1899 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
1894 | if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) | 1900 | if (!kvm_pat_valid(data)) |
1895 | return 1; | 1901 | return 1; |
1896 | vmcs_write64(GUEST_IA32_PAT, data); | 1902 | vmcs_write64(GUEST_IA32_PAT, data); |
1897 | vcpu->arch.pat = data; | 1903 | vcpu->arch.pat = data; |
@@ -2288,7 +2294,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, | |||
2288 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2294 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
2289 | #endif | 2295 | #endif |
2290 | opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | | 2296 | opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | |
2291 | VM_EXIT_SAVE_IA32_PAT | | ||
2292 | VM_EXIT_LOAD_IA32_PAT | | 2297 | VM_EXIT_LOAD_IA32_PAT | |
2293 | VM_EXIT_LOAD_IA32_EFER | | 2298 | VM_EXIT_LOAD_IA32_EFER | |
2294 | VM_EXIT_CLEAR_BNDCFGS | | 2299 | VM_EXIT_CLEAR_BNDCFGS | |
@@ -3619,14 +3624,13 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) | |||
3619 | 3624 | ||
3620 | if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || | 3625 | if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || |
3621 | !nested_cpu_has_vid(get_vmcs12(vcpu)) || | 3626 | !nested_cpu_has_vid(get_vmcs12(vcpu)) || |
3622 | WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) | 3627 | WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn)) |
3623 | return false; | 3628 | return false; |
3624 | 3629 | ||
3625 | rvi = vmx_get_rvi(); | 3630 | rvi = vmx_get_rvi(); |
3626 | 3631 | ||
3627 | vapic_page = kmap(vmx->nested.virtual_apic_page); | 3632 | vapic_page = vmx->nested.virtual_apic_map.hva; |
3628 | vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); | 3633 | vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); |
3629 | kunmap(vmx->nested.virtual_apic_page); | ||
3630 | 3634 | ||
3631 | return ((rvi & 0xf0) > (vppr & 0xf0)); | 3635 | return ((rvi & 0xf0) > (vppr & 0xf0)); |
3632 | } | 3636 | } |
@@ -4827,7 +4831,7 @@ static int handle_cpuid(struct kvm_vcpu *vcpu) | |||
4827 | 4831 | ||
4828 | static int handle_rdmsr(struct kvm_vcpu *vcpu) | 4832 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
4829 | { | 4833 | { |
4830 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 4834 | u32 ecx = kvm_rcx_read(vcpu); |
4831 | struct msr_data msr_info; | 4835 | struct msr_data msr_info; |
4832 | 4836 | ||
4833 | msr_info.index = ecx; | 4837 | msr_info.index = ecx; |
@@ -4840,18 +4844,16 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu) | |||
4840 | 4844 | ||
4841 | trace_kvm_msr_read(ecx, msr_info.data); | 4845 | trace_kvm_msr_read(ecx, msr_info.data); |
4842 | 4846 | ||
4843 | /* FIXME: handling of bits 32:63 of rax, rdx */ | 4847 | kvm_rax_write(vcpu, msr_info.data & -1u); |
4844 | vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u; | 4848 | kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u); |
4845 | vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u; | ||
4846 | return kvm_skip_emulated_instruction(vcpu); | 4849 | return kvm_skip_emulated_instruction(vcpu); |
4847 | } | 4850 | } |
4848 | 4851 | ||
4849 | static int handle_wrmsr(struct kvm_vcpu *vcpu) | 4852 | static int handle_wrmsr(struct kvm_vcpu *vcpu) |
4850 | { | 4853 | { |
4851 | struct msr_data msr; | 4854 | struct msr_data msr; |
4852 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 4855 | u32 ecx = kvm_rcx_read(vcpu); |
4853 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 4856 | u64 data = kvm_read_edx_eax(vcpu); |
4854 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | ||
4855 | 4857 | ||
4856 | msr.data = data; | 4858 | msr.data = data; |
4857 | msr.index = ecx; | 4859 | msr.index = ecx; |
@@ -4922,7 +4924,7 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu) | |||
4922 | static int handle_xsetbv(struct kvm_vcpu *vcpu) | 4924 | static int handle_xsetbv(struct kvm_vcpu *vcpu) |
4923 | { | 4925 | { |
4924 | u64 new_bv = kvm_read_edx_eax(vcpu); | 4926 | u64 new_bv = kvm_read_edx_eax(vcpu); |
4925 | u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4927 | u32 index = kvm_rcx_read(vcpu); |
4926 | 4928 | ||
4927 | if (kvm_set_xcr(vcpu, index, new_bv) == 0) | 4929 | if (kvm_set_xcr(vcpu, index, new_bv) == 0) |
4928 | return kvm_skip_emulated_instruction(vcpu); | 4930 | return kvm_skip_emulated_instruction(vcpu); |
@@ -5723,8 +5725,16 @@ void dump_vmcs(void) | |||
5723 | if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) | 5725 | if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) |
5724 | pr_err("TSC Multiplier = 0x%016llx\n", | 5726 | pr_err("TSC Multiplier = 0x%016llx\n", |
5725 | vmcs_read64(TSC_MULTIPLIER)); | 5727 | vmcs_read64(TSC_MULTIPLIER)); |
5726 | if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) | 5728 | if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) { |
5727 | pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); | 5729 | if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { |
5730 | u16 status = vmcs_read16(GUEST_INTR_STATUS); | ||
5731 | pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff); | ||
5732 | } | ||
5733 | pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); | ||
5734 | if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) | ||
5735 | pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR)); | ||
5736 | pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR)); | ||
5737 | } | ||
5728 | if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) | 5738 | if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) |
5729 | pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); | 5739 | pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); |
 	if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
@@ -6856,30 +6866,6 @@ static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
 	}
 }
 
-static bool guest_cpuid_has_pmu(struct kvm_vcpu *vcpu)
-{
-	struct kvm_cpuid_entry2 *entry;
-	union cpuid10_eax eax;
-
-	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
-	if (!entry)
-		return false;
-
-	eax.full = entry->eax;
-	return (eax.split.version_id > 0);
-}
-
-static void nested_vmx_procbased_ctls_update(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	bool pmu_enabled = guest_cpuid_has_pmu(vcpu);
-
-	if (pmu_enabled)
-		vmx->nested.msrs.procbased_ctls_high |= CPU_BASED_RDPMC_EXITING;
-	else
-		vmx->nested.msrs.procbased_ctls_high &= ~CPU_BASED_RDPMC_EXITING;
-}
-
 static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6968,7 +6954,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	if (nested_vmx_allowed(vcpu)) {
 		nested_vmx_cr_fixed1_bits_update(vcpu);
 		nested_vmx_entry_exit_ctls_update(vcpu);
-		nested_vmx_procbased_ctls_update(vcpu);
 	}
 
 	if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -7028,7 +7013,8 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
 	return 0;
 }
 
-static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+			    bool *expired)
 {
 	struct vcpu_vmx *vmx;
 	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
@@ -7051,10 +7037,9 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 
 	/* Convert to host delta tsc if tsc scaling is enabled */
 	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
-	    u64_shl_div_u64(delta_tsc,
+	    delta_tsc && u64_shl_div_u64(delta_tsc,
 			    kvm_tsc_scaling_ratio_frac_bits,
-			    vcpu->arch.tsc_scaling_ratio,
-			    &delta_tsc))
+			    vcpu->arch.tsc_scaling_ratio, &delta_tsc))
 		return -ERANGE;
 
 	/*
@@ -7067,7 +7052,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 		return -ERANGE;
 
 	vmx->hv_deadline_tsc = tscl + delta_tsc;
-	return delta_tsc == 0;
+	*expired = !delta_tsc;
+	return 0;
 }
 
 static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
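The two hunks above change vmx_set_hv_timer()'s contract: instead of overloading the return value (1 used to mean "the deadline has already passed"), expiry is now reported through a bool *expired out parameter and the return value is reserved for hard failures such as -ERANGE. A minimal standalone sketch of that calling convention; set_hv_timer_model and max_delta are invented names for illustration, not KVM code:

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Toy model of the new convention: a negative return means the deadline
 * cannot be programmed at all (the caller falls back to a software timer),
 * while *expired tells the caller to fire the timer immediately.
 */
static inline int set_hv_timer_model(uint64_t now_tsc, uint64_t deadline_tsc,
				     uint64_t max_delta, bool *expired)
{
	uint64_t delta = deadline_tsc > now_tsc ? deadline_tsc - now_tsc : 0;

	if (delta > max_delta)
		return -ERANGE;

	*expired = (delta == 0);
	return 0;
}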
@@ -7104,9 +7090,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
 {
 	struct vmcs12 *vmcs12;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	gpa_t gpa;
-	struct page *page = NULL;
-	u64 *pml_address;
+	gpa_t gpa, dst;
 
 	if (is_guest_mode(vcpu)) {
 		WARN_ON_ONCE(vmx->nested.pml_full);
@@ -7126,15 +7110,13 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
 		}
 
 		gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+		dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
 
-		page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->pml_address);
-		if (is_error_page(page))
+		if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
+					 offset_in_page(dst), sizeof(gpa)))
 			return 0;
 
-		pml_address = kmap(page);
-		pml_address[vmcs12->guest_pml_index--] = gpa;
-		kunmap(page);
-		kvm_release_page_clean(page);
+		vmcs12->guest_pml_index--;
 	}
 
 	return 0;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index f879529906b4..63d37ccce3dc 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,8 +142,11 @@ struct nested_vmx {
 	 * pointers, so we must keep them pinned while L2 runs.
 	 */
 	struct page *apic_access_page;
-	struct page *virtual_apic_page;
-	struct page *pi_desc_page;
+	struct kvm_host_map virtual_apic_map;
+	struct kvm_host_map pi_desc_map;
+
+	struct kvm_host_map msr_bitmap_map;
+
 	struct pi_desc *pi_desc;
 	bool pi_pending;
 	u16 posted_intr_nv;
@@ -169,7 +172,7 @@ struct nested_vmx {
 	} smm;
 
 	gpa_t hv_evmcs_vmptr;
-	struct page *hv_evmcs_page;
+	struct kvm_host_map hv_evmcs_map;
 	struct hv_enlightened_vmcs *hv_evmcs;
 };
 
@@ -257,6 +260,8 @@ struct vcpu_vmx {
 
 	unsigned long host_debugctlmsr;
 
+	u64 msr_ia32_power_ctl;
+
 	/*
 	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
 	 * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b9591abde62a..536b78c4af6e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1100,15 +1100,15 @@ EXPORT_SYMBOL_GPL(kvm_get_dr);
 
 bool kvm_rdpmc(struct kvm_vcpu *vcpu)
 {
-	u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+	u32 ecx = kvm_rcx_read(vcpu);
 	u64 data;
 	int err;
 
 	err = kvm_pmu_rdpmc(vcpu, ecx, &data);
 	if (err)
 		return err;
-	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
-	kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
+	kvm_rax_write(vcpu, (u32)data);
+	kvm_rdx_write(vcpu, data >> 32);
 	return err;
 }
 EXPORT_SYMBOL_GPL(kvm_rdpmc);
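This hunk, like the later x86.c ones, replaces kvm_register_read(vcpu, VCPU_REGS_RCX)-style calls with dedicated per-register helpers such as kvm_rcx_read() and kvm_rax_write(). A standalone sketch of how such per-register wrappers can be macro-generated; the toy_* names and the struct are illustrative stand-ins, not KVM's actual definitions:

#include <stdint.h>

enum { VCPU_REGS_RAX, VCPU_REGS_RCX, VCPU_REGS_RDX, NR_VCPU_REGS };

struct toy_vcpu {
	uint64_t regs[NR_VCPU_REGS];
};

/* Expand to a pair of read/write helpers for one general-purpose register. */
#define BUILD_GPR_ACCESSORS(lname, uname)					\
static inline uint64_t toy_##lname##_read(struct toy_vcpu *vcpu)		\
{										\
	return vcpu->regs[VCPU_REGS_##uname];					\
}										\
static inline void toy_##lname##_write(struct toy_vcpu *vcpu, uint64_t val)	\
{										\
	vcpu->regs[VCPU_REGS_##uname] = val;					\
}

BUILD_GPR_ACCESSORS(rax, RAX)
BUILD_GPR_ACCESSORS(rcx, RCX)
BUILD_GPR_ACCESSORS(rdx, RDX)

With wrappers of this shape, the kvm_rdpmc() body above reads as kvm_rcx_read()/kvm_rax_write() without spelling out the register enum at every call site.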
@@ -1174,6 +1174,9 @@ static u32 emulated_msrs[] = {
 	MSR_PLATFORM_INFO,
 	MSR_MISC_FEATURES_ENABLES,
 	MSR_AMD64_VIRT_SPEC_CTRL,
+	MSR_IA32_POWER_CTL,
+
+	MSR_K7_HWCR,
 };
 
 static unsigned num_emulated_msrs;
@@ -1262,31 +1265,49 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 	return 0;
 }
 
-bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
+static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	if (efer & efer_reserved_bits)
-		return false;
-
 	if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
 		return false;
 
 	if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
 		return false;
+
+	if (efer & (EFER_LME | EFER_LMA) &&
+	    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
+		return false;
+
+	if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
+		return false;
 
 	return true;
+
+}
+bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
+{
+	if (efer & efer_reserved_bits)
+		return false;
+
+	return __kvm_valid_efer(vcpu, efer);
 }
 EXPORT_SYMBOL_GPL(kvm_valid_efer);
 
-static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
+static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u64 old_efer = vcpu->arch.efer;
+	u64 efer = msr_info->data;
 
-	if (!kvm_valid_efer(vcpu, efer))
-		return 1;
+	if (efer & efer_reserved_bits)
+		return false;
 
-	if (is_paging(vcpu)
-	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
-		return 1;
+	if (!msr_info->host_initiated) {
+		if (!__kvm_valid_efer(vcpu, efer))
+			return 1;
+
+		if (is_paging(vcpu) &&
+		    (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
+			return 1;
+	}
 
 	efer &= ~EFER_LMA;
 	efer |= vcpu->arch.efer & EFER_LMA;
@@ -2279,6 +2300,18 @@ static void kvmclock_sync_fn(struct work_struct *work)
 					KVMCLOCK_SYNC_PERIOD);
 }
 
+/*
+ * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
+ */
+static bool can_set_mci_status(struct kvm_vcpu *vcpu)
+{
+	/* McStatusWrEn enabled? */
+	if (guest_cpuid_is_amd(vcpu))
+		return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
+
+	return false;
+}
+
 static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u64 mcg_cap = vcpu->arch.mcg_cap;
@@ -2310,9 +2343,14 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if ((offset & 0x3) == 0 &&
 		    data != 0 && (data | (1 << 10)) != ~(u64)0)
 			return -1;
+
+		/* MCi_STATUS */
 		if (!msr_info->host_initiated &&
-		    (offset & 0x3) == 1 && data != 0)
-			return -1;
+		    (offset & 0x3) == 1 && data != 0) {
+			if (!can_set_mci_status(vcpu))
+				return -1;
+		}
+
 		vcpu->arch.mce_banks[offset] = data;
 		break;
 	}
@@ -2456,13 +2494,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.arch_capabilities = data;
 		break;
 	case MSR_EFER:
-		return set_efer(vcpu, data);
+		return set_efer(vcpu, msr_info);
 	case MSR_K7_HWCR:
 		data &= ~(u64)0x40;	/* ignore flush filter disable */
 		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
 		data &= ~(u64)0x8;	/* ignore TLB cache disable */
-		data &= ~(u64)0x40000;  /* ignore Mc status write enable */
-		if (data != 0) {
+
+		/* Handle McStatusWrEn */
+		if (data == BIT_ULL(18)) {
+			vcpu->arch.msr_hwcr = data;
+		} else if (data != 0) {
 			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
 				    data);
 			return 1;
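Taken together, the emulated_msrs, can_set_mci_status() and MSR_K7_HWCR hunks above let an AMD guest set HWCR bit 18 (McStatusWrEn) and, while it is set, write non-zero values into the MCi_STATUS bank registers instead of having such writes rejected. Each MCE bank spans four MSRs (CTL, STATUS, ADDR, MISC), which is why (offset & 0x3) == 1 selects the STATUS register. A standalone sketch of the same gate, with invented helper names:

#include <stdbool.h>
#include <stdint.h>

#define HWCR_MC_STATUS_WR_EN	(1ULL << 18)	/* McStatusWrEn */

/* Bank layout is CTL, STATUS, ADDR, MISC; offset 1 within a bank is STATUS. */
static inline bool is_mci_status(uint32_t bank_offset)
{
	return (bank_offset & 0x3) == 1;
}

/*
 * Guest-initiated non-zero writes to MCi_STATUS are accepted only when the
 * (AMD) guest has enabled McStatusWrEn; host-initiated writes always pass.
 * Writes to the other bank registers are not this helper's concern.
 */
static inline bool allow_mci_status_write(uint32_t bank_offset, uint64_t msr_hwcr,
					  bool guest_is_amd, bool host_initiated,
					  uint64_t data)
{
	if (!is_mci_status(bank_offset) || host_initiated || data == 0)
		return true;

	return guest_is_amd && (msr_hwcr & HWCR_MC_STATUS_WR_EN);
}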
@@ -2736,7 +2777,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_K8_SYSCFG:
 	case MSR_K8_TSEG_ADDR:
 	case MSR_K8_TSEG_MASK:
-	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
 	case MSR_K8_INT_PENDING_MSG:
 	case MSR_AMD64_NB_CFG:
@@ -2900,6 +2940,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_MISC_FEATURES_ENABLES:
 		msr_info->data = vcpu->arch.msr_misc_features_enables;
 		break;
+	case MSR_K7_HWCR:
+		msr_info->data = vcpu->arch.msr_hwcr;
+		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
 			return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -3079,9 +3122,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
-	case KVM_CAP_NR_MEMSLOTS:
-		r = KVM_USER_MEM_SLOTS;
-		break;
 	case KVM_CAP_PV_MMU:	/* obsolete */
 		r = 0;
 		break;
@@ -5521,9 +5561,9 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 				     unsigned int bytes,
 				     struct x86_exception *exception)
 {
+	struct kvm_host_map map;
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	gpa_t gpa;
-	struct page *page;
 	char *kaddr;
 	bool exchanged;
 
@@ -5540,12 +5580,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
 		goto emul_write;
 
-	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
-	if (is_error_page(page))
+	if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
 		goto emul_write;
 
-	kaddr = kmap_atomic(page);
-	kaddr += offset_in_page(gpa);
+	kaddr = map.hva + offset_in_page(gpa);
+
 	switch (bytes) {
 	case 1:
 		exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
@@ -5562,13 +5601,12 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	default:
 		BUG();
 	}
-	kunmap_atomic(kaddr);
-	kvm_release_page_dirty(page);
+
+	kvm_vcpu_unmap(vcpu, &map, true);
 
 	if (!exchanged)
 		return X86EMUL_CMPXCHG_FAILED;
 
-	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
 	kvm_page_track_write(vcpu, gpa, new, bytes);
 
 	return X86EMUL_CONTINUE;
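This emulator_cmpxchg_emulated() hunk, like the earlier vmx.c/vmx.h ones, moves from pinning a struct page (kvm_vcpu_gfn_to_page() plus kmap_atomic()) to the kvm_host_map API, which also covers guest memory that has no struct page behind it. Condensed from the hunk above, the pattern is map, use map.hva, then unmap with a dirty hint; the comments are annotations added here, not kernel source:

	struct kvm_host_map map;

	if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))	/* may fail if the GPA cannot be mapped */
		goto emul_write;

	kaddr = map.hva + offset_in_page(gpa);		/* host virtual address of the guest bytes */
	/* ... cmpxchg the guest data through kaddr ... */

	kvm_vcpu_unmap(vcpu, &map, true);		/* true: the page was written, mark it dirty */

Marking dirty at unmap time is why the explicit kvm_vcpu_mark_page_dirty() call above could be dropped.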
@@ -6558,7 +6596,7 @@ static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
 			    unsigned short port)
 {
-	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
+	unsigned long val = kvm_rax_read(vcpu);
 	int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
 					    size, port, &val, 1);
 	if (ret)
@@ -6593,8 +6631,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
 	}
 
 	/* For size less than 4 we merge, else we zero extend */
-	val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
-					 : 0;
+	val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
 
 	/*
 	 * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
@@ -6602,7 +6639,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
 	 */
 	emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
 				 vcpu->arch.pio.port, &val, 1);
-	kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+	kvm_rax_write(vcpu, val);
 
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -6614,12 +6651,12 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
 	int ret;
 
 	/* For size less than 4 we merge, else we zero extend */
-	val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+	val = (size < 4) ? kvm_rax_read(vcpu) : 0;
 
 	ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
 				       &val, 1);
 	if (ret) {
-		kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+		kvm_rax_write(vcpu, val);
 		return ret;
 	}
 
@@ -6854,10 +6891,20 @@ static unsigned long kvm_get_guest_ip(void)
 	return ip;
 }
 
+static void kvm_handle_intel_pt_intr(void)
+{
+	struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
+
+	kvm_make_request(KVM_REQ_PMI, vcpu);
+	__set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
+			(unsigned long *)&vcpu->arch.pmu.global_status);
+}
+
 static struct perf_guest_info_callbacks kvm_guest_cbs = {
 	.is_in_guest = kvm_is_in_guest,
 	.is_user_mode = kvm_is_user_mode,
 	.get_guest_ip = kvm_get_guest_ip,
+	.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
 };
 
 static void kvm_set_mmio_spte_mask(void)
@@ -7133,11 +7180,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	if (kvm_hv_hypercall_enabled(vcpu->kvm))
 		return kvm_hv_hypercall(vcpu);
 
-	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
-	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
-	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
-	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
+	nr = kvm_rax_read(vcpu);
+	a0 = kvm_rbx_read(vcpu);
+	a1 = kvm_rcx_read(vcpu);
+	a2 = kvm_rdx_read(vcpu);
+	a3 = kvm_rsi_read(vcpu);
 
 	trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
@@ -7178,7 +7225,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 out:
 	if (!op_64_bit)
 		ret = (u32)ret;
-	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
+	kvm_rax_write(vcpu, ret);
 
 	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
@@ -8280,23 +8327,23 @@ static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 	}
-	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
-	regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
-	regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
-	regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
-	regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
-	regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
-	regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
-	regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
+	regs->rax = kvm_rax_read(vcpu);
+	regs->rbx = kvm_rbx_read(vcpu);
+	regs->rcx = kvm_rcx_read(vcpu);
+	regs->rdx = kvm_rdx_read(vcpu);
+	regs->rsi = kvm_rsi_read(vcpu);
+	regs->rdi = kvm_rdi_read(vcpu);
+	regs->rsp = kvm_rsp_read(vcpu);
+	regs->rbp = kvm_rbp_read(vcpu);
 #ifdef CONFIG_X86_64
-	regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
-	regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
-	regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
-	regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
-	regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
-	regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
-	regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
-	regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
+	regs->r8 = kvm_r8_read(vcpu);
+	regs->r9 = kvm_r9_read(vcpu);
+	regs->r10 = kvm_r10_read(vcpu);
+	regs->r11 = kvm_r11_read(vcpu);
+	regs->r12 = kvm_r12_read(vcpu);
+	regs->r13 = kvm_r13_read(vcpu);
+	regs->r14 = kvm_r14_read(vcpu);
+	regs->r15 = kvm_r15_read(vcpu);
 #endif
 
 	regs->rip = kvm_rip_read(vcpu);
@@ -8316,23 +8363,23 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
 	vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 
-	kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
-	kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
-	kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
-	kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
-	kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
-	kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
-	kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
-	kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
+	kvm_rax_write(vcpu, regs->rax);
+	kvm_rbx_write(vcpu, regs->rbx);
+	kvm_rcx_write(vcpu, regs->rcx);
+	kvm_rdx_write(vcpu, regs->rdx);
+	kvm_rsi_write(vcpu, regs->rsi);
+	kvm_rdi_write(vcpu, regs->rdi);
+	kvm_rsp_write(vcpu, regs->rsp);
+	kvm_rbp_write(vcpu, regs->rbp);
 #ifdef CONFIG_X86_64
-	kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
-	kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
-	kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
-	kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
-	kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
-	kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
-	kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
-	kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
+	kvm_r8_write(vcpu, regs->r8);
+	kvm_r9_write(vcpu, regs->r9);
+	kvm_r10_write(vcpu, regs->r10);
+	kvm_r11_write(vcpu, regs->r11);
+	kvm_r12_write(vcpu, regs->r12);
+	kvm_r13_write(vcpu, regs->r13);
+	kvm_r14_write(vcpu, regs->r14);
+	kvm_r15_write(vcpu, regs->r15);
 #endif
 
 	kvm_rip_write(vcpu, regs->rip);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 534d3f28bb01..a470ff0868c5 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -345,6 +345,16 @@ static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
 	__this_cpu_write(current_vcpu, NULL);
 }
 
+
+static inline bool kvm_pat_valid(u64 data)
+{
+	if (data & 0xF8F8F8F8F8F8F8F8ull)
+		return false;
+	/* 0, 1, 4, 5, 6, 7 are valid values. */
+	return (data | ((data & 0x0202020202020202ull) << 1)) == data;
+}
+
 void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
 void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+
 #endif
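The new kvm_pat_valid() helper rejects any PAT entry outside {0, 1, 4, 5, 6, 7}: the 0xF8F8... mask clears entries above 7, and the second expression fails exactly when a byte has bit 1 set but bit 2 clear, i.e. the reserved encodings 2 and 3. A standalone check of that bit trick against a straightforward per-byte rule:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same expression as the new helper, applied to a whole 8-entry PAT MSR. */
static bool pat_valid(uint64_t data)
{
	if (data & 0xF8F8F8F8F8F8F8F8ull)
		return false;
	return (data | ((data & 0x0202020202020202ull) << 1)) == data;
}

int main(void)
{
	/* Exercise every possible value of a single PAT entry (low byte). */
	for (unsigned int b = 0; b < 256; b++) {
		bool expected = b == 0 || b == 1 || (b >= 4 && b <= 7);

		assert(pat_valid((uint64_t)b) == expected);
	}
	printf("kvm_pat_valid bit trick matches the per-byte rule\n");
	return 0;
}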