Diffstat (limited to 'arch')
93 files changed, 7424 insertions, 1849 deletions
diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h
index 1a66f907e5cc..bf863edb517d 100644
--- a/arch/arm/include/asm/idmap.h
+++ b/arch/arm/include/asm/idmap.h
@@ -8,7 +8,6 @@
 #define __idmap __section(.idmap.text) noinline notrace

 extern pgd_t *idmap_pgd;
-extern pgd_t *hyp_pgd;

 void setup_mm_for_reboot(void);

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0c4e643d939e..57cb786a6203 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -87,7 +87,7 @@ struct kvm_vcpu_fault_info {
 	u32 hyp_pc;	/* PC when exception was taken from Hyp mode */
 };

-typedef struct vfp_hard_struct kvm_kernel_vfp_t;
+typedef struct vfp_hard_struct kvm_cpu_context_t;

 struct kvm_vcpu_arch {
 	struct kvm_regs regs;
@@ -105,8 +105,10 @@ struct kvm_vcpu_arch {
 	struct kvm_vcpu_fault_info fault;

 	/* Floating point registers (VFP and Advanced SIMD/NEON) */
-	kvm_kernel_vfp_t vfp_guest;
-	kvm_kernel_vfp_t *vfp_host;
+	struct vfp_hard_struct vfp_guest;
+
+	/* Host FP context */
+	kvm_cpu_context_t *host_cpu_context;

 	/* VGIC state */
 	struct vgic_cpu vgic_cpu;
@@ -188,23 +190,38 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);

-static inline void __cpu_init_hyp_mode(unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
+				       unsigned long long pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
-	unsigned long pgd_low, pgd_high;
-
-	pgd_low = (pgd_ptr & ((1ULL << 32) - 1));
-	pgd_high = (pgd_ptr >> 32ULL);
-
 	/*
-	 * Call initialization code, and switch to the full blown
-	 * HYP code. The init code doesn't need to preserve these registers as
-	 * r1-r3 and r12 are already callee save according to the AAPCS.
-	 * Note that we slightly misuse the prototype by casing the pgd_low to
-	 * a void *.
+	 * Call initialization code, and switch to the full blown HYP
+	 * code. The init code doesn't need to preserve these
+	 * registers as r0-r3 are already callee saved according to
+	 * the AAPCS.
+	 * Note that we slightly misuse the prototype by casting the
+	 * stack pointer to a void *.
+	 *
+	 * We don't have enough registers to perform the full init in
+	 * one go. Install the boot PGD first, and then install the
+	 * runtime PGD, stack pointer and vectors. The PGDs are always
+	 * passed as the third argument, in order to be passed into
+	 * r2-r3 to the init code (yes, this is compliant with the
+	 * PCS!).
 	 */
-	kvm_call_hyp((void *)pgd_low, pgd_high, hyp_stack_ptr, vector_ptr);
+
+	kvm_call_hyp(NULL, 0, boot_pgd_ptr);
+
+	kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr);
 }

+static inline int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	return 0;
+}
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
 #endif /* __ARM_KVM_HOST_H__ */
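Note on the two kvm_call_hyp() calls above: under the 32-bit AAPCS a 64-bit argument is assigned to an even/odd register pair, so passing the PGD as the third argument lands it in r2/r3 in both init phases. A minimal sketch of the idea (illustration only; the kvm_call_hyp() stand-in and init_hyp_sketch() are hypothetical, not the kernel's declarations):

#include <stddef.h>

/*
 * Hypothetical stand-in for the real kvm_call_hyp() HVC trampoline,
 * shown only to make the register assignment visible.
 */
static void kvm_call_hyp(void *arg0, unsigned long arg1,
			 unsigned long long pgd)
{
	/*
	 * 32-bit AAPCS: arg0 -> r0, arg1 -> r1, and the 64-bit pgd
	 * -> the pair r2/r3 (low word in r2 on a little-endian build).
	 */
}

static void init_hyp_sketch(unsigned long long boot_pgd,
			    unsigned long long pgd,
			    unsigned long stack_top,
			    unsigned long vectors)
{
	kvm_call_hyp(NULL, 0, boot_pgd);		/* phase 1 */
	kvm_call_hyp((void *)stack_top, vectors, pgd);	/* phase 2 */
}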
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 970f3b5fa109..472ac7091003 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -19,21 +19,33 @@
 #ifndef __ARM_KVM_MMU_H__
 #define __ARM_KVM_MMU_H__

-#include <asm/cacheflush.h>
-#include <asm/pgalloc.h>
-#include <asm/idmap.h>
+#include <asm/memory.h>
+#include <asm/page.h>

 /*
  * We directly use the kernel VA for the HYP, as we can directly share
  * the mapping (HTTBR "covers" TTBR1).
  */
-#define HYP_PAGE_OFFSET_MASK	(~0UL)
+#define HYP_PAGE_OFFSET_MASK	UL(~0)
 #define HYP_PAGE_OFFSET		PAGE_OFFSET
 #define KERN_TO_HYP(kva)	(kva)

+/*
+ * Our virtual mapping for the boot-time MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the vectors
+ * page, where no kernel data will ever be shared with HYP.
+ */
+#define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cacheflush.h>
+#include <asm/pgalloc.h>
+
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
-void free_hyp_pmds(void);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);

 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
@@ -45,6 +57,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);

 phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);

@@ -114,4 +128,8 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
 	}
 }

+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index a53efa993690..ee68cce6b48e 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -158,7 +158,7 @@ int main(void)
   DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr));
   DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15));
   DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest));
-  DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host));
+  DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context));
   DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
   DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
   DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b571484e9f03..a871b8e00fca 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -20,7 +20,7 @@
 	VMLINUX_SYMBOL(__idmap_text_start) = .;	\
 	*(.idmap.text)				\
 	VMLINUX_SYMBOL(__idmap_text_end) = .;	\
-	ALIGN_FUNCTION();			\
+	. = ALIGN(32);				\
 	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
 	*(.hyp.idmap.text)			\
 	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
@@ -315,3 +315,8 @@ SECTIONS
  */
 ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
 ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
+/*
+ * The HYP init code can't be more than a page long.
+ * The above comment applies as well.
+ */
+ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big")
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 49dd64e579c2..370e1a8af6ac 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -41,9 +41,9 @@ config KVM_ARM_HOST
 	  Provides host support for ARM processors.

 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM"
-	depends on KVM_ARM_HOST
-	default 4
+	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
+	default 4 if KVM_ARM_HOST
+	default 0
 	help
 	  Static number of max supported virtual CPUs per VM.

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 8dc5e76cb789..53c5ed83d16f 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,6 +18,6 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)

 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o
+obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c
index 6ac938d46297..c55b6089e923 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/arch/arm/kvm/arch_timer.c
@@ -22,6 +22,7 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>

+#include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>

 #include <asm/kvm_vgic.h>
@@ -64,7 +65,7 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;

-	timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */
+	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
 	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
 			    vcpu->arch.timer_cpu.irq->irq,
 			    vcpu->arch.timer_cpu.irq->level);
@@ -133,8 +134,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	cycle_t cval, now;
 	u64 ns;

-	/* Check if the timer is enabled and unmasked first */
-	if ((timer->cntv_ctl & 3) != 1)
+	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+	    !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
 		return;

 	cval = timer->cntv_cval;
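Note on the rewritten timer check: CNTV_CTL carries ENABLE in bit 0 and IMASK in bit 1, so the old magic-number test "(cntv_ctl & 3) != 1" and the new spelled-out form agree. A self-contained restatement (the constants are restated here for illustration; the kernel takes them from <clocksource/arm_arch_timer.h>):

#define ARCH_TIMER_CTRL_ENABLE	(1 << 0)	/* CNTV_CTL.ENABLE */
#define ARCH_TIMER_CTRL_IT_MASK	(1 << 1)	/* CNTV_CTL.IMASK  */

/* True iff the virtual timer may raise its interrupt: enabled and
 * not masked -- the same condition as (cntv_ctl & 3) == 1. */
static int timer_can_fire(unsigned int cntv_ctl)
{
	return (cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
	       !(cntv_ctl & ARCH_TIMER_CTRL_IT_MASK);
}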
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a0dfc2a53f91..37d216d814cd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,6 +16,7 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  */

+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -48,7 +49,7 @@ __asm__(".arch_extension virt");
 #endif

 static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_kernel_vfp_t __percpu *kvm_host_vfp_state;
+static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
 static unsigned long hyp_default_vectors;

 /* Per-CPU variable containing the currently running vcpu. */
@@ -206,7 +207,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	default:
-		r = 0;
+		r = kvm_arch_dev_ioctl_check_extension(ext);
 		break;
 	}
 	return r;
@@ -218,27 +219,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }

-int kvm_arch_set_memory_region(struct kvm *kvm,
-			       struct kvm_userspace_memory_region *mem,
-			       struct kvm_memory_slot old,
-			       int user_alloc)
-{
-	return 0;
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
-				   struct kvm_memory_slot old,
 				   struct kvm_userspace_memory_region *mem,
-				   bool user_alloc)
+				   enum kvm_mr_change change)
 {
 	return 0;
 }

 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				   struct kvm_userspace_memory_region *mem,
-				   struct kvm_memory_slot old,
-				   bool user_alloc)
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
 {
 }

@@ -326,7 +318,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
-	vcpu->arch.vfp_host = this_cpu_ptr(kvm_host_vfp_state);
+	vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);

 	/*
 	 * Check whether this vcpu requires the cache to be flushed on
@@ -639,7 +631,8 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
 	return 0;
 }

-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			  bool line_status)
 {
 	u32 irq = irq_level->irq;
 	unsigned int irq_type, vcpu_idx, irq_num;
@@ -794,30 +787,48 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }

-static void cpu_init_hyp_mode(void *vector)
+static void cpu_init_hyp_mode(void *dummy)
 {
+	unsigned long long boot_pgd_ptr;
 	unsigned long long pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;

 	/* Switch from the HYP stub to our own HYP init vector */
-	__hyp_set_vectors((unsigned long)vector);
+	__hyp_set_vectors(kvm_get_idmap_vector());

+	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
 	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;

-	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
+	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
+}
+
+static int hyp_init_cpu_notify(struct notifier_block *self,
+			       unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		cpu_init_hyp_mode(NULL);
+		break;
+	}
+
+	return NOTIFY_OK;
 }

+static struct notifier_block hyp_init_cpu_nb = {
+	.notifier_call = hyp_init_cpu_notify,
+};
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
 static int init_hyp_mode(void)
 {
-	phys_addr_t init_phys_addr;
 	int cpu;
 	int err = 0;

@@ -850,24 +861,6 @@ static int init_hyp_mode(void)
 	}

 	/*
-	 * Execute the init code on each CPU.
-	 *
-	 * Note: The stack is not mapped yet, so don't do anything else than
-	 * initializing the hypervisor mode on each CPU using a local stack
-	 * space for temporary storage.
-	 */
-	init_phys_addr = virt_to_phys(__kvm_hyp_init);
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, cpu_init_hyp_mode,
-					 (void *)(long)init_phys_addr, 1);
-	}
-
-	/*
-	 * Unmap the identity mapping
-	 */
-	kvm_clear_hyp_idmap();
-
-	/*
 	 * Map the Hyp-code called directly from the host
 	 */
 	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
@@ -890,33 +883,38 @@ static int init_hyp_mode(void)
 	}

 	/*
-	 * Map the host VFP structures
+	 * Map the host CPU structures
 	 */
-	kvm_host_vfp_state = alloc_percpu(kvm_kernel_vfp_t);
-	if (!kvm_host_vfp_state) {
+	kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
+	if (!kvm_host_cpu_state) {
 		err = -ENOMEM;
-		kvm_err("Cannot allocate host VFP state\n");
+		kvm_err("Cannot allocate host CPU state\n");
 		goto out_free_mappings;
 	}

 	for_each_possible_cpu(cpu) {
-		kvm_kernel_vfp_t *vfp;
+		kvm_cpu_context_t *cpu_ctxt;

-		vfp = per_cpu_ptr(kvm_host_vfp_state, cpu);
-		err = create_hyp_mappings(vfp, vfp + 1);
+		cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+		err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1);

 		if (err) {
-			kvm_err("Cannot map host VFP state: %d\n", err);
-			goto out_free_vfp;
+			kvm_err("Cannot map host CPU state: %d\n", err);
+			goto out_free_context;
 		}
 	}

 	/*
+	 * Execute the init code on each CPU.
+	 */
+	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
+
+	/*
 	 * Init HYP view of VGIC
 	 */
 	err = kvm_vgic_hyp_init();
 	if (err)
-		goto out_free_vfp;
+		goto out_free_context;

 #ifdef CONFIG_KVM_ARM_VGIC
 	vgic_present = true;
@@ -929,12 +927,19 @@ static int init_hyp_mode(void)
 	if (err)
 		goto out_free_mappings;

+#ifndef CONFIG_HOTPLUG_CPU
+	free_boot_hyp_pgd();
+#endif
+
+	kvm_perf_init();
+
 	kvm_info("Hyp mode initialized successfully\n");
+
 	return 0;
-out_free_vfp:
-	free_percpu(kvm_host_vfp_state);
+out_free_context:
+	free_percpu(kvm_host_cpu_state);
 out_free_mappings:
-	free_hyp_pmds();
+	free_hyp_pgds();
 out_free_stack_pages:
 	for_each_possible_cpu(cpu)
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
@@ -943,27 +948,42 @@ out_err:
 	return err;
 }

+static void check_kvm_target_cpu(void *ret)
+{
+	*(int *)ret = kvm_target_cpu();
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */
 int kvm_arch_init(void *opaque)
 {
 	int err;
+	int ret, cpu;

 	if (!is_hyp_mode_available()) {
 		kvm_err("HYP mode not available\n");
 		return -ENODEV;
 	}

-	if (kvm_target_cpu() < 0) {
-		kvm_err("Target CPU not supported!\n");
-		return -ENODEV;
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
+		if (ret < 0) {
+			kvm_err("Error, CPU %d not supported!\n", cpu);
+			return -ENODEV;
+		}
 	}

 	err = init_hyp_mode();
 	if (err)
 		goto out_err;

+	err = register_cpu_notifier(&hyp_init_cpu_nb);
+	if (err) {
+		kvm_err("Cannot register HYP init CPU notifier (%d)\n", err);
+		goto out_err;
+	}
+
 	kvm_coproc_table_init();
 	return 0;
 out_err:
@@ -973,6 +993,7 @@ out_err:
 /* NOP: Compiling as a module not supported */
 void kvm_arch_exit(void)
 {
+	kvm_perf_teardown();
 }

 static int arm_init(void)
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 9f37a79b880b..f048338135f7 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -21,13 +21,33 @@
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>

 /********************************************************************
  * Hypervisor initialization
  *   - should be called with:
- *       r0,r1 = Hypervisor pgd pointer
- *       r2 = top of Hyp stack (kernel VA)
- *       r3 = pointer to hyp vectors
+ *       r0 = top of Hyp stack (kernel VA)
+ *       r1 = pointer to hyp vectors
+ *       r2,r3 = Hypervisor pgd pointer
+ *
+ * The init scenario is:
+ * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd,
+ *   runtime stack, runtime vectors
+ * - Enable the MMU with the boot pgd
+ * - Jump to a target into the trampoline page (remember, this is the same
+ *   physical page!)
+ * - Now switch to the runtime pgd (same VA, and still the same physical
+ *   page!)
+ * - Invalidate TLBs
+ * - Set stack and vectors
+ * - Profit! (or eret, if you only care about the code).
+ *
+ * As we only have four registers available to pass parameters (and we
+ * need six), we split the init in two phases:
+ * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD.
+ *   Provides the basic HYP init, and enable the MMU.
+ * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD.
+ *   Switches to the runtime PGD, set stack and vectors.
  */

 	.text
@@ -47,22 +67,25 @@ __kvm_hyp_init:
 	W(b)	.

 __do_hyp_init:
+	cmp	r0, #0			@ We have a SP?
+	bne	phase2			@ Yes, second stage init
+
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r0, r1, c2
+	mcrr	p15, 4, r2, r3, c2

 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
 	mrc	p15, 4, r0, c2, c0, 2	@ HTCR
-	ldr	r12, =HTCR_MASK
-	bic	r0, r0, r12
+	ldr	r2, =HTCR_MASK
+	bic	r0, r0, r2
 	mrc	p15, 0, r1, c2, c0, 2	@ TTBCR
 	and	r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
 	orr	r0, r0, r1
 	mcr	p15, 4, r0, c2, c0, 2	@ HTCR

 	mrc	p15, 4, r1, c2, c1, 2	@ VTCR
-	ldr	r12, =VTCR_MASK
-	bic	r1, r1, r12
+	ldr	r2, =VTCR_MASK
+	bic	r1, r1, r2
 	bic	r0, r0, #(~VTCR_HTCR_SH)	@ clear non-reusable HTCR bits
 	orr	r1, r0, r1
 	orr	r1, r1, #(KVM_VTCR_SL0 | KVM_VTCR_T0SZ | KVM_VTCR_S)
@@ -85,24 +108,41 @@ __do_hyp_init:
 	@  - Memory alignment checks: enabled
 	@  - MMU: enabled (this code must be run from an identity mapping)
 	mrc	p15, 4, r0, c1, c0, 0	@ HSCR
-	ldr	r12, =HSCTLR_MASK
-	bic	r0, r0, r12
+	ldr	r2, =HSCTLR_MASK
+	bic	r0, r0, r2
 	mrc	p15, 0, r1, c1, c0, 0	@ SCTLR
-	ldr	r12, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
-	and	r1, r1, r12
-ARM(	ldr	r12, =(HSCTLR_M | HSCTLR_A)			)
-THUMB(	ldr	r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
-	orr	r1, r1, r12
+	ldr	r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
+	and	r1, r1, r2
+ARM(	ldr	r2, =(HSCTLR_M | HSCTLR_A)			)
+THUMB(	ldr	r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE)		)
+	orr	r1, r1, r2
 	orr	r0, r0, r1
 	isb
 	mcr	p15, 4, r0, c1, c0, 0	@ HSCR
-	isb

-	@ Set stack pointer and return to the kernel
-	mov	sp, r2
+	@ End of init phase-1
+	eret
+
+phase2:
+	@ Set stack pointer
+	mov	sp, r0

 	@ Set HVBAR to point to the HYP vectors
-	mcr	p15, 4, r3, c12, c0, 0	@ HVBAR
+	mcr	p15, 4, r1, c12, c0, 0	@ HVBAR
+
+	@ Jump to the trampoline page
+	ldr	r0, =TRAMPOLINE_VA
+	adr	r1, target
+	bfi	r0, r1, #0, #PAGE_SHIFT
+	mov	pc, r0
+
+target:	@ We're now in the trampoline code, switch page tables
+	mcrr	p15, 4, r2, r3, c2
+	isb
+
+	@ Invalidate the old TLBs
+	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
+	dsb

 	eret

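Note on the "bfi r0, r1, #0, #PAGE_SHIFT" sequence above: it builds the branch target by keeping the page-base bits of TRAMPOLINE_VA and inserting the low PAGE_SHIFT bits of the "target" label, which works because the trampoline VA maps the very page this code runs from. The same computation in C (a sketch, not part of the patch; the helper name is made up):

/* Page base taken from the trampoline VA, in-page offset taken from
 * the currently executing (identity-mapped) address. */
static unsigned long trampoline_target(unsigned long trampoline_va,
				       unsigned long cur_addr,
				       unsigned int page_shift)
{
	unsigned long off_mask = (1UL << page_shift) - 1;

	return (trampoline_va & ~off_mask) | (cur_addr & off_mask);
}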
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 2f12e4056408..965706578f13 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -32,8 +32,15 @@

 extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

+static pgd_t *boot_hyp_pgd;
+static pgd_t *hyp_pgd;
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

+static void *init_bounce_page;
+static unsigned long hyp_idmap_start;
+static unsigned long hyp_idmap_end;
+static phys_addr_t hyp_idmap_vector;
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
@@ -71,172 +78,224 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }

-static void free_ptes(pmd_t *pmd, unsigned long addr)
+static void clear_pud_entry(pud_t *pud)
 {
-	pte_t *pte;
-	unsigned int i;
+	pmd_t *pmd_table = pmd_offset(pud, 0);
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}

-	for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
-		if (!pmd_none(*pmd) && pmd_table(*pmd)) {
-			pte = pte_offset_kernel(pmd, addr);
-			pte_free_kernel(NULL, pte);
-		}
-		pmd++;
+static void clear_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static bool pmd_empty(pmd_t *pmd)
+{
+	struct page *pmd_page = virt_to_page(pmd);
+	return page_count(pmd_page) == 1;
+}
+
+static void clear_pte_entry(pte_t *pte)
+{
+	if (pte_present(*pte)) {
+		kvm_set_pte(pte, __pte(0));
+		put_page(virt_to_page(pte));
 	}
 }

-static void free_hyp_pgd_entry(unsigned long addr)
+static bool pte_empty(pte_t *pte)
+{
+	struct page *pte_page = virt_to_page(pte);
+	return page_count(pte_page) == 1;
+}
+
+static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	unsigned long hyp_addr = KERN_TO_HYP(addr);
+	pte_t *pte;
+	unsigned long long addr = start, end = start + size;
+	u64 range;
+
+	while (addr < end) {
+		pgd = pgdp + pgd_index(addr);
+		pud = pud_offset(pgd, addr);
+		if (pud_none(*pud)) {
+			addr += PUD_SIZE;
+			continue;
+		}

-	pgd = hyp_pgd + pgd_index(hyp_addr);
-	pud = pud_offset(pgd, hyp_addr);
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			addr += PMD_SIZE;
+			continue;
+		}

-	if (pud_none(*pud))
-		return;
-	BUG_ON(pud_bad(*pud));
+		pte = pte_offset_kernel(pmd, addr);
+		clear_pte_entry(pte);
+		range = PAGE_SIZE;

-	pmd = pmd_offset(pud, hyp_addr);
-	free_ptes(pmd, addr);
-	pmd_free(NULL, pmd);
-	pud_clear(pud);
+		/* If we emptied the pte, walk back up the ladder */
+		if (pte_empty(pte)) {
+			clear_pmd_entry(pmd);
+			range = PMD_SIZE;
+			if (pmd_empty(pmd)) {
+				clear_pud_entry(pud);
+				range = PUD_SIZE;
+			}
+		}
+
+		addr += range;
+	}
 }

 /**
- * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables
+ * free_boot_hyp_pgd - free HYP boot page tables
  *
- * Assumes this is a page table used strictly in Hyp-mode and therefore contains
- * either mappings in the kernel memory area (above PAGE_OFFSET), or
- * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END).
+ * Free the HYP boot page tables. The bounce page is also freed.
  */
-void free_hyp_pmds(void)
+void free_boot_hyp_pgd(void)
 {
-	unsigned long addr;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-		free_hyp_pgd_entry(addr);
-	for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-		free_hyp_pgd_entry(addr);
+
+	if (boot_hyp_pgd) {
+		unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		kfree(boot_hyp_pgd);
+		boot_hyp_pgd = NULL;
+	}
+
+	if (hyp_pgd)
+		unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+
+	kfree(init_bounce_page);
+	init_bounce_page = NULL;
+
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }

-static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
-				    unsigned long end)
+/**
+ * free_hyp_pgds - free Hyp-mode page tables
+ *
+ * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
+ * therefore contains either mappings in the kernel memory area (above
+ * PAGE_OFFSET), or device mappings in the vmalloc range (from
+ * VMALLOC_START to VMALLOC_END).
+ *
+ * boot_hyp_pgd should only map two pages for the init code.
+ */
+void free_hyp_pgds(void)
 {
-	pte_t *pte;
 	unsigned long addr;
-	struct page *page;

-	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
+	free_boot_hyp_pgd();
+
+	mutex_lock(&kvm_hyp_pgd_mutex);

-		pte = pte_offset_kernel(pmd, hyp_addr);
-		BUG_ON(!virt_addr_valid(addr));
-		page = virt_to_page(addr);
-		kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
+	if (hyp_pgd) {
+		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
+			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
+			unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+		kfree(hyp_pgd);
+		hyp_pgd = NULL;
 	}
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
 }

-static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
-				       unsigned long end,
-				       unsigned long *pfn_base)
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+				    unsigned long end, unsigned long pfn,
+				    pgprot_t prot)
 {
 	pte_t *pte;
 	unsigned long addr;

-	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-
-		pte = pte_offset_kernel(pmd, hyp_addr);
-		BUG_ON(pfn_valid(*pfn_base));
-		kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
-		(*pfn_base)++;
-	}
+	addr = start;
+	do {
+		pte = pte_offset_kernel(pmd, addr);
+		kvm_set_pte(pte, pfn_pte(pfn, prot));
+		get_page(virt_to_page(pte));
+		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
+		pfn++;
+	} while (addr += PAGE_SIZE, addr != end);
 }

 static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
-				   unsigned long end, unsigned long *pfn_base)
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
 {
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long addr, next;

-	for (addr = start; addr < end; addr = next) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-		pmd = pmd_offset(pud, hyp_addr);
+	addr = start;
+	do {
+		pmd = pmd_offset(pud, addr);

 		BUG_ON(pmd_sect(*pmd));

 		if (pmd_none(*pmd)) {
-			pte = pte_alloc_one_kernel(NULL, hyp_addr);
+			pte = pte_alloc_one_kernel(NULL, addr);
 			if (!pte) {
 				kvm_err("Cannot allocate Hyp pte\n");
 				return -ENOMEM;
 			}
 			pmd_populate_kernel(NULL, pmd, pte);
+			get_page(virt_to_page(pmd));
+			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
 		}

 		next = pmd_addr_end(addr, end);

-		/*
-		 * If pfn_base is NULL, we map kernel pages into HYP with the
-		 * virtual address. Otherwise, this is considered an I/O
-		 * mapping and we map the physical region starting at
-		 * *pfn_base to [start, end[.
-		 */
-		if (!pfn_base)
-			create_hyp_pte_mappings(pmd, addr, next);
-		else
-			create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
-	}
+		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);

 	return 0;
 }

-static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
+static int __create_hyp_mappings(pgd_t *pgdp,
+				 unsigned long start, unsigned long end,
+				 unsigned long pfn, pgprot_t prot)
 {
-	unsigned long start = (unsigned long)from;
-	unsigned long end = (unsigned long)to;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;

-	if (start >= end)
-		return -EINVAL;
-	/* Check for a valid kernel memory mapping */
-	if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
-		return -EINVAL;
-	/* Check for a valid kernel IO mapping */
-	if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
-		return -EINVAL;
-
 	mutex_lock(&kvm_hyp_pgd_mutex);
-	for (addr = start; addr < end; addr = next) {
-		unsigned long hyp_addr = KERN_TO_HYP(addr);
-		pgd = hyp_pgd + pgd_index(hyp_addr);
-		pud = pud_offset(pgd, hyp_addr);
+	addr = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
+	do {
+		pgd = pgdp + pgd_index(addr);
+		pud = pud_offset(pgd, addr);

 		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, hyp_addr);
+			pmd = pmd_alloc_one(NULL, addr);
 			if (!pmd) {
 				kvm_err("Cannot allocate Hyp pmd\n");
 				err = -ENOMEM;
 				goto out;
 			}
 			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
 		}

 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
+		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
 		if (err)
 			goto out;
-	}
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
 out:
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 	return err;
@@ -250,27 +309,41 @@ out:
  * The same virtual address as the kernel virtual address is also used
  * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
  * physical pages.
- *
- * Note: Wrapping around zero in the "to" address is not supported.
  */
 int create_hyp_mappings(void *from, void *to)
 {
-	return __create_hyp_mappings(from, to, NULL);
+	unsigned long phys_addr = virt_to_phys(from);
+	unsigned long start = KERN_TO_HYP((unsigned long)from);
+	unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+	/* Check for a valid kernel memory mapping */
+	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP);
 }

 /**
  * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
  * @from:	The kernel start VA of the range
  * @to:		The kernel end VA of the range (exclusive)
- * @addr:	The physical start address which gets mapped
+ * @phys_addr:	The physical start address which gets mapped
  *
  * The resulting HYP VA is the same as the kernel VA, modulo
  * HYP_PAGE_OFFSET.
  */
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 {
-	unsigned long pfn = __phys_to_pfn(addr);
-	return __create_hyp_mappings(from, to, &pfn);
+	unsigned long start = KERN_TO_HYP((unsigned long)from);
+	unsigned long end = KERN_TO_HYP((unsigned long)to);
+
+	/* Check for a valid kernel IO mapping */
+	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
+		return -EINVAL;
+
+	return __create_hyp_mappings(hyp_pgd, start, end,
+				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }

 /**
@@ -307,42 +380,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	return 0;
 }

-static void clear_pud_entry(pud_t *pud)
-{
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	pud_clear(pud);
-	pmd_free(NULL, pmd_table);
-	put_page(virt_to_page(pud));
-}
-
-static void clear_pmd_entry(pmd_t *pmd)
-{
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	pmd_clear(pmd);
-	pte_free_kernel(NULL, pte_table);
-	put_page(virt_to_page(pmd));
-}
-
-static bool pmd_empty(pmd_t *pmd)
-{
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
-}
-
-static void clear_pte_entry(pte_t *pte)
-{
-	if (pte_present(*pte)) {
-		kvm_set_pte(pte, __pte(0));
-		put_page(virt_to_page(pte));
-	}
-}
-
-static bool pte_empty(pte_t *pte)
-{
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
-}
-
 /**
  * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
  * @kvm:   The VM pointer
@@ -356,43 +393,7 @@ static bool pte_empty(pte_t *pte)
  */
 static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	phys_addr_t addr = start, end = start + size;
-	u64 range;
-
-	while (addr < end) {
-		pgd = kvm->arch.pgd + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
-			continue;
-		}
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, addr);
-		clear_pte_entry(pte);
-		range = PAGE_SIZE;
-
-		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
-			clear_pmd_entry(pmd);
-			range = PMD_SIZE;
-			if (pmd_empty(pmd)) {
-				clear_pud_entry(pud);
-				range = PUD_SIZE;
-			}
-		}
-
-		addr += range;
-	}
+	unmap_range(kvm->arch.pgd, start, size);
 }

 /**
@@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)

 phys_addr_t kvm_mmu_get_httbr(void)
 {
-	VM_BUG_ON(!virt_addr_valid(hyp_pgd));
 	return virt_to_phys(hyp_pgd);
 }

+phys_addr_t kvm_mmu_get_boot_httbr(void)
+{
+	return virt_to_phys(boot_hyp_pgd);
+}
+
+phys_addr_t kvm_get_idmap_vector(void)
+{
+	return hyp_idmap_vector;
+}
+
 int kvm_mmu_init(void)
 {
-	if (!hyp_pgd) {
+	int err;
+
+	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+
+	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
+		/*
+		 * Our init code is crossing a page boundary. Allocate
+		 * a bounce page, copy the code over and use that.
+		 */
+		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
+		phys_addr_t phys_base;
+
+		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!init_bounce_page) {
+			kvm_err("Couldn't allocate HYP init bounce page\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		memcpy(init_bounce_page, __hyp_idmap_text_start, len);
+		/*
+		 * Warning: the code we just copied to the bounce page
+		 * must be flushed to the point of coherency.
+		 * Otherwise, the data may be sitting in L2, and HYP
+		 * mode won't be able to observe it as it runs with
+		 * caches off at that point.
+		 */
+		kvm_flush_dcache_to_poc(init_bounce_page, len);
+
+		phys_base = virt_to_phys(init_bounce_page);
+		hyp_idmap_vector += phys_base - hyp_idmap_start;
+		hyp_idmap_start = phys_base;
+		hyp_idmap_end = phys_base + len;
+
+		kvm_info("Using HYP init bounce page @%lx\n",
+			 (unsigned long)phys_base);
+	}
+
+	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out;
 	}

-	return 0;
-}
+	/* Create the idmap in the boot page tables */
+	err = __create_hyp_mappings(boot_hyp_pgd,
+				    hyp_idmap_start, hyp_idmap_end,
+				    __phys_to_pfn(hyp_idmap_start),
+				    PAGE_HYP);

-/**
- * kvm_clear_idmap - remove all idmaps from the hyp pgd
- *
- * Free the underlying pmds for all pgds in range and clear the pgds (but
- * don't free them) afterwards.
- */
-void kvm_clear_hyp_idmap(void)
-{
-	unsigned long addr, end;
-	unsigned long next;
-	pgd_t *pgd = hyp_pgd;
-	pud_t *pud;
-	pmd_t *pmd;
+	if (err) {
+		kvm_err("Failed to idmap %lx-%lx\n",
+			hyp_idmap_start, hyp_idmap_end);
+		goto out;
+	}

-	addr = virt_to_phys(__hyp_idmap_text_start);
-	end = virt_to_phys(__hyp_idmap_text_end);
+	/* Map the very same page at the trampoline VA */
+	err = __create_hyp_mappings(boot_hyp_pgd,
+				    TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+				    __phys_to_pfn(hyp_idmap_start),
+				    PAGE_HYP);
+	if (err) {
+		kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
+			TRAMPOLINE_VA);
+		goto out;
+	}

-	pgd += pgd_index(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		pud = pud_offset(pgd, addr);
-		pmd = pmd_offset(pud, addr);
+	/* Map the same page again into the runtime page tables */
+	err = __create_hyp_mappings(hyp_pgd,
+				    TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
+				    __phys_to_pfn(hyp_idmap_start),
+				    PAGE_HYP);
+	if (err) {
+		kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
+			TRAMPOLINE_VA);
+		goto out;
+	}

-		pud_clear(pud);
-		kvm_clean_pmd_entry(pmd);
-		pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
-	} while (pgd++, addr = next, addr < end);
+	return 0;
+out:
+	free_hyp_pgds();
+	return err;
 }
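Note on the bounce-page test in kvm_mmu_init() above: "(hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK" is non-zero exactly when the init code's start and end fall in different pages, which is what forces the copy to a freshly allocated page. A standalone restatement (a sketch; assumes the usual PAGE_MASK = ~(PAGE_SIZE - 1)):

/* Non-zero iff [start, end] straddles a page boundary: XOR keeps
 * only the differing bits, and the mask keeps the page-number bits. */
static int crosses_page(unsigned long start, unsigned long end,
			unsigned long page_mask)
{
	return ((start ^ end) & page_mask) != 0;
}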
diff --git a/arch/arm/kvm/perf.c b/arch/arm/kvm/perf.c new file mode 100644 index 000000000000..1a3849da0b4b --- /dev/null +++ b/arch/arm/kvm/perf.c | |||
@@ -0,0 +1,68 @@ | |||
1 | /* | ||
2 | * Based on the x86 implementation. | ||
3 | * | ||
4 | * Copyright (C) 2012 ARM Ltd. | ||
5 | * Author: Marc Zyngier <marc.zyngier@arm.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
18 | */ | ||
19 | |||
20 | #include <linux/perf_event.h> | ||
21 | #include <linux/kvm_host.h> | ||
22 | |||
23 | #include <asm/kvm_emulate.h> | ||
24 | |||
25 | static int kvm_is_in_guest(void) | ||
26 | { | ||
27 | return kvm_arm_get_running_vcpu() != NULL; | ||
28 | } | ||
29 | |||
30 | static int kvm_is_user_mode(void) | ||
31 | { | ||
32 | struct kvm_vcpu *vcpu; | ||
33 | |||
34 | vcpu = kvm_arm_get_running_vcpu(); | ||
35 | |||
36 | if (vcpu) | ||
37 | return !vcpu_mode_priv(vcpu); | ||
38 | |||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | static unsigned long kvm_get_guest_ip(void) | ||
43 | { | ||
44 | struct kvm_vcpu *vcpu; | ||
45 | |||
46 | vcpu = kvm_arm_get_running_vcpu(); | ||
47 | |||
48 | if (vcpu) | ||
49 | return *vcpu_pc(vcpu); | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | static struct perf_guest_info_callbacks kvm_guest_cbs = { | ||
55 | .is_in_guest = kvm_is_in_guest, | ||
56 | .is_user_mode = kvm_is_user_mode, | ||
57 | .get_guest_ip = kvm_get_guest_ip, | ||
58 | }; | ||
59 | |||
60 | int kvm_perf_init(void) | ||
61 | { | ||
62 | return perf_register_guest_info_callbacks(&kvm_guest_cbs); | ||
63 | } | ||
64 | |||
65 | int kvm_perf_teardown(void) | ||
66 | { | ||
67 | return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); | ||
68 | } | ||
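
The three callbacks above are all the generic perf layer needs to attribute PMU samples that land while a vCPU is running. A hedged sketch of the consumer side (perf stores the registered pointer in perf_guest_cbs; the function name misc_flags_sketch is made up for illustration):

    #include <linux/perf_event.h>

    /* Sketch: classify a PMU sample using the callbacks registered above.
     * If the sample hit while a vCPU was running, tag it as a guest sample. */
    static int misc_flags_sketch(void)
    {
            if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
                    return perf_guest_cbs->is_user_mode()
                            ? PERF_RECORD_MISC_GUEST_USER
                            : PERF_RECORD_MISC_GUEST_KERNEL;
            return PERF_RECORD_MISC_KERNEL;         /* plain host sample */
    }
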
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 5ee505c937d1..83cb3ac27095 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <asm/pgtable.h> | 8 | #include <asm/pgtable.h> |
9 | #include <asm/sections.h> | 9 | #include <asm/sections.h> |
10 | #include <asm/system_info.h> | 10 | #include <asm/system_info.h> |
11 | #include <asm/virt.h> | ||
12 | 11 | ||
13 | pgd_t *idmap_pgd; | 12 | pgd_t *idmap_pgd; |
14 | 13 | ||
@@ -83,37 +82,10 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start, | |||
83 | } while (pgd++, addr = next, addr != end); | 82 | } while (pgd++, addr = next, addr != end); |
84 | } | 83 | } |
85 | 84 | ||
86 | #if defined(CONFIG_ARM_VIRT_EXT) && defined(CONFIG_ARM_LPAE) | ||
87 | pgd_t *hyp_pgd; | ||
88 | |||
89 | extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; | ||
90 | |||
91 | static int __init init_static_idmap_hyp(void) | ||
92 | { | ||
93 | hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); | ||
94 | if (!hyp_pgd) | ||
95 | return -ENOMEM; | ||
96 | |||
97 | pr_info("Setting up static HYP identity map for 0x%p - 0x%p\n", | ||
98 | __hyp_idmap_text_start, __hyp_idmap_text_end); | ||
99 | identity_mapping_add(hyp_pgd, __hyp_idmap_text_start, | ||
100 | __hyp_idmap_text_end, PMD_SECT_AP1); | ||
101 | |||
102 | return 0; | ||
103 | } | ||
104 | #else | ||
105 | static int __init init_static_idmap_hyp(void) | ||
106 | { | ||
107 | return 0; | ||
108 | } | ||
109 | #endif | ||
110 | |||
111 | extern char __idmap_text_start[], __idmap_text_end[]; | 85 | extern char __idmap_text_start[], __idmap_text_end[]; |
112 | 86 | ||
113 | static int __init init_static_idmap(void) | 87 | static int __init init_static_idmap(void) |
114 | { | 88 | { |
115 | int ret; | ||
116 | |||
117 | idmap_pgd = pgd_alloc(&init_mm); | 89 | idmap_pgd = pgd_alloc(&init_mm); |
118 | if (!idmap_pgd) | 90 | if (!idmap_pgd) |
119 | return -ENOMEM; | 91 | return -ENOMEM; |
@@ -123,12 +95,10 @@ static int __init init_static_idmap(void) | |||
123 | identity_mapping_add(idmap_pgd, __idmap_text_start, | 95 | identity_mapping_add(idmap_pgd, __idmap_text_start, |
124 | __idmap_text_end, 0); | 96 | __idmap_text_end, 0); |
125 | 97 | ||
126 | ret = init_static_idmap_hyp(); | ||
127 | |||
128 | /* Flush L1 for the hardware to see this page table content */ | 98 | /* Flush L1 for the hardware to see this page table content */ |
129 | flush_cache_louis(); | 99 | flush_cache_louis(); |
130 | 100 | ||
131 | return ret; | 101 | return 0; |
132 | } | 102 | } |
133 | early_initcall(init_static_idmap); | 103 | early_initcall(init_static_idmap); |
134 | 104 | ||
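
With init_static_idmap_hyp() gone, idmap.c no longer knows about the HYP text range at all; the KVM side is assumed to derive the range itself, consistent with the trampoline mappings in the mmu.c hunk above. A minimal sketch under that assumption:

    /* Sketch: KVM reads the HYP idmap bounds straight from the linker
     * symbols instead of having idmap.c populate a shared hyp_pgd. */
    extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

    phys_addr_t hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
    phys_addr_t hyp_idmap_end   = virt_to_phys(__hyp_idmap_text_end);
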
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index cfa74983c675..989dd3fe8de1 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
@@ -26,6 +26,7 @@ | |||
26 | #define KVM_USER_MEM_SLOTS 32 | 26 | #define KVM_USER_MEM_SLOTS 32 |
27 | 27 | ||
28 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 28 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
29 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS | ||
29 | 30 | ||
30 | /* define exit reasons from vmm to kvm*/ | 31 | /* define exit reasons from vmm to kvm*/ |
31 | #define EXIT_REASON_VM_PANIC 0 | 32 | #define EXIT_REASON_VM_PANIC 0 |
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h index ec6c6b301238..99503c284400 100644 --- a/arch/ia64/include/uapi/asm/kvm.h +++ b/arch/ia64/include/uapi/asm/kvm.h | |||
@@ -27,7 +27,6 @@ | |||
27 | /* Select x86 specific features in <linux/kvm.h> */ | 27 | /* Select x86 specific features in <linux/kvm.h> */ |
28 | #define __KVM_HAVE_IOAPIC | 28 | #define __KVM_HAVE_IOAPIC |
29 | #define __KVM_HAVE_IRQ_LINE | 29 | #define __KVM_HAVE_IRQ_LINE |
30 | #define __KVM_HAVE_DEVICE_ASSIGNMENT | ||
31 | 30 | ||
32 | /* Architectural interrupt line count. */ | 31 | /* Architectural interrupt line count. */ |
33 | #define KVM_NR_INTERRUPTS 256 | 32 | #define KVM_NR_INTERRUPTS 256 |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 2cd225f8c68d..990b86420cc6 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig | |||
@@ -21,12 +21,11 @@ config KVM | |||
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on BROKEN | 22 | depends on BROKEN |
23 | depends on HAVE_KVM && MODULES | 23 | depends on HAVE_KVM && MODULES |
24 | # for device assignment: | ||
25 | depends on PCI | ||
26 | depends on BROKEN | 24 | depends on BROKEN |
27 | select PREEMPT_NOTIFIERS | 25 | select PREEMPT_NOTIFIERS |
28 | select ANON_INODES | 26 | select ANON_INODES |
29 | select HAVE_KVM_IRQCHIP | 27 | select HAVE_KVM_IRQCHIP |
28 | select HAVE_KVM_IRQ_ROUTING | ||
30 | select KVM_APIC_ARCHITECTURE | 29 | select KVM_APIC_ARCHITECTURE |
31 | select KVM_MMIO | 30 | select KVM_MMIO |
32 | ---help--- | 31 | ---help--- |
@@ -50,6 +49,17 @@ config KVM_INTEL | |||
50 | Provides support for KVM on Itanium 2 processors equipped with the VT | 49 | Provides support for KVM on Itanium 2 processors equipped with the VT |
51 | extensions. | 50 | extensions. |
52 | 51 | ||
52 | config KVM_DEVICE_ASSIGNMENT | ||
53 | bool "KVM legacy PCI device assignment support" | ||
54 | depends on KVM && PCI && IOMMU_API | ||
55 | default y | ||
56 | ---help--- | ||
57 | Provide support for legacy PCI device assignment through KVM. The | ||
58 | kernel now also supports a full-featured userspace device driver | ||
59 | framework through VFIO, which supersedes much of this support. | ||
60 | |||
61 | If unsure, say Y. | ||
62 | |||
53 | source drivers/vhost/Kconfig | 63 | source drivers/vhost/Kconfig |
54 | 64 | ||
55 | endif # VIRTUALIZATION | 65 | endif # VIRTUALIZATION |
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index db3d7c5d1071..1a4053789d01 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile | |||
@@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ | |||
49 | asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ | 49 | asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/ |
50 | 50 | ||
51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
52 | coalesced_mmio.o irq_comm.o assigned-dev.o) | 52 | coalesced_mmio.o irq_comm.o) |
53 | 53 | ||
54 | ifeq ($(CONFIG_IOMMU_API),y) | 54 | ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y) |
55 | common-objs += $(addprefix ../../../virt/kvm/, iommu.o) | 55 | common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o) |
56 | endif | 56 | endif |
57 | 57 | ||
58 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o | 58 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index ad3126a58644..5b2dc0d10c8f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -204,9 +204,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
204 | case KVM_CAP_COALESCED_MMIO: | 204 | case KVM_CAP_COALESCED_MMIO: |
205 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | 205 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; |
206 | break; | 206 | break; |
207 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
207 | case KVM_CAP_IOMMU: | 208 | case KVM_CAP_IOMMU: |
208 | r = iommu_present(&pci_bus_type); | 209 | r = iommu_present(&pci_bus_type); |
209 | break; | 210 | break; |
211 | #endif | ||
210 | default: | 212 | default: |
211 | r = 0; | 213 | r = 0; |
212 | } | 214 | } |
@@ -924,13 +926,15 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
924 | return 0; | 926 | return 0; |
925 | } | 927 | } |
926 | 928 | ||
927 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) | 929 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, |
930 | bool line_status) | ||
928 | { | 931 | { |
929 | if (!irqchip_in_kernel(kvm)) | 932 | if (!irqchip_in_kernel(kvm)) |
930 | return -ENXIO; | 933 | return -ENXIO; |
931 | 934 | ||
932 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 935 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
933 | irq_event->irq, irq_event->level); | 936 | irq_event->irq, irq_event->level, |
937 | line_status); | ||
934 | return 0; | 938 | return 0; |
935 | } | 939 | } |
936 | 940 | ||
@@ -942,24 +946,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
942 | int r = -ENOTTY; | 946 | int r = -ENOTTY; |
943 | 947 | ||
944 | switch (ioctl) { | 948 | switch (ioctl) { |
945 | case KVM_SET_MEMORY_REGION: { | ||
946 | struct kvm_memory_region kvm_mem; | ||
947 | struct kvm_userspace_memory_region kvm_userspace_mem; | ||
948 | |||
949 | r = -EFAULT; | ||
950 | if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) | ||
951 | goto out; | ||
952 | kvm_userspace_mem.slot = kvm_mem.slot; | ||
953 | kvm_userspace_mem.flags = kvm_mem.flags; | ||
954 | kvm_userspace_mem.guest_phys_addr = | ||
955 | kvm_mem.guest_phys_addr; | ||
956 | kvm_userspace_mem.memory_size = kvm_mem.memory_size; | ||
957 | r = kvm_vm_ioctl_set_memory_region(kvm, | ||
958 | &kvm_userspace_mem, false); | ||
959 | if (r) | ||
960 | goto out; | ||
961 | break; | ||
962 | } | ||
963 | case KVM_CREATE_IRQCHIP: | 949 | case KVM_CREATE_IRQCHIP: |
964 | r = -EFAULT; | 950 | r = -EFAULT; |
965 | r = kvm_ioapic_init(kvm); | 951 | r = kvm_ioapic_init(kvm); |
@@ -1384,9 +1370,7 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
1384 | void kvm_arch_destroy_vm(struct kvm *kvm) | 1370 | void kvm_arch_destroy_vm(struct kvm *kvm) |
1385 | { | 1371 | { |
1386 | kvm_iommu_unmap_guest(kvm); | 1372 | kvm_iommu_unmap_guest(kvm); |
1387 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
1388 | kvm_free_all_assigned_devices(kvm); | 1373 | kvm_free_all_assigned_devices(kvm); |
1389 | #endif | ||
1390 | kfree(kvm->arch.vioapic); | 1374 | kfree(kvm->arch.vioapic); |
1391 | kvm_release_vm_pages(kvm); | 1375 | kvm_release_vm_pages(kvm); |
1392 | } | 1376 | } |
@@ -1578,9 +1562,8 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
1578 | 1562 | ||
1579 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 1563 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1580 | struct kvm_memory_slot *memslot, | 1564 | struct kvm_memory_slot *memslot, |
1581 | struct kvm_memory_slot old, | ||
1582 | struct kvm_userspace_memory_region *mem, | 1565 | struct kvm_userspace_memory_region *mem, |
1583 | bool user_alloc) | 1566 | enum kvm_mr_change change) |
1584 | { | 1567 | { |
1585 | unsigned long i; | 1568 | unsigned long i; |
1586 | unsigned long pfn; | 1569 | unsigned long pfn; |
@@ -1610,8 +1593,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
1610 | 1593 | ||
1611 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 1594 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
1612 | struct kvm_userspace_memory_region *mem, | 1595 | struct kvm_userspace_memory_region *mem, |
1613 | struct kvm_memory_slot old, | 1596 | const struct kvm_memory_slot *old, |
1614 | bool user_alloc) | 1597 | enum kvm_mr_change change) |
1615 | { | 1598 | { |
1616 | return; | 1599 | return; |
1617 | } | 1600 | } |
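
The old (struct kvm_memory_slot old, bool user_alloc) pair is replaced by an enum kvm_mr_change that tells the architecture hook what kind of slot update is in flight. For reference, a sketch of that enum as defined in the generic KVM headers of this series:

    /* Computed once by generic code and passed to the arch hooks. */
    enum kvm_mr_change {
            KVM_MR_CREATE,          /* a new slot is being created */
            KVM_MR_DELETE,          /* an existing slot is going away */
            KVM_MR_MOVE,            /* slot keeps its id, moves in GPA space */
            KVM_MR_FLAGS_ONLY,      /* only the flags word changes */
    };
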
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index c3e2935b6db4..c5f92a926a9a 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h | |||
@@ -27,10 +27,4 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | |||
27 | #define kvm_apic_present(x) (true) | 27 | #define kvm_apic_present(x) (true) |
28 | #define kvm_lapic_enabled(x) (true) | 28 | #define kvm_lapic_enabled(x) (true) |
29 | 29 | ||
30 | static inline bool kvm_apic_vid_enabled(void) | ||
31 | { | ||
32 | /* IA64 has no apicv supporting, do nothing here */ | ||
33 | return false; | ||
34 | } | ||
35 | |||
36 | #endif | 30 | #endif |
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4bc2c3dad6ad..cf4df8e2139a 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h | |||
@@ -270,6 +270,9 @@ | |||
270 | #define H_SET_MODE 0x31C | 270 | #define H_SET_MODE 0x31C |
271 | #define MAX_HCALL_OPCODE H_SET_MODE | 271 | #define MAX_HCALL_OPCODE H_SET_MODE |
272 | 272 | ||
273 | /* Platform specific hcalls, used by KVM */ | ||
274 | #define H_RTAS 0xf000 | ||
275 | |||
273 | #ifndef __ASSEMBLY__ | 276 | #ifndef __ASSEMBLY__ |
274 | 277 | ||
275 | /** | 278 | /** |
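
H_RTAS deliberately sits above MAX_HCALL_OPCODE: it is not a PAPR hcall but a platform-specific number that KVM claims for in-kernel RTAS emulation. A hedged sketch of the dispatch this enables, assuming the case sits in the book3s HV hcall handler and kvmppc_rtas_hcall() behaves as declared in the kvm_ppc.h hunk below:

    case H_RTAS:
            if (list_empty(&vcpu->kvm->arch.rtas_tokens))
                    return RESUME_HOST;     /* no tokens bound: userspace RTAS */
            rc = kvmppc_rtas_hcall(vcpu);
            if (rc)
                    return RESUME_HOST;     /* e.g. token not handled in-kernel */
            break;
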
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 5a56e1c5f851..349ed85c7d61 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
@@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void); | |||
142 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); | 142 | extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
143 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); | 143 | extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); |
144 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); | 144 | extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); |
145 | extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, | ||
146 | unsigned int vec); | ||
145 | extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); | 147 | extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); |
146 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, | 148 | extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, |
147 | bool upper, u32 val); | 149 | bool upper, u32 val); |
@@ -156,7 +158,8 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, | |||
156 | unsigned long pte_index); | 158 | unsigned long pte_index); |
157 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, | 159 | extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, |
158 | unsigned long *nb_ret); | 160 | unsigned long *nb_ret); |
159 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); | 161 | extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr, |
162 | unsigned long gpa, bool dirty); | ||
160 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, | 163 | extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, |
161 | long pte_index, unsigned long pteh, unsigned long ptel); | 164 | long pte_index, unsigned long pteh, unsigned long ptel); |
162 | extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, | 165 | extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, |
@@ -458,6 +461,8 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) | |||
458 | #define OSI_SC_MAGIC_R4 0x77810F9B | 461 | #define OSI_SC_MAGIC_R4 0x77810F9B |
459 | 462 | ||
460 | #define INS_DCBZ 0x7c0007ec | 463 | #define INS_DCBZ 0x7c0007ec |
464 | /* TO = 31 for unconditional trap */ | ||
465 | #define INS_TW 0x7fe00008 | ||
461 | 466 | ||
462 | /* LPIDs we support with this build -- runtime limit may be lower */ | 467 | /* LPIDs we support with this build -- runtime limit may be lower */ |
463 | #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) | 468 | #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) |
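
INS_TW can be checked against the trap-word encoding: primary opcode 31, extended opcode 4, plus the TO/RA/RB fields. A small worked example (the PPC_INST_TW macro is hypothetical, shown only to verify the constant):

    /* tw TO,RA,RB: with TO = 31 (trap always) and RA = RB = 0 this is an
     * unconditional trap, suitable as a software-breakpoint instruction. */
    #define PPC_INST_TW(to, ra, rb) \
            ((31u << 26) | ((to) << 21) | ((ra) << 16) | ((rb) << 11) | (4u << 1))

    /* PPC_INST_TW(31, 0, 0) == 0x7fe00008 == INS_TW */
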
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38bec1dc9928..9c1ff330c805 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h | |||
@@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v) | |||
268 | (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); | 268 | (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); |
269 | } | 269 | } |
270 | 270 | ||
271 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
272 | /* | ||
273 | * Note modification of an HPTE; set the HPTE modified bit | ||
274 | * if anyone is interested. | ||
275 | */ | ||
276 | static inline void note_hpte_modification(struct kvm *kvm, | ||
277 | struct revmap_entry *rev) | ||
278 | { | ||
279 | if (atomic_read(&kvm->arch.hpte_mod_interest)) | ||
280 | rev->guest_rpte |= HPTE_GR_MODIFIED; | ||
281 | } | ||
282 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | ||
283 | |||
271 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ | 284 | #endif /* __ASM_KVM_BOOK3S_64_H__ */ |
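
note_hpte_modification() is the producer half of HPT change tracking; the reader side is assumed to raise hpte_mod_interest while it is live and then harvest the per-entry flag. A hedged sketch (htab_reader_pass is a made-up name):

    /* Sketch: while a reader is active, every guest_rpte update is flagged
     * with HPTE_GR_MODIFIED; the reader collects and clears the flag. */
    static void htab_reader_pass(struct kvm *kvm, struct revmap_entry *rev,
                                 unsigned long n)
    {
            unsigned long i;

            atomic_inc(&kvm->arch.hpte_mod_interest);
            for (i = 0; i < n; ++i) {
                    if (rev[i].guest_rpte & HPTE_GR_MODIFIED) {
                            /* changed since the last pass: re-send it */
                            rev[i].guest_rpte &= ~HPTE_GR_MODIFIED;
                    }
            }
            atomic_dec(&kvm->arch.hpte_mod_interest);
    }
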
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index cdc3d2717cc6..9039d3c97eec 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h | |||
@@ -20,6 +20,11 @@ | |||
20 | #ifndef __ASM_KVM_BOOK3S_ASM_H__ | 20 | #ifndef __ASM_KVM_BOOK3S_ASM_H__ |
21 | #define __ASM_KVM_BOOK3S_ASM_H__ | 21 | #define __ASM_KVM_BOOK3S_ASM_H__ |
22 | 22 | ||
23 | /* XICS ICP register offsets */ | ||
24 | #define XICS_XIRR 4 | ||
25 | #define XICS_MFRR 0xc | ||
26 | #define XICS_IPI 2 /* interrupt source # for IPIs */ | ||
27 | |||
23 | #ifdef __ASSEMBLY__ | 28 | #ifdef __ASSEMBLY__ |
24 | 29 | ||
25 | #ifdef CONFIG_KVM_BOOK3S_HANDLER | 30 | #ifdef CONFIG_KVM_BOOK3S_HANDLER |
@@ -81,10 +86,11 @@ struct kvmppc_host_state { | |||
81 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 86 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
82 | u8 hwthread_req; | 87 | u8 hwthread_req; |
83 | u8 hwthread_state; | 88 | u8 hwthread_state; |
84 | 89 | u8 host_ipi; | |
85 | struct kvm_vcpu *kvm_vcpu; | 90 | struct kvm_vcpu *kvm_vcpu; |
86 | struct kvmppc_vcore *kvm_vcore; | 91 | struct kvmppc_vcore *kvm_vcore; |
87 | unsigned long xics_phys; | 92 | unsigned long xics_phys; |
93 | u32 saved_xirr; | ||
88 | u64 dabr; | 94 | u64 dabr; |
89 | u64 host_mmcr[3]; | 95 | u64 host_mmcr[3]; |
90 | u32 host_pmc[8]; | 96 | u32 host_pmc[8]; |
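
The XICS_* offsets and the new saved_xirr/host_ipi fields cooperate in the real-mode external-interrupt path: XIRR is read exactly once, and if the interrupt turns out to belong to the host it must not be lost. A hedged C rendering of what the real-mode assembly is assumed to do (read_real()/write_real_byte() are hypothetical helpers standing in for cache-inhibited loads and stores):

    u32 xirr = read_real(hstate->xics_phys + XICS_XIRR);

    if ((xirr & 0xffffff) == XICS_IPI && !hstate->host_ipi) {
            /* IPI aimed at the guest: clear it by resetting MFRR */
            write_real_byte(hstate->xics_phys + XICS_MFRR, 0xff);
    } else {
            /* Host interrupt: stash XIRR so the host can replay it later
             * via the kvmppc_get_xics_latch() helper added below */
            hstate->saved_xirr = xirr;
    }
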
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index b7cd3356a532..d3c1eb34c986 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h | |||
@@ -26,6 +26,8 @@ | |||
26 | /* LPIDs we support with this build -- runtime limit may be lower */ | 26 | /* LPIDs we support with this build -- runtime limit may be lower */ |
27 | #define KVMPPC_NR_LPIDS 64 | 27 | #define KVMPPC_NR_LPIDS 64 |
28 | 28 | ||
29 | #define KVMPPC_INST_EHPRIV 0x7c00021c | ||
30 | |||
29 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | 31 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) |
30 | { | 32 | { |
31 | vcpu->arch.gpr[num] = val; | 33 | vcpu->arch.gpr[num] = val; |
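
KVMPPC_INST_EHPRIV is the e500mc "ehpriv" opcode, which always traps from the guest to the hypervisor and therefore serves as the booke software-breakpoint instruction (INS_TW plays the same role on book3s). A hedged sketch of the emulation check, assuming the exit-handler shape used elsewhere on booke:

    /* Sketch: a planted ehpriv becomes a debug exit, not a program fault. */
    if (vcpu->arch.last_inst == KVMPPC_INST_EHPRIV) {
            run->exit_reason = KVM_EXIT_DEBUG;
            run->debug.arch.address = vcpu->arch.pc;
            return RESUME_HOST;
    }
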
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d1bb86074721..af326cde7cb6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -44,6 +44,10 @@ | |||
44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 | 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 |
45 | #endif | 45 | #endif |
46 | 46 | ||
47 | /* These values are internal and can be increased later */ | ||
48 | #define KVM_NR_IRQCHIPS 1 | ||
49 | #define KVM_IRQCHIP_NUM_PINS 256 | ||
50 | |||
47 | #if !defined(CONFIG_KVM_440) | 51 | #if !defined(CONFIG_KVM_440) |
48 | #include <linux/mmu_notifier.h> | 52 | #include <linux/mmu_notifier.h> |
49 | 53 | ||
@@ -188,6 +192,10 @@ struct kvmppc_linear_info { | |||
188 | int type; | 192 | int type; |
189 | }; | 193 | }; |
190 | 194 | ||
195 | /* XICS components, defined in book3s_xics.c */ | ||
196 | struct kvmppc_xics; | ||
197 | struct kvmppc_icp; | ||
198 | |||
191 | /* | 199 | /* |
192 | * The reverse mapping array has one entry for each HPTE, | 200 | * The reverse mapping array has one entry for each HPTE, |
193 | * which stores the guest's view of the second word of the HPTE | 201 | * which stores the guest's view of the second word of the HPTE |
@@ -255,6 +263,13 @@ struct kvm_arch { | |||
255 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | 263 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ |
256 | #ifdef CONFIG_PPC_BOOK3S_64 | 264 | #ifdef CONFIG_PPC_BOOK3S_64 |
257 | struct list_head spapr_tce_tables; | 265 | struct list_head spapr_tce_tables; |
266 | struct list_head rtas_tokens; | ||
267 | #endif | ||
268 | #ifdef CONFIG_KVM_MPIC | ||
269 | struct openpic *mpic; | ||
270 | #endif | ||
271 | #ifdef CONFIG_KVM_XICS | ||
272 | struct kvmppc_xics *xics; | ||
258 | #endif | 273 | #endif |
259 | }; | 274 | }; |
260 | 275 | ||
@@ -301,11 +316,13 @@ struct kvmppc_vcore { | |||
301 | * that a guest can register. | 316 | * that a guest can register. |
302 | */ | 317 | */ |
303 | struct kvmppc_vpa { | 318 | struct kvmppc_vpa { |
319 | unsigned long gpa; /* Current guest phys addr */ | ||
304 | void *pinned_addr; /* Address in kernel linear mapping */ | 320 | void *pinned_addr; /* Address in kernel linear mapping */ |
305 | void *pinned_end; /* End of region */ | 321 | void *pinned_end; /* End of region */ |
306 | unsigned long next_gpa; /* Guest phys addr for update */ | 322 | unsigned long next_gpa; /* Guest phys addr for update */ |
307 | unsigned long len; /* Number of bytes required */ | 323 | unsigned long len; /* Number of bytes required */ |
308 | u8 update_pending; /* 1 => update pinned_addr from next_gpa */ | 324 | u8 update_pending; /* 1 => update pinned_addr from next_gpa */ |
325 | bool dirty; /* true => area has been modified by kernel */ | ||
309 | }; | 326 | }; |
310 | 327 | ||
311 | struct kvmppc_pte { | 328 | struct kvmppc_pte { |
@@ -359,6 +376,11 @@ struct kvmppc_slb { | |||
359 | #define KVMPPC_BOOKE_MAX_IAC 4 | 376 | #define KVMPPC_BOOKE_MAX_IAC 4 |
360 | #define KVMPPC_BOOKE_MAX_DAC 2 | 377 | #define KVMPPC_BOOKE_MAX_DAC 2 |
361 | 378 | ||
379 | /* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ | ||
380 | #define KVMPPC_EPR_NONE 0 /* EPR not supported */ | ||
381 | #define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ | ||
382 | #define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ | ||
383 | |||
362 | struct kvmppc_booke_debug_reg { | 384 | struct kvmppc_booke_debug_reg { |
363 | u32 dbcr0; | 385 | u32 dbcr0; |
364 | u32 dbcr1; | 386 | u32 dbcr1; |
@@ -370,6 +392,12 @@ struct kvmppc_booke_debug_reg { | |||
370 | u64 dac[KVMPPC_BOOKE_MAX_DAC]; | 392 | u64 dac[KVMPPC_BOOKE_MAX_DAC]; |
371 | }; | 393 | }; |
372 | 394 | ||
395 | #define KVMPPC_IRQ_DEFAULT 0 | ||
396 | #define KVMPPC_IRQ_MPIC 1 | ||
397 | #define KVMPPC_IRQ_XICS 2 | ||
398 | |||
399 | struct openpic; | ||
400 | |||
373 | struct kvm_vcpu_arch { | 401 | struct kvm_vcpu_arch { |
374 | ulong host_stack; | 402 | ulong host_stack; |
375 | u32 host_pid; | 403 | u32 host_pid; |
@@ -502,8 +530,11 @@ struct kvm_vcpu_arch { | |||
502 | spinlock_t wdt_lock; | 530 | spinlock_t wdt_lock; |
503 | struct timer_list wdt_timer; | 531 | struct timer_list wdt_timer; |
504 | u32 tlbcfg[4]; | 532 | u32 tlbcfg[4]; |
533 | u32 tlbps[4]; | ||
505 | u32 mmucfg; | 534 | u32 mmucfg; |
535 | u32 eptcfg; | ||
506 | u32 epr; | 536 | u32 epr; |
537 | u32 crit_save; | ||
507 | struct kvmppc_booke_debug_reg dbg_reg; | 538 | struct kvmppc_booke_debug_reg dbg_reg; |
508 | #endif | 539 | #endif |
509 | gpa_t paddr_accessed; | 540 | gpa_t paddr_accessed; |
@@ -521,7 +552,7 @@ struct kvm_vcpu_arch { | |||
521 | u8 sane; | 552 | u8 sane; |
522 | u8 cpu_type; | 553 | u8 cpu_type; |
523 | u8 hcall_needed; | 554 | u8 hcall_needed; |
524 | u8 epr_enabled; | 555 | u8 epr_flags; /* KVMPPC_EPR_xxx */ |
525 | u8 epr_needed; | 556 | u8 epr_needed; |
526 | 557 | ||
527 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ | 558 | u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ |
@@ -548,6 +579,13 @@ struct kvm_vcpu_arch { | |||
548 | unsigned long magic_page_pa; /* phys addr to map the magic page to */ | 579 | unsigned long magic_page_pa; /* phys addr to map the magic page to */ |
549 | unsigned long magic_page_ea; /* effect. addr to map the magic page to */ | 580 | unsigned long magic_page_ea; /* effect. addr to map the magic page to */ |
550 | 581 | ||
582 | int irq_type; /* one of KVMPPC_IRQ_* */ | ||
583 | int irq_cpu_id; | ||
584 | struct openpic *mpic; /* KVM_IRQ_MPIC */ | ||
585 | #ifdef CONFIG_KVM_XICS | ||
586 | struct kvmppc_icp *icp; /* XICS presentation controller */ | ||
587 | #endif | ||
588 | |||
551 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 589 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
552 | struct kvm_vcpu_arch_shared shregs; | 590 | struct kvm_vcpu_arch_shared shregs; |
553 | 591 | ||
@@ -588,5 +626,6 @@ struct kvm_vcpu_arch { | |||
588 | #define KVM_MMIO_REG_FQPR 0x0060 | 626 | #define KVM_MMIO_REG_FQPR 0x0060 |
589 | 627 | ||
590 | #define __KVM_HAVE_ARCH_WQP | 628 | #define __KVM_HAVE_ARCH_WQP |
629 | #define __KVM_HAVE_CREATE_DEVICE | ||
591 | 630 | ||
592 | #endif /* __POWERPC_KVM_HOST_H__ */ | 631 | #endif /* __POWERPC_KVM_HOST_H__ */ |
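
The epr_flags comment above fixes a precedence: userspace EPR delivery wins over the in-kernel irqchip. A hedged sketch of how external-interrupt delivery on booke is assumed to consult it:

    if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
            kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);  /* userspace fills EPR */
    else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL)
            kvmppc_mpic_set_epr(vcpu);                 /* in-kernel MPIC fills it */
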
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 44a657adf416..a5287fe03d77 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -44,7 +44,7 @@ enum emulation_result { | |||
44 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ | 44 | EMULATE_DO_DCR, /* kvm_run filled with DCR request */ |
45 | EMULATE_FAIL, /* can't emulate this instruction */ | 45 | EMULATE_FAIL, /* can't emulate this instruction */ |
46 | EMULATE_AGAIN, /* something went wrong. go again */ | 46 | EMULATE_AGAIN, /* something went wrong. go again */ |
47 | EMULATE_DO_PAPR, /* kvm_run filled with PAPR request */ | 47 | EMULATE_EXIT_USER, /* emulation requires exit to user-space */ |
48 | }; | 48 | }; |
49 | 49 | ||
50 | extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 50 | extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
@@ -104,8 +104,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | |||
104 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); | 104 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); |
105 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 105 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
106 | struct kvm_interrupt *irq); | 106 | struct kvm_interrupt *irq); |
107 | extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | 107 | extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); |
108 | struct kvm_interrupt *irq); | ||
109 | extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); | 108 | extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); |
110 | 109 | ||
111 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | 110 | extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, |
@@ -131,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, | |||
131 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, | 130 | extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, |
132 | struct kvm_memory_slot *memslot, unsigned long porder); | 131 | struct kvm_memory_slot *memslot, unsigned long porder); |
133 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); | 132 | extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); |
133 | |||
134 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, | 134 | extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, |
135 | struct kvm_create_spapr_tce *args); | 135 | struct kvm_create_spapr_tce *args); |
136 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, | 136 | extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, |
@@ -152,7 +152,7 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
152 | struct kvm_userspace_memory_region *mem); | 152 | struct kvm_userspace_memory_region *mem); |
153 | extern void kvmppc_core_commit_memory_region(struct kvm *kvm, | 153 | extern void kvmppc_core_commit_memory_region(struct kvm *kvm, |
154 | struct kvm_userspace_memory_region *mem, | 154 | struct kvm_userspace_memory_region *mem, |
155 | struct kvm_memory_slot old); | 155 | const struct kvm_memory_slot *old); |
156 | extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, | 156 | extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, |
157 | struct kvm_ppc_smmu_info *info); | 157 | struct kvm_ppc_smmu_info *info); |
158 | extern void kvmppc_core_flush_memslot(struct kvm *kvm, | 158 | extern void kvmppc_core_flush_memslot(struct kvm *kvm, |
@@ -165,6 +165,18 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); | |||
165 | 165 | ||
166 | extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); | 166 | extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); |
167 | 167 | ||
168 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); | ||
169 | |||
170 | extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); | ||
171 | extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); | ||
172 | extern void kvmppc_rtas_tokens_free(struct kvm *kvm); | ||
173 | extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, | ||
174 | u32 priority); | ||
175 | extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, | ||
176 | u32 *priority); | ||
177 | extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); | ||
178 | extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); | ||
179 | |||
168 | /* | 180 | /* |
169 | * Cuts out inst bits with ordering according to spec. | 181 | * Cuts out inst bits with ordering according to spec. |
170 | * That means the leftmost bit is zero. All given bits are included. | 182 | * That means the leftmost bit is zero. All given bits are included. |
@@ -246,12 +258,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); | |||
246 | 258 | ||
247 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); | 259 | void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); |
248 | 260 | ||
261 | struct openpic; | ||
262 | |||
249 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 263 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
250 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | 264 | static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) |
251 | { | 265 | { |
252 | paca[cpu].kvm_hstate.xics_phys = addr; | 266 | paca[cpu].kvm_hstate.xics_phys = addr; |
253 | } | 267 | } |
254 | 268 | ||
269 | static inline u32 kvmppc_get_xics_latch(void) | ||
270 | { | ||
271 | u32 xirr = get_paca()->kvm_hstate.saved_xirr; | ||
272 | |||
273 | get_paca()->kvm_hstate.saved_xirr = 0; | ||
274 | |||
275 | return xirr; | ||
276 | } | ||
277 | |||
278 | static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) | ||
279 | { | ||
280 | paca[cpu].kvm_hstate.host_ipi = host_ipi; | ||
281 | } | ||
282 | |||
283 | extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); | ||
255 | extern void kvm_linear_init(void); | 284 | extern void kvm_linear_init(void); |
256 | 285 | ||
257 | #else | 286 | #else |
@@ -260,6 +289,46 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) | |||
260 | 289 | ||
261 | static inline void kvm_linear_init(void) | 290 | static inline void kvm_linear_init(void) |
262 | {} | 291 | {} |
292 | |||
293 | static inline u32 kvmppc_get_xics_latch(void) | ||
294 | { | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) | ||
299 | {} | ||
300 | |||
301 | static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) | ||
302 | { | ||
303 | kvm_vcpu_kick(vcpu); | ||
304 | } | ||
305 | #endif | ||
306 | |||
307 | #ifdef CONFIG_KVM_XICS | ||
308 | static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) | ||
309 | { | ||
310 | return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; | ||
311 | } | ||
312 | extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); | ||
313 | extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); | ||
314 | extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); | ||
315 | extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); | ||
316 | extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); | ||
317 | extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); | ||
318 | extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, | ||
319 | struct kvm_vcpu *vcpu, u32 cpu); | ||
320 | #else | ||
321 | static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) | ||
322 | { return 0; } | ||
323 | static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } | ||
324 | static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, | ||
325 | unsigned long server) | ||
326 | { return -EINVAL; } | ||
327 | static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm, | ||
328 | struct kvm_irq_level *args) | ||
329 | { return -ENOTTY; } | ||
330 | static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) | ||
331 | { return 0; } | ||
263 | #endif | 332 | #endif |
264 | 333 | ||
265 | static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) | 334 | static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) |
@@ -271,6 +340,32 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) | |||
271 | #endif | 340 | #endif |
272 | } | 341 | } |
273 | 342 | ||
343 | #ifdef CONFIG_KVM_MPIC | ||
344 | |||
345 | void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); | ||
346 | int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, | ||
347 | u32 cpu); | ||
348 | void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu); | ||
349 | |||
350 | #else | ||
351 | |||
352 | static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) | ||
353 | { | ||
354 | } | ||
355 | |||
356 | static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, | ||
357 | struct kvm_vcpu *vcpu, u32 cpu) | ||
358 | { | ||
359 | return -EINVAL; | ||
360 | } | ||
361 | |||
362 | static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, | ||
363 | struct kvm_vcpu *vcpu) | ||
364 | { | ||
365 | } | ||
366 | |||
367 | #endif /* CONFIG_KVM_MPIC */ | ||
368 | |||
274 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | 369 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, |
275 | struct kvm_config_tlb *cfg); | 370 | struct kvm_config_tlb *cfg); |
276 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | 371 | int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, |
@@ -283,8 +378,15 @@ void kvmppc_init_lpid(unsigned long nr_lpids); | |||
283 | 378 | ||
284 | static inline void kvmppc_mmu_flush_icache(pfn_t pfn) | 379 | static inline void kvmppc_mmu_flush_icache(pfn_t pfn) |
285 | { | 380 | { |
286 | /* Clear i-cache for new pages */ | ||
287 | struct page *page; | 381 | struct page *page; |
382 | /* | ||
383 | * We can only access pages that the kernel maps | ||
384 | * as memory. Bail out for unmapped ones. | ||
385 | */ | ||
386 | if (!pfn_valid(pfn)) | ||
387 | return; | ||
388 | |||
389 | /* Clear i-cache for new pages */ | ||
288 | page = pfn_to_page(pfn); | 390 | page = pfn_to_page(pfn); |
289 | if (!test_bit(PG_arch_1, &page->flags)) { | 391 | if (!test_bit(PG_arch_1, &page->flags)) { |
290 | flush_dcache_icache_page(page); | 392 | flush_dcache_icache_page(page); |
@@ -324,4 +426,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) | |||
324 | return ea; | 426 | return ea; |
325 | } | 427 | } |
326 | 428 | ||
429 | extern void xics_wake_cpu(int cpu); | ||
430 | |||
327 | #endif /* __POWERPC_KVM_PPC_H__ */ | 431 | #endif /* __POWERPC_KVM_PPC_H__ */ |
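
kvmppc_set_host_ipi() and xics_wake_cpu() pair up whenever the host must pull a CPU out of guest context: the flag is raised first, so the real-mode handler that consumes the resulting IPI knows the wakeup belongs to the host. A minimal usage sketch:

    /* Sketch: kick a thread that is currently running guest code. */
    kvmppc_set_host_ipi(cpu, 1);    /* mark: the next IPI is for the host */
    smp_mb();                       /* flag must be visible before the IPI */
    xics_wake_cpu(cpu);             /* raise the IPI through the XICS ICP */
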
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 3d17427e4fd7..a6136515c7f2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -300,6 +300,7 @@ | |||
300 | #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ | 300 | #define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ |
301 | #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ | 301 | #define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ |
302 | #define LPCR_MER 0x00000800 /* Mediated External Exception */ | 302 | #define LPCR_MER 0x00000800 /* Mediated External Exception */ |
303 | #define LPCR_MER_SH 11 | ||
303 | #define LPCR_LPES 0x0000000c | 304 | #define LPCR_LPES 0x0000000c |
304 | #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ | 305 | #define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ |
305 | #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ | 306 | #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ |
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 16064d00adb9..0fb1a6e9ff90 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h | |||
@@ -25,6 +25,8 @@ | |||
25 | /* Select powerpc specific features in <linux/kvm.h> */ | 25 | /* Select powerpc specific features in <linux/kvm.h> */ |
26 | #define __KVM_HAVE_SPAPR_TCE | 26 | #define __KVM_HAVE_SPAPR_TCE |
27 | #define __KVM_HAVE_PPC_SMT | 27 | #define __KVM_HAVE_PPC_SMT |
28 | #define __KVM_HAVE_IRQCHIP | ||
29 | #define __KVM_HAVE_IRQ_LINE | ||
28 | 30 | ||
29 | struct kvm_regs { | 31 | struct kvm_regs { |
30 | __u64 pc; | 32 | __u64 pc; |
@@ -272,8 +274,31 @@ struct kvm_debug_exit_arch { | |||
272 | 274 | ||
273 | /* for KVM_SET_GUEST_DEBUG */ | 275 | /* for KVM_SET_GUEST_DEBUG */ |
274 | struct kvm_guest_debug_arch { | 276 | struct kvm_guest_debug_arch { |
277 | struct { | ||
278 | /* H/W breakpoint/watchpoint address */ | ||
279 | __u64 addr; | ||
280 | /* | ||
281 | * Type denotes a h/w breakpoint, a read watchpoint, a write | ||
282 | * watchpoint, or a combined read/write watchpoint. | ||
283 | */ | ||
284 | #define KVMPPC_DEBUG_NONE 0x0 | ||
285 | #define KVMPPC_DEBUG_BREAKPOINT (1UL << 1) | ||
286 | #define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2) | ||
287 | #define KVMPPC_DEBUG_WATCH_READ (1UL << 3) | ||
288 | __u32 type; | ||
289 | __u32 reserved; | ||
290 | } bp[16]; | ||
275 | }; | 291 | }; |
276 | 292 | ||
293 | /* Debug related defines */ | ||
294 | /* | ||
295 | * kvm_guest_debug->control is a 32 bit field. The lower 16 bits are generic | ||
296 | * and upper 16 bits are architecture specific. Architecture specific defines | ||
297 | * that ioctl is for setting hardware breakpoint or software breakpoint. | ||
298 | */ | ||
299 | #define KVM_GUESTDBG_USE_SW_BP 0x00010000 | ||
300 | #define KVM_GUESTDBG_USE_HW_BP 0x00020000 | ||
301 | |||
277 | /* definition of registers in kvm_run */ | 302 | /* definition of registers in kvm_run */ |
278 | struct kvm_sync_regs { | 303 | struct kvm_sync_regs { |
279 | }; | 304 | }; |
@@ -299,6 +324,12 @@ struct kvm_allocate_rma { | |||
299 | __u64 rma_size; | 324 | __u64 rma_size; |
300 | }; | 325 | }; |
301 | 326 | ||
327 | /* for KVM_CAP_PPC_RTAS */ | ||
328 | struct kvm_rtas_token_args { | ||
329 | char name[120]; | ||
330 | __u64 token; /* Use a token of 0 to undefine a mapping */ | ||
331 | }; | ||
332 | |||
302 | struct kvm_book3e_206_tlb_entry { | 333 | struct kvm_book3e_206_tlb_entry { |
303 | __u32 mas8; | 334 | __u32 mas8; |
304 | __u32 mas1; | 335 | __u32 mas1; |
@@ -359,6 +390,26 @@ struct kvm_get_htab_header { | |||
359 | __u16 n_invalid; | 390 | __u16 n_invalid; |
360 | }; | 391 | }; |
361 | 392 | ||
393 | /* Per-vcpu XICS interrupt controller state */ | ||
394 | #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) | ||
395 | |||
396 | #define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */ | ||
397 | #define KVM_REG_PPC_ICP_CPPR_MASK 0xff | ||
398 | #define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */ | ||
399 | #define KVM_REG_PPC_ICP_XISR_MASK 0xffffff | ||
400 | #define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */ | ||
401 | #define KVM_REG_PPC_ICP_MFRR_MASK 0xff | ||
402 | #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ | ||
403 | #define KVM_REG_PPC_ICP_PPRI_MASK 0xff | ||
404 | |||
405 | /* Device control API: PPC-specific devices */ | ||
406 | #define KVM_DEV_MPIC_GRP_MISC 1 | ||
407 | #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ | ||
408 | |||
409 | #define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */ | ||
410 | #define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */ | ||
411 | |||
412 | /* One-Reg API: PPC-specific registers */ | ||
362 | #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) | 413 | #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) |
363 | #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) | 414 | #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) |
364 | #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) | 415 | #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) |
@@ -417,4 +468,47 @@ struct kvm_get_htab_header { | |||
417 | #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) | 468 | #define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) |
418 | #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) | 469 | #define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86) |
419 | 470 | ||
471 | /* Timer Status Register OR/CLEAR interface */ | ||
472 | #define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87) | ||
473 | #define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88) | ||
474 | #define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89) | ||
475 | #define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a) | ||
476 | |||
477 | /* Debugging: Special instruction for software breakpoint */ | ||
478 | #define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) | ||
479 | |||
480 | /* MMU registers */ | ||
481 | #define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c) | ||
482 | #define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d) | ||
483 | #define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e) | ||
484 | #define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f) | ||
485 | #define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90) | ||
486 | #define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91) | ||
487 | #define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92) | ||
488 | /* | ||
489 | * The TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only | ||
490 | * via the KVM_CAP_SW_TLB ioctl. | ||
491 | */ | ||
492 | #define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93) | ||
493 | #define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94) | ||
494 | #define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95) | ||
495 | #define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96) | ||
496 | #define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97) | ||
497 | #define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98) | ||
498 | #define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99) | ||
499 | #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) | ||
500 | #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) | ||
501 | |||
502 | /* PPC64 eXternal Interrupt Controller Specification */ | ||
503 | #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ | ||
504 | |||
505 | /* Layout of 64-bit source attribute values */ | ||
506 | #define KVM_XICS_DESTINATION_SHIFT 0 | ||
507 | #define KVM_XICS_DESTINATION_MASK 0xffffffffULL | ||
508 | #define KVM_XICS_PRIORITY_SHIFT 32 | ||
509 | #define KVM_XICS_PRIORITY_MASK 0xff | ||
510 | #define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) | ||
511 | #define KVM_XICS_MASKED (1ULL << 41) | ||
512 | #define KVM_XICS_PENDING (1ULL << 42) | ||
513 | |||
420 | #endif /* __LINUX_KVM_POWERPC_H */ | 514 | #endif /* __LINUX_KVM_POWERPC_H */ |
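
The ICP state word packs four fields into a single u64 so KVM_GET_ONE_REG/KVM_SET_ONE_REG can snapshot a vCPU's interrupt-presentation state atomically. Decoding follows directly from the shift/mask pairs above; a userspace sketch (state is assumed to have been fetched through kvm_one_reg.addr):

    /* Sketch: unpack a value read via KVM_GET_ONE_REG with
     * id == KVM_REG_PPC_ICP_STATE. */
    __u8  cppr = (state >> KVM_REG_PPC_ICP_CPPR_SHIFT) & KVM_REG_PPC_ICP_CPPR_MASK;
    __u32 xisr = (state >> KVM_REG_PPC_ICP_XISR_SHIFT) & KVM_REG_PPC_ICP_XISR_MASK;
    __u8  mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT) & KVM_REG_PPC_ICP_MFRR_MASK;
    __u8  ppri = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT) & KVM_REG_PPC_ICP_PPRI_MASK;
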
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 172233eab799..b51a97cfedf8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -480,6 +480,7 @@ int main(void) | |||
480 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); | 480 | DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); |
481 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); | 481 | DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); |
482 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); | 482 | DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); |
483 | DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); | ||
483 | #endif | 484 | #endif |
484 | #ifdef CONFIG_PPC_BOOK3S | 485 | #ifdef CONFIG_PPC_BOOK3S |
485 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); | 486 | DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); |
@@ -576,6 +577,8 @@ int main(void) | |||
576 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); | 577 | HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); |
577 | HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); | 578 | HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); |
578 | HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); | 579 | HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); |
580 | HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); | ||
581 | HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); | ||
579 | HSTATE_FIELD(HSTATE_MMCR, host_mmcr); | 582 | HSTATE_FIELD(HSTATE_MMCR, host_mmcr); |
580 | HSTATE_FIELD(HSTATE_PMC, host_pmc); | 583 | HSTATE_FIELD(HSTATE_PMC, host_pmc); |
581 | HSTATE_FIELD(HSTATE_PURR, host_purr); | 584 | HSTATE_FIELD(HSTATE_PURR, host_purr); |
@@ -599,6 +602,7 @@ int main(void) | |||
599 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | 602 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); |
600 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | 603 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); |
601 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); | 604 | DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); |
605 | DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save)); | ||
602 | #endif /* CONFIG_PPC_BOOK3S */ | 606 | #endif /* CONFIG_PPC_BOOK3S */ |
603 | #endif /* CONFIG_KVM */ | 607 | #endif /* CONFIG_KVM */ |
604 | 608 | ||
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 3d7fd21c65f9..2f5c6b6d6877 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c | |||
@@ -124,6 +124,18 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
124 | return kvmppc_set_sregs_ivor(vcpu, sregs); | 124 | return kvmppc_set_sregs_ivor(vcpu, sregs); |
125 | } | 125 | } |
126 | 126 | ||
127 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
128 | union kvmppc_one_reg *val) | ||
129 | { | ||
130 | return -EINVAL; | ||
131 | } | ||
132 | |||
133 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
134 | union kvmppc_one_reg *val) | ||
135 | { | ||
136 | return -EINVAL; | ||
137 | } | ||
138 | |||
127 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 139 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
128 | { | 140 | { |
129 | struct kvmppc_vcpu_44x *vcpu_44x; | 141 | struct kvmppc_vcpu_44x *vcpu_44x; |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 63c67ec72e43..eb643f862579 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -136,21 +136,41 @@ config KVM_E500V2 | |||
136 | If unsure, say N. | 136 | If unsure, say N. |
137 | 137 | ||
138 | config KVM_E500MC | 138 | config KVM_E500MC |
139 | bool "KVM support for PowerPC E500MC/E5500 processors" | 139 | bool "KVM support for PowerPC E500MC/E5500/E6500 processors" |
140 | depends on PPC_E500MC | 140 | depends on PPC_E500MC |
141 | select KVM | 141 | select KVM |
142 | select KVM_MMIO | 142 | select KVM_MMIO |
143 | select KVM_BOOKE_HV | 143 | select KVM_BOOKE_HV |
144 | select MMU_NOTIFIER | 144 | select MMU_NOTIFIER |
145 | ---help--- | 145 | ---help--- |
146 | Support running unmodified E500MC/E5500 (32-bit) guest kernels in | 146 | Support running unmodified E500MC/E5500/E6500 guest kernels in |
147 | virtual machines on E500MC/E5500 host processors. | 147 | virtual machines on E500MC/E5500/E6500 host processors. |
148 | 148 | ||
149 | This module provides access to the hardware capabilities through | 149 | This module provides access to the hardware capabilities through |
150 | a character device node named /dev/kvm. | 150 | a character device node named /dev/kvm. |
151 | 151 | ||
152 | If unsure, say N. | 152 | If unsure, say N. |
153 | 153 | ||
154 | config KVM_MPIC | ||
155 | bool "KVM in-kernel MPIC emulation" | ||
156 | depends on KVM && E500 | ||
157 | select HAVE_KVM_IRQCHIP | ||
158 | select HAVE_KVM_IRQ_ROUTING | ||
159 | select HAVE_KVM_MSI | ||
160 | help | ||
161 | Enable support for emulating MPIC devices inside the | ||
162 | host kernel, rather than relying on userspace to emulate them. | ||
163 | Currently, support is limited to certain versions of | ||
164 | Freescale's MPIC implementation. | ||
165 | |||
166 | config KVM_XICS | ||
167 | bool "KVM in-kernel XICS emulation" | ||
168 | depends on KVM_BOOK3S_64 && !KVM_MPIC | ||
169 | ---help--- | ||
170 | Include support for the XICS (eXternal Interrupt Controller | ||
171 | Specification) interrupt controller architecture used on | ||
172 | IBM POWER (pSeries) servers. | ||
173 | |||
154 | source drivers/vhost/Kconfig | 174 | source drivers/vhost/Kconfig |
155 | 175 | ||
156 | endif # VIRTUALIZATION | 176 | endif # VIRTUALIZATION |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772eded8c26..422de3f4d46c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile | |||
@@ -72,12 +72,18 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | |||
72 | book3s_hv.o \ | 72 | book3s_hv.o \ |
73 | book3s_hv_interrupts.o \ | 73 | book3s_hv_interrupts.o \ |
74 | book3s_64_mmu_hv.o | 74 | book3s_64_mmu_hv.o |
75 | kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ | ||
76 | book3s_hv_rm_xics.o | ||
75 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ | 77 | kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ |
76 | book3s_hv_rmhandlers.o \ | 78 | book3s_hv_rmhandlers.o \ |
77 | book3s_hv_rm_mmu.o \ | 79 | book3s_hv_rm_mmu.o \ |
78 | book3s_64_vio_hv.o \ | 80 | book3s_64_vio_hv.o \ |
79 | book3s_hv_ras.o \ | 81 | book3s_hv_ras.o \ |
80 | book3s_hv_builtin.o | 82 | book3s_hv_builtin.o \ |
83 | $(kvm-book3s_64-builtin-xics-objs-y) | ||
84 | |||
85 | kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ | ||
86 | book3s_xics.o | ||
81 | 87 | ||
82 | kvm-book3s_64-module-objs := \ | 88 | kvm-book3s_64-module-objs := \ |
83 | ../../../virt/kvm/kvm_main.o \ | 89 | ../../../virt/kvm/kvm_main.o \ |
@@ -86,6 +92,7 @@ kvm-book3s_64-module-objs := \ | |||
86 | emulate.o \ | 92 | emulate.o \ |
87 | book3s.o \ | 93 | book3s.o \ |
88 | book3s_64_vio.o \ | 94 | book3s_64_vio.o \ |
95 | book3s_rtas.o \ | ||
89 | $(kvm-book3s_64-objs-y) | 96 | $(kvm-book3s_64-objs-y) |
90 | 97 | ||
91 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) | 98 | kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) |
@@ -103,6 +110,9 @@ kvm-book3s_32-objs := \ | |||
103 | book3s_32_mmu.o | 110 | book3s_32_mmu.o |
104 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) | 111 | kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) |
105 | 112 | ||
113 | kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o | ||
114 | kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) | ||
115 | |||
106 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) | 116 | kvm-objs := $(kvm-objs-m) $(kvm-objs-y) |
107 | 117 | ||
108 | obj-$(CONFIG_KVM_440) += kvm.o | 118 | obj-$(CONFIG_KVM_440) += kvm.o |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a4b645285240..700df6f1d32c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -104,7 +104,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) | |||
104 | return prio; | 104 | return prio; |
105 | } | 105 | } |
106 | 106 | ||
107 | static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, | 107 | void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, |
108 | unsigned int vec) | 108 | unsigned int vec) |
109 | { | 109 | { |
110 | unsigned long old_pending = vcpu->arch.pending_exceptions; | 110 | unsigned long old_pending = vcpu->arch.pending_exceptions; |
@@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
160 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 160 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
161 | } | 161 | } |
162 | 162 | ||
163 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | 163 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) |
164 | struct kvm_interrupt *irq) | ||
165 | { | 164 | { |
166 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); | 165 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); |
167 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | 166 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); |
@@ -530,6 +529,21 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
530 | val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); | 529 | val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); |
531 | break; | 530 | break; |
532 | #endif /* CONFIG_ALTIVEC */ | 531 | #endif /* CONFIG_ALTIVEC */ |
532 | case KVM_REG_PPC_DEBUG_INST: { | ||
533 | u32 opcode = INS_TW; | ||
534 | r = copy_to_user((u32 __user *)(long)reg->addr, | ||
535 | &opcode, sizeof(u32)); | ||
536 | break; | ||
537 | } | ||
538 | #ifdef CONFIG_KVM_XICS | ||
539 | case KVM_REG_PPC_ICP_STATE: | ||
540 | if (!vcpu->arch.icp) { | ||
541 | r = -ENXIO; | ||
542 | break; | ||
543 | } | ||
544 | val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); | ||
545 | break; | ||
546 | #endif /* CONFIG_KVM_XICS */ | ||
533 | default: | 547 | default: |
534 | r = -EINVAL; | 548 | r = -EINVAL; |
535 | break; | 549 | break; |
@@ -592,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | |||
592 | vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); | 606 | vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); |
593 | break; | 607 | break; |
594 | #endif /* CONFIG_ALTIVEC */ | 608 | #endif /* CONFIG_ALTIVEC */ |
609 | #ifdef CONFIG_KVM_XICS | ||
610 | case KVM_REG_PPC_ICP_STATE: | ||
611 | if (!vcpu->arch.icp) { | ||
612 | r = -ENXIO; | ||
613 | break; | ||
614 | } | ||
615 | r = kvmppc_xics_set_icp(vcpu, | ||
616 | set_reg_val(reg->id, val)); | ||
617 | break; | ||
618 | #endif /* CONFIG_KVM_XICS */ | ||
595 | default: | 619 | default: |
596 | r = -EINVAL; | 620 | r = -EINVAL; |
597 | break; | 621 | break; |
@@ -607,6 +631,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
607 | return 0; | 631 | return 0; |
608 | } | 632 | } |
609 | 633 | ||
634 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | ||
635 | struct kvm_guest_debug *dbg) | ||
636 | { | ||
637 | return -EINVAL; | ||
638 | } | ||
639 | |||
610 | void kvmppc_decrementer_func(unsigned long data) | 640 | void kvmppc_decrementer_func(unsigned long data) |
611 | { | 641 | { |
612 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | 642 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; |
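
Taken together, KVM_REG_PPC_DEBUG_INST and KVM_GUESTDBG_USE_SW_BP give userspace a complete software-breakpoint recipe: fetch the architecture's trap opcode once, patch it over the target instruction in guest memory, and enable debug exits. Note that book3s still returns -EINVAL from kvm_arch_vcpu_ioctl_set_guest_debug() above, so the flow applies where an architecture wires it up; a hedged userspace sketch (vcpu_fd is an assumed open vCPU descriptor, error handling elided):

    __u32 trap;
    struct kvm_one_reg reg = {
            .id   = KVM_REG_PPC_DEBUG_INST,
            .addr = (__u64)(unsigned long)&trap,
    };
    ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);   /* e.g. trap == INS_TW on book3s */

    /* ... write 'trap' over the target instruction in guest memory ... */

    struct kvm_guest_debug dbg = {
            .control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP,
    };
    ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
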
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index da98e26f6e45..5880dfb31074 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
@@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
893 | /* Harvest R and C */ | 893 | /* Harvest R and C */ |
894 | rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); | 894 | rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); |
895 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; | 895 | *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; |
896 | rev[i].guest_rpte = ptel | rcbits; | 896 | if (rcbits & ~rev[i].guest_rpte) { |
897 | rev[i].guest_rpte = ptel | rcbits; | ||
898 | note_hpte_modification(kvm, &rev[i]); | ||
899 | } | ||
897 | } | 900 | } |
898 | unlock_rmap(rmapp); | 901 | unlock_rmap(rmapp); |
899 | hptep[0] &= ~HPTE_V_HVLOCK; | 902 | hptep[0] &= ~HPTE_V_HVLOCK; |
@@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
976 | /* Now check and modify the HPTE */ | 979 | /* Now check and modify the HPTE */ |
977 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { | 980 | if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { |
978 | kvmppc_clear_ref_hpte(kvm, hptep, i); | 981 | kvmppc_clear_ref_hpte(kvm, hptep, i); |
979 | rev[i].guest_rpte |= HPTE_R_R; | 982 | if (!(rev[i].guest_rpte & HPTE_R_R)) { |
983 | rev[i].guest_rpte |= HPTE_R_R; | ||
984 | note_hpte_modification(kvm, &rev[i]); | ||
985 | } | ||
980 | ret = 1; | 986 | ret = 1; |
981 | } | 987 | } |
982 | hptep[0] &= ~HPTE_V_HVLOCK; | 988 | hptep[0] &= ~HPTE_V_HVLOCK; |
@@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
1080 | hptep[1] &= ~HPTE_R_C; | 1086 | hptep[1] &= ~HPTE_R_C; |
1081 | eieio(); | 1087 | eieio(); |
1082 | hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; | 1088 | hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; |
1083 | rev[i].guest_rpte |= HPTE_R_C; | 1089 | if (!(rev[i].guest_rpte & HPTE_R_C)) { |
1090 | rev[i].guest_rpte |= HPTE_R_C; | ||
1091 | note_hpte_modification(kvm, &rev[i]); | ||
1092 | } | ||
1084 | ret = 1; | 1093 | ret = 1; |
1085 | } | 1094 | } |
1086 | hptep[0] &= ~HPTE_V_HVLOCK; | 1095 | hptep[0] &= ~HPTE_V_HVLOCK; |
@@ -1090,11 +1099,30 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) | |||
1090 | return ret; | 1099 | return ret; |
1091 | } | 1100 | } |
1092 | 1101 | ||
1102 | static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, | ||
1103 | struct kvm_memory_slot *memslot, | ||
1104 | unsigned long *map) | ||
1105 | { | ||
1106 | unsigned long gfn; | ||
1107 | |||
1108 | if (!vpa->dirty || !vpa->pinned_addr) | ||
1109 | return; | ||
1110 | gfn = vpa->gpa >> PAGE_SHIFT; | ||
1111 | if (gfn < memslot->base_gfn || | ||
1112 | gfn >= memslot->base_gfn + memslot->npages) | ||
1113 | return; | ||
1114 | |||
1115 | vpa->dirty = false; | ||
1116 | if (map) | ||
1117 | __set_bit_le(gfn - memslot->base_gfn, map); | ||
1118 | } | ||
1119 | |||
1093 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, | 1120 | long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, |
1094 | unsigned long *map) | 1121 | unsigned long *map) |
1095 | { | 1122 | { |
1096 | unsigned long i; | 1123 | unsigned long i; |
1097 | unsigned long *rmapp; | 1124 | unsigned long *rmapp; |
1125 | struct kvm_vcpu *vcpu; | ||
1098 | 1126 | ||
1099 | preempt_disable(); | 1127 | preempt_disable(); |
1100 | rmapp = memslot->arch.rmap; | 1128 | rmapp = memslot->arch.rmap; |
@@ -1103,6 +1131,15 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, | |||
1103 | __set_bit_le(i, map); | 1131 | __set_bit_le(i, map); |
1104 | ++rmapp; | 1132 | ++rmapp; |
1105 | } | 1133 | } |
1134 | |||
1135 | /* Harvest dirty bits from VPA and DTL updates */ | ||
1136 | /* Note: we never modify the SLB shadow buffer areas */ | ||
1137 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
1138 | spin_lock(&vcpu->arch.vpa_update_lock); | ||
1139 | harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); | ||
1140 | harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); | ||
1141 | spin_unlock(&vcpu->arch.vpa_update_lock); | ||
1142 | } | ||
1106 | preempt_enable(); | 1143 | preempt_enable(); |
1107 | return 0; | 1144 | return 0; |
1108 | } | 1145 | } |
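With the VPA and DTL dirty bits folded in above, the dirty log userspace retrieves now also covers guest stores made through those structures. A hedged sketch of the retrieval side (the ioctl and struct are standard KVM UAPI; vm_fd, slot_id and bitmap are illustrative):

    /* bitmap must hold one bit per page of the memslot */
    struct kvm_dirty_log dlog = {
            .slot = slot_id,
            .dirty_bitmap = bitmap,
    };

    if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &dlog) < 0)
            perror("KVM_GET_DIRTY_LOG");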
@@ -1114,7 +1151,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
1114 | unsigned long gfn = gpa >> PAGE_SHIFT; | 1151 | unsigned long gfn = gpa >> PAGE_SHIFT; |
1115 | struct page *page, *pages[1]; | 1152 | struct page *page, *pages[1]; |
1116 | int npages; | 1153 | int npages; |
1117 | unsigned long hva, psize, offset; | 1154 | unsigned long hva, offset; |
1118 | unsigned long pa; | 1155 | unsigned long pa; |
1119 | unsigned long *physp; | 1156 | unsigned long *physp; |
1120 | int srcu_idx; | 1157 | int srcu_idx; |
@@ -1146,14 +1183,9 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
1146 | } | 1183 | } |
1147 | srcu_read_unlock(&kvm->srcu, srcu_idx); | 1184 | srcu_read_unlock(&kvm->srcu, srcu_idx); |
1148 | 1185 | ||
1149 | psize = PAGE_SIZE; | 1186 | offset = gpa & (PAGE_SIZE - 1); |
1150 | if (PageHuge(page)) { | ||
1151 | page = compound_head(page); | ||
1152 | psize <<= compound_order(page); | ||
1153 | } | ||
1154 | offset = gpa & (psize - 1); | ||
1155 | if (nb_ret) | 1187 | if (nb_ret) |
1156 | *nb_ret = psize - offset; | 1188 | *nb_ret = PAGE_SIZE - offset; |
1157 | return page_address(page) + offset; | 1189 | return page_address(page) + offset; |
1158 | 1190 | ||
1159 | err: | 1191 | err: |
@@ -1161,11 +1193,31 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, | |||
1161 | return NULL; | 1193 | return NULL; |
1162 | } | 1194 | } |
1163 | 1195 | ||
1164 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) | 1196 | void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa, |
1197 | bool dirty) | ||
1165 | { | 1198 | { |
1166 | struct page *page = virt_to_page(va); | 1199 | struct page *page = virt_to_page(va); |
1200 | struct kvm_memory_slot *memslot; | ||
1201 | unsigned long gfn; | ||
1202 | unsigned long *rmap; | ||
1203 | int srcu_idx; | ||
1167 | 1204 | ||
1168 | put_page(page); | 1205 | put_page(page); |
1206 | |||
1207 | if (!dirty || !kvm->arch.using_mmu_notifiers) | ||
1208 | return; | ||
1209 | |||
1210 | /* We need to mark this page dirty in the rmap chain */ | ||
1211 | gfn = gpa >> PAGE_SHIFT; | ||
1212 | srcu_idx = srcu_read_lock(&kvm->srcu); | ||
1213 | memslot = gfn_to_memslot(kvm, gfn); | ||
1214 | if (memslot) { | ||
1215 | rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; | ||
1216 | lock_rmap(rmap); | ||
1217 | *rmap |= KVMPPC_RMAP_CHANGED; | ||
1218 | unlock_rmap(rmap); | ||
1219 | } | ||
1220 | srcu_read_unlock(&kvm->srcu, srcu_idx); | ||
1169 | } | 1221 | } |
1170 | 1222 | ||
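The signature change means every unpin site now declares whether it wrote to the page, so the gfn can be flagged in the rmap for dirty logging. A minimal sketch of the new calling convention, using the functions as changed in this hunk (data and len are illustrative):

    unsigned long nb;
    void *va = kvmppc_pin_guest_page(kvm, gpa, &nb);

    if (va) {
            memcpy(va, data, len);                  /* the page was modified, */
            kvmppc_unpin_guest_page(kvm, va, gpa, true);    /* so flag it dirty */
    }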
1171 | /* | 1223 | /* |
@@ -1193,16 +1245,36 @@ struct kvm_htab_ctx { | |||
1193 | 1245 | ||
1194 | #define HPTE_SIZE (2 * sizeof(unsigned long)) | 1246 | #define HPTE_SIZE (2 * sizeof(unsigned long)) |
1195 | 1247 | ||
1248 | /* | ||
1249 | * Returns 1 if this HPT entry has been modified or has pending | ||
1250 | * R/C bit changes. | ||
1251 | */ | ||
1252 | static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) | ||
1253 | { | ||
1254 | unsigned long rcbits_unset; | ||
1255 | |||
1256 | if (revp->guest_rpte & HPTE_GR_MODIFIED) | ||
1257 | return 1; | ||
1258 | |||
1259 | /* Also need to consider changes in reference and changed bits */ | ||
1260 | rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); | ||
1261 | if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) | ||
1262 | return 1; | ||
1263 | |||
1264 | return 0; | ||
1265 | } | ||
1266 | |||
1196 | static long record_hpte(unsigned long flags, unsigned long *hptp, | 1267 | static long record_hpte(unsigned long flags, unsigned long *hptp, |
1197 | unsigned long *hpte, struct revmap_entry *revp, | 1268 | unsigned long *hpte, struct revmap_entry *revp, |
1198 | int want_valid, int first_pass) | 1269 | int want_valid, int first_pass) |
1199 | { | 1270 | { |
1200 | unsigned long v, r; | 1271 | unsigned long v, r; |
1272 | unsigned long rcbits_unset; | ||
1201 | int ok = 1; | 1273 | int ok = 1; |
1202 | int valid, dirty; | 1274 | int valid, dirty; |
1203 | 1275 | ||
1204 | /* Unmodified entries are uninteresting except on the first pass */ | 1276 | /* Unmodified entries are uninteresting except on the first pass */ |
1205 | dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); | 1277 | dirty = hpte_dirty(revp, hptp); |
1206 | if (!first_pass && !dirty) | 1278 | if (!first_pass && !dirty) |
1207 | return 0; | 1279 | return 0; |
1208 | 1280 | ||
@@ -1223,16 +1295,28 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, | |||
1223 | while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) | 1295 | while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) |
1224 | cpu_relax(); | 1296 | cpu_relax(); |
1225 | v = hptp[0]; | 1297 | v = hptp[0]; |
1298 | |||
1299 | /* re-evaluate valid and dirty from synchronized HPTE value */ | ||
1300 | valid = !!(v & HPTE_V_VALID); | ||
1301 | dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); | ||
1302 | |||
1303 | /* Harvest R and C into guest view if necessary */ | ||
1304 | rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); | ||
1305 | if (valid && (rcbits_unset & hptp[1])) { | ||
1306 | revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | | ||
1307 | HPTE_GR_MODIFIED; | ||
1308 | dirty = 1; | ||
1309 | } | ||
1310 | |||
1226 | if (v & HPTE_V_ABSENT) { | 1311 | if (v & HPTE_V_ABSENT) { |
1227 | v &= ~HPTE_V_ABSENT; | 1312 | v &= ~HPTE_V_ABSENT; |
1228 | v |= HPTE_V_VALID; | 1313 | v |= HPTE_V_VALID; |
1314 | valid = 1; | ||
1229 | } | 1315 | } |
1230 | /* re-evaluate valid and dirty from synchronized HPTE value */ | ||
1231 | valid = !!(v & HPTE_V_VALID); | ||
1232 | if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) | 1316 | if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) |
1233 | valid = 0; | 1317 | valid = 0; |
1234 | r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); | 1318 | |
1235 | dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); | 1319 | r = revp->guest_rpte; |
1236 | /* only clear modified if this is the right sort of entry */ | 1320 | /* only clear modified if this is the right sort of entry */ |
1237 | if (valid == want_valid && dirty) { | 1321 | if (valid == want_valid && dirty) { |
1238 | r &= ~HPTE_GR_MODIFIED; | 1322 | r &= ~HPTE_GR_MODIFIED; |
@@ -1288,7 +1372,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, | |||
1288 | /* Skip uninteresting entries, i.e. clean on not-first pass */ | 1372 | /* Skip uninteresting entries, i.e. clean on not-first pass */ |
1289 | if (!first_pass) { | 1373 | if (!first_pass) { |
1290 | while (i < kvm->arch.hpt_npte && | 1374 | while (i < kvm->arch.hpt_npte && |
1291 | !(revp->guest_rpte & HPTE_GR_MODIFIED)) { | 1375 | !hpte_dirty(revp, hptp)) { |
1292 | ++i; | 1376 | ++i; |
1293 | hptp += 2; | 1377 | hptp += 2; |
1294 | ++revp; | 1378 | ++revp; |
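On the migration side, hpte_dirty() is visible through the HPT file descriptor: after the first full pass, read() yields only entries with a modified guest view or freshly harvested R/C bits. A hedged userspace sketch (the ioctl and struct are existing KVM UAPI; vm_fd and send_to_destination are illustrative):

    struct kvm_get_htab_fd ghf = { .flags = 0, .start_index = 0 };
    char buf[4096];
    ssize_t n;
    int htab_fd = ioctl(vm_fd, KVM_PPC_GET_HTAB_FD, &ghf);

    while ((n = read(htab_fd, buf, sizeof(buf))) > 0)
            send_to_destination(buf, n);    /* stream HPT entries out */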
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 836c56975e21..1f6344c4408d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c | |||
@@ -194,7 +194,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
194 | run->papr_hcall.args[i] = gpr; | 194 | run->papr_hcall.args[i] = gpr; |
195 | } | 195 | } |
196 | 196 | ||
197 | emulated = EMULATE_DO_PAPR; | 197 | run->exit_reason = KVM_EXIT_PAPR_HCALL; |
198 | vcpu->arch.hcall_needed = 1; | ||
199 | emulated = EMULATE_EXIT_USER; | ||
198 | break; | 200 | break; |
199 | } | 201 | } |
200 | #endif | 202 | #endif |
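EMULATE_EXIT_USER routes the hypercall out through KVM_EXIT_PAPR_HCALL, so userspace sees it in its kvm_run exit loop. A hedged fragment of that side (the papr_hcall fields are KVM UAPI; do_hcall is an illustrative assumption):

    case KVM_EXIT_PAPR_HCALL:
            /* nr and args were filled in by the kernel; ret is read back on entry */
            run->papr_hcall.ret = do_hcall(run->papr_hcall.nr,
                                           run->papr_hcall.args);
            break;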
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f5416934932b..9de24f8e03c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c | |||
@@ -66,6 +66,31 @@ | |||
66 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); | 66 | static void kvmppc_end_cede(struct kvm_vcpu *vcpu); |
67 | static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); | 67 | static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); |
68 | 68 | ||
69 | void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) | ||
70 | { | ||
71 | int me; | ||
72 | int cpu = vcpu->cpu; | ||
73 | wait_queue_head_t *wqp; | ||
74 | |||
75 | wqp = kvm_arch_vcpu_wq(vcpu); | ||
76 | if (waitqueue_active(wqp)) { | ||
77 | wake_up_interruptible(wqp); | ||
78 | ++vcpu->stat.halt_wakeup; | ||
79 | } | ||
80 | |||
81 | me = get_cpu(); | ||
82 | |||
83 | /* CPU points to the first thread of the core */ | ||
84 | if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { | ||
85 | int real_cpu = cpu + vcpu->arch.ptid; | ||
86 | if (paca[real_cpu].kvm_hstate.xics_phys) | ||
87 | xics_wake_cpu(real_cpu); | ||
88 | else if (cpu_online(cpu)) | ||
89 | smp_send_reschedule(cpu); | ||
90 | } | ||
91 | put_cpu(); | ||
92 | } | ||
93 | |||
69 | /* | 94 | /* |
70 | * We use the vcpu_load/put functions to measure stolen time. | 95 | * We use the vcpu_load/put functions to measure stolen time. |
71 | * Stolen time is counted as time when either the vcpu is able to | 96 | * Stolen time is counted as time when either the vcpu is able to |
@@ -259,7 +284,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, | |||
259 | len = ((struct reg_vpa *)va)->length.hword; | 284 | len = ((struct reg_vpa *)va)->length.hword; |
260 | else | 285 | else |
261 | len = ((struct reg_vpa *)va)->length.word; | 286 | len = ((struct reg_vpa *)va)->length.word; |
262 | kvmppc_unpin_guest_page(kvm, va); | 287 | kvmppc_unpin_guest_page(kvm, va, vpa, false); |
263 | 288 | ||
264 | /* Check length */ | 289 | /* Check length */ |
265 | if (len > nb || len < sizeof(struct reg_vpa)) | 290 | if (len > nb || len < sizeof(struct reg_vpa)) |
@@ -359,13 +384,13 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) | |||
359 | va = NULL; | 384 | va = NULL; |
360 | nb = 0; | 385 | nb = 0; |
361 | if (gpa) | 386 | if (gpa) |
362 | va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); | 387 | va = kvmppc_pin_guest_page(kvm, gpa, &nb); |
363 | spin_lock(&vcpu->arch.vpa_update_lock); | 388 | spin_lock(&vcpu->arch.vpa_update_lock); |
364 | if (gpa == vpap->next_gpa) | 389 | if (gpa == vpap->next_gpa) |
365 | break; | 390 | break; |
366 | /* sigh... unpin that one and try again */ | 391 | /* sigh... unpin that one and try again */ |
367 | if (va) | 392 | if (va) |
368 | kvmppc_unpin_guest_page(kvm, va); | 393 | kvmppc_unpin_guest_page(kvm, va, gpa, false); |
369 | } | 394 | } |
370 | 395 | ||
371 | vpap->update_pending = 0; | 396 | vpap->update_pending = 0; |
@@ -375,12 +400,15 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) | |||
375 | * has changed the mappings underlying guest memory, | 400 | * has changed the mappings underlying guest memory, |
376 | * so unregister the region. | 401 | * so unregister the region. |
377 | */ | 402 | */ |
378 | kvmppc_unpin_guest_page(kvm, va); | 403 | kvmppc_unpin_guest_page(kvm, va, gpa, false); |
379 | va = NULL; | 404 | va = NULL; |
380 | } | 405 | } |
381 | if (vpap->pinned_addr) | 406 | if (vpap->pinned_addr) |
382 | kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); | 407 | kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, |
408 | vpap->dirty); | ||
409 | vpap->gpa = gpa; | ||
383 | vpap->pinned_addr = va; | 410 | vpap->pinned_addr = va; |
411 | vpap->dirty = false; | ||
384 | if (va) | 412 | if (va) |
385 | vpap->pinned_end = va + vpap->len; | 413 | vpap->pinned_end = va + vpap->len; |
386 | } | 414 | } |
@@ -472,6 +500,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, | |||
472 | /* order writing *dt vs. writing vpa->dtl_idx */ | 500 | /* order writing *dt vs. writing vpa->dtl_idx */ |
473 | smp_wmb(); | 501 | smp_wmb(); |
474 | vpa->dtl_idx = ++vcpu->arch.dtl_index; | 502 | vpa->dtl_idx = ++vcpu->arch.dtl_index; |
503 | vcpu->arch.dtl.dirty = true; | ||
475 | } | 504 | } |
476 | 505 | ||
477 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | 506 | int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) |
@@ -479,7 +508,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
479 | unsigned long req = kvmppc_get_gpr(vcpu, 3); | 508 | unsigned long req = kvmppc_get_gpr(vcpu, 3); |
480 | unsigned long target, ret = H_SUCCESS; | 509 | unsigned long target, ret = H_SUCCESS; |
481 | struct kvm_vcpu *tvcpu; | 510 | struct kvm_vcpu *tvcpu; |
482 | int idx; | 511 | int idx, rc; |
483 | 512 | ||
484 | switch (req) { | 513 | switch (req) { |
485 | case H_ENTER: | 514 | case H_ENTER: |
@@ -515,6 +544,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |||
515 | kvmppc_get_gpr(vcpu, 5), | 544 | kvmppc_get_gpr(vcpu, 5), |
516 | kvmppc_get_gpr(vcpu, 6)); | 545 | kvmppc_get_gpr(vcpu, 6)); |
517 | break; | 546 | break; |
547 | case H_RTAS: | ||
548 | if (list_empty(&vcpu->kvm->arch.rtas_tokens)) | ||
549 | return RESUME_HOST; | ||
550 | |||
551 | rc = kvmppc_rtas_hcall(vcpu); | ||
552 | |||
553 | if (rc == -ENOENT) | ||
554 | return RESUME_HOST; | ||
555 | else if (rc == 0) | ||
556 | break; | ||
557 | |||
558 | /* Send the error out to userspace via KVM_RUN */ | ||
559 | return rc; | ||
560 | |||
561 | case H_XIRR: | ||
562 | case H_CPPR: | ||
563 | case H_EOI: | ||
564 | case H_IPI: | ||
565 | if (kvmppc_xics_enabled(vcpu)) { | ||
566 | ret = kvmppc_xics_hcall(vcpu, req); | ||
567 | break; | ||
568 | } /* fallthrough */ | ||
518 | default: | 569 | default: |
519 | return RESUME_HOST; | 570 | return RESUME_HOST; |
520 | } | 571 | } |
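The H_IPI path that lands here originates in the guest's ICP driver. A hedged sketch of what that guest side looks like, modeled on the pseries XICS code (error handling simplified for illustration):

    static void icp_send_ipi(int cpu)
    {
            long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(cpu),
                                         IPI_PRIORITY);
            if (rc != H_SUCCESS)
                    pr_err("H_IPI to cpu %d failed: %ld\n", cpu, rc);
    }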
@@ -913,15 +964,19 @@ out: | |||
913 | return ERR_PTR(err); | 964 | return ERR_PTR(err); |
914 | } | 965 | } |
915 | 966 | ||
967 | static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) | ||
968 | { | ||
969 | if (vpa->pinned_addr) | ||
970 | kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, | ||
971 | vpa->dirty); | ||
972 | } | ||
973 | |||
916 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) | 974 | void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) |
917 | { | 975 | { |
918 | spin_lock(&vcpu->arch.vpa_update_lock); | 976 | spin_lock(&vcpu->arch.vpa_update_lock); |
919 | if (vcpu->arch.dtl.pinned_addr) | 977 | unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); |
920 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); | 978 | unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); |
921 | if (vcpu->arch.slb_shadow.pinned_addr) | 979 | unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); |
922 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); | ||
923 | if (vcpu->arch.vpa.pinned_addr) | ||
924 | kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); | ||
925 | spin_unlock(&vcpu->arch.vpa_update_lock); | 980 | spin_unlock(&vcpu->arch.vpa_update_lock); |
926 | kvm_vcpu_uninit(vcpu); | 981 | kvm_vcpu_uninit(vcpu); |
927 | kmem_cache_free(kvm_vcpu_cache, vcpu); | 982 | kmem_cache_free(kvm_vcpu_cache, vcpu); |
@@ -955,7 +1010,6 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu) | |||
955 | } | 1010 | } |
956 | 1011 | ||
957 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | 1012 | extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); |
958 | extern void xics_wake_cpu(int cpu); | ||
959 | 1013 | ||
960 | static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, | 1014 | static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, |
961 | struct kvm_vcpu *vcpu) | 1015 | struct kvm_vcpu *vcpu) |
@@ -1330,9 +1384,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1330 | break; | 1384 | break; |
1331 | vc->runner = vcpu; | 1385 | vc->runner = vcpu; |
1332 | n_ceded = 0; | 1386 | n_ceded = 0; |
1333 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) | 1387 | list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { |
1334 | if (!v->arch.pending_exceptions) | 1388 | if (!v->arch.pending_exceptions) |
1335 | n_ceded += v->arch.ceded; | 1389 | n_ceded += v->arch.ceded; |
1390 | else | ||
1391 | v->arch.ceded = 0; | ||
1392 | } | ||
1336 | if (n_ceded == vc->n_runnable) | 1393 | if (n_ceded == vc->n_runnable) |
1337 | kvmppc_vcore_blocked(vc); | 1394 | kvmppc_vcore_blocked(vc); |
1338 | else | 1395 | else |
@@ -1645,12 +1702,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1645 | 1702 | ||
1646 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1703 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
1647 | struct kvm_userspace_memory_region *mem, | 1704 | struct kvm_userspace_memory_region *mem, |
1648 | struct kvm_memory_slot old) | 1705 | const struct kvm_memory_slot *old) |
1649 | { | 1706 | { |
1650 | unsigned long npages = mem->memory_size >> PAGE_SHIFT; | 1707 | unsigned long npages = mem->memory_size >> PAGE_SHIFT; |
1651 | struct kvm_memory_slot *memslot; | 1708 | struct kvm_memory_slot *memslot; |
1652 | 1709 | ||
1653 | if (npages && old.npages) { | 1710 | if (npages && old->npages) { |
1654 | /* | 1711 | /* |
1655 | * If modifying a memslot, reset all the rmap dirty bits. | 1712 | * If modifying a memslot, reset all the rmap dirty bits. |
1656 | * If this is a new memslot, we don't need to do anything | 1713 | * If this is a new memslot, we don't need to do anything |
@@ -1827,6 +1884,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
1827 | cpumask_setall(&kvm->arch.need_tlb_flush); | 1884 | cpumask_setall(&kvm->arch.need_tlb_flush); |
1828 | 1885 | ||
1829 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); | 1886 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); |
1887 | INIT_LIST_HEAD(&kvm->arch.rtas_tokens); | ||
1830 | 1888 | ||
1831 | kvm->arch.rma = NULL; | 1889 | kvm->arch.rma = NULL; |
1832 | 1890 | ||
@@ -1872,6 +1930,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) | |||
1872 | kvm->arch.rma = NULL; | 1930 | kvm->arch.rma = NULL; |
1873 | } | 1931 | } |
1874 | 1932 | ||
1933 | kvmppc_rtas_tokens_free(kvm); | ||
1934 | |||
1875 | kvmppc_free_hpt(kvm); | 1935 | kvmppc_free_hpt(kvm); |
1876 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); | 1936 | WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); |
1877 | } | 1937 | } |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 19c93bae1aea..6dcbb49105a4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c | |||
@@ -97,17 +97,6 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, | |||
97 | } | 97 | } |
98 | EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); | 98 | EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); |
99 | 99 | ||
100 | /* | ||
101 | * Note modification of an HPTE; set the HPTE modified bit | ||
102 | * if anyone is interested. | ||
103 | */ | ||
104 | static inline void note_hpte_modification(struct kvm *kvm, | ||
105 | struct revmap_entry *rev) | ||
106 | { | ||
107 | if (atomic_read(&kvm->arch.hpte_mod_interest)) | ||
108 | rev->guest_rpte |= HPTE_GR_MODIFIED; | ||
109 | } | ||
110 | |||
111 | /* Remove this HPTE from the chain for a real page */ | 100 | /* Remove this HPTE from the chain for a real page */ |
112 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, | 101 | static void remove_revmap_chain(struct kvm *kvm, long pte_index, |
113 | struct revmap_entry *rev, | 102 | struct revmap_entry *rev, |
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c new file mode 100644 index 000000000000..b4b0082f761c --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c | |||
@@ -0,0 +1,406 @@ | |||
1 | /* | ||
2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
3 | * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License, version 2, as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/kvm_host.h> | ||
12 | #include <linux/err.h> | ||
13 | |||
14 | #include <asm/kvm_book3s.h> | ||
15 | #include <asm/kvm_ppc.h> | ||
16 | #include <asm/hvcall.h> | ||
17 | #include <asm/xics.h> | ||
18 | #include <asm/debug.h> | ||
19 | #include <asm/synch.h> | ||
20 | #include <asm/ppc-opcode.h> | ||
21 | |||
22 | #include "book3s_xics.h" | ||
23 | |||
24 | #define DEBUG_PASSUP | ||
25 | |||
26 | static inline void rm_writeb(unsigned long paddr, u8 val) | ||
27 | { | ||
28 | __asm__ __volatile__("sync; stbcix %0,0,%1" | ||
29 | : : "r" (val), "r" (paddr) : "memory"); | ||
30 | } | ||
31 | |||
32 | static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, | ||
33 | struct kvm_vcpu *this_vcpu) | ||
34 | { | ||
35 | struct kvmppc_icp *this_icp = this_vcpu->arch.icp; | ||
36 | unsigned long xics_phys; | ||
37 | int cpu; | ||
38 | |||
39 | /* Mark the target VCPU as having an interrupt pending */ | ||
40 | vcpu->stat.queue_intr++; | ||
41 | set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); | ||
42 | |||
43 | /* Kick self? Just set MER and return */ | ||
44 | if (vcpu == this_vcpu) { | ||
45 | mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER); | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | /* Check if the core is loaded, if not, too hard */ | ||
50 | cpu = vcpu->cpu; | ||
51 | if (cpu < 0 || cpu >= nr_cpu_ids) { | ||
52 | this_icp->rm_action |= XICS_RM_KICK_VCPU; | ||
53 | this_icp->rm_kick_target = vcpu; | ||
54 | return; | ||
55 | } | ||
56 | /* In SMT cpu will always point to thread 0, we adjust it */ | ||
57 | cpu += vcpu->arch.ptid; | ||
58 | |||
59 | /* Not too hard, then poke the target */ | ||
60 | xics_phys = paca[cpu].kvm_hstate.xics_phys; | ||
61 | rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); | ||
62 | } | ||
63 | |||
64 | static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu) | ||
65 | { | ||
66 | /* Note: Only called on self ! */ | ||
67 | clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, | ||
68 | &vcpu->arch.pending_exceptions); | ||
69 | mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER); | ||
70 | } | ||
71 | |||
72 | static inline bool icp_rm_try_update(struct kvmppc_icp *icp, | ||
73 | union kvmppc_icp_state old, | ||
74 | union kvmppc_icp_state new) | ||
75 | { | ||
76 | struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu; | ||
77 | bool success; | ||
78 | |||
79 | /* Calculate new output value */ | ||
80 | new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); | ||
81 | |||
82 | /* Attempt atomic update */ | ||
83 | success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; | ||
84 | if (!success) | ||
85 | goto bail; | ||
86 | |||
87 | /* | ||
88 | * Check for output state update | ||
89 | * | ||
90 | * Note that this is racy since another processor could be updating | ||
91 | * the state already. This is why we never clear the interrupt output | ||
92 | * here; we only ever set it. The clear only happens prior to doing | ||
93 | * an update, and only by the processor itself. Currently we do it | ||
94 | * in Accept (H_XIRR) and Up_CPPR (H_CPPR). | ||
95 | * | ||
96 | * We also do not try to figure out whether the EE state has changed; | ||
97 | * we unconditionally set it if the new state calls for it. The reason | ||
98 | * for that is that we opportunistically remove the pending interrupt | ||
99 | * flag when raising CPPR, so we need to set it back here if an | ||
100 | * interrupt is still pending. | ||
101 | */ | ||
102 | if (new.out_ee) | ||
103 | icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu); | ||
104 | |||
105 | /* Expose the state change for debug purposes */ | ||
106 | this_vcpu->arch.icp->rm_dbgstate = new; | ||
107 | this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu; | ||
108 | |||
109 | bail: | ||
110 | return success; | ||
111 | } | ||
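Every ICP state transition in this file follows the same lock-free shape around icp_rm_try_update(): snapshot the state, compute the transition, and retry the whole computation if another CPU won the cmpxchg. The canonical pattern, as used by the handlers below:

    union kvmppc_icp_state old_state, new_state;

    do {
            old_state = new_state = ACCESS_ONCE(icp->state);
            /* ... compute the transition on new_state ... */
    } while (!icp_rm_try_update(icp, old_state, new_state));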
112 | |||
113 | static inline int check_too_hard(struct kvmppc_xics *xics, | ||
114 | struct kvmppc_icp *icp) | ||
115 | { | ||
116 | return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS; | ||
117 | } | ||
118 | |||
119 | static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
120 | u8 new_cppr) | ||
121 | { | ||
122 | union kvmppc_icp_state old_state, new_state; | ||
123 | bool resend; | ||
124 | |||
125 | /* | ||
126 | * This handles several related states in one operation: | ||
127 | * | ||
128 | * ICP State: Down_CPPR | ||
129 | * | ||
130 | * Load CPPR with new value and if the XISR is 0 | ||
131 | * then check for resends: | ||
132 | * | ||
133 | * ICP State: Resend | ||
134 | * | ||
135 | * If MFRR is more favored than CPPR, check for IPIs | ||
136 | * and notify ICS of a potential resend. This is done | ||
137 | * asynchronously (when used in real mode, we will have | ||
138 | * to exit here). | ||
139 | * | ||
140 | * We do not handle the complete Check_IPI as documented | ||
141 | * here. In the PAPR, this state will be used for both | ||
142 | * Set_MFRR and Down_CPPR. However, we know that we aren't | ||
143 | * changing the MFRR state here so we don't need to handle | ||
144 | * the case of an MFRR causing a reject of a pending irq; | ||
145 | * this will have been handled when the MFRR was set in the | ||
146 | * first place. | ||
147 | * | ||
148 | * Thus we don't have to handle rejects, only resends. | ||
149 | * | ||
150 | * When implementing real mode for HV KVM, resend will lead to | ||
151 | * a H_TOO_HARD return and the whole transaction will be handled | ||
152 | * in virtual mode. | ||
153 | */ | ||
154 | do { | ||
155 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
156 | |||
157 | /* Down_CPPR */ | ||
158 | new_state.cppr = new_cppr; | ||
159 | |||
160 | /* | ||
161 | * Cut down Resend / Check_IPI / IPI | ||
162 | * | ||
163 | * The logic is that we cannot have a pending interrupt | ||
164 | * trumped by an IPI at this point (see above), so we | ||
165 | * know that either the pending interrupt is already an | ||
166 | * IPI (in which case we don't care to override it) or | ||
167 | * it's either more favored than us or non-existent. | ||
168 | */ | ||
169 | if (new_state.mfrr < new_cppr && | ||
170 | new_state.mfrr <= new_state.pending_pri) { | ||
171 | new_state.pending_pri = new_state.mfrr; | ||
172 | new_state.xisr = XICS_IPI; | ||
173 | } | ||
174 | |||
175 | /* Latch/clear resend bit */ | ||
176 | resend = new_state.need_resend; | ||
177 | new_state.need_resend = 0; | ||
178 | |||
179 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
180 | |||
181 | /* | ||
182 | * Now handle resend checks. Those are asynchronous to the ICP | ||
183 | * state update in HW (ie bus transactions) so we can handle them | ||
184 | * separately here as well. | ||
185 | */ | ||
186 | if (resend) | ||
187 | icp->rm_action |= XICS_RM_CHECK_RESEND; | ||
188 | } | ||
189 | |||
190 | |||
191 | unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) | ||
192 | { | ||
193 | union kvmppc_icp_state old_state, new_state; | ||
194 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
195 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
196 | u32 xirr; | ||
197 | |||
198 | if (!xics || !xics->real_mode) | ||
199 | return H_TOO_HARD; | ||
200 | |||
201 | /* First clear the interrupt */ | ||
202 | icp_rm_clr_vcpu_irq(icp->vcpu); | ||
203 | |||
204 | /* | ||
205 | * ICP State: Accept_Interrupt | ||
206 | * | ||
207 | * Return the pending interrupt (if any) along with the | ||
208 | * current CPPR, then clear the XISR & set CPPR to the | ||
209 | * pending priority | ||
210 | */ | ||
211 | do { | ||
212 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
213 | |||
214 | xirr = old_state.xisr | (((u32)old_state.cppr) << 24); | ||
215 | if (!old_state.xisr) | ||
216 | break; | ||
217 | new_state.cppr = new_state.pending_pri; | ||
218 | new_state.pending_pri = 0xff; | ||
219 | new_state.xisr = 0; | ||
220 | |||
221 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
222 | |||
223 | /* Return the result in GPR4 */ | ||
224 | vcpu->arch.gpr[4] = xirr; | ||
225 | |||
226 | return check_too_hard(xics, icp); | ||
227 | } | ||
228 | |||
229 | int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | ||
230 | unsigned long mfrr) | ||
231 | { | ||
232 | union kvmppc_icp_state old_state, new_state; | ||
233 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
234 | struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp; | ||
235 | u32 reject; | ||
236 | bool resend; | ||
237 | bool local; | ||
238 | |||
239 | if (!xics || !xics->real_mode) | ||
240 | return H_TOO_HARD; | ||
241 | |||
242 | local = this_icp->server_num == server; | ||
243 | if (local) | ||
244 | icp = this_icp; | ||
245 | else | ||
246 | icp = kvmppc_xics_find_server(vcpu->kvm, server); | ||
247 | if (!icp) | ||
248 | return H_PARAMETER; | ||
249 | |||
250 | /* | ||
251 | * ICP state: Set_MFRR | ||
252 | * | ||
253 | * If the CPPR is more favored than the new MFRR, then | ||
254 | * nothing needs to be done as there can be no XISR to | ||
255 | * reject. | ||
256 | * | ||
257 | * If the CPPR is less favored, then we might be replacing | ||
258 | * an interrupt, and thus need to possibly reject it as in | ||
259 | * | ||
260 | * ICP state: Check_IPI | ||
261 | */ | ||
262 | do { | ||
263 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
264 | |||
265 | /* Set_MFRR */ | ||
266 | new_state.mfrr = mfrr; | ||
267 | |||
268 | /* Check_IPI */ | ||
269 | reject = 0; | ||
270 | resend = false; | ||
271 | if (mfrr < new_state.cppr) { | ||
272 | /* Reject a pending interrupt if not an IPI */ | ||
273 | if (mfrr <= new_state.pending_pri) | ||
274 | reject = new_state.xisr; | ||
275 | new_state.pending_pri = mfrr; | ||
276 | new_state.xisr = XICS_IPI; | ||
277 | } | ||
278 | |||
279 | if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { | ||
280 | resend = new_state.need_resend; | ||
281 | new_state.need_resend = 0; | ||
282 | } | ||
283 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
284 | |||
285 | /* Pass rejects to virtual mode */ | ||
286 | if (reject && reject != XICS_IPI) { | ||
287 | this_icp->rm_action |= XICS_RM_REJECT; | ||
288 | this_icp->rm_reject = reject; | ||
289 | } | ||
290 | |||
291 | /* Pass resends to virtual mode */ | ||
292 | if (resend) | ||
293 | this_icp->rm_action |= XICS_RM_CHECK_RESEND; | ||
294 | |||
295 | return check_too_hard(xics, this_icp); | ||
296 | } | ||
297 | |||
298 | int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | ||
299 | { | ||
300 | union kvmppc_icp_state old_state, new_state; | ||
301 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
302 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
303 | u32 reject; | ||
304 | |||
305 | if (!xics || !xics->real_mode) | ||
306 | return H_TOO_HARD; | ||
307 | |||
308 | /* | ||
309 | * ICP State: Set_CPPR | ||
310 | * | ||
311 | * We can safely compare the new value with the current | ||
312 | * value outside of the transaction as the CPPR is only | ||
313 | * ever changed by the processor on itself | ||
314 | */ | ||
315 | if (cppr > icp->state.cppr) { | ||
316 | icp_rm_down_cppr(xics, icp, cppr); | ||
317 | goto bail; | ||
318 | } else if (cppr == icp->state.cppr) | ||
319 | return H_SUCCESS; | ||
320 | |||
321 | /* | ||
322 | * ICP State: Up_CPPR | ||
323 | * | ||
324 | * The processor is raising its priority, this can result | ||
325 | * in a rejection of a pending interrupt: | ||
326 | * | ||
327 | * ICP State: Reject_Current | ||
328 | * | ||
329 | * We can remove EE from the current processor, the update | ||
330 | * transaction will set it again if needed | ||
331 | */ | ||
332 | icp_rm_clr_vcpu_irq(icp->vcpu); | ||
333 | |||
334 | do { | ||
335 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
336 | |||
337 | reject = 0; | ||
338 | new_state.cppr = cppr; | ||
339 | |||
340 | if (cppr <= new_state.pending_pri) { | ||
341 | reject = new_state.xisr; | ||
342 | new_state.xisr = 0; | ||
343 | new_state.pending_pri = 0xff; | ||
344 | } | ||
345 | |||
346 | } while (!icp_rm_try_update(icp, old_state, new_state)); | ||
347 | |||
348 | /* Pass rejects to virtual mode */ | ||
349 | if (reject && reject != XICS_IPI) { | ||
350 | icp->rm_action |= XICS_RM_REJECT; | ||
351 | icp->rm_reject = reject; | ||
352 | } | ||
353 | bail: | ||
354 | return check_too_hard(xics, icp); | ||
355 | } | ||
356 | |||
357 | int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | ||
358 | { | ||
359 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
360 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
361 | struct kvmppc_ics *ics; | ||
362 | struct ics_irq_state *state; | ||
363 | u32 irq = xirr & 0x00ffffff; | ||
364 | u16 src; | ||
365 | |||
366 | if (!xics || !xics->real_mode) | ||
367 | return H_TOO_HARD; | ||
368 | |||
369 | /* | ||
370 | * ICP State: EOI | ||
371 | * | ||
372 | * Note: If EOI is incorrectly used by SW to lower the CPPR | ||
373 | * value (ie more favored), we do not check for rejection of | ||
374 | * a pending interrupt; this is a SW error and PAPR specifies | ||
375 | * that we don't have to deal with it. | ||
376 | * | ||
377 | * The sending of an EOI to the ICS is handled after the | ||
378 | * CPPR update | ||
379 | * | ||
380 | * ICP State: Down_CPPR which we handle | ||
381 | * in a separate function as it's shared with H_CPPR. | ||
382 | */ | ||
383 | icp_rm_down_cppr(xics, icp, xirr >> 24); | ||
384 | |||
385 | /* IPIs have no EOI */ | ||
386 | if (irq == XICS_IPI) | ||
387 | goto bail; | ||
388 | /* | ||
389 | * EOI handling: If the interrupt is still asserted, we need to | ||
390 | * resend it. We can take a lockless "peek" at the ICS state here. | ||
391 | * | ||
392 | * "Message" interrupts will never have "asserted" set | ||
393 | */ | ||
394 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
395 | if (!ics) | ||
396 | goto bail; | ||
397 | state = &ics->irq_state[src]; | ||
398 | |||
399 | /* Still asserted: resend it, making it look like a reject */ | ||
400 | if (state->asserted) { | ||
401 | icp->rm_action |= XICS_RM_REJECT; | ||
402 | icp->rm_reject = irq; | ||
403 | } | ||
404 | bail: | ||
405 | return check_too_hard(xics, icp); | ||
406 | } | ||
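When a real-mode handler cannot finish (a reject, a resend, or a vcpu kick), it records the work in icp->rm_action and check_too_hard() returns H_TOO_HARD, so the hcall is retried in virtual mode. A hedged sketch of the virtual-mode completion that consumes rm_action; the flag names come from this file, but icp_check_resend and icp_deliver_irq are assumed helper names:

    if (icp->rm_action & XICS_RM_KICK_VCPU)
            kvmppc_fast_vcpu_kick(icp->rm_kick_target);
    if (icp->rm_action & XICS_RM_CHECK_RESEND)
            icp_check_resend(xics, icp);                    /* assumed helper */
    if (icp->rm_action & XICS_RM_REJECT)
            icp_deliver_irq(xics, icp, icp->rm_reject);     /* assumed helper */
    icp->rm_action = 0;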
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e33d11f1b977..b02f91e4c70d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S | |||
@@ -79,10 +79,6 @@ _GLOBAL(kvmppc_hv_entry_trampoline) | |||
79 | * * | 79 | * * |
80 | *****************************************************************************/ | 80 | *****************************************************************************/ |
81 | 81 | ||
82 | #define XICS_XIRR 4 | ||
83 | #define XICS_QIRR 0xc | ||
84 | #define XICS_IPI 2 /* interrupt source # for IPIs */ | ||
85 | |||
86 | /* | 82 | /* |
87 | * We come in here when wakened from nap mode on a secondary hw thread. | 83 | * We come in here when wakened from nap mode on a secondary hw thread. |
88 | * Relocation is off and most register values are lost. | 84 | * Relocation is off and most register values are lost. |
@@ -101,50 +97,51 @@ kvm_start_guest: | |||
101 | li r0,1 | 97 | li r0,1 |
102 | stb r0,PACA_NAPSTATELOST(r13) | 98 | stb r0,PACA_NAPSTATELOST(r13) |
103 | 99 | ||
104 | /* get vcpu pointer, NULL if we have no vcpu to run */ | 100 | /* were we napping due to cede? */ |
105 | ld r4,HSTATE_KVM_VCPU(r13) | 101 | lbz r0,HSTATE_NAPPING(r13) |
106 | cmpdi cr1,r4,0 | 102 | cmpwi r0,0 |
103 | bne kvm_end_cede | ||
104 | |||
105 | /* | ||
106 | * We weren't napping due to cede, so this must be a secondary | ||
107 | * thread being woken up to run a guest, or being woken up due | ||
108 | * to a stray IPI. (Or due to some machine check or hypervisor | ||
109 | * maintenance interrupt while the core is in KVM.) | ||
110 | */ | ||
107 | 111 | ||
108 | /* Check the wake reason in SRR1 to see why we got here */ | 112 | /* Check the wake reason in SRR1 to see why we got here */ |
109 | mfspr r3,SPRN_SRR1 | 113 | mfspr r3,SPRN_SRR1 |
110 | rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ | 114 | rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ |
111 | cmpwi r3,4 /* was it an external interrupt? */ | 115 | cmpwi r3,4 /* was it an external interrupt? */ |
112 | bne 27f | 116 | bne 27f /* if not */ |
113 | 117 | ld r5,HSTATE_XICS_PHYS(r13) | |
114 | /* | 118 | li r7,XICS_XIRR /* if it was an external interrupt, */ |
115 | * External interrupt - for now assume it is an IPI, since we | ||
116 | * should never get any other interrupts sent to offline threads. | ||
117 | * Only do this for secondary threads. | ||
118 | */ | ||
119 | beq cr1,25f | ||
120 | lwz r3,VCPU_PTID(r4) | ||
121 | cmpwi r3,0 | ||
122 | beq 27f | ||
123 | 25: ld r5,HSTATE_XICS_PHYS(r13) | ||
124 | li r0,0xff | ||
125 | li r6,XICS_QIRR | ||
126 | li r7,XICS_XIRR | ||
127 | lwzcix r8,r5,r7 /* get and ack the interrupt */ | 119 | lwzcix r8,r5,r7 /* get and ack the interrupt */ |
128 | sync | 120 | sync |
129 | clrldi. r9,r8,40 /* get interrupt source ID. */ | 121 | clrldi. r9,r8,40 /* get interrupt source ID. */ |
130 | beq 27f /* none there? */ | 122 | beq 28f /* none there? */ |
131 | cmpwi r9,XICS_IPI | 123 | cmpwi r9,XICS_IPI /* was it an IPI? */ |
132 | bne 26f | 124 | bne 29f |
125 | li r0,0xff | ||
126 | li r6,XICS_MFRR | ||
133 | stbcix r0,r5,r6 /* clear IPI */ | 127 | stbcix r0,r5,r6 /* clear IPI */ |
134 | 26: stwcix r8,r5,r7 /* EOI the interrupt */ | 128 | stwcix r8,r5,r7 /* EOI the interrupt */ |
135 | 129 | sync /* order loading of vcpu after that */ | |
136 | 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ | ||
137 | 130 | ||
138 | /* reload vcpu pointer after clearing the IPI */ | 131 | /* get vcpu pointer, NULL if we have no vcpu to run */ |
139 | ld r4,HSTATE_KVM_VCPU(r13) | 132 | ld r4,HSTATE_KVM_VCPU(r13) |
140 | cmpdi r4,0 | 133 | cmpdi r4,0 |
141 | /* if we have no vcpu to run, go back to sleep */ | 134 | /* if we have no vcpu to run, go back to sleep */ |
142 | beq kvm_no_guest | 135 | beq kvm_no_guest |
136 | b kvmppc_hv_entry | ||
143 | 137 | ||
144 | /* were we napping due to cede? */ | 138 | 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ |
145 | lbz r0,HSTATE_NAPPING(r13) | 139 | b kvm_no_guest |
146 | cmpwi r0,0 | 140 | 28: /* SRR1 said external but ICP said nope?? */ |
147 | bne kvm_end_cede | 141 | b kvm_no_guest |
142 | 29: /* External non-IPI interrupt to offline secondary thread? help?? */ | ||
143 | stw r8,HSTATE_SAVED_XIRR(r13) | ||
144 | b kvm_no_guest | ||
148 | 145 | ||
149 | .global kvmppc_hv_entry | 146 | .global kvmppc_hv_entry |
150 | kvmppc_hv_entry: | 147 | kvmppc_hv_entry: |
@@ -260,6 +257,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
260 | lwz r5, LPPACA_YIELDCOUNT(r3) | 257 | lwz r5, LPPACA_YIELDCOUNT(r3) |
261 | addi r5, r5, 1 | 258 | addi r5, r5, 1 |
262 | stw r5, LPPACA_YIELDCOUNT(r3) | 259 | stw r5, LPPACA_YIELDCOUNT(r3) |
260 | li r6, 1 | ||
261 | stb r6, VCPU_VPA_DIRTY(r4) | ||
263 | 25: | 262 | 25: |
264 | /* Load up DAR and DSISR */ | 263 | /* Load up DAR and DSISR */ |
265 | ld r5, VCPU_DAR(r4) | 264 | ld r5, VCPU_DAR(r4) |
@@ -485,20 +484,20 @@ toc_tlbie_lock: | |||
485 | mtctr r6 | 484 | mtctr r6 |
486 | mtxer r7 | 485 | mtxer r7 |
487 | 486 | ||
487 | ld r10, VCPU_PC(r4) | ||
488 | ld r11, VCPU_MSR(r4) | ||
488 | kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ | 489 | kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ |
489 | ld r6, VCPU_SRR0(r4) | 490 | ld r6, VCPU_SRR0(r4) |
490 | ld r7, VCPU_SRR1(r4) | 491 | ld r7, VCPU_SRR1(r4) |
491 | ld r10, VCPU_PC(r4) | ||
492 | ld r11, VCPU_MSR(r4) /* r11 = vcpu->arch.msr & ~MSR_HV */ | ||
493 | 492 | ||
493 | /* r11 = vcpu->arch.msr & ~MSR_HV */ | ||
494 | rldicl r11, r11, 63 - MSR_HV_LG, 1 | 494 | rldicl r11, r11, 63 - MSR_HV_LG, 1 |
495 | rotldi r11, r11, 1 + MSR_HV_LG | 495 | rotldi r11, r11, 1 + MSR_HV_LG |
496 | ori r11, r11, MSR_ME | 496 | ori r11, r11, MSR_ME |
497 | 497 | ||
498 | /* Check if we can deliver an external or decrementer interrupt now */ | 498 | /* Check if we can deliver an external or decrementer interrupt now */ |
499 | ld r0,VCPU_PENDING_EXC(r4) | 499 | ld r0,VCPU_PENDING_EXC(r4) |
500 | li r8,(1 << BOOK3S_IRQPRIO_EXTERNAL) | 500 | lis r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h |
501 | oris r8,r8,(1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h | ||
502 | and r0,r0,r8 | 501 | and r0,r0,r8 |
503 | cmpdi cr1,r0,0 | 502 | cmpdi cr1,r0,0 |
504 | andi. r0,r11,MSR_EE | 503 | andi. r0,r11,MSR_EE |
@@ -526,10 +525,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
526 | /* Move SRR0 and SRR1 into the respective regs */ | 525 | /* Move SRR0 and SRR1 into the respective regs */ |
527 | 5: mtspr SPRN_SRR0, r6 | 526 | 5: mtspr SPRN_SRR0, r6 |
528 | mtspr SPRN_SRR1, r7 | 527 | mtspr SPRN_SRR1, r7 |
529 | li r0,0 | ||
530 | stb r0,VCPU_CEDED(r4) /* cancel cede */ | ||
531 | 528 | ||
532 | fast_guest_return: | 529 | fast_guest_return: |
530 | li r0,0 | ||
531 | stb r0,VCPU_CEDED(r4) /* cancel cede */ | ||
533 | mtspr SPRN_HSRR0,r10 | 532 | mtspr SPRN_HSRR0,r10 |
534 | mtspr SPRN_HSRR1,r11 | 533 | mtspr SPRN_HSRR1,r11 |
535 | 534 | ||
@@ -676,17 +675,99 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
676 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL | 675 | cmpwi r12,BOOK3S_INTERRUPT_SYSCALL |
677 | beq hcall_try_real_mode | 676 | beq hcall_try_real_mode |
678 | 677 | ||
679 | /* Check for mediated interrupts (could be done earlier really ...) */ | 678 | /* Only handle external interrupts here on arch 206 and later */ |
680 | BEGIN_FTR_SECTION | 679 | BEGIN_FTR_SECTION |
681 | cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL | 680 | b ext_interrupt_to_host |
682 | bne+ 1f | 681 | END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) |
683 | andi. r0,r11,MSR_EE | 682 | |
684 | beq 1f | 683 | /* External interrupt ? */ |
685 | mfspr r5,SPRN_LPCR | 684 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL |
686 | andi. r0,r5,LPCR_MER | 685 | bne+ ext_interrupt_to_host |
687 | bne bounce_ext_interrupt | 686 | |
688 | 1: | 687 | /* External interrupt: first check for host_ipi. If this is
689 | END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | 688 | * set, we know the host wants us out so let's do it now |
689 | */ | ||
690 | do_ext_interrupt: | ||
691 | lbz r0, HSTATE_HOST_IPI(r13) | ||
692 | cmpwi r0, 0 | ||
693 | bne ext_interrupt_to_host | ||
694 | |||
695 | /* Now read the interrupt from the ICP */ | ||
696 | ld r5, HSTATE_XICS_PHYS(r13) | ||
697 | li r7, XICS_XIRR | ||
698 | cmpdi r5, 0 | ||
699 | beq- ext_interrupt_to_host | ||
700 | lwzcix r3, r5, r7 | ||
701 | rlwinm. r0, r3, 0, 0xffffff | ||
702 | sync | ||
703 | beq 3f /* if nothing pending in the ICP */ | ||
704 | |||
705 | /* We found something in the ICP... | ||
706 | * | ||
707 | * If it's not an IPI, stash it in the PACA and return to | ||
708 | * the host; we don't (yet) handle directing real external | ||
709 | * interrupts directly to the guest. | ||
710 | */ | ||
711 | cmpwi r0, XICS_IPI | ||
712 | bne ext_stash_for_host | ||
713 | |||
714 | /* It's an IPI, clear the MFRR and EOI it */ | ||
715 | li r0, 0xff | ||
716 | li r6, XICS_MFRR | ||
717 | stbcix r0, r5, r6 /* clear the IPI */ | ||
718 | stwcix r3, r5, r7 /* EOI it */ | ||
719 | sync | ||
720 | |||
721 | /* We need to re-check host IPI now in case it got set in the | ||
722 | * meantime. If it's clear, we bounce the interrupt to the | ||
723 | * guest | ||
724 | */ | ||
725 | lbz r0, HSTATE_HOST_IPI(r13) | ||
726 | cmpwi r0, 0 | ||
727 | bne- 1f | ||
728 | |||
729 | /* Alright, looks like an IPI for the guest; we need to set MER */ | ||
730 | 3: | ||
731 | /* Check if any CPU is heading out to the host, if so head out too */ | ||
732 | ld r5, HSTATE_KVM_VCORE(r13) | ||
733 | lwz r0, VCORE_ENTRY_EXIT(r5) | ||
734 | cmpwi r0, 0x100 | ||
735 | bge ext_interrupt_to_host | ||
736 | |||
737 | /* See if there is a pending interrupt for the guest */ | ||
738 | mfspr r8, SPRN_LPCR | ||
739 | ld r0, VCPU_PENDING_EXC(r9) | ||
740 | /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */ | ||
741 | rldicl. r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63 | ||
742 | rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH | ||
743 | beq 2f | ||
744 | |||
745 | /* And if the guest EE is set, we can deliver immediately; otherwise | ||
746 | * we return to the guest with MER set | ||
747 | */ | ||
748 | andi. r0, r11, MSR_EE | ||
749 | beq 2f | ||
750 | mtspr SPRN_SRR0, r10 | ||
751 | mtspr SPRN_SRR1, r11 | ||
752 | li r10, BOOK3S_INTERRUPT_EXTERNAL | ||
753 | li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
754 | rotldi r11, r11, 63 | ||
755 | 2: mr r4, r9 | ||
756 | mtspr SPRN_LPCR, r8 | ||
757 | b fast_guest_return | ||
758 | |||
759 | /* We raced with the host, we need to resend that IPI, bummer */ | ||
760 | 1: li r0, IPI_PRIORITY | ||
761 | stbcix r0, r5, r6 /* set the IPI */ | ||
762 | sync | ||
763 | b ext_interrupt_to_host | ||
764 | |||
765 | ext_stash_for_host: | ||
766 | /* It's not an IPI and it's for the host; stash it in the PACA | ||
767 | * before exit, where it will be picked up by the host ICP driver | ||
768 | */ | ||
769 | stw r3, HSTATE_SAVED_XIRR(r13) | ||
770 | ext_interrupt_to_host: | ||
690 | 771 | ||
691 | guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ | 772 | guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ |
692 | /* Save DEC */ | 773 | /* Save DEC */ |
@@ -829,7 +910,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) | |||
829 | beq 44f | 910 | beq 44f |
830 | ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ | 911 | ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */ |
831 | li r0,IPI_PRIORITY | 912 | li r0,IPI_PRIORITY |
832 | li r7,XICS_QIRR | 913 | li r7,XICS_MFRR |
833 | stbcix r0,r7,r8 /* trigger the IPI */ | 914 | stbcix r0,r7,r8 /* trigger the IPI */ |
834 | 44: srdi. r3,r3,1 | 915 | 44: srdi. r3,r3,1 |
835 | addi r6,r6,PACA_SIZE | 916 | addi r6,r6,PACA_SIZE |
@@ -1018,6 +1099,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) | |||
1018 | lwz r3, LPPACA_YIELDCOUNT(r8) | 1099 | lwz r3, LPPACA_YIELDCOUNT(r8) |
1019 | addi r3, r3, 1 | 1100 | addi r3, r3, 1 |
1020 | stw r3, LPPACA_YIELDCOUNT(r8) | 1101 | stw r3, LPPACA_YIELDCOUNT(r8) |
1102 | li r3, 1 | ||
1103 | stb r3, VCPU_VPA_DIRTY(r9) | ||
1021 | 25: | 1104 | 25: |
1022 | /* Save PMU registers if requested */ | 1105 | /* Save PMU registers if requested */ |
1023 | /* r8 and cr0.eq are live here */ | 1106 | /* r8 and cr0.eq are live here */ |
@@ -1350,11 +1433,19 @@ hcall_real_table: | |||
1350 | .long 0 /* 0x58 */ | 1433 | .long 0 /* 0x58 */ |
1351 | .long 0 /* 0x5c */ | 1434 | .long 0 /* 0x5c */ |
1352 | .long 0 /* 0x60 */ | 1435 | .long 0 /* 0x60 */ |
1353 | .long 0 /* 0x64 */ | 1436 | #ifdef CONFIG_KVM_XICS |
1354 | .long 0 /* 0x68 */ | 1437 | .long .kvmppc_rm_h_eoi - hcall_real_table |
1355 | .long 0 /* 0x6c */ | 1438 | .long .kvmppc_rm_h_cppr - hcall_real_table |
1356 | .long 0 /* 0x70 */ | 1439 | .long .kvmppc_rm_h_ipi - hcall_real_table |
1357 | .long 0 /* 0x74 */ | 1440 | .long 0 /* 0x70 - H_IPOLL */ |
1441 | .long .kvmppc_rm_h_xirr - hcall_real_table | ||
1442 | #else | ||
1443 | .long 0 /* 0x64 - H_EOI */ | ||
1444 | .long 0 /* 0x68 - H_CPPR */ | ||
1445 | .long 0 /* 0x6c - H_IPI */ | ||
1446 | .long 0 /* 0x70 - H_IPOLL */ | ||
1447 | .long 0 /* 0x74 - H_XIRR */ | ||
1448 | #endif | ||
1358 | .long 0 /* 0x78 */ | 1449 | .long 0 /* 0x78 */ |
1359 | .long 0 /* 0x7c */ | 1450 | .long 0 /* 0x7c */ |
1360 | .long 0 /* 0x80 */ | 1451 | .long 0 /* 0x80 */ |
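Since PAPR hcall numbers step by 4 and each table slot is a 32-bit offset, the entry for hcall h sits at byte offset h, which is why 0x64 selects kvmppc_rm_h_eoi above. A hedged C rendering of the dispatch the assembly performs (a fragment; variable names are illustrative):

    extern const int hcall_real_table[];    /* one 32-bit offset per hcall */
    long offset = hcall_real_table[h >> 2]; /* h = 0x64 indexes H_EOI's slot */
    void *handler;

    if (!offset)
            return H_TOO_HARD;              /* punt to the virtual-mode path */
    handler = (char *)hcall_real_table + offset;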
@@ -1405,15 +1496,6 @@ ignore_hdec: | |||
1405 | mr r4,r9 | 1496 | mr r4,r9 |
1406 | b fast_guest_return | 1497 | b fast_guest_return |
1407 | 1498 | ||
1408 | bounce_ext_interrupt: | ||
1409 | mr r4,r9 | ||
1410 | mtspr SPRN_SRR0,r10 | ||
1411 | mtspr SPRN_SRR1,r11 | ||
1412 | li r10,BOOK3S_INTERRUPT_EXTERNAL | ||
1413 | li r11,(MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ | ||
1414 | rotldi r11,r11,63 | ||
1415 | b fast_guest_return | ||
1416 | |||
1417 | _GLOBAL(kvmppc_h_set_dabr) | 1499 | _GLOBAL(kvmppc_h_set_dabr) |
1418 | std r4,VCPU_DABR(r3) | 1500 | std r4,VCPU_DABR(r3) |
1419 | /* Work around P7 bug where DABR can get corrupted on mtspr */ | 1501 | /* Work around P7 bug where DABR can get corrupted on mtspr */ |
@@ -1519,6 +1601,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) | |||
1519 | b . | 1601 | b . |
1520 | 1602 | ||
1521 | kvm_end_cede: | 1603 | kvm_end_cede: |
1604 | /* get vcpu pointer */ | ||
1605 | ld r4, HSTATE_KVM_VCPU(r13) | ||
1606 | |||
1522 | /* Woken by external or decrementer interrupt */ | 1607 | /* Woken by external or decrementer interrupt */ |
1523 | ld r1, HSTATE_HOST_R1(r13) | 1608 | ld r1, HSTATE_HOST_R1(r13) |
1524 | 1609 | ||
@@ -1558,6 +1643,16 @@ kvm_end_cede: | |||
1558 | li r0,0 | 1643 | li r0,0 |
1559 | stb r0,HSTATE_NAPPING(r13) | 1644 | stb r0,HSTATE_NAPPING(r13) |
1560 | 1645 | ||
1646 | /* Check the wake reason in SRR1 to see why we got here */ | ||
1647 | mfspr r3, SPRN_SRR1 | ||
1648 | rlwinm r3, r3, 44-31, 0x7 /* extract wake reason field */ | ||
1649 | cmpwi r3, 4 /* was it an external interrupt? */ | ||
1650 | li r12, BOOK3S_INTERRUPT_EXTERNAL | ||
1651 | mr r9, r4 | ||
1652 | ld r10, VCPU_PC(r9) | ||
1653 | ld r11, VCPU_MSR(r9) | ||
1654 | beq do_ext_interrupt /* if so */ | ||
1655 | |||
1561 | /* see if any other thread is already exiting */ | 1656 | /* see if any other thread is already exiting */ |
1562 | lwz r0,VCORE_ENTRY_EXIT(r5) | 1657 | lwz r0,VCORE_ENTRY_EXIT(r5) |
1563 | cmpwi r0,0x100 | 1658 | cmpwi r0,0x100 |
@@ -1577,8 +1672,7 @@ kvm_cede_prodded: | |||
1577 | 1672 | ||
1578 | /* we've ceded but we want to give control to the host */ | 1673 | /* we've ceded but we want to give control to the host */ |
1579 | kvm_cede_exit: | 1674 | kvm_cede_exit: |
1580 | li r3,H_TOO_HARD | 1675 | b hcall_real_fallback |
1581 | blr | ||
1582 | 1676 | ||
1583 | /* Try to handle a machine check in real mode */ | 1677 | /* Try to handle a machine check in real mode */ |
1584 | machine_check_realmode: | 1678 | machine_check_realmode: |
@@ -1626,7 +1720,7 @@ secondary_nap: | |||
1626 | beq 37f | 1720 | beq 37f |
1627 | sync | 1721 | sync |
1628 | li r0, 0xff | 1722 | li r0, 0xff |
1629 | li r6, XICS_QIRR | 1723 | li r6, XICS_MFRR |
1630 | stbcix r0, r5, r6 /* clear the IPI */ | 1724 | stbcix r0, r5, r6 /* clear the IPI */ |
1631 | stwcix r3, r5, r7 /* EOI it */ | 1725 | stwcix r3, r5, r7 /* EOI it */ |
1632 | 37: sync | 1726 | 37: sync |
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index dbdc15aa8127..bdc40b8e77d9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c | |||
@@ -762,9 +762,7 @@ program_interrupt: | |||
762 | run->exit_reason = KVM_EXIT_MMIO; | 762 | run->exit_reason = KVM_EXIT_MMIO; |
763 | r = RESUME_HOST_NV; | 763 | r = RESUME_HOST_NV; |
764 | break; | 764 | break; |
765 | case EMULATE_DO_PAPR: | 765 | case EMULATE_EXIT_USER: |
766 | run->exit_reason = KVM_EXIT_PAPR_HCALL; | ||
767 | vcpu->arch.hcall_needed = 1; | ||
768 | r = RESUME_HOST_NV; | 766 | r = RESUME_HOST_NV; |
769 | break; | 767 | break; |
770 | default: | 768 | default: |
@@ -1283,7 +1281,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1283 | 1281 | ||
1284 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1282 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
1285 | struct kvm_userspace_memory_region *mem, | 1283 | struct kvm_userspace_memory_region *mem, |
1286 | struct kvm_memory_slot old) | 1284 | const struct kvm_memory_slot *old) |
1287 | { | 1285 | { |
1288 | } | 1286 | } |
1289 | 1287 | ||
@@ -1298,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) | |||
1298 | { | 1296 | { |
1299 | #ifdef CONFIG_PPC64 | 1297 | #ifdef CONFIG_PPC64 |
1300 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); | 1298 | INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); |
1299 | INIT_LIST_HEAD(&kvm->arch.rtas_tokens); | ||
1301 | #endif | 1300 | #endif |
1302 | 1301 | ||
1303 | if (firmware_has_feature(FW_FEATURE_SET_MODE)) { | 1302 | if (firmware_has_feature(FW_FEATURE_SET_MODE)) { |
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index ee02b30878ed..b24309c6c2d5 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c | |||
@@ -227,6 +227,13 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) | |||
227 | return EMULATE_DONE; | 227 | return EMULATE_DONE; |
228 | } | 228 | } |
229 | 229 | ||
230 | static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) | ||
231 | { | ||
232 | long rc = kvmppc_xics_hcall(vcpu, cmd); | ||
233 | kvmppc_set_gpr(vcpu, 3, rc); | ||
234 | return EMULATE_DONE; | ||
235 | } | ||
236 | |||
230 | int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | 237 | int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) |
231 | { | 238 | { |
232 | switch (cmd) { | 239 | switch (cmd) { |
@@ -246,6 +253,20 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) | |||
246 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | 253 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); |
247 | vcpu->stat.halt_wakeup++; | 254 | vcpu->stat.halt_wakeup++; |
248 | return EMULATE_DONE; | 255 | return EMULATE_DONE; |
256 | case H_XIRR: | ||
257 | case H_CPPR: | ||
258 | case H_EOI: | ||
259 | case H_IPI: | ||
260 | if (kvmppc_xics_enabled(vcpu)) | ||
261 | return kvmppc_h_pr_xics_hcall(vcpu, cmd); | ||
262 | break; | ||
263 | case H_RTAS: | ||
264 | if (list_empty(&vcpu->kvm->arch.rtas_tokens)) | ||
265 | return RESUME_HOST; | ||
266 | if (kvmppc_rtas_hcall(vcpu)) | ||
267 | break; | ||
268 | kvmppc_set_gpr(vcpu, 3, 0); | ||
269 | return EMULATE_DONE; | ||
249 | } | 270 | } |
250 | 271 | ||
251 | return EMULATE_FAIL; | 272 | return EMULATE_FAIL; |
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c new file mode 100644 index 000000000000..3219ba895246 --- /dev/null +++ b/arch/powerpc/kvm/book3s_rtas.c | |||
@@ -0,0 +1,274 @@ | |||
1 | /* | ||
2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License, version 2, as | ||
6 | * published by the Free Software Foundation. | ||
7 | */ | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/kvm_host.h> | ||
11 | #include <linux/kvm.h> | ||
12 | #include <linux/err.h> | ||
13 | |||
14 | #include <asm/uaccess.h> | ||
15 | #include <asm/kvm_book3s.h> | ||
16 | #include <asm/kvm_ppc.h> | ||
17 | #include <asm/hvcall.h> | ||
18 | #include <asm/rtas.h> | ||
19 | |||
20 | #ifdef CONFIG_KVM_XICS | ||
21 | static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
22 | { | ||
23 | u32 irq, server, priority; | ||
24 | int rc; | ||
25 | |||
26 | if (args->nargs != 3 || args->nret != 1) { | ||
27 | rc = -3; | ||
28 | goto out; | ||
29 | } | ||
30 | |||
31 | irq = args->args[0]; | ||
32 | server = args->args[1]; | ||
33 | priority = args->args[2]; | ||
34 | |||
35 | rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); | ||
36 | if (rc) | ||
37 | rc = -3; | ||
38 | out: | ||
39 | args->rets[0] = rc; | ||
40 | } | ||
41 | |||
42 | static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
43 | { | ||
44 | u32 irq, server, priority; | ||
45 | int rc; | ||
46 | |||
47 | if (args->nargs != 1 || args->nret != 3) { | ||
48 | rc = -3; | ||
49 | goto out; | ||
50 | } | ||
51 | |||
52 | irq = args->args[0]; | ||
53 | |||
54 | server = priority = 0; | ||
55 | rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); | ||
56 | if (rc) { | ||
57 | rc = -3; | ||
58 | goto out; | ||
59 | } | ||
60 | |||
61 | args->rets[1] = server; | ||
62 | args->rets[2] = priority; | ||
63 | out: | ||
64 | args->rets[0] = rc; | ||
65 | } | ||
66 | |||
67 | static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
68 | { | ||
69 | u32 irq; | ||
70 | int rc; | ||
71 | |||
72 | if (args->nargs != 1 || args->nret != 1) { | ||
73 | rc = -3; | ||
74 | goto out; | ||
75 | } | ||
76 | |||
77 | irq = args->args[0]; | ||
78 | |||
79 | rc = kvmppc_xics_int_off(vcpu->kvm, irq); | ||
80 | if (rc) | ||
81 | rc = -3; | ||
82 | out: | ||
83 | args->rets[0] = rc; | ||
84 | } | ||
85 | |||
86 | static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) | ||
87 | { | ||
88 | u32 irq; | ||
89 | int rc; | ||
90 | |||
91 | if (args->nargs != 1 || args->nret != 1) { | ||
92 | rc = -3; | ||
93 | goto out; | ||
94 | } | ||
95 | |||
96 | irq = args->args[0]; | ||
97 | |||
98 | rc = kvmppc_xics_int_on(vcpu->kvm, irq); | ||
99 | if (rc) | ||
100 | rc = -3; | ||
101 | out: | ||
102 | args->rets[0] = rc; | ||
103 | } | ||
104 | #endif /* CONFIG_KVM_XICS */ | ||
105 | |||
106 | struct rtas_handler { | ||
107 | void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); | ||
108 | char *name; | ||
109 | }; | ||
110 | |||
111 | static struct rtas_handler rtas_handlers[] = { | ||
112 | #ifdef CONFIG_KVM_XICS | ||
113 | { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, | ||
114 | { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, | ||
115 | { .name = "ibm,int-off", .handler = kvm_rtas_int_off }, | ||
116 | { .name = "ibm,int-on", .handler = kvm_rtas_int_on }, | ||
117 | #endif | ||
118 | }; | ||
119 | |||
120 | struct rtas_token_definition { | ||
121 | struct list_head list; | ||
122 | struct rtas_handler *handler; | ||
123 | u64 token; | ||
124 | }; | ||
125 | |||
126 | static int rtas_name_matches(char *s1, char *s2) | ||
127 | { | ||
128 | struct kvm_rtas_token_args args; | ||
129 | return !strncmp(s1, s2, sizeof(args.name)); | ||
130 | } | ||
131 | |||
132 | static int rtas_token_undefine(struct kvm *kvm, char *name) | ||
133 | { | ||
134 | struct rtas_token_definition *d, *tmp; | ||
135 | |||
136 | lockdep_assert_held(&kvm->lock); | ||
137 | |||
138 | list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { | ||
139 | if (rtas_name_matches(d->handler->name, name)) { | ||
140 | list_del(&d->list); | ||
141 | kfree(d); | ||
142 | return 0; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* It's not an error to undefine an undefined token */ | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static int rtas_token_define(struct kvm *kvm, char *name, u64 token) | ||
151 | { | ||
152 | struct rtas_token_definition *d; | ||
153 | struct rtas_handler *h = NULL; | ||
154 | bool found; | ||
155 | int i; | ||
156 | |||
157 | lockdep_assert_held(&kvm->lock); | ||
158 | |||
159 | list_for_each_entry(d, &kvm->arch.rtas_tokens, list) { | ||
160 | if (d->token == token) | ||
161 | return -EEXIST; | ||
162 | } | ||
163 | |||
164 | found = false; | ||
165 | for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) { | ||
166 | h = &rtas_handlers[i]; | ||
167 | if (rtas_name_matches(h->name, name)) { | ||
168 | found = true; | ||
169 | break; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | if (!found) | ||
174 | return -ENOENT; | ||
175 | |||
176 | d = kzalloc(sizeof(*d), GFP_KERNEL); | ||
177 | if (!d) | ||
178 | return -ENOMEM; | ||
179 | |||
180 | d->handler = h; | ||
181 | d->token = token; | ||
182 | |||
183 | list_add_tail(&d->list, &kvm->arch.rtas_tokens); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) | ||
189 | { | ||
190 | struct kvm_rtas_token_args args; | ||
191 | int rc; | ||
192 | |||
193 | if (copy_from_user(&args, argp, sizeof(args))) | ||
194 | return -EFAULT; | ||
195 | |||
196 | mutex_lock(&kvm->lock); | ||
197 | |||
198 | if (args.token) | ||
199 | rc = rtas_token_define(kvm, args.name, args.token); | ||
200 | else | ||
201 | rc = rtas_token_undefine(kvm, args.name); | ||
202 | |||
203 | mutex_unlock(&kvm->lock); | ||
204 | |||
205 | return rc; | ||
206 | } | ||
207 | |||
208 | int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) | ||
209 | { | ||
210 | struct rtas_token_definition *d; | ||
211 | struct rtas_args args; | ||
212 | rtas_arg_t *orig_rets; | ||
213 | gpa_t args_phys; | ||
214 | int rc; | ||
215 | |||
216 | /* r4 contains the guest physical address of the RTAS args */ | ||
217 | args_phys = kvmppc_get_gpr(vcpu, 4); | ||
218 | |||
219 | rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); | ||
220 | if (rc) | ||
221 | goto fail; | ||
222 | |||
223 | /* | ||
224 | * args->rets is a pointer into args->args. Now that we've | ||
225 | * copied args we need to fix it up to point into our copy, | ||
226 | * not the guest args. We also need to save the original | ||
227 | * value so we can restore it on the way out. | ||
228 | */ | ||
229 | orig_rets = args.rets; | ||
230 | args.rets = &args.args[args.nargs]; | ||
231 | |||
232 | mutex_lock(&vcpu->kvm->lock); | ||
233 | |||
234 | rc = -ENOENT; | ||
235 | list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { | ||
236 | if (d->token == args.token) { | ||
237 | d->handler->handler(vcpu, &args); | ||
238 | rc = 0; | ||
239 | break; | ||
240 | } | ||
241 | } | ||
242 | |||
243 | mutex_unlock(&vcpu->kvm->lock); | ||
244 | |||
245 | if (rc == 0) { | ||
246 | args.rets = orig_rets; | ||
247 | rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); | ||
248 | if (rc) | ||
249 | goto fail; | ||
250 | } | ||
251 | |||
252 | return rc; | ||
253 | |||
254 | fail: | ||
255 | /* | ||
256 | * We only get here if the guest has called RTAS with a bogus | ||
257 | * args pointer. That means we can't get to the args, and so we | ||
258 | * can't fail the RTAS call. So fail right out to userspace, | ||
259 | * which should kill the guest. | ||
260 | */ | ||
261 | return rc; | ||
262 | } | ||
263 | |||
264 | void kvmppc_rtas_tokens_free(struct kvm *kvm) | ||
265 | { | ||
266 | struct rtas_token_definition *d, *tmp; | ||
267 | |||
268 | lockdep_assert_held(&kvm->lock); | ||
269 | |||
270 | list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) { | ||
271 | list_del(&d->list); | ||
272 | kfree(d); | ||
273 | } | ||
274 | } | ||
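The token table above is driven entirely from userspace: the VMM binds each RTAS service name to a guest-visible token through the kvm_vm_ioctl_rtas_define_token() path, and a token of 0 undoes the binding. A minimal userspace sketch of that call, assuming the ioctl is exposed as KVM_PPC_RTAS_DEFINE_TOKEN, that kvm_rtas_token_args carries a fixed-size name buffer plus a 64-bit token, and that vm_fd is an open VM descriptor:

    /* Hedged sketch: bind the "ibm,set-xive" service to token 0x2001 */
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int define_rtas_token(int vm_fd, const char *name, uint64_t token)
    {
            struct kvm_rtas_token_args args;

            memset(&args, 0, sizeof(args));
            strncpy((char *)args.name, name, sizeof(args.name) - 1);
            args.token = token;     /* 0 would undefine the mapping */

            return ioctl(vm_fd, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
    }

Called as define_rtas_token(vm_fd, "ibm,set-xive", 0x2001), any subsequent guest H_RTAS hcall whose argument block carries token 0x2001 is then routed to kvm_rtas_set_xive() by kvmppc_rtas_hcall().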
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c new file mode 100644 index 000000000000..f7a103756618 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.c | |||
@@ -0,0 +1,1270 @@ | |||
1 | /* | ||
2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
3 | * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License, version 2, as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/kvm_host.h> | ||
12 | #include <linux/err.h> | ||
13 | #include <linux/gfp.h> | ||
14 | #include <linux/anon_inodes.h> | ||
15 | |||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/kvm_book3s.h> | ||
18 | #include <asm/kvm_ppc.h> | ||
19 | #include <asm/hvcall.h> | ||
20 | #include <asm/xics.h> | ||
21 | #include <asm/debug.h> | ||
22 | |||
23 | #include <linux/debugfs.h> | ||
24 | #include <linux/seq_file.h> | ||
25 | |||
26 | #include "book3s_xics.h" | ||
27 | |||
28 | #if 1 | ||
29 | #define XICS_DBG(fmt...) do { } while (0) | ||
30 | #else | ||
31 | #define XICS_DBG(fmt...) trace_printk(fmt) | ||
32 | #endif | ||
33 | |||
34 | #define ENABLE_REALMODE true | ||
35 | #define DEBUG_REALMODE false | ||
36 | |||
37 | /* | ||
38 | * LOCKING | ||
39 | * ======= | ||
40 | * | ||
41 | * Each ICS has a mutex protecting the information about the IRQ | ||
42 | * sources and avoiding simultaneous deliveries of the same interrupt. | ||
43 | * | ||
44 | * ICP operations are done via a single compare & swap transaction | ||
45 | * (most ICP state fits in the union kvmppc_icp_state) | ||
46 | */ | ||
47 | |||
48 | /* | ||
49 | * TODO | ||
50 | * ==== | ||
51 | * | ||
52 | * - To speed up resends, keep a bitmap of "resend" set bits in the | ||
53 | * ICS | ||
54 | * | ||
55 | * - Speed up server# -> ICP lookup (array? hash table?) | ||
56 | * | ||
57 | * - Make ICS lockless as well, or at least a per-interrupt lock or hashed | ||
58 | * locks array to improve scalability | ||
59 | */ | ||
60 | |||
61 | /* -- ICS routines -- */ | ||
62 | |||
63 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
64 | u32 new_irq); | ||
65 | |||
66 | static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, | ||
67 | bool report_status) | ||
68 | { | ||
69 | struct ics_irq_state *state; | ||
70 | struct kvmppc_ics *ics; | ||
71 | u16 src; | ||
72 | |||
73 | XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); | ||
74 | |||
75 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
76 | if (!ics) { | ||
77 | XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); | ||
78 | return -EINVAL; | ||
79 | } | ||
80 | state = &ics->irq_state[src]; | ||
81 | if (!state->exists) | ||
82 | return -EINVAL; | ||
83 | |||
84 | if (report_status) | ||
85 | return state->asserted; | ||
86 | |||
87 | /* | ||
88 | * We set state->asserted locklessly. This should be fine as | ||
89 | * we are the only setter, thus concurrent access is undefined | ||
90 | * to begin with. | ||
91 | */ | ||
92 | if (level == KVM_INTERRUPT_SET_LEVEL) | ||
93 | state->asserted = 1; | ||
94 | else if (level == KVM_INTERRUPT_UNSET) { | ||
95 | state->asserted = 0; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /* Attempt delivery */ | ||
100 | icp_deliver_irq(xics, NULL, irq); | ||
101 | |||
102 | return state->asserted; | ||
103 | } | ||
104 | |||
105 | static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | ||
106 | struct kvmppc_icp *icp) | ||
107 | { | ||
108 | int i; | ||
109 | |||
110 | mutex_lock(&ics->lock); | ||
111 | |||
112 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | ||
113 | struct ics_irq_state *state = &ics->irq_state[i]; | ||
114 | |||
115 | if (!state->resend) | ||
116 | continue; | ||
117 | |||
118 | XICS_DBG("resend %#x prio %#x\n", state->number, | ||
119 | state->priority); | ||
120 | |||
121 | mutex_unlock(&ics->lock); | ||
122 | icp_deliver_irq(xics, icp, state->number); | ||
123 | mutex_lock(&ics->lock); | ||
124 | } | ||
125 | |||
126 | mutex_unlock(&ics->lock); | ||
127 | } | ||
128 | |||
129 | static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, | ||
130 | struct ics_irq_state *state, | ||
131 | u32 server, u32 priority, u32 saved_priority) | ||
132 | { | ||
133 | bool deliver; | ||
134 | |||
135 | mutex_lock(&ics->lock); | ||
136 | |||
137 | state->server = server; | ||
138 | state->priority = priority; | ||
139 | state->saved_priority = saved_priority; | ||
140 | deliver = false; | ||
141 | if ((state->masked_pending || state->resend) && priority != MASKED) { | ||
142 | state->masked_pending = 0; | ||
143 | deliver = true; | ||
144 | } | ||
145 | |||
146 | mutex_unlock(&ics->lock); | ||
147 | |||
148 | return deliver; | ||
149 | } | ||
150 | |||
151 | int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) | ||
152 | { | ||
153 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
154 | struct kvmppc_icp *icp; | ||
155 | struct kvmppc_ics *ics; | ||
156 | struct ics_irq_state *state; | ||
157 | u16 src; | ||
158 | |||
159 | if (!xics) | ||
160 | return -ENODEV; | ||
161 | |||
162 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
163 | if (!ics) | ||
164 | return -EINVAL; | ||
165 | state = &ics->irq_state[src]; | ||
166 | |||
167 | icp = kvmppc_xics_find_server(kvm, server); | ||
168 | if (!icp) | ||
169 | return -EINVAL; | ||
170 | |||
171 | XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", | ||
172 | irq, server, priority, | ||
173 | state->masked_pending, state->resend); | ||
174 | |||
175 | if (write_xive(xics, ics, state, server, priority, priority)) | ||
176 | icp_deliver_irq(xics, icp, irq); | ||
177 | |||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) | ||
182 | { | ||
183 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
184 | struct kvmppc_ics *ics; | ||
185 | struct ics_irq_state *state; | ||
186 | u16 src; | ||
187 | |||
188 | if (!xics) | ||
189 | return -ENODEV; | ||
190 | |||
191 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
192 | if (!ics) | ||
193 | return -EINVAL; | ||
194 | state = &ics->irq_state[src]; | ||
195 | |||
196 | mutex_lock(&ics->lock); | ||
197 | *server = state->server; | ||
198 | *priority = state->priority; | ||
199 | mutex_unlock(&ics->lock); | ||
200 | |||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) | ||
205 | { | ||
206 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
207 | struct kvmppc_icp *icp; | ||
208 | struct kvmppc_ics *ics; | ||
209 | struct ics_irq_state *state; | ||
210 | u16 src; | ||
211 | |||
212 | if (!xics) | ||
213 | return -ENODEV; | ||
214 | |||
215 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
216 | if (!ics) | ||
217 | return -EINVAL; | ||
218 | state = &ics->irq_state[src]; | ||
219 | |||
220 | icp = kvmppc_xics_find_server(kvm, state->server); | ||
221 | if (!icp) | ||
222 | return -EINVAL; | ||
223 | |||
224 | if (write_xive(xics, ics, state, state->server, state->saved_priority, | ||
225 | state->saved_priority)) | ||
226 | icp_deliver_irq(xics, icp, irq); | ||
227 | |||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | int kvmppc_xics_int_off(struct kvm *kvm, u32 irq) | ||
232 | { | ||
233 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
234 | struct kvmppc_ics *ics; | ||
235 | struct ics_irq_state *state; | ||
236 | u16 src; | ||
237 | |||
238 | if (!xics) | ||
239 | return -ENODEV; | ||
240 | |||
241 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
242 | if (!ics) | ||
243 | return -EINVAL; | ||
244 | state = &ics->irq_state[src]; | ||
245 | |||
246 | write_xive(xics, ics, state, state->server, MASKED, state->priority); | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* -- ICP routines, including hcalls -- */ | ||
252 | |||
253 | static inline bool icp_try_update(struct kvmppc_icp *icp, | ||
254 | union kvmppc_icp_state old, | ||
255 | union kvmppc_icp_state new, | ||
256 | bool change_self) | ||
257 | { | ||
258 | bool success; | ||
259 | |||
260 | /* Calculate new output value */ | ||
261 | new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); | ||
262 | |||
263 | /* Attempt atomic update */ | ||
264 | success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; | ||
265 | if (!success) | ||
266 | goto bail; | ||
267 | |||
268 | XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", | ||
269 | icp->server_num, | ||
270 | old.cppr, old.mfrr, old.pending_pri, old.xisr, | ||
271 | old.need_resend, old.out_ee); | ||
272 | XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", | ||
273 | new.cppr, new.mfrr, new.pending_pri, new.xisr, | ||
274 | new.need_resend, new.out_ee); | ||
275 | /* | ||
276 | * Check for output state update | ||
277 | * | ||
278 | * Note that this is racy since another processor could be updating | ||
279 | * the state already. This is why we never clear the interrupt output | ||
280 | * here, we only ever set it. The clear only happens prior to doing | ||
281 | * an update and only by the processor itself. Currently we do it | ||
282 | * in Accept (H_XIRR) and Up_Cppr (H_CPPR). | ||
283 | * | ||
284 | * We also do not try to figure out whether the EE state has changed, | ||
285 | * we unconditionally set it if the new state calls for it. The reason | ||
286 | * for that is that we opportunistically remove the pending interrupt | ||
287 | * flag when raising CPPR, so we need to set it back here if an | ||
288 | * interrupt is still pending. | ||
289 | */ | ||
290 | if (new.out_ee) { | ||
291 | kvmppc_book3s_queue_irqprio(icp->vcpu, | ||
292 | BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | ||
293 | if (!change_self) | ||
294 | kvmppc_fast_vcpu_kick(icp->vcpu); | ||
295 | } | ||
296 | bail: | ||
297 | return success; | ||
298 | } | ||
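Every hcall handler below drives the ICP through this same lock-free transaction: snapshot the packed state, derive the new value, and retry the whole computation if another CPU won the cmpxchg race inside icp_try_update(). As a standalone sketch of the idiom (with the state derivation elided):

    /* The retry shape shared by kvmppc_h_xirr/_ipi/_cppr and icp_down_cppr */
    union kvmppc_icp_state old_state, new_state;

    do {
            old_state = new_state = ACCESS_ONCE(icp->state);

            /* ... compute new_state.cppr/mfrr/xisr/pending_pri ... */

    } while (!icp_try_update(icp, old_state, new_state, change_self));

Because the whole ICP fits in one machine word (union kvmppc_icp_state), the loop needs no lock; a failed cmpxchg simply re-reads and recomputes.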
299 | |||
300 | static void icp_check_resend(struct kvmppc_xics *xics, | ||
301 | struct kvmppc_icp *icp) | ||
302 | { | ||
303 | u32 icsid; | ||
304 | |||
305 | /* Order this load with the test for need_resend in the caller */ | ||
306 | smp_rmb(); | ||
307 | for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) { | ||
308 | struct kvmppc_ics *ics = xics->ics[icsid]; | ||
309 | |||
310 | if (!test_and_clear_bit(icsid, icp->resend_map)) | ||
311 | continue; | ||
312 | if (!ics) | ||
313 | continue; | ||
314 | ics_check_resend(xics, ics, icp); | ||
315 | } | ||
316 | } | ||
317 | |||
318 | static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, | ||
319 | u32 *reject) | ||
320 | { | ||
321 | union kvmppc_icp_state old_state, new_state; | ||
322 | bool success; | ||
323 | |||
324 | XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority, | ||
325 | icp->server_num); | ||
326 | |||
327 | do { | ||
328 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
329 | |||
330 | *reject = 0; | ||
331 | |||
332 | /* See if we can deliver */ | ||
333 | success = new_state.cppr > priority && | ||
334 | new_state.mfrr > priority && | ||
335 | new_state.pending_pri > priority; | ||
336 | |||
337 | /* | ||
338 | * If we can, check for a rejection and perform the | ||
339 | * delivery | ||
340 | */ | ||
341 | if (success) { | ||
342 | *reject = new_state.xisr; | ||
343 | new_state.xisr = irq; | ||
344 | new_state.pending_pri = priority; | ||
345 | } else { | ||
346 | /* | ||
347 | * If we failed to deliver we set need_resend | ||
348 | * so a subsequent CPPR state change causes us | ||
349 | * to try a new delivery. | ||
350 | */ | ||
351 | new_state.need_resend = true; | ||
352 | } | ||
353 | |||
354 | } while (!icp_try_update(icp, old_state, new_state, false)); | ||
355 | |||
356 | return success; | ||
357 | } | ||
358 | |||
359 | static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
360 | u32 new_irq) | ||
361 | { | ||
362 | struct ics_irq_state *state; | ||
363 | struct kvmppc_ics *ics; | ||
364 | u32 reject; | ||
365 | u16 src; | ||
366 | |||
367 | /* | ||
368 | * This is used both for initial delivery of an interrupt and | ||
369 | * for subsequent rejection. | ||
370 | * | ||
371 | * Rejection can be racy vs. resends. We have evaluated the | ||
372 | * rejection in an atomic ICP transaction which is now complete, | ||
373 | * so potentially the ICP can already accept the interrupt again. | ||
374 | * | ||
375 | * So we need to retry the delivery. Essentially the reject path | ||
376 | * boils down to a failed delivery. Always. | ||
377 | * | ||
378 | * Now the interrupt could also have moved to a different target, | ||
379 | * thus we may need to re-do the ICP lookup as well | ||
380 | */ | ||
381 | |||
382 | again: | ||
383 | /* Get the ICS state and lock it */ | ||
384 | ics = kvmppc_xics_find_ics(xics, new_irq, &src); | ||
385 | if (!ics) { | ||
386 | XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq); | ||
387 | return; | ||
388 | } | ||
389 | state = &ics->irq_state[src]; | ||
390 | |||
391 | /* Get a lock on the ICS */ | ||
392 | mutex_lock(&ics->lock); | ||
393 | |||
394 | /* Get our server */ | ||
395 | if (!icp || state->server != icp->server_num) { | ||
396 | icp = kvmppc_xics_find_server(xics->kvm, state->server); | ||
397 | if (!icp) { | ||
398 | pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n", | ||
399 | new_irq, state->server); | ||
400 | goto out; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | /* Clear the resend bit of that interrupt */ | ||
405 | state->resend = 0; | ||
406 | |||
407 | /* | ||
408 | * If masked, bail out | ||
409 | * | ||
410 | * Note: PAPR doesn't mention anything about masked pending | ||
411 | * when doing a resend, only when doing a delivery. | ||
412 | * | ||
413 | * However that would have the effect of losing a masked | ||
414 | * interrupt that was rejected and isn't consistent with | ||
415 | * the whole masked_pending business which is about not | ||
416 | * losing interrupts that occur while masked. | ||
417 | * | ||
418 | * I don't differentiate between normal deliveries and resends; this | ||
419 | * implementation will differ from PAPR and not lose such | ||
420 | * interrupts. | ||
421 | */ | ||
422 | if (state->priority == MASKED) { | ||
423 | XICS_DBG("irq %#x masked pending\n", new_irq); | ||
424 | state->masked_pending = 1; | ||
425 | goto out; | ||
426 | } | ||
427 | |||
428 | /* | ||
429 | * Try the delivery, this will set the need_resend flag | ||
430 | * in the ICP as part of the atomic transaction if the | ||
431 | * delivery is not possible. | ||
432 | * | ||
433 | * Note that if successful, the new delivery might have itself | ||
434 | * rejected an interrupt that was "delivered" before we took the | ||
435 | * icp mutex. | ||
436 | * | ||
437 | * In this case we do the whole sequence all over again for the | ||
438 | * new guy. We cannot assume that the rejected interrupt is less | ||
439 | * favored than the new one, and thus doesn't need to be delivered, | ||
440 | * because by the time we exit icp_try_to_deliver() the target | ||
441 | * processor may well have already consumed & completed it, and thus | ||
442 | * the rejected interrupt might actually be already acceptable. | ||
443 | */ | ||
444 | if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) { | ||
445 | /* | ||
446 | * Delivery was successful, did we reject somebody else ? | ||
447 | */ | ||
448 | if (reject && reject != XICS_IPI) { | ||
449 | mutex_unlock(&ics->lock); | ||
450 | new_irq = reject; | ||
451 | goto again; | ||
452 | } | ||
453 | } else { | ||
454 | /* | ||
455 | * We failed to deliver the interrupt, so we need to set the | ||
456 | * resend map bit and mark the ICS state as needing a resend | ||
457 | */ | ||
458 | set_bit(ics->icsid, icp->resend_map); | ||
459 | state->resend = 1; | ||
460 | |||
461 | /* | ||
462 | * If the need_resend flag got cleared in the ICP some time | ||
463 | * between icp_try_to_deliver() atomic update and now, then | ||
464 | * we know it might have missed the resend_map bit. So we | ||
465 | * retry | ||
466 | */ | ||
467 | smp_mb(); | ||
468 | if (!icp->state.need_resend) { | ||
469 | mutex_unlock(&ics->lock); | ||
470 | goto again; | ||
471 | } | ||
472 | } | ||
473 | out: | ||
474 | mutex_unlock(&ics->lock); | ||
475 | } | ||
476 | |||
477 | static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, | ||
478 | u8 new_cppr) | ||
479 | { | ||
480 | union kvmppc_icp_state old_state, new_state; | ||
481 | bool resend; | ||
482 | |||
483 | /* | ||
484 | * This handles several related states in one operation: | ||
485 | * | ||
486 | * ICP State: Down_CPPR | ||
487 | * | ||
488 | * Load CPPR with new value and if the XISR is 0 | ||
489 | * then check for resends: | ||
490 | * | ||
491 | * ICP State: Resend | ||
492 | * | ||
493 | * If MFRR is more favored than CPPR, check for IPIs | ||
494 | * and notify ICS of a potential resend. This is done | ||
495 | * asynchronously (when used in real mode, we will have | ||
496 | * to exit here). | ||
497 | * | ||
498 | * We do not handle the complete Check_IPI as documented | ||
499 | * here. In the PAPR, this state will be used for both | ||
500 | * Set_MFRR and Down_CPPR. However, we know that we aren't | ||
501 | * changing the MFRR state here so we don't need to handle | ||
502 | * the case of an MFRR causing a reject of a pending irq, | ||
503 | * this will have been handled when the MFRR was set in the | ||
504 | * first place. | ||
505 | * | ||
506 | * Thus we don't have to handle rejects, only resends. | ||
507 | * | ||
508 | * When implementing real mode for HV KVM, resend will lead to | ||
509 | * a H_TOO_HARD return and the whole transaction will be handled | ||
510 | * in virtual mode. | ||
511 | */ | ||
512 | do { | ||
513 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
514 | |||
515 | /* Down_CPPR */ | ||
516 | new_state.cppr = new_cppr; | ||
517 | |||
518 | /* | ||
519 | * Cut down Resend / Check_IPI / IPI | ||
520 | * | ||
521 | * The logic is that we cannot have a pending interrupt | ||
522 | * trumped by an IPI at this point (see above), so we | ||
523 | * know that either the pending interrupt is already an | ||
524 | * IPI (in which case we don't care to override it) or | ||
525 | * it's either more favored than us or non-existent. | ||
526 | */ | ||
527 | if (new_state.mfrr < new_cppr && | ||
528 | new_state.mfrr <= new_state.pending_pri) { | ||
529 | WARN_ON(new_state.xisr != XICS_IPI && | ||
530 | new_state.xisr != 0); | ||
531 | new_state.pending_pri = new_state.mfrr; | ||
532 | new_state.xisr = XICS_IPI; | ||
533 | } | ||
534 | |||
535 | /* Latch/clear resend bit */ | ||
536 | resend = new_state.need_resend; | ||
537 | new_state.need_resend = 0; | ||
538 | |||
539 | } while (!icp_try_update(icp, old_state, new_state, true)); | ||
540 | |||
541 | /* | ||
542 | * Now handle resend checks. Those are asynchronous to the ICP | ||
543 | * state update in HW (ie bus transactions) so we can handle them | ||
544 | * separately here too | ||
545 | */ | ||
546 | if (resend) | ||
547 | icp_check_resend(xics, icp); | ||
548 | } | ||
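As a concrete illustration of the Down_CPPR/Resend interplay coded above: suppose the guest lowers CPPR from 0xff to 0x05 while MFRR holds an IPI at priority 0x04 and XISR is empty. Then mfrr (0x04) < new_cppr (0x05) and mfrr <= pending_pri (0xff), so the transaction latches XICS_IPI into XISR with pending_pri 0x04, and icp_try_update() raises out_ee because the IPI is now deliverable.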
549 | |||
550 | static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu) | ||
551 | { | ||
552 | union kvmppc_icp_state old_state, new_state; | ||
553 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
554 | u32 xirr; | ||
555 | |||
556 | /* First, remove EE from the processor */ | ||
557 | kvmppc_book3s_dequeue_irqprio(icp->vcpu, | ||
558 | BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | ||
559 | |||
560 | /* | ||
561 | * ICP State: Accept_Interrupt | ||
562 | * | ||
563 | * Return the pending interrupt (if any) along with the | ||
564 | * current CPPR, then clear the XISR & set CPPR to the | ||
565 | * pending priority | ||
566 | */ | ||
567 | do { | ||
568 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
569 | |||
570 | xirr = old_state.xisr | (((u32)old_state.cppr) << 24); | ||
571 | if (!old_state.xisr) | ||
572 | break; | ||
573 | new_state.cppr = new_state.pending_pri; | ||
574 | new_state.pending_pri = 0xff; | ||
575 | new_state.xisr = 0; | ||
576 | |||
577 | } while (!icp_try_update(icp, old_state, new_state, true)); | ||
578 | |||
579 | XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr); | ||
580 | |||
581 | return xirr; | ||
582 | } | ||
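A worked example of the XIRR encoding returned here: with CPPR 0xff and interrupt 0x001234 pending at priority 0x05, the hcall returns xirr = 0x001234 | (0xff << 24) = 0xff001234 (the old CPPR in the top byte, the XISR below it), and the transaction leaves the ICP with cppr = 0x05, pending_pri = 0xff and xisr = 0, i.e. the accepted interrupt's priority becomes the new operating priority.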
583 | |||
584 | static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, | ||
585 | unsigned long mfrr) | ||
586 | { | ||
587 | union kvmppc_icp_state old_state, new_state; | ||
588 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
589 | struct kvmppc_icp *icp; | ||
590 | u32 reject; | ||
591 | bool resend; | ||
592 | bool local; | ||
593 | |||
594 | XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n", | ||
595 | vcpu->vcpu_id, server, mfrr); | ||
596 | |||
597 | icp = vcpu->arch.icp; | ||
598 | local = icp->server_num == server; | ||
599 | if (!local) { | ||
600 | icp = kvmppc_xics_find_server(vcpu->kvm, server); | ||
601 | if (!icp) | ||
602 | return H_PARAMETER; | ||
603 | } | ||
604 | |||
605 | /* | ||
606 | * ICP state: Set_MFRR | ||
607 | * | ||
608 | * If the CPPR is more favored than the new MFRR, then | ||
609 | * nothing needs to be rejected as there can be no XISR to | ||
610 | * reject. If the MFRR is being made less favored then | ||
611 | * there might be a previously-rejected interrupt needing | ||
612 | * to be resent. | ||
613 | * | ||
614 | * If the CPPR is less favored, then we might be replacing | ||
615 | * an interrupt, and thus need to possibly reject it as in | ||
616 | * | ||
617 | * ICP state: Check_IPI | ||
618 | */ | ||
619 | do { | ||
620 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
621 | |||
622 | /* Set_MFRR */ | ||
623 | new_state.mfrr = mfrr; | ||
624 | |||
625 | /* Check_IPI */ | ||
626 | reject = 0; | ||
627 | resend = false; | ||
628 | if (mfrr < new_state.cppr) { | ||
629 | /* Reject a pending interrupt if not an IPI */ | ||
630 | if (mfrr <= new_state.pending_pri) | ||
631 | reject = new_state.xisr; | ||
632 | new_state.pending_pri = mfrr; | ||
633 | new_state.xisr = XICS_IPI; | ||
634 | } | ||
635 | |||
636 | if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { | ||
637 | resend = new_state.need_resend; | ||
638 | new_state.need_resend = 0; | ||
639 | } | ||
640 | } while (!icp_try_update(icp, old_state, new_state, local)); | ||
641 | |||
642 | /* Handle reject */ | ||
643 | if (reject && reject != XICS_IPI) | ||
644 | icp_deliver_irq(xics, icp, reject); | ||
645 | |||
646 | /* Handle resend */ | ||
647 | if (resend) | ||
648 | icp_check_resend(xics, icp); | ||
649 | |||
650 | return H_SUCCESS; | ||
651 | } | ||
652 | |||
653 | static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) | ||
654 | { | ||
655 | union kvmppc_icp_state old_state, new_state; | ||
656 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
657 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
658 | u32 reject; | ||
659 | |||
660 | XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr); | ||
661 | |||
662 | /* | ||
663 | * ICP State: Set_CPPR | ||
664 | * | ||
665 | * We can safely compare the new value with the current | ||
666 | * value outside of the transaction as the CPPR is only | ||
667 | * ever changed by the processor on itself | ||
668 | */ | ||
669 | if (cppr > icp->state.cppr) | ||
670 | icp_down_cppr(xics, icp, cppr); | ||
671 | else if (cppr == icp->state.cppr) | ||
672 | return; | ||
673 | |||
674 | /* | ||
675 | * ICP State: Up_CPPR | ||
676 | * | ||
677 | * The processor is raising its priority, this can result | ||
678 | * in a rejection of a pending interrupt: | ||
679 | * | ||
680 | * ICP State: Reject_Current | ||
681 | * | ||
682 | * We can remove EE from the current processor, the update | ||
683 | * transaction will set it again if needed | ||
684 | */ | ||
685 | kvmppc_book3s_dequeue_irqprio(icp->vcpu, | ||
686 | BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | ||
687 | |||
688 | do { | ||
689 | old_state = new_state = ACCESS_ONCE(icp->state); | ||
690 | |||
691 | reject = 0; | ||
692 | new_state.cppr = cppr; | ||
693 | |||
694 | if (cppr <= new_state.pending_pri) { | ||
695 | reject = new_state.xisr; | ||
696 | new_state.xisr = 0; | ||
697 | new_state.pending_pri = 0xff; | ||
698 | } | ||
699 | |||
700 | } while (!icp_try_update(icp, old_state, new_state, true)); | ||
701 | |||
702 | /* | ||
703 | * Check for rejects. They are handled by doing a new delivery | ||
704 | * attempt (see comments in icp_deliver_irq). | ||
705 | */ | ||
706 | if (reject && reject != XICS_IPI) | ||
707 | icp_deliver_irq(xics, icp, reject); | ||
708 | } | ||
709 | |||
710 | static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) | ||
711 | { | ||
712 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
713 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
714 | struct kvmppc_ics *ics; | ||
715 | struct ics_irq_state *state; | ||
716 | u32 irq = xirr & 0x00ffffff; | ||
717 | u16 src; | ||
718 | |||
719 | XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); | ||
720 | |||
721 | /* | ||
722 | * ICP State: EOI | ||
723 | * | ||
724 | * Note: If EOI is incorrectly used by SW to lower the CPPR | ||
725 | * value (ie more favored), we do not check for rejection of | ||
726 | * a pending interrupt; this is a SW error and PAPR specifies | ||
727 | * that we don't have to deal with it. | ||
728 | * | ||
729 | * The sending of an EOI to the ICS is handled after the | ||
730 | * CPPR update | ||
731 | * | ||
732 | * ICP State: Down_CPPR which we handle | ||
733 | * in a separate function as it's shared with H_CPPR. | ||
734 | */ | ||
735 | icp_down_cppr(xics, icp, xirr >> 24); | ||
736 | |||
737 | /* IPIs have no EOI */ | ||
738 | if (irq == XICS_IPI) | ||
739 | return H_SUCCESS; | ||
740 | /* | ||
741 | * EOI handling: If the interrupt is still asserted, we need to | ||
742 | * resend it. We can take a lockless "peek" at the ICS state here. | ||
743 | * | ||
744 | * "Message" interrupts will never have "asserted" set | ||
745 | */ | ||
746 | ics = kvmppc_xics_find_ics(xics, irq, &src); | ||
747 | if (!ics) { | ||
748 | XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); | ||
749 | return H_PARAMETER; | ||
750 | } | ||
751 | state = &ics->irq_state[src]; | ||
752 | |||
753 | /* Still asserted, resend it */ | ||
754 | if (state->asserted) | ||
755 | icp_deliver_irq(xics, icp, irq); | ||
756 | |||
757 | return H_SUCCESS; | ||
758 | } | ||
759 | |||
760 | static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) | ||
761 | { | ||
762 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
763 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
764 | |||
765 | XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n", | ||
766 | hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt); | ||
767 | |||
768 | if (icp->rm_action & XICS_RM_KICK_VCPU) | ||
769 | kvmppc_fast_vcpu_kick(icp->rm_kick_target); | ||
770 | if (icp->rm_action & XICS_RM_CHECK_RESEND) | ||
771 | icp_check_resend(xics, icp); | ||
772 | if (icp->rm_action & XICS_RM_REJECT) | ||
773 | icp_deliver_irq(xics, icp, icp->rm_reject); | ||
774 | |||
775 | icp->rm_action = 0; | ||
776 | |||
777 | return H_SUCCESS; | ||
778 | } | ||
779 | |||
780 | int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) | ||
781 | { | ||
782 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
783 | unsigned long res; | ||
784 | int rc = H_SUCCESS; | ||
785 | |||
786 | /* Check if we have an ICP */ | ||
787 | if (!xics || !vcpu->arch.icp) | ||
788 | return H_HARDWARE; | ||
789 | |||
790 | /* Check for real mode returning too hard */ | ||
791 | if (xics->real_mode) | ||
792 | return kvmppc_xics_rm_complete(vcpu, req); | ||
793 | |||
794 | switch (req) { | ||
795 | case H_XIRR: | ||
796 | res = kvmppc_h_xirr(vcpu); | ||
797 | kvmppc_set_gpr(vcpu, 4, res); | ||
798 | break; | ||
799 | case H_CPPR: | ||
800 | kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); | ||
801 | break; | ||
802 | case H_EOI: | ||
803 | rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); | ||
804 | break; | ||
805 | case H_IPI: | ||
806 | rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), | ||
807 | kvmppc_get_gpr(vcpu, 5)); | ||
808 | break; | ||
809 | } | ||
810 | |||
811 | return rc; | ||
812 | } | ||
813 | |||
814 | |||
815 | /* -- Initialisation code etc. -- */ | ||
816 | |||
817 | static int xics_debug_show(struct seq_file *m, void *private) | ||
818 | { | ||
819 | struct kvmppc_xics *xics = m->private; | ||
820 | struct kvm *kvm = xics->kvm; | ||
821 | struct kvm_vcpu *vcpu; | ||
822 | int icsid, i; | ||
823 | |||
824 | if (!kvm) | ||
825 | return 0; | ||
826 | |||
827 | seq_printf(m, "=========\nICP state\n=========\n"); | ||
828 | |||
829 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
830 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
831 | union kvmppc_icp_state state; | ||
832 | |||
833 | if (!icp) | ||
834 | continue; | ||
835 | |||
836 | state.raw = ACCESS_ONCE(icp->state.raw); | ||
837 | seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n", | ||
838 | icp->server_num, state.xisr, | ||
839 | state.pending_pri, state.cppr, state.mfrr, | ||
840 | state.out_ee, state.need_resend); | ||
841 | } | ||
842 | |||
843 | for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { | ||
844 | struct kvmppc_ics *ics = xics->ics[icsid]; | ||
845 | |||
846 | if (!ics) | ||
847 | continue; | ||
848 | |||
849 | seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n", | ||
850 | icsid); | ||
851 | |||
852 | mutex_lock(&ics->lock); | ||
853 | |||
854 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | ||
855 | struct ics_irq_state *irq = &ics->irq_state[i]; | ||
856 | |||
857 | seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", | ||
858 | irq->number, irq->server, irq->priority, | ||
859 | irq->saved_priority, irq->asserted, | ||
860 | irq->resend, irq->masked_pending); | ||
861 | |||
862 | } | ||
863 | mutex_unlock(&ics->lock); | ||
864 | } | ||
865 | return 0; | ||
866 | } | ||
867 | |||
868 | static int xics_debug_open(struct inode *inode, struct file *file) | ||
869 | { | ||
870 | return single_open(file, xics_debug_show, inode->i_private); | ||
871 | } | ||
872 | |||
873 | static const struct file_operations xics_debug_fops = { | ||
874 | .open = xics_debug_open, | ||
875 | .read = seq_read, | ||
876 | .llseek = seq_lseek, | ||
877 | .release = single_release, | ||
878 | }; | ||
879 | |||
880 | static void xics_debugfs_init(struct kvmppc_xics *xics) | ||
881 | { | ||
882 | char *name; | ||
883 | |||
884 | name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); | ||
885 | if (!name) { | ||
886 | pr_err("%s: no memory for name\n", __func__); | ||
887 | return; | ||
888 | } | ||
889 | |||
890 | xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, | ||
891 | xics, &xics_debug_fops); | ||
892 | |||
893 | pr_debug("%s: created %s\n", __func__, name); | ||
894 | kfree(name); | ||
895 | } | ||
896 | |||
897 | static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, | ||
898 | struct kvmppc_xics *xics, int irq) | ||
899 | { | ||
900 | struct kvmppc_ics *ics; | ||
901 | int i, icsid; | ||
902 | |||
903 | icsid = irq >> KVMPPC_XICS_ICS_SHIFT; | ||
904 | |||
905 | mutex_lock(&kvm->lock); | ||
906 | |||
907 | /* ICS already exists - somebody else got here first */ | ||
908 | if (xics->ics[icsid]) | ||
909 | goto out; | ||
910 | |||
911 | /* Create the ICS */ | ||
912 | ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL); | ||
913 | if (!ics) | ||
914 | goto out; | ||
915 | |||
916 | mutex_init(&ics->lock); | ||
917 | ics->icsid = icsid; | ||
918 | |||
919 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { | ||
920 | ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i; | ||
921 | ics->irq_state[i].priority = MASKED; | ||
922 | ics->irq_state[i].saved_priority = MASKED; | ||
923 | } | ||
924 | smp_wmb(); | ||
925 | xics->ics[icsid] = ics; | ||
926 | |||
927 | if (icsid > xics->max_icsid) | ||
928 | xics->max_icsid = icsid; | ||
929 | |||
930 | out: | ||
931 | mutex_unlock(&kvm->lock); | ||
932 | return xics->ics[icsid]; | ||
933 | } | ||
934 | |||
935 | int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) | ||
936 | { | ||
937 | struct kvmppc_icp *icp; | ||
938 | |||
939 | if (!vcpu->kvm->arch.xics) | ||
940 | return -ENODEV; | ||
941 | |||
942 | if (kvmppc_xics_find_server(vcpu->kvm, server_num)) | ||
943 | return -EEXIST; | ||
944 | |||
945 | icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); | ||
946 | if (!icp) | ||
947 | return -ENOMEM; | ||
948 | |||
949 | icp->vcpu = vcpu; | ||
950 | icp->server_num = server_num; | ||
951 | icp->state.mfrr = MASKED; | ||
952 | icp->state.pending_pri = MASKED; | ||
953 | vcpu->arch.icp = icp; | ||
954 | |||
955 | XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); | ||
956 | |||
957 | return 0; | ||
958 | } | ||
959 | |||
960 | u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu) | ||
961 | { | ||
962 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
963 | union kvmppc_icp_state state; | ||
964 | |||
965 | if (!icp) | ||
966 | return 0; | ||
967 | state = icp->state; | ||
968 | return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) | | ||
969 | ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) | | ||
970 | ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) | | ||
971 | ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT); | ||
972 | } | ||
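Userspace migrates this packed word through the ONE_REG interface. A hedged sketch of the read side, assuming the register is exposed as KVM_REG_PPC_ICP_STATE and vcpu_fd is an open vcpu descriptor:

    /* Hypothetical userspace read of the packed ICP state */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int read_icp_state(int vcpu_fd, uint64_t *icp_state)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_PPC_ICP_STATE,
                    .addr = (uintptr_t)icp_state,
            };

            return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }

The fields then unpack with the same KVM_REG_PPC_ICP_*_SHIFT constants used above (e.g. cppr = *icp_state >> KVM_REG_PPC_ICP_CPPR_SHIFT), and the write side feeds the identical layout back through KVM_SET_ONE_REG into kvmppc_xics_set_icp() below.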
973 | |||
974 | int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) | ||
975 | { | ||
976 | struct kvmppc_icp *icp = vcpu->arch.icp; | ||
977 | struct kvmppc_xics *xics = vcpu->kvm->arch.xics; | ||
978 | union kvmppc_icp_state old_state, new_state; | ||
979 | struct kvmppc_ics *ics; | ||
980 | u8 cppr, mfrr, pending_pri; | ||
981 | u32 xisr; | ||
982 | u16 src; | ||
983 | bool resend; | ||
984 | |||
985 | if (!icp || !xics) | ||
986 | return -ENOENT; | ||
987 | |||
988 | cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT; | ||
989 | xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) & | ||
990 | KVM_REG_PPC_ICP_XISR_MASK; | ||
991 | mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT; | ||
992 | pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT; | ||
993 | |||
994 | /* Require the new state to be internally consistent */ | ||
995 | if (xisr == 0) { | ||
996 | if (pending_pri != 0xff) | ||
997 | return -EINVAL; | ||
998 | } else if (xisr == XICS_IPI) { | ||
999 | if (pending_pri != mfrr || pending_pri >= cppr) | ||
1000 | return -EINVAL; | ||
1001 | } else { | ||
1002 | if (pending_pri >= mfrr || pending_pri >= cppr) | ||
1003 | return -EINVAL; | ||
1004 | ics = kvmppc_xics_find_ics(xics, xisr, &src); | ||
1005 | if (!ics) | ||
1006 | return -EINVAL; | ||
1007 | } | ||
1008 | |||
1009 | new_state.raw = 0; | ||
1010 | new_state.cppr = cppr; | ||
1011 | new_state.xisr = xisr; | ||
1012 | new_state.mfrr = mfrr; | ||
1013 | new_state.pending_pri = pending_pri; | ||
1014 | |||
1015 | /* | ||
1016 | * Deassert the CPU interrupt request. | ||
1017 | * icp_try_update will reassert it if necessary. | ||
1018 | */ | ||
1019 | kvmppc_book3s_dequeue_irqprio(icp->vcpu, | ||
1020 | BOOK3S_INTERRUPT_EXTERNAL_LEVEL); | ||
1021 | |||
1022 | /* | ||
1023 | * Note that if we displace an interrupt from old_state.xisr, | ||
1024 | * we don't mark it as rejected. We expect userspace to set | ||
1025 | * the state of the interrupt sources to be consistent with | ||
1026 | * the ICP states (either before or afterwards, which doesn't | ||
1027 | * matter). We do handle resends due to CPPR becoming less | ||
1028 | * favoured because that is necessary to end up with a | ||
1029 | * consistent state in the situation where userspace restores | ||
1030 | * the ICS states before the ICP states. | ||
1031 | */ | ||
1032 | do { | ||
1033 | old_state = ACCESS_ONCE(icp->state); | ||
1034 | |||
1035 | if (new_state.mfrr <= old_state.mfrr) { | ||
1036 | resend = false; | ||
1037 | new_state.need_resend = old_state.need_resend; | ||
1038 | } else { | ||
1039 | resend = old_state.need_resend; | ||
1040 | new_state.need_resend = 0; | ||
1041 | } | ||
1042 | } while (!icp_try_update(icp, old_state, new_state, false)); | ||
1043 | |||
1044 | if (resend) | ||
1045 | icp_check_resend(xics, icp); | ||
1046 | |||
1047 | return 0; | ||
1048 | } | ||
1049 | |||
1050 | static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) | ||
1051 | { | ||
1052 | int ret; | ||
1053 | struct kvmppc_ics *ics; | ||
1054 | struct ics_irq_state *irqp; | ||
1055 | u64 __user *ubufp = (u64 __user *) addr; | ||
1056 | u16 idx; | ||
1057 | u64 val, prio; | ||
1058 | |||
1059 | ics = kvmppc_xics_find_ics(xics, irq, &idx); | ||
1060 | if (!ics) | ||
1061 | return -ENOENT; | ||
1062 | |||
1063 | irqp = &ics->irq_state[idx]; | ||
1064 | mutex_lock(&ics->lock); | ||
1065 | ret = -ENOENT; | ||
1066 | if (irqp->exists) { | ||
1067 | val = irqp->server; | ||
1068 | prio = irqp->priority; | ||
1069 | if (prio == MASKED) { | ||
1070 | val |= KVM_XICS_MASKED; | ||
1071 | prio = irqp->saved_priority; | ||
1072 | } | ||
1073 | val |= prio << KVM_XICS_PRIORITY_SHIFT; | ||
1074 | if (irqp->asserted) | ||
1075 | val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; | ||
1076 | else if (irqp->masked_pending || irqp->resend) | ||
1077 | val |= KVM_XICS_PENDING; | ||
1078 | ret = 0; | ||
1079 | } | ||
1080 | mutex_unlock(&ics->lock); | ||
1081 | |||
1082 | if (!ret && put_user(val, ubufp)) | ||
1083 | ret = -EFAULT; | ||
1084 | |||
1085 | return ret; | ||
1086 | } | ||
1087 | |||
1088 | static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) | ||
1089 | { | ||
1090 | struct kvmppc_ics *ics; | ||
1091 | struct ics_irq_state *irqp; | ||
1092 | u64 __user *ubufp = (u64 __user *) addr; | ||
1093 | u16 idx; | ||
1094 | u64 val; | ||
1095 | u8 prio; | ||
1096 | u32 server; | ||
1097 | |||
1098 | if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) | ||
1099 | return -ENOENT; | ||
1100 | |||
1101 | ics = kvmppc_xics_find_ics(xics, irq, &idx); | ||
1102 | if (!ics) { | ||
1103 | ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); | ||
1104 | if (!ics) | ||
1105 | return -ENOMEM; | ||
1106 | } | ||
1107 | irqp = &ics->irq_state[idx]; | ||
1108 | if (get_user(val, ubufp)) | ||
1109 | return -EFAULT; | ||
1110 | |||
1111 | server = val & KVM_XICS_DESTINATION_MASK; | ||
1112 | prio = val >> KVM_XICS_PRIORITY_SHIFT; | ||
1113 | if (prio != MASKED && | ||
1114 | kvmppc_xics_find_server(xics->kvm, server) == NULL) | ||
1115 | return -EINVAL; | ||
1116 | |||
1117 | mutex_lock(&ics->lock); | ||
1118 | irqp->server = server; | ||
1119 | irqp->saved_priority = prio; | ||
1120 | if (val & KVM_XICS_MASKED) | ||
1121 | prio = MASKED; | ||
1122 | irqp->priority = prio; | ||
1123 | irqp->resend = 0; | ||
1124 | irqp->masked_pending = 0; | ||
1125 | irqp->asserted = 0; | ||
1126 | if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) | ||
1127 | irqp->asserted = 1; | ||
1128 | irqp->exists = 1; | ||
1129 | mutex_unlock(&ics->lock); | ||
1130 | |||
1131 | if (val & KVM_XICS_PENDING) | ||
1132 | icp_deliver_irq(xics, NULL, irqp->number); | ||
1133 | |||
1134 | return 0; | ||
1135 | } | ||
1136 | |||
1137 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, | ||
1138 | bool line_status) | ||
1139 | { | ||
1140 | struct kvmppc_xics *xics = kvm->arch.xics; | ||
1141 | |||
1142 | return ics_deliver_irq(xics, irq, level, line_status); | ||
1143 | } | ||
1144 | |||
1145 | static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
1146 | { | ||
1147 | struct kvmppc_xics *xics = dev->private; | ||
1148 | |||
1149 | switch (attr->group) { | ||
1150 | case KVM_DEV_XICS_GRP_SOURCES: | ||
1151 | return xics_set_source(xics, attr->attr, attr->addr); | ||
1152 | } | ||
1153 | return -ENXIO; | ||
1154 | } | ||
1155 | |||
1156 | static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
1157 | { | ||
1158 | struct kvmppc_xics *xics = dev->private; | ||
1159 | |||
1160 | switch (attr->group) { | ||
1161 | case KVM_DEV_XICS_GRP_SOURCES: | ||
1162 | return xics_get_source(xics, attr->attr, attr->addr); | ||
1163 | } | ||
1164 | return -ENXIO; | ||
1165 | } | ||
1166 | |||
1167 | static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
1168 | { | ||
1169 | switch (attr->group) { | ||
1170 | case KVM_DEV_XICS_GRP_SOURCES: | ||
1171 | if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && | ||
1172 | attr->attr < KVMPPC_XICS_NR_IRQS) | ||
1173 | return 0; | ||
1174 | break; | ||
1175 | } | ||
1176 | return -ENXIO; | ||
1177 | } | ||
1178 | |||
1179 | static void kvmppc_xics_free(struct kvm_device *dev) | ||
1180 | { | ||
1181 | struct kvmppc_xics *xics = dev->private; | ||
1182 | int i; | ||
1183 | struct kvm *kvm = xics->kvm; | ||
1184 | |||
1185 | debugfs_remove(xics->dentry); | ||
1186 | |||
1187 | if (kvm) | ||
1188 | kvm->arch.xics = NULL; | ||
1189 | |||
1190 | for (i = 0; i <= xics->max_icsid; i++) | ||
1191 | kfree(xics->ics[i]); | ||
1192 | kfree(xics); | ||
1193 | kfree(dev); | ||
1194 | } | ||
1195 | |||
1196 | static int kvmppc_xics_create(struct kvm_device *dev, u32 type) | ||
1197 | { | ||
1198 | struct kvmppc_xics *xics; | ||
1199 | struct kvm *kvm = dev->kvm; | ||
1200 | int ret = 0; | ||
1201 | |||
1202 | xics = kzalloc(sizeof(*xics), GFP_KERNEL); | ||
1203 | if (!xics) | ||
1204 | return -ENOMEM; | ||
1205 | |||
1206 | dev->private = xics; | ||
1207 | xics->dev = dev; | ||
1208 | xics->kvm = kvm; | ||
1209 | |||
1210 | /* Already there ? */ | ||
1211 | mutex_lock(&kvm->lock); | ||
1212 | if (kvm->arch.xics) | ||
1213 | ret = -EEXIST; | ||
1214 | else | ||
1215 | kvm->arch.xics = xics; | ||
1216 | mutex_unlock(&kvm->lock); | ||
1217 | |||
1218 | if (ret) | ||
1219 | return ret; | ||
1220 | |||
1221 | xics_debugfs_init(xics); | ||
1222 | |||
1223 | #ifdef CONFIG_KVM_BOOK3S_64_HV | ||
1224 | if (cpu_has_feature(CPU_FTR_ARCH_206)) { | ||
1225 | /* Enable real mode support */ | ||
1226 | xics->real_mode = ENABLE_REALMODE; | ||
1227 | xics->real_mode_dbg = DEBUG_REALMODE; | ||
1228 | } | ||
1229 | #endif /* CONFIG_KVM_BOOK3S_64_HV */ | ||
1230 | |||
1231 | return 0; | ||
1232 | } | ||
1233 | |||
1234 | struct kvm_device_ops kvm_xics_ops = { | ||
1235 | .name = "kvm-xics", | ||
1236 | .create = kvmppc_xics_create, | ||
1237 | .destroy = kvmppc_xics_free, | ||
1238 | .set_attr = xics_set_attr, | ||
1239 | .get_attr = xics_get_attr, | ||
1240 | .has_attr = xics_has_attr, | ||
1241 | }; | ||
1242 | |||
1243 | int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, | ||
1244 | u32 xcpu) | ||
1245 | { | ||
1246 | struct kvmppc_xics *xics = dev->private; | ||
1247 | int r = -EBUSY; | ||
1248 | |||
1249 | if (dev->ops != &kvm_xics_ops) | ||
1250 | return -EPERM; | ||
1251 | if (xics->kvm != vcpu->kvm) | ||
1252 | return -EPERM; | ||
1253 | if (vcpu->arch.irq_type) | ||
1254 | return -EBUSY; | ||
1255 | |||
1256 | r = kvmppc_xics_create_icp(vcpu, xcpu); | ||
1257 | if (!r) | ||
1258 | vcpu->arch.irq_type = KVMPPC_IRQ_XICS; | ||
1259 | |||
1260 | return r; | ||
1261 | } | ||
1262 | |||
1263 | void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) | ||
1264 | { | ||
1265 | if (!vcpu->arch.icp) | ||
1266 | return; | ||
1267 | kfree(vcpu->arch.icp); | ||
1268 | vcpu->arch.icp = NULL; | ||
1269 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; | ||
1270 | } | ||
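kvm_xics_ops above plugs into the generic in-kernel device framework, so the expected lifecycle is: userspace creates one XICS device per VM, then attaches each vcpu as an ICP server, which is how kvmppc_xics_connect_vcpu() gets reached. A hedged sketch of that flow, assuming a KVM_DEV_TYPE_XICS device type and a KVM_CAP_IRQ_XICS vcpu capability whose arguments carry the device fd and server number:

    /* Hypothetical userspace setup: one XICS device, one ICP per vcpu */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int setup_xics(int vm_fd, int vcpu_fd, unsigned long server_num)
    {
            struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XICS };
            struct kvm_enable_cap cap = { .cap = KVM_CAP_IRQ_XICS };

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                    return -1;      /* on success cd.fd holds the device fd */

            cap.args[0] = cd.fd;
            cap.args[1] = server_num;
            /* reaches kvmppc_xics_connect_vcpu() via KVM_ENABLE_CAP */
            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }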
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h new file mode 100644 index 000000000000..dd9326c5c19b --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.h | |||
@@ -0,0 +1,130 @@ | |||
1 | /* | ||
2 | * Copyright 2012 Michael Ellerman, IBM Corporation. | ||
3 | * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License, version 2, as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #ifndef _KVM_PPC_BOOK3S_XICS_H | ||
11 | #define _KVM_PPC_BOOK3S_XICS_H | ||
12 | |||
13 | /* | ||
14 | * We use a two-level tree to store interrupt source information. | ||
15 | * There are up to 1024 ICS nodes, each of which can represent | ||
16 | * 1024 sources. | ||
17 | */ | ||
18 | #define KVMPPC_XICS_MAX_ICS_ID 1023 | ||
19 | #define KVMPPC_XICS_ICS_SHIFT 10 | ||
20 | #define KVMPPC_XICS_IRQ_PER_ICS (1 << KVMPPC_XICS_ICS_SHIFT) | ||
21 | #define KVMPPC_XICS_SRC_MASK (KVMPPC_XICS_IRQ_PER_ICS - 1) | ||
22 | |||
23 | /* | ||
24 | * Interrupt source numbers below this are reserved, for example | ||
25 | * 0 is "no interrupt", and 2 is used for IPIs. | ||
26 | */ | ||
27 | #define KVMPPC_XICS_FIRST_IRQ 16 | ||
28 | #define KVMPPC_XICS_NR_IRQS ((KVMPPC_XICS_MAX_ICS_ID + 1) * \ | ||
29 | KVMPPC_XICS_IRQ_PER_ICS) | ||
30 | |||
31 | /* Priority value to use for disabling an interrupt */ | ||
32 | #define MASKED 0xff | ||
33 | |||
34 | /* State for one irq source */ | ||
35 | struct ics_irq_state { | ||
36 | u32 number; | ||
37 | u32 server; | ||
38 | u8 priority; | ||
39 | u8 saved_priority; | ||
40 | u8 resend; | ||
41 | u8 masked_pending; | ||
42 | u8 asserted; /* Only for LSI */ | ||
43 | u8 exists; | ||
44 | }; | ||
45 | |||
46 | /* Atomic ICP state, updated with a single compare & swap */ | ||
47 | union kvmppc_icp_state { | ||
48 | unsigned long raw; | ||
49 | struct { | ||
50 | u8 out_ee:1; | ||
51 | u8 need_resend:1; | ||
52 | u8 cppr; | ||
53 | u8 mfrr; | ||
54 | u8 pending_pri; | ||
55 | u32 xisr; | ||
56 | }; | ||
57 | }; | ||
58 | |||
59 | /* One bit per ICS */ | ||
60 | #define ICP_RESEND_MAP_SIZE (KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1) | ||
61 | |||
62 | struct kvmppc_icp { | ||
63 | struct kvm_vcpu *vcpu; | ||
64 | unsigned long server_num; | ||
65 | union kvmppc_icp_state state; | ||
66 | unsigned long resend_map[ICP_RESEND_MAP_SIZE]; | ||
67 | |||
68 | /* Real mode might find something too hard; here's the action | ||
69 | * it might request from virtual mode | ||
70 | */ | ||
71 | #define XICS_RM_KICK_VCPU 0x1 | ||
72 | #define XICS_RM_CHECK_RESEND 0x2 | ||
73 | #define XICS_RM_REJECT 0x4 | ||
74 | u32 rm_action; | ||
75 | struct kvm_vcpu *rm_kick_target; | ||
76 | u32 rm_reject; | ||
77 | |||
78 | /* Debug stuff for real mode */ | ||
79 | union kvmppc_icp_state rm_dbgstate; | ||
80 | struct kvm_vcpu *rm_dbgtgt; | ||
81 | }; | ||
82 | |||
83 | struct kvmppc_ics { | ||
84 | struct mutex lock; | ||
85 | u16 icsid; | ||
86 | struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; | ||
87 | }; | ||
88 | |||
89 | struct kvmppc_xics { | ||
90 | struct kvm *kvm; | ||
91 | struct kvm_device *dev; | ||
92 | struct dentry *dentry; | ||
93 | u32 max_icsid; | ||
94 | bool real_mode; | ||
95 | bool real_mode_dbg; | ||
96 | struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1]; | ||
97 | }; | ||
98 | |||
99 | static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, | ||
100 | u32 nr) | ||
101 | { | ||
102 | struct kvm_vcpu *vcpu = NULL; | ||
103 | int i; | ||
104 | |||
105 | kvm_for_each_vcpu(i, vcpu, kvm) { | ||
106 | if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num) | ||
107 | return vcpu->arch.icp; | ||
108 | } | ||
109 | return NULL; | ||
110 | } | ||
111 | |||
112 | static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, | ||
113 | u32 irq, u16 *source) | ||
114 | { | ||
115 | u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT; | ||
116 | u16 src = irq & KVMPPC_XICS_SRC_MASK; | ||
117 | struct kvmppc_ics *ics; | ||
118 | |||
119 | if (source) | ||
120 | *source = src; | ||
121 | if (icsid > KVMPPC_XICS_MAX_ICS_ID) | ||
122 | return NULL; | ||
123 | ics = xics->ics[icsid]; | ||
124 | if (!ics) | ||
125 | return NULL; | ||
126 | return ics; | ||
127 | } | ||
128 | |||
129 | |||
130 | #endif /* _KVM_PPC_BOOK3S_XICS_H */ | ||
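The two lookup helpers above split a global interrupt number with plain shifts and masks; a worked example under the constants defined in this header:

    /* irq 0x1234 -> ICS 4, source 0x234 */
    u32 irq   = 0x1234;
    u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;   /* 0x1234 >> 10   == 4     */
    u16 src   = irq & KVMPPC_XICS_SRC_MASK;     /* 0x1234 & 0x3ff == 0x234 */

so kvmppc_xics_find_ics() would return xics->ics[4] and report source 0x234 through *source.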
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 020923e43134..1020119226db 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -222,8 +222,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | |||
222 | kvmppc_booke_queue_irqprio(vcpu, prio); | 222 | kvmppc_booke_queue_irqprio(vcpu, prio); |
223 | } | 223 | } |
224 | 224 | ||
225 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, | 225 | void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) |
226 | struct kvm_interrupt *irq) | ||
227 | { | 226 | { |
228 | clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); | 227 | clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); |
229 | clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); | 228 | clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); |
@@ -347,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
347 | keep_irq = true; | 346 | keep_irq = true; |
348 | } | 347 | } |
349 | 348 | ||
350 | if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) | 349 | if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags) |
351 | update_epr = true; | 350 | update_epr = true; |
352 | 351 | ||
353 | switch (priority) { | 352 | switch (priority) { |
@@ -428,8 +427,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
428 | set_guest_esr(vcpu, vcpu->arch.queued_esr); | 427 | set_guest_esr(vcpu, vcpu->arch.queued_esr); |
429 | if (update_dear == true) | 428 | if (update_dear == true) |
430 | set_guest_dear(vcpu, vcpu->arch.queued_dear); | 429 | set_guest_dear(vcpu, vcpu->arch.queued_dear); |
431 | if (update_epr == true) | 430 | if (update_epr == true) { |
432 | kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); | 431 | if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) |
432 | kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); | ||
433 | else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) { | ||
434 | BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC); | ||
435 | kvmppc_mpic_set_epr(vcpu); | ||
436 | } | ||
437 | } | ||
433 | 438 | ||
434 | new_msr &= msr_mask; | 439 | new_msr &= msr_mask; |
435 | #if defined(CONFIG_64BIT) | 440 | #if defined(CONFIG_64BIT) |
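
The hunk above gives EPR delivery two paths: KVMPPC_EPR_USER requests a KVM_REQ_EPR_EXIT so userspace can supply the external-proxy vector, while KVMPPC_EPR_KERNEL lets the in-kernel MPIC write EPR directly. A hedged sketch of how userspace opts in to the first path, assuming the per-vcpu KVM_CAP_PPC_EPR capability is the switch (as in the KVM API of this era):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_user_epr(int vcpu_fd)
{
        struct kvm_enable_cap cap = {
                .cap = KVM_CAP_PPC_EPR,
                .args = { 1 },          /* nonzero: EPR exits go to userspace */
        };

        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}
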
@@ -746,6 +751,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
746 | kvmppc_core_queue_program(vcpu, ESR_PIL); | 751 | kvmppc_core_queue_program(vcpu, ESR_PIL); |
747 | return RESUME_HOST; | 752 | return RESUME_HOST; |
748 | 753 | ||
754 | case EMULATE_EXIT_USER: | ||
755 | return RESUME_HOST; | ||
756 | |||
749 | default: | 757 | default: |
750 | BUG(); | 758 | BUG(); |
751 | } | 759 | } |
@@ -1148,6 +1156,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
1148 | return r; | 1156 | return r; |
1149 | } | 1157 | } |
1150 | 1158 | ||
1159 | static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr) | ||
1160 | { | ||
1161 | u32 old_tsr = vcpu->arch.tsr; | ||
1162 | |||
1163 | vcpu->arch.tsr = new_tsr; | ||
1164 | |||
1165 | if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) | ||
1166 | arm_next_watchdog(vcpu); | ||
1167 | |||
1168 | update_timer_ints(vcpu); | ||
1169 | } | ||
1170 | |||
1151 | /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ | 1171 | /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ |
1152 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | 1172 | int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) |
1153 | { | 1173 | { |
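
The new kvmppc_set_tsr() helper only re-arms the watchdog when the relevant bits actually change: (old ^ new) & mask is nonzero exactly when some bit under the mask flipped. A standalone demonstration of the idiom (bit positions chosen for illustration, not taken from the booke TSR layout):

#include <stdio.h>

#define TSR_ENW (1u << 31)              /* illustrative positions only */
#define TSR_WIS (1u << 30)

int main(void)
{
        unsigned int old_tsr = TSR_ENW;
        unsigned int new_tsr = TSR_ENW | TSR_WIS;

        if ((old_tsr ^ new_tsr) & (TSR_ENW | TSR_WIS))
                printf("watchdog bits changed -> arm_next_watchdog()\n");
        return 0;
}
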
@@ -1287,16 +1307,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, | |||
1287 | kvmppc_emulate_dec(vcpu); | 1307 | kvmppc_emulate_dec(vcpu); |
1288 | } | 1308 | } |
1289 | 1309 | ||
1290 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { | 1310 | if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) |
1291 | u32 old_tsr = vcpu->arch.tsr; | 1311 | kvmppc_set_tsr(vcpu, sregs->u.e.tsr); |
1292 | |||
1293 | vcpu->arch.tsr = sregs->u.e.tsr; | ||
1294 | |||
1295 | if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) | ||
1296 | arm_next_watchdog(vcpu); | ||
1297 | |||
1298 | update_timer_ints(vcpu); | ||
1299 | } | ||
1300 | 1312 | ||
1301 | return 0; | 1313 | return 0; |
1302 | } | 1314 | } |
@@ -1409,84 +1421,134 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
1409 | 1421 | ||
1410 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1422 | int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) |
1411 | { | 1423 | { |
1412 | int r = -EINVAL; | 1424 | int r = 0; |
1425 | union kvmppc_one_reg val; | ||
1426 | int size; | ||
1427 | long int i; | ||
1428 | |||
1429 | size = one_reg_size(reg->id); | ||
1430 | if (size > sizeof(val)) | ||
1431 | return -EINVAL; | ||
1413 | 1432 | ||
1414 | switch (reg->id) { | 1433 | switch (reg->id) { |
1415 | case KVM_REG_PPC_IAC1: | 1434 | case KVM_REG_PPC_IAC1: |
1416 | case KVM_REG_PPC_IAC2: | 1435 | case KVM_REG_PPC_IAC2: |
1417 | case KVM_REG_PPC_IAC3: | 1436 | case KVM_REG_PPC_IAC3: |
1418 | case KVM_REG_PPC_IAC4: { | 1437 | case KVM_REG_PPC_IAC4: |
1419 | int iac = reg->id - KVM_REG_PPC_IAC1; | 1438 | i = reg->id - KVM_REG_PPC_IAC1; |
1420 | r = copy_to_user((u64 __user *)(long)reg->addr, | 1439 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]); |
1421 | &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); | ||
1422 | break; | 1440 | break; |
1423 | } | ||
1424 | case KVM_REG_PPC_DAC1: | 1441 | case KVM_REG_PPC_DAC1: |
1425 | case KVM_REG_PPC_DAC2: { | 1442 | case KVM_REG_PPC_DAC2: |
1426 | int dac = reg->id - KVM_REG_PPC_DAC1; | 1443 | i = reg->id - KVM_REG_PPC_DAC1; |
1427 | r = copy_to_user((u64 __user *)(long)reg->addr, | 1444 | val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]); |
1428 | &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); | ||
1429 | break; | 1445 | break; |
1430 | } | ||
1431 | case KVM_REG_PPC_EPR: { | 1446 | case KVM_REG_PPC_EPR: { |
1432 | u32 epr = get_guest_epr(vcpu); | 1447 | u32 epr = get_guest_epr(vcpu); |
1433 | r = put_user(epr, (u32 __user *)(long)reg->addr); | 1448 | val = get_reg_val(reg->id, epr); |
1434 | break; | 1449 | break; |
1435 | } | 1450 | } |
1436 | #if defined(CONFIG_64BIT) | 1451 | #if defined(CONFIG_64BIT) |
1437 | case KVM_REG_PPC_EPCR: | 1452 | case KVM_REG_PPC_EPCR: |
1438 | r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); | 1453 | val = get_reg_val(reg->id, vcpu->arch.epcr); |
1439 | break; | 1454 | break; |
1440 | #endif | 1455 | #endif |
1456 | case KVM_REG_PPC_TCR: | ||
1457 | val = get_reg_val(reg->id, vcpu->arch.tcr); | ||
1458 | break; | ||
1459 | case KVM_REG_PPC_TSR: | ||
1460 | val = get_reg_val(reg->id, vcpu->arch.tsr); | ||
1461 | break; | ||
1462 | case KVM_REG_PPC_DEBUG_INST: | ||
1463 | val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV); | ||
1464 | break; | ||
1441 | default: | 1465 | default: |
1466 | r = kvmppc_get_one_reg(vcpu, reg->id, &val); | ||
1442 | break; | 1467 | break; |
1443 | } | 1468 | } |
1469 | |||
1470 | if (r) | ||
1471 | return r; | ||
1472 | |||
1473 | if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) | ||
1474 | r = -EFAULT; | ||
1475 | |||
1444 | return r; | 1476 | return r; |
1445 | } | 1477 | } |
1446 | 1478 | ||
1447 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) | 1479 | int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) |
1448 | { | 1480 | { |
1449 | int r = -EINVAL; | 1481 | int r = 0; |
1482 | union kvmppc_one_reg val; | ||
1483 | int size; | ||
1484 | long int i; | ||
1485 | |||
1486 | size = one_reg_size(reg->id); | ||
1487 | if (size > sizeof(val)) | ||
1488 | return -EINVAL; | ||
1489 | |||
1490 | if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) | ||
1491 | return -EFAULT; | ||
1450 | 1492 | ||
1451 | switch (reg->id) { | 1493 | switch (reg->id) { |
1452 | case KVM_REG_PPC_IAC1: | 1494 | case KVM_REG_PPC_IAC1: |
1453 | case KVM_REG_PPC_IAC2: | 1495 | case KVM_REG_PPC_IAC2: |
1454 | case KVM_REG_PPC_IAC3: | 1496 | case KVM_REG_PPC_IAC3: |
1455 | case KVM_REG_PPC_IAC4: { | 1497 | case KVM_REG_PPC_IAC4: |
1456 | int iac = reg->id - KVM_REG_PPC_IAC1; | 1498 | i = reg->id - KVM_REG_PPC_IAC1; |
1457 | r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], | 1499 | vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val); |
1458 | (u64 __user *)(long)reg->addr, sizeof(u64)); | ||
1459 | break; | 1500 | break; |
1460 | } | ||
1461 | case KVM_REG_PPC_DAC1: | 1501 | case KVM_REG_PPC_DAC1: |
1462 | case KVM_REG_PPC_DAC2: { | 1502 | case KVM_REG_PPC_DAC2: |
1463 | int dac = reg->id - KVM_REG_PPC_DAC1; | 1503 | i = reg->id - KVM_REG_PPC_DAC1; |
1464 | r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], | 1504 | vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val); |
1465 | (u64 __user *)(long)reg->addr, sizeof(u64)); | ||
1466 | break; | 1505 | break; |
1467 | } | ||
1468 | case KVM_REG_PPC_EPR: { | 1506 | case KVM_REG_PPC_EPR: { |
1469 | u32 new_epr; | 1507 | u32 new_epr = set_reg_val(reg->id, val); |
1470 | r = get_user(new_epr, (u32 __user *)(long)reg->addr); | 1508 | kvmppc_set_epr(vcpu, new_epr); |
1471 | if (!r) | ||
1472 | kvmppc_set_epr(vcpu, new_epr); | ||
1473 | break; | 1509 | break; |
1474 | } | 1510 | } |
1475 | #if defined(CONFIG_64BIT) | 1511 | #if defined(CONFIG_64BIT) |
1476 | case KVM_REG_PPC_EPCR: { | 1512 | case KVM_REG_PPC_EPCR: { |
1477 | u32 new_epcr; | 1513 | u32 new_epcr = set_reg_val(reg->id, val); |
1478 | r = get_user(new_epcr, (u32 __user *)(long)reg->addr); | 1514 | kvmppc_set_epcr(vcpu, new_epcr); |
1479 | if (r == 0) | ||
1480 | kvmppc_set_epcr(vcpu, new_epcr); | ||
1481 | break; | 1515 | break; |
1482 | } | 1516 | } |
1483 | #endif | 1517 | #endif |
1518 | case KVM_REG_PPC_OR_TSR: { | ||
1519 | u32 tsr_bits = set_reg_val(reg->id, val); | ||
1520 | kvmppc_set_tsr_bits(vcpu, tsr_bits); | ||
1521 | break; | ||
1522 | } | ||
1523 | case KVM_REG_PPC_CLEAR_TSR: { | ||
1524 | u32 tsr_bits = set_reg_val(reg->id, val); | ||
1525 | kvmppc_clr_tsr_bits(vcpu, tsr_bits); | ||
1526 | break; | ||
1527 | } | ||
1528 | case KVM_REG_PPC_TSR: { | ||
1529 | u32 tsr = set_reg_val(reg->id, val); | ||
1530 | kvmppc_set_tsr(vcpu, tsr); | ||
1531 | break; | ||
1532 | } | ||
1533 | case KVM_REG_PPC_TCR: { | ||
1534 | u32 tcr = set_reg_val(reg->id, val); | ||
1535 | kvmppc_set_tcr(vcpu, tcr); | ||
1536 | break; | ||
1537 | } | ||
1484 | default: | 1538 | default: |
1539 | r = kvmppc_set_one_reg(vcpu, reg->id, &val); | ||
1485 | break; | 1540 | break; |
1486 | } | 1541 | } |
1542 | |||
1487 | return r; | 1543 | return r; |
1488 | } | 1544 | } |
1489 | 1545 | ||
1546 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | ||
1547 | struct kvm_guest_debug *dbg) | ||
1548 | { | ||
1549 | return -EINVAL; | ||
1550 | } | ||
1551 | |||
1490 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 1552 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
1491 | { | 1553 | { |
1492 | return -ENOTSUPP; | 1554 | return -ENOTSUPP; |
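
With both ioctl handlers now marshalling through union kvmppc_one_reg, every register listed above is reached through the same two entry points. A sketch of the caller side for the newly exported timer control register, assuming KVM_REG_PPC_TCR encodes a 32-bit size so the kernel's one_reg_size() resolves to 4 bytes:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int read_tcr(int vcpu_fd, uint32_t *tcr)
{
        struct kvm_one_reg reg = {
                .id = KVM_REG_PPC_TCR,
                .addr = (uintptr_t)tcr, /* kernel copies 'size' bytes here */
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
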
@@ -1531,7 +1593,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, | |||
1531 | 1593 | ||
1532 | void kvmppc_core_commit_memory_region(struct kvm *kvm, | 1594 | void kvmppc_core_commit_memory_region(struct kvm *kvm, |
1533 | struct kvm_userspace_memory_region *mem, | 1595 | struct kvm_userspace_memory_region *mem, |
1534 | struct kvm_memory_slot old) | 1596 | const struct kvm_memory_slot *old) |
1535 | { | 1597 | { |
1536 | } | 1598 | } |
1537 | 1599 | ||
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index f4bb55c96517..2c6deb5ef2fe 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S | |||
@@ -54,8 +54,7 @@ | |||
54 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ | 54 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ |
55 | (1<<BOOKE_INTERRUPT_ALIGNMENT)) | 55 | (1<<BOOKE_INTERRUPT_ALIGNMENT)) |
56 | 56 | ||
57 | .macro KVM_HANDLER ivor_nr scratch srr0 | 57 | .macro __KVM_HANDLER ivor_nr scratch srr0 |
58 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
59 | /* Get pointer to vcpu and record exit number. */ | 58 | /* Get pointer to vcpu and record exit number. */ |
60 | mtspr \scratch , r4 | 59 | mtspr \scratch , r4 |
61 | mfspr r4, SPRN_SPRG_THREAD | 60 | mfspr r4, SPRN_SPRG_THREAD |
@@ -76,6 +75,43 @@ _GLOBAL(kvmppc_handler_\ivor_nr) | |||
76 | bctr | 75 | bctr |
77 | .endm | 76 | .endm |
78 | 77 | ||
78 | .macro KVM_HANDLER ivor_nr scratch srr0 | ||
79 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
80 | __KVM_HANDLER \ivor_nr \scratch \srr0 | ||
81 | .endm | ||
82 | |||
83 | .macro KVM_DBG_HANDLER ivor_nr scratch srr0 | ||
84 | _GLOBAL(kvmppc_handler_\ivor_nr) | ||
85 | mtspr \scratch, r4 | ||
86 | mfspr r4, SPRN_SPRG_THREAD | ||
87 | lwz r4, THREAD_KVM_VCPU(r4) | ||
88 | stw r3, VCPU_CRIT_SAVE(r4) | ||
89 | mfcr r3 | ||
90 | mfspr r4, SPRN_CSRR1 | ||
91 | andi. r4, r4, MSR_PR | ||
92 | bne 1f | ||
93 | /* debug interrupt happened in enter/exit path */ | ||
94 | mfspr r4, SPRN_CSRR1 | ||
95 | rlwinm r4, r4, 0, ~MSR_DE | ||
96 | mtspr SPRN_CSRR1, r4 | ||
97 | lis r4, 0xffff | ||
98 | ori r4, r4, 0xffff | ||
99 | mtspr SPRN_DBSR, r4 | ||
100 | mfspr r4, SPRN_SPRG_THREAD | ||
101 | lwz r4, THREAD_KVM_VCPU(r4) | ||
102 | mtcr r3 | ||
103 | lwz r3, VCPU_CRIT_SAVE(r4) | ||
104 | mfspr r4, \scratch | ||
105 | rfci | ||
106 | 1: /* debug interrupt happened in guest */ | ||
107 | mtcr r3 | ||
108 | mfspr r4, SPRN_SPRG_THREAD | ||
109 | lwz r4, THREAD_KVM_VCPU(r4) | ||
110 | lwz r3, VCPU_CRIT_SAVE(r4) | ||
111 | mfspr r4, \scratch | ||
112 | __KVM_HANDLER \ivor_nr \scratch \srr0 | ||
113 | .endm | ||
114 | |||
79 | .macro KVM_HANDLER_ADDR ivor_nr | 115 | .macro KVM_HANDLER_ADDR ivor_nr |
80 | .long kvmppc_handler_\ivor_nr | 116 | .long kvmppc_handler_\ivor_nr |
81 | .endm | 117 | .endm |
@@ -100,7 +136,7 @@ KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | |||
100 | KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 | 136 | KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 |
101 | KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 137 | KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
102 | KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 138 | KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
103 | KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 | 139 | KVM_DBG_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 |
104 | KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 140 | KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
105 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 141 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
106 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 | 142 | KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 |
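
KVM_DBG_HANDLER keys entirely on CSRR1[PR]: these guests run in user mode, so PR=1 means the debug interrupt was taken in the guest and flows into the ordinary __KVM_HANDLER path, while PR=0 means it hit the host-side enter/exit code and must be acked and dismissed without entering KVM. A runnable C model of that branch (MSR bit values match booke but are stated here as assumptions):

#include <stdio.h>

#define MSR_PR (1u << 14)               /* problem state */
#define MSR_DE (1u << 9)                /* debug interrupt enable */

/* Returns 1 when the interrupt must be dismissed (host enter/exit
 * path), 0 when it belongs to the guest. */
static int debug_in_host_path(unsigned int csrr1)
{
        return !(csrr1 & MSR_PR);
}

int main(void)
{
        unsigned int csrr1 = MSR_DE;    /* PR clear: enter/exit path */

        if (debug_in_host_path(csrr1))
                printf("ack DBSR, clear CSRR1[DE], rfci\n");
        else
                printf("route to __KVM_HANDLER\n");
        return 0;
}
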
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 6dd4de7802bf..ce6b73c29612 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
@@ -425,6 +425,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
425 | return kvmppc_set_sregs_ivor(vcpu, sregs); | 425 | return kvmppc_set_sregs_ivor(vcpu, sregs); |
426 | } | 426 | } |
427 | 427 | ||
428 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
429 | union kvmppc_one_reg *val) | ||
430 | { | ||
431 | int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); | ||
432 | return r; | ||
433 | } | ||
434 | |||
435 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
436 | union kvmppc_one_reg *val) | ||
437 | { | ||
438 | int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); | ||
439 | return r; | ||
440 | } | ||
441 | |||
428 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 442 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
429 | { | 443 | { |
430 | struct kvmppc_vcpu_e500 *vcpu_e500; | 444 | struct kvmppc_vcpu_e500 *vcpu_e500; |
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 33db48a8ce24..c2e5e98453a6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h | |||
@@ -23,6 +23,10 @@ | |||
23 | #include <asm/mmu-book3e.h> | 23 | #include <asm/mmu-book3e.h> |
24 | #include <asm/tlb.h> | 24 | #include <asm/tlb.h> |
25 | 25 | ||
26 | enum vcpu_ftr { | ||
27 | VCPU_FTR_MMU_V2 | ||
28 | }; | ||
29 | |||
26 | #define E500_PID_NUM 3 | 30 | #define E500_PID_NUM 3 |
27 | #define E500_TLB_NUM 2 | 31 | #define E500_TLB_NUM 2 |
28 | 32 | ||
@@ -131,6 +135,10 @@ void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); | |||
131 | void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | 135 | void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); |
132 | int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); | 136 | int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); |
133 | 137 | ||
138 | int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
139 | union kvmppc_one_reg *val); | ||
140 | int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
141 | union kvmppc_one_reg *val); | ||
134 | 142 | ||
135 | #ifdef CONFIG_KVM_E500V2 | 143 | #ifdef CONFIG_KVM_E500V2 |
136 | unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, | 144 | unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, |
@@ -295,4 +303,18 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) | |||
295 | #define get_tlb_sts(gtlbe) (MAS1_TS) | 303 | #define get_tlb_sts(gtlbe) (MAS1_TS) |
296 | #endif /* !BOOKE_HV */ | 304 | #endif /* !BOOKE_HV */ |
297 | 305 | ||
306 | static inline bool has_feature(const struct kvm_vcpu *vcpu, | ||
307 | enum vcpu_ftr ftr) | ||
308 | { | ||
309 | bool has_ftr; | ||
310 | switch (ftr) { | ||
311 | case VCPU_FTR_MMU_V2: | ||
312 | has_ftr = ((vcpu->arch.mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2); | ||
313 | break; | ||
314 | default: | ||
315 | return false; | ||
316 | } | ||
317 | return has_ftr; | ||
318 | } | ||
319 | |||
298 | #endif /* KVM_E500_H */ | 320 | #endif /* KVM_E500_H */ |
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e78f353a836a..b10a01243abd 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c | |||
@@ -284,6 +284,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
284 | case SPRN_TLB1CFG: | 284 | case SPRN_TLB1CFG: |
285 | *spr_val = vcpu->arch.tlbcfg[1]; | 285 | *spr_val = vcpu->arch.tlbcfg[1]; |
286 | break; | 286 | break; |
287 | case SPRN_TLB0PS: | ||
288 | if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) | ||
289 | return EMULATE_FAIL; | ||
290 | *spr_val = vcpu->arch.tlbps[0]; | ||
291 | break; | ||
292 | case SPRN_TLB1PS: | ||
293 | if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) | ||
294 | return EMULATE_FAIL; | ||
295 | *spr_val = vcpu->arch.tlbps[1]; | ||
296 | break; | ||
287 | case SPRN_L1CSR0: | 297 | case SPRN_L1CSR0: |
288 | *spr_val = vcpu_e500->l1csr0; | 298 | *spr_val = vcpu_e500->l1csr0; |
289 | break; | 299 | break; |
@@ -307,6 +317,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) | |||
307 | case SPRN_MMUCFG: | 317 | case SPRN_MMUCFG: |
308 | *spr_val = vcpu->arch.mmucfg; | 318 | *spr_val = vcpu->arch.mmucfg; |
309 | break; | 319 | break; |
320 | case SPRN_EPTCFG: | ||
321 | if (!has_feature(vcpu, VCPU_FTR_MMU_V2)) | ||
322 | return EMULATE_FAIL; | ||
323 | /* | ||
324 | * Legacy Linux guests access EPTCFG register even if the E.PT | ||
325 | * category is disabled in the VM. Give them a chance to live. | ||
326 | */ | ||
327 | *spr_val = vcpu->arch.eptcfg; | ||
328 | break; | ||
310 | 329 | ||
311 | /* extra exceptions */ | 330 | /* extra exceptions */ |
312 | case SPRN_IVOR32: | 331 | case SPRN_IVOR32: |
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 5c4475983f78..c41a5a96b558 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c | |||
@@ -596,6 +596,140 @@ int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
596 | return 0; | 596 | return 0; |
597 | } | 597 | } |
598 | 598 | ||
599 | int kvmppc_get_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
600 | union kvmppc_one_reg *val) | ||
601 | { | ||
602 | int r = 0; | ||
603 | long int i; | ||
604 | |||
605 | switch (id) { | ||
606 | case KVM_REG_PPC_MAS0: | ||
607 | *val = get_reg_val(id, vcpu->arch.shared->mas0); | ||
608 | break; | ||
609 | case KVM_REG_PPC_MAS1: | ||
610 | *val = get_reg_val(id, vcpu->arch.shared->mas1); | ||
611 | break; | ||
612 | case KVM_REG_PPC_MAS2: | ||
613 | *val = get_reg_val(id, vcpu->arch.shared->mas2); | ||
614 | break; | ||
615 | case KVM_REG_PPC_MAS7_3: | ||
616 | *val = get_reg_val(id, vcpu->arch.shared->mas7_3); | ||
617 | break; | ||
618 | case KVM_REG_PPC_MAS4: | ||
619 | *val = get_reg_val(id, vcpu->arch.shared->mas4); | ||
620 | break; | ||
621 | case KVM_REG_PPC_MAS6: | ||
622 | *val = get_reg_val(id, vcpu->arch.shared->mas6); | ||
623 | break; | ||
624 | case KVM_REG_PPC_MMUCFG: | ||
625 | *val = get_reg_val(id, vcpu->arch.mmucfg); | ||
626 | break; | ||
627 | case KVM_REG_PPC_EPTCFG: | ||
628 | *val = get_reg_val(id, vcpu->arch.eptcfg); | ||
629 | break; | ||
630 | case KVM_REG_PPC_TLB0CFG: | ||
631 | case KVM_REG_PPC_TLB1CFG: | ||
632 | case KVM_REG_PPC_TLB2CFG: | ||
633 | case KVM_REG_PPC_TLB3CFG: | ||
634 | i = id - KVM_REG_PPC_TLB0CFG; | ||
635 | *val = get_reg_val(id, vcpu->arch.tlbcfg[i]); | ||
636 | break; | ||
637 | case KVM_REG_PPC_TLB0PS: | ||
638 | case KVM_REG_PPC_TLB1PS: | ||
639 | case KVM_REG_PPC_TLB2PS: | ||
640 | case KVM_REG_PPC_TLB3PS: | ||
641 | i = id - KVM_REG_PPC_TLB0PS; | ||
642 | *val = get_reg_val(id, vcpu->arch.tlbps[i]); | ||
643 | break; | ||
644 | default: | ||
645 | r = -EINVAL; | ||
646 | break; | ||
647 | } | ||
648 | |||
649 | return r; | ||
650 | } | ||
651 | |||
652 | int kvmppc_set_one_reg_e500_tlb(struct kvm_vcpu *vcpu, u64 id, | ||
653 | union kvmppc_one_reg *val) | ||
654 | { | ||
655 | int r = 0; | ||
656 | long int i; | ||
657 | |||
658 | switch (id) { | ||
659 | case KVM_REG_PPC_MAS0: | ||
660 | vcpu->arch.shared->mas0 = set_reg_val(id, *val); | ||
661 | break; | ||
662 | case KVM_REG_PPC_MAS1: | ||
663 | vcpu->arch.shared->mas1 = set_reg_val(id, *val); | ||
664 | break; | ||
665 | case KVM_REG_PPC_MAS2: | ||
666 | vcpu->arch.shared->mas2 = set_reg_val(id, *val); | ||
667 | break; | ||
668 | case KVM_REG_PPC_MAS7_3: | ||
669 | vcpu->arch.shared->mas7_3 = set_reg_val(id, *val); | ||
670 | break; | ||
671 | case KVM_REG_PPC_MAS4: | ||
672 | vcpu->arch.shared->mas4 = set_reg_val(id, *val); | ||
673 | break; | ||
674 | case KVM_REG_PPC_MAS6: | ||
675 | vcpu->arch.shared->mas6 = set_reg_val(id, *val); | ||
676 | break; | ||
677 | /* Only allow MMU registers to be set to the config supported by KVM */ | ||
678 | case KVM_REG_PPC_MMUCFG: { | ||
679 | u32 reg = set_reg_val(id, *val); | ||
680 | if (reg != vcpu->arch.mmucfg) | ||
681 | r = -EINVAL; | ||
682 | break; | ||
683 | } | ||
684 | case KVM_REG_PPC_EPTCFG: { | ||
685 | u32 reg = set_reg_val(id, *val); | ||
686 | if (reg != vcpu->arch.eptcfg) | ||
687 | r = -EINVAL; | ||
688 | break; | ||
689 | } | ||
690 | case KVM_REG_PPC_TLB0CFG: | ||
691 | case KVM_REG_PPC_TLB1CFG: | ||
692 | case KVM_REG_PPC_TLB2CFG: | ||
693 | case KVM_REG_PPC_TLB3CFG: { | ||
694 | /* MMU geometry (N_ENTRY/ASSOC) can be set only using SW_TLB */ | ||
695 | u32 reg = set_reg_val(id, *val); | ||
696 | i = id - KVM_REG_PPC_TLB0CFG; | ||
697 | if (reg != vcpu->arch.tlbcfg[i]) | ||
698 | r = -EINVAL; | ||
699 | break; | ||
700 | } | ||
701 | case KVM_REG_PPC_TLB0PS: | ||
702 | case KVM_REG_PPC_TLB1PS: | ||
703 | case KVM_REG_PPC_TLB2PS: | ||
704 | case KVM_REG_PPC_TLB3PS: { | ||
705 | u32 reg = set_reg_val(id, *val); | ||
706 | i = id - KVM_REG_PPC_TLB0PS; | ||
707 | if (reg != vcpu->arch.tlbps[i]) | ||
708 | r = -EINVAL; | ||
709 | break; | ||
710 | } | ||
711 | default: | ||
712 | r = -EINVAL; | ||
713 | break; | ||
714 | } | ||
715 | |||
716 | return r; | ||
717 | } | ||
718 | |||
719 | static int vcpu_mmu_geometry_update(struct kvm_vcpu *vcpu, | ||
720 | struct kvm_book3e_206_tlb_params *params) | ||
721 | { | ||
722 | vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
723 | if (params->tlb_sizes[0] <= 2048) | ||
724 | vcpu->arch.tlbcfg[0] |= params->tlb_sizes[0]; | ||
725 | vcpu->arch.tlbcfg[0] |= params->tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; | ||
726 | |||
727 | vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
728 | vcpu->arch.tlbcfg[1] |= params->tlb_sizes[1]; | ||
729 | vcpu->arch.tlbcfg[1] |= params->tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; | ||
730 | return 0; | ||
731 | } | ||
732 | |||
599 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | 733 | int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, |
600 | struct kvm_config_tlb *cfg) | 734 | struct kvm_config_tlb *cfg) |
601 | { | 735 | { |
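
vcpu_mmu_geometry_update() rewrites only the geometry fields of TLBnCFG and keeps the host-derived feature bits intact. A worked standalone example of the same masking, assuming TLBnCFG_N_ENTRY is the low 12 bits and TLBnCFG_ASSOC occupies the top byte with a shift of 24 (the kernel takes all three from mmu-book3e.h):

#include <stdio.h>

#define N_ENTRY_MASK 0x00000fffu        /* assumed TLBnCFG_N_ENTRY */
#define ASSOC_MASK   0xff000000u        /* assumed TLBnCFG_ASSOC */
#define ASSOC_SHIFT  24                 /* assumed TLBnCFG_ASSOC_SHIFT */

int main(void)
{
        unsigned int tlbcfg = 0x04110200u;      /* made-up host TLB0CFG */
        unsigned int entries = 1024, ways = 8;  /* SW_TLB geometry */

        tlbcfg &= ~(N_ENTRY_MASK | ASSOC_MASK); /* keep feature bits */
        tlbcfg |= entries;
        tlbcfg |= ways << ASSOC_SHIFT;

        printf("TLB0CFG = %#x\n", tlbcfg);      /* 0x8110400 */
        return 0;
}
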
@@ -692,16 +826,8 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, | |||
692 | vcpu_e500->gtlb_offset[0] = 0; | 826 | vcpu_e500->gtlb_offset[0] = 0; |
693 | vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; | 827 | vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; |
694 | 828 | ||
695 | vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; | 829 | /* Update vcpu's MMU geometry based on SW_TLB input */ |
696 | 830 | vcpu_mmu_geometry_update(vcpu, ¶ms); | |
697 | vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
698 | if (params.tlb_sizes[0] <= 2048) | ||
699 | vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; | ||
700 | vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; | ||
701 | |||
702 | vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
703 | vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; | ||
704 | vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; | ||
705 | 831 | ||
706 | vcpu_e500->shared_tlb_pages = pages; | 832 | vcpu_e500->shared_tlb_pages = pages; |
707 | vcpu_e500->num_shared_tlb_pages = num_pages; | 833 | vcpu_e500->num_shared_tlb_pages = num_pages; |
@@ -737,6 +863,39 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, | |||
737 | return 0; | 863 | return 0; |
738 | } | 864 | } |
739 | 865 | ||
866 | /* Vcpu's MMU default configuration */ | ||
867 | static int vcpu_mmu_init(struct kvm_vcpu *vcpu, | ||
868 | struct kvmppc_e500_tlb_params *params) | ||
869 | { | ||
870 | /* Initialize RASIZE, PIDSIZE, NTLBS and MAVN fields with host values */ | ||
871 | vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; | ||
872 | |||
873 | /* Initialize TLBnCFG fields with host values and SW_TLB geometry */ | ||
874 | vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & | ||
875 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
876 | vcpu->arch.tlbcfg[0] |= params[0].entries; | ||
877 | vcpu->arch.tlbcfg[0] |= params[0].ways << TLBnCFG_ASSOC_SHIFT; | ||
878 | |||
879 | vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & | ||
880 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
881 | vcpu->arch.tlbcfg[1] |= params[1].entries; | ||
882 | vcpu->arch.tlbcfg[1] |= params[1].ways << TLBnCFG_ASSOC_SHIFT; | ||
883 | |||
884 | if (has_feature(vcpu, VCPU_FTR_MMU_V2)) { | ||
885 | vcpu->arch.tlbps[0] = mfspr(SPRN_TLB0PS); | ||
886 | vcpu->arch.tlbps[1] = mfspr(SPRN_TLB1PS); | ||
887 | |||
888 | vcpu->arch.mmucfg &= ~MMUCFG_LRAT; | ||
889 | |||
890 | /* Guest mmu emulation currently doesn't handle E.PT */ | ||
891 | vcpu->arch.eptcfg = 0; | ||
892 | vcpu->arch.tlbcfg[0] &= ~TLBnCFG_PT; | ||
893 | vcpu->arch.tlbcfg[1] &= ~TLBnCFG_IND; | ||
894 | } | ||
895 | |||
896 | return 0; | ||
897 | } | ||
898 | |||
740 | int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | 899 | int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) |
741 | { | 900 | { |
742 | struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; | 901 | struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; |
@@ -781,18 +940,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
781 | if (!vcpu_e500->g2h_tlb1_map) | 940 | if (!vcpu_e500->g2h_tlb1_map) |
782 | goto err; | 941 | goto err; |
783 | 942 | ||
784 | /* Init TLB configuration register */ | 943 | vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); |
785 | vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & | ||
786 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
787 | vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; | ||
788 | vcpu->arch.tlbcfg[0] |= | ||
789 | vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; | ||
790 | |||
791 | vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & | ||
792 | ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); | ||
793 | vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; | ||
794 | vcpu->arch.tlbcfg[1] |= | ||
795 | vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; | ||
796 | 944 | ||
797 | kvmppc_recalc_tlb1map_range(vcpu_e500); | 945 | kvmppc_recalc_tlb1map_range(vcpu_e500); |
798 | return 0; | 946 | return 0; |
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 2f4baa074b2e..753cc99eff2b 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c | |||
@@ -177,6 +177,8 @@ int kvmppc_core_check_processor_compat(void) | |||
177 | r = 0; | 177 | r = 0; |
178 | else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) | 178 | else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) |
179 | r = 0; | 179 | r = 0; |
180 | else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) | ||
181 | r = 0; | ||
180 | else | 182 | else |
181 | r = -ENOTSUPP; | 183 | r = -ENOTSUPP; |
182 | 184 | ||
@@ -260,6 +262,20 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) | |||
260 | return kvmppc_set_sregs_ivor(vcpu, sregs); | 262 | return kvmppc_set_sregs_ivor(vcpu, sregs); |
261 | } | 263 | } |
262 | 264 | ||
265 | int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
266 | union kvmppc_one_reg *val) | ||
267 | { | ||
268 | int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); | ||
269 | return r; | ||
270 | } | ||
271 | |||
272 | int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, | ||
273 | union kvmppc_one_reg *val) | ||
274 | { | ||
275 | int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); | ||
276 | return r; | ||
277 | } | ||
278 | |||
263 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | 279 | struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) |
264 | { | 280 | { |
265 | struct kvmppc_vcpu_e500 *vcpu_e500; | 281 | struct kvmppc_vcpu_e500 *vcpu_e500; |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 7a73b6f72a8b..631a2650e4e4 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -38,6 +38,7 @@ | |||
38 | 38 | ||
39 | #define OP_31_XOP_TRAP 4 | 39 | #define OP_31_XOP_TRAP 4 |
40 | #define OP_31_XOP_LWZX 23 | 40 | #define OP_31_XOP_LWZX 23 |
41 | #define OP_31_XOP_DCBST 54 | ||
41 | #define OP_31_XOP_TRAP_64 68 | 42 | #define OP_31_XOP_TRAP_64 68 |
42 | #define OP_31_XOP_DCBF 86 | 43 | #define OP_31_XOP_DCBF 86 |
43 | #define OP_31_XOP_LBZX 87 | 44 | #define OP_31_XOP_LBZX 87 |
@@ -370,6 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
370 | emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); | 371 | emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); |
371 | break; | 372 | break; |
372 | 373 | ||
374 | case OP_31_XOP_DCBST: | ||
373 | case OP_31_XOP_DCBF: | 375 | case OP_31_XOP_DCBF: |
374 | case OP_31_XOP_DCBI: | 376 | case OP_31_XOP_DCBI: |
375 | /* Do nothing. The guest is performing dcbi because | 377 | /* Do nothing. The guest is performing dcbi because |
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000000000000..5a9a10b90762 --- /dev/null +++ b/arch/powerpc/kvm/irq.h | |||
@@ -0,0 +1,20 @@ | |||
1 | #ifndef __IRQ_H | ||
2 | #define __IRQ_H | ||
3 | |||
4 | #include <linux/kvm_host.h> | ||
5 | |||
6 | static inline int irqchip_in_kernel(struct kvm *kvm) | ||
7 | { | ||
8 | int ret = 0; | ||
9 | |||
10 | #ifdef CONFIG_KVM_MPIC | ||
11 | ret = ret || (kvm->arch.mpic != NULL); | ||
12 | #endif | ||
13 | #ifdef CONFIG_KVM_XICS | ||
14 | ret = ret || (kvm->arch.xics != NULL); | ||
15 | #endif | ||
16 | smp_rmb(); | ||
17 | return ret; | ||
18 | } | ||
19 | |||
20 | #endif | ||
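
The smp_rmb() above is one half of a publish/subscribe pairing: whichever path creates the in-kernel irqchip must issue the matching write barrier before storing the pointer, so a reader that sees a non-NULL kvm->arch.mpic also sees its initialization. A hedged sketch of that producer side (the real create path appears later in this series and may order things differently):

static int mpic_publish(struct kvm *kvm)
{
        struct openpic *opp = kzalloc(sizeof(*opp), GFP_KERNEL);

        if (!opp)
                return -ENOMEM;
        /* ... initialize opp ... */
        smp_wmb();              /* make initialization visible ... */
        kvm->arch.mpic = opp;   /* ... before the pointer itself */
        return 0;
}
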
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c new file mode 100644 index 000000000000..2861ae9eaae6 --- /dev/null +++ b/arch/powerpc/kvm/mpic.c | |||
@@ -0,0 +1,1853 @@ | |||
1 | /* | ||
2 | * OpenPIC emulation | ||
3 | * | ||
4 | * Copyright (c) 2004 Jocelyn Mayer | ||
5 | * 2011 Alexander Graf | ||
6 | * | ||
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
8 | * of this software and associated documentation files (the "Software"), to deal | ||
9 | * in the Software without restriction, including without limitation the rights | ||
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
11 | * copies of the Software, and to permit persons to whom the Software is | ||
12 | * furnished to do so, subject to the following conditions: | ||
13 | * | ||
14 | * The above copyright notice and this permission notice shall be included in | ||
15 | * all copies or substantial portions of the Software. | ||
16 | * | ||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
23 | * THE SOFTWARE. | ||
24 | */ | ||
25 | |||
26 | #include <linux/slab.h> | ||
27 | #include <linux/mutex.h> | ||
28 | #include <linux/kvm_host.h> | ||
29 | #include <linux/errno.h> | ||
30 | #include <linux/fs.h> | ||
31 | #include <linux/anon_inodes.h> | ||
32 | #include <asm/uaccess.h> | ||
33 | #include <asm/mpic.h> | ||
34 | #include <asm/kvm_para.h> | ||
35 | #include <asm/kvm_host.h> | ||
36 | #include <asm/kvm_ppc.h> | ||
37 | #include "iodev.h" | ||
38 | |||
39 | #define MAX_CPU 32 | ||
40 | #define MAX_SRC 256 | ||
41 | #define MAX_TMR 4 | ||
42 | #define MAX_IPI 4 | ||
43 | #define MAX_MSI 8 | ||
44 | #define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR) | ||
45 | #define VID 0x03 /* MPIC version ID */ | ||
46 | |||
47 | /* OpenPIC capability flags */ | ||
48 | #define OPENPIC_FLAG_IDR_CRIT (1 << 0) | ||
49 | #define OPENPIC_FLAG_ILR (2 << 0) | ||
50 | |||
51 | /* OpenPIC address map */ | ||
52 | #define OPENPIC_REG_SIZE 0x40000 | ||
53 | #define OPENPIC_GLB_REG_START 0x0 | ||
54 | #define OPENPIC_GLB_REG_SIZE 0x10F0 | ||
55 | #define OPENPIC_TMR_REG_START 0x10F0 | ||
56 | #define OPENPIC_TMR_REG_SIZE 0x220 | ||
57 | #define OPENPIC_MSI_REG_START 0x1600 | ||
58 | #define OPENPIC_MSI_REG_SIZE 0x200 | ||
59 | #define OPENPIC_SUMMARY_REG_START 0x3800 | ||
60 | #define OPENPIC_SUMMARY_REG_SIZE 0x800 | ||
61 | #define OPENPIC_SRC_REG_START 0x10000 | ||
62 | #define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) | ||
63 | #define OPENPIC_CPU_REG_START 0x20000 | ||
64 | #define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000)) | ||
65 | |||
66 | struct fsl_mpic_info { | ||
67 | int max_ext; | ||
68 | }; | ||
69 | |||
70 | static struct fsl_mpic_info fsl_mpic_20 = { | ||
71 | .max_ext = 12, | ||
72 | }; | ||
73 | |||
74 | static struct fsl_mpic_info fsl_mpic_42 = { | ||
75 | .max_ext = 12, | ||
76 | }; | ||
77 | |||
78 | #define FRR_NIRQ_SHIFT 16 | ||
79 | #define FRR_NCPU_SHIFT 8 | ||
80 | #define FRR_VID_SHIFT 0 | ||
81 | |||
82 | #define VID_REVISION_1_2 2 | ||
83 | #define VID_REVISION_1_3 3 | ||
84 | |||
85 | #define VIR_GENERIC 0x00000000 /* Generic Vendor ID */ | ||
86 | |||
87 | #define GCR_RESET 0x80000000 | ||
88 | #define GCR_MODE_PASS 0x00000000 | ||
89 | #define GCR_MODE_MIXED 0x20000000 | ||
90 | #define GCR_MODE_PROXY 0x60000000 | ||
91 | |||
92 | #define TBCR_CI 0x80000000 /* count inhibit */ | ||
93 | #define TCCR_TOG 0x80000000 /* toggles when decrement to zero */ | ||
94 | |||
95 | #define IDR_EP_SHIFT 31 | ||
96 | #define IDR_EP_MASK (1 << IDR_EP_SHIFT) | ||
97 | #define IDR_CI0_SHIFT 30 | ||
98 | #define IDR_CI1_SHIFT 29 | ||
99 | #define IDR_P1_SHIFT 1 | ||
100 | #define IDR_P0_SHIFT 0 | ||
101 | |||
102 | #define ILR_INTTGT_MASK 0x000000ff | ||
103 | #define ILR_INTTGT_INT 0x00 | ||
104 | #define ILR_INTTGT_CINT 0x01 /* critical */ | ||
105 | #define ILR_INTTGT_MCP 0x02 /* machine check */ | ||
106 | #define NUM_OUTPUTS 3 | ||
107 | |||
108 | #define MSIIR_OFFSET 0x140 | ||
109 | #define MSIIR_SRS_SHIFT 29 | ||
110 | #define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT) | ||
111 | #define MSIIR_IBS_SHIFT 24 | ||
112 | #define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT) | ||
113 | |||
114 | static int get_current_cpu(void) | ||
115 | { | ||
116 | #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) | ||
117 | struct kvm_vcpu *vcpu = current->thread.kvm_vcpu; | ||
118 | return vcpu ? vcpu->arch.irq_cpu_id : -1; | ||
119 | #else | ||
120 | /* XXX */ | ||
121 | return -1; | ||
122 | #endif | ||
123 | } | ||
124 | |||
125 | static int openpic_cpu_write_internal(void *opaque, gpa_t addr, | ||
126 | u32 val, int idx); | ||
127 | static int openpic_cpu_read_internal(void *opaque, gpa_t addr, | ||
128 | u32 *ptr, int idx); | ||
129 | |||
130 | enum irq_type { | ||
131 | IRQ_TYPE_NORMAL = 0, | ||
132 | IRQ_TYPE_FSLINT, /* FSL internal interrupt -- level only */ | ||
133 | IRQ_TYPE_FSLSPECIAL, /* FSL timer/IPI interrupt, edge, no polarity */ | ||
134 | }; | ||
135 | |||
136 | struct irq_queue { | ||
137 | /* Round up to the nearest 64 IRQs so that the queue length | ||
138 | * won't change when moving between 32 and 64 bit hosts. | ||
139 | */ | ||
140 | unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)]; | ||
141 | int next; | ||
142 | int priority; | ||
143 | }; | ||
144 | |||
145 | struct irq_source { | ||
146 | uint32_t ivpr; /* IRQ vector/priority register */ | ||
147 | uint32_t idr; /* IRQ destination register */ | ||
148 | uint32_t destmask; /* bitmap of CPU destinations */ | ||
149 | int last_cpu; | ||
150 | int output; /* IRQ level, e.g. ILR_INTTGT_INT */ | ||
151 | int pending; /* TRUE if IRQ is pending */ | ||
152 | enum irq_type type; | ||
153 | bool level:1; /* level-triggered */ | ||
154 | bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */ | ||
155 | }; | ||
156 | |||
157 | #define IVPR_MASK_SHIFT 31 | ||
158 | #define IVPR_MASK_MASK (1 << IVPR_MASK_SHIFT) | ||
159 | #define IVPR_ACTIVITY_SHIFT 30 | ||
160 | #define IVPR_ACTIVITY_MASK (1 << IVPR_ACTIVITY_SHIFT) | ||
161 | #define IVPR_MODE_SHIFT 29 | ||
162 | #define IVPR_MODE_MASK (1 << IVPR_MODE_SHIFT) | ||
163 | #define IVPR_POLARITY_SHIFT 23 | ||
164 | #define IVPR_POLARITY_MASK (1 << IVPR_POLARITY_SHIFT) | ||
165 | #define IVPR_SENSE_SHIFT 22 | ||
166 | #define IVPR_SENSE_MASK (1 << IVPR_SENSE_SHIFT) | ||
167 | |||
168 | #define IVPR_PRIORITY_MASK (0xF << 16) | ||
169 | #define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16)) | ||
170 | #define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask) | ||
171 | |||
172 | /* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */ | ||
173 | #define IDR_EP 0x80000000 /* external pin */ | ||
174 | #define IDR_CI 0x40000000 /* critical interrupt */ | ||
175 | |||
176 | struct irq_dest { | ||
177 | struct kvm_vcpu *vcpu; | ||
178 | |||
179 | int32_t ctpr; /* CPU current task priority */ | ||
180 | struct irq_queue raised; | ||
181 | struct irq_queue servicing; | ||
182 | |||
183 | /* Count of IRQ sources asserting on non-INT outputs */ | ||
184 | uint32_t outputs_active[NUM_OUTPUTS]; | ||
185 | }; | ||
186 | |||
187 | #define MAX_MMIO_REGIONS 10 | ||
188 | |||
189 | struct openpic { | ||
190 | struct kvm *kvm; | ||
191 | struct kvm_device *dev; | ||
192 | struct kvm_io_device mmio; | ||
193 | const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS]; | ||
194 | int num_mmio_regions; | ||
195 | |||
196 | gpa_t reg_base; | ||
197 | spinlock_t lock; | ||
198 | |||
199 | /* Behavior control */ | ||
200 | struct fsl_mpic_info *fsl; | ||
201 | uint32_t model; | ||
202 | uint32_t flags; | ||
203 | uint32_t nb_irqs; | ||
204 | uint32_t vid; | ||
205 | uint32_t vir; /* Vendor identification register */ | ||
206 | uint32_t vector_mask; | ||
207 | uint32_t tfrr_reset; | ||
208 | uint32_t ivpr_reset; | ||
209 | uint32_t idr_reset; | ||
210 | uint32_t brr1; | ||
211 | uint32_t mpic_mode_mask; | ||
212 | |||
213 | /* Global registers */ | ||
214 | uint32_t frr; /* Feature reporting register */ | ||
215 | uint32_t gcr; /* Global configuration register */ | ||
216 | uint32_t pir; /* Processor initialization register */ | ||
217 | uint32_t spve; /* Spurious vector register */ | ||
218 | uint32_t tfrr; /* Timer frequency reporting register */ | ||
219 | /* Source registers */ | ||
220 | struct irq_source src[MAX_IRQ]; | ||
221 | /* Local registers per output pin */ | ||
222 | struct irq_dest dst[MAX_CPU]; | ||
223 | uint32_t nb_cpus; | ||
224 | /* Timer registers */ | ||
225 | struct { | ||
226 | uint32_t tccr; /* Global timer current count register */ | ||
227 | uint32_t tbcr; /* Global timer base count register */ | ||
228 | } timers[MAX_TMR]; | ||
229 | /* Shared MSI registers */ | ||
230 | struct { | ||
231 | uint32_t msir; /* Shared Message Signaled Interrupt Register */ | ||
232 | } msi[MAX_MSI]; | ||
233 | uint32_t max_irq; | ||
234 | uint32_t irq_ipi0; | ||
235 | uint32_t irq_tim0; | ||
236 | uint32_t irq_msi; | ||
237 | }; | ||
238 | |||
239 | |||
240 | static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, | ||
241 | int output) | ||
242 | { | ||
243 | struct kvm_interrupt irq = { | ||
244 | .irq = KVM_INTERRUPT_SET_LEVEL, | ||
245 | }; | ||
246 | |||
247 | if (!dst->vcpu) { | ||
248 | pr_debug("%s: destination cpu %d does not exist\n", | ||
249 | __func__, (int)(dst - &opp->dst[0])); | ||
250 | return; | ||
251 | } | ||
252 | |||
253 | pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, | ||
254 | output); | ||
255 | |||
256 | if (output != ILR_INTTGT_INT) /* TODO */ | ||
257 | return; | ||
258 | |||
259 | kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq); | ||
260 | } | ||
261 | |||
262 | static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, | ||
263 | int output) | ||
264 | { | ||
265 | if (!dst->vcpu) { | ||
266 | pr_debug("%s: destination cpu %d does not exist\n", | ||
267 | __func__, (int)(dst - &opp->dst[0])); | ||
268 | return; | ||
269 | } | ||
270 | |||
271 | pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id, | ||
272 | output); | ||
273 | |||
274 | if (output != ILR_INTTGT_INT) /* TODO */ | ||
275 | return; | ||
276 | |||
277 | kvmppc_core_dequeue_external(dst->vcpu); | ||
278 | } | ||
279 | |||
280 | static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ) | ||
281 | { | ||
282 | set_bit(n_IRQ, q->queue); | ||
283 | } | ||
284 | |||
285 | static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ) | ||
286 | { | ||
287 | clear_bit(n_IRQ, q->queue); | ||
288 | } | ||
289 | |||
290 | static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ) | ||
291 | { | ||
292 | return test_bit(n_IRQ, q->queue); | ||
293 | } | ||
294 | |||
295 | static void IRQ_check(struct openpic *opp, struct irq_queue *q) | ||
296 | { | ||
297 | int irq = -1; | ||
298 | int next = -1; | ||
299 | int priority = -1; | ||
300 | |||
301 | for (;;) { | ||
302 | irq = find_next_bit(q->queue, opp->max_irq, irq + 1); | ||
303 | if (irq == opp->max_irq) | ||
304 | break; | ||
305 | |||
306 | pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n", | ||
307 | irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority); | ||
308 | |||
309 | if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) { | ||
310 | next = irq; | ||
311 | priority = IVPR_PRIORITY(opp->src[irq].ivpr); | ||
312 | } | ||
313 | } | ||
314 | |||
315 | q->next = next; | ||
316 | q->priority = priority; | ||
317 | } | ||
318 | |||
319 | static int IRQ_get_next(struct openpic *opp, struct irq_queue *q) | ||
320 | { | ||
321 | /* XXX: optimize */ | ||
322 | IRQ_check(opp, q); | ||
323 | |||
324 | return q->next; | ||
325 | } | ||
326 | |||
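
IRQ_check() rescans the queue bitmap and caches the highest-priority pending source in q->next / q->priority. A small userspace model of the same scan, with an array standing in for the bitmap plus find_next_bit() (priority 0 meaning "not pending" is a simplification of the real IVPR encoding):

#include <stdio.h>

int main(void)
{
        int prio[8] = { 0, 3, 0, 7, 0, 7, 1, 0 };      /* 0 = not pending */
        int next = -1, best = -1, irq;

        for (irq = 0; irq < 8; irq++) {
                if (!prio[irq])
                        continue;               /* bit not set in queue */
                if (prio[irq] > best) {         /* strict >: ties keep lower IRQ */
                        best = prio[irq];
                        next = irq;
                }
        }
        printf("next=%d priority=%d\n", next, best);    /* next=3 priority=7 */
        return 0;
}
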
327 | static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, | ||
328 | bool active, bool was_active) | ||
329 | { | ||
330 | struct irq_dest *dst; | ||
331 | struct irq_source *src; | ||
332 | int priority; | ||
333 | |||
334 | dst = &opp->dst[n_CPU]; | ||
335 | src = &opp->src[n_IRQ]; | ||
336 | |||
337 | pr_debug("%s: IRQ %d active %d was %d\n", | ||
338 | __func__, n_IRQ, active, was_active); | ||
339 | |||
340 | if (src->output != ILR_INTTGT_INT) { | ||
341 | pr_debug("%s: output %d irq %d active %d was %d count %d\n", | ||
342 | __func__, src->output, n_IRQ, active, was_active, | ||
343 | dst->outputs_active[src->output]); | ||
344 | |||
345 | /* On Freescale MPIC, critical interrupts ignore priority, | ||
346 | * IACK, EOI, etc. Before MPIC v4.1 they also ignore | ||
347 | * masking. | ||
348 | */ | ||
349 | if (active) { | ||
350 | if (!was_active && | ||
351 | dst->outputs_active[src->output]++ == 0) { | ||
352 | pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n", | ||
353 | __func__, src->output, n_CPU, n_IRQ); | ||
354 | mpic_irq_raise(opp, dst, src->output); | ||
355 | } | ||
356 | } else { | ||
357 | if (was_active && | ||
358 | --dst->outputs_active[src->output] == 0) { | ||
359 | pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n", | ||
360 | __func__, src->output, n_CPU, n_IRQ); | ||
361 | mpic_irq_lower(opp, dst, src->output); | ||
362 | } | ||
363 | } | ||
364 | |||
365 | return; | ||
366 | } | ||
367 | |||
368 | priority = IVPR_PRIORITY(src->ivpr); | ||
369 | |||
370 | /* Even if the interrupt doesn't have enough priority, | ||
371 | * it is still raised, in case ctpr is lowered later. | ||
372 | */ | ||
373 | if (active) | ||
374 | IRQ_setbit(&dst->raised, n_IRQ); | ||
375 | else | ||
376 | IRQ_resetbit(&dst->raised, n_IRQ); | ||
377 | |||
378 | IRQ_check(opp, &dst->raised); | ||
379 | |||
380 | if (active && priority <= dst->ctpr) { | ||
381 | pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n", | ||
382 | __func__, n_IRQ, priority, dst->ctpr, n_CPU); | ||
383 | active = 0; | ||
384 | } | ||
385 | |||
386 | if (active) { | ||
387 | if (IRQ_get_next(opp, &dst->servicing) >= 0 && | ||
388 | priority <= dst->servicing.priority) { | ||
389 | pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n", | ||
390 | __func__, n_IRQ, dst->servicing.next, n_CPU); | ||
391 | } else { | ||
392 | pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n", | ||
393 | __func__, n_CPU, n_IRQ, dst->raised.next); | ||
394 | mpic_irq_raise(opp, dst, ILR_INTTGT_INT); | ||
395 | } | ||
396 | } else { | ||
397 | IRQ_get_next(opp, &dst->servicing); | ||
398 | if (dst->raised.priority > dst->ctpr && | ||
399 | dst->raised.priority > dst->servicing.priority) { | ||
400 | pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n", | ||
401 | __func__, n_IRQ, dst->raised.next, | ||
402 | dst->raised.priority, dst->ctpr, | ||
403 | dst->servicing.priority, n_CPU); | ||
404 | /* IRQ line stays asserted */ | ||
405 | } else { | ||
406 | pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n", | ||
407 | __func__, n_IRQ, dst->ctpr, | ||
408 | dst->servicing.priority, n_CPU); | ||
409 | mpic_irq_lower(opp, dst, ILR_INTTGT_INT); | ||
410 | } | ||
411 | } | ||
412 | } | ||
413 | |||
414 | /* update pic state because registers for n_IRQ have changed value */ | ||
415 | static void openpic_update_irq(struct openpic *opp, int n_IRQ) | ||
416 | { | ||
417 | struct irq_source *src; | ||
418 | bool active, was_active; | ||
419 | int i; | ||
420 | |||
421 | src = &opp->src[n_IRQ]; | ||
422 | active = src->pending; | ||
423 | |||
424 | if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) { | ||
425 | /* Interrupt source is disabled */ | ||
426 | pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ); | ||
427 | active = false; | ||
428 | } | ||
429 | |||
430 | was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK); | ||
431 | |||
432 | /* | ||
433 | * We don't have a similar check for already-active because | ||
434 | * ctpr may have changed and we need to withdraw the interrupt. | ||
435 | */ | ||
436 | if (!active && !was_active) { | ||
437 | pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ); | ||
438 | return; | ||
439 | } | ||
440 | |||
441 | if (active) | ||
442 | src->ivpr |= IVPR_ACTIVITY_MASK; | ||
443 | else | ||
444 | src->ivpr &= ~IVPR_ACTIVITY_MASK; | ||
445 | |||
446 | if (src->destmask == 0) { | ||
447 | /* No target */ | ||
448 | pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ); | ||
449 | return; | ||
450 | } | ||
451 | |||
452 | if (src->destmask == (1 << src->last_cpu)) { | ||
453 | /* Only one CPU is allowed to receive this IRQ */ | ||
454 | IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active); | ||
455 | } else if (!(src->ivpr & IVPR_MODE_MASK)) { | ||
456 | /* Directed delivery mode */ | ||
457 | for (i = 0; i < opp->nb_cpus; i++) { | ||
458 | if (src->destmask & (1 << i)) { | ||
459 | IRQ_local_pipe(opp, i, n_IRQ, active, | ||
460 | was_active); | ||
461 | } | ||
462 | } | ||
463 | } else { | ||
464 | /* Distributed delivery mode */ | ||
465 | for (i = src->last_cpu + 1; i != src->last_cpu; i++) { | ||
466 | if (i == opp->nb_cpus) | ||
467 | i = 0; | ||
468 | |||
469 | if (src->destmask & (1 << i)) { | ||
470 | IRQ_local_pipe(opp, i, n_IRQ, active, | ||
471 | was_active); | ||
472 | src->last_cpu = i; | ||
473 | break; | ||
474 | } | ||
475 | } | ||
476 | } | ||
477 | } | ||
478 | |||
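
In distributed delivery mode, openpic_update_irq() rotates among the CPUs in destmask: start one past the previous target, wrap at nb_cpus, stop at the first eligible CPU, and remember it as the new last_cpu. The same loop, runnable on its own (the kernel has already rejected destmask == 0 before reaching it):

#include <stdio.h>

int main(void)
{
        unsigned int destmask = 0x5;    /* CPUs 0 and 2 */
        int nb_cpus = 4, last_cpu = 0, i;

        for (i = last_cpu + 1; i != last_cpu; i++) {
                if (i == nb_cpus)
                        i = 0;          /* wrap around */
                if (destmask & (1 << i)) {
                        last_cpu = i;
                        break;
                }
        }
        printf("delivered to cpu %d\n", last_cpu);      /* cpu 2 */
        return 0;
}
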
479 | static void openpic_set_irq(void *opaque, int n_IRQ, int level) | ||
480 | { | ||
481 | struct openpic *opp = opaque; | ||
482 | struct irq_source *src; | ||
483 | |||
484 | if (n_IRQ >= MAX_IRQ) { | ||
485 | WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ); | ||
486 | return; | ||
487 | } | ||
488 | |||
489 | src = &opp->src[n_IRQ]; | ||
490 | pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n", | ||
491 | n_IRQ, level, src->ivpr); | ||
492 | if (src->level) { | ||
493 | /* level-sensitive irq */ | ||
494 | src->pending = level; | ||
495 | openpic_update_irq(opp, n_IRQ); | ||
496 | } else { | ||
497 | /* edge-sensitive irq */ | ||
498 | if (level) { | ||
499 | src->pending = 1; | ||
500 | openpic_update_irq(opp, n_IRQ); | ||
501 | } | ||
502 | |||
503 | if (src->output != ILR_INTTGT_INT) { | ||
504 | /* Edge-triggered interrupts shouldn't be used | ||
505 | * with non-INT delivery, but just in case, | ||
506 | * try to make it do something sane rather than | ||
507 | * cause an interrupt storm. This is close to | ||
508 | * what you'd probably see happen in real hardware. | ||
509 | */ | ||
510 | src->pending = 0; | ||
511 | openpic_update_irq(opp, n_IRQ); | ||
512 | } | ||
513 | } | ||
514 | } | ||
515 | |||
516 | static void openpic_reset(struct openpic *opp) | ||
517 | { | ||
518 | int i; | ||
519 | |||
520 | opp->gcr = GCR_RESET; | ||
521 | /* Initialise controller registers */ | ||
522 | opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) | | ||
523 | (opp->vid << FRR_VID_SHIFT); | ||
524 | |||
525 | opp->pir = 0; | ||
526 | opp->spve = -1 & opp->vector_mask; | ||
527 | opp->tfrr = opp->tfrr_reset; | ||
528 | /* Initialise IRQ sources */ | ||
529 | for (i = 0; i < opp->max_irq; i++) { | ||
530 | opp->src[i].ivpr = opp->ivpr_reset; | ||
531 | opp->src[i].idr = opp->idr_reset; | ||
532 | |||
533 | switch (opp->src[i].type) { | ||
534 | case IRQ_TYPE_NORMAL: | ||
535 | opp->src[i].level = | ||
536 | !!(opp->ivpr_reset & IVPR_SENSE_MASK); | ||
537 | break; | ||
538 | |||
539 | case IRQ_TYPE_FSLINT: | ||
540 | opp->src[i].ivpr |= IVPR_POLARITY_MASK; | ||
541 | break; | ||
542 | |||
543 | case IRQ_TYPE_FSLSPECIAL: | ||
544 | break; | ||
545 | } | ||
546 | } | ||
547 | /* Initialise IRQ destinations */ | ||
548 | for (i = 0; i < MAX_CPU; i++) { | ||
549 | opp->dst[i].ctpr = 15; | ||
550 | memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue)); | ||
551 | opp->dst[i].raised.next = -1; | ||
552 | memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue)); | ||
553 | opp->dst[i].servicing.next = -1; | ||
554 | } | ||
555 | /* Initialise timers */ | ||
556 | for (i = 0; i < MAX_TMR; i++) { | ||
557 | opp->timers[i].tccr = 0; | ||
558 | opp->timers[i].tbcr = TBCR_CI; | ||
559 | } | ||
560 | /* Go out of RESET state */ | ||
561 | opp->gcr = 0; | ||
562 | } | ||
563 | |||
564 | static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ) | ||
565 | { | ||
566 | return opp->src[n_IRQ].idr; | ||
567 | } | ||
568 | |||
569 | static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ) | ||
570 | { | ||
571 | if (opp->flags & OPENPIC_FLAG_ILR) | ||
572 | return opp->src[n_IRQ].output; | ||
573 | |||
574 | return 0xffffffff; | ||
575 | } | ||
576 | |||
577 | static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ) | ||
578 | { | ||
579 | return opp->src[n_IRQ].ivpr; | ||
580 | } | ||
581 | |||
582 | static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, | ||
583 | uint32_t val) | ||
584 | { | ||
585 | struct irq_source *src = &opp->src[n_IRQ]; | ||
586 | uint32_t normal_mask = (1UL << opp->nb_cpus) - 1; | ||
587 | uint32_t crit_mask = 0; | ||
588 | uint32_t mask = normal_mask; | ||
589 | int crit_shift = IDR_EP_SHIFT - opp->nb_cpus; | ||
590 | int i; | ||
591 | |||
592 | if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { | ||
593 | crit_mask = mask << crit_shift; | ||
594 | mask |= crit_mask | IDR_EP; | ||
595 | } | ||
596 | |||
597 | src->idr = val & mask; | ||
598 | pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr); | ||
599 | |||
600 | if (opp->flags & OPENPIC_FLAG_IDR_CRIT) { | ||
601 | if (src->idr & crit_mask) { | ||
602 | if (src->idr & normal_mask) { | ||
603 | pr_debug("%s: IRQ configured for multiple output types, using critical\n", | ||
604 | __func__); | ||
605 | } | ||
606 | |||
607 | src->output = ILR_INTTGT_CINT; | ||
608 | src->nomask = true; | ||
609 | src->destmask = 0; | ||
610 | |||
611 | for (i = 0; i < opp->nb_cpus; i++) { | ||
612 | int n_ci = IDR_CI0_SHIFT - i; | ||
613 | |||
614 | if (src->idr & (1UL << n_ci)) | ||
615 | src->destmask |= 1UL << i; | ||
616 | } | ||
617 | } else { | ||
618 | src->output = ILR_INTTGT_INT; | ||
619 | src->nomask = false; | ||
620 | src->destmask = src->idr & normal_mask; | ||
621 | } | ||
622 | } else { | ||
623 | src->destmask = src->idr; | ||
624 | } | ||
625 | } | ||
626 | |||
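
On FSL MPICs with OPENPIC_FLAG_IDR_CRIT, write_IRQreg_idr() carves one IDR value into two destination sets: the low nb_cpus bits steer the normal INT output, per-CPU critical bits count down from IDR_CI0_SHIFT, and any critical bit wins over the normal ones. A standalone decode for a two-CPU configuration:

#include <stdio.h>

int main(void)
{
        unsigned int nb_cpus = 2;
        unsigned int normal_mask = (1u << nb_cpus) - 1;
        unsigned int crit_shift = 31 - nb_cpus;         /* IDR_EP_SHIFT - nb_cpus */
        unsigned int crit_mask = normal_mask << crit_shift;
        unsigned int idr = (1u << 30) | 0x2;    /* CI for cpu 0, INT for cpu 1 */
        unsigned int destmask = 0, i;

        if (idr & crit_mask) {                  /* critical takes precedence */
                for (i = 0; i < nb_cpus; i++)
                        if (idr & (1u << (30 - i)))     /* IDR_CI0_SHIFT - i */
                                destmask |= 1u << i;
                printf("critical, destmask %#x\n", destmask);   /* 0x1 */
        } else {
                printf("normal, destmask %#x\n", idr & normal_mask);
        }
        return 0;
}
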
627 | static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ, | ||
628 | uint32_t val) | ||
629 | { | ||
630 | if (opp->flags & OPENPIC_FLAG_ILR) { | ||
631 | struct irq_source *src = &opp->src[n_IRQ]; | ||
632 | |||
633 | src->output = val & ILR_INTTGT_MASK; | ||
634 | pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, | ||
635 | src->output); | ||
636 | |||
637 | /* TODO: on MPIC v4.0 only, set nomask for non-INT */ | ||
638 | } | ||
639 | } | ||
640 | |||
641 | static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ, | ||
642 | uint32_t val) | ||
643 | { | ||
644 | uint32_t mask; | ||
645 | |||
646 | /* NOTE when implementing newer FSL MPIC models: starting with v4.0, | ||
647 | * the polarity bit is read-only on internal interrupts. | ||
648 | */ | ||
649 | mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK | | ||
650 | IVPR_POLARITY_MASK | opp->vector_mask; | ||
651 | |||
652 | /* ACTIVITY bit is read-only */ | ||
653 | opp->src[n_IRQ].ivpr = | ||
654 | (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask); | ||
655 | |||
656 | /* For FSL internal interrupts, the sense bit is reserved and zero, | ||
657 | * and the interrupt is always level-triggered. Timers and IPIs | ||
658 | * have no sense or polarity bits, and are edge-triggered. | ||
659 | */ | ||
660 | switch (opp->src[n_IRQ].type) { | ||
661 | case IRQ_TYPE_NORMAL: | ||
662 | opp->src[n_IRQ].level = | ||
663 | !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK); | ||
664 | break; | ||
665 | |||
666 | case IRQ_TYPE_FSLINT: | ||
667 | opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK; | ||
668 | break; | ||
669 | |||
670 | case IRQ_TYPE_FSLSPECIAL: | ||
671 | opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK); | ||
672 | break; | ||
673 | } | ||
674 | |||
675 | openpic_update_irq(opp, n_IRQ); | ||
676 | pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val, | ||
677 | opp->src[n_IRQ].ivpr); | ||
678 | } | ||
679 | |||
680 | static void openpic_gcr_write(struct openpic *opp, uint64_t val) | ||
681 | { | ||
682 | if (val & GCR_RESET) { | ||
683 | openpic_reset(opp); | ||
684 | return; | ||
685 | } | ||
686 | |||
687 | opp->gcr &= ~opp->mpic_mode_mask; | ||
688 | opp->gcr |= val & opp->mpic_mode_mask; | ||
689 | } | ||
690 | |||
691 | static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val) | ||
692 | { | ||
693 | struct openpic *opp = opaque; | ||
694 | int err = 0; | ||
695 | |||
696 | pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); | ||
697 | if (addr & 0xF) | ||
698 | return 0; | ||
699 | |||
700 | switch (addr) { | ||
701 | case 0x00: /* Block Revision Register1 (BRR1) is Readonly */ | ||
702 | break; | ||
703 | case 0x40: | ||
704 | case 0x50: | ||
705 | case 0x60: | ||
706 | case 0x70: | ||
707 | case 0x80: | ||
708 | case 0x90: | ||
709 | case 0xA0: | ||
710 | case 0xB0: | ||
711 | err = openpic_cpu_write_internal(opp, addr, val, | ||
712 | get_current_cpu()); | ||
713 | break; | ||
714 | case 0x1000: /* FRR */ | ||
715 | break; | ||
716 | case 0x1020: /* GCR */ | ||
717 | openpic_gcr_write(opp, val); | ||
718 | break; | ||
719 | case 0x1080: /* VIR */ | ||
720 | break; | ||
721 | case 0x1090: /* PIR */ | ||
722 | /* | ||
723 | * This register is used to reset a CPU core -- | ||
724 | * let userspace handle it. | ||
725 | */ | ||
726 | err = -ENXIO; | ||
727 | break; | ||
728 | case 0x10A0: /* IPI_IVPR */ | ||
729 | case 0x10B0: | ||
730 | case 0x10C0: | ||
731 | case 0x10D0: { | ||
732 | int idx; | ||
733 | idx = (addr - 0x10A0) >> 4; | ||
734 | write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val); | ||
735 | break; | ||
736 | } | ||
737 | case 0x10E0: /* SPVE */ | ||
738 | opp->spve = val & opp->vector_mask; | ||
739 | break; | ||
740 | default: | ||
741 | break; | ||
742 | } | ||
743 | |||
744 | return err; | ||
745 | } | ||
746 | |||
747 | static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr) | ||
748 | { | ||
749 | struct openpic *opp = opaque; | ||
750 | u32 retval; | ||
751 | int err = 0; | ||
752 | |||
753 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
754 | retval = 0xFFFFFFFF; | ||
755 | if (addr & 0xF) | ||
756 | goto out; | ||
757 | |||
758 | switch (addr) { | ||
759 | case 0x1000: /* FRR */ | ||
760 | retval = opp->frr; | ||
761 | retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT; | ||
762 | break; | ||
763 | case 0x1020: /* GCR */ | ||
764 | retval = opp->gcr; | ||
765 | break; | ||
766 | case 0x1080: /* VIR */ | ||
767 | retval = opp->vir; | ||
768 | break; | ||
769 | case 0x1090: /* PIR */ | ||
770 | retval = 0x00000000; | ||
771 | break; | ||
772 | case 0x00: /* Block Revision Register 1 (BRR1) */ | ||
773 | retval = opp->brr1; | ||
774 | break; | ||
775 | case 0x40: | ||
776 | case 0x50: | ||
777 | case 0x60: | ||
778 | case 0x70: | ||
779 | case 0x80: | ||
780 | case 0x90: | ||
781 | case 0xA0: | ||
782 | case 0xB0: | ||
783 | err = openpic_cpu_read_internal(opp, addr, | ||
784 | &retval, get_current_cpu()); | ||
785 | break; | ||
786 | case 0x10A0: /* IPI_IVPR */ | ||
787 | case 0x10B0: | ||
788 | case 0x10C0: | ||
789 | case 0x10D0: | ||
790 | { | ||
791 | int idx; | ||
792 | idx = (addr - 0x10A0) >> 4; | ||
793 | retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx); | ||
794 | } | ||
795 | break; | ||
796 | case 0x10E0: /* SPVE */ | ||
797 | retval = opp->spve; | ||
798 | break; | ||
799 | default: | ||
800 | break; | ||
801 | } | ||
802 | |||
803 | out: | ||
804 | pr_debug("%s: => 0x%08x\n", __func__, retval); | ||
805 | *ptr = retval; | ||
806 | return err; | ||
807 | } | ||
808 | |||
809 | static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val) | ||
810 | { | ||
811 | struct openpic *opp = opaque; | ||
812 | int idx; | ||
813 | |||
814 | addr += 0x10f0; | ||
815 | |||
816 | pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); | ||
817 | if (addr & 0xF) | ||
818 | return 0; | ||
819 | |||
820 | if (addr == 0x10f0) { | ||
821 | /* TFRR */ | ||
822 | opp->tfrr = val; | ||
823 | return 0; | ||
824 | } | ||
825 | |||
826 | idx = (addr >> 6) & 0x3; | ||
827 | addr = addr & 0x30; | ||
828 | |||
829 | switch (addr & 0x30) { | ||
830 | case 0x00: /* TCCR */ | ||
831 | break; | ||
832 | case 0x10: /* TBCR */ | ||
833 | if ((opp->timers[idx].tccr & TCCR_TOG) != 0 && | ||
834 | (val & TBCR_CI) == 0 && | ||
835 | (opp->timers[idx].tbcr & TBCR_CI) != 0) | ||
836 | opp->timers[idx].tccr &= ~TCCR_TOG; | ||
837 | |||
838 | opp->timers[idx].tbcr = val; | ||
839 | break; | ||
840 | case 0x20: /* TVPR */ | ||
841 | write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val); | ||
842 | break; | ||
843 | case 0x30: /* TDR */ | ||
844 | write_IRQreg_idr(opp, opp->irq_tim0 + idx, val); | ||
845 | break; | ||
846 | } | ||
847 | |||
848 | return 0; | ||
849 | } | ||
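
The timer decode above packs four 0x40-byte timer windows behind a single shared TFRR word. A small sketch of the same arithmetic, using global-page offsets as openpic_tmr_write() does after the 0x10f0 rebase (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    static void tmr_decode(uint32_t addr)
    {
        static const char *reg[] = { "TCCR", "TBCR", "TVPR", "TDR" };

        if (addr == 0x10f0) {           /* single shared TFRR */
            printf("0x%04x -> TFRR\n", addr);
            return;
        }

        /* each timer spans 0x40 bytes; its registers sit at
         * +0x00/+0x10/+0x20/+0x30 within that window */
        printf("0x%04x -> timer %d, %s\n", addr,
               (addr >> 6) & 0x3, reg[(addr >> 4) & 0x3]);
    }

    int main(void)
    {
        tmr_decode(0x10f0);     /* TFRR */
        tmr_decode(0x1100);     /* timer 0, TCCR */
        tmr_decode(0x1170);     /* timer 1, TDR */
        return 0;
    }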
850 | |||
851 | static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr) | ||
852 | { | ||
853 | struct openpic *opp = opaque; | ||
854 | uint32_t retval = 0xFFFFFFFF; | ||
855 | int idx; | ||
856 | |||
857 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
858 | if (addr & 0xF) | ||
859 | goto out; | ||
860 | |||
861 | idx = (addr >> 6) & 0x3; | ||
862 | if (addr == 0x0) { | ||
863 | /* TFRR */ | ||
864 | retval = opp->tfrr; | ||
865 | goto out; | ||
866 | } | ||
867 | |||
868 | switch (addr & 0x30) { | ||
869 | case 0x00: /* TCCR */ | ||
870 | retval = opp->timers[idx].tccr; | ||
871 | break; | ||
872 | case 0x10: /* TBCR */ | ||
873 | retval = opp->timers[idx].tbcr; | ||
874 | break; | ||
875 | case 0x20: /* TVPR */ | ||
876 | retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx); | ||
877 | break; | ||
878 | case 0x30: /* TDR */ | ||
879 | retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx); | ||
880 | break; | ||
881 | } | ||
882 | |||
883 | out: | ||
884 | pr_debug("%s: => 0x%08x\n", __func__, retval); | ||
885 | *ptr = retval; | ||
886 | return 0; | ||
887 | } | ||
888 | |||
889 | static int openpic_src_write(void *opaque, gpa_t addr, u32 val) | ||
890 | { | ||
891 | struct openpic *opp = opaque; | ||
892 | int idx; | ||
893 | |||
894 | pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); | ||
895 | |||
896 | addr = addr & 0xffff; | ||
897 | idx = addr >> 5; | ||
898 | |||
899 | switch (addr & 0x1f) { | ||
900 | case 0x00: | ||
901 | write_IRQreg_ivpr(opp, idx, val); | ||
902 | break; | ||
903 | case 0x10: | ||
904 | write_IRQreg_idr(opp, idx, val); | ||
905 | break; | ||
906 | case 0x18: | ||
907 | write_IRQreg_ilr(opp, idx, val); | ||
908 | break; | ||
909 | } | ||
910 | |||
911 | return 0; | ||
912 | } | ||
913 | |||
914 | static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr) | ||
915 | { | ||
916 | struct openpic *opp = opaque; | ||
917 | uint32_t retval; | ||
918 | int idx; | ||
919 | |||
920 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
921 | retval = 0xFFFFFFFF; | ||
922 | |||
923 | addr = addr & 0xffff; | ||
924 | idx = addr >> 5; | ||
925 | |||
926 | switch (addr & 0x1f) { | ||
927 | case 0x00: | ||
928 | retval = read_IRQreg_ivpr(opp, idx); | ||
929 | break; | ||
930 | case 0x10: | ||
931 | retval = read_IRQreg_idr(opp, idx); | ||
932 | break; | ||
933 | case 0x18: | ||
934 | retval = read_IRQreg_ilr(opp, idx); | ||
935 | break; | ||
936 | } | ||
937 | |||
938 | pr_debug("%s: => 0x%08x\n", __func__, retval); | ||
939 | *ptr = retval; | ||
940 | return 0; | ||
941 | } | ||
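
Each interrupt source thus owns a 0x20-byte window with IVPR at +0x00, IDR at +0x10 and ILR at +0x18. A hypothetical helper for the inverse of the decode above (the names here are illustrative, not from the patch):

    #include <stdint.h>

    enum src_reg { SRC_IVPR = 0x00, SRC_IDR = 0x10, SRC_ILR = 0x18 };

    /* Offset of register 'r' of source 'n' within the source page;
     * openpic_src_read/write recover 'n' as addr >> 5. */
    static inline uint32_t src_reg_offset(unsigned int n, enum src_reg r)
    {
        return (n << 5) | r;
    }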
942 | |||
943 | static int openpic_msi_write(void *opaque, gpa_t addr, u32 val) | ||
944 | { | ||
945 | struct openpic *opp = opaque; | ||
946 | int idx = opp->irq_msi; | ||
947 | int srs, ibs; | ||
948 | |||
949 | pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); | ||
950 | if (addr & 0xF) | ||
951 | return 0; | ||
952 | |||
953 | switch (addr) { | ||
954 | case MSIIR_OFFSET: | ||
955 | srs = val >> MSIIR_SRS_SHIFT; | ||
956 | idx += srs; | ||
957 | ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT; | ||
958 | opp->msi[srs].msir |= 1 << ibs; | ||
959 | openpic_set_irq(opp, idx, 1); | ||
960 | break; | ||
961 | default: | ||
962 | /* most registers are read-only, thus ignored */ | ||
963 | break; | ||
964 | } | ||
965 | |||
966 | return 0; | ||
967 | } | ||
968 | |||
969 | static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr) | ||
970 | { | ||
971 | struct openpic *opp = opaque; | ||
972 | uint32_t r = 0; | ||
973 | int i, srs; | ||
974 | |||
975 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
976 | if (addr & 0xF) | ||
977 | return -ENXIO; | ||
978 | |||
979 | srs = addr >> 4; | ||
980 | |||
981 | switch (addr) { | ||
982 | case 0x00: | ||
983 | case 0x10: | ||
984 | case 0x20: | ||
985 | case 0x30: | ||
986 | case 0x40: | ||
987 | case 0x50: | ||
988 | case 0x60: | ||
989 | case 0x70: /* MSIRs */ | ||
990 | r = opp->msi[srs].msir; | ||
991 | /* Clear on read */ | ||
992 | opp->msi[srs].msir = 0; | ||
993 | openpic_set_irq(opp, opp->irq_msi + srs, 0); | ||
994 | break; | ||
995 | case 0x120: /* MSISR */ | ||
996 | for (i = 0; i < MAX_MSI; i++) | ||
997 | r |= (opp->msi[i].msir ? 1 : 0) << i; | ||
998 | break; | ||
999 | } | ||
1000 | |||
1001 | pr_debug("%s: => 0x%08x\n", __func__, r); | ||
1002 | *ptr = r; | ||
1003 | return 0; | ||
1004 | } | ||
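
The MSIIR write path above splits the written value into a register-select field (SRS) and a bit-select field (IBS). A sketch of composing such a value; the X_-prefixed shifts are assumed stand-ins for the real MSIIR_* constants in the driver headers:

    #include <stdint.h>

    #define X_MSIIR_SRS_SHIFT 29                    /* assumed field layout */
    #define X_MSIIR_IBS_SHIFT 24
    #define X_MSIIR_IBS_MASK  (0x1fu << X_MSIIR_IBS_SHIFT)

    /* Build an MSIIR value that latches bit 'ibs' of MSIR bank 'srs'
     * and raises irq_msi + srs, as openpic_msi_write() does. */
    static inline uint32_t msiir_compose(uint32_t srs, uint32_t ibs)
    {
        return (srs << X_MSIIR_SRS_SHIFT) |
               ((ibs << X_MSIIR_IBS_SHIFT) & X_MSIIR_IBS_MASK);
    }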
1005 | |||
1006 | static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr) | ||
1007 | { | ||
1008 | uint32_t r = 0; | ||
1009 | |||
1010 | pr_debug("%s: addr %#llx\n", __func__, addr); | ||
1011 | |||
1012 | /* TODO: EISR/EIMR */ | ||
1013 | |||
1014 | *ptr = r; | ||
1015 | return 0; | ||
1016 | } | ||
1017 | |||
1018 | static int openpic_summary_write(void *opaque, gpa_t addr, u32 val) | ||
1019 | { | ||
1020 | pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); | ||
1021 | |||
1022 | /* TODO: EISR/EIMR */ | ||
1023 | return 0; | ||
1024 | } | ||
1025 | |||
1026 | static int openpic_cpu_write_internal(void *opaque, gpa_t addr, | ||
1027 | u32 val, int idx) | ||
1028 | { | ||
1029 | struct openpic *opp = opaque; | ||
1030 | struct irq_source *src; | ||
1031 | struct irq_dest *dst; | ||
1032 | int s_IRQ, n_IRQ; | ||
1033 | |||
1034 | pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx, | ||
1035 | addr, val); | ||
1036 | |||
1037 | if (idx < 0) | ||
1038 | return 0; | ||
1039 | |||
1040 | if (addr & 0xF) | ||
1041 | return 0; | ||
1042 | |||
1043 | dst = &opp->dst[idx]; | ||
1044 | addr &= 0xFF0; | ||
1045 | switch (addr) { | ||
1046 | case 0x40: /* IPIDR */ | ||
1047 | case 0x50: | ||
1048 | case 0x60: | ||
1049 | case 0x70: | ||
1050 | idx = (addr - 0x40) >> 4; | ||
1051 | /* we still use the IDR as a mask of which CPUs to deliver the IPI to */ | ||
1052 | opp->src[opp->irq_ipi0 + idx].destmask |= val; | ||
1053 | openpic_set_irq(opp, opp->irq_ipi0 + idx, 1); | ||
1054 | openpic_set_irq(opp, opp->irq_ipi0 + idx, 0); | ||
1055 | break; | ||
1056 | case 0x80: /* CTPR */ | ||
1057 | dst->ctpr = val & 0x0000000F; | ||
1058 | |||
1059 | pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n", | ||
1060 | __func__, idx, dst->ctpr, dst->raised.priority, | ||
1061 | dst->servicing.priority); | ||
1062 | |||
1063 | if (dst->raised.priority <= dst->ctpr) { | ||
1064 | pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", | ||
1065 | __func__, idx); | ||
1066 | mpic_irq_lower(opp, dst, ILR_INTTGT_INT); | ||
1067 | } else if (dst->raised.priority > dst->servicing.priority) { | ||
1068 | pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n", | ||
1069 | __func__, idx, dst->raised.next); | ||
1070 | mpic_irq_raise(opp, dst, ILR_INTTGT_INT); | ||
1071 | } | ||
1072 | |||
1073 | break; | ||
1074 | case 0x90: /* WHOAMI */ | ||
1075 | /* Read-only register */ | ||
1076 | break; | ||
1077 | case 0xA0: /* IACK */ | ||
1078 | /* Read-only register */ | ||
1079 | break; | ||
1080 | case 0xB0: { /* EOI */ | ||
1081 | int notify_eoi; | ||
1082 | |||
1083 | pr_debug("EOI\n"); | ||
1084 | s_IRQ = IRQ_get_next(opp, &dst->servicing); | ||
1085 | |||
1086 | if (s_IRQ < 0) { | ||
1087 | pr_debug("%s: EOI with no interrupt in service\n", | ||
1088 | __func__); | ||
1089 | break; | ||
1090 | } | ||
1091 | |||
1092 | IRQ_resetbit(&dst->servicing, s_IRQ); | ||
1093 | /* Notify listeners that the IRQ is over */ | ||
1094 | notify_eoi = s_IRQ; | ||
1095 | /* Set up next servicing IRQ */ | ||
1096 | s_IRQ = IRQ_get_next(opp, &dst->servicing); | ||
1097 | /* Check queued interrupts. */ | ||
1098 | n_IRQ = IRQ_get_next(opp, &dst->raised); | ||
1099 | src = &opp->src[n_IRQ]; | ||
1100 | if (n_IRQ != -1 && | ||
1101 | (s_IRQ == -1 || | ||
1102 | IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { | ||
1103 | pr_debug("Raise OpenPIC INT output cpu %d irq %d\n", | ||
1104 | idx, n_IRQ); | ||
1105 | mpic_irq_raise(opp, dst, ILR_INTTGT_INT); | ||
1106 | } | ||
1107 | |||
1108 | spin_unlock(&opp->lock); | ||
1109 | kvm_notify_acked_irq(opp->kvm, 0, notify_eoi); | ||
1110 | spin_lock(&opp->lock); | ||
1111 | |||
1112 | break; | ||
1113 | } | ||
1114 | default: | ||
1115 | break; | ||
1116 | } | ||
1117 | |||
1118 | return 0; | ||
1119 | } | ||
1120 | |||
1121 | static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val) | ||
1122 | { | ||
1123 | struct openpic *opp = opaque; | ||
1124 | |||
1125 | return openpic_cpu_write_internal(opp, addr, val, | ||
1126 | (addr & 0x1f000) >> 12); | ||
1127 | } | ||
1128 | |||
1129 | static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, | ||
1130 | int cpu) | ||
1131 | { | ||
1132 | struct irq_source *src; | ||
1133 | int retval, irq; | ||
1134 | |||
1135 | pr_debug("Lower OpenPIC INT output\n"); | ||
1136 | mpic_irq_lower(opp, dst, ILR_INTTGT_INT); | ||
1137 | |||
1138 | irq = IRQ_get_next(opp, &dst->raised); | ||
1139 | pr_debug("IACK: irq=%d\n", irq); | ||
1140 | |||
1141 | if (irq == -1) | ||
1142 | /* No more interrupts pending */ | ||
1143 | return opp->spve; | ||
1144 | |||
1145 | src = &opp->src[irq]; | ||
1146 | if (!(src->ivpr & IVPR_ACTIVITY_MASK) || | ||
1147 | !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) { | ||
1148 | pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n", | ||
1149 | __func__, irq, dst->ctpr, src->ivpr); | ||
1150 | openpic_update_irq(opp, irq); | ||
1151 | retval = opp->spve; | ||
1152 | } else { | ||
1153 | /* IRQ enters servicing state */ | ||
1154 | IRQ_setbit(&dst->servicing, irq); | ||
1155 | retval = IVPR_VECTOR(opp, src->ivpr); | ||
1156 | } | ||
1157 | |||
1158 | if (!src->level) { | ||
1159 | /* edge-sensitive IRQ */ | ||
1160 | src->ivpr &= ~IVPR_ACTIVITY_MASK; | ||
1161 | src->pending = 0; | ||
1162 | IRQ_resetbit(&dst->raised, irq); | ||
1163 | } | ||
1164 | |||
1165 | if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) { | ||
1166 | src->destmask &= ~(1 << cpu); | ||
1167 | if (src->destmask && !src->level) { | ||
1168 | /* trigger on CPUs that didn't know about it yet */ | ||
1169 | openpic_set_irq(opp, irq, 1); | ||
1170 | openpic_set_irq(opp, irq, 0); | ||
1171 | /* if all CPUs knew about it, set active bit again */ | ||
1172 | src->ivpr |= IVPR_ACTIVITY_MASK; | ||
1173 | } | ||
1174 | } | ||
1175 | |||
1176 | return retval; | ||
1177 | } | ||
1178 | |||
1179 | void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) | ||
1180 | { | ||
1181 | struct openpic *opp = vcpu->arch.mpic; | ||
1182 | int cpu = vcpu->arch.irq_cpu_id; | ||
1183 | unsigned long flags; | ||
1184 | |||
1185 | spin_lock_irqsave(&opp->lock, flags); | ||
1186 | |||
1187 | if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY) | ||
1188 | kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu)); | ||
1189 | |||
1190 | spin_unlock_irqrestore(&opp->lock, flags); | ||
1191 | } | ||
1192 | |||
1193 | static int openpic_cpu_read_internal(void *opaque, gpa_t addr, | ||
1194 | u32 *ptr, int idx) | ||
1195 | { | ||
1196 | struct openpic *opp = opaque; | ||
1197 | struct irq_dest *dst; | ||
1198 | uint32_t retval; | ||
1199 | |||
1200 | pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr); | ||
1201 | retval = 0xFFFFFFFF; | ||
1202 | |||
1203 | if (idx < 0) | ||
1204 | goto out; | ||
1205 | |||
1206 | if (addr & 0xF) | ||
1207 | goto out; | ||
1208 | |||
1209 | dst = &opp->dst[idx]; | ||
1210 | addr &= 0xFF0; | ||
1211 | switch (addr) { | ||
1212 | case 0x80: /* CTPR */ | ||
1213 | retval = dst->ctpr; | ||
1214 | break; | ||
1215 | case 0x90: /* WHOAMI */ | ||
1216 | retval = idx; | ||
1217 | break; | ||
1218 | case 0xA0: /* IACK */ | ||
1219 | retval = openpic_iack(opp, dst, idx); | ||
1220 | break; | ||
1221 | case 0xB0: /* EOI */ | ||
1222 | retval = 0; | ||
1223 | break; | ||
1224 | default: | ||
1225 | break; | ||
1226 | } | ||
1227 | pr_debug("%s: => 0x%08x\n", __func__, retval); | ||
1228 | |||
1229 | out: | ||
1230 | *ptr = retval; | ||
1231 | return 0; | ||
1232 | } | ||
1233 | |||
1234 | static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr) | ||
1235 | { | ||
1236 | struct openpic *opp = opaque; | ||
1237 | |||
1238 | return openpic_cpu_read_internal(opp, addr, ptr, | ||
1239 | (addr & 0x1f000) >> 12); | ||
1240 | } | ||
1241 | |||
1242 | struct mem_reg { | ||
1243 | int (*read)(void *opaque, gpa_t addr, u32 *ptr); | ||
1244 | int (*write)(void *opaque, gpa_t addr, u32 val); | ||
1245 | gpa_t start_addr; | ||
1246 | int size; | ||
1247 | }; | ||
1248 | |||
1249 | static const struct mem_reg openpic_gbl_mmio = { | ||
1250 | .write = openpic_gbl_write, | ||
1251 | .read = openpic_gbl_read, | ||
1252 | .start_addr = OPENPIC_GLB_REG_START, | ||
1253 | .size = OPENPIC_GLB_REG_SIZE, | ||
1254 | }; | ||
1255 | |||
1256 | static const struct mem_reg openpic_tmr_mmio = { | ||
1257 | .write = openpic_tmr_write, | ||
1258 | .read = openpic_tmr_read, | ||
1259 | .start_addr = OPENPIC_TMR_REG_START, | ||
1260 | .size = OPENPIC_TMR_REG_SIZE, | ||
1261 | }; | ||
1262 | |||
1263 | static const struct mem_reg openpic_cpu_mmio = { | ||
1264 | .write = openpic_cpu_write, | ||
1265 | .read = openpic_cpu_read, | ||
1266 | .start_addr = OPENPIC_CPU_REG_START, | ||
1267 | .size = OPENPIC_CPU_REG_SIZE, | ||
1268 | }; | ||
1269 | |||
1270 | static const struct mem_reg openpic_src_mmio = { | ||
1271 | .write = openpic_src_write, | ||
1272 | .read = openpic_src_read, | ||
1273 | .start_addr = OPENPIC_SRC_REG_START, | ||
1274 | .size = OPENPIC_SRC_REG_SIZE, | ||
1275 | }; | ||
1276 | |||
1277 | static const struct mem_reg openpic_msi_mmio = { | ||
1278 | .read = openpic_msi_read, | ||
1279 | .write = openpic_msi_write, | ||
1280 | .start_addr = OPENPIC_MSI_REG_START, | ||
1281 | .size = OPENPIC_MSI_REG_SIZE, | ||
1282 | }; | ||
1283 | |||
1284 | static const struct mem_reg openpic_summary_mmio = { | ||
1285 | .read = openpic_summary_read, | ||
1286 | .write = openpic_summary_write, | ||
1287 | .start_addr = OPENPIC_SUMMARY_REG_START, | ||
1288 | .size = OPENPIC_SUMMARY_REG_SIZE, | ||
1289 | }; | ||
1290 | |||
1291 | static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr) | ||
1292 | { | ||
1293 | if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) { | ||
1294 | WARN(1, "kvm mpic: too many mmio regions\n"); | ||
1295 | return; | ||
1296 | } | ||
1297 | |||
1298 | opp->mmio_regions[opp->num_mmio_regions++] = mr; | ||
1299 | } | ||
1300 | |||
1301 | static void fsl_common_init(struct openpic *opp) | ||
1302 | { | ||
1303 | int i; | ||
1304 | int virq = MAX_SRC; | ||
1305 | |||
1306 | add_mmio_region(opp, &openpic_msi_mmio); | ||
1307 | add_mmio_region(opp, &openpic_summary_mmio); | ||
1308 | |||
1309 | opp->vid = VID_REVISION_1_2; | ||
1310 | opp->vir = VIR_GENERIC; | ||
1311 | opp->vector_mask = 0xFFFF; | ||
1312 | opp->tfrr_reset = 0; | ||
1313 | opp->ivpr_reset = IVPR_MASK_MASK; | ||
1314 | opp->idr_reset = 1 << 0; | ||
1315 | opp->max_irq = MAX_IRQ; | ||
1316 | |||
1317 | opp->irq_ipi0 = virq; | ||
1318 | virq += MAX_IPI; | ||
1319 | opp->irq_tim0 = virq; | ||
1320 | virq += MAX_TMR; | ||
1321 | |||
1322 | BUG_ON(virq > MAX_IRQ); | ||
1323 | |||
1324 | opp->irq_msi = 224; | ||
1325 | |||
1326 | for (i = 0; i < opp->fsl->max_ext; i++) | ||
1327 | opp->src[i].level = false; | ||
1328 | |||
1329 | /* Internal interrupts, including message and MSI */ | ||
1330 | for (i = 16; i < MAX_SRC; i++) { | ||
1331 | opp->src[i].type = IRQ_TYPE_FSLINT; | ||
1332 | opp->src[i].level = true; | ||
1333 | } | ||
1334 | |||
1335 | /* timers and IPIs */ | ||
1336 | for (i = MAX_SRC; i < virq; i++) { | ||
1337 | opp->src[i].type = IRQ_TYPE_FSLSPECIAL; | ||
1338 | opp->src[i].level = false; | ||
1339 | } | ||
1340 | } | ||
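
For reference, the virtual IRQ numbering that fsl_common_init() establishes (a plain sketch of the layout in terms of the driver's MAX_* constants):

    /*
     *   [0, MAX_SRC)                      external + internal sources
     *   [MAX_SRC, MAX_SRC + MAX_IPI)      IPIs    (opp->irq_ipi0 + i)
     *   [irq_tim0, irq_tim0 + MAX_TMR)    timers  (opp->irq_tim0 + i)
     *
     * with irq_tim0 == MAX_SRC + MAX_IPI, and the BUG_ON above guarding
     * the total against MAX_IRQ.
     */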
1341 | |||
1342 | static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr) | ||
1343 | { | ||
1344 | int i; | ||
1345 | |||
1346 | for (i = 0; i < opp->num_mmio_regions; i++) { | ||
1347 | const struct mem_reg *mr = opp->mmio_regions[i]; | ||
1348 | |||
1349 | if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) | ||
1350 | continue; | ||
1351 | |||
1352 | return mr->read(opp, addr - mr->start_addr, ptr); | ||
1353 | } | ||
1354 | |||
1355 | return -ENXIO; | ||
1356 | } | ||
1357 | |||
1358 | static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) | ||
1359 | { | ||
1360 | int i; | ||
1361 | |||
1362 | for (i = 0; i < opp->num_mmio_regions; i++) { | ||
1363 | const struct mem_reg *mr = opp->mmio_regions[i]; | ||
1364 | |||
1365 | if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) | ||
1366 | continue; | ||
1367 | |||
1368 | return mr->write(opp, addr - mr->start_addr, val); | ||
1369 | } | ||
1370 | |||
1371 | return -ENXIO; | ||
1372 | } | ||
1373 | |||
1374 | static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, | ||
1375 | int len, void *ptr) | ||
1376 | { | ||
1377 | struct openpic *opp = container_of(this, struct openpic, mmio); | ||
1378 | int ret; | ||
1379 | union { | ||
1380 | u32 val; | ||
1381 | u8 bytes[4]; | ||
1382 | } u; | ||
1383 | |||
1384 | if (addr & (len - 1)) { | ||
1385 | pr_debug("%s: bad alignment %llx/%d\n", | ||
1386 | __func__, addr, len); | ||
1387 | return -EINVAL; | ||
1388 | } | ||
1389 | |||
1390 | spin_lock_irq(&opp->lock); | ||
1391 | ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val); | ||
1392 | spin_unlock_irq(&opp->lock); | ||
1393 | |||
1394 | /* | ||
1395 | * Technically only 32-bit accesses are allowed, but be nice to | ||
1396 | * people dumping registers a byte at a time -- it works in real | ||
1397 | * hardware (reads only, not writes). | ||
1398 | */ | ||
1399 | if (len == 4) { | ||
1400 | *(u32 *)ptr = u.val; | ||
1401 | pr_debug("%s: addr %llx ret %d len 4 val %x\n", | ||
1402 | __func__, addr, ret, u.val); | ||
1403 | } else if (len == 1) { | ||
1404 | *(u8 *)ptr = u.bytes[addr & 3]; | ||
1405 | pr_debug("%s: addr %llx ret %d len 1 val %x\n", | ||
1406 | __func__, addr, ret, u.bytes[addr & 3]); | ||
1407 | } else { | ||
1408 | pr_debug("%s: bad length %d\n", __func__, len); | ||
1409 | return -EINVAL; | ||
1410 | } | ||
1411 | |||
1412 | return ret; | ||
1413 | } | ||
1414 | |||
1415 | static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, | ||
1416 | int len, const void *ptr) | ||
1417 | { | ||
1418 | struct openpic *opp = container_of(this, struct openpic, mmio); | ||
1419 | int ret; | ||
1420 | |||
1421 | if (len != 4) { | ||
1422 | pr_debug("%s: bad length %d\n", __func__, len); | ||
1423 | return -EOPNOTSUPP; | ||
1424 | } | ||
1425 | if (addr & 3) { | ||
1426 | pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len); | ||
1427 | return -EOPNOTSUPP; | ||
1428 | } | ||
1429 | |||
1430 | spin_lock_irq(&opp->lock); | ||
1431 | ret = kvm_mpic_write_internal(opp, addr - opp->reg_base, | ||
1432 | *(const u32 *)ptr); | ||
1433 | spin_unlock_irq(&opp->lock); | ||
1434 | |||
1435 | pr_debug("%s: addr %llx ret %d val %x\n", | ||
1436 | __func__, addr, ret, *(const u32 *)ptr); | ||
1437 | |||
1438 | return ret; | ||
1439 | } | ||
1440 | |||
1441 | static const struct kvm_io_device_ops mpic_mmio_ops = { | ||
1442 | .read = kvm_mpic_read, | ||
1443 | .write = kvm_mpic_write, | ||
1444 | }; | ||
1445 | |||
1446 | static void map_mmio(struct openpic *opp) | ||
1447 | { | ||
1448 | kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops); | ||
1449 | |||
1450 | kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS, | ||
1451 | opp->reg_base, OPENPIC_REG_SIZE, | ||
1452 | &opp->mmio); | ||
1453 | } | ||
1454 | |||
1455 | static void unmap_mmio(struct openpic *opp) | ||
1456 | { | ||
1457 | kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); | ||
1458 | } | ||
1459 | |||
1460 | static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr) | ||
1461 | { | ||
1462 | u64 base; | ||
1463 | |||
1464 | if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64))) | ||
1465 | return -EFAULT; | ||
1466 | |||
1467 | if (base & 0x3ffff) { | ||
1468 | pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n", | ||
1469 | __func__, base); | ||
1470 | return -EINVAL; | ||
1471 | } | ||
1472 | |||
1473 | if (base == opp->reg_base) | ||
1474 | return 0; | ||
1475 | |||
1476 | mutex_lock(&opp->kvm->slots_lock); | ||
1477 | |||
1478 | unmap_mmio(opp); | ||
1479 | opp->reg_base = base; | ||
1480 | |||
1481 | pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n", | ||
1482 | __func__, base); | ||
1483 | |||
1484 | if (base == 0) | ||
1485 | goto out; | ||
1486 | |||
1487 | map_mmio(opp); | ||
1488 | |||
1489 | out: | ||
1490 | mutex_unlock(&opp->kvm->slots_lock); | ||
1491 | return 0; | ||
1492 | } | ||
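
Userspace drives set_base_addr() through the device-attr interface. A minimal sketch of the caller side, assuming the KVM_DEV_MPIC_* constants from this series are exported via linux/kvm.h ('mpic_fd' is a device fd obtained from KVM_CREATE_DEVICE):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Map the in-kernel MPIC at 'base' (256 KiB aligned), or pass 0 to unmap. */
    static int mpic_set_base(int mpic_fd, uint64_t base)
    {
        struct kvm_device_attr attr = {
            .group = KVM_DEV_MPIC_GRP_MISC,
            .attr  = KVM_DEV_MPIC_BASE_ADDR,
            .addr  = (uint64_t)(unsigned long)&base,
        };

        return ioctl(mpic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }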
1493 | |||
1494 | #define ATTR_SET 0 | ||
1495 | #define ATTR_GET 1 | ||
1496 | |||
1497 | static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) | ||
1498 | { | ||
1499 | int ret; | ||
1500 | |||
1501 | if (addr & 3) | ||
1502 | return -ENXIO; | ||
1503 | |||
1504 | spin_lock_irq(&opp->lock); | ||
1505 | |||
1506 | if (type == ATTR_SET) | ||
1507 | ret = kvm_mpic_write_internal(opp, addr, *val); | ||
1508 | else | ||
1509 | ret = kvm_mpic_read_internal(opp, addr, val); | ||
1510 | |||
1511 | spin_unlock_irq(&opp->lock); | ||
1512 | |||
1513 | pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val); | ||
1514 | |||
1515 | return ret; | ||
1516 | } | ||
1517 | |||
1518 | static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
1519 | { | ||
1520 | struct openpic *opp = dev->private; | ||
1521 | u32 attr32; | ||
1522 | |||
1523 | switch (attr->group) { | ||
1524 | case KVM_DEV_MPIC_GRP_MISC: | ||
1525 | switch (attr->attr) { | ||
1526 | case KVM_DEV_MPIC_BASE_ADDR: | ||
1527 | return set_base_addr(opp, attr); | ||
1528 | } | ||
1529 | |||
1530 | break; | ||
1531 | |||
1532 | case KVM_DEV_MPIC_GRP_REGISTER: | ||
1533 | if (get_user(attr32, (u32 __user *)(long)attr->addr)) | ||
1534 | return -EFAULT; | ||
1535 | |||
1536 | return access_reg(opp, attr->attr, &attr32, ATTR_SET); | ||
1537 | |||
1538 | case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: | ||
1539 | if (attr->attr >= MAX_SRC) | ||
1540 | return -EINVAL; | ||
1541 | |||
1542 | if (get_user(attr32, (u32 __user *)(long)attr->addr)) | ||
1543 | return -EFAULT; | ||
1544 | |||
1545 | if (attr32 != 0 && attr32 != 1) | ||
1546 | return -EINVAL; | ||
1547 | |||
1548 | spin_lock_irq(&opp->lock); | ||
1549 | openpic_set_irq(opp, attr->attr, attr32); | ||
1550 | spin_unlock_irq(&opp->lock); | ||
1551 | return 0; | ||
1552 | } | ||
1553 | |||
1554 | return -ENXIO; | ||
1555 | } | ||
1556 | |||
1557 | static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
1558 | { | ||
1559 | struct openpic *opp = dev->private; | ||
1560 | u64 attr64; | ||
1561 | u32 attr32; | ||
1562 | int ret; | ||
1563 | |||
1564 | switch (attr->group) { | ||
1565 | case KVM_DEV_MPIC_GRP_MISC: | ||
1566 | switch (attr->attr) { | ||
1567 | case KVM_DEV_MPIC_BASE_ADDR: | ||
1568 | mutex_lock(&opp->kvm->slots_lock); | ||
1569 | attr64 = opp->reg_base; | ||
1570 | mutex_unlock(&opp->kvm->slots_lock); | ||
1571 | |||
1572 | if (copy_to_user((u64 __user *)(long)attr->addr, | ||
1573 | &attr64, sizeof(u64))) | ||
1574 | return -EFAULT; | ||
1575 | |||
1576 | return 0; | ||
1577 | } | ||
1578 | |||
1579 | break; | ||
1580 | |||
1581 | case KVM_DEV_MPIC_GRP_REGISTER: | ||
1582 | ret = access_reg(opp, attr->attr, &attr32, ATTR_GET); | ||
1583 | if (ret) | ||
1584 | return ret; | ||
1585 | |||
1586 | if (put_user(attr32, (u32 __user *)(long)attr->addr)) | ||
1587 | return -EFAULT; | ||
1588 | |||
1589 | return 0; | ||
1590 | |||
1591 | case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: | ||
1592 | if (attr->attr >= MAX_SRC) | ||
1593 | return -EINVAL; | ||
1594 | |||
1595 | spin_lock_irq(&opp->lock); | ||
1596 | attr32 = opp->src[attr->attr].pending; | ||
1597 | spin_unlock_irq(&opp->lock); | ||
1598 | |||
1599 | if (put_user(attr32, (u32 __user *)(long)attr->addr)) | ||
1600 | return -EFAULT; | ||
1601 | |||
1602 | return 0; | ||
1603 | } | ||
1604 | |||
1605 | return -ENXIO; | ||
1606 | } | ||
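
The KVM_DEV_MPIC_GRP_REGISTER group gives userspace word-sized access to any emulated register through access_reg(). A companion sketch to the one above, under the same assumptions about the exported constants:

    /* Read one 32-bit MPIC register by its offset within the register space. */
    static int mpic_get_reg(int mpic_fd, uint32_t offset, uint32_t *val)
    {
        struct kvm_device_attr attr = {
            .group = KVM_DEV_MPIC_GRP_REGISTER,
            .attr  = offset,                /* must be 4-byte aligned */
            .addr  = (uint64_t)(unsigned long)val,
        };

        return ioctl(mpic_fd, KVM_GET_DEVICE_ATTR, &attr);
    }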
1607 | |||
1608 | static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | ||
1609 | { | ||
1610 | switch (attr->group) { | ||
1611 | case KVM_DEV_MPIC_GRP_MISC: | ||
1612 | switch (attr->attr) { | ||
1613 | case KVM_DEV_MPIC_BASE_ADDR: | ||
1614 | return 0; | ||
1615 | } | ||
1616 | |||
1617 | break; | ||
1618 | |||
1619 | case KVM_DEV_MPIC_GRP_REGISTER: | ||
1620 | return 0; | ||
1621 | |||
1622 | case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: | ||
1623 | if (attr->attr >= MAX_SRC) | ||
1624 | break; | ||
1625 | |||
1626 | return 0; | ||
1627 | } | ||
1628 | |||
1629 | return -ENXIO; | ||
1630 | } | ||
1631 | |||
1632 | static void mpic_destroy(struct kvm_device *dev) | ||
1633 | { | ||
1634 | struct openpic *opp = dev->private; | ||
1635 | |||
1636 | dev->kvm->arch.mpic = NULL; | ||
1637 | kfree(opp); | ||
1638 | } | ||
1639 | |||
1640 | static int mpic_set_default_irq_routing(struct openpic *opp) | ||
1641 | { | ||
1642 | struct kvm_irq_routing_entry *routing; | ||
1643 | |||
1644 | /* Create a no-op default map so that dereferencing it still works */ | ||
1645 | routing = kzalloc((sizeof(*routing)), GFP_KERNEL); | ||
1646 | if (!routing) | ||
1647 | return -ENOMEM; | ||
1648 | |||
1649 | kvm_set_irq_routing(opp->kvm, routing, 0, 0); | ||
1650 | |||
1651 | kfree(routing); | ||
1652 | return 0; | ||
1653 | } | ||
1654 | |||
1655 | static int mpic_create(struct kvm_device *dev, u32 type) | ||
1656 | { | ||
1657 | struct openpic *opp; | ||
1658 | int ret; | ||
1659 | |||
1660 | /* We only support one MPIC at a time for now */ | ||
1661 | if (dev->kvm->arch.mpic) | ||
1662 | return -EINVAL; | ||
1663 | |||
1664 | opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); | ||
1665 | if (!opp) | ||
1666 | return -ENOMEM; | ||
1667 | |||
1668 | dev->private = opp; | ||
1669 | opp->kvm = dev->kvm; | ||
1670 | opp->dev = dev; | ||
1671 | opp->model = type; | ||
1672 | spin_lock_init(&opp->lock); | ||
1673 | |||
1674 | add_mmio_region(opp, &openpic_gbl_mmio); | ||
1675 | add_mmio_region(opp, &openpic_tmr_mmio); | ||
1676 | add_mmio_region(opp, &openpic_src_mmio); | ||
1677 | add_mmio_region(opp, &openpic_cpu_mmio); | ||
1678 | |||
1679 | switch (opp->model) { | ||
1680 | case KVM_DEV_TYPE_FSL_MPIC_20: | ||
1681 | opp->fsl = &fsl_mpic_20; | ||
1682 | opp->brr1 = 0x00400200; | ||
1683 | opp->flags |= OPENPIC_FLAG_IDR_CRIT; | ||
1684 | opp->nb_irqs = 80; | ||
1685 | opp->mpic_mode_mask = GCR_MODE_MIXED; | ||
1686 | |||
1687 | fsl_common_init(opp); | ||
1688 | |||
1689 | break; | ||
1690 | |||
1691 | case KVM_DEV_TYPE_FSL_MPIC_42: | ||
1692 | opp->fsl = &fsl_mpic_42; | ||
1693 | opp->brr1 = 0x00400402; | ||
1694 | opp->flags |= OPENPIC_FLAG_ILR; | ||
1695 | opp->nb_irqs = 196; | ||
1696 | opp->mpic_mode_mask = GCR_MODE_PROXY; | ||
1697 | |||
1698 | fsl_common_init(opp); | ||
1699 | |||
1700 | break; | ||
1701 | |||
1702 | default: | ||
1703 | ret = -ENODEV; | ||
1704 | goto err; | ||
1705 | } | ||
1706 | |||
1707 | ret = mpic_set_default_irq_routing(opp); | ||
1708 | if (ret) | ||
1709 | goto err; | ||
1710 | |||
1711 | openpic_reset(opp); | ||
1712 | |||
1713 | smp_wmb(); | ||
1714 | dev->kvm->arch.mpic = opp; | ||
1715 | |||
1716 | return 0; | ||
1717 | |||
1718 | err: | ||
1719 | kfree(opp); | ||
1720 | return ret; | ||
1721 | } | ||
1722 | |||
1723 | struct kvm_device_ops kvm_mpic_ops = { | ||
1724 | .name = "kvm-mpic", | ||
1725 | .create = mpic_create, | ||
1726 | .destroy = mpic_destroy, | ||
1727 | .set_attr = mpic_set_attr, | ||
1728 | .get_attr = mpic_get_attr, | ||
1729 | .has_attr = mpic_has_attr, | ||
1730 | }; | ||
1731 | |||
1732 | int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, | ||
1733 | u32 cpu) | ||
1734 | { | ||
1735 | struct openpic *opp = dev->private; | ||
1736 | int ret = 0; | ||
1737 | |||
1738 | if (dev->ops != &kvm_mpic_ops) | ||
1739 | return -EPERM; | ||
1740 | if (opp->kvm != vcpu->kvm) | ||
1741 | return -EPERM; | ||
1742 | if (cpu >= MAX_CPU) /* cpu is u32, so no negative check needed */ | ||
1743 | return -EPERM; | ||
1744 | |||
1745 | spin_lock_irq(&opp->lock); | ||
1746 | |||
1747 | if (opp->dst[cpu].vcpu) { | ||
1748 | ret = -EEXIST; | ||
1749 | goto out; | ||
1750 | } | ||
1751 | if (vcpu->arch.irq_type) { | ||
1752 | ret = -EBUSY; | ||
1753 | goto out; | ||
1754 | } | ||
1755 | |||
1756 | opp->dst[cpu].vcpu = vcpu; | ||
1757 | opp->nb_cpus = max(opp->nb_cpus, cpu + 1); | ||
1758 | |||
1759 | vcpu->arch.mpic = opp; | ||
1760 | vcpu->arch.irq_cpu_id = cpu; | ||
1761 | vcpu->arch.irq_type = KVMPPC_IRQ_MPIC; | ||
1762 | |||
1763 | /* This might need to be changed if GCR gets extended */ | ||
1764 | if (opp->mpic_mode_mask == GCR_MODE_PROXY) | ||
1765 | vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL; | ||
1766 | |||
1767 | out: | ||
1768 | spin_unlock_irq(&opp->lock); | ||
1769 | return ret; | ||
1770 | } | ||
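
The expected userspace sequence for kvmppc_mpic_connect_vcpu() is to create the device and then enable KVM_CAP_IRQ_MPIC on each vcpu, passing the device fd and CPU number. A sketch under the same header assumptions as above, with error handling trimmed:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int attach_vcpu_to_mpic(int vm_fd, int vcpu_fd, uint32_t cpu_id)
    {
        struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FSL_MPIC_42 };
        struct kvm_enable_cap cap = { .cap = KVM_CAP_IRQ_MPIC };

        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
            return -1;

        cap.args[0] = cd.fd;    /* device fd, resolved via kvm_device_from_filp() */
        cap.args[1] = cpu_id;   /* becomes vcpu->arch.irq_cpu_id */
        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }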
1771 | |||
1772 | /* | ||
1773 | * This should only happen immediately before the mpic is destroyed, | ||
1774 | * so we shouldn't need to worry about anything still trying to | ||
1775 | * access the vcpu pointer. | ||
1776 | */ | ||
1777 | void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) | ||
1778 | { | ||
1779 | BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu); | ||
1780 | |||
1781 | opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; | ||
1782 | } | ||
1783 | |||
1784 | /* | ||
1785 | * Return value: | ||
1786 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | ||
1787 | * = 0 Interrupt was coalesced (previous irq is still pending) | ||
1788 | * > 0 Number of CPUs interrupt was delivered to | ||
1789 | */ | ||
1790 | static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e, | ||
1791 | struct kvm *kvm, int irq_source_id, int level, | ||
1792 | bool line_status) | ||
1793 | { | ||
1794 | u32 irq = e->irqchip.pin; | ||
1795 | struct openpic *opp = kvm->arch.mpic; | ||
1796 | unsigned long flags; | ||
1797 | |||
1798 | spin_lock_irqsave(&opp->lock, flags); | ||
1799 | openpic_set_irq(opp, irq, level); | ||
1800 | spin_unlock_irqrestore(&opp->lock, flags); | ||
1801 | |||
1802 | /* None of the code paths we care about checks the return value */ | ||
1803 | return 0; | ||
1804 | } | ||
1805 | |||
1806 | int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | ||
1807 | struct kvm *kvm, int irq_source_id, int level, bool line_status) | ||
1808 | { | ||
1809 | struct openpic *opp = kvm->arch.mpic; | ||
1810 | unsigned long flags; | ||
1811 | |||
1812 | spin_lock_irqsave(&opp->lock, flags); | ||
1813 | |||
1814 | /* | ||
1815 | * XXX We ignore the target address for now, as we only support | ||
1816 | * a single MSI bank. | ||
1817 | */ | ||
1818 | openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data); | ||
1819 | spin_unlock_irqrestore(&opp->lock, flags); | ||
1820 | |||
1821 | /* None of the code paths we care about checks the return value */ | ||
1822 | return 0; | ||
1823 | } | ||
1824 | |||
1825 | int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, | ||
1826 | struct kvm_kernel_irq_routing_entry *e, | ||
1827 | const struct kvm_irq_routing_entry *ue) | ||
1828 | { | ||
1829 | int r = -EINVAL; | ||
1830 | |||
1831 | switch (ue->type) { | ||
1832 | case KVM_IRQ_ROUTING_IRQCHIP: | ||
1833 | e->set = mpic_set_irq; | ||
1834 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | ||
1835 | e->irqchip.pin = ue->u.irqchip.pin; | ||
1836 | if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) | ||
1837 | goto out; | ||
1838 | rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; | ||
1839 | break; | ||
1840 | case KVM_IRQ_ROUTING_MSI: | ||
1841 | e->set = kvm_set_msi; | ||
1842 | e->msi.address_lo = ue->u.msi.address_lo; | ||
1843 | e->msi.address_hi = ue->u.msi.address_hi; | ||
1844 | e->msi.data = ue->u.msi.data; | ||
1845 | break; | ||
1846 | default: | ||
1847 | goto out; | ||
1848 | } | ||
1849 | |||
1850 | r = 0; | ||
1851 | out: | ||
1852 | return r; | ||
1853 | } | ||
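
kvm_set_routing_entry() consumes entries fed in through the KVM_SET_GSI_ROUTING ioctl. A minimal userspace sketch routing one GSI to an MPIC pin (illustrative; error handling elided):

    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int route_gsi_to_mpic(int vm_fd, uint32_t gsi, uint32_t pin)
    {
        struct kvm_irq_routing *r;
        int ret;

        r = calloc(1, sizeof(*r) + sizeof(r->entries[0]));
        if (!r)
            return -1;

        r->nr = 1;
        r->entries[0].gsi = gsi;
        r->entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
        r->entries[0].u.irqchip.irqchip = 0;  /* the single in-kernel MPIC */
        r->entries[0].u.irqchip.pin = pin;    /* checked against KVM_IRQCHIP_NUM_PINS */

        ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, r);
        free(r);
        return ret;
    }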
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 934413cd3a1b..6316ee336e88 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/hrtimer.h> | 25 | #include <linux/hrtimer.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
28 | #include <linux/file.h> | ||
28 | #include <asm/cputable.h> | 29 | #include <asm/cputable.h> |
29 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
30 | #include <asm/kvm_ppc.h> | 31 | #include <asm/kvm_ppc.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include <asm/cputhreads.h> | 33 | #include <asm/cputhreads.h> |
33 | #include <asm/irqflags.h> | 34 | #include <asm/irqflags.h> |
34 | #include "timing.h" | 35 | #include "timing.h" |
36 | #include "irq.h" | ||
35 | #include "../mm/mmu_decl.h" | 37 | #include "../mm/mmu_decl.h" |
36 | 38 | ||
37 | #define CREATE_TRACE_POINTS | 39 | #define CREATE_TRACE_POINTS |
@@ -317,6 +319,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
317 | case KVM_CAP_ENABLE_CAP: | 319 | case KVM_CAP_ENABLE_CAP: |
318 | case KVM_CAP_ONE_REG: | 320 | case KVM_CAP_ONE_REG: |
319 | case KVM_CAP_IOEVENTFD: | 321 | case KVM_CAP_IOEVENTFD: |
322 | case KVM_CAP_DEVICE_CTRL: | ||
320 | r = 1; | 323 | r = 1; |
321 | break; | 324 | break; |
322 | #ifndef CONFIG_KVM_BOOK3S_64_HV | 325 | #ifndef CONFIG_KVM_BOOK3S_64_HV |
@@ -326,6 +329,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
326 | #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) | 329 | #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) |
327 | case KVM_CAP_SW_TLB: | 330 | case KVM_CAP_SW_TLB: |
328 | #endif | 331 | #endif |
332 | #ifdef CONFIG_KVM_MPIC | ||
333 | case KVM_CAP_IRQ_MPIC: | ||
334 | #endif | ||
329 | r = 1; | 335 | r = 1; |
330 | break; | 336 | break; |
331 | case KVM_CAP_COALESCED_MMIO: | 337 | case KVM_CAP_COALESCED_MMIO: |
@@ -335,6 +341,10 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
335 | #ifdef CONFIG_PPC_BOOK3S_64 | 341 | #ifdef CONFIG_PPC_BOOK3S_64 |
336 | case KVM_CAP_SPAPR_TCE: | 342 | case KVM_CAP_SPAPR_TCE: |
337 | case KVM_CAP_PPC_ALLOC_HTAB: | 343 | case KVM_CAP_PPC_ALLOC_HTAB: |
344 | case KVM_CAP_PPC_RTAS: | ||
345 | #ifdef CONFIG_KVM_XICS | ||
346 | case KVM_CAP_IRQ_XICS: | ||
347 | #endif | ||
338 | r = 1; | 348 | r = 1; |
339 | break; | 349 | break; |
340 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 350 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
@@ -411,18 +421,17 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
411 | } | 421 | } |
412 | 422 | ||
413 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 423 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
414 | struct kvm_memory_slot *memslot, | 424 | struct kvm_memory_slot *memslot, |
415 | struct kvm_memory_slot old, | 425 | struct kvm_userspace_memory_region *mem, |
416 | struct kvm_userspace_memory_region *mem, | 426 | enum kvm_mr_change change) |
417 | bool user_alloc) | ||
418 | { | 427 | { |
419 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); | 428 | return kvmppc_core_prepare_memory_region(kvm, memslot, mem); |
420 | } | 429 | } |
421 | 430 | ||
422 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 431 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
423 | struct kvm_userspace_memory_region *mem, | 432 | struct kvm_userspace_memory_region *mem, |
424 | struct kvm_memory_slot old, | 433 | const struct kvm_memory_slot *old, |
425 | bool user_alloc) | 434 | enum kvm_mr_change change) |
426 | { | 435 | { |
427 | kvmppc_core_commit_memory_region(kvm, mem, old); | 436 | kvmppc_core_commit_memory_region(kvm, mem, old); |
428 | } | 437 | } |
@@ -460,6 +469,16 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
460 | tasklet_kill(&vcpu->arch.tasklet); | 469 | tasklet_kill(&vcpu->arch.tasklet); |
461 | 470 | ||
462 | kvmppc_remove_vcpu_debugfs(vcpu); | 471 | kvmppc_remove_vcpu_debugfs(vcpu); |
472 | |||
473 | switch (vcpu->arch.irq_type) { | ||
474 | case KVMPPC_IRQ_MPIC: | ||
475 | kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); | ||
476 | break; | ||
477 | case KVMPPC_IRQ_XICS: | ||
478 | kvmppc_xics_free_icp(vcpu); | ||
479 | break; | ||
480 | } | ||
481 | |||
463 | kvmppc_core_vcpu_free(vcpu); | 482 | kvmppc_core_vcpu_free(vcpu); |
464 | } | 483 | } |
465 | 484 | ||
@@ -532,12 +551,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
532 | #endif | 551 | #endif |
533 | } | 552 | } |
534 | 553 | ||
535 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | ||
536 | struct kvm_guest_debug *dbg) | ||
537 | { | ||
538 | return -EINVAL; | ||
539 | } | ||
540 | |||
541 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | 554 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, |
542 | struct kvm_run *run) | 555 | struct kvm_run *run) |
543 | { | 556 | { |
@@ -612,6 +625,8 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | |||
612 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 625 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
613 | unsigned int rt, unsigned int bytes, int is_bigendian) | 626 | unsigned int rt, unsigned int bytes, int is_bigendian) |
614 | { | 627 | { |
628 | int idx, ret; | ||
629 | |||
615 | if (bytes > sizeof(run->mmio.data)) { | 630 | if (bytes > sizeof(run->mmio.data)) { |
616 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, | 631 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, |
617 | run->mmio.len); | 632 | run->mmio.len); |
@@ -627,8 +642,14 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
627 | vcpu->mmio_is_write = 0; | 642 | vcpu->mmio_is_write = 0; |
628 | vcpu->arch.mmio_sign_extend = 0; | 643 | vcpu->arch.mmio_sign_extend = 0; |
629 | 644 | ||
630 | if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, | 645 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
631 | bytes, &run->mmio.data)) { | 646 | |
647 | ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, | ||
648 | bytes, &run->mmio.data); | ||
649 | |||
650 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
651 | |||
652 | if (!ret) { | ||
632 | kvmppc_complete_mmio_load(vcpu, run); | 653 | kvmppc_complete_mmio_load(vcpu, run); |
633 | vcpu->mmio_needed = 0; | 654 | vcpu->mmio_needed = 0; |
634 | return EMULATE_DONE; | 655 | return EMULATE_DONE; |
@@ -653,6 +674,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
653 | u64 val, unsigned int bytes, int is_bigendian) | 674 | u64 val, unsigned int bytes, int is_bigendian) |
654 | { | 675 | { |
655 | void *data = run->mmio.data; | 676 | void *data = run->mmio.data; |
677 | int idx, ret; | ||
656 | 678 | ||
657 | if (bytes > sizeof(run->mmio.data)) { | 679 | if (bytes > sizeof(run->mmio.data)) { |
658 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, | 680 | printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, |
@@ -682,9 +704,14 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
682 | } | 704 | } |
683 | } | 705 | } |
684 | 706 | ||
685 | if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, | 707 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
686 | bytes, &run->mmio.data)) { | 708 | |
687 | kvmppc_complete_mmio_load(vcpu, run); | 709 | ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, |
710 | bytes, &run->mmio.data); | ||
711 | |||
712 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
713 | |||
714 | if (!ret) { | ||
688 | vcpu->mmio_needed = 0; | 715 | vcpu->mmio_needed = 0; |
689 | return EMULATE_DONE; | 716 | return EMULATE_DONE; |
690 | } | 717 | } |
@@ -740,7 +767,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |||
740 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) | 767 | int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) |
741 | { | 768 | { |
742 | if (irq->irq == KVM_INTERRUPT_UNSET) { | 769 | if (irq->irq == KVM_INTERRUPT_UNSET) { |
743 | kvmppc_core_dequeue_external(vcpu, irq); | 770 | kvmppc_core_dequeue_external(vcpu); |
744 | return 0; | 771 | return 0; |
745 | } | 772 | } |
746 | 773 | ||
@@ -770,7 +797,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
770 | break; | 797 | break; |
771 | case KVM_CAP_PPC_EPR: | 798 | case KVM_CAP_PPC_EPR: |
772 | r = 0; | 799 | r = 0; |
773 | vcpu->arch.epr_enabled = cap->args[0]; | 800 | if (cap->args[0]) |
801 | vcpu->arch.epr_flags |= KVMPPC_EPR_USER; | ||
802 | else | ||
803 | vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER; | ||
774 | break; | 804 | break; |
775 | #ifdef CONFIG_BOOKE | 805 | #ifdef CONFIG_BOOKE |
776 | case KVM_CAP_PPC_BOOKE_WATCHDOG: | 806 | case KVM_CAP_PPC_BOOKE_WATCHDOG: |
@@ -791,6 +821,44 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, | |||
791 | break; | 821 | break; |
792 | } | 822 | } |
793 | #endif | 823 | #endif |
824 | #ifdef CONFIG_KVM_MPIC | ||
825 | case KVM_CAP_IRQ_MPIC: { | ||
826 | struct file *filp; | ||
827 | struct kvm_device *dev; | ||
828 | |||
829 | r = -EBADF; | ||
830 | filp = fget(cap->args[0]); | ||
831 | if (!filp) | ||
832 | break; | ||
833 | |||
834 | r = -EPERM; | ||
835 | dev = kvm_device_from_filp(filp); | ||
836 | if (dev) | ||
837 | r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]); | ||
838 | |||
839 | fput(filp); | ||
840 | break; | ||
841 | } | ||
842 | #endif | ||
843 | #ifdef CONFIG_KVM_XICS | ||
844 | case KVM_CAP_IRQ_XICS: { | ||
845 | struct file *filp; | ||
846 | struct kvm_device *dev; | ||
847 | |||
848 | r = -EBADF; | ||
849 | filp = fget(cap->args[0]); | ||
850 | if (!filp) | ||
851 | break; | ||
852 | |||
853 | r = -EPERM; | ||
854 | dev = kvm_device_from_filp(filp); | ||
855 | if (dev) | ||
856 | r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); | ||
857 | |||
858 | fput(filp); | ||
859 | break; | ||
860 | } | ||
861 | #endif /* CONFIG_KVM_XICS */ | ||
794 | default: | 862 | default: |
795 | r = -EINVAL; | 863 | r = -EINVAL; |
796 | break; | 864 | break; |
@@ -913,9 +981,22 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) | |||
913 | return 0; | 981 | return 0; |
914 | } | 982 | } |
915 | 983 | ||
984 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, | ||
985 | bool line_status) | ||
986 | { | ||
987 | if (!irqchip_in_kernel(kvm)) | ||
988 | return -ENXIO; | ||
989 | |||
990 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | ||
991 | irq_event->irq, irq_event->level, | ||
992 | line_status); | ||
993 | return 0; | ||
994 | } | ||
995 | |||
916 | long kvm_arch_vm_ioctl(struct file *filp, | 996 | long kvm_arch_vm_ioctl(struct file *filp, |
917 | unsigned int ioctl, unsigned long arg) | 997 | unsigned int ioctl, unsigned long arg) |
918 | { | 998 | { |
999 | struct kvm *kvm __maybe_unused = filp->private_data; | ||
919 | void __user *argp = (void __user *)arg; | 1000 | void __user *argp = (void __user *)arg; |
920 | long r; | 1001 | long r; |
921 | 1002 | ||
@@ -934,7 +1015,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
934 | #ifdef CONFIG_PPC_BOOK3S_64 | 1015 | #ifdef CONFIG_PPC_BOOK3S_64 |
935 | case KVM_CREATE_SPAPR_TCE: { | 1016 | case KVM_CREATE_SPAPR_TCE: { |
936 | struct kvm_create_spapr_tce create_tce; | 1017 | struct kvm_create_spapr_tce create_tce; |
937 | struct kvm *kvm = filp->private_data; | ||
938 | 1018 | ||
939 | r = -EFAULT; | 1019 | r = -EFAULT; |
940 | if (copy_from_user(&create_tce, argp, sizeof(create_tce))) | 1020 | if (copy_from_user(&create_tce, argp, sizeof(create_tce))) |
@@ -946,8 +1026,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
946 | 1026 | ||
947 | #ifdef CONFIG_KVM_BOOK3S_64_HV | 1027 | #ifdef CONFIG_KVM_BOOK3S_64_HV |
948 | case KVM_ALLOCATE_RMA: { | 1028 | case KVM_ALLOCATE_RMA: { |
949 | struct kvm *kvm = filp->private_data; | ||
950 | struct kvm_allocate_rma rma; | 1029 | struct kvm_allocate_rma rma; |
1030 | struct kvm *kvm = filp->private_data; | ||
951 | 1031 | ||
952 | r = kvm_vm_ioctl_allocate_rma(kvm, &rma); | 1032 | r = kvm_vm_ioctl_allocate_rma(kvm, &rma); |
953 | if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) | 1033 | if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma))) |
@@ -956,7 +1036,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
956 | } | 1036 | } |
957 | 1037 | ||
958 | case KVM_PPC_ALLOCATE_HTAB: { | 1038 | case KVM_PPC_ALLOCATE_HTAB: { |
959 | struct kvm *kvm = filp->private_data; | ||
960 | u32 htab_order; | 1039 | u32 htab_order; |
961 | 1040 | ||
962 | r = -EFAULT; | 1041 | r = -EFAULT; |
@@ -973,7 +1052,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
973 | } | 1052 | } |
974 | 1053 | ||
975 | case KVM_PPC_GET_HTAB_FD: { | 1054 | case KVM_PPC_GET_HTAB_FD: { |
976 | struct kvm *kvm = filp->private_data; | ||
977 | struct kvm_get_htab_fd ghf; | 1055 | struct kvm_get_htab_fd ghf; |
978 | 1056 | ||
979 | r = -EFAULT; | 1057 | r = -EFAULT; |
@@ -986,7 +1064,6 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
986 | 1064 | ||
987 | #ifdef CONFIG_PPC_BOOK3S_64 | 1065 | #ifdef CONFIG_PPC_BOOK3S_64 |
988 | case KVM_PPC_GET_SMMU_INFO: { | 1066 | case KVM_PPC_GET_SMMU_INFO: { |
989 | struct kvm *kvm = filp->private_data; | ||
990 | struct kvm_ppc_smmu_info info; | 1067 | struct kvm_ppc_smmu_info info; |
991 | 1068 | ||
992 | memset(&info, 0, sizeof(info)); | 1069 | memset(&info, 0, sizeof(info)); |
@@ -995,6 +1072,12 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
995 | r = -EFAULT; | 1072 | r = -EFAULT; |
996 | break; | 1073 | break; |
997 | } | 1074 | } |
1075 | case KVM_PPC_RTAS_DEFINE_TOKEN: { | ||
1076 | struct kvm *kvm = filp->private_data; | ||
1077 | |||
1078 | r = kvm_vm_ioctl_rtas_define_token(kvm, argp); | ||
1079 | break; | ||
1080 | } | ||
998 | #endif /* CONFIG_PPC_BOOK3S_64 */ | 1081 | #endif /* CONFIG_PPC_BOOK3S_64 */ |
999 | default: | 1082 | default: |
1000 | r = -ENOTTY; | 1083 | r = -ENOTTY; |
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 89db29d17c25..7cd728b3b5e4 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c | |||
@@ -51,6 +51,12 @@ static struct icp_ipl __iomem *icp_native_regs[NR_CPUS]; | |||
51 | static inline unsigned int icp_native_get_xirr(void) | 51 | static inline unsigned int icp_native_get_xirr(void) |
52 | { | 52 | { |
53 | int cpu = smp_processor_id(); | 53 | int cpu = smp_processor_id(); |
54 | unsigned int xirr; | ||
55 | |||
56 | /* Handle an interrupt latched by KVM */ | ||
57 | xirr = kvmppc_get_xics_latch(); | ||
58 | if (xirr) | ||
59 | return xirr; | ||
54 | 60 | ||
55 | return in_be32(&icp_native_regs[cpu]->xirr.word); | 61 | return in_be32(&icp_native_regs[cpu]->xirr.word); |
56 | } | 62 | } |
@@ -138,6 +144,7 @@ static unsigned int icp_native_get_irq(void) | |||
138 | 144 | ||
139 | static void icp_native_cause_ipi(int cpu, unsigned long data) | 145 | static void icp_native_cause_ipi(int cpu, unsigned long data) |
140 | { | 146 | { |
147 | kvmppc_set_host_ipi(cpu, 1); | ||
141 | icp_native_set_qirr(cpu, IPI_PRIORITY); | 148 | icp_native_set_qirr(cpu, IPI_PRIORITY); |
142 | } | 149 | } |
143 | 150 | ||
@@ -151,6 +158,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id) | |||
151 | { | 158 | { |
152 | int cpu = smp_processor_id(); | 159 | int cpu = smp_processor_id(); |
153 | 160 | ||
161 | kvmppc_set_host_ipi(cpu, 0); | ||
154 | icp_native_set_qirr(cpu, 0xff); | 162 | icp_native_set_qirr(cpu, 0xff); |
155 | 163 | ||
156 | return smp_ipi_demux(); | 164 | return smp_ipi_demux(); |
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 7bf68fff7c5d..9ccd1905bdad 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild | |||
@@ -44,5 +44,6 @@ header-y += termios.h | |||
44 | header-y += types.h | 44 | header-y += types.h |
45 | header-y += ucontext.h | 45 | header-y += ucontext.h |
46 | header-y += unistd.h | 46 | header-y += unistd.h |
47 | header-y += virtio-ccw.h | ||
47 | header-y += vtoc.h | 48 | header-y += vtoc.h |
48 | header-y += zcrypt.h | 49 | header-y += zcrypt.h |
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h new file mode 100644 index 000000000000..a9a4ebf79fa7 --- /dev/null +++ b/arch/s390/include/uapi/asm/virtio-ccw.h | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * Definitions for virtio-ccw devices. | ||
3 | * | ||
4 | * Copyright IBM Corp. 2013 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2 only) | ||
8 | * as published by the Free Software Foundation. | ||
9 | * | ||
10 | * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> | ||
11 | */ | ||
12 | #ifndef __KVM_VIRTIO_CCW_H | ||
13 | #define __KVM_VIRTIO_CCW_H | ||
14 | |||
15 | /* Alignment of vring buffers. */ | ||
16 | #define KVM_VIRTIO_CCW_RING_ALIGN 4096 | ||
17 | |||
18 | /* Subcode for diagnose 500 (virtio hypercall). */ | ||
19 | #define KVM_S390_VIRTIO_CCW_NOTIFY 3 | ||
20 | |||
21 | #endif | ||
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 60f9f8ae0fc8..70b46eacf8e1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig | |||
@@ -22,6 +22,7 @@ config KVM | |||
22 | select PREEMPT_NOTIFIERS | 22 | select PREEMPT_NOTIFIERS |
23 | select ANON_INODES | 23 | select ANON_INODES |
24 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 24 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
25 | select HAVE_KVM_EVENTFD | ||
25 | ---help--- | 26 | ---help--- |
26 | Support hosting paravirtualized guest machines using the SIE | 27 | Support hosting paravirtualized guest machines using the SIE |
27 | virtualization capability on the mainframe. This should work | 28 | virtualization capability on the mainframe. This should work |
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 3975722bb19d..8fe9d65a4585 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile | |||
@@ -6,7 +6,7 @@ | |||
6 | # it under the terms of the GNU General Public License (version 2 only) | 6 | # it under the terms of the GNU General Public License (version 2 only) |
7 | # as published by the Free Software Foundation. | 7 | # as published by the Free Software Foundation. |
8 | 8 | ||
9 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) | 9 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o) |
10 | 10 | ||
11 | ccflags-y := -Ivirt/kvm -Iarch/s390/kvm | 11 | ccflags-y := -Ivirt/kvm -Iarch/s390/kvm |
12 | 12 | ||
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a390687feb13..1c01a9912989 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #include <linux/kvm.h> | 14 | #include <linux/kvm.h> |
15 | #include <linux/kvm_host.h> | 15 | #include <linux/kvm_host.h> |
16 | #include <asm/virtio-ccw.h> | ||
16 | #include "kvm-s390.h" | 17 | #include "kvm-s390.h" |
17 | #include "trace.h" | 18 | #include "trace.h" |
18 | #include "trace-s390.h" | 19 | #include "trace-s390.h" |
@@ -104,6 +105,29 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) | |||
104 | return -EREMOTE; | 105 | return -EREMOTE; |
105 | } | 106 | } |
106 | 107 | ||
108 | static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) | ||
109 | { | ||
110 | int ret, idx; | ||
111 | |||
112 | /* No virtio-ccw notification? Get out quickly. */ | ||
113 | if (!vcpu->kvm->arch.css_support || | ||
114 | (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) | ||
115 | return -EOPNOTSUPP; | ||
116 | |||
117 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
118 | /* | ||
119 | * The layout is as follows: | ||
120 | * - gpr 2 contains the subchannel id (passed as addr) | ||
121 | * - gpr 3 contains the virtqueue index (passed as datamatch) | ||
122 | */ | ||
123 | ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, | ||
124 | vcpu->run->s.regs.gprs[2], | ||
125 | 8, &vcpu->run->s.regs.gprs[3]); | ||
126 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
127 | /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ | ||
128 | return ret < 0 ? ret : 0; | ||
129 | } | ||
130 | |||
107 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) | 131 | int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) |
108 | { | 132 | { |
109 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; | 133 | int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; |
@@ -118,6 +142,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) | |||
118 | return __diag_time_slice_end_directed(vcpu); | 142 | return __diag_time_slice_end_directed(vcpu); |
119 | case 0x308: | 143 | case 0x308: |
120 | return __diag_ipl_functions(vcpu); | 144 | return __diag_ipl_functions(vcpu); |
145 | case 0x500: | ||
146 | return __diag_virtio_hypercall(vcpu); | ||
121 | default: | 147 | default: |
122 | return -EOPNOTSUPP; | 148 | return -EOPNOTSUPP; |
123 | } | 149 | } |
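
On the guest side, the diagnose-0x500 notify that __diag_virtio_hypercall() services puts the subcode in gpr 1, the subchannel id in gpr 2 and the virtqueue index in gpr 3, matching the kvm_io_bus_write() addr/datamatch pair above. A guest-side sketch, mirroring what a virtio-ccw driver would do (illustrative only):

    /* Issue the KVM_S390_VIRTIO_CCW_NOTIFY hypercall from an s390 guest. */
    static inline long virtio_ccw_notify(unsigned long schid,
                                         unsigned long queue)
    {
        register unsigned long nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
        register unsigned long addr asm("2") = schid;   /* matched as addr */
        register unsigned long data asm("3") = queue;   /* matched as datamatch */

        asm volatile("diag 2,4,0x500"
                     : "+d" (addr)
                     : "d" (nr), "d" (data)
                     : "memory", "cc");
        return addr;
    }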
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4703f129e95e..302e0e52b009 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h | |||
@@ -18,369 +18,86 @@ | |||
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include "kvm-s390.h" | 19 | #include "kvm-s390.h" |
20 | 20 | ||
21 | static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, | 21 | static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu, |
22 | unsigned long guestaddr) | 22 | void __user *gptr, |
23 | int prefixing) | ||
23 | { | 24 | { |
24 | unsigned long prefix = vcpu->arch.sie_block->prefix; | 25 | unsigned long prefix = vcpu->arch.sie_block->prefix; |
25 | 26 | unsigned long gaddr = (unsigned long) gptr; | |
26 | if (guestaddr < 2 * PAGE_SIZE) | 27 | unsigned long uaddr; |
27 | guestaddr += prefix; | 28 | |
28 | else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) | 29 | if (prefixing) { |
29 | guestaddr -= prefix; | 30 | if (gaddr < 2 * PAGE_SIZE) |
30 | 31 | gaddr += prefix; | |
31 | return (void __user *) gmap_fault(guestaddr, vcpu->arch.gmap); | 32 | else if ((gaddr >= prefix) && (gaddr < prefix + 2 * PAGE_SIZE)) |
32 | } | 33 | gaddr -= prefix; |
33 | |||
34 | static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
35 | u64 *result) | ||
36 | { | ||
37 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
38 | |||
39 | BUG_ON(guestaddr & 7); | ||
40 | |||
41 | if (IS_ERR((void __force *) uptr)) | ||
42 | return PTR_ERR((void __force *) uptr); | ||
43 | |||
44 | return get_user(*result, (unsigned long __user *) uptr); | ||
45 | } | ||
46 | |||
47 | static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
48 | u32 *result) | ||
49 | { | ||
50 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
51 | |||
52 | BUG_ON(guestaddr & 3); | ||
53 | |||
54 | if (IS_ERR((void __force *) uptr)) | ||
55 | return PTR_ERR((void __force *) uptr); | ||
56 | |||
57 | return get_user(*result, (u32 __user *) uptr); | ||
58 | } | ||
59 | |||
60 | static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
61 | u16 *result) | ||
62 | { | ||
63 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
64 | |||
65 | BUG_ON(guestaddr & 1); | ||
66 | |||
67 | if (IS_ERR(uptr)) | ||
68 | return PTR_ERR(uptr); | ||
69 | |||
70 | return get_user(*result, (u16 __user *) uptr); | ||
71 | } | ||
72 | |||
73 | static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
74 | u8 *result) | ||
75 | { | ||
76 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
77 | |||
78 | if (IS_ERR((void __force *) uptr)) | ||
79 | return PTR_ERR((void __force *) uptr); | ||
80 | |||
81 | return get_user(*result, (u8 __user *) uptr); | ||
82 | } | ||
83 | |||
84 | static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
85 | u64 value) | ||
86 | { | ||
87 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
88 | |||
89 | BUG_ON(guestaddr & 7); | ||
90 | |||
91 | if (IS_ERR((void __force *) uptr)) | ||
92 | return PTR_ERR((void __force *) uptr); | ||
93 | |||
94 | return put_user(value, (u64 __user *) uptr); | ||
95 | } | ||
96 | |||
97 | static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
98 | u32 value) | ||
99 | { | ||
100 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
101 | |||
102 | BUG_ON(guestaddr & 3); | ||
103 | |||
104 | if (IS_ERR((void __force *) uptr)) | ||
105 | return PTR_ERR((void __force *) uptr); | ||
106 | |||
107 | return put_user(value, (u32 __user *) uptr); | ||
108 | } | ||
109 | |||
110 | static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
111 | u16 value) | ||
112 | { | ||
113 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
114 | |||
115 | BUG_ON(guestaddr & 1); | ||
116 | |||
117 | if (IS_ERR((void __force *) uptr)) | ||
118 | return PTR_ERR((void __force *) uptr); | ||
119 | |||
120 | return put_user(value, (u16 __user *) uptr); | ||
121 | } | ||
122 | |||
123 | static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, | ||
124 | u8 value) | ||
125 | { | ||
126 | void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); | ||
127 | |||
128 | if (IS_ERR((void __force *) uptr)) | ||
129 | return PTR_ERR((void __force *) uptr); | ||
130 | |||
131 | return put_user(value, (u8 __user *) uptr); | ||
132 | } | ||
133 | |||
134 | |||
135 | static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, | ||
136 | unsigned long guestdest, | ||
137 | void *from, unsigned long n) | ||
138 | { | ||
139 | int rc; | ||
140 | unsigned long i; | ||
141 | u8 *data = from; | ||
142 | |||
143 | for (i = 0; i < n; i++) { | ||
144 | rc = put_guest_u8(vcpu, guestdest++, *(data++)); | ||
145 | if (rc < 0) | ||
146 | return rc; | ||
147 | } | 34 | } |
148 | return 0; | 35 | uaddr = gmap_fault(gaddr, vcpu->arch.gmap); |
149 | } | 36 | if (IS_ERR_VALUE(uaddr)) |
150 | 37 | uaddr = -EFAULT; | |
151 | static inline int __copy_to_guest_fast(struct kvm_vcpu *vcpu, | 38 | return (void __user *)uaddr; |
152 | unsigned long guestdest, | 39 | } |
153 | void *from, unsigned long n) | 40 | |
154 | { | 41 | #define get_guest(vcpu, x, gptr) \ |
155 | int r; | 42 | ({ \ |
43 | __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ | ||
44 | int __mask = sizeof(__typeof__(*(gptr))) - 1; \ | ||
45 | int __ret = PTR_RET((void __force *)__uptr); \ | ||
46 | \ | ||
47 | if (!__ret) { \ | ||
48 | BUG_ON((unsigned long)__uptr & __mask); \ | ||
49 | __ret = get_user(x, __uptr); \ | ||
50 | } \ | ||
51 | __ret; \ | ||
52 | }) | ||
53 | |||
54 | #define put_guest(vcpu, x, gptr) \ | ||
55 | ({ \ | ||
56 | __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ | ||
57 | int __mask = sizeof(__typeof__(*(gptr))) - 1; \ | ||
58 | int __ret = PTR_RET((void __force *)__uptr); \ | ||
59 | \ | ||
60 | if (!__ret) { \ | ||
61 | BUG_ON((unsigned long)__uptr & __mask); \ | ||
62 | __ret = put_user(x, __uptr); \ | ||
63 | } \ | ||
64 | __ret; \ | ||
65 | }) | ||
66 | |||
67 | static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to, | ||
68 | unsigned long from, unsigned long len, | ||
69 | int to_guest, int prefixing) | ||
70 | { | ||
71 | unsigned long _len, rc; | ||
156 | void __user *uptr; | 72 | void __user *uptr; |
157 | unsigned long size; | ||
158 | |||
159 | if (guestdest + n < guestdest) | ||
160 | return -EFAULT; | ||
161 | |||
162 | /* simple case: all within one segment table entry? */ | ||
163 | if ((guestdest & PMD_MASK) == ((guestdest+n) & PMD_MASK)) { | ||
164 | uptr = (void __user *) gmap_fault(guestdest, vcpu->arch.gmap); | ||
165 | |||
166 | if (IS_ERR((void __force *) uptr)) | ||
167 | return PTR_ERR((void __force *) uptr); | ||
168 | |||
169 | r = copy_to_user(uptr, from, n); | ||
170 | |||
171 | if (r) | ||
172 | r = -EFAULT; | ||
173 | |||
174 | goto out; | ||
175 | } | ||
176 | |||
177 | /* copy first segment */ | ||
178 | uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); | ||
179 | |||
180 | if (IS_ERR((void __force *) uptr)) | ||
181 | return PTR_ERR((void __force *) uptr); | ||
182 | 73 | ||
183 | size = PMD_SIZE - (guestdest & ~PMD_MASK); | 74 | while (len) { |
184 | 75 | uptr = to_guest ? (void __user *)to : (void __user *)from; | |
185 | r = copy_to_user(uptr, from, size); | 76 | uptr = __gptr_to_uptr(vcpu, uptr, prefixing); |
186 | 77 | if (IS_ERR((void __force *)uptr)) | |
187 | if (r) { | 78 | return -EFAULT; |
188 | r = -EFAULT; | 79 | _len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1)); |
189 | goto out; | 80 | _len = min(_len, len); |
190 | } | 81 | if (to_guest) |
191 | from += size; | 82 | rc = copy_to_user((void __user *) uptr, (void *)from, _len); |
192 | n -= size; | 83 | else |
193 | guestdest += size; | 84 | rc = copy_from_user((void *)to, (void __user *)uptr, _len); |
194 | 85 | if (rc) | |
195 | /* copy full segments */ | 86 | return -EFAULT; |
196 | while (n >= PMD_SIZE) { | 87 | len -= _len; |
197 | uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); | 88 | from += _len; |
198 | 89 | to += _len; | |
199 | if (IS_ERR((void __force *) uptr)) | ||
200 | return PTR_ERR((void __force *) uptr); | ||
201 | |||
202 | r = copy_to_user(uptr, from, PMD_SIZE); | ||
203 | |||
204 | if (r) { | ||
205 | r = -EFAULT; | ||
206 | goto out; | ||
207 | } | ||
208 | from += PMD_SIZE; | ||
209 | n -= PMD_SIZE; | ||
210 | guestdest += PMD_SIZE; | ||
211 | } | ||
212 | |||
213 | /* copy the tail segment */ | ||
214 | if (n) { | ||
215 | uptr = (void __user *)gmap_fault(guestdest, vcpu->arch.gmap); | ||
216 | |||
217 | if (IS_ERR((void __force *) uptr)) | ||
218 | return PTR_ERR((void __force *) uptr); | ||
219 | |||
220 | r = copy_to_user(uptr, from, n); | ||
221 | |||
222 | if (r) | ||
223 | r = -EFAULT; | ||
224 | } | ||
225 | out: | ||
226 | return r; | ||
227 | } | ||
228 | |||
229 | static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, | ||
230 | unsigned long guestdest, | ||
231 | void *from, unsigned long n) | ||
232 | { | ||
233 | return __copy_to_guest_fast(vcpu, guestdest, from, n); | ||
234 | } | ||
235 | |||
236 | static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, | ||
237 | void *from, unsigned long n) | ||
238 | { | ||
239 | unsigned long prefix = vcpu->arch.sie_block->prefix; | ||
240 | |||
241 | if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) | ||
242 | goto slowpath; | ||
243 | |||
244 | if ((guestdest < prefix) && (guestdest + n > prefix)) | ||
245 | goto slowpath; | ||
246 | |||
247 | if ((guestdest < prefix + 2 * PAGE_SIZE) | ||
248 | && (guestdest + n > prefix + 2 * PAGE_SIZE)) | ||
249 | goto slowpath; | ||
250 | |||
251 | if (guestdest < 2 * PAGE_SIZE) | ||
252 | guestdest += prefix; | ||
253 | else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) | ||
254 | guestdest -= prefix; | ||
255 | |||
256 | return __copy_to_guest_fast(vcpu, guestdest, from, n); | ||
257 | slowpath: | ||
258 | return __copy_to_guest_slow(vcpu, guestdest, from, n); | ||
259 | } | ||
260 | |||
261 | static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, | ||
262 | unsigned long guestsrc, | ||
263 | unsigned long n) | ||
264 | { | ||
265 | int rc; | ||
266 | unsigned long i; | ||
267 | u8 *data = to; | ||
268 | |||
269 | for (i = 0; i < n; i++) { | ||
270 | rc = get_guest_u8(vcpu, guestsrc++, data++); | ||
271 | if (rc < 0) | ||
272 | return rc; | ||
273 | } | 90 | } |
274 | return 0; | 91 | return 0; |
275 | } | 92 | } |
276 | 93 | ||
277 | static inline int __copy_from_guest_fast(struct kvm_vcpu *vcpu, void *to, | 94 | #define copy_to_guest(vcpu, to, from, size) \ |
278 | unsigned long guestsrc, | 95 | __copy_guest(vcpu, to, (unsigned long)from, size, 1, 1) |
279 | unsigned long n) | 96 | #define copy_from_guest(vcpu, to, from, size) \ |
280 | { | 97 | __copy_guest(vcpu, (unsigned long)to, from, size, 0, 1) |
281 | int r; | 98 | #define copy_to_guest_absolute(vcpu, to, from, size) \ |
282 | void __user *uptr; | 99 | __copy_guest(vcpu, to, (unsigned long)from, size, 1, 0) |
283 | unsigned long size; | 100 | #define copy_from_guest_absolute(vcpu, to, from, size) \ |
284 | 101 | __copy_guest(vcpu, (unsigned long)to, from, size, 0, 0) | |
285 | if (guestsrc + n < guestsrc) | ||
286 | return -EFAULT; | ||
287 | |||
288 | /* simple case: all within one segment table entry? */ | ||
289 | if ((guestsrc & PMD_MASK) == ((guestsrc+n) & PMD_MASK)) { | ||
290 | uptr = (void __user *) gmap_fault(guestsrc, vcpu->arch.gmap); | ||
291 | |||
292 | if (IS_ERR((void __force *) uptr)) | ||
293 | return PTR_ERR((void __force *) uptr); | ||
294 | |||
295 | r = copy_from_user(to, uptr, n); | ||
296 | |||
297 | if (r) | ||
298 | r = -EFAULT; | ||
299 | |||
300 | goto out; | ||
301 | } | ||
302 | |||
303 | /* copy first segment */ | ||
304 | uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); | ||
305 | |||
306 | if (IS_ERR((void __force *) uptr)) | ||
307 | return PTR_ERR((void __force *) uptr); | ||
308 | |||
309 | size = PMD_SIZE - (guestsrc & ~PMD_MASK); | ||
310 | |||
311 | r = copy_from_user(to, uptr, size); | ||
312 | |||
313 | if (r) { | ||
314 | r = -EFAULT; | ||
315 | goto out; | ||
316 | } | ||
317 | to += size; | ||
318 | n -= size; | ||
319 | guestsrc += size; | ||
320 | |||
321 | /* copy full segments */ | ||
322 | while (n >= PMD_SIZE) { | ||
323 | uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); | ||
324 | |||
325 | if (IS_ERR((void __force *) uptr)) | ||
326 | return PTR_ERR((void __force *) uptr); | ||
327 | |||
328 | r = copy_from_user(to, uptr, PMD_SIZE); | ||
329 | |||
330 | if (r) { | ||
331 | r = -EFAULT; | ||
332 | goto out; | ||
333 | } | ||
334 | to += PMD_SIZE; | ||
335 | n -= PMD_SIZE; | ||
336 | guestsrc += PMD_SIZE; | ||
337 | } | ||
338 | |||
339 | /* copy the tail segment */ | ||
340 | if (n) { | ||
341 | uptr = (void __user *)gmap_fault(guestsrc, vcpu->arch.gmap); | ||
342 | |||
343 | if (IS_ERR((void __force *) uptr)) | ||
344 | return PTR_ERR((void __force *) uptr); | ||
345 | |||
346 | r = copy_from_user(to, uptr, n); | ||
347 | |||
348 | if (r) | ||
349 | r = -EFAULT; | ||
350 | } | ||
351 | out: | ||
352 | return r; | ||
353 | } | ||
354 | |||
355 | static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, | ||
356 | unsigned long guestsrc, | ||
357 | unsigned long n) | ||
358 | { | ||
359 | return __copy_from_guest_fast(vcpu, to, guestsrc, n); | ||
360 | } | ||
361 | |||
362 | static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, | ||
363 | unsigned long guestsrc, unsigned long n) | ||
364 | { | ||
365 | unsigned long prefix = vcpu->arch.sie_block->prefix; | ||
366 | |||
367 | if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) | ||
368 | goto slowpath; | ||
369 | 102 | ||
370 | if ((guestsrc < prefix) && (guestsrc + n > prefix)) | 103 | #endif /* __KVM_S390_GACCESS_H */ |
371 | goto slowpath; | ||
372 | |||
373 | if ((guestsrc < prefix + 2 * PAGE_SIZE) | ||
374 | && (guestsrc + n > prefix + 2 * PAGE_SIZE)) | ||
375 | goto slowpath; | ||
376 | |||
377 | if (guestsrc < 2 * PAGE_SIZE) | ||
378 | guestsrc += prefix; | ||
379 | else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) | ||
380 | guestsrc -= prefix; | ||
381 | |||
382 | return __copy_from_guest_fast(vcpu, to, guestsrc, n); | ||
383 | slowpath: | ||
384 | return __copy_from_guest_slow(vcpu, to, guestsrc, n); | ||
385 | } | ||
386 | #endif | ||
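
The replacement macros fold the former per-width helpers into two type-driven accessors: the cast on the guest pointer selects both the access width used by get_user()/put_user() and the alignment mask enforced by the BUG_ON. A short usage sketch, taken from the calling convention the rest of this series adopts (addresses are illustrative):

    u16 id;
    u64 stidp_data = vcpu->arch.stidp_data;
    int rc;

    /* 2-byte read: sizeof(u16) - 1 == 1 becomes the alignment mask */
    rc = get_guest(vcpu, id, (u16 __user *) addr);

    /* 8-byte write: same macro, only the pointer type changes */
    rc = put_guest(vcpu, stidp_data, (u64 __user *) (addr + 8));
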
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index f26ff1e31bdb..b7d1b2edeeb3 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c | |||
@@ -43,12 +43,10 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) | |||
43 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); | 43 | trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr); |
44 | 44 | ||
45 | do { | 45 | do { |
46 | rc = get_guest_u64(vcpu, useraddr, | 46 | rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg], |
47 | &vcpu->arch.sie_block->gcr[reg]); | 47 | (u64 __user *) useraddr); |
48 | if (rc == -EFAULT) { | 48 | if (rc) |
49 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 49 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
50 | break; | ||
51 | } | ||
52 | useraddr += 8; | 50 | useraddr += 8; |
53 | if (reg == reg3) | 51 | if (reg == reg3) |
54 | break; | 52 | break; |
@@ -78,11 +76,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu) | |||
78 | 76 | ||
79 | reg = reg1; | 77 | reg = reg1; |
80 | do { | 78 | do { |
81 | rc = get_guest_u32(vcpu, useraddr, &val); | 79 | rc = get_guest(vcpu, val, (u32 __user *) useraddr); |
82 | if (rc == -EFAULT) { | 80 | if (rc) |
83 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 81 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
84 | break; | ||
85 | } | ||
86 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; | 82 | vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; |
87 | vcpu->arch.sie_block->gcr[reg] |= val; | 83 | vcpu->arch.sie_block->gcr[reg] |= val; |
88 | useraddr += 4; | 84 | useraddr += 4; |
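
Only the body of the register walk is visible in these hunks; the remainder of the loop, not shown in the diff, presumably advances the register number modulo 16, since LCTL/LCTLG wrap from control register 15 back to 0 until reg3 is reached. A sketch of the full walk under that assumption:

    reg = reg1;
    do {
            rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
                           (u64 __user *) useraddr);
            if (rc)
                    return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
            useraddr += 8;          /* 4 for the 32-bit LCTL variant */
            if (reg == reg3)
                    break;
            reg = (reg + 1) % 16;   /* wrap cr15 -> cr0 */
    } while (1);
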
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37116a77cb4b..5c948177529e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c | |||
@@ -180,7 +180,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
180 | struct kvm_s390_interrupt_info *inti) | 180 | struct kvm_s390_interrupt_info *inti) |
181 | { | 181 | { |
182 | const unsigned short table[] = { 2, 4, 4, 6 }; | 182 | const unsigned short table[] = { 2, 4, 4, 6 }; |
183 | int rc, exception = 0; | 183 | int rc = 0; |
184 | 184 | ||
185 | switch (inti->type) { | 185 | switch (inti->type) { |
186 | case KVM_S390_INT_EMERGENCY: | 186 | case KVM_S390_INT_EMERGENCY: |
@@ -188,74 +188,41 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
188 | vcpu->stat.deliver_emergency_signal++; | 188 | vcpu->stat.deliver_emergency_signal++; |
189 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 189 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
190 | inti->emerg.code, 0); | 190 | inti->emerg.code, 0); |
191 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); | 191 | rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE); |
192 | if (rc == -EFAULT) | 192 | rc |= put_guest(vcpu, inti->emerg.code, |
193 | exception = 1; | 193 | (u16 __user *)__LC_EXT_CPU_ADDR); |
194 | 194 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | |
195 | rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->emerg.code); | 195 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
196 | if (rc == -EFAULT) | 196 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
197 | exception = 1; | 197 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
198 | |||
199 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
200 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
201 | if (rc == -EFAULT) | ||
202 | exception = 1; | ||
203 | |||
204 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
205 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
206 | if (rc == -EFAULT) | ||
207 | exception = 1; | ||
208 | break; | 198 | break; |
209 | |||
210 | case KVM_S390_INT_EXTERNAL_CALL: | 199 | case KVM_S390_INT_EXTERNAL_CALL: |
211 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); | 200 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); |
212 | vcpu->stat.deliver_external_call++; | 201 | vcpu->stat.deliver_external_call++; |
213 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 202 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
214 | inti->extcall.code, 0); | 203 | inti->extcall.code, 0); |
215 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); | 204 | rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE); |
216 | if (rc == -EFAULT) | 205 | rc |= put_guest(vcpu, inti->extcall.code, |
217 | exception = 1; | 206 | (u16 __user *)__LC_EXT_CPU_ADDR); |
218 | 207 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | |
219 | rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, inti->extcall.code); | 208 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
220 | if (rc == -EFAULT) | 209 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
221 | exception = 1; | 210 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
222 | |||
223 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | ||
224 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
225 | if (rc == -EFAULT) | ||
226 | exception = 1; | ||
227 | |||
228 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
229 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
230 | if (rc == -EFAULT) | ||
231 | exception = 1; | ||
232 | break; | 211 | break; |
233 | |||
234 | case KVM_S390_INT_SERVICE: | 212 | case KVM_S390_INT_SERVICE: |
235 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", | 213 | VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", |
236 | inti->ext.ext_params); | 214 | inti->ext.ext_params); |
237 | vcpu->stat.deliver_service_signal++; | 215 | vcpu->stat.deliver_service_signal++; |
238 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 216 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
239 | inti->ext.ext_params, 0); | 217 | inti->ext.ext_params, 0); |
240 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); | 218 | rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE); |
241 | if (rc == -EFAULT) | 219 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, |
242 | exception = 1; | 220 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
243 | 221 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | |
244 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | 222 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
245 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 223 | rc |= put_guest(vcpu, inti->ext.ext_params, |
246 | if (rc == -EFAULT) | 224 | (u32 __user *)__LC_EXT_PARAMS); |
247 | exception = 1; | ||
248 | |||
249 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
250 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
251 | if (rc == -EFAULT) | ||
252 | exception = 1; | ||
253 | |||
254 | rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); | ||
255 | if (rc == -EFAULT) | ||
256 | exception = 1; | ||
257 | break; | 225 | break; |
258 | |||
259 | case KVM_S390_INT_VIRTIO: | 226 | case KVM_S390_INT_VIRTIO: |
260 | VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", | 227 | VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", |
261 | inti->ext.ext_params, inti->ext.ext_params2); | 228 | inti->ext.ext_params, inti->ext.ext_params2); |
@@ -263,34 +230,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
263 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 230 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
264 | inti->ext.ext_params, | 231 | inti->ext.ext_params, |
265 | inti->ext.ext_params2); | 232 | inti->ext.ext_params2); |
266 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); | 233 | rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE); |
267 | if (rc == -EFAULT) | 234 | rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR); |
268 | exception = 1; | 235 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, |
269 | 236 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | |
270 | rc = put_guest_u16(vcpu, __LC_EXT_CPU_ADDR, 0x0d00); | 237 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
271 | if (rc == -EFAULT) | 238 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
272 | exception = 1; | 239 | rc |= put_guest(vcpu, inti->ext.ext_params, |
273 | 240 | (u32 __user *)__LC_EXT_PARAMS); | |
274 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | 241 | rc |= put_guest(vcpu, inti->ext.ext_params2, |
275 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 242 | (u64 __user *)__LC_EXT_PARAMS2); |
276 | if (rc == -EFAULT) | ||
277 | exception = 1; | ||
278 | |||
279 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
280 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
281 | if (rc == -EFAULT) | ||
282 | exception = 1; | ||
283 | |||
284 | rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); | ||
285 | if (rc == -EFAULT) | ||
286 | exception = 1; | ||
287 | |||
288 | rc = put_guest_u64(vcpu, __LC_EXT_PARAMS2, | ||
289 | inti->ext.ext_params2); | ||
290 | if (rc == -EFAULT) | ||
291 | exception = 1; | ||
292 | break; | 243 | break; |
293 | |||
294 | case KVM_S390_SIGP_STOP: | 244 | case KVM_S390_SIGP_STOP: |
295 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); | 245 | VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); |
296 | vcpu->stat.deliver_stop_signal++; | 246 | vcpu->stat.deliver_stop_signal++; |
@@ -313,18 +263,14 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
313 | vcpu->stat.deliver_restart_signal++; | 263 | vcpu->stat.deliver_restart_signal++; |
314 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 264 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
315 | 0, 0); | 265 | 0, 0); |
316 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, | 266 | rc = copy_to_guest(vcpu, |
317 | restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 267 | offsetof(struct _lowcore, restart_old_psw), |
318 | if (rc == -EFAULT) | 268 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
319 | exception = 1; | 269 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
320 | 270 | offsetof(struct _lowcore, restart_psw), | |
321 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | 271 | sizeof(psw_t)); |
322 | offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); | ||
323 | if (rc == -EFAULT) | ||
324 | exception = 1; | ||
325 | atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); | 272 | atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); |
326 | break; | 273 | break; |
327 | |||
328 | case KVM_S390_PROGRAM_INT: | 274 | case KVM_S390_PROGRAM_INT: |
329 | VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", | 275 | VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", |
330 | inti->pgm.code, | 276 | inti->pgm.code, |
@@ -332,24 +278,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
332 | vcpu->stat.deliver_program_int++; | 278 | vcpu->stat.deliver_program_int++; |
333 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 279 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
334 | inti->pgm.code, 0); | 280 | inti->pgm.code, 0); |
335 | rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); | 281 | rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE); |
336 | if (rc == -EFAULT) | 282 | rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14], |
337 | exception = 1; | 283 | (u16 __user *)__LC_PGM_ILC); |
338 | 284 | rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW, | |
339 | rc = put_guest_u16(vcpu, __LC_PGM_ILC, | 285 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
340 | table[vcpu->arch.sie_block->ipa >> 14]); | 286 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
341 | if (rc == -EFAULT) | 287 | __LC_PGM_NEW_PSW, sizeof(psw_t)); |
342 | exception = 1; | ||
343 | |||
344 | rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, | ||
345 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
346 | if (rc == -EFAULT) | ||
347 | exception = 1; | ||
348 | |||
349 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
350 | __LC_PGM_NEW_PSW, sizeof(psw_t)); | ||
351 | if (rc == -EFAULT) | ||
352 | exception = 1; | ||
353 | break; | 288 | break; |
354 | 289 | ||
355 | case KVM_S390_MCHK: | 290 | case KVM_S390_MCHK: |
@@ -358,24 +293,13 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
358 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 293 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
359 | inti->mchk.cr14, | 294 | inti->mchk.cr14, |
360 | inti->mchk.mcic); | 295 | inti->mchk.mcic); |
361 | rc = kvm_s390_vcpu_store_status(vcpu, | 296 | rc = kvm_s390_vcpu_store_status(vcpu, |
362 | KVM_S390_STORE_STATUS_PREFIXED); | 297 | KVM_S390_STORE_STATUS_PREFIXED); |
363 | if (rc == -EFAULT) | 298 | rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE); |
364 | exception = 1; | 299 | rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW, |
365 | 300 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | |
366 | rc = put_guest_u64(vcpu, __LC_MCCK_CODE, inti->mchk.mcic); | 301 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
367 | if (rc == -EFAULT) | 302 | __LC_MCK_NEW_PSW, sizeof(psw_t)); |
368 | exception = 1; | ||
369 | |||
370 | rc = copy_to_guest(vcpu, __LC_MCK_OLD_PSW, | ||
371 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
372 | if (rc == -EFAULT) | ||
373 | exception = 1; | ||
374 | |||
375 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
376 | __LC_MCK_NEW_PSW, sizeof(psw_t)); | ||
377 | if (rc == -EFAULT) | ||
378 | exception = 1; | ||
379 | break; | 303 | break; |
380 | 304 | ||
381 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: | 305 | case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: |
@@ -388,67 +312,44 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, | |||
388 | vcpu->stat.deliver_io_int++; | 312 | vcpu->stat.deliver_io_int++; |
389 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, | 313 | trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, |
390 | param0, param1); | 314 | param0, param1); |
391 | rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_ID, | 315 | rc = put_guest(vcpu, inti->io.subchannel_id, |
392 | inti->io.subchannel_id); | 316 | (u16 __user *) __LC_SUBCHANNEL_ID); |
393 | if (rc == -EFAULT) | 317 | rc |= put_guest(vcpu, inti->io.subchannel_nr, |
394 | exception = 1; | 318 | (u16 __user *) __LC_SUBCHANNEL_NR); |
395 | 319 | rc |= put_guest(vcpu, inti->io.io_int_parm, | |
396 | rc = put_guest_u16(vcpu, __LC_SUBCHANNEL_NR, | 320 | (u32 __user *) __LC_IO_INT_PARM); |
397 | inti->io.subchannel_nr); | 321 | rc |= put_guest(vcpu, inti->io.io_int_word, |
398 | if (rc == -EFAULT) | 322 | (u32 __user *) __LC_IO_INT_WORD); |
399 | exception = 1; | 323 | rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW, |
400 | 324 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | |
401 | rc = put_guest_u32(vcpu, __LC_IO_INT_PARM, | 325 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
402 | inti->io.io_int_parm); | 326 | __LC_IO_NEW_PSW, sizeof(psw_t)); |
403 | if (rc == -EFAULT) | ||
404 | exception = 1; | ||
405 | |||
406 | rc = put_guest_u32(vcpu, __LC_IO_INT_WORD, | ||
407 | inti->io.io_int_word); | ||
408 | if (rc == -EFAULT) | ||
409 | exception = 1; | ||
410 | |||
411 | rc = copy_to_guest(vcpu, __LC_IO_OLD_PSW, | ||
412 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | ||
413 | if (rc == -EFAULT) | ||
414 | exception = 1; | ||
415 | |||
416 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
417 | __LC_IO_NEW_PSW, sizeof(psw_t)); | ||
418 | if (rc == -EFAULT) | ||
419 | exception = 1; | ||
420 | break; | 327 | break; |
421 | } | 328 | } |
422 | default: | 329 | default: |
423 | BUG(); | 330 | BUG(); |
424 | } | 331 | } |
425 | if (exception) { | 332 | if (rc) { |
426 | printk("kvm: The guest lowcore is not mapped during interrupt " | 333 | printk("kvm: The guest lowcore is not mapped during interrupt " |
427 | "delivery, killing userspace\n"); | 334 | "delivery, killing userspace\n"); |
428 | do_exit(SIGKILL); | 335 | do_exit(SIGKILL); |
429 | } | 336 | } |
430 | } | 337 | } |
431 | 338 | ||
432 | static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) | 339 | static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) |
433 | { | 340 | { |
434 | int rc, exception = 0; | 341 | int rc; |
435 | 342 | ||
436 | if (psw_extint_disabled(vcpu)) | 343 | if (psw_extint_disabled(vcpu)) |
437 | return 0; | 344 | return 0; |
438 | if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) | 345 | if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) |
439 | return 0; | 346 | return 0; |
440 | rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); | 347 | rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE); |
441 | if (rc == -EFAULT) | 348 | rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW, |
442 | exception = 1; | 349 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); |
443 | rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, | 350 | rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, |
444 | &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); | 351 | __LC_EXT_NEW_PSW, sizeof(psw_t)); |
445 | if (rc == -EFAULT) | 352 | if (rc) { |
446 | exception = 1; | ||
447 | rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, | ||
448 | __LC_EXT_NEW_PSW, sizeof(psw_t)); | ||
449 | if (rc == -EFAULT) | ||
450 | exception = 1; | ||
451 | if (exception) { | ||
452 | printk("kvm: The guest lowcore is not mapped during interrupt " | 353 | printk("kvm: The guest lowcore is not mapped during interrupt " |
453 | "delivery, killing userspace\n"); | 354 | "delivery, killing userspace\n"); |
454 | do_exit(SIGKILL); | 355 | do_exit(SIGKILL); |
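
The rewrite above replaces the per-call "if (rc == -EFAULT) exception = 1;" bookkeeping with OR-accumulation of return codes: each accessor returns 0 or a negative errno, so any failure leaves rc nonzero. The exact errno is deliberately discarded (OR-ing two different negative values yields a meaningless number), which is fine here because the only response is killing userspace. A condensed illustration of the idiom:

    int rc;

    rc  = put_guest(vcpu, 0x1201, (u16 __user *) __LC_EXT_INT_CODE);
    rc |= put_guest(vcpu, code,   (u16 __user *) __LC_EXT_CPU_ADDR);
    rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
                        &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
    if (rc)         /* nonzero if any store failed; value itself unused */
            do_exit(SIGKILL);
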
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4cf35a0a79e7..c1c7c683fa26 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -142,12 +142,16 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
142 | case KVM_CAP_ONE_REG: | 142 | case KVM_CAP_ONE_REG: |
143 | case KVM_CAP_ENABLE_CAP: | 143 | case KVM_CAP_ENABLE_CAP: |
144 | case KVM_CAP_S390_CSS_SUPPORT: | 144 | case KVM_CAP_S390_CSS_SUPPORT: |
145 | case KVM_CAP_IOEVENTFD: | ||
145 | r = 1; | 146 | r = 1; |
146 | break; | 147 | break; |
147 | case KVM_CAP_NR_VCPUS: | 148 | case KVM_CAP_NR_VCPUS: |
148 | case KVM_CAP_MAX_VCPUS: | 149 | case KVM_CAP_MAX_VCPUS: |
149 | r = KVM_MAX_VCPUS; | 150 | r = KVM_MAX_VCPUS; |
150 | break; | 151 | break; |
152 | case KVM_CAP_NR_MEMSLOTS: | ||
153 | r = KVM_USER_MEM_SLOTS; | ||
154 | break; | ||
151 | case KVM_CAP_S390_COW: | 155 | case KVM_CAP_S390_COW: |
152 | r = MACHINE_HAS_ESOP; | 156 | r = MACHINE_HAS_ESOP; |
153 | break; | 157 | break; |
@@ -632,8 +636,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
632 | } else { | 636 | } else { |
633 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); | 637 | VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); |
634 | trace_kvm_s390_sie_fault(vcpu); | 638 | trace_kvm_s390_sie_fault(vcpu); |
635 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 639 | rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
636 | rc = 0; | ||
637 | } | 640 | } |
638 | } | 641 | } |
639 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", | 642 | VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", |
@@ -974,22 +977,13 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
974 | /* Section: memory related */ | 977 | /* Section: memory related */ |
975 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 978 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
976 | struct kvm_memory_slot *memslot, | 979 | struct kvm_memory_slot *memslot, |
977 | struct kvm_memory_slot old, | ||
978 | struct kvm_userspace_memory_region *mem, | 980 | struct kvm_userspace_memory_region *mem, |
979 | bool user_alloc) | 981 | enum kvm_mr_change change) |
980 | { | 982 | { |
981 | /* A few sanity checks. We can have exactly one memory slot which has | 983 | /* A few sanity checks. We can have memory slots which have to be |
982 | to start at guest virtual zero and which has to be located at a | 984 | located/ended at a segment boundary (1MB). The memory in userland is |
983 | page boundary in userland and which has to end at a page boundary. | 985 | ok to be fragmented into various different vmas. It is okay to mmap() |
984 | The memory in userland is ok to be fragmented into various different | 986 | and munmap() stuff in this slot after doing this call at any time */ |
985 | vmas. It is okay to mmap() and munmap() stuff in this slot after | ||
986 | doing this call at any time */ | ||
987 | |||
988 | if (mem->slot) | ||
989 | return -EINVAL; | ||
990 | |||
991 | if (mem->guest_phys_addr) | ||
992 | return -EINVAL; | ||
993 | 987 | ||
994 | if (mem->userspace_addr & 0xffffful) | 988 | if (mem->userspace_addr & 0xffffful) |
995 | return -EINVAL; | 989 | return -EINVAL; |
@@ -997,19 +991,26 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
997 | if (mem->memory_size & 0xffffful) | 991 | if (mem->memory_size & 0xffffful) |
998 | return -EINVAL; | 992 | return -EINVAL; |
999 | 993 | ||
1000 | if (!user_alloc) | ||
1001 | return -EINVAL; | ||
1002 | |||
1003 | return 0; | 994 | return 0; |
1004 | } | 995 | } |
1005 | 996 | ||
1006 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 997 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
1007 | struct kvm_userspace_memory_region *mem, | 998 | struct kvm_userspace_memory_region *mem, |
1008 | struct kvm_memory_slot old, | 999 | const struct kvm_memory_slot *old, |
1009 | bool user_alloc) | 1000 | enum kvm_mr_change change) |
1010 | { | 1001 | { |
1011 | int rc; | 1002 | int rc; |
1012 | 1003 | ||
1004 | /* If the basics of the memslot do not change, we do not want | ||
1005 | * to update the gmap. Every update causes several unnecessary | ||
1006 | * segment translation exceptions. This is usually handled just | ||
1007 | * fine by the normal fault handler + gmap, but it will also | ||
1008 | * cause faults on the prefix page of running guest CPUs. | ||
1009 | */ | ||
1010 | if (old->userspace_addr == mem->userspace_addr && | ||
1011 | old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && | ||
1012 | old->npages * PAGE_SIZE == mem->memory_size) | ||
1013 | return; | ||
1013 | 1014 | ||
1014 | rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, | 1015 | rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, |
1015 | mem->guest_phys_addr, mem->memory_size); | 1016 | mem->guest_phys_addr, mem->memory_size); |
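
With the single-slot restriction gone, any slot now passes kvm_arch_prepare_memory_region() as long as its host address and size sit on the 1 MB segment boundary (& 0xfffff) that the remaining checks test for, and the guest physical base is no longer forced to zero. A hedged userspace sketch; the caller is assumed to supply a 1 MB aligned uaddr (e.g. from hugetlbfs):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int add_slot(int vm_fd, __u32 slot, __u64 gpa,
                        void *uaddr, __u64 size)
    {
            struct kvm_userspace_memory_region region = {
                    .slot            = slot,
                    .guest_phys_addr = gpa,           /* nonzero is now ok */
                    .memory_size     = size,          /* & 0xfffff must be 0 */
                    .userspace_addr  = (__u64) uaddr, /* & 0xfffff must be 0 */
            };
            return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
    }
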
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4d89d64a8161..efc14f687265 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -110,12 +110,12 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); | |||
110 | void kvm_s390_tasklet(unsigned long parm); | 110 | void kvm_s390_tasklet(unsigned long parm); |
111 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); | 111 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); |
112 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); | 112 | void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); |
113 | int kvm_s390_inject_vm(struct kvm *kvm, | 113 | int __must_check kvm_s390_inject_vm(struct kvm *kvm, |
114 | struct kvm_s390_interrupt *s390int); | 114 | struct kvm_s390_interrupt *s390int); |
115 | int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, | 115 | int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, |
116 | struct kvm_s390_interrupt *s390int); | 116 | struct kvm_s390_interrupt *s390int); |
117 | int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); | 117 | int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); |
118 | int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); | 118 | int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); |
119 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, | 119 | struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, |
120 | u64 cr6, u64 schid); | 120 | u64 cr6, u64 schid); |
121 | 121 | ||
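
__must_check expands to GCC's warn_unused_result attribute, so a caller that silently drops an injection result now gets a compile-time warning. That is what drives the "return kvm_s390_inject_program_int(...)" pattern adopted throughout intercept.c and priv.c in this series. A standalone illustration of the effect:

    #define __must_check __attribute__((warn_unused_result))

    static int __must_check inject(int code)
    {
            return code ? -EFAULT : 0;
    }

    static int caller(void)
    {
            inject(1);              /* warning: ignoring return value */
            return inject(1);       /* ok: result is propagated */
    }
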
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 0ef9894606e5..6bbd7b5a0bbe 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
@@ -14,6 +14,8 @@ | |||
14 | #include <linux/kvm.h> | 14 | #include <linux/kvm.h> |
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
17 | #include <linux/compat.h> | ||
18 | #include <asm/asm-offsets.h> | ||
17 | #include <asm/current.h> | 19 | #include <asm/current.h> |
18 | #include <asm/debug.h> | 20 | #include <asm/debug.h> |
19 | #include <asm/ebcdic.h> | 21 | #include <asm/ebcdic.h> |
@@ -35,31 +37,24 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) | |||
35 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 37 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
36 | 38 | ||
37 | /* must be word boundary */ | 39 | /* must be word boundary */ |
38 | if (operand2 & 3) { | 40 | if (operand2 & 3) |
39 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 41 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
40 | goto out; | ||
41 | } | ||
42 | 42 | ||
43 | /* get the value */ | 43 | /* get the value */ |
44 | if (get_guest_u32(vcpu, operand2, &address)) { | 44 | if (get_guest(vcpu, address, (u32 __user *) operand2)) |
45 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 45 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
46 | goto out; | ||
47 | } | ||
48 | 46 | ||
49 | address = address & 0x7fffe000u; | 47 | address = address & 0x7fffe000u; |
50 | 48 | ||
51 | /* make sure that the new value is valid memory */ | 49 | /* make sure that the new value is valid memory */ |
52 | if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || | 50 | if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || |
53 | (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { | 51 | (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) |
54 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 52 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
55 | goto out; | ||
56 | } | ||
57 | 53 | ||
58 | kvm_s390_set_prefix(vcpu, address); | 54 | kvm_s390_set_prefix(vcpu, address); |
59 | 55 | ||
60 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); | 56 | VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); |
61 | trace_kvm_s390_handle_prefix(vcpu, 1, address); | 57 | trace_kvm_s390_handle_prefix(vcpu, 1, address); |
62 | out: | ||
63 | return 0; | 58 | return 0; |
64 | } | 59 | } |
65 | 60 | ||
@@ -73,49 +68,37 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) | |||
73 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 68 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
74 | 69 | ||
75 | /* must be word boundary */ | 70 | /* must be word boundary */ |
76 | if (operand2 & 3) { | 71 | if (operand2 & 3) |
77 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 72 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
78 | goto out; | ||
79 | } | ||
80 | 73 | ||
81 | address = vcpu->arch.sie_block->prefix; | 74 | address = vcpu->arch.sie_block->prefix; |
82 | address = address & 0x7fffe000u; | 75 | address = address & 0x7fffe000u; |
83 | 76 | ||
84 | /* store the value */ | 77 | /* store the value */ |
85 | if (put_guest_u32(vcpu, operand2, address)) { | 78 | if (put_guest(vcpu, address, (u32 __user *)operand2)) |
86 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 79 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
87 | goto out; | ||
88 | } | ||
89 | 80 | ||
90 | VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); | 81 | VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); |
91 | trace_kvm_s390_handle_prefix(vcpu, 0, address); | 82 | trace_kvm_s390_handle_prefix(vcpu, 0, address); |
92 | out: | ||
93 | return 0; | 83 | return 0; |
94 | } | 84 | } |
95 | 85 | ||
96 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) | 86 | static int handle_store_cpu_address(struct kvm_vcpu *vcpu) |
97 | { | 87 | { |
98 | u64 useraddr; | 88 | u64 useraddr; |
99 | int rc; | ||
100 | 89 | ||
101 | vcpu->stat.instruction_stap++; | 90 | vcpu->stat.instruction_stap++; |
102 | 91 | ||
103 | useraddr = kvm_s390_get_base_disp_s(vcpu); | 92 | useraddr = kvm_s390_get_base_disp_s(vcpu); |
104 | 93 | ||
105 | if (useraddr & 1) { | 94 | if (useraddr & 1) |
106 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 95 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
107 | goto out; | ||
108 | } | ||
109 | 96 | ||
110 | rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); | 97 | if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr)) |
111 | if (rc == -EFAULT) { | 98 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
112 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
113 | goto out; | ||
114 | } | ||
115 | 99 | ||
116 | VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); | 100 | VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr); |
117 | trace_kvm_s390_handle_stap(vcpu, useraddr); | 101 | trace_kvm_s390_handle_stap(vcpu, useraddr); |
118 | out: | ||
119 | return 0; | 102 | return 0; |
120 | } | 103 | } |
121 | 104 | ||
@@ -129,36 +112,38 @@ static int handle_skey(struct kvm_vcpu *vcpu) | |||
129 | 112 | ||
130 | static int handle_tpi(struct kvm_vcpu *vcpu) | 113 | static int handle_tpi(struct kvm_vcpu *vcpu) |
131 | { | 114 | { |
132 | u64 addr; | ||
133 | struct kvm_s390_interrupt_info *inti; | 115 | struct kvm_s390_interrupt_info *inti; |
116 | u64 addr; | ||
134 | int cc; | 117 | int cc; |
135 | 118 | ||
136 | addr = kvm_s390_get_base_disp_s(vcpu); | 119 | addr = kvm_s390_get_base_disp_s(vcpu); |
137 | 120 | if (addr & 3) | |
121 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
122 | cc = 0; | ||
138 | inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); | 123 | inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); |
139 | if (inti) { | 124 | if (!inti) |
140 | if (addr) { | 125 | goto no_interrupt; |
141 | /* | 126 | cc = 1; |
142 | * Store the two-word I/O interruption code into the | 127 | if (addr) { |
143 | * provided area. | 128 | /* |
144 | */ | 129 | * Store the two-word I/O interruption code into the |
145 | put_guest_u16(vcpu, addr, inti->io.subchannel_id); | 130 | * provided area. |
146 | put_guest_u16(vcpu, addr + 2, inti->io.subchannel_nr); | 131 | */ |
147 | put_guest_u32(vcpu, addr + 4, inti->io.io_int_parm); | 132 | put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) addr); |
148 | } else { | 133 | put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) (addr + 2)); |
149 | /* | 134 | put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) (addr + 4)); |
150 | * Store the three-word I/O interruption code into | 135 | } else { |
151 | * the appropriate lowcore area. | 136 | /* |
152 | */ | 137 | * Store the three-word I/O interruption code into |
153 | put_guest_u16(vcpu, 184, inti->io.subchannel_id); | 138 | * the appropriate lowcore area. |
154 | put_guest_u16(vcpu, 186, inti->io.subchannel_nr); | 139 | */ |
155 | put_guest_u32(vcpu, 188, inti->io.io_int_parm); | 140 | put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID); |
156 | put_guest_u32(vcpu, 192, inti->io.io_int_word); | 141 | put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR); |
157 | } | 142 | put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM); |
158 | cc = 1; | 143 | put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD); |
159 | } else | 144 | } |
160 | cc = 0; | ||
161 | kfree(inti); | 145 | kfree(inti); |
146 | no_interrupt: | ||
162 | /* Set condition code and we're done. */ | 147 | /* Set condition code and we're done. */ |
163 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 148 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
164 | vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; | 149 | vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44; |
@@ -230,13 +215,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
230 | 215 | ||
231 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), | 216 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), |
232 | &facility_list, sizeof(facility_list)); | 217 | &facility_list, sizeof(facility_list)); |
233 | if (rc == -EFAULT) | 218 | if (rc) |
234 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 219 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
235 | else { | 220 | VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); |
236 | VCPU_EVENT(vcpu, 5, "store facility list value %x", | 221 | trace_kvm_s390_handle_stfl(vcpu, facility_list); |
237 | facility_list); | ||
238 | trace_kvm_s390_handle_stfl(vcpu, facility_list); | ||
239 | } | ||
240 | return 0; | 222 | return 0; |
241 | } | 223 | } |
242 | 224 | ||
@@ -249,112 +231,80 @@ static void handle_new_psw(struct kvm_vcpu *vcpu) | |||
249 | 231 | ||
250 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) | 232 | #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) |
251 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL | 233 | #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL |
252 | #define PSW_ADDR_24 0x00000000000fffffUL | 234 | #define PSW_ADDR_24 0x0000000000ffffffUL |
253 | #define PSW_ADDR_31 0x000000007fffffffUL | 235 | #define PSW_ADDR_31 0x000000007fffffffUL |
254 | 236 | ||
237 | static int is_valid_psw(psw_t *psw) { | ||
238 | if (psw->mask & PSW_MASK_UNASSIGNED) | ||
239 | return 0; | ||
240 | if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) { | ||
241 | if (psw->addr & ~PSW_ADDR_31) | ||
242 | return 0; | ||
243 | } | ||
244 | if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24)) | ||
245 | return 0; | ||
246 | if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_EA) | ||
247 | return 0; | ||
248 | return 1; | ||
249 | } | ||
250 | |||
255 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) | 251 | int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) |
256 | { | 252 | { |
257 | u64 addr; | 253 | psw_t *gpsw = &vcpu->arch.sie_block->gpsw; |
258 | psw_compat_t new_psw; | 254 | psw_compat_t new_psw; |
255 | u64 addr; | ||
259 | 256 | ||
260 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) | 257 | if (gpsw->mask & PSW_MASK_PSTATE) |
261 | return kvm_s390_inject_program_int(vcpu, | 258 | return kvm_s390_inject_program_int(vcpu, |
262 | PGM_PRIVILEGED_OPERATION); | 259 | PGM_PRIVILEGED_OPERATION); |
263 | |||
264 | addr = kvm_s390_get_base_disp_s(vcpu); | 260 | addr = kvm_s390_get_base_disp_s(vcpu); |
265 | 261 | if (addr & 7) | |
266 | if (addr & 7) { | 262 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
267 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 263 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) |
268 | goto out; | 264 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
269 | } | 265 | if (!(new_psw.mask & PSW32_MASK_BASE)) |
270 | 266 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | |
271 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { | 267 | gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32; |
272 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 268 | gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE; |
273 | goto out; | 269 | gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; |
274 | } | 270 | if (!is_valid_psw(gpsw)) |
275 | 271 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | |
276 | if (!(new_psw.mask & PSW32_MASK_BASE)) { | ||
277 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
278 | goto out; | ||
279 | } | ||
280 | |||
281 | vcpu->arch.sie_block->gpsw.mask = | ||
282 | (new_psw.mask & ~PSW32_MASK_BASE) << 32; | ||
283 | vcpu->arch.sie_block->gpsw.addr = new_psw.addr; | ||
284 | |||
285 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || | ||
286 | (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && | ||
287 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || | ||
288 | ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
289 | PSW_MASK_EA)) { | ||
290 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
291 | goto out; | ||
292 | } | ||
293 | |||
294 | handle_new_psw(vcpu); | 272 | handle_new_psw(vcpu); |
295 | out: | ||
296 | return 0; | 273 | return 0; |
297 | } | 274 | } |
298 | 275 | ||
299 | static int handle_lpswe(struct kvm_vcpu *vcpu) | 276 | static int handle_lpswe(struct kvm_vcpu *vcpu) |
300 | { | 277 | { |
301 | u64 addr; | ||
302 | psw_t new_psw; | 278 | psw_t new_psw; |
279 | u64 addr; | ||
303 | 280 | ||
304 | addr = kvm_s390_get_base_disp_s(vcpu); | 281 | addr = kvm_s390_get_base_disp_s(vcpu); |
305 | 282 | if (addr & 7) | |
306 | if (addr & 7) { | 283 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
307 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 284 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) |
308 | goto out; | 285 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
309 | } | 286 | vcpu->arch.sie_block->gpsw = new_psw; |
310 | 287 | if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) | |
311 | if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw))) { | 288 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
312 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
313 | goto out; | ||
314 | } | ||
315 | |||
316 | vcpu->arch.sie_block->gpsw.mask = new_psw.mask; | ||
317 | vcpu->arch.sie_block->gpsw.addr = new_psw.addr; | ||
318 | |||
319 | if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_UNASSIGNED) || | ||
320 | (((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
321 | PSW_MASK_BA) && | ||
322 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_31)) || | ||
323 | (!(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) && | ||
324 | (vcpu->arch.sie_block->gpsw.addr & ~PSW_ADDR_24)) || | ||
325 | ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_ADDR_MODE) == | ||
326 | PSW_MASK_EA)) { | ||
327 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | ||
328 | goto out; | ||
329 | } | ||
330 | |||
331 | handle_new_psw(vcpu); | 289 | handle_new_psw(vcpu); |
332 | out: | ||
333 | return 0; | 290 | return 0; |
334 | } | 291 | } |
335 | 292 | ||
336 | static int handle_stidp(struct kvm_vcpu *vcpu) | 293 | static int handle_stidp(struct kvm_vcpu *vcpu) |
337 | { | 294 | { |
338 | u64 operand2; | 295 | u64 operand2; |
339 | int rc; | ||
340 | 296 | ||
341 | vcpu->stat.instruction_stidp++; | 297 | vcpu->stat.instruction_stidp++; |
342 | 298 | ||
343 | operand2 = kvm_s390_get_base_disp_s(vcpu); | 299 | operand2 = kvm_s390_get_base_disp_s(vcpu); |
344 | 300 | ||
345 | if (operand2 & 7) { | 301 | if (operand2 & 7) |
346 | kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); | 302 | return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); |
347 | goto out; | ||
348 | } | ||
349 | 303 | ||
350 | rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); | 304 | if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2)) |
351 | if (rc == -EFAULT) { | 305 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
352 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
353 | goto out; | ||
354 | } | ||
355 | 306 | ||
356 | VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); | 307 | VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); |
357 | out: | ||
358 | return 0; | 308 | return 0; |
359 | } | 309 | } |
360 | 310 | ||
@@ -394,8 +344,9 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
394 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; | 344 | int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; |
395 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; | 345 | int sel1 = vcpu->run->s.regs.gprs[0] & 0xff; |
396 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; | 346 | int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff; |
347 | unsigned long mem = 0; | ||
397 | u64 operand2; | 348 | u64 operand2; |
398 | unsigned long mem; | 349 | int rc = 0; |
399 | 350 | ||
400 | vcpu->stat.instruction_stsi++; | 351 | vcpu->stat.instruction_stsi++; |
401 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); | 352 | VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); |
@@ -414,37 +365,37 @@ static int handle_stsi(struct kvm_vcpu *vcpu) | |||
414 | case 2: | 365 | case 2: |
415 | mem = get_zeroed_page(GFP_KERNEL); | 366 | mem = get_zeroed_page(GFP_KERNEL); |
416 | if (!mem) | 367 | if (!mem) |
417 | goto out_fail; | 368 | goto out_no_data; |
418 | if (stsi((void *) mem, fc, sel1, sel2)) | 369 | if (stsi((void *) mem, fc, sel1, sel2)) |
419 | goto out_mem; | 370 | goto out_no_data; |
420 | break; | 371 | break; |
421 | case 3: | 372 | case 3: |
422 | if (sel1 != 2 || sel2 != 2) | 373 | if (sel1 != 2 || sel2 != 2) |
423 | goto out_fail; | 374 | goto out_no_data; |
424 | mem = get_zeroed_page(GFP_KERNEL); | 375 | mem = get_zeroed_page(GFP_KERNEL); |
425 | if (!mem) | 376 | if (!mem) |
426 | goto out_fail; | 377 | goto out_no_data; |
427 | handle_stsi_3_2_2(vcpu, (void *) mem); | 378 | handle_stsi_3_2_2(vcpu, (void *) mem); |
428 | break; | 379 | break; |
429 | default: | 380 | default: |
430 | goto out_fail; | 381 | goto out_no_data; |
431 | } | 382 | } |
432 | 383 | ||
433 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { | 384 | if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { |
434 | kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | 385 | rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); |
435 | goto out_mem; | 386 | goto out_exception; |
436 | } | 387 | } |
437 | trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); | 388 | trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); |
438 | free_page(mem); | 389 | free_page(mem); |
439 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 390 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
440 | vcpu->run->s.regs.gprs[0] = 0; | 391 | vcpu->run->s.regs.gprs[0] = 0; |
441 | return 0; | 392 | return 0; |
442 | out_mem: | 393 | out_no_data: |
443 | free_page(mem); | ||
444 | out_fail: | ||
445 | /* condition code 3 */ | 394 | /* condition code 3 */ |
446 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; | 395 | vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; |
447 | return 0; | 396 | out_exception: |
397 | free_page(mem); | ||
398 | return rc; | ||
448 | } | 399 | } |
449 | 400 | ||
450 | static const intercept_handler_t b2_handlers[256] = { | 401 | static const intercept_handler_t b2_handlers[256] = { |
@@ -575,20 +526,13 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
575 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) | 526 | if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) |
576 | return -EOPNOTSUPP; | 527 | return -EOPNOTSUPP; |
577 | 528 | ||
578 | |||
579 | /* we must resolve the address without holding the mmap semaphore. | ||
580 | * This is ok since the userspace hypervisor is not supposed to change | ||
581 | * the mapping while the guest queries the memory. Otherwise the guest | ||
582 | * might crash or get wrong info anyway. */ | ||
583 | user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); | ||
584 | |||
585 | down_read(¤t->mm->mmap_sem); | 529 | down_read(¤t->mm->mmap_sem); |
530 | user_address = __gmap_translate(address1, vcpu->arch.gmap); | ||
531 | if (IS_ERR_VALUE(user_address)) | ||
532 | goto out_inject; | ||
586 | vma = find_vma(current->mm, user_address); | 533 | vma = find_vma(current->mm, user_address); |
587 | if (!vma) { | 534 | if (!vma) |
588 | up_read(¤t->mm->mmap_sem); | 535 | goto out_inject; |
589 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
590 | } | ||
591 | |||
592 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); | 536 | vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); |
593 | if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) | 537 | if (!(vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_READ)) |
594 | vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); | 538 | vcpu->arch.sie_block->gpsw.mask |= (1ul << 44); |
@@ -597,6 +541,10 @@ static int handle_tprot(struct kvm_vcpu *vcpu) | |||
597 | 541 | ||
598 | up_read(¤t->mm->mmap_sem); | 542 | up_read(¤t->mm->mmap_sem); |
599 | return 0; | 543 | return 0; |
544 | |||
545 | out_inject: | ||
546 | up_read(¤t->mm->mmap_sem); | ||
547 | return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); | ||
600 | } | 548 | } |
601 | 549 | ||
602 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) | 550 | int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) |
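
handle_tprot() changes shape the same way: instead of resolving the guest address before taking mmap_sem (the deleted comment explained why that used to be tolerable), it now calls __gmap_translate() under the semaphore, and both failure points (translation error, no VMA) funnel into a single out_inject exit that releases the lock before injecting PGM_ADDRESSING. The condition code reported to the guest comes from the VMA protection bits. A rough model of that mapping, taking the VM_READ/VM_WRITE flags and the PSW CC position as given:

    #define VM_READ  0x1UL
    #define VM_WRITE 0x2UL

    /* TPROT condition code for a resolved vma (model of the hunk above) */
    static int tprot_cc(unsigned long vm_flags)
    {
            if (!(vm_flags & VM_WRITE) && (vm_flags & VM_READ))
                    return 1;       /* fetch allowed, store not allowed */
            return 0;               /* fetch and store allowed */
    }

    /* the caller merges it into the guest PSW:
     *   psw_mask = (psw_mask & ~(3UL << 44)) | ((unsigned long)cc << 44);
     */
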
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 40afa0005c69..9bd4ecac72be 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -19,6 +19,10 @@ BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | |||
19 | 19 | ||
20 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) | 20 | BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) |
21 | 21 | ||
22 | #ifdef CONFIG_HAVE_KVM | ||
23 | BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) | ||
24 | #endif | ||
25 | |||
22 | /* | 26 | /* |
23 | * every pentium local APIC has two 'local interrupts', with a | 27 | * every pentium local APIC has two 'local interrupts', with a |
24 | * soft-definable vector attached to both interrupts, one of | 28 | * soft-definable vector attached to both interrupts, one of |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 81f04cee5f74..ab0ae1aa6d0a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -12,6 +12,9 @@ typedef struct { | |||
12 | unsigned int irq_spurious_count; | 12 | unsigned int irq_spurious_count; |
13 | unsigned int icr_read_retry_count; | 13 | unsigned int icr_read_retry_count; |
14 | #endif | 14 | #endif |
15 | #ifdef CONFIG_HAVE_KVM | ||
16 | unsigned int kvm_posted_intr_ipis; | ||
17 | #endif | ||
15 | unsigned int x86_platform_ipis; /* arch dependent */ | 18 | unsigned int x86_platform_ipis; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | 19 | unsigned int apic_perf_irqs; |
17 | unsigned int apic_irq_work_irqs; | 20 | unsigned int apic_irq_work_irqs; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 10a78c3d3d5a..1da97efad08a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -28,6 +28,7 @@ | |||
28 | /* Interrupt handlers registered during init_IRQ */ | 28 | /* Interrupt handlers registered during init_IRQ */ |
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void x86_platform_ipi(void); | 30 | extern void x86_platform_ipi(void); |
31 | extern void kvm_posted_intr_ipi(void); | ||
31 | extern void error_interrupt(void); | 32 | extern void error_interrupt(void); |
32 | extern void irq_work_interrupt(void); | 33 | extern void irq_work_interrupt(void); |
33 | 34 | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index aac5fa62a86c..5702d7e3111d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -102,6 +102,11 @@ | |||
102 | */ | 102 | */ |
103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 | 103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 |
104 | 104 | ||
105 | /* Vector for KVM to deliver posted interrupt IPI */ | ||
106 | #ifdef CONFIG_HAVE_KVM | ||
107 | #define POSTED_INTR_VECTOR 0xf2 | ||
108 | #endif | ||
109 | |||
105 | /* | 110 | /* |
106 | * IRQ work vector: | 111 | * IRQ work vector: |
107 | */ | 112 | */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4979778cc7fb..3741c653767c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <asm/msr-index.h> | 31 | #include <asm/msr-index.h> |
32 | #include <asm/asm.h> | 32 | #include <asm/asm.h> |
33 | 33 | ||
34 | #define KVM_MAX_VCPUS 254 | 34 | #define KVM_MAX_VCPUS 255 |
35 | #define KVM_SOFT_MAX_VCPUS 160 | 35 | #define KVM_SOFT_MAX_VCPUS 160 |
36 | #define KVM_USER_MEM_SLOTS 125 | 36 | #define KVM_USER_MEM_SLOTS 125 |
37 | /* memory slots that are not exposed to userspace */ | 37 | /* memory slots that are not exposed to userspace */ |
@@ -43,6 +43,8 @@ | |||
43 | #define KVM_PIO_PAGE_OFFSET 1 | 43 | #define KVM_PIO_PAGE_OFFSET 1 |
44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 | 44 | #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 |
45 | 45 | ||
46 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS | ||
47 | |||
46 | #define CR0_RESERVED_BITS \ | 48 | #define CR0_RESERVED_BITS \ |
47 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | 49 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ |
48 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | 50 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ |
@@ -94,9 +96,6 @@ | |||
94 | 96 | ||
95 | #define ASYNC_PF_PER_VCPU 64 | 97 | #define ASYNC_PF_PER_VCPU 64 |
96 | 98 | ||
97 | extern raw_spinlock_t kvm_lock; | ||
98 | extern struct list_head vm_list; | ||
99 | |||
100 | struct kvm_vcpu; | 99 | struct kvm_vcpu; |
101 | struct kvm; | 100 | struct kvm; |
102 | struct kvm_async_pf; | 101 | struct kvm_async_pf; |
@@ -230,6 +229,7 @@ struct kvm_mmu_page { | |||
230 | #endif | 229 | #endif |
231 | 230 | ||
232 | int write_flooding_count; | 231 | int write_flooding_count; |
232 | bool mmio_cached; | ||
233 | }; | 233 | }; |
234 | 234 | ||
235 | struct kvm_pio_request { | 235 | struct kvm_pio_request { |
@@ -345,7 +345,6 @@ struct kvm_vcpu_arch { | |||
345 | unsigned long apic_attention; | 345 | unsigned long apic_attention; |
346 | int32_t apic_arb_prio; | 346 | int32_t apic_arb_prio; |
347 | int mp_state; | 347 | int mp_state; |
348 | int sipi_vector; | ||
349 | u64 ia32_misc_enable_msr; | 348 | u64 ia32_misc_enable_msr; |
350 | bool tpr_access_reporting; | 349 | bool tpr_access_reporting; |
351 | 350 | ||
@@ -643,7 +642,7 @@ struct kvm_x86_ops { | |||
643 | /* Create, but do not attach this VCPU */ | 642 | /* Create, but do not attach this VCPU */ |
644 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); | 643 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); |
645 | void (*vcpu_free)(struct kvm_vcpu *vcpu); | 644 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
646 | int (*vcpu_reset)(struct kvm_vcpu *vcpu); | 645 | void (*vcpu_reset)(struct kvm_vcpu *vcpu); |
647 | 646 | ||
648 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); | 647 | void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); |
649 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 648 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
@@ -696,14 +695,16 @@ struct kvm_x86_ops { | |||
696 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 695 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
697 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 696 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
698 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | 697 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); |
699 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 698 | int (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
700 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 699 | int (*enable_irq_window)(struct kvm_vcpu *vcpu); |
701 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 700 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
702 | int (*vm_has_apicv)(struct kvm *kvm); | 701 | int (*vm_has_apicv)(struct kvm *kvm); |
703 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | 702 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
704 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); | 703 | void (*hwapic_isr_update)(struct kvm *kvm, int isr); |
705 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); | 704 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
706 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); | 705 | void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); |
706 | void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); | ||
707 | void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); | ||
707 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 708 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
708 | int (*get_tdp_level)(void); | 709 | int (*get_tdp_level)(void); |
709 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 710 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
@@ -730,6 +731,7 @@ struct kvm_x86_ops { | |||
730 | int (*check_intercept)(struct kvm_vcpu *vcpu, | 731 | int (*check_intercept)(struct kvm_vcpu *vcpu, |
731 | struct x86_instruction_info *info, | 732 | struct x86_instruction_info *info, |
732 | enum x86_intercept_stage stage); | 733 | enum x86_intercept_stage stage); |
734 | void (*handle_external_intr)(struct kvm_vcpu *vcpu); | ||
733 | }; | 735 | }; |
734 | 736 | ||
735 | struct kvm_arch_async_pf { | 737 | struct kvm_arch_async_pf { |
@@ -767,6 +769,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, | |||
767 | struct kvm_memory_slot *slot, | 769 | struct kvm_memory_slot *slot, |
768 | gfn_t gfn_offset, unsigned long mask); | 770 | gfn_t gfn_offset, unsigned long mask); |
769 | void kvm_mmu_zap_all(struct kvm *kvm); | 771 | void kvm_mmu_zap_all(struct kvm *kvm); |
772 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm); | ||
770 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); | 773 | unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); |
771 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); | 774 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); |
772 | 775 | ||
@@ -797,6 +800,7 @@ enum emulation_result { | |||
797 | #define EMULTYPE_TRAP_UD (1 << 1) | 800 | #define EMULTYPE_TRAP_UD (1 << 1) |
798 | #define EMULTYPE_SKIP (1 << 2) | 801 | #define EMULTYPE_SKIP (1 << 2) |
799 | #define EMULTYPE_RETRY (1 << 3) | 802 | #define EMULTYPE_RETRY (1 << 3) |
803 | #define EMULTYPE_NO_REEXECUTE (1 << 4) | ||
800 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, | 804 | int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, |
801 | int emulation_type, void *insn, int insn_len); | 805 | int emulation_type, void *insn, int insn_len); |
802 | 806 | ||
@@ -807,6 +811,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu, | |||
807 | } | 811 | } |
808 | 812 | ||
809 | void kvm_enable_efer_bits(u64); | 813 | void kvm_enable_efer_bits(u64); |
814 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); | ||
810 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); | 815 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); |
811 | int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); | 816 | int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); |
812 | 817 | ||
@@ -819,6 +824,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); | |||
819 | 824 | ||
820 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 825 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
821 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); | 826 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
827 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector); | ||
822 | 828 | ||
823 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, | 829 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
824 | int reason, bool has_error_code, u32 error_code); | 830 | int reason, bool has_error_code, u32 error_code); |
@@ -973,7 +979,6 @@ enum { | |||
973 | * Trap the fault and ignore the instruction if that happens. | 979 | * Trap the fault and ignore the instruction if that happens. |
974 | */ | 980 | */ |
975 | asmlinkage void kvm_spurious_fault(void); | 981 | asmlinkage void kvm_spurious_fault(void); |
976 | extern bool kvm_rebooting; | ||
977 | 982 | ||
978 | #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ | 983 | #define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ |
979 | "666: " insn "\n\t" \ | 984 | "666: " insn "\n\t" \ |
@@ -1002,6 +1007,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); | |||
1002 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | 1007 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
1003 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 1008 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
1004 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 1009 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
1010 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu); | ||
1005 | 1011 | ||
1006 | void kvm_define_shared_msr(unsigned index, u32 msr); | 1012 | void kvm_define_shared_msr(unsigned index, u32 msr); |
1007 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | 1013 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); |
@@ -1027,7 +1033,7 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); | |||
1027 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); | 1033 | void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); |
1028 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); | 1034 | bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); |
1029 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | 1035 | int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); |
1030 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 1036 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); |
1031 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); | 1037 | int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); |
1032 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); | 1038 | void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); |
1033 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); | 1039 | void kvm_deliver_pmi(struct kvm_vcpu *vcpu); |
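
Summarizing the kvm_host.h churn: struct kvm_x86_ops grows deliver_posted_interrupt() and sync_pir_to_irr() (the hardware fast path for interrupt delivery and the software merge backing it up), plus handle_external_intr() so the backend can handle host interrupts on VM exit; vcpu_reset() becomes void, the enable_nmi_window()/enable_irq_window() hooks now return int, kvm_pmu_set_msr() takes a struct msr_data, and sipi_vector leaves kvm_vcpu_arch for the local APIC, whose new pending_events machinery appears further down in lapic.c. The sync hook is consulted before any IRR scan, exactly as the lapic.c hunk below does:

    /* fold hardware-posted bits into the IRR before searching it */
    kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
    result = apic_search_irr(apic);
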
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index b6fbf860e398..f3e01a2cbaa1 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -65,11 +65,16 @@ | |||
65 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 | 65 | #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 |
66 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | 66 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 |
67 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 | 67 | #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 |
68 | #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 | ||
68 | 69 | ||
69 | 70 | ||
70 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 71 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
71 | #define PIN_BASED_NMI_EXITING 0x00000008 | 72 | #define PIN_BASED_NMI_EXITING 0x00000008 |
72 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 | 73 | #define PIN_BASED_VIRTUAL_NMIS 0x00000020 |
74 | #define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 | ||
75 | #define PIN_BASED_POSTED_INTR 0x00000080 | ||
76 | |||
77 | #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 | ||
73 | 78 | ||
74 | #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 | 79 | #define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 |
75 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 | 80 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 |
@@ -81,6 +86,8 @@ | |||
81 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 | 86 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 |
82 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 | 87 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 |
83 | 88 | ||
89 | #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff | ||
90 | |||
84 | #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 | 91 | #define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 |
85 | #define VM_ENTRY_IA32E_MODE 0x00000200 | 92 | #define VM_ENTRY_IA32E_MODE 0x00000200 |
86 | #define VM_ENTRY_SMM 0x00000400 | 93 | #define VM_ENTRY_SMM 0x00000400 |
@@ -89,9 +96,15 @@ | |||
89 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | 96 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 |
90 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 | 97 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 |
91 | 98 | ||
99 | #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff | ||
100 | |||
101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f | ||
102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | ||
103 | |||
92 | /* VMCS Encodings */ | 104 | /* VMCS Encodings */ |
93 | enum vmcs_field { | 105 | enum vmcs_field { |
94 | VIRTUAL_PROCESSOR_ID = 0x00000000, | 106 | VIRTUAL_PROCESSOR_ID = 0x00000000, |
107 | POSTED_INTR_NV = 0x00000002, | ||
95 | GUEST_ES_SELECTOR = 0x00000800, | 108 | GUEST_ES_SELECTOR = 0x00000800, |
96 | GUEST_CS_SELECTOR = 0x00000802, | 109 | GUEST_CS_SELECTOR = 0x00000802, |
97 | GUEST_SS_SELECTOR = 0x00000804, | 110 | GUEST_SS_SELECTOR = 0x00000804, |
@@ -126,6 +139,8 @@ enum vmcs_field { | |||
126 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, | 139 | VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, |
127 | APIC_ACCESS_ADDR = 0x00002014, | 140 | APIC_ACCESS_ADDR = 0x00002014, |
128 | APIC_ACCESS_ADDR_HIGH = 0x00002015, | 141 | APIC_ACCESS_ADDR_HIGH = 0x00002015, |
142 | POSTED_INTR_DESC_ADDR = 0x00002016, | ||
143 | POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, | ||
129 | EPT_POINTER = 0x0000201a, | 144 | EPT_POINTER = 0x0000201a, |
130 | EPT_POINTER_HIGH = 0x0000201b, | 145 | EPT_POINTER_HIGH = 0x0000201b, |
131 | EOI_EXIT_BITMAP0 = 0x0000201c, | 146 | EOI_EXIT_BITMAP0 = 0x0000201c, |
@@ -136,6 +151,8 @@ enum vmcs_field { | |||
136 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, | 151 | EOI_EXIT_BITMAP2_HIGH = 0x00002021, |
137 | EOI_EXIT_BITMAP3 = 0x00002022, | 152 | EOI_EXIT_BITMAP3 = 0x00002022, |
138 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, | 153 | EOI_EXIT_BITMAP3_HIGH = 0x00002023, |
154 | VMREAD_BITMAP = 0x00002026, | ||
155 | VMWRITE_BITMAP = 0x00002028, | ||
139 | GUEST_PHYSICAL_ADDRESS = 0x00002400, | 156 | GUEST_PHYSICAL_ADDRESS = 0x00002400, |
140 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, | 157 | GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, |
141 | VMCS_LINK_POINTER = 0x00002800, | 158 | VMCS_LINK_POINTER = 0x00002800, |
@@ -209,6 +226,7 @@ enum vmcs_field { | |||
209 | GUEST_INTERRUPTIBILITY_INFO = 0x00004824, | 226 | GUEST_INTERRUPTIBILITY_INFO = 0x00004824, |
210 | GUEST_ACTIVITY_STATE = 0X00004826, | 227 | GUEST_ACTIVITY_STATE = 0X00004826, |
211 | GUEST_SYSENTER_CS = 0x0000482A, | 228 | GUEST_SYSENTER_CS = 0x0000482A, |
229 | VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, | ||
212 | HOST_IA32_SYSENTER_CS = 0x00004c00, | 230 | HOST_IA32_SYSENTER_CS = 0x00004c00, |
213 | CR0_GUEST_HOST_MASK = 0x00006000, | 231 | CR0_GUEST_HOST_MASK = 0x00006000, |
214 | CR4_GUEST_HOST_MASK = 0x00006002, | 232 | CR4_GUEST_HOST_MASK = 0x00006002, |
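
asm/vmx.h picks up the pin-based controls for posted interrupts and the VMX preemption timer together with their VMCS fields (POSTED_INTR_NV, POSTED_INTR_DESC_ADDR, VMX_PREEMPTION_TIMER_VALUE) and the shadow-VMCS read/write bitmaps. VMX_MISC_PREEMPTION_TIMER_RATE_MASK covers bits 4:0 of IA32_VMX_MISC; the SDM defines the timer as counting down once every 2^rate TSC cycles, so converting a deadline into a timer value is a shift. A sketch under that assumption (vmx_misc and tsc_khz would come from the host; the helper name is made up):

    #include <stdint.h>

    #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f

    /* ns -> VMX_PREEMPTION_TIMER_VALUE; the timer ticks at TSC / 2^rate */
    static uint32_t ns_to_preemption_timer(uint64_t ns, uint64_t vmx_misc,
                                           uint64_t tsc_khz)
    {
            unsigned int rate = vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
            uint64_t tsc_ticks = ns * tsc_khz / 1000000ULL;

            return (uint32_t)(tsc_ticks >> rate);
    }
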
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a65ec29e6ffb..5d9a3033b3d7 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h | |||
@@ -29,7 +29,6 @@ | |||
29 | #define __KVM_HAVE_PIT | 29 | #define __KVM_HAVE_PIT |
30 | #define __KVM_HAVE_IOAPIC | 30 | #define __KVM_HAVE_IOAPIC |
31 | #define __KVM_HAVE_IRQ_LINE | 31 | #define __KVM_HAVE_IRQ_LINE |
32 | #define __KVM_HAVE_DEVICE_ASSIGNMENT | ||
33 | #define __KVM_HAVE_MSI | 32 | #define __KVM_HAVE_MSI |
34 | #define __KVM_HAVE_USER_NMI | 33 | #define __KVM_HAVE_USER_NMI |
35 | #define __KVM_HAVE_GUEST_DEBUG | 34 | #define __KVM_HAVE_GUEST_DEBUG |
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index b5757885d7a4..b3a4866661c5 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
@@ -528,6 +528,8 @@ | |||
528 | #define VMX_BASIC_MEM_TYPE_WB 6LLU | 528 | #define VMX_BASIC_MEM_TYPE_WB 6LLU |
529 | #define VMX_BASIC_INOUT 0x0040000000000000LLU | 529 | #define VMX_BASIC_INOUT 0x0040000000000000LLU |
530 | 530 | ||
531 | /* MSR_IA32_VMX_MISC bits */ | ||
532 | #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) | ||
531 | /* AMD-V MSRs */ | 533 | /* AMD-V MSRs */ |
532 | 534 | ||
533 | #define MSR_VM_CR 0xc0010114 | 535 | #define MSR_VM_CR 0xc0010114 |
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 2871fccfee68..d651082c7cf7 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h | |||
@@ -65,6 +65,7 @@ | |||
65 | #define EXIT_REASON_EOI_INDUCED 45 | 65 | #define EXIT_REASON_EOI_INDUCED 45 |
66 | #define EXIT_REASON_EPT_VIOLATION 48 | 66 | #define EXIT_REASON_EPT_VIOLATION 48 |
67 | #define EXIT_REASON_EPT_MISCONFIG 49 | 67 | #define EXIT_REASON_EPT_MISCONFIG 49 |
68 | #define EXIT_REASON_PREEMPTION_TIMER 52 | ||
68 | #define EXIT_REASON_WBINVD 54 | 69 | #define EXIT_REASON_WBINVD 54 |
69 | #define EXIT_REASON_XSETBV 55 | 70 | #define EXIT_REASON_XSETBV 55 |
70 | #define EXIT_REASON_APIC_WRITE 56 | 71 | #define EXIT_REASON_APIC_WRITE 56 |
@@ -110,7 +111,7 @@ | |||
110 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ | 111 | { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ |
111 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ | 112 | { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ |
112 | { EXIT_REASON_INVD, "INVD" }, \ | 113 | { EXIT_REASON_INVD, "INVD" }, \ |
113 | { EXIT_REASON_INVPCID, "INVPCID" } | 114 | { EXIT_REASON_INVPCID, "INVPCID" }, \ |
114 | 115 | { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" } | |
115 | 116 | ||
116 | #endif /* _UAPIVMX_H */ | 117 | #endif /* _UAPIVMX_H */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c1d01e6ca790..727208941030 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1166,6 +1166,11 @@ apicinterrupt LOCAL_TIMER_VECTOR \ | |||
1166 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ | 1166 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
1167 | x86_platform_ipi smp_x86_platform_ipi | 1167 | x86_platform_ipi smp_x86_platform_ipi |
1168 | 1168 | ||
1169 | #ifdef CONFIG_HAVE_KVM | ||
1170 | apicinterrupt POSTED_INTR_VECTOR \ | ||
1171 | kvm_posted_intr_ipi smp_kvm_posted_intr_ipi | ||
1172 | #endif | ||
1173 | |||
1169 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1174 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1170 | threshold_interrupt smp_threshold_interrupt | 1175 | threshold_interrupt smp_threshold_interrupt |
1171 | apicinterrupt THERMAL_APIC_VECTOR \ | 1176 | apicinterrupt THERMAL_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 84b778962c66..ac0631d8996f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -224,6 +224,28 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
224 | set_irq_regs(old_regs); | 224 | set_irq_regs(old_regs); |
225 | } | 225 | } |
226 | 226 | ||
227 | #ifdef CONFIG_HAVE_KVM | ||
228 | /* | ||
229 | * Handler for POSTED_INTERRUPT_VECTOR. | ||
230 | */ | ||
231 | void smp_kvm_posted_intr_ipi(struct pt_regs *regs) | ||
232 | { | ||
233 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
234 | |||
235 | ack_APIC_irq(); | ||
236 | |||
237 | irq_enter(); | ||
238 | |||
239 | exit_idle(); | ||
240 | |||
241 | inc_irq_stat(kvm_posted_intr_ipis); | ||
242 | |||
243 | irq_exit(); | ||
244 | |||
245 | set_irq_regs(old_regs); | ||
246 | } | ||
247 | #endif | ||
248 | |||
227 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 249 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
228 | 250 | ||
229 | #ifdef CONFIG_HOTPLUG_CPU | 251 | #ifdef CONFIG_HOTPLUG_CPU |
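
smp_kvm_posted_intr_ipi() is intentionally empty apart from the APIC ack and a statistics bump: if the notification arrives while the target CPU is in VMX non-root mode the processor delivers the posted interrupt to the guest entirely in hardware, and if it arrives in root mode the IPI's only job is to kick the CPU (out of halt, or back toward VM entry) so the pending PIR bits get noticed. The sending side, sketched from what a VMX backend would do (the helper name is illustrative):

    /* notify target_cpu that new bits were posted in its PIR */
    static void posted_intr_notify(int target_cpu)
    {
            /* in guest mode this IPI never reaches the handler above;
             * the CPU injects the posted interrupt directly */
            apic->send_IPI_mask(cpumask_of(target_cpu), POSTED_INTR_VECTOR);
    }
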
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7dc4e459c2b3..a2a1fbc594ff 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -172,6 +172,10 @@ static void __init apic_intr_init(void) | |||
172 | 172 | ||
173 | /* IPI for X86 platform specific use */ | 173 | /* IPI for X86 platform specific use */ |
174 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); | 174 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); |
175 | #ifdef CONFIG_HAVE_KVM | ||
176 | /* IPI for KVM to deliver posted interrupt */ | ||
177 | alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); | ||
178 | #endif | ||
175 | 179 | ||
176 | /* IPI vectors for APIC spurious and error interrupts */ | 180 | /* IPI vectors for APIC spurious and error interrupts */ |
177 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 181 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 0732f0089a3d..d2c381280e3c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -160,8 +160,12 @@ int kvm_register_clock(char *txt) | |||
160 | { | 160 | { |
161 | int cpu = smp_processor_id(); | 161 | int cpu = smp_processor_id(); |
162 | int low, high, ret; | 162 | int low, high, ret; |
163 | struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; | 163 | struct pvclock_vcpu_time_info *src; |
164 | |||
165 | if (!hv_clock) | ||
166 | return 0; | ||
164 | 167 | ||
168 | src = &hv_clock[cpu].pvti; | ||
165 | low = (int)slow_virt_to_phys(src) | 1; | 169 | low = (int)slow_virt_to_phys(src) | 1; |
166 | high = ((u64)slow_virt_to_phys(src) >> 32); | 170 | high = ((u64)slow_virt_to_phys(src) >> 32); |
167 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); | 171 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); |
@@ -276,6 +280,9 @@ int __init kvm_setup_vsyscall_timeinfo(void) | |||
276 | struct pvclock_vcpu_time_info *vcpu_time; | 280 | struct pvclock_vcpu_time_info *vcpu_time; |
277 | unsigned int size; | 281 | unsigned int size; |
278 | 282 | ||
283 | if (!hv_clock) | ||
284 | return 0; | ||
285 | |||
279 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); | 286 | size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); |
280 | 287 | ||
281 | preempt_disable(); | 288 | preempt_disable(); |
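
Both kvmclock fixes are the same NULL guard: hv_clock stays NULL when kvmclock is disabled or the early memblock allocation failed, yet kvm_register_clock() computed &hv_clock[cpu].pvti in its declaration, indexing the NULL base before any check could run (and kvm_setup_vsyscall_timeinfo() sized a mapping over it). The pattern, reduced to its core with stand-in types:

    /* stand-ins for the kernel's pvclock structures */
    struct pvclock_vcpu_time_info { unsigned char opaque[32]; };
    struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; };

    static struct pvclock_vcpu_time_info *
    pvti_for_cpu(struct pvclock_vsyscall_time_info *hv_clock, int cpu)
    {
            if (!hv_clock)                  /* never allocated / disabled */
                    return NULL;
            return &hv_clock[cpu].pvti;     /* index only after the check */
    }
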
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 586f00059805..a47a3e54b964 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -21,14 +21,13 @@ config KVM | |||
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
23 | depends on HIGH_RES_TIMERS | 23 | depends on HIGH_RES_TIMERS |
24 | # for device assignment: | ||
25 | depends on PCI | ||
26 | # for TASKSTATS/TASK_DELAY_ACCT: | 24 | # for TASKSTATS/TASK_DELAY_ACCT: |
27 | depends on NET | 25 | depends on NET |
28 | select PREEMPT_NOTIFIERS | 26 | select PREEMPT_NOTIFIERS |
29 | select MMU_NOTIFIER | 27 | select MMU_NOTIFIER |
30 | select ANON_INODES | 28 | select ANON_INODES |
31 | select HAVE_KVM_IRQCHIP | 29 | select HAVE_KVM_IRQCHIP |
30 | select HAVE_KVM_IRQ_ROUTING | ||
32 | select HAVE_KVM_EVENTFD | 31 | select HAVE_KVM_EVENTFD |
33 | select KVM_APIC_ARCHITECTURE | 32 | select KVM_APIC_ARCHITECTURE |
34 | select KVM_ASYNC_PF | 33 | select KVM_ASYNC_PF |
@@ -82,6 +81,17 @@ config KVM_MMU_AUDIT | |||
82 | This option adds a R/W KVM module parameter 'mmu_audit', which allows | 81 | This option adds a R/W KVM module parameter 'mmu_audit', which allows |
83 | auditing of the KVM MMU at runtime. | 82 | auditing of the KVM MMU at runtime. |
84 | 83 | ||
84 | config KVM_DEVICE_ASSIGNMENT | ||
85 | bool "KVM legacy PCI device assignment support" | ||
86 | depends on KVM && PCI && IOMMU_API | ||
87 | default y | ||
88 | ---help--- | ||
89 | Provide support for legacy PCI device assignment through KVM. The | ||
90 | kernel now also supports a full featured userspace device driver | ||
91 | framework through VFIO, which supersedes much of this support. | ||
92 | |||
93 | If unsure, say Y. | ||
94 | |||
85 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 95 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
86 | # the virtualization menu. | 96 | # the virtualization menu. |
87 | source drivers/vhost/Kconfig | 97 | source drivers/vhost/Kconfig |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 04d30401c5cb..d609e1d84048 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I. | |||
7 | 7 | ||
8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
9 | coalesced_mmio.o irq_comm.o eventfd.o \ | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
10 | assigned-dev.o) | 10 | irqchip.o) |
11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \ |
12 | assigned-dev.o iommu.o) | ||
12 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) | 13 | kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) |
13 | 14 | ||
14 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 15 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
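
The build change pairs with the new Kconfig entry: irqchip.o (split out of the old assigned-dev.o) is now always linked, while assigned-dev.o and iommu.o ride on CONFIG_KVM_DEVICE_ASSIGNMENT instead of CONFIG_IOMMU_API. Code that still wants the legacy ioctls can test the option at compile time; a hypothetical helper to illustrate the shape:

    /* hypothetical: report whether legacy assignment was built in */
    #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
    static inline bool kvm_has_device_assignment(void) { return true; }
    #else
    static inline bool kvm_has_device_assignment(void) { return false; }
    #endif
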
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a335cc6cde72..8e517bba6a7c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -132,8 +132,9 @@ | |||
132 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | 132 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ |
133 | #define No64 (1<<28) | 133 | #define No64 (1<<28) |
134 | #define PageTable (1 << 29) /* instruction used to write page table */ | 134 | #define PageTable (1 << 29) /* instruction used to write page table */ |
135 | #define NotImpl (1 << 30) /* instruction is not implemented */ | ||
135 | /* Source 2 operand type */ | 136 | /* Source 2 operand type */ |
136 | #define Src2Shift (30) | 137 | #define Src2Shift (31) |
137 | #define Src2None (OpNone << Src2Shift) | 138 | #define Src2None (OpNone << Src2Shift) |
138 | #define Src2CL (OpCL << Src2Shift) | 139 | #define Src2CL (OpCL << Src2Shift) |
139 | #define Src2ImmByte (OpImmByte << Src2Shift) | 140 | #define Src2ImmByte (OpImmByte << Src2Shift) |
@@ -1578,12 +1579,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1578 | 1579 | ||
1579 | memset(&seg_desc, 0, sizeof seg_desc); | 1580 | memset(&seg_desc, 0, sizeof seg_desc); |
1580 | 1581 | ||
1581 | if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) | 1582 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1582 | || ctxt->mode == X86EMUL_MODE_REAL) { | 1583 | /* set real mode segment descriptor (keep limit etc. for |
1583 | /* set real mode segment descriptor */ | 1584 | * unreal mode) */ |
1584 | ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); | 1585 | ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); |
1585 | set_desc_base(&seg_desc, selector << 4); | 1586 | set_desc_base(&seg_desc, selector << 4); |
1586 | goto load; | 1587 | goto load; |
1588 | } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) { | ||
1589 | /* VM86 needs a clean new segment descriptor */ | ||
1590 | set_desc_base(&seg_desc, selector << 4); | ||
1591 | set_desc_limit(&seg_desc, 0xffff); | ||
1592 | seg_desc.type = 3; | ||
1593 | seg_desc.p = 1; | ||
1594 | seg_desc.s = 1; | ||
1595 | seg_desc.dpl = 3; | ||
1596 | goto load; | ||
1587 | } | 1597 | } |
1588 | 1598 | ||
1589 | rpl = selector & 3; | 1599 | rpl = selector & 3; |
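
The load_segment_descriptor() split fixes VM86 segment loads: the old code sent them through the real-mode branch, which first reads the cached descriptor and so inherits its limit, DPL and type. Real mode wants that inheritance (it is what keeps "unreal mode" big segments alive, as the new comment says), but virtual-8086 mode requires a fresh 16-bit descriptor: base = selector << 4, limit 0xffff, present, DPL 3, type 3 (read/write data, accessed). A sketch of the descriptor being built, with a simplified struct in place of the kernel's desc_struct:

    #include <stdint.h>

    struct seg_desc {               /* simplified desc_struct */
            uint32_t base, limit;
            uint8_t type, s, dpl, p;
    };

    static struct seg_desc vm86_segment(uint16_t selector)
    {
            return (struct seg_desc){
                    .base  = (uint32_t)selector << 4,
                    .limit = 0xffff,        /* 64 KiB, byte granular */
                    .type  = 3,             /* read/write data, accessed */
                    .s     = 1,             /* code/data segment */
                    .dpl   = 3,             /* VM86 runs at CPL 3 */
                    .p     = 1,             /* present */
            };
    }
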
@@ -3615,7 +3625,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) | |||
3615 | #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } | 3625 | #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i } |
3616 | #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ | 3626 | #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \ |
3617 | .check_perm = (_p) } | 3627 | .check_perm = (_p) } |
3618 | #define N D(0) | 3628 | #define N D(NotImpl) |
3619 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } | 3629 | #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } |
3620 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } | 3630 | #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } |
3621 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } | 3631 | #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } |
@@ -3713,7 +3723,7 @@ static const struct opcode group5[] = { | |||
3713 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), | 3723 | I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), |
3714 | I(SrcMem | Stack, em_grp45), | 3724 | I(SrcMem | Stack, em_grp45), |
3715 | I(SrcMemFAddr | ImplicitOps, em_grp45), | 3725 | I(SrcMemFAddr | ImplicitOps, em_grp45), |
3716 | I(SrcMem | Stack, em_grp45), N, | 3726 | I(SrcMem | Stack, em_grp45), D(Undefined), |
3717 | }; | 3727 | }; |
3718 | 3728 | ||
3719 | static const struct opcode group6[] = { | 3729 | static const struct opcode group6[] = { |
@@ -4162,6 +4172,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
4162 | break; | 4172 | break; |
4163 | case OpMem8: | 4173 | case OpMem8: |
4164 | ctxt->memop.bytes = 1; | 4174 | ctxt->memop.bytes = 1; |
4175 | if (ctxt->memop.type == OP_REG) { | ||
4176 | ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm, 1); | ||
4177 | fetch_register_operand(&ctxt->memop); | ||
4178 | } | ||
4165 | goto mem_common; | 4179 | goto mem_common; |
4166 | case OpMem16: | 4180 | case OpMem16: |
4167 | ctxt->memop.bytes = 2; | 4181 | ctxt->memop.bytes = 2; |
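
The OpMem8 hunk covers byte operands whose ModRM encoding turned out to name a register (memop.type == OP_REG): the operand was decoded at the default size, so it must be re-decoded as a 1-byte register, because byte-register numbering is irregular: with no REX prefix, encodings 4-7 select AH/CH/DH/BH rather than the low byte of rSP-rDI. A toy decoder showing the quirk (flat regs[] array, little-endian host assumed, as in the real emulator):

    #include <stdint.h>

    /* returns a pointer to the byte register named by modrm_rm */
    static uint8_t *decode_reg8(uint64_t *regs, int modrm_rm, int has_rex)
    {
            if (!has_rex && modrm_rm >= 4 && modrm_rm < 8)
                    /* AH/CH/DH/BH: high byte of rAX/rCX/rDX/rBX */
                    return (uint8_t *)&regs[modrm_rm - 4] + 1;
            return (uint8_t *)&regs[modrm_rm];      /* low byte */
    }
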
@@ -4373,7 +4387,7 @@ done_prefixes: | |||
4373 | ctxt->intercept = opcode.intercept; | 4387 | ctxt->intercept = opcode.intercept; |
4374 | 4388 | ||
4375 | /* Unrecognised? */ | 4389 | /* Unrecognised? */ |
4376 | if (ctxt->d == 0 || (ctxt->d & Undefined)) | 4390 | if (ctxt->d == 0 || (ctxt->d & NotImpl)) |
4377 | return EMULATION_FAILED; | 4391 | return EMULATION_FAILED; |
4378 | 4392 | ||
4379 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | 4393 | if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn) |
@@ -4511,7 +4525,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) | |||
4511 | 4525 | ||
4512 | ctxt->mem_read.pos = 0; | 4526 | ctxt->mem_read.pos = 0; |
4513 | 4527 | ||
4514 | if (ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) { | 4528 | if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || |
4529 | (ctxt->d & Undefined)) { | ||
4515 | rc = emulate_ud(ctxt); | 4530 | rc = emulate_ud(ctxt); |
4516 | goto done; | 4531 | goto done; |
4517 | } | 4532 | } |
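
The emulator also stops conflating two failure modes. NotImpl (the new bit 30, which pushed Src2Shift from 30 to 31) still means "KVM cannot emulate this, bail out with EMULATION_FAILED", while Undefined now survives decode and injects #UD at execution time, which is what real hardware does for an architecturally undefined encoding; the group5 slot that changes from N to D(Undefined) is the first user. The two checks, as they now read:

    /* decode: only genuinely unimplemented opcodes abort emulation */
    if (ctxt->d == 0 || (ctxt->d & NotImpl))
            return EMULATION_FAILED;

    /* execute: undefined encodings raise #UD inside the guest */
    if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
        (ctxt->d & Undefined))
            rc = emulate_ud(ctxt);
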
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c1d30b2fc9bb..412a5aa0ef94 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -290,8 +290,8 @@ static void pit_do_work(struct kthread_work *work) | |||
290 | } | 290 | } |
291 | spin_unlock(&ps->inject_lock); | 291 | spin_unlock(&ps->inject_lock); |
292 | if (inject) { | 292 | if (inject) { |
293 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 293 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1, false); |
294 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 294 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0, false); |
295 | 295 | ||
296 | /* | 296 | /* |
297 | * Provides NMI watchdog support via Virtual Wire mode. | 297 | * Provides NMI watchdog support via Virtual Wire mode. |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f77df1c5de6e..e1adbb4aca75 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -94,6 +94,14 @@ static inline int apic_test_vector(int vec, void *bitmap) | |||
94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 94 | return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
95 | } | 95 | } |
96 | 96 | ||
97 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) | ||
98 | { | ||
99 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
100 | |||
101 | return apic_test_vector(vector, apic->regs + APIC_ISR) || | ||
102 | apic_test_vector(vector, apic->regs + APIC_IRR); | ||
103 | } | ||
104 | |||
97 | static inline void apic_set_vector(int vec, void *bitmap) | 105 | static inline void apic_set_vector(int vec, void *bitmap) |
98 | { | 106 | { |
99 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); | 107 | set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); |
@@ -145,53 +153,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) | |||
145 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; | 153 | return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; |
146 | } | 154 | } |
147 | 155 | ||
148 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | ||
149 | struct kvm_lapic_irq *irq, | ||
150 | u64 *eoi_exit_bitmap) | ||
151 | { | ||
152 | struct kvm_lapic **dst; | ||
153 | struct kvm_apic_map *map; | ||
154 | unsigned long bitmap = 1; | ||
155 | int i; | ||
156 | |||
157 | rcu_read_lock(); | ||
158 | map = rcu_dereference(vcpu->kvm->arch.apic_map); | ||
159 | |||
160 | if (unlikely(!map)) { | ||
161 | __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); | ||
162 | goto out; | ||
163 | } | ||
164 | |||
165 | if (irq->dest_mode == 0) { /* physical mode */ | ||
166 | if (irq->delivery_mode == APIC_DM_LOWEST || | ||
167 | irq->dest_id == 0xff) { | ||
168 | __set_bit(irq->vector, | ||
169 | (unsigned long *)eoi_exit_bitmap); | ||
170 | goto out; | ||
171 | } | ||
172 | dst = &map->phys_map[irq->dest_id & 0xff]; | ||
173 | } else { | ||
174 | u32 mda = irq->dest_id << (32 - map->ldr_bits); | ||
175 | |||
176 | dst = map->logical_map[apic_cluster_id(map, mda)]; | ||
177 | |||
178 | bitmap = apic_logical_id(map, mda); | ||
179 | } | ||
180 | |||
181 | for_each_set_bit(i, &bitmap, 16) { | ||
182 | if (!dst[i]) | ||
183 | continue; | ||
184 | if (dst[i]->vcpu == vcpu) { | ||
185 | __set_bit(irq->vector, | ||
186 | (unsigned long *)eoi_exit_bitmap); | ||
187 | break; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | out: | ||
192 | rcu_read_unlock(); | ||
193 | } | ||
194 | |||
195 | static void recalculate_apic_map(struct kvm *kvm) | 156 | static void recalculate_apic_map(struct kvm *kvm) |
196 | { | 157 | { |
197 | struct kvm_apic_map *new, *old = NULL; | 158 | struct kvm_apic_map *new, *old = NULL; |
@@ -256,7 +217,7 @@ out: | |||
256 | if (old) | 217 | if (old) |
257 | kfree_rcu(old, rcu); | 218 | kfree_rcu(old, rcu); |
258 | 219 | ||
259 | kvm_ioapic_make_eoibitmap_request(kvm); | 220 | kvm_vcpu_request_scan_ioapic(kvm); |
260 | } | 221 | } |
261 | 222 | ||
262 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) | 223 | static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) |
@@ -357,6 +318,19 @@ static u8 count_vectors(void *bitmap) | |||
357 | return count; | 318 | return count; |
358 | } | 319 | } |
359 | 320 | ||
321 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) | ||
322 | { | ||
323 | u32 i, pir_val; | ||
324 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
325 | |||
326 | for (i = 0; i <= 7; i++) { | ||
327 | pir_val = xchg(&pir[i], 0); | ||
328 | if (pir_val) | ||
329 | *((u32 *)(apic->regs + APIC_IRR + i * 0x10)) |= pir_val; | ||
330 | } | ||
331 | } | ||
332 | EXPORT_SYMBOL_GPL(kvm_apic_update_irr); | ||
333 | |||
360 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) | 334 | static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) |
361 | { | 335 | { |
362 | apic->irr_pending = true; | 336 | apic->irr_pending = true; |
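
kvm_apic_update_irr() is the software half of posted interrupts: a remote sender (or the CPU itself) sets bits in the 256-bit Posted Interrupt Request array, and before the IRR is scanned each 32-bit chunk is atomically swapped out and OR-ed into the matching IRR register, which the APIC register layout spaces 0x10 bytes apart. A self-contained model using a GCC builtin where the kernel uses xchg():

    #include <stdint.h>

    #define APIC_IRR 0x200          /* offset of IRR register 0 */

    /* merge a 256-bit PIR into the APIC page's IRR; the atomic
     * exchange lets senders keep posting bits concurrently */
    static void apic_update_irr(uint8_t *apic_regs, uint32_t *pir)
    {
            for (int i = 0; i <= 7; i++) {
                    uint32_t val = __atomic_exchange_n(&pir[i], 0,
                                                       __ATOMIC_SEQ_CST);
                    if (val)
                            *(uint32_t *)(apic_regs + APIC_IRR + i * 0x10)
                                    |= val;
            }
    }
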
@@ -379,6 +353,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) | |||
379 | if (!apic->irr_pending) | 353 | if (!apic->irr_pending) |
380 | return -1; | 354 | return -1; |
381 | 355 | ||
356 | kvm_x86_ops->sync_pir_to_irr(apic->vcpu); | ||
382 | result = apic_search_irr(apic); | 357 | result = apic_search_irr(apic); |
383 | ASSERT(result == -1 || result >= 16); | 358 | ASSERT(result == -1 || result >= 16); |
384 | 359 | ||
@@ -431,14 +406,16 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
431 | } | 406 | } |
432 | 407 | ||
433 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 408 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
434 | int vector, int level, int trig_mode); | 409 | int vector, int level, int trig_mode, |
410 | unsigned long *dest_map); | ||
435 | 411 | ||
436 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | 412 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
413 | unsigned long *dest_map) | ||
437 | { | 414 | { |
438 | struct kvm_lapic *apic = vcpu->arch.apic; | 415 | struct kvm_lapic *apic = vcpu->arch.apic; |
439 | 416 | ||
440 | return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, | 417 | return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, |
441 | irq->level, irq->trig_mode); | 418 | irq->level, irq->trig_mode, dest_map); |
442 | } | 419 | } |
443 | 420 | ||
444 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) | 421 | static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) |
@@ -505,6 +482,15 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) | |||
505 | return result; | 482 | return result; |
506 | } | 483 | } |
507 | 484 | ||
485 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) | ||
486 | { | ||
487 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
488 | int i; | ||
489 | |||
490 | for (i = 0; i < 8; i++) | ||
491 | apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); | ||
492 | } | ||
493 | |||
508 | static void apic_update_ppr(struct kvm_lapic *apic) | 494 | static void apic_update_ppr(struct kvm_lapic *apic) |
509 | { | 495 | { |
510 | u32 tpr, isrv, ppr, old_ppr; | 496 | u32 tpr, isrv, ppr, old_ppr; |
@@ -611,7 +597,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
611 | } | 597 | } |
612 | 598 | ||
613 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 599 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
614 | struct kvm_lapic_irq *irq, int *r) | 600 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map) |
615 | { | 601 | { |
616 | struct kvm_apic_map *map; | 602 | struct kvm_apic_map *map; |
617 | unsigned long bitmap = 1; | 603 | unsigned long bitmap = 1; |
@@ -622,7 +608,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
622 | *r = -1; | 608 | *r = -1; |
623 | 609 | ||
624 | if (irq->shorthand == APIC_DEST_SELF) { | 610 | if (irq->shorthand == APIC_DEST_SELF) { |
625 | *r = kvm_apic_set_irq(src->vcpu, irq); | 611 | *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); |
626 | return true; | 612 | return true; |
627 | } | 613 | } |
628 | 614 | ||
@@ -667,7 +653,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
667 | continue; | 653 | continue; |
668 | if (*r < 0) | 654 | if (*r < 0) |
669 | *r = 0; | 655 | *r = 0; |
670 | *r += kvm_apic_set_irq(dst[i]->vcpu, irq); | 656 | *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); |
671 | } | 657 | } |
672 | 658 | ||
673 | ret = true; | 659 | ret = true; |
@@ -681,7 +667,8 @@ out: | |||
681 | * Return 1 if successfully added and 0 if discarded. | 667 | * Return 1 if successfully added and 0 if discarded. |
682 | */ | 668 | */ |
683 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 669 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
684 | int vector, int level, int trig_mode) | 670 | int vector, int level, int trig_mode, |
671 | unsigned long *dest_map) | ||
685 | { | 672 | { |
686 | int result = 0; | 673 | int result = 0; |
687 | struct kvm_vcpu *vcpu = apic->vcpu; | 674 | struct kvm_vcpu *vcpu = apic->vcpu; |
@@ -694,24 +681,28 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
694 | if (unlikely(!apic_enabled(apic))) | 681 | if (unlikely(!apic_enabled(apic))) |
695 | break; | 682 | break; |
696 | 683 | ||
697 | if (trig_mode) { | 684 | if (dest_map) |
698 | apic_debug("level trig mode for vector %d", vector); | 685 | __set_bit(vcpu->vcpu_id, dest_map); |
699 | apic_set_vector(vector, apic->regs + APIC_TMR); | ||
700 | } else | ||
701 | apic_clear_vector(vector, apic->regs + APIC_TMR); | ||
702 | 686 | ||
703 | result = !apic_test_and_set_irr(vector, apic); | 687 | if (kvm_x86_ops->deliver_posted_interrupt) { |
704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | 688 | result = 1; |
705 | trig_mode, vector, !result); | 689 | kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); |
706 | if (!result) { | 690 | } else { |
707 | if (trig_mode) | 691 | result = !apic_test_and_set_irr(vector, apic); |
708 | apic_debug("level trig mode repeatedly for " | ||
709 | "vector %d", vector); | ||
710 | break; | ||
711 | } | ||
712 | 692 | ||
713 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 693 | if (!result) { |
714 | kvm_vcpu_kick(vcpu); | 694 | if (trig_mode) |
695 | apic_debug("level trig mode repeatedly " | ||
696 | "for vector %d", vector); | ||
697 | goto out; | ||
698 | } | ||
699 | |||
700 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
701 | kvm_vcpu_kick(vcpu); | ||
702 | } | ||
703 | out: | ||
704 | trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, | ||
705 | trig_mode, vector, !result); | ||
715 | break; | 706 | break; |
716 | 707 | ||
717 | case APIC_DM_REMRD: | 708 | case APIC_DM_REMRD: |
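
In the fixed-delivery path above, the branch on deliver_posted_interrupt is the heart of the feature: when the backend provides it, result is forced to 1 because software can no longer tell whether the posted vector coalesced with an already-pending one, the per-vector TMR write disappears (kvm_apic_update_tmr() now loads the whole register set in bulk), and the explicit KVM_REQ_EVENT/kick pair is replaced by the notification IPI. What the backend hook itself does, modelled on the VMX posted-interrupt descriptor (layout per the SDM; send_notification_ipi() is a placeholder):

    #include <stdint.h>

    struct pi_desc {
            uint32_t pir[8];        /* 256-bit posted interrupt requests */
            uint32_t control;       /* bit 0: outstanding notification */
    };

    static void send_notification_ipi(int cpu) { /* placeholder */ }

    static void deliver_posted(struct pi_desc *pi, int cpu, int vector)
    {
            __atomic_fetch_or(&pi->pir[vector / 32], 1u << (vector % 32),
                              __ATOMIC_SEQ_CST);
            /* notify only if the ON bit was clear: one IPI per burst */
            if (!(__atomic_fetch_or(&pi->control, 1u, __ATOMIC_SEQ_CST) & 1u))
                    send_notification_ipi(cpu);
    }
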
@@ -731,7 +722,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
731 | case APIC_DM_INIT: | 722 | case APIC_DM_INIT: |
732 | if (!trig_mode || level) { | 723 | if (!trig_mode || level) { |
733 | result = 1; | 724 | result = 1; |
734 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 725 | /* assumes that there are only KVM_APIC_INIT/SIPI */ |
726 | apic->pending_events = (1UL << KVM_APIC_INIT); | ||
727 | /* make sure pending_events is visible before sending | ||
728 | * the request */ | ||
729 | smp_wmb(); | ||
735 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 730 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
736 | kvm_vcpu_kick(vcpu); | 731 | kvm_vcpu_kick(vcpu); |
737 | } else { | 732 | } else { |
@@ -743,13 +738,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
743 | case APIC_DM_STARTUP: | 738 | case APIC_DM_STARTUP: |
744 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", | 739 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", |
745 | vcpu->vcpu_id, vector); | 740 | vcpu->vcpu_id, vector); |
746 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 741 | result = 1; |
747 | result = 1; | 742 | apic->sipi_vector = vector; |
748 | vcpu->arch.sipi_vector = vector; | 743 | /* make sure sipi_vector is visible for the receiver */ |
749 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; | 744 | smp_wmb(); |
750 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 745 | set_bit(KVM_APIC_SIPI, &apic->pending_events); |
751 | kvm_vcpu_kick(vcpu); | 746 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
752 | } | 747 | kvm_vcpu_kick(vcpu); |
753 | break; | 748 | break; |
754 | 749 | ||
755 | case APIC_DM_EXTINT: | 750 | case APIC_DM_EXTINT: |
@@ -782,7 +777,7 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) | |||
782 | trigger_mode = IOAPIC_LEVEL_TRIG; | 777 | trigger_mode = IOAPIC_LEVEL_TRIG; |
783 | else | 778 | else |
784 | trigger_mode = IOAPIC_EDGE_TRIG; | 779 | trigger_mode = IOAPIC_EDGE_TRIG; |
785 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 780 | kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); |
786 | } | 781 | } |
787 | } | 782 | } |
788 | 783 | ||
@@ -848,7 +843,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
848 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 843 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
849 | irq.vector); | 844 | irq.vector); |
850 | 845 | ||
851 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); | 846 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL); |
852 | } | 847 | } |
853 | 848 | ||
854 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 849 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
@@ -1484,7 +1479,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) | |||
1484 | vector = reg & APIC_VECTOR_MASK; | 1479 | vector = reg & APIC_VECTOR_MASK; |
1485 | mode = reg & APIC_MODE_MASK; | 1480 | mode = reg & APIC_MODE_MASK; |
1486 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; | 1481 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; |
1487 | return __apic_accept_irq(apic, mode, vector, 1, trig_mode); | 1482 | return __apic_accept_irq(apic, mode, vector, 1, trig_mode, |
1483 | NULL); | ||
1488 | } | 1484 | } |
1489 | return 0; | 1485 | return 0; |
1490 | } | 1486 | } |
@@ -1654,6 +1650,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | |||
1654 | apic->highest_isr_cache = -1; | 1650 | apic->highest_isr_cache = -1; |
1655 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); | 1651 | kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); |
1656 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 1652 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
1653 | kvm_rtc_eoi_tracking_restore_one(vcpu); | ||
1657 | } | 1654 | } |
1658 | 1655 | ||
1659 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | 1656 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) |
@@ -1860,6 +1857,34 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) | |||
1860 | addr, sizeof(u8)); | 1857 | addr, sizeof(u8)); |
1861 | } | 1858 | } |
1862 | 1859 | ||
1860 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu) | ||
1861 | { | ||
1862 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1863 | unsigned int sipi_vector; | ||
1864 | |||
1865 | if (!kvm_vcpu_has_lapic(vcpu)) | ||
1866 | return; | ||
1867 | |||
1868 | if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { | ||
1869 | kvm_lapic_reset(vcpu); | ||
1870 | kvm_vcpu_reset(vcpu); | ||
1871 | if (kvm_vcpu_is_bsp(apic->vcpu)) | ||
1872 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
1873 | else | ||
1874 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | ||
1875 | } | ||
1876 | if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && | ||
1877 | vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | ||
1878 | /* evaluate pending_events before reading the vector */ | ||
1879 | smp_rmb(); | ||
1880 | sipi_vector = apic->sipi_vector; | ||
1881 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
1882 | vcpu->vcpu_id, sipi_vector); | ||
1883 | kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); | ||
1884 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
1885 | } | ||
1886 | } | ||
1887 | |||
1863 | void kvm_lapic_init(void) | 1888 | void kvm_lapic_init(void) |
1864 | { | 1889 | { |
1865 | /* do not patch jump label more than once per second */ | 1890 | /* do not patch jump label more than once per second */ |
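
kvm_apic_accept_events() closes the loop on the INIT/SIPI rework: senders no longer write mp_state remotely; they record events in apic->pending_events and the vCPU consumes them on its own thread. The ordering contract is the usual publish/consume pairing visible in the two hunks: the sender stores sipi_vector, smp_wmb(), then sets KVM_APIC_SIPI; the receiver's test_and_clear_bit() followed by smp_rmb() guarantees it reads the vector belonging to the event it just claimed. A compressed C11 model of both sides:

    #include <stdatomic.h>

    #define KVM_APIC_SIPI 1

    struct apic_model {
            unsigned int sipi_vector;
            atomic_ulong pending_events;
    };

    /* sender: release ordering publishes sipi_vector before the bit */
    static void post_sipi(struct apic_model *a, unsigned int vector)
    {
            a->sipi_vector = vector;
            atomic_fetch_or_explicit(&a->pending_events,
                                     1UL << KVM_APIC_SIPI,
                                     memory_order_release);
    }

    /* receiver: acquire ordering makes the vector read safe */
    static int accept_sipi(struct apic_model *a, unsigned int *vector)
    {
            unsigned long old =
                    atomic_fetch_and_explicit(&a->pending_events,
                                              ~(1UL << KVM_APIC_SIPI),
                                              memory_order_acquire);
            if (!(old & (1UL << KVM_APIC_SIPI)))
                    return 0;
            *vector = a->sipi_vector;
            return 1;
    }
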
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 1676d34ddb4e..c730ac9fe801 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -5,6 +5,9 @@ | |||
5 | 5 | ||
6 | #include <linux/kvm_host.h> | 6 | #include <linux/kvm_host.h> |
7 | 7 | ||
8 | #define KVM_APIC_INIT 0 | ||
9 | #define KVM_APIC_SIPI 1 | ||
10 | |||
8 | struct kvm_timer { | 11 | struct kvm_timer { |
9 | struct hrtimer timer; | 12 | struct hrtimer timer; |
10 | s64 period; /* unit: ns */ | 13 | s64 period; /* unit: ns */ |
@@ -32,6 +35,8 @@ struct kvm_lapic { | |||
32 | void *regs; | 35 | void *regs; |
33 | gpa_t vapic_addr; | 36 | gpa_t vapic_addr; |
34 | struct page *vapic_page; | 37 | struct page *vapic_page; |
38 | unsigned long pending_events; | ||
39 | unsigned int sipi_vector; | ||
35 | }; | 40 | }; |
36 | int kvm_create_lapic(struct kvm_vcpu *vcpu); | 41 | int kvm_create_lapic(struct kvm_vcpu *vcpu); |
37 | void kvm_free_lapic(struct kvm_vcpu *vcpu); | 42 | void kvm_free_lapic(struct kvm_vcpu *vcpu); |
@@ -39,6 +44,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); | |||
39 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); | 44 | int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); |
40 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); | 45 | int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); |
41 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); | 46 | int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); |
47 | void kvm_apic_accept_events(struct kvm_vcpu *vcpu); | ||
42 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); | 48 | void kvm_lapic_reset(struct kvm_vcpu *vcpu); |
43 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 49 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
44 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 50 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
@@ -47,13 +53,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | |||
47 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | 53 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); |
48 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); | 54 | void kvm_apic_set_version(struct kvm_vcpu *vcpu); |
49 | 55 | ||
56 | void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); | ||
57 | void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); | ||
50 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 58 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
51 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 59 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
52 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | 60 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, |
61 | unsigned long *dest_map); | ||
53 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); | 62 | int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); |
54 | 63 | ||
55 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | 64 | bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, |
56 | struct kvm_lapic_irq *irq, int *r); | 65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); |
57 | 66 | ||
58 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
59 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 68 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
@@ -154,8 +163,11 @@ static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) | |||
154 | return ldr & map->lid_mask; | 163 | return ldr & map->lid_mask; |
155 | } | 164 | } |
156 | 165 | ||
157 | void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, | 166 | static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) |
158 | struct kvm_lapic_irq *irq, | 167 | { |
159 | u64 *eoi_bitmap); | 168 | return vcpu->arch.apic->pending_events; |
169 | } | ||
170 | |||
171 | bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); | ||
160 | 172 | ||
161 | #endif | 173 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 956ca358108a..004cc87b781c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -199,8 +199,11 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); | |||
199 | 199 | ||
200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) | 200 | static void mark_mmio_spte(u64 *sptep, u64 gfn, unsigned access) |
201 | { | 201 | { |
202 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
203 | |||
202 | access &= ACC_WRITE_MASK | ACC_USER_MASK; | 204 | access &= ACC_WRITE_MASK | ACC_USER_MASK; |
203 | 205 | ||
206 | sp->mmio_cached = true; | ||
204 | trace_mark_mmio_spte(sptep, gfn, access); | 207 | trace_mark_mmio_spte(sptep, gfn, access); |
205 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); | 208 | mmu_spte_set(sptep, shadow_mmio_mask | access | gfn << PAGE_SHIFT); |
206 | } | 209 | } |
@@ -1502,6 +1505,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
1502 | u64 *parent_pte, int direct) | 1505 | u64 *parent_pte, int direct) |
1503 | { | 1506 | { |
1504 | struct kvm_mmu_page *sp; | 1507 | struct kvm_mmu_page *sp; |
1508 | |||
1505 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); | 1509 | sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); |
1506 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); | 1510 | sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); |
1507 | if (!direct) | 1511 | if (!direct) |
@@ -1644,16 +1648,14 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
1644 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 1648 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
1645 | struct list_head *invalid_list); | 1649 | struct list_head *invalid_list); |
1646 | 1650 | ||
1647 | #define for_each_gfn_sp(kvm, sp, gfn) \ | 1651 | #define for_each_gfn_sp(_kvm, _sp, _gfn) \ |
1648 | hlist_for_each_entry(sp, \ | 1652 | hlist_for_each_entry(_sp, \ |
1649 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1653 | &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ |
1650 | if ((sp)->gfn != (gfn)) {} else | 1654 | if ((_sp)->gfn != (_gfn)) {} else |
1651 | 1655 | ||
1652 | #define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ | 1656 | #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ |
1653 | hlist_for_each_entry(sp, \ | 1657 | for_each_gfn_sp(_kvm, _sp, _gfn) \ |
1654 | &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ | 1658 | if ((_sp)->role.direct || (_sp)->role.invalid) {} else |
1655 | if ((sp)->gfn != (gfn) || (sp)->role.direct || \ | ||
1656 | (sp)->role.invalid) {} else | ||
1657 | 1659 | ||
1658 | /* @sp->gfn should be write-protected at the call site */ | 1660 | /* @sp->gfn should be write-protected at the call site */ |
1659 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 1661 | static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
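
The rewritten macros lean on the "if (filter) {} else" idiom: the trailing else binds to the loop body the caller supplies, so non-matching entries fall into the empty branch, and the construct stays dangling-else safe inside larger if statements. A hedged user-space sketch of the same trick; the array and macro names are hypothetical:

#include <stdio.h>

/* Iterate only over even elements: odd ones hit the empty branch. */
#define for_each_even(_arr, _n, _i)			\
	for ((_i) = 0; (_i) < (_n); (_i)++)		\
		if ((_arr)[(_i)] % 2 != 0) {} else

int main(void)
{
	int v[] = { 1, 2, 3, 4, 5, 6 };
	int i;

	for_each_even(v, 6, i)
		printf("%d\n", v[i]);	/* prints 2, 4, 6 */
	return 0;
}

This is also why for_each_gfn_indirect_valid_sp() can now be built by stacking a second filter on top of for_each_gfn_sp().
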
@@ -2089,7 +2091,7 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, | |||
2089 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, | 2091 | static void kvm_mmu_commit_zap_page(struct kvm *kvm, |
2090 | struct list_head *invalid_list) | 2092 | struct list_head *invalid_list) |
2091 | { | 2093 | { |
2092 | struct kvm_mmu_page *sp; | 2094 | struct kvm_mmu_page *sp, *nsp; |
2093 | 2095 | ||
2094 | if (list_empty(invalid_list)) | 2096 | if (list_empty(invalid_list)) |
2095 | return; | 2097 | return; |
@@ -2106,11 +2108,25 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
2106 | */ | 2108 | */ |
2107 | kvm_flush_remote_tlbs(kvm); | 2109 | kvm_flush_remote_tlbs(kvm); |
2108 | 2110 | ||
2109 | do { | 2111 | list_for_each_entry_safe(sp, nsp, invalid_list, link) { |
2110 | sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); | ||
2111 | WARN_ON(!sp->role.invalid || sp->root_count); | 2112 | WARN_ON(!sp->role.invalid || sp->root_count); |
2112 | kvm_mmu_free_page(sp); | 2113 | kvm_mmu_free_page(sp); |
2113 | } while (!list_empty(invalid_list)); | 2114 | } |
2115 | } | ||
2116 | |||
2117 | static bool prepare_zap_oldest_mmu_page(struct kvm *kvm, | ||
2118 | struct list_head *invalid_list) | ||
2119 | { | ||
2120 | struct kvm_mmu_page *sp; | ||
2121 | |||
2122 | if (list_empty(&kvm->arch.active_mmu_pages)) | ||
2123 | return false; | ||
2124 | |||
2125 | sp = list_entry(kvm->arch.active_mmu_pages.prev, | ||
2126 | struct kvm_mmu_page, link); | ||
2127 | kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); | ||
2128 | |||
2129 | return true; | ||
2114 | } | 2130 | } |
2115 | 2131 | ||
2116 | /* | 2132 | /* |
@@ -2120,23 +2136,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, | |||
2120 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) | 2136 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) |
2121 | { | 2137 | { |
2122 | LIST_HEAD(invalid_list); | 2138 | LIST_HEAD(invalid_list); |
2123 | /* | ||
2124 | * If we set the number of mmu pages to be smaller be than the | ||
2125 | * number of actived pages , we must to free some mmu pages before we | ||
2126 | * change the value | ||
2127 | */ | ||
2128 | 2139 | ||
2129 | spin_lock(&kvm->mmu_lock); | 2140 | spin_lock(&kvm->mmu_lock); |
2130 | 2141 | ||
2131 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { | 2142 | if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { |
2132 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && | 2143 | /* Need to free some mmu pages to achieve the goal. */ |
2133 | !list_empty(&kvm->arch.active_mmu_pages)) { | 2144 | while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) |
2134 | struct kvm_mmu_page *page; | 2145 | if (!prepare_zap_oldest_mmu_page(kvm, &invalid_list)) |
2146 | break; | ||
2135 | 2147 | ||
2136 | page = container_of(kvm->arch.active_mmu_pages.prev, | ||
2137 | struct kvm_mmu_page, link); | ||
2138 | kvm_mmu_prepare_zap_page(kvm, page, &invalid_list); | ||
2139 | } | ||
2140 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 2148 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
2141 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; | 2149 | goal_nr_mmu_pages = kvm->arch.n_used_mmu_pages; |
2142 | } | 2150 | } |
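
prepare_zap_oldest_mmu_page() factors out the two previously open-coded loops: pull the victim from the tail of active_mmu_pages (the least recently allocated page) onto a private invalid_list, then commit the whole batch after a single remote TLB flush. A rough user-space sketch of that prepare/commit split, with hypothetical names and no locking:

#include <stdio.h>
#include <stdlib.h>

struct page {
	int id;
	struct page *next;
};

static struct page *active;	/* newest first; the tail is the oldest */

static void add_page(int id)
{
	struct page *p = malloc(sizeof(*p));

	p->id = id;
	p->next = active;
	active = p;
}

/* Move the oldest page onto the private batch list; 0 when none are left. */
static int prepare_zap_oldest(struct page **batch)
{
	struct page **pp = &active;
	struct page *victim;

	if (!active)
		return 0;
	while ((*pp)->next)
		pp = &(*pp)->next;	/* walk to the tail */
	victim = *pp;
	*pp = NULL;
	victim->next = *batch;
	*batch = victim;
	return 1;
}

static void commit_zap(struct page **batch)
{
	puts("flush TLBs once");	/* kvm_flush_remote_tlbs() analogue */
	while (*batch) {
		struct page *p = *batch;

		*batch = p->next;
		printf("free page %d\n", p->id);
		free(p);
	}
}

int main(void)
{
	struct page *batch = NULL;
	int used = 3, goal = 1;

	add_page(1); add_page(2); add_page(3);
	while (used > goal && prepare_zap_oldest(&batch))
		used--;
	commit_zap(&batch);
	return 0;
}
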
@@ -2794,6 +2802,7 @@ exit: | |||
2794 | 2802 | ||
2795 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, | 2803 | static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, |
2796 | gva_t gva, pfn_t *pfn, bool write, bool *writable); | 2804 | gva_t gva, pfn_t *pfn, bool write, bool *writable); |
2805 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu); | ||
2797 | 2806 | ||
2798 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | 2807 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, |
2799 | gfn_t gfn, bool prefault) | 2808 | gfn_t gfn, bool prefault) |
@@ -2835,7 +2844,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, | |||
2835 | spin_lock(&vcpu->kvm->mmu_lock); | 2844 | spin_lock(&vcpu->kvm->mmu_lock); |
2836 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 2845 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
2837 | goto out_unlock; | 2846 | goto out_unlock; |
2838 | kvm_mmu_free_some_pages(vcpu); | 2847 | make_mmu_pages_available(vcpu); |
2839 | if (likely(!force_pt_level)) | 2848 | if (likely(!force_pt_level)) |
2840 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 2849 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
2841 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, | 2850 | r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, |
@@ -2913,7 +2922,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
2913 | 2922 | ||
2914 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | 2923 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { |
2915 | spin_lock(&vcpu->kvm->mmu_lock); | 2924 | spin_lock(&vcpu->kvm->mmu_lock); |
2916 | kvm_mmu_free_some_pages(vcpu); | 2925 | make_mmu_pages_available(vcpu); |
2917 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, | 2926 | sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, |
2918 | 1, ACC_ALL, NULL); | 2927 | 1, ACC_ALL, NULL); |
2919 | ++sp->root_count; | 2928 | ++sp->root_count; |
@@ -2925,7 +2934,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) | |||
2925 | 2934 | ||
2926 | ASSERT(!VALID_PAGE(root)); | 2935 | ASSERT(!VALID_PAGE(root)); |
2927 | spin_lock(&vcpu->kvm->mmu_lock); | 2936 | spin_lock(&vcpu->kvm->mmu_lock); |
2928 | kvm_mmu_free_some_pages(vcpu); | 2937 | make_mmu_pages_available(vcpu); |
2929 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), | 2938 | sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), |
2930 | i << 30, | 2939 | i << 30, |
2931 | PT32_ROOT_LEVEL, 1, ACC_ALL, | 2940 | PT32_ROOT_LEVEL, 1, ACC_ALL, |
@@ -2964,7 +2973,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
2964 | ASSERT(!VALID_PAGE(root)); | 2973 | ASSERT(!VALID_PAGE(root)); |
2965 | 2974 | ||
2966 | spin_lock(&vcpu->kvm->mmu_lock); | 2975 | spin_lock(&vcpu->kvm->mmu_lock); |
2967 | kvm_mmu_free_some_pages(vcpu); | 2976 | make_mmu_pages_available(vcpu); |
2968 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, | 2977 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, |
2969 | 0, ACC_ALL, NULL); | 2978 | 0, ACC_ALL, NULL); |
2970 | root = __pa(sp->spt); | 2979 | root = __pa(sp->spt); |
@@ -2998,7 +3007,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) | |||
2998 | return 1; | 3007 | return 1; |
2999 | } | 3008 | } |
3000 | spin_lock(&vcpu->kvm->mmu_lock); | 3009 | spin_lock(&vcpu->kvm->mmu_lock); |
3001 | kvm_mmu_free_some_pages(vcpu); | 3010 | make_mmu_pages_available(vcpu); |
3002 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 3011 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
3003 | PT32_ROOT_LEVEL, 0, | 3012 | PT32_ROOT_LEVEL, 0, |
3004 | ACC_ALL, NULL); | 3013 | ACC_ALL, NULL); |
@@ -3304,7 +3313,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, | |||
3304 | spin_lock(&vcpu->kvm->mmu_lock); | 3313 | spin_lock(&vcpu->kvm->mmu_lock); |
3305 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) | 3314 | if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) |
3306 | goto out_unlock; | 3315 | goto out_unlock; |
3307 | kvm_mmu_free_some_pages(vcpu); | 3316 | make_mmu_pages_available(vcpu); |
3308 | if (likely(!force_pt_level)) | 3317 | if (likely(!force_pt_level)) |
3309 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); | 3318 | transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); |
3310 | r = __direct_map(vcpu, gpa, write, map_writable, | 3319 | r = __direct_map(vcpu, gpa, write, map_writable, |
@@ -4006,17 +4015,17 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
4006 | } | 4015 | } |
4007 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | 4016 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); |
4008 | 4017 | ||
4009 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 4018 | static void make_mmu_pages_available(struct kvm_vcpu *vcpu) |
4010 | { | 4019 | { |
4011 | LIST_HEAD(invalid_list); | 4020 | LIST_HEAD(invalid_list); |
4012 | 4021 | ||
4013 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES && | 4022 | if (likely(kvm_mmu_available_pages(vcpu->kvm) >= KVM_MIN_FREE_MMU_PAGES)) |
4014 | !list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | 4023 | return; |
4015 | struct kvm_mmu_page *sp; | 4024 | |
4025 | while (kvm_mmu_available_pages(vcpu->kvm) < KVM_REFILL_PAGES) { | ||
4026 | if (!prepare_zap_oldest_mmu_page(vcpu->kvm, &invalid_list)) | ||
4027 | break; | ||
4016 | 4028 | ||
4017 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.prev, | ||
4018 | struct kvm_mmu_page, link); | ||
4019 | kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); | ||
4020 | ++vcpu->kvm->stat.mmu_recycled; | 4029 | ++vcpu->kvm->stat.mmu_recycled; |
4021 | } | 4030 | } |
4022 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 4031 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
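
make_mmu_pages_available() now separates the trigger from the target: nothing happens while free pages stay at or above KVM_MIN_FREE_MMU_PAGES, but once below that watermark it reclaims up to the higher KVM_REFILL_PAGES, so the eviction loop does not run on every page fault. A toy sketch of that hysteresis; the constants and reclaim_one() are made up:

#include <stdbool.h>
#include <stdio.h>

#define MIN_FREE 5	/* trigger threshold (low watermark) */
#define REFILL   10	/* refill target (high watermark) */

static unsigned int free_pages = 4;

static bool reclaim_one(void)
{
	free_pages++;		/* pretend we evicted and freed one page */
	return true;
}

static void make_pages_available(void)
{
	if (free_pages >= MIN_FREE)
		return;		/* fast path: nothing to do */
	while (free_pages < REFILL)
		if (!reclaim_one())
			break;	/* nothing left to evict */
}

int main(void)
{
	make_pages_available();
	printf("free after refill: %u\n", free_pages);	/* 10 */
	return 0;
}
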
@@ -4185,17 +4194,22 @@ restart: | |||
4185 | spin_unlock(&kvm->mmu_lock); | 4194 | spin_unlock(&kvm->mmu_lock); |
4186 | } | 4195 | } |
4187 | 4196 | ||
4188 | static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm, | 4197 | void kvm_mmu_zap_mmio_sptes(struct kvm *kvm) |
4189 | struct list_head *invalid_list) | ||
4190 | { | 4198 | { |
4191 | struct kvm_mmu_page *page; | 4199 | struct kvm_mmu_page *sp, *node; |
4200 | LIST_HEAD(invalid_list); | ||
4192 | 4201 | ||
4193 | if (list_empty(&kvm->arch.active_mmu_pages)) | 4202 | spin_lock(&kvm->mmu_lock); |
4194 | return; | 4203 | restart: |
4204 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { | ||
4205 | if (!sp->mmio_cached) | ||
4206 | continue; | ||
4207 | if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list)) | ||
4208 | goto restart; | ||
4209 | } | ||
4195 | 4210 | ||
4196 | page = container_of(kvm->arch.active_mmu_pages.prev, | 4211 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4197 | struct kvm_mmu_page, link); | 4212 | spin_unlock(&kvm->mmu_lock); |
4198 | kvm_mmu_prepare_zap_page(kvm, page, invalid_list); | ||
4199 | } | 4213 | } |
4200 | 4214 | ||
4201 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | 4215 | static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) |
@@ -4232,7 +4246,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
4232 | idx = srcu_read_lock(&kvm->srcu); | 4246 | idx = srcu_read_lock(&kvm->srcu); |
4233 | spin_lock(&kvm->mmu_lock); | 4247 | spin_lock(&kvm->mmu_lock); |
4234 | 4248 | ||
4235 | kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list); | 4249 | prepare_zap_oldest_mmu_page(kvm, &invalid_list); |
4236 | kvm_mmu_commit_zap_page(kvm, &invalid_list); | 4250 | kvm_mmu_commit_zap_page(kvm, &invalid_list); |
4237 | 4251 | ||
4238 | spin_unlock(&kvm->mmu_lock); | 4252 | spin_unlock(&kvm->mmu_lock); |
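
kvm_mmu_zap_mmio_sptes() pairs with the mmio_cached flag set in mark_mmio_spte(): it walks active_mmu_pages and restarts from the head whenever a zap succeeds, because zapping one shadow page can unlink other pages (and the current node itself) from the list. A self-contained sketch of why a saved next pointer cannot be trusted; the cascading removal in zap() is a stand-in for KVM's behavior, not a copy of it:

#include <stdio.h>
#include <stdlib.h>

struct node {
	int mmio;		/* stand-in for sp->mmio_cached */
	struct node *next;
};

static struct node *head;

static void push(int mmio)
{
	struct node *n = malloc(sizeof(*n));

	n->mmio = mmio;
	n->next = head;
	head = n;
}

static void unlink_free(struct node *victim)
{
	struct node **pp;

	for (pp = &head; *pp; pp = &(*pp)->next)
		if (*pp == victim) {
			*pp = victim->next;
			free(victim);
			return;
		}
}

/* Zapping one node tears down another as a side effect (here: its
 * successor), invalidating any next pointer a walker saved earlier. */
static void zap(struct node *n)
{
	if (n->next)
		unlink_free(n->next);
	unlink_free(n);
}

int main(void)
{
	struct node *n;

	push(0); push(1); push(0); push(0);
restart:
	for (n = head; n; n = n->next)
		if (n->mmio) {
			zap(n);
			goto restart;	/* the list changed under us */
		}
	for (n = head; n; n = n->next)
		printf("kept node, mmio=%d\n", n->mmio);
	return 0;
}
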
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 69871080e866..2adcbc2cac6d 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -57,14 +57,11 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context); | |||
57 | 57 | ||
58 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) | 58 | static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) |
59 | { | 59 | { |
60 | return kvm->arch.n_max_mmu_pages - | 60 | if (kvm->arch.n_max_mmu_pages > kvm->arch.n_used_mmu_pages) |
61 | kvm->arch.n_used_mmu_pages; | 61 | return kvm->arch.n_max_mmu_pages - |
62 | } | 62 | kvm->arch.n_used_mmu_pages; |
63 | 63 | ||
64 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 64 | return 0; |
65 | { | ||
66 | if (unlikely(kvm_mmu_available_pages(vcpu->kvm)< KVM_MIN_FREE_MMU_PAGES)) | ||
67 | __kvm_mmu_free_some_pages(vcpu); | ||
68 | } | 65 | } |
69 | 66 | ||
70 | static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | 67 | static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) |
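
The reworked kvm_mmu_available_pages() guards the subtraction because n_used_mmu_pages can exceed n_max_mmu_pages (for instance while the page goal is being lowered), and the difference is unsigned. A two-line demonstration of the wraparound this avoids:

#include <stdio.h>

int main(void)
{
	unsigned int max = 10, used = 12;

	printf("naive:   %u\n", max - used);			/* 4294967294 */
	printf("guarded: %u\n", max > used ? max - used : 0);	/* 0 */
	return 0;
}
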
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 105dd5bd550e..da20860b457a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -627,7 +627,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, | |||
627 | goto out_unlock; | 627 | goto out_unlock; |
628 | 628 | ||
629 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); | 629 | kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); |
630 | kvm_mmu_free_some_pages(vcpu); | 630 | make_mmu_pages_available(vcpu); |
631 | if (!force_pt_level) | 631 | if (!force_pt_level) |
632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); | 632 | transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); |
633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, | 633 | r = FNAME(fetch)(vcpu, addr, &walker, write_fault, |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cfc258a6bf97..c53e797e7369 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
@@ -360,10 +360,12 @@ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) | |||
360 | return 1; | 360 | return 1; |
361 | } | 361 | } |
362 | 362 | ||
363 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | 363 | int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
364 | { | 364 | { |
365 | struct kvm_pmu *pmu = &vcpu->arch.pmu; | 365 | struct kvm_pmu *pmu = &vcpu->arch.pmu; |
366 | struct kvm_pmc *pmc; | 366 | struct kvm_pmc *pmc; |
367 | u32 index = msr_info->index; | ||
368 | u64 data = msr_info->data; | ||
367 | 369 | ||
368 | switch (index) { | 370 | switch (index) { |
369 | case MSR_CORE_PERF_FIXED_CTR_CTRL: | 371 | case MSR_CORE_PERF_FIXED_CTR_CTRL: |
@@ -375,6 +377,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
375 | } | 377 | } |
376 | break; | 378 | break; |
377 | case MSR_CORE_PERF_GLOBAL_STATUS: | 379 | case MSR_CORE_PERF_GLOBAL_STATUS: |
380 | if (msr_info->host_initiated) { | ||
381 | pmu->global_status = data; | ||
382 | return 0; | ||
383 | } | ||
378 | break; /* RO MSR */ | 384 | break; /* RO MSR */ |
379 | case MSR_CORE_PERF_GLOBAL_CTRL: | 385 | case MSR_CORE_PERF_GLOBAL_CTRL: |
380 | if (pmu->global_ctrl == data) | 386 | if (pmu->global_ctrl == data) |
@@ -386,7 +392,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
386 | break; | 392 | break; |
387 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: | 393 | case MSR_CORE_PERF_GLOBAL_OVF_CTRL: |
388 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { | 394 | if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) { |
389 | pmu->global_status &= ~data; | 395 | if (!msr_info->host_initiated) |
396 | pmu->global_status &= ~data; | ||
390 | pmu->global_ovf_ctrl = data; | 397 | pmu->global_ovf_ctrl = data; |
391 | return 0; | 398 | return 0; |
392 | } | 399 | } |
@@ -394,7 +401,8 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) | |||
394 | default: | 401 | default: |
395 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || | 402 | if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) || |
396 | (pmc = get_fixed_pmc(pmu, index))) { | 403 | (pmc = get_fixed_pmc(pmu, index))) { |
397 | data = (s64)(s32)data; | 404 | if (!msr_info->host_initiated) |
405 | data = (s64)(s32)data; | ||
398 | pmc->counter += data - read_pmc(pmc); | 406 | pmc->counter += data - read_pmc(pmc); |
399 | return 0; | 407 | return 0; |
400 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { | 408 | } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) { |
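
kvm_pmu_set_msr() now receives the whole struct msr_data, so it can distinguish guest writes from host-initiated ones (state restore through KVM_SET_MSRS) and, for example, let the host seed the otherwise guest-read-only MSR_CORE_PERF_GLOBAL_STATUS. A hedged sketch of the same dispatch pattern outside the kernel; 0x38e is the real GLOBAL_STATUS index, the rest is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct msr_data {
	uint32_t index;
	uint64_t data;
	bool host_initiated;
};

static uint64_t global_status;

static int set_msr(const struct msr_data *msr)
{
	switch (msr->index) {
	case 0x38e:	/* MSR_CORE_PERF_GLOBAL_STATUS: RO for the guest */
		if (msr->host_initiated) {
			global_status = msr->data;	/* migration restore */
			return 0;
		}
		return 1;	/* reject guest writes, as KVM would */
	default:
		return 1;
	}
}

int main(void)
{
	struct msr_data m = { 0x38e, 0x1, true };

	printf("host write: %d, status=%llu\n", set_msr(&m),
	       (unsigned long long)global_status);
	m.host_initiated = false;
	printf("guest write: %d\n", set_msr(&m));
	return 0;
}
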
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7d39d70647e3..a14a6eaf871d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1131,17 +1131,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1131 | init_seg(&save->gs); | 1131 | init_seg(&save->gs); |
1132 | 1132 | ||
1133 | save->cs.selector = 0xf000; | 1133 | save->cs.selector = 0xf000; |
1134 | save->cs.base = 0xffff0000; | ||
1134 | /* Executable/Readable Code Segment */ | 1135 | /* Executable/Readable Code Segment */ |
1135 | save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | | 1136 | save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | |
1136 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; | 1137 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; |
1137 | save->cs.limit = 0xffff; | 1138 | save->cs.limit = 0xffff; |
1138 | /* | ||
1139 | * cs.base should really be 0xffff0000, but vmx can't handle that, so | ||
1140 | * be consistent with it. | ||
1141 | * | ||
1142 | * Replace when we have real mode working for vmx. | ||
1143 | */ | ||
1144 | save->cs.base = 0xf0000; | ||
1145 | 1139 | ||
1146 | save->gdtr.limit = 0xffff; | 1140 | save->gdtr.limit = 0xffff; |
1147 | save->idtr.limit = 0xffff; | 1141 | save->idtr.limit = 0xffff; |
@@ -1191,7 +1185,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1191 | enable_gif(svm); | 1185 | enable_gif(svm); |
1192 | } | 1186 | } |
1193 | 1187 | ||
1194 | static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | 1188 | static void svm_vcpu_reset(struct kvm_vcpu *vcpu) |
1195 | { | 1189 | { |
1196 | struct vcpu_svm *svm = to_svm(vcpu); | 1190 | struct vcpu_svm *svm = to_svm(vcpu); |
1197 | u32 dummy; | 1191 | u32 dummy; |
@@ -1199,16 +1193,8 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
1199 | 1193 | ||
1200 | init_vmcb(svm); | 1194 | init_vmcb(svm); |
1201 | 1195 | ||
1202 | if (!kvm_vcpu_is_bsp(vcpu)) { | ||
1203 | kvm_rip_write(vcpu, 0); | ||
1204 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; | ||
1205 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; | ||
1206 | } | ||
1207 | |||
1208 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); | 1196 | kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); |
1209 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); | 1197 | kvm_register_write(vcpu, VCPU_REGS_RDX, eax); |
1210 | |||
1211 | return 0; | ||
1212 | } | 1198 | } |
1213 | 1199 | ||
1214 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | 1200 | static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) |
@@ -3487,7 +3473,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
3487 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && | 3473 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && |
3488 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && | 3474 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && |
3489 | exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) | 3475 | exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) |
3490 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 3476 | printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " |
3491 | "exit_code 0x%x\n", | 3477 | "exit_code 0x%x\n", |
3492 | __func__, svm->vmcb->control.exit_int_info, | 3478 | __func__, svm->vmcb->control.exit_int_info, |
3493 | exit_code); | 3479 | exit_code); |
@@ -3591,6 +3577,11 @@ static void svm_hwapic_isr_update(struct kvm *kvm, int isr) | |||
3591 | return; | 3577 | return; |
3592 | } | 3578 | } |
3593 | 3579 | ||
3580 | static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
3581 | { | ||
3582 | return; | ||
3583 | } | ||
3584 | |||
3594 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | 3585 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
3595 | { | 3586 | { |
3596 | struct vcpu_svm *svm = to_svm(vcpu); | 3587 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -3641,7 +3632,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3641 | return ret; | 3632 | return ret; |
3642 | } | 3633 | } |
3643 | 3634 | ||
3644 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 3635 | static int enable_irq_window(struct kvm_vcpu *vcpu) |
3645 | { | 3636 | { |
3646 | struct vcpu_svm *svm = to_svm(vcpu); | 3637 | struct vcpu_svm *svm = to_svm(vcpu); |
3647 | 3638 | ||
@@ -3655,15 +3646,16 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
3655 | svm_set_vintr(svm); | 3646 | svm_set_vintr(svm); |
3656 | svm_inject_irq(svm, 0x0); | 3647 | svm_inject_irq(svm, 0x0); |
3657 | } | 3648 | } |
3649 | return 0; | ||
3658 | } | 3650 | } |
3659 | 3651 | ||
3660 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 3652 | static int enable_nmi_window(struct kvm_vcpu *vcpu) |
3661 | { | 3653 | { |
3662 | struct vcpu_svm *svm = to_svm(vcpu); | 3654 | struct vcpu_svm *svm = to_svm(vcpu); |
3663 | 3655 | ||
3664 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) | 3656 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) |
3665 | == HF_NMI_MASK) | 3657 | == HF_NMI_MASK) |
3666 | return; /* IRET will cause a vm exit */ | 3658 | return 0; /* IRET will cause a vm exit */ |
3667 | 3659 | ||
3668 | /* | 3660 | /* |
3669 | * Something prevents NMI from being injected. Single step over possible | 3661 | * Something prevents NMI from being injected. Single step over possible |

@@ -3672,6 +3664,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
3672 | svm->nmi_singlestep = true; | 3664 | svm->nmi_singlestep = true; |
3673 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 3665 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
3674 | update_db_bp_intercept(vcpu); | 3666 | update_db_bp_intercept(vcpu); |
3667 | return 0; | ||
3675 | } | 3668 | } |
3676 | 3669 | ||
3677 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 3670 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -4247,6 +4240,11 @@ out: | |||
4247 | return ret; | 4240 | return ret; |
4248 | } | 4241 | } |
4249 | 4242 | ||
4243 | static void svm_handle_external_intr(struct kvm_vcpu *vcpu) | ||
4244 | { | ||
4245 | local_irq_enable(); | ||
4246 | } | ||
4247 | |||
4250 | static struct kvm_x86_ops svm_x86_ops = { | 4248 | static struct kvm_x86_ops svm_x86_ops = { |
4251 | .cpu_has_kvm_support = has_svm, | 4249 | .cpu_has_kvm_support = has_svm, |
4252 | .disabled_by_bios = is_disabled, | 4250 | .disabled_by_bios = is_disabled, |
@@ -4314,6 +4312,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4314 | .vm_has_apicv = svm_vm_has_apicv, | 4312 | .vm_has_apicv = svm_vm_has_apicv, |
4315 | .load_eoi_exitmap = svm_load_eoi_exitmap, | 4313 | .load_eoi_exitmap = svm_load_eoi_exitmap, |
4316 | .hwapic_isr_update = svm_hwapic_isr_update, | 4314 | .hwapic_isr_update = svm_hwapic_isr_update, |
4315 | .sync_pir_to_irr = svm_sync_pir_to_irr, | ||
4317 | 4316 | ||
4318 | .set_tss_addr = svm_set_tss_addr, | 4317 | .set_tss_addr = svm_set_tss_addr, |
4319 | .get_tdp_level = get_npt_level, | 4318 | .get_tdp_level = get_npt_level, |
@@ -4342,6 +4341,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4342 | .set_tdp_cr3 = set_tdp_cr3, | 4341 | .set_tdp_cr3 = set_tdp_cr3, |
4343 | 4342 | ||
4344 | .check_intercept = svm_check_intercept, | 4343 | .check_intercept = svm_check_intercept, |
4344 | .handle_external_intr = svm_handle_external_intr, | ||
4345 | }; | 4345 | }; |
4346 | 4346 | ||
4347 | static int __init svm_init(void) | 4347 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 867b81037f96..25a791ed21c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -84,8 +84,11 @@ module_param(vmm_exclusive, bool, S_IRUGO); | |||
84 | static bool __read_mostly fasteoi = 1; | 84 | static bool __read_mostly fasteoi = 1; |
85 | module_param(fasteoi, bool, S_IRUGO); | 85 | module_param(fasteoi, bool, S_IRUGO); |
86 | 86 | ||
87 | static bool __read_mostly enable_apicv_reg_vid; | 87 | static bool __read_mostly enable_apicv = 1; |
88 | module_param(enable_apicv, bool, S_IRUGO); | ||
88 | 89 | ||
90 | static bool __read_mostly enable_shadow_vmcs = 1; | ||
91 | module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); | ||
89 | /* | 92 | /* |
90 | * If nested=1, nested virtualization is supported, i.e., guests may use | 93 | * If nested=1, nested virtualization is supported, i.e., guests may use |
91 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not | 94 | * VMX and be a hypervisor for its own guests. If nested=0, guests may not |
@@ -298,7 +301,8 @@ struct __packed vmcs12 { | |||
298 | u32 guest_activity_state; | 301 | u32 guest_activity_state; |
299 | u32 guest_sysenter_cs; | 302 | u32 guest_sysenter_cs; |
300 | u32 host_ia32_sysenter_cs; | 303 | u32 host_ia32_sysenter_cs; |
301 | u32 padding32[8]; /* room for future expansion */ | 304 | u32 vmx_preemption_timer_value; |
305 | u32 padding32[7]; /* room for future expansion */ | ||
302 | u16 virtual_processor_id; | 306 | u16 virtual_processor_id; |
303 | u16 guest_es_selector; | 307 | u16 guest_es_selector; |
304 | u16 guest_cs_selector; | 308 | u16 guest_cs_selector; |
@@ -351,6 +355,12 @@ struct nested_vmx { | |||
351 | /* The host-usable pointer to the above */ | 355 | /* The host-usable pointer to the above */ |
352 | struct page *current_vmcs12_page; | 356 | struct page *current_vmcs12_page; |
353 | struct vmcs12 *current_vmcs12; | 357 | struct vmcs12 *current_vmcs12; |
358 | struct vmcs *current_shadow_vmcs; | ||
359 | /* | ||
360 | * Indicates if the shadow vmcs must be updated with the | ||
361 | * data hold by vmcs12 | ||
362 | */ | ||
363 | bool sync_shadow_vmcs; | ||
354 | 364 | ||
355 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | 365 | /* vmcs02_list cache of VMCSs recently used to run L2 guests */ |
356 | struct list_head vmcs02_pool; | 366 | struct list_head vmcs02_pool; |
@@ -365,6 +375,31 @@ struct nested_vmx { | |||
365 | struct page *apic_access_page; | 375 | struct page *apic_access_page; |
366 | }; | 376 | }; |
367 | 377 | ||
378 | #define POSTED_INTR_ON 0 | ||
379 | /* Posted-Interrupt Descriptor */ | ||
380 | struct pi_desc { | ||
381 | u32 pir[8]; /* Posted interrupt requested */ | ||
382 | u32 control; /* bit 0 of control is outstanding notification bit */ | ||
383 | u32 rsvd[7]; | ||
384 | } __aligned(64); | ||
385 | |||
386 | static bool pi_test_and_set_on(struct pi_desc *pi_desc) | ||
387 | { | ||
388 | return test_and_set_bit(POSTED_INTR_ON, | ||
389 | (unsigned long *)&pi_desc->control); | ||
390 | } | ||
391 | |||
392 | static bool pi_test_and_clear_on(struct pi_desc *pi_desc) | ||
393 | { | ||
394 | return test_and_clear_bit(POSTED_INTR_ON, | ||
395 | (unsigned long *)&pi_desc->control); | ||
396 | } | ||
397 | |||
398 | static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) | ||
399 | { | ||
400 | return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); | ||
401 | } | ||
402 | |||
368 | struct vcpu_vmx { | 403 | struct vcpu_vmx { |
369 | struct kvm_vcpu vcpu; | 404 | struct kvm_vcpu vcpu; |
370 | unsigned long host_rsp; | 405 | unsigned long host_rsp; |
@@ -377,6 +412,7 @@ struct vcpu_vmx { | |||
377 | struct shared_msr_entry *guest_msrs; | 412 | struct shared_msr_entry *guest_msrs; |
378 | int nmsrs; | 413 | int nmsrs; |
379 | int save_nmsrs; | 414 | int save_nmsrs; |
415 | unsigned long host_idt_base; | ||
380 | #ifdef CONFIG_X86_64 | 416 | #ifdef CONFIG_X86_64 |
381 | u64 msr_host_kernel_gs_base; | 417 | u64 msr_host_kernel_gs_base; |
382 | u64 msr_guest_kernel_gs_base; | 418 | u64 msr_guest_kernel_gs_base; |
@@ -428,6 +464,9 @@ struct vcpu_vmx { | |||
428 | 464 | ||
429 | bool rdtscp_enabled; | 465 | bool rdtscp_enabled; |
430 | 466 | ||
467 | /* Posted interrupt descriptor */ | ||
468 | struct pi_desc pi_desc; | ||
469 | |||
431 | /* Support for a guest hypervisor (nested VMX) */ | 470 | /* Support for a guest hypervisor (nested VMX) */ |
432 | struct nested_vmx nested; | 471 | struct nested_vmx nested; |
433 | }; | 472 | }; |
@@ -451,6 +490,64 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
451 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ | 490 | #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ |
452 | [number##_HIGH] = VMCS12_OFFSET(name)+4 | 491 | [number##_HIGH] = VMCS12_OFFSET(name)+4 |
453 | 492 | ||
493 | |||
494 | static const unsigned long shadow_read_only_fields[] = { | ||
495 | /* | ||
496 | * We do NOT shadow fields that are modified when L0 | ||
497 | * traps and emulates any vmx instruction (e.g. VMPTRLD, | ||
498 | * VMXON...) executed by L1. | ||
499 | * For example, VM_INSTRUCTION_ERROR is read | ||
500 | * by L1 if a vmx instruction fails (part of the error path). | ||
501 | * Note the code assumes this logic. If for some reason | ||
502 | * we start shadowing these fields then we need to | ||
503 | * force a shadow sync when L0 emulates vmx instructions | ||
504 | * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified | ||
505 | * by nested_vmx_failValid) | ||
506 | */ | ||
507 | VM_EXIT_REASON, | ||
508 | VM_EXIT_INTR_INFO, | ||
509 | VM_EXIT_INSTRUCTION_LEN, | ||
510 | IDT_VECTORING_INFO_FIELD, | ||
511 | IDT_VECTORING_ERROR_CODE, | ||
512 | VM_EXIT_INTR_ERROR_CODE, | ||
513 | EXIT_QUALIFICATION, | ||
514 | GUEST_LINEAR_ADDRESS, | ||
515 | GUEST_PHYSICAL_ADDRESS | ||
516 | }; | ||
517 | static const int max_shadow_read_only_fields = | ||
518 | ARRAY_SIZE(shadow_read_only_fields); | ||
519 | |||
520 | static const unsigned long shadow_read_write_fields[] = { | ||
521 | GUEST_RIP, | ||
522 | GUEST_RSP, | ||
523 | GUEST_CR0, | ||
524 | GUEST_CR3, | ||
525 | GUEST_CR4, | ||
526 | GUEST_INTERRUPTIBILITY_INFO, | ||
527 | GUEST_RFLAGS, | ||
528 | GUEST_CS_SELECTOR, | ||
529 | GUEST_CS_AR_BYTES, | ||
530 | GUEST_CS_LIMIT, | ||
531 | GUEST_CS_BASE, | ||
532 | GUEST_ES_BASE, | ||
533 | CR0_GUEST_HOST_MASK, | ||
534 | CR0_READ_SHADOW, | ||
535 | CR4_READ_SHADOW, | ||
536 | TSC_OFFSET, | ||
537 | EXCEPTION_BITMAP, | ||
538 | CPU_BASED_VM_EXEC_CONTROL, | ||
539 | VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
540 | VM_ENTRY_INTR_INFO_FIELD, | ||
541 | VM_ENTRY_INSTRUCTION_LEN, | ||
542 | VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
543 | HOST_FS_BASE, | ||
544 | HOST_GS_BASE, | ||
545 | HOST_FS_SELECTOR, | ||
546 | HOST_GS_SELECTOR | ||
547 | }; | ||
548 | static const int max_shadow_read_write_fields = | ||
549 | ARRAY_SIZE(shadow_read_write_fields); | ||
550 | |||
454 | static const unsigned short vmcs_field_to_offset_table[] = { | 551 | static const unsigned short vmcs_field_to_offset_table[] = { |
455 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), | 552 | FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), |
456 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), | 553 | FIELD(GUEST_ES_SELECTOR, guest_es_selector), |
@@ -537,6 +634,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
537 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), | 634 | FIELD(GUEST_ACTIVITY_STATE, guest_activity_state), |
538 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), | 635 | FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs), |
539 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), | 636 | FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs), |
637 | FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value), | ||
540 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), | 638 | FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask), |
541 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), | 639 | FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), |
542 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), | 640 | FIELD(CR0_READ_SHADOW, cr0_read_shadow), |
@@ -624,6 +722,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
624 | struct kvm_segment *var, int seg); | 722 | struct kvm_segment *var, int seg); |
625 | static bool guest_state_valid(struct kvm_vcpu *vcpu); | 723 | static bool guest_state_valid(struct kvm_vcpu *vcpu); |
626 | static u32 vmx_segment_access_rights(struct kvm_segment *var); | 724 | static u32 vmx_segment_access_rights(struct kvm_segment *var); |
725 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); | ||
726 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | ||
727 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | ||
627 | 728 | ||
628 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 729 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
629 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 730 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -640,6 +741,8 @@ static unsigned long *vmx_msr_bitmap_legacy; | |||
640 | static unsigned long *vmx_msr_bitmap_longmode; | 741 | static unsigned long *vmx_msr_bitmap_longmode; |
641 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; | 742 | static unsigned long *vmx_msr_bitmap_legacy_x2apic; |
642 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; | 743 | static unsigned long *vmx_msr_bitmap_longmode_x2apic; |
744 | static unsigned long *vmx_vmread_bitmap; | ||
745 | static unsigned long *vmx_vmwrite_bitmap; | ||
643 | 746 | ||
644 | static bool cpu_has_load_ia32_efer; | 747 | static bool cpu_has_load_ia32_efer; |
645 | static bool cpu_has_load_perf_global_ctrl; | 748 | static bool cpu_has_load_perf_global_ctrl; |
@@ -782,6 +885,18 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) | |||
782 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | 885 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; |
783 | } | 886 | } |
784 | 887 | ||
888 | static inline bool cpu_has_vmx_posted_intr(void) | ||
889 | { | ||
890 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; | ||
891 | } | ||
892 | |||
893 | static inline bool cpu_has_vmx_apicv(void) | ||
894 | { | ||
895 | return cpu_has_vmx_apic_register_virt() && | ||
896 | cpu_has_vmx_virtual_intr_delivery() && | ||
897 | cpu_has_vmx_posted_intr(); | ||
898 | } | ||
899 | |||
785 | static inline bool cpu_has_vmx_flexpriority(void) | 900 | static inline bool cpu_has_vmx_flexpriority(void) |
786 | { | 901 | { |
787 | return cpu_has_vmx_tpr_shadow() && | 902 | return cpu_has_vmx_tpr_shadow() && |
@@ -895,6 +1010,18 @@ static inline bool cpu_has_vmx_wbinvd_exit(void) | |||
895 | SECONDARY_EXEC_WBINVD_EXITING; | 1010 | SECONDARY_EXEC_WBINVD_EXITING; |
896 | } | 1011 | } |
897 | 1012 | ||
1013 | static inline bool cpu_has_vmx_shadow_vmcs(void) | ||
1014 | { | ||
1015 | u64 vmx_msr; | ||
1016 | rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); | ||
1017 | /* check if the cpu supports writing r/o exit information fields */ | ||
1018 | if (!(vmx_msr & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) | ||
1019 | return false; | ||
1020 | |||
1021 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
1022 | SECONDARY_EXEC_SHADOW_VMCS; | ||
1023 | } | ||
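
cpu_has_vmx_shadow_vmcs() requires both the VMWRITE-to-read-only-fields capability in MSR_IA32_VMX_MISC and the SECONDARY_EXEC_SHADOW_VMCS control. The sync helpers declared earlier, copy_shadow_to_vmcs12() and its inverse, then presumably just walk the shadowed-field tables in each direction. A sketch of that table-driven copy under stated assumptions: the encodings are real VMCS field numbers, but vmcs_read()/vmcs12_write() are stand-ins for the actual accessors:

#include <stdint.h>
#include <stdio.h>

/* Real VMCS field encodings; the array mirrors shadow_read_write_fields. */
static const unsigned long rw_fields[] = {
	0x681c,	/* GUEST_RSP */
	0x681e,	/* GUEST_RIP */
};

static uint64_t shadow[2] = { 0x1000, 0x2000 };	/* fake shadow VMCS */
static uint64_t vmcs12[2];			/* fake vmcs12 image */

static uint64_t vmcs_read(int i)
{
	return shadow[i];
}

static void vmcs12_write(int i, uint64_t v)
{
	vmcs12[i] = v;
}

static void copy_shadow_to_vmcs12(void)
{
	unsigned int i;

	for (i = 0; i < sizeof(rw_fields) / sizeof(rw_fields[0]); i++) {
		vmcs12_write(i, vmcs_read(i));
		printf("synced field %#lx\n", rw_fields[i]);
	}
}

int main(void)
{
	copy_shadow_to_vmcs12();
	printf("vmcs12 GUEST_RSP=%#llx\n", (unsigned long long)vmcs12[0]);
	return 0;
}
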
1024 | |||
898 | static inline bool report_flexpriority(void) | 1025 | static inline bool report_flexpriority(void) |
899 | { | 1026 | { |
900 | return flexpriority_enabled; | 1027 | return flexpriority_enabled; |
@@ -1790,7 +1917,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
1790 | u32 intr_info = nr | INTR_INFO_VALID_MASK; | 1917 | u32 intr_info = nr | INTR_INFO_VALID_MASK; |
1791 | 1918 | ||
1792 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && | 1919 | if (nr == PF_VECTOR && is_guest_mode(vcpu) && |
1793 | nested_pf_handled(vcpu)) | 1920 | !vmx->nested.nested_run_pending && nested_pf_handled(vcpu)) |
1794 | return; | 1921 | return; |
1795 | 1922 | ||
1796 | if (has_error_code) { | 1923 | if (has_error_code) { |
@@ -2022,6 +2149,7 @@ static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; | |||
2022 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | 2149 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; |
2023 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2150 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
2024 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2151 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
2152 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | ||
2025 | static __init void nested_vmx_setup_ctls_msrs(void) | 2153 | static __init void nested_vmx_setup_ctls_msrs(void) |
2026 | { | 2154 | { |
2027 | /* | 2155 | /* |
@@ -2040,30 +2168,40 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2040 | */ | 2168 | */ |
2041 | 2169 | ||
2042 | /* pin-based controls */ | 2170 | /* pin-based controls */ |
2171 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, | ||
2172 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); | ||
2043 | /* | 2173 | /* |
2044 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is | 2174 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is |
2045 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. | 2175 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. |
2046 | */ | 2176 | */ |
2047 | nested_vmx_pinbased_ctls_low = 0x16 ; | 2177 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
2048 | nested_vmx_pinbased_ctls_high = 0x16 | | 2178 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | |
2049 | PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | | 2179 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | |
2050 | PIN_BASED_VIRTUAL_NMIS; | 2180 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2181 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2051 | 2182 | ||
2052 | /* exit controls */ | 2183 | /* |
2053 | nested_vmx_exit_ctls_low = 0; | 2184 | * Exit controls |
2185 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | ||
2186 | * 17 must be 1. | ||
2187 | */ | ||
2188 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2054 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ | 2189 | /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */ |
2055 | #ifdef CONFIG_X86_64 | 2190 | #ifdef CONFIG_X86_64 |
2056 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2191 | nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE; |
2057 | #else | 2192 | #else |
2058 | nested_vmx_exit_ctls_high = 0; | 2193 | nested_vmx_exit_ctls_high = 0; |
2059 | #endif | 2194 | #endif |
2195 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2060 | 2196 | ||
2061 | /* entry controls */ | 2197 | /* entry controls */ |
2062 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2198 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
2063 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); | 2199 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); |
2064 | nested_vmx_entry_ctls_low = 0; | 2200 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ |
2201 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2065 | nested_vmx_entry_ctls_high &= | 2202 | nested_vmx_entry_ctls_high &= |
2066 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; | 2203 | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE; |
2204 | nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2067 | 2205 | ||
2068 | /* cpu-based controls */ | 2206 | /* cpu-based controls */ |
2069 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2207 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
@@ -2080,6 +2218,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2080 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | | 2218 | CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | |
2081 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | | 2219 | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | |
2082 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | | 2220 | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | |
2221 | CPU_BASED_PAUSE_EXITING | | ||
2083 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 2222 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
2084 | /* | 2223 | /* |
2085 | * We can allow some features even when not supported by the | 2224 | * We can allow some features even when not supported by the |
@@ -2094,7 +2233,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2094 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); | 2233 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); |
2095 | nested_vmx_secondary_ctls_low = 0; | 2234 | nested_vmx_secondary_ctls_low = 0; |
2096 | nested_vmx_secondary_ctls_high &= | 2235 | nested_vmx_secondary_ctls_high &= |
2097 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2236 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | |
2237 | SECONDARY_EXEC_WBINVD_EXITING; | ||
2238 | |||
2239 | /* miscellaneous data */ | ||
2240 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | ||
2241 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | ||
2242 | VMX_MISC_SAVE_EFER_LMA; | ||
2243 | nested_vmx_misc_high = 0; | ||
2098 | } | 2244 | } |
2099 | 2245 | ||
2100 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) | 2246 | static inline bool vmx_control_verify(u32 control, u32 low, u32 high) |
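
The reworked setup derives the nested control MSRs from the hardware values and then forces in the ALWAYSON bits. For reference, a VMX capability MSR packs two halves: the low 32 bits are the allowed-0 settings (bits that must be 1) and the high 32 bits the allowed-1 settings (bits that may be 1). A small sketch of vmx_control_msr()-style packing and a verify check equivalent to vmx_control_verify(); the sample values are illustrative:

#include <stdint.h>
#include <stdio.h>

static uint64_t vmx_control_msr(uint32_t low, uint32_t high)
{
	return low | ((uint64_t)high << 32);
}

static int control_verify(uint32_t control, uint32_t low, uint32_t high)
{
	/* every must-be-1 bit set, and no bit outside the may-be-1 mask */
	return (control & low) == low && !(control & ~high);
}

int main(void)
{
	uint32_t low = 0x16, high = 0x16 | (1u << 6);

	printf("msr=%#llx\n", (unsigned long long)vmx_control_msr(low, high));
	printf("ok=%d\n", control_verify(0x16, low, high));	/* 1 */
	printf("bad=%d\n", control_verify(0x10, low, high));	/* 0 */
	return 0;
}
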
@@ -2165,7 +2311,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2165 | nested_vmx_entry_ctls_high); | 2311 | nested_vmx_entry_ctls_high); |
2166 | break; | 2312 | break; |
2167 | case MSR_IA32_VMX_MISC: | 2313 | case MSR_IA32_VMX_MISC: |
2168 | *pdata = 0; | 2314 | *pdata = vmx_control_msr(nested_vmx_misc_low, |
2315 | nested_vmx_misc_high); | ||
2169 | break; | 2316 | break; |
2170 | /* | 2317 | /* |
2171 | * These MSRs specify bits which the guest must keep fixed (on or off) | 2318 | * These MSRs specify bits which the guest must keep fixed (on or off) |
@@ -2529,12 +2676,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2529 | u32 _vmexit_control = 0; | 2676 | u32 _vmexit_control = 0; |
2530 | u32 _vmentry_control = 0; | 2677 | u32 _vmentry_control = 0; |
2531 | 2678 | ||
2532 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
2533 | opt = PIN_BASED_VIRTUAL_NMIS; | ||
2534 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
2535 | &_pin_based_exec_control) < 0) | ||
2536 | return -EIO; | ||
2537 | |||
2538 | min = CPU_BASED_HLT_EXITING | | 2679 | min = CPU_BASED_HLT_EXITING | |
2539 | #ifdef CONFIG_X86_64 | 2680 | #ifdef CONFIG_X86_64 |
2540 | CPU_BASED_CR8_LOAD_EXITING | | 2681 | CPU_BASED_CR8_LOAD_EXITING | |
@@ -2573,7 +2714,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2573 | SECONDARY_EXEC_RDTSCP | | 2714 | SECONDARY_EXEC_RDTSCP | |
2574 | SECONDARY_EXEC_ENABLE_INVPCID | | 2715 | SECONDARY_EXEC_ENABLE_INVPCID | |
2575 | SECONDARY_EXEC_APIC_REGISTER_VIRT | | 2716 | SECONDARY_EXEC_APIC_REGISTER_VIRT | |
2576 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; | 2717 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | |
2718 | SECONDARY_EXEC_SHADOW_VMCS; | ||
2577 | if (adjust_vmx_controls(min2, opt2, | 2719 | if (adjust_vmx_controls(min2, opt2, |
2578 | MSR_IA32_VMX_PROCBASED_CTLS2, | 2720 | MSR_IA32_VMX_PROCBASED_CTLS2, |
2579 | &_cpu_based_2nd_exec_control) < 0) | 2721 | &_cpu_based_2nd_exec_control) < 0) |
@@ -2605,11 +2747,23 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2605 | #ifdef CONFIG_X86_64 | 2747 | #ifdef CONFIG_X86_64 |
2606 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2748 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
2607 | #endif | 2749 | #endif |
2608 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; | 2750 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | |
2751 | VM_EXIT_ACK_INTR_ON_EXIT; | ||
2609 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | 2752 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, |
2610 | &_vmexit_control) < 0) | 2753 | &_vmexit_control) < 0) |
2611 | return -EIO; | 2754 | return -EIO; |
2612 | 2755 | ||
2756 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | ||
2757 | opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR; | ||
2758 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | ||
2759 | &_pin_based_exec_control) < 0) | ||
2760 | return -EIO; | ||
2761 | |||
2762 | if (!(_cpu_based_2nd_exec_control & | ||
2763 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) || | ||
2764 | !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) | ||
2765 | _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; | ||
2766 | |||
2613 | min = 0; | 2767 | min = 0; |
2614 | opt = VM_ENTRY_LOAD_IA32_PAT; | 2768 | opt = VM_ENTRY_LOAD_IA32_PAT; |
2615 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | 2769 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2762,6 +2916,8 @@ static __init int hardware_setup(void) | |||
2762 | 2916 | ||
2763 | if (!cpu_has_vmx_vpid()) | 2917 | if (!cpu_has_vmx_vpid()) |
2764 | enable_vpid = 0; | 2918 | enable_vpid = 0; |
2919 | if (!cpu_has_vmx_shadow_vmcs()) | ||
2920 | enable_shadow_vmcs = 0; | ||
2765 | 2921 | ||
2766 | if (!cpu_has_vmx_ept() || | 2922 | if (!cpu_has_vmx_ept() || |
2767 | !cpu_has_vmx_ept_4levels()) { | 2923 | !cpu_has_vmx_ept_4levels()) { |
@@ -2788,14 +2944,16 @@ static __init int hardware_setup(void) | |||
2788 | if (!cpu_has_vmx_ple()) | 2944 | if (!cpu_has_vmx_ple()) |
2789 | ple_gap = 0; | 2945 | ple_gap = 0; |
2790 | 2946 | ||
2791 | if (!cpu_has_vmx_apic_register_virt() || | 2947 | if (!cpu_has_vmx_apicv()) |
2792 | !cpu_has_vmx_virtual_intr_delivery()) | 2948 | enable_apicv = 0; |
2793 | enable_apicv_reg_vid = 0; | ||
2794 | 2949 | ||
2795 | if (enable_apicv_reg_vid) | 2950 | if (enable_apicv) |
2796 | kvm_x86_ops->update_cr8_intercept = NULL; | 2951 | kvm_x86_ops->update_cr8_intercept = NULL; |
2797 | else | 2952 | else { |
2798 | kvm_x86_ops->hwapic_irr_update = NULL; | 2953 | kvm_x86_ops->hwapic_irr_update = NULL; |
2954 | kvm_x86_ops->deliver_posted_interrupt = NULL; | ||
2955 | kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; | ||
2956 | } | ||
2799 | 2957 | ||
2800 | if (nested) | 2958 | if (nested) |
2801 | nested_vmx_setup_ctls_msrs(); | 2959 | nested_vmx_setup_ctls_msrs(); |
@@ -2876,22 +3034,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
2876 | vmx->cpl = 0; | 3034 | vmx->cpl = 0; |
2877 | } | 3035 | } |
2878 | 3036 | ||
2879 | static gva_t rmode_tss_base(struct kvm *kvm) | ||
2880 | { | ||
2881 | if (!kvm->arch.tss_addr) { | ||
2882 | struct kvm_memslots *slots; | ||
2883 | struct kvm_memory_slot *slot; | ||
2884 | gfn_t base_gfn; | ||
2885 | |||
2886 | slots = kvm_memslots(kvm); | ||
2887 | slot = id_to_memslot(slots, 0); | ||
2888 | base_gfn = slot->base_gfn + slot->npages - 3; | ||
2889 | |||
2890 | return base_gfn << PAGE_SHIFT; | ||
2891 | } | ||
2892 | return kvm->arch.tss_addr; | ||
2893 | } | ||
2894 | |||
2895 | static void fix_rmode_seg(int seg, struct kvm_segment *save) | 3037 | static void fix_rmode_seg(int seg, struct kvm_segment *save) |
2896 | { | 3038 | { |
2897 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 3039 | const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
@@ -2942,19 +3084,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
2942 | 3084 | ||
2943 | /* | 3085 | /* |
2944 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | 3086 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering |
2945 | * vcpu. Call it here with phys address pointing 16M below 4G. | 3087 | * vcpu. Warn the user that an update is overdue. |
2946 | */ | 3088 | */ |
2947 | if (!vcpu->kvm->arch.tss_addr) { | 3089 | if (!vcpu->kvm->arch.tss_addr) |
2948 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " | 3090 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " |
2949 | "called before entering vcpu\n"); | 3091 | "called before entering vcpu\n"); |
2950 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
2951 | vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); | ||
2952 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
2953 | } | ||
2954 | 3092 | ||
2955 | vmx_segment_cache_clear(vmx); | 3093 | vmx_segment_cache_clear(vmx); |
2956 | 3094 | ||
2957 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 3095 | vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); |
2958 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); | 3096 | vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); |
2959 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); | 3097 | vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); |
2960 | 3098 | ||
@@ -3214,7 +3352,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
3214 | */ | 3352 | */ |
3215 | if (!nested_vmx_allowed(vcpu)) | 3353 | if (!nested_vmx_allowed(vcpu)) |
3216 | return 1; | 3354 | return 1; |
3217 | } else if (to_vmx(vcpu)->nested.vmxon) | 3355 | } |
3356 | if (to_vmx(vcpu)->nested.vmxon && | ||
3357 | ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) | ||
3218 | return 1; | 3358 | return 1; |
3219 | 3359 | ||
3220 | vcpu->arch.cr4 = cr4; | 3360 | vcpu->arch.cr4 = cr4; |
@@ -3550,7 +3690,7 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) | |||
3550 | return true; | 3690 | return true; |
3551 | 3691 | ||
3552 | /* real mode guest state checks */ | 3692 | /* real mode guest state checks */ |
3553 | if (!is_protmode(vcpu)) { | 3693 | if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { |
3554 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 3694 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
3555 | return false; | 3695 | return false; |
3556 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 3696 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
@@ -3599,7 +3739,7 @@ static int init_rmode_tss(struct kvm *kvm) | |||
3599 | int r, idx, ret = 0; | 3739 | int r, idx, ret = 0; |
3600 | 3740 | ||
3601 | idx = srcu_read_lock(&kvm->srcu); | 3741 | idx = srcu_read_lock(&kvm->srcu); |
3602 | fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 3742 | fn = kvm->arch.tss_addr >> PAGE_SHIFT; |
3603 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 3743 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
3604 | if (r < 0) | 3744 | if (r < 0) |
3605 | goto out; | 3745 | goto out; |
@@ -3692,7 +3832,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
3692 | kvm_userspace_mem.flags = 0; | 3832 | kvm_userspace_mem.flags = 0; |
3693 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; | 3833 | kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; |
3694 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3834 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
3695 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); | 3835 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
3696 | if (r) | 3836 | if (r) |
3697 | goto out; | 3837 | goto out; |
3698 | 3838 | ||
@@ -3722,7 +3862,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
3722 | kvm_userspace_mem.guest_phys_addr = | 3862 | kvm_userspace_mem.guest_phys_addr = |
3723 | kvm->arch.ept_identity_map_addr; | 3863 | kvm->arch.ept_identity_map_addr; |
3724 | kvm_userspace_mem.memory_size = PAGE_SIZE; | 3864 | kvm_userspace_mem.memory_size = PAGE_SIZE; |
3725 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); | 3865 | r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); |
3726 | if (r) | 3866 | if (r) |
3727 | goto out; | 3867 | goto out; |
3728 | 3868 | ||
@@ -3869,13 +4009,59 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) | |||
3869 | msr, MSR_TYPE_W); | 4009 | msr, MSR_TYPE_W); |
3870 | } | 4010 | } |
3871 | 4011 | ||
4012 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
4013 | { | ||
4014 | return enable_apicv && irqchip_in_kernel(kvm); | ||
4015 | } | ||
4016 | |||
4017 | /* | ||
4018 | * Send an interrupt to a vcpu via the posted-interrupt mechanism: | ||
4019 | * 1. If the target vcpu is running (non-root mode), send the posted- | ||
4020 | * interrupt notification and hardware will sync PIR to vIRR atomically. | ||
4021 | * 2. If the target vcpu isn't running (root mode), kick it so it picks up | ||
4022 | * the interrupt from PIR at its next vmentry. | ||
4023 | */ | ||
4024 | static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) | ||
4025 | { | ||
4026 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4027 | int r; | ||
4028 | |||
4029 | if (pi_test_and_set_pir(vector, &vmx->pi_desc)) | ||
4030 | return; | ||
4031 | |||
4032 | r = pi_test_and_set_on(&vmx->pi_desc); | ||
4033 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
4034 | #ifdef CONFIG_SMP | ||
4035 | if (!r && (vcpu->mode == IN_GUEST_MODE)) | ||
4036 | apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), | ||
4037 | POSTED_INTR_VECTOR); | ||
4038 | else | ||
4039 | #endif | ||
4040 | kvm_vcpu_kick(vcpu); | ||
4041 | } | ||
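
vmx_deliver_posted_interrupt() implements the protocol in the comment above as a two-step atomic claim: set the vector's PIR bit first, then try to claim the ON bit; only the caller that flips ON sends the notification, and a PIR bit that was already set means someone else has the delivery in hand. A user-space sketch of that claim using C11 atomics as stand-ins for test_and_set_bit; the descriptor layout is simplified and no real IPI is sent:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct pi_desc {
	atomic_uint pir[8];	/* 256 posted-interrupt request bits */
	atomic_uint control;	/* bit 0: outstanding notification (ON) */
};

static bool test_and_set(atomic_uint *word, unsigned int bit)
{
	return atomic_fetch_or(word, 1u << bit) & (1u << bit);
}

static void deliver(struct pi_desc *pi, unsigned int vector)
{
	if (test_and_set(&pi->pir[vector / 32], vector % 32))
		return;		/* vector already pending; nothing to do */
	if (!test_and_set(&pi->control, 0))
		printf("send notification IPI for vector %#x\n", vector);
	/* the real code also raises KVM_REQ_EVENT and may kick the vcpu */
}

int main(void)
{
	struct pi_desc pi = { 0 };

	deliver(&pi, 0xf2);	/* wins the ON bit: sends the IPI */
	deliver(&pi, 0xf2);	/* PIR bit already set: silent no-op */
	return 0;
}
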
4042 | |||
4043 | static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) | ||
4044 | { | ||
4045 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4046 | |||
4047 | if (!pi_test_and_clear_on(&vmx->pi_desc)) | ||
4048 | return; | ||
4049 | |||
4050 | kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); | ||
4051 | } | ||
4052 | |||
4053 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) | ||
4054 | { | ||
4055 | return; | ||
4056 | } | ||
4057 | |||
3872 | /* | 4058 | /* |
3873 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that | 4059 | * Set up the vmcs's constant host-state fields, i.e., host-state fields that |
3874 | * will not change in the lifetime of the guest. | 4060 | * will not change in the lifetime of the guest. |
3875 | * Note that host-state that does change is set elsewhere. E.g., host-state | 4061 | * Note that host-state that does change is set elsewhere. E.g., host-state |
3876 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. | 4062 | * that is set differently for each CPU is set in vmx_vcpu_load(), not here. |
3877 | */ | 4063 | */ |
3878 | static void vmx_set_constant_host_state(void) | 4064 | static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) |
3879 | { | 4065 | { |
3880 | u32 low32, high32; | 4066 | u32 low32, high32; |
3881 | unsigned long tmpl; | 4067 | unsigned long tmpl; |
@@ -3903,6 +4089,7 @@ static void vmx_set_constant_host_state(void) | |||
3903 | 4089 | ||
3904 | native_store_idt(&dt); | 4090 | native_store_idt(&dt); |
3905 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ | 4091 | vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ |
4092 | vmx->host_idt_base = dt.address; | ||
3906 | 4093 | ||
3907 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ | 4094 | vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ |
3908 | 4095 | ||
@@ -3928,6 +4115,15 @@ static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) | |||
3928 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | 4115 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); |
3929 | } | 4116 | } |
3930 | 4117 | ||
4118 | static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | ||
4119 | { | ||
4120 | u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; | ||
4121 | |||
4122 | if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) | ||
4123 | pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; | ||
4124 | return pin_based_exec_ctrl; | ||
4125 | } | ||
4126 | |||
3931 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | 4127 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
3932 | { | 4128 | { |
3933 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; | 4129 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; |
@@ -3945,11 +4141,6 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
3945 | return exec_control; | 4141 | return exec_control; |
3946 | } | 4142 | } |
3947 | 4143 | ||
3948 | static int vmx_vm_has_apicv(struct kvm *kvm) | ||
3949 | { | ||
3950 | return enable_apicv_reg_vid && irqchip_in_kernel(kvm); | ||
3951 | } | ||
3952 | |||
3953 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | 4144 | static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) |
3954 | { | 4145 | { |
3955 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; | 4146 | u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; |
@@ -3971,6 +4162,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) | |||
3971 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | | 4162 | exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | |
3972 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); | 4163 | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); |
3973 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; | 4164 | exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; |
4165 | /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD | ||
4166 | (handle_vmptrld). | ||
4167 | We can NOT enable shadow_vmcs here because we don't yet have | ||
4168 | a current VMCS12. | ||
4169 | */ | ||
4170 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
3974 | return exec_control; | 4171 | return exec_control; |
3975 | } | 4172 | } |
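As the new comment in this function notes, SECONDARY_EXEC_SHADOW_VMCS cannot be set at vcpu setup time because no vmcs12 is current yet. The patch instead turns it on in handle_vmptrld() and back off in nested_release_vmcs12() (both further below), moving VMCS_LINK_POINTER in step. A hypothetical helper consolidating that on/off pattern, not part of the patch itself:

	static void set_shadow_vmcs_active(struct vcpu_vmx *vmx, bool on)
	{
		u32 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);

		if (on) {
			exec_control |= SECONDARY_EXEC_SHADOW_VMCS;
			vmcs_write64(VMCS_LINK_POINTER,
				     __pa(vmx->nested.current_shadow_vmcs));
			vmx->nested.sync_shadow_vmcs = true;
		} else {
			exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
			vmcs_write64(VMCS_LINK_POINTER, -1ull);
		}
		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
	}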
3976 | 4173 | ||
@@ -3999,14 +4196,17 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
3999 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); | 4196 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); |
4000 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); | 4197 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); |
4001 | 4198 | ||
4199 | if (enable_shadow_vmcs) { | ||
4200 | vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); | ||
4201 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); | ||
4202 | } | ||
4002 | if (cpu_has_vmx_msr_bitmap()) | 4203 | if (cpu_has_vmx_msr_bitmap()) |
4003 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); | 4204 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
4004 | 4205 | ||
4005 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 4206 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
4006 | 4207 | ||
4007 | /* Control */ | 4208 | /* Control */ |
4008 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, | 4209 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); |
4009 | vmcs_config.pin_based_exec_ctrl); | ||
4010 | 4210 | ||
4011 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); | 4211 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); |
4012 | 4212 | ||
@@ -4015,13 +4215,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4015 | vmx_secondary_exec_control(vmx)); | 4215 | vmx_secondary_exec_control(vmx)); |
4016 | } | 4216 | } |
4017 | 4217 | ||
4018 | if (enable_apicv_reg_vid) { | 4218 | if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { |
4019 | vmcs_write64(EOI_EXIT_BITMAP0, 0); | 4219 | vmcs_write64(EOI_EXIT_BITMAP0, 0); |
4020 | vmcs_write64(EOI_EXIT_BITMAP1, 0); | 4220 | vmcs_write64(EOI_EXIT_BITMAP1, 0); |
4021 | vmcs_write64(EOI_EXIT_BITMAP2, 0); | 4221 | vmcs_write64(EOI_EXIT_BITMAP2, 0); |
4022 | vmcs_write64(EOI_EXIT_BITMAP3, 0); | 4222 | vmcs_write64(EOI_EXIT_BITMAP3, 0); |
4023 | 4223 | ||
4024 | vmcs_write16(GUEST_INTR_STATUS, 0); | 4224 | vmcs_write16(GUEST_INTR_STATUS, 0); |
4225 | |||
4226 | vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); | ||
4227 | vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); | ||
4025 | } | 4228 | } |
4026 | 4229 | ||
4027 | if (ple_gap) { | 4230 | if (ple_gap) { |
@@ -4035,7 +4238,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4035 | 4238 | ||
4036 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ | 4239 | vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */ |
4037 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ | 4240 | vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */ |
4038 | vmx_set_constant_host_state(); | 4241 | vmx_set_constant_host_state(vmx); |
4039 | #ifdef CONFIG_X86_64 | 4242 | #ifdef CONFIG_X86_64 |
4040 | rdmsrl(MSR_FS_BASE, a); | 4243 | rdmsrl(MSR_FS_BASE, a); |
4041 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ | 4244 | vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */ |
@@ -4089,11 +4292,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4089 | return 0; | 4292 | return 0; |
4090 | } | 4293 | } |
4091 | 4294 | ||
4092 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 4295 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
4093 | { | 4296 | { |
4094 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4297 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4095 | u64 msr; | 4298 | u64 msr; |
4096 | int ret; | ||
4097 | 4299 | ||
4098 | vmx->rmode.vm86_active = 0; | 4300 | vmx->rmode.vm86_active = 0; |
4099 | 4301 | ||
@@ -4109,12 +4311,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4109 | vmx_segment_cache_clear(vmx); | 4311 | vmx_segment_cache_clear(vmx); |
4110 | 4312 | ||
4111 | seg_setup(VCPU_SREG_CS); | 4313 | seg_setup(VCPU_SREG_CS); |
4112 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4314 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); |
4113 | vmcs_write16(GUEST_CS_SELECTOR, 0xf000); | 4315 | vmcs_write32(GUEST_CS_BASE, 0xffff0000); |
4114 | else { | ||
4115 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | ||
4116 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | ||
4117 | } | ||
4118 | 4316 | ||
4119 | seg_setup(VCPU_SREG_DS); | 4317 | seg_setup(VCPU_SREG_DS); |
4120 | seg_setup(VCPU_SREG_ES); | 4318 | seg_setup(VCPU_SREG_ES); |
@@ -4137,10 +4335,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4137 | vmcs_writel(GUEST_SYSENTER_EIP, 0); | 4335 | vmcs_writel(GUEST_SYSENTER_EIP, 0); |
4138 | 4336 | ||
4139 | vmcs_writel(GUEST_RFLAGS, 0x02); | 4337 | vmcs_writel(GUEST_RFLAGS, 0x02); |
4140 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4338 | kvm_rip_write(vcpu, 0xfff0); |
4141 | kvm_rip_write(vcpu, 0xfff0); | ||
4142 | else | ||
4143 | kvm_rip_write(vcpu, 0); | ||
4144 | 4339 | ||
4145 | vmcs_writel(GUEST_GDTR_BASE, 0); | 4340 | vmcs_writel(GUEST_GDTR_BASE, 0); |
4146 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); | 4341 | vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); |
@@ -4171,23 +4366,20 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4171 | vmcs_write64(APIC_ACCESS_ADDR, | 4366 | vmcs_write64(APIC_ACCESS_ADDR, |
4172 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); | 4367 | page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); |
4173 | 4368 | ||
4369 | if (vmx_vm_has_apicv(vcpu->kvm)) | ||
4370 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | ||
4371 | |||
4174 | if (vmx->vpid != 0) | 4372 | if (vmx->vpid != 0) |
4175 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 4373 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
4176 | 4374 | ||
4177 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 4375 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
4178 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
4179 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ | 4376 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
4180 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
4181 | vmx_set_cr4(&vmx->vcpu, 0); | 4377 | vmx_set_cr4(&vmx->vcpu, 0); |
4182 | vmx_set_efer(&vmx->vcpu, 0); | 4378 | vmx_set_efer(&vmx->vcpu, 0); |
4183 | vmx_fpu_activate(&vmx->vcpu); | 4379 | vmx_fpu_activate(&vmx->vcpu); |
4184 | update_exception_bitmap(&vmx->vcpu); | 4380 | update_exception_bitmap(&vmx->vcpu); |
4185 | 4381 | ||
4186 | vpid_sync_context(vmx); | 4382 | vpid_sync_context(vmx); |
4187 | |||
4188 | ret = 0; | ||
4189 | |||
4190 | return ret; | ||
4191 | } | 4383 | } |
4192 | 4384 | ||
4193 | /* | 4385 | /* |
@@ -4200,40 +4392,45 @@ static bool nested_exit_on_intr(struct kvm_vcpu *vcpu) | |||
4200 | PIN_BASED_EXT_INTR_MASK; | 4392 | PIN_BASED_EXT_INTR_MASK; |
4201 | } | 4393 | } |
4202 | 4394 | ||
4203 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 4395 | static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) |
4396 | { | ||
4397 | return get_vmcs12(vcpu)->pin_based_vm_exec_control & | ||
4398 | PIN_BASED_NMI_EXITING; | ||
4399 | } | ||
4400 | |||
4401 | static int enable_irq_window(struct kvm_vcpu *vcpu) | ||
4204 | { | 4402 | { |
4205 | u32 cpu_based_vm_exec_control; | 4403 | u32 cpu_based_vm_exec_control; |
4206 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4404 | |
4405 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | ||
4207 | /* | 4406 | /* |
4208 | * We get here if vmx_interrupt_allowed() said we can't | 4407 | * We get here if vmx_interrupt_allowed() said we can't |
4209 | * inject to L1 now because L2 must run. Ask L2 to exit | 4408 | * inject to L1 now because L2 must run. The caller will have |
4210 | * right after entry, so we can inject to L1 more promptly. | 4409 | * to make L2 exit right after entry, so we can inject to L1 |
4410 | * more promptly. | ||
4211 | */ | 4411 | */ |
4212 | kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | 4412 | return -EBUSY; |
4213 | return; | ||
4214 | } | ||
4215 | 4413 | ||
4216 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4414 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4217 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 4415 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
4218 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4416 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
4417 | return 0; | ||
4219 | } | 4418 | } |
4220 | 4419 | ||
4221 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | 4420 | static int enable_nmi_window(struct kvm_vcpu *vcpu) |
4222 | { | 4421 | { |
4223 | u32 cpu_based_vm_exec_control; | 4422 | u32 cpu_based_vm_exec_control; |
4224 | 4423 | ||
4225 | if (!cpu_has_virtual_nmis()) { | 4424 | if (!cpu_has_virtual_nmis()) |
4226 | enable_irq_window(vcpu); | 4425 | return enable_irq_window(vcpu); |
4227 | return; | 4426 | |
4228 | } | 4427 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) |
4428 | return enable_irq_window(vcpu); | ||
4229 | 4429 | ||
4230 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { | ||
4231 | enable_irq_window(vcpu); | ||
4232 | return; | ||
4233 | } | ||
4234 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4430 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4235 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 4431 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; |
4236 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4432 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
4433 | return 0; | ||
4237 | } | 4434 | } |
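enable_irq_window() and enable_nmi_window() now report -EBUSY instead of requesting the immediate exit themselves; the decision moves up to the caller. A sketch of the caller's side of the new contract, assuming the generic x86 entry path (in arch/x86/kvm/x86.c, outside this hunk) turns a non-zero return into KVM_REQ_IMMEDIATE_EXIT:

	/* sketch, not the literal x86.c hunk */
	bool req_immediate_exit = false;

	if (vcpu->arch.nmi_pending)
		req_immediate_exit = enable_nmi_window(vcpu) != 0;
	else if (kvm_cpu_has_injectable_intr(vcpu))
		req_immediate_exit = enable_irq_window(vcpu) != 0;

	if (req_immediate_exit)
		kvm_make_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);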
4238 | 4435 | ||
4239 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 4436 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
@@ -4294,16 +4491,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
4294 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 4491 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
4295 | } | 4492 | } |
4296 | 4493 | ||
4297 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | ||
4298 | { | ||
4299 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
4300 | return 0; | ||
4301 | |||
4302 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
4303 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | ||
4304 | | GUEST_INTR_STATE_NMI)); | ||
4305 | } | ||
4306 | |||
4307 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 4494 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
4308 | { | 4495 | { |
4309 | if (!cpu_has_virtual_nmis()) | 4496 | if (!cpu_has_virtual_nmis()) |
@@ -4333,18 +4520,52 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4333 | } | 4520 | } |
4334 | } | 4521 | } |
4335 | 4522 | ||
4523 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | ||
4524 | { | ||
4525 | if (is_guest_mode(vcpu)) { | ||
4526 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4527 | |||
4528 | if (to_vmx(vcpu)->nested.nested_run_pending) | ||
4529 | return 0; | ||
4530 | if (nested_exit_on_nmi(vcpu)) { | ||
4531 | nested_vmx_vmexit(vcpu); | ||
4532 | vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; | ||
4533 | vmcs12->vm_exit_intr_info = NMI_VECTOR | | ||
4534 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; | ||
4535 | /* | ||
4536 | * The NMI-triggered VM exit counts as injection: | ||
4537 | * clear this one and block further NMIs. | ||
4538 | */ | ||
4539 | vcpu->arch.nmi_pending = 0; | ||
4540 | vmx_set_nmi_mask(vcpu, true); | ||
4541 | return 0; | ||
4542 | } | ||
4543 | } | ||
4544 | |||
4545 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
4546 | return 0; | ||
4547 | |||
4548 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
4549 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | ||
4550 | | GUEST_INTR_STATE_NMI)); | ||
4551 | } | ||
4552 | |||
4336 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4553 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4337 | { | 4554 | { |
4338 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) { | 4555 | if (is_guest_mode(vcpu)) { |
4339 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 4556 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
4340 | if (to_vmx(vcpu)->nested.nested_run_pending || | 4557 | |
4341 | (vmcs12->idt_vectoring_info_field & | 4558 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4342 | VECTORING_INFO_VALID_MASK)) | ||
4343 | return 0; | 4559 | return 0; |
4344 | nested_vmx_vmexit(vcpu); | 4560 | if (nested_exit_on_intr(vcpu)) { |
4345 | vmcs12->vm_exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; | 4561 | nested_vmx_vmexit(vcpu); |
4346 | vmcs12->vm_exit_intr_info = 0; | 4562 | vmcs12->vm_exit_reason = |
4347 | /* fall through to normal code, but now in L1, not L2 */ | 4563 | EXIT_REASON_EXTERNAL_INTERRUPT; |
4564 | vmcs12->vm_exit_intr_info = 0; | ||
4565 | /* | ||
4566 | * fall through to normal code, but now in L1, not L2 | ||
4567 | */ | ||
4568 | } | ||
4348 | } | 4569 | } |
4349 | 4570 | ||
4350 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 4571 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
@@ -4362,7 +4583,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
4362 | .flags = 0, | 4583 | .flags = 0, |
4363 | }; | 4584 | }; |
4364 | 4585 | ||
4365 | ret = kvm_set_memory_region(kvm, &tss_mem, false); | 4586 | ret = kvm_set_memory_region(kvm, &tss_mem); |
4366 | if (ret) | 4587 | if (ret) |
4367 | return ret; | 4588 | return ret; |
4368 | kvm->arch.tss_addr = addr; | 4589 | kvm->arch.tss_addr = addr; |
@@ -4603,34 +4824,50 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
4603 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ | 4824 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ |
4604 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | 4825 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) |
4605 | { | 4826 | { |
4606 | if (to_vmx(vcpu)->nested.vmxon && | ||
4607 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
4608 | return 1; | ||
4609 | |||
4610 | if (is_guest_mode(vcpu)) { | 4827 | if (is_guest_mode(vcpu)) { |
4828 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
4829 | unsigned long orig_val = val; | ||
4830 | |||
4611 | /* | 4831 | /* |
4612 | * We get here when L2 changed cr0 in a way that did not change | 4832 | * We get here when L2 changed cr0 in a way that did not change |
4613 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), | 4833 | * any of L1's shadowed bits (see nested_vmx_exit_handled_cr), |
4614 | * but did change L0 shadowed bits. This can currently happen | 4834 | * but did change L0 shadowed bits. So we first calculate the |
4615 | * with the TS bit: L0 may want to leave TS on (for lazy fpu | 4835 | * effective cr0 value that L1 would like to write into the |
4616 | * loading) while pretending to allow the guest to change it. | 4836 | * hardware. It consists of the L2-owned bits from the new |
4837 | * value combined with the L1-owned bits from L1's guest_cr0. | ||
4617 | */ | 4838 | */ |
4618 | if (kvm_set_cr0(vcpu, (val & vcpu->arch.cr0_guest_owned_bits) | | 4839 | val = (val & ~vmcs12->cr0_guest_host_mask) | |
4619 | (vcpu->arch.cr0 & ~vcpu->arch.cr0_guest_owned_bits))) | 4840 | (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask); |
4841 | |||
4842 | /* TODO: will have to take unrestricted guest mode into | ||
4843 | * account */ | ||
4844 | if ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) | ||
4620 | return 1; | 4845 | return 1; |
4621 | vmcs_writel(CR0_READ_SHADOW, val); | 4846 | |
4847 | if (kvm_set_cr0(vcpu, val)) | ||
4848 | return 1; | ||
4849 | vmcs_writel(CR0_READ_SHADOW, orig_val); | ||
4622 | return 0; | 4850 | return 0; |
4623 | } else | 4851 | } else { |
4852 | if (to_vmx(vcpu)->nested.vmxon && | ||
4853 | ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON)) | ||
4854 | return 1; | ||
4624 | return kvm_set_cr0(vcpu, val); | 4855 | return kvm_set_cr0(vcpu, val); |
4856 | } | ||
4625 | } | 4857 | } |
4626 | 4858 | ||
4627 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) | 4859 | static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) |
4628 | { | 4860 | { |
4629 | if (is_guest_mode(vcpu)) { | 4861 | if (is_guest_mode(vcpu)) { |
4630 | if (kvm_set_cr4(vcpu, (val & vcpu->arch.cr4_guest_owned_bits) | | 4862 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
4631 | (vcpu->arch.cr4 & ~vcpu->arch.cr4_guest_owned_bits))) | 4863 | unsigned long orig_val = val; |
4864 | |||
4865 | /* analogously to handle_set_cr0 */ | ||
4866 | val = (val & ~vmcs12->cr4_guest_host_mask) | | ||
4867 | (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask); | ||
4868 | if (kvm_set_cr4(vcpu, val)) | ||
4632 | return 1; | 4869 | return 1; |
4633 | vmcs_writel(CR4_READ_SHADOW, val); | 4870 | vmcs_writel(CR4_READ_SHADOW, orig_val); |
4634 | return 0; | 4871 | return 0; |
4635 | } else | 4872 | } else |
4636 | return kvm_set_cr4(vcpu, val); | 4873 | return kvm_set_cr4(vcpu, val); |
@@ -5183,7 +5420,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5183 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) | 5420 | if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) |
5184 | return 1; | 5421 | return 1; |
5185 | 5422 | ||
5186 | err = emulate_instruction(vcpu, 0); | 5423 | err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); |
5187 | 5424 | ||
5188 | if (err == EMULATE_DO_MMIO) { | 5425 | if (err == EMULATE_DO_MMIO) { |
5189 | ret = 0; | 5426 | ret = 0; |
@@ -5259,8 +5496,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | |||
5259 | } | 5496 | } |
5260 | 5497 | ||
5261 | /* Create a new VMCS */ | 5498 | /* Create a new VMCS */ |
5262 | item = (struct vmcs02_list *) | 5499 | item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); |
5263 | kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
5264 | if (!item) | 5500 | if (!item) |
5265 | return NULL; | 5501 | return NULL; |
5266 | item->vmcs02.vmcs = alloc_vmcs(); | 5502 | item->vmcs02.vmcs = alloc_vmcs(); |
@@ -5309,6 +5545,9 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | |||
5309 | free_loaded_vmcs(&vmx->vmcs01); | 5545 | free_loaded_vmcs(&vmx->vmcs01); |
5310 | } | 5546 | } |
5311 | 5547 | ||
5548 | static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | ||
5549 | u32 vm_instruction_error); | ||
5550 | |||
5312 | /* | 5551 | /* |
5313 | * Emulate the VMXON instruction. | 5552 | * Emulate the VMXON instruction. |
5314 | * Currently, we just remember that VMX is active, and do not save or even | 5553 | * Currently, we just remember that VMX is active, and do not save or even |
@@ -5321,6 +5560,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5321 | { | 5560 | { |
5322 | struct kvm_segment cs; | 5561 | struct kvm_segment cs; |
5323 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 5562 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
5563 | struct vmcs *shadow_vmcs; | ||
5324 | 5564 | ||
5325 | /* The Intel VMX Instruction Reference lists a bunch of bits that | 5565 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
5326 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | 5566 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
@@ -5344,6 +5584,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5344 | kvm_inject_gp(vcpu, 0); | 5584 | kvm_inject_gp(vcpu, 0); |
5345 | return 1; | 5585 | return 1; |
5346 | } | 5586 | } |
5587 | if (vmx->nested.vmxon) { | ||
5588 | nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); | ||
5589 | skip_emulated_instruction(vcpu); | ||
5590 | return 1; | ||
5591 | } | ||
5592 | if (enable_shadow_vmcs) { | ||
5593 | shadow_vmcs = alloc_vmcs(); | ||
5594 | if (!shadow_vmcs) | ||
5595 | return -ENOMEM; | ||
5596 | /* mark vmcs as shadow */ | ||
5597 | shadow_vmcs->revision_id |= (1u << 31); | ||
5598 | /* init shadow vmcs */ | ||
5599 | vmcs_clear(shadow_vmcs); | ||
5600 | vmx->nested.current_shadow_vmcs = shadow_vmcs; | ||
5601 | } | ||
5347 | 5602 | ||
5348 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | 5603 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
5349 | vmx->nested.vmcs02_num = 0; | 5604 | vmx->nested.vmcs02_num = 0; |
@@ -5384,6 +5639,25 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | |||
5384 | return 1; | 5639 | return 1; |
5385 | } | 5640 | } |
5386 | 5641 | ||
5642 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | ||
5643 | { | ||
5644 | u32 exec_control; | ||
5645 | if (enable_shadow_vmcs) { | ||
5646 | if (vmx->nested.current_vmcs12 != NULL) { | ||
5647 | /* copy all shadowed fields back to memory in case | ||
5648 | they were modified */ | ||
5649 | copy_shadow_to_vmcs12(vmx); | ||
5650 | vmx->nested.sync_shadow_vmcs = false; | ||
5651 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
5652 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | ||
5653 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
5654 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | ||
5655 | } | ||
5656 | } | ||
5657 | kunmap(vmx->nested.current_vmcs12_page); | ||
5658 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5659 | } | ||
5660 | |||
5387 | /* | 5661 | /* |
5388 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or | 5662 | * Free whatever needs to be freed from vmx->nested when L1 goes down, or |
5389 | * just stops using VMX. | 5663 | * just stops using VMX. |
@@ -5394,11 +5668,12 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
5394 | return; | 5668 | return; |
5395 | vmx->nested.vmxon = false; | 5669 | vmx->nested.vmxon = false; |
5396 | if (vmx->nested.current_vmptr != -1ull) { | 5670 | if (vmx->nested.current_vmptr != -1ull) { |
5397 | kunmap(vmx->nested.current_vmcs12_page); | 5671 | nested_release_vmcs12(vmx); |
5398 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5399 | vmx->nested.current_vmptr = -1ull; | 5672 | vmx->nested.current_vmptr = -1ull; |
5400 | vmx->nested.current_vmcs12 = NULL; | 5673 | vmx->nested.current_vmcs12 = NULL; |
5401 | } | 5674 | } |
5675 | if (enable_shadow_vmcs) | ||
5676 | free_vmcs(vmx->nested.current_shadow_vmcs); | ||
5402 | /* Unpin physical memory we referred to in current vmcs02 */ | 5677 | /* Unpin physical memory we referred to in current vmcs02 */ |
5403 | if (vmx->nested.apic_access_page) { | 5678 | if (vmx->nested.apic_access_page) { |
5404 | nested_release_page(vmx->nested.apic_access_page); | 5679 | nested_release_page(vmx->nested.apic_access_page); |
@@ -5507,6 +5782,10 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | |||
5507 | X86_EFLAGS_SF | X86_EFLAGS_OF)) | 5782 | X86_EFLAGS_SF | X86_EFLAGS_OF)) |
5508 | | X86_EFLAGS_ZF); | 5783 | | X86_EFLAGS_ZF); |
5509 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; | 5784 | get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error; |
5785 | /* | ||
5786 | * We don't need to force a shadow sync because | ||
5787 | * VM_INSTRUCTION_ERROR is not shadowed | ||
5788 | */ | ||
5510 | } | 5789 | } |
5511 | 5790 | ||
5512 | /* Emulate the VMCLEAR instruction */ | 5791 | /* Emulate the VMCLEAR instruction */ |
@@ -5539,8 +5818,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
5539 | } | 5818 | } |
5540 | 5819 | ||
5541 | if (vmptr == vmx->nested.current_vmptr) { | 5820 | if (vmptr == vmx->nested.current_vmptr) { |
5542 | kunmap(vmx->nested.current_vmcs12_page); | 5821 | nested_release_vmcs12(vmx); |
5543 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5544 | vmx->nested.current_vmptr = -1ull; | 5822 | vmx->nested.current_vmptr = -1ull; |
5545 | vmx->nested.current_vmcs12 = NULL; | 5823 | vmx->nested.current_vmcs12 = NULL; |
5546 | } | 5824 | } |
@@ -5639,6 +5917,111 @@ static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu, | |||
5639 | } | 5917 | } |
5640 | } | 5918 | } |
5641 | 5919 | ||
5920 | |||
5921 | static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu, | ||
5922 | unsigned long field, u64 field_value) | ||
5923 | { | ||
5924 | short offset = vmcs_field_to_offset(field); | ||
5925 | char *p = ((char *) get_vmcs12(vcpu)) + offset; | ||
5926 | if (offset < 0) | ||
5927 | return false; | ||
5928 | switch (vmcs_field_type(field)) { | ||
5929 | case VMCS_FIELD_TYPE_U16: | ||
5930 | *(u16 *)p = field_value; | ||
5931 | return true; | ||
5932 | case VMCS_FIELD_TYPE_U32: | ||
5933 | *(u32 *)p = field_value; | ||
5934 | return true; | ||
5935 | case VMCS_FIELD_TYPE_U64: | ||
5936 | *(u64 *)p = field_value; | ||
5937 | return true; | ||
5938 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5939 | *(natural_width *)p = field_value; | ||
5940 | return true; | ||
5941 | default: | ||
5942 | return false; /* can never happen. */ | ||
5943 | } | ||
5944 | |||
5945 | } | ||
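vmcs12_write_any() and its earlier read-side twin dispatch on the architectural field encoding rather than a lookup table: bit 0 of a field encoding marks the 32-bit high half of a 64-bit field, and bits 14:13 give the access width. The decoder these helpers rely on is roughly the following (matching the pre-existing vmcs_field_type() in this file):

	static inline int vmcs_field_type(unsigned long field)
	{
		if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
			return VMCS_FIELD_TYPE_U32;
		return (field >> 13) & 0x3;	/* width in bits 14:13 */
	}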
5946 | |||
5947 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | ||
5948 | { | ||
5949 | int i; | ||
5950 | unsigned long field; | ||
5951 | u64 field_value; | ||
5952 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | ||
5953 | unsigned long *fields = (unsigned long *)shadow_read_write_fields; | ||
5954 | int num_fields = max_shadow_read_write_fields; | ||
5955 | |||
5956 | vmcs_load(shadow_vmcs); | ||
5957 | |||
5958 | for (i = 0; i < num_fields; i++) { | ||
5959 | field = fields[i]; | ||
5960 | switch (vmcs_field_type(field)) { | ||
5961 | case VMCS_FIELD_TYPE_U16: | ||
5962 | field_value = vmcs_read16(field); | ||
5963 | break; | ||
5964 | case VMCS_FIELD_TYPE_U32: | ||
5965 | field_value = vmcs_read32(field); | ||
5966 | break; | ||
5967 | case VMCS_FIELD_TYPE_U64: | ||
5968 | field_value = vmcs_read64(field); | ||
5969 | break; | ||
5970 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5971 | field_value = vmcs_readl(field); | ||
5972 | break; | ||
5973 | } | ||
5974 | vmcs12_write_any(&vmx->vcpu, field, field_value); | ||
5975 | } | ||
5976 | |||
5977 | vmcs_clear(shadow_vmcs); | ||
5978 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
5979 | } | ||
5980 | |||
5981 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | ||
5982 | { | ||
5983 | unsigned long *fields[] = { | ||
5984 | (unsigned long *)shadow_read_write_fields, | ||
5985 | (unsigned long *)shadow_read_only_fields | ||
5986 | }; | ||
5987 | int num_lists = ARRAY_SIZE(fields); | ||
5988 | int max_fields[] = { | ||
5989 | max_shadow_read_write_fields, | ||
5990 | max_shadow_read_only_fields | ||
5991 | }; | ||
5992 | int i, q; | ||
5993 | unsigned long field; | ||
5994 | u64 field_value = 0; | ||
5995 | struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; | ||
5996 | |||
5997 | vmcs_load(shadow_vmcs); | ||
5998 | |||
5999 | for (q = 0; q < num_lists; q++) { | ||
6000 | for (i = 0; i < max_fields[q]; i++) { | ||
6001 | field = fields[q][i]; | ||
6002 | vmcs12_read_any(&vmx->vcpu, field, &field_value); | ||
6003 | |||
6004 | switch (vmcs_field_type(field)) { | ||
6005 | case VMCS_FIELD_TYPE_U16: | ||
6006 | vmcs_write16(field, (u16)field_value); | ||
6007 | break; | ||
6008 | case VMCS_FIELD_TYPE_U32: | ||
6009 | vmcs_write32(field, (u32)field_value); | ||
6010 | break; | ||
6011 | case VMCS_FIELD_TYPE_U64: | ||
6012 | vmcs_write64(field, (u64)field_value); | ||
6013 | break; | ||
6014 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
6015 | vmcs_writel(field, (long)field_value); | ||
6016 | break; | ||
6017 | } | ||
6018 | } | ||
6019 | } | ||
6020 | |||
6021 | vmcs_clear(shadow_vmcs); | ||
6022 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
6023 | } | ||
6024 | |||
5642 | /* | 6025 | /* |
5643 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was | 6026 | * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was |
5644 | * used before) all generate the same failure when it is missing. | 6027 | * used before) all generate the same failure when it is missing. |
@@ -5703,8 +6086,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
5703 | gva_t gva; | 6086 | gva_t gva; |
5704 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6087 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
5705 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 6088 | u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
5706 | char *p; | ||
5707 | short offset; | ||
5708 | /* The value to write might be 32 or 64 bits, depending on L1's long | 6089 | /* The value to write might be 32 or 64 bits, depending on L1's long |
5709 | * mode, and eventually we need to write that into a field of several | 6090 | * mode, and eventually we need to write that into a field of several |
5710 | * possible lengths. The code below first zero-extends the value to 64 | 6091 | * possible lengths. The code below first zero-extends the value to 64 |
@@ -5741,28 +6122,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
5741 | return 1; | 6122 | return 1; |
5742 | } | 6123 | } |
5743 | 6124 | ||
5744 | offset = vmcs_field_to_offset(field); | 6125 | if (!vmcs12_write_any(vcpu, field, field_value)) { |
5745 | if (offset < 0) { | ||
5746 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | ||
5747 | skip_emulated_instruction(vcpu); | ||
5748 | return 1; | ||
5749 | } | ||
5750 | p = ((char *) get_vmcs12(vcpu)) + offset; | ||
5751 | |||
5752 | switch (vmcs_field_type(field)) { | ||
5753 | case VMCS_FIELD_TYPE_U16: | ||
5754 | *(u16 *)p = field_value; | ||
5755 | break; | ||
5756 | case VMCS_FIELD_TYPE_U32: | ||
5757 | *(u32 *)p = field_value; | ||
5758 | break; | ||
5759 | case VMCS_FIELD_TYPE_U64: | ||
5760 | *(u64 *)p = field_value; | ||
5761 | break; | ||
5762 | case VMCS_FIELD_TYPE_NATURAL_WIDTH: | ||
5763 | *(natural_width *)p = field_value; | ||
5764 | break; | ||
5765 | default: | ||
5766 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | 6126 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); |
5767 | skip_emulated_instruction(vcpu); | 6127 | skip_emulated_instruction(vcpu); |
5768 | return 1; | 6128 | return 1; |
@@ -5780,6 +6140,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
5780 | gva_t gva; | 6140 | gva_t gva; |
5781 | gpa_t vmptr; | 6141 | gpa_t vmptr; |
5782 | struct x86_exception e; | 6142 | struct x86_exception e; |
6143 | u32 exec_control; | ||
5783 | 6144 | ||
5784 | if (!nested_vmx_check_permission(vcpu)) | 6145 | if (!nested_vmx_check_permission(vcpu)) |
5785 | return 1; | 6146 | return 1; |
@@ -5818,14 +6179,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
5818 | skip_emulated_instruction(vcpu); | 6179 | skip_emulated_instruction(vcpu); |
5819 | return 1; | 6180 | return 1; |
5820 | } | 6181 | } |
5821 | if (vmx->nested.current_vmptr != -1ull) { | 6182 | if (vmx->nested.current_vmptr != -1ull) |
5822 | kunmap(vmx->nested.current_vmcs12_page); | 6183 | nested_release_vmcs12(vmx); |
5823 | nested_release_page(vmx->nested.current_vmcs12_page); | ||
5824 | } | ||
5825 | 6184 | ||
5826 | vmx->nested.current_vmptr = vmptr; | 6185 | vmx->nested.current_vmptr = vmptr; |
5827 | vmx->nested.current_vmcs12 = new_vmcs12; | 6186 | vmx->nested.current_vmcs12 = new_vmcs12; |
5828 | vmx->nested.current_vmcs12_page = page; | 6187 | vmx->nested.current_vmcs12_page = page; |
6188 | if (enable_shadow_vmcs) { | ||
6189 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
6190 | exec_control |= SECONDARY_EXEC_SHADOW_VMCS; | ||
6191 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | ||
6192 | vmcs_write64(VMCS_LINK_POINTER, | ||
6193 | __pa(vmx->nested.current_shadow_vmcs)); | ||
6194 | vmx->nested.sync_shadow_vmcs = true; | ||
6195 | } | ||
5829 | } | 6196 | } |
5830 | 6197 | ||
5831 | nested_vmx_succeed(vcpu); | 6198 | nested_vmx_succeed(vcpu); |
@@ -5908,6 +6275,52 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
5908 | static const int kvm_vmx_max_exit_handlers = | 6275 | static const int kvm_vmx_max_exit_handlers = |
5909 | ARRAY_SIZE(kvm_vmx_exit_handlers); | 6276 | ARRAY_SIZE(kvm_vmx_exit_handlers); |
5910 | 6277 | ||
6278 | static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | ||
6279 | struct vmcs12 *vmcs12) | ||
6280 | { | ||
6281 | unsigned long exit_qualification; | ||
6282 | gpa_t bitmap, last_bitmap; | ||
6283 | unsigned int port; | ||
6284 | int size; | ||
6285 | u8 b; | ||
6286 | |||
6287 | if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) | ||
6288 | return 1; | ||
6289 | |||
6290 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) | ||
6291 | return 0; | ||
6292 | |||
6293 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | ||
6294 | |||
6295 | port = exit_qualification >> 16; | ||
6296 | size = (exit_qualification & 7) + 1; | ||
6297 | |||
6298 | last_bitmap = (gpa_t)-1; | ||
6299 | b = -1; | ||
6300 | |||
6301 | while (size > 0) { | ||
6302 | if (port < 0x8000) | ||
6303 | bitmap = vmcs12->io_bitmap_a; | ||
6304 | else if (port < 0x10000) | ||
6305 | bitmap = vmcs12->io_bitmap_b; | ||
6306 | else | ||
6307 | return 1; | ||
6308 | bitmap += (port & 0x7fff) / 8; | ||
6309 | |||
6310 | if (last_bitmap != bitmap) | ||
6311 | if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) | ||
6312 | return 1; | ||
6313 | if (b & (1 << (port & 7))) | ||
6314 | return 1; | ||
6315 | |||
6316 | port++; | ||
6317 | size--; | ||
6318 | last_bitmap = bitmap; | ||
6319 | } | ||
6320 | |||
6321 | return 0; | ||
6322 | } | ||
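The loop above tests one port per iteration so that an access straddling the 0x8000 boundary consults both bitmaps, and one running past port 0xffff conservatively forces an exit. The bitmap arithmetic alone, pulled into a self-contained sketch with hypothetical names (the real function fetches each bitmap byte from guest memory via kvm_read_guest() and caches it in b):

	#include <stdbool.h>
	#include <stdint.h>

	/* bitmap_a covers ports 0x0000-0x7fff, bitmap_b ports 0x8000-0xffff;
	 * each is a 4 KiB bitmap, one bit per port. */
	static bool io_must_exit(const uint8_t *bitmap_a, const uint8_t *bitmap_b,
				 unsigned int port, int size)
	{
		while (size-- > 0) {
			const uint8_t *bitmap;

			if (port >= 0x10000)	/* wrapped past the last port */
				return true;
			bitmap = port < 0x8000 ? bitmap_a : bitmap_b;
			if (bitmap[(port & 0x7fff) / 8] & (1 << (port & 7)))
				return true;
			port++;
		}
		return false;
	}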
6323 | |||
5911 | /* | 6324 | /* |
5912 | * Return 1 if we should exit from L2 to L1 to handle an MSR access, | 6325 | * Return 1 if we should exit from L2 to L1 to handle an MSR access, |
5913 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed | 6326 | * rather than handle it ourselves in L0. I.e., check whether L1 expressed |
@@ -5939,7 +6352,8 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, | |||
5939 | /* Then read the msr_index'th bit from this bitmap: */ | 6352 | /* Then read the msr_index'th bit from this bitmap: */ |
5940 | if (msr_index < 1024*8) { | 6353 | if (msr_index < 1024*8) { |
5941 | unsigned char b; | 6354 | unsigned char b; |
5942 | kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1); | 6355 | if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) |
6356 | return 1; | ||
5943 | return 1 & (b >> (msr_index & 7)); | 6357 | return 1 & (b >> (msr_index & 7)); |
5944 | } else | 6358 | } else |
5945 | return 1; /* let L1 handle the wrong parameter */ | 6359 | return 1; /* let L1 handle the wrong parameter */ |
@@ -6033,10 +6447,10 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
6033 | */ | 6447 | */ |
6034 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | 6448 | static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) |
6035 | { | 6449 | { |
6036 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
6037 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 6450 | u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
6038 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6451 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
6039 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6452 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
6453 | u32 exit_reason = vmx->exit_reason; | ||
6040 | 6454 | ||
6041 | if (vmx->nested.nested_run_pending) | 6455 | if (vmx->nested.nested_run_pending) |
6042 | return 0; | 6456 | return 0; |
@@ -6060,14 +6474,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6060 | case EXIT_REASON_TRIPLE_FAULT: | 6474 | case EXIT_REASON_TRIPLE_FAULT: |
6061 | return 1; | 6475 | return 1; |
6062 | case EXIT_REASON_PENDING_INTERRUPT: | 6476 | case EXIT_REASON_PENDING_INTERRUPT: |
6477 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); | ||
6063 | case EXIT_REASON_NMI_WINDOW: | 6478 | case EXIT_REASON_NMI_WINDOW: |
6064 | /* | 6479 | return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); |
6065 | * prepare_vmcs02() set the CPU_BASED_VIRTUAL_INTR_PENDING bit | ||
6066 | * (aka Interrupt Window Exiting) only when L1 turned it on, | ||
6067 | * so if we got a PENDING_INTERRUPT exit, this must be for L1. | ||
6068 | * Same for NMI Window Exiting. | ||
6069 | */ | ||
6070 | return 1; | ||
6071 | case EXIT_REASON_TASK_SWITCH: | 6480 | case EXIT_REASON_TASK_SWITCH: |
6072 | return 1; | 6481 | return 1; |
6073 | case EXIT_REASON_CPUID: | 6482 | case EXIT_REASON_CPUID: |
@@ -6097,8 +6506,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6097 | case EXIT_REASON_DR_ACCESS: | 6506 | case EXIT_REASON_DR_ACCESS: |
6098 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); | 6507 | return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING); |
6099 | case EXIT_REASON_IO_INSTRUCTION: | 6508 | case EXIT_REASON_IO_INSTRUCTION: |
6100 | /* TODO: support IO bitmaps */ | 6509 | return nested_vmx_exit_handled_io(vcpu, vmcs12); |
6101 | return 1; | ||
6102 | case EXIT_REASON_MSR_READ: | 6510 | case EXIT_REASON_MSR_READ: |
6103 | case EXIT_REASON_MSR_WRITE: | 6511 | case EXIT_REASON_MSR_WRITE: |
6104 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); | 6512 | return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); |
@@ -6122,6 +6530,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6122 | case EXIT_REASON_EPT_VIOLATION: | 6530 | case EXIT_REASON_EPT_VIOLATION: |
6123 | case EXIT_REASON_EPT_MISCONFIG: | 6531 | case EXIT_REASON_EPT_MISCONFIG: |
6124 | return 0; | 6532 | return 0; |
6533 | case EXIT_REASON_PREEMPTION_TIMER: | ||
6534 | return vmcs12->pin_based_vm_exec_control & | ||
6535 | PIN_BASED_VMX_PREEMPTION_TIMER; | ||
6125 | case EXIT_REASON_WBINVD: | 6536 | case EXIT_REASON_WBINVD: |
6126 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 6537 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
6127 | case EXIT_REASON_XSETBV: | 6538 | case EXIT_REASON_XSETBV: |
@@ -6316,6 +6727,9 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
6316 | 6727 | ||
6317 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 6728 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
6318 | { | 6729 | { |
6730 | if (!vmx_vm_has_apicv(vcpu->kvm)) | ||
6731 | return; | ||
6732 | |||
6319 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); | 6733 | vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); |
6320 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); | 6734 | vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); |
6321 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); | 6735 | vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); |
@@ -6346,6 +6760,52 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) | |||
6346 | } | 6760 | } |
6347 | } | 6761 | } |
6348 | 6762 | ||
6763 | static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | ||
6764 | { | ||
6765 | u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
6766 | |||
6767 | /* | ||
6768 | * If an external interrupt exists, the IF bit is set in the rflags/eflags | ||
6769 | * image on the interrupt stack frame, so interrupts will be re-enabled on | ||
6770 | * return from the interrupt handler. | ||
6771 | */ | ||
6772 | if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) | ||
6773 | == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { | ||
6774 | unsigned int vector; | ||
6775 | unsigned long entry; | ||
6776 | gate_desc *desc; | ||
6777 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
6778 | #ifdef CONFIG_X86_64 | ||
6779 | unsigned long tmp; | ||
6780 | #endif | ||
6781 | |||
6782 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | ||
6783 | desc = (gate_desc *)vmx->host_idt_base + vector; | ||
6784 | entry = gate_offset(*desc); | ||
6785 | asm volatile( | ||
6786 | #ifdef CONFIG_X86_64 | ||
6787 | "mov %%" _ASM_SP ", %[sp]\n\t" | ||
6788 | "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" | ||
6789 | "push $%c[ss]\n\t" | ||
6790 | "push %[sp]\n\t" | ||
6791 | #endif | ||
6792 | "pushf\n\t" | ||
6793 | "orl $0x200, (%%" _ASM_SP ")\n\t" | ||
6794 | __ASM_SIZE(push) " $%c[cs]\n\t" | ||
6795 | "call *%[entry]\n\t" | ||
6796 | : | ||
6797 | #ifdef CONFIG_X86_64 | ||
6798 | [sp]"=&r"(tmp) | ||
6799 | #endif | ||
6800 | : | ||
6801 | [entry]"r"(entry), | ||
6802 | [ss]"i"(__KERNEL_DS), | ||
6803 | [cs]"i"(__KERNEL_CS) | ||
6804 | ); | ||
6805 | } else | ||
6806 | local_irq_enable(); | ||
6807 | } | ||
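The asm block above hand-builds exactly the frame the CPU would have pushed had the external interrupt not been intercepted by VMX, then calls straight into the host IDT entry; the handler's iret both returns here and re-enables interrupts, since IF was or'ed into the saved flags. As a reading aid, the 64-bit frame being forged (a hypothetical struct, lowest address first; on x86-64 the CPU always pushes SS:RSP):

	struct forged_irq_frame {
		unsigned long rip;	/* pushed by "call *%[entry]" */
		unsigned long cs;	/* $__KERNEL_CS */
		unsigned long rflags;	/* pushf result with IF (0x200) set */
		unsigned long rsp;	/* saved, 16-byte-aligned stack pointer */
		unsigned long ss;	/* $__KERNEL_DS */
	};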
6808 | |||
6349 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | 6809 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
6350 | { | 6810 | { |
6351 | u32 exit_intr_info; | 6811 | u32 exit_intr_info; |
@@ -6388,7 +6848,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | |||
6388 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 6848 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
6389 | } | 6849 | } |
6390 | 6850 | ||
6391 | static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | 6851 | static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, |
6392 | u32 idt_vectoring_info, | 6852 | u32 idt_vectoring_info, |
6393 | int instr_len_field, | 6853 | int instr_len_field, |
6394 | int error_code_field) | 6854 | int error_code_field) |
@@ -6399,46 +6859,43 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
6399 | 6859 | ||
6400 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 6860 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
6401 | 6861 | ||
6402 | vmx->vcpu.arch.nmi_injected = false; | 6862 | vcpu->arch.nmi_injected = false; |
6403 | kvm_clear_exception_queue(&vmx->vcpu); | 6863 | kvm_clear_exception_queue(vcpu); |
6404 | kvm_clear_interrupt_queue(&vmx->vcpu); | 6864 | kvm_clear_interrupt_queue(vcpu); |
6405 | 6865 | ||
6406 | if (!idtv_info_valid) | 6866 | if (!idtv_info_valid) |
6407 | return; | 6867 | return; |
6408 | 6868 | ||
6409 | kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); | 6869 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6410 | 6870 | ||
6411 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | 6871 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; |
6412 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | 6872 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; |
6413 | 6873 | ||
6414 | switch (type) { | 6874 | switch (type) { |
6415 | case INTR_TYPE_NMI_INTR: | 6875 | case INTR_TYPE_NMI_INTR: |
6416 | vmx->vcpu.arch.nmi_injected = true; | 6876 | vcpu->arch.nmi_injected = true; |
6417 | /* | 6877 | /* |
6418 | * SDM 3: 27.7.1.2 (September 2008) | 6878 | * SDM 3: 27.7.1.2 (September 2008) |
6419 | * Clear bit "block by NMI" before VM entry if a NMI | 6879 | * Clear bit "block by NMI" before VM entry if a NMI |
6420 | * delivery faulted. | 6880 | * delivery faulted. |
6421 | */ | 6881 | */ |
6422 | vmx_set_nmi_mask(&vmx->vcpu, false); | 6882 | vmx_set_nmi_mask(vcpu, false); |
6423 | break; | 6883 | break; |
6424 | case INTR_TYPE_SOFT_EXCEPTION: | 6884 | case INTR_TYPE_SOFT_EXCEPTION: |
6425 | vmx->vcpu.arch.event_exit_inst_len = | 6885 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
6426 | vmcs_read32(instr_len_field); | ||
6427 | /* fall through */ | 6886 | /* fall through */ |
6428 | case INTR_TYPE_HARD_EXCEPTION: | 6887 | case INTR_TYPE_HARD_EXCEPTION: |
6429 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 6888 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
6430 | u32 err = vmcs_read32(error_code_field); | 6889 | u32 err = vmcs_read32(error_code_field); |
6431 | kvm_queue_exception_e(&vmx->vcpu, vector, err); | 6890 | kvm_queue_exception_e(vcpu, vector, err); |
6432 | } else | 6891 | } else |
6433 | kvm_queue_exception(&vmx->vcpu, vector); | 6892 | kvm_queue_exception(vcpu, vector); |
6434 | break; | 6893 | break; |
6435 | case INTR_TYPE_SOFT_INTR: | 6894 | case INTR_TYPE_SOFT_INTR: |
6436 | vmx->vcpu.arch.event_exit_inst_len = | 6895 | vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field); |
6437 | vmcs_read32(instr_len_field); | ||
6438 | /* fall through */ | 6896 | /* fall through */ |
6439 | case INTR_TYPE_EXT_INTR: | 6897 | case INTR_TYPE_EXT_INTR: |
6440 | kvm_queue_interrupt(&vmx->vcpu, vector, | 6898 | kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR); |
6441 | type == INTR_TYPE_SOFT_INTR); | ||
6442 | break; | 6899 | break; |
6443 | default: | 6900 | default: |
6444 | break; | 6901 | break; |
@@ -6447,18 +6904,14 @@ static void __vmx_complete_interrupts(struct vcpu_vmx *vmx, | |||
6447 | 6904 | ||
6448 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 6905 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
6449 | { | 6906 | { |
6450 | if (is_guest_mode(&vmx->vcpu)) | 6907 | __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, |
6451 | return; | ||
6452 | __vmx_complete_interrupts(vmx, vmx->idt_vectoring_info, | ||
6453 | VM_EXIT_INSTRUCTION_LEN, | 6908 | VM_EXIT_INSTRUCTION_LEN, |
6454 | IDT_VECTORING_ERROR_CODE); | 6909 | IDT_VECTORING_ERROR_CODE); |
6455 | } | 6910 | } |
6456 | 6911 | ||
6457 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | 6912 | static void vmx_cancel_injection(struct kvm_vcpu *vcpu) |
6458 | { | 6913 | { |
6459 | if (is_guest_mode(vcpu)) | 6914 | __vmx_complete_interrupts(vcpu, |
6460 | return; | ||
6461 | __vmx_complete_interrupts(to_vmx(vcpu), | ||
6462 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), | 6915 | vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), |
6463 | VM_ENTRY_INSTRUCTION_LEN, | 6916 | VM_ENTRY_INSTRUCTION_LEN, |
6464 | VM_ENTRY_EXCEPTION_ERROR_CODE); | 6917 | VM_ENTRY_EXCEPTION_ERROR_CODE); |
@@ -6489,21 +6942,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6489 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 6942 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
6490 | unsigned long debugctlmsr; | 6943 | unsigned long debugctlmsr; |
6491 | 6944 | ||
6492 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { | ||
6493 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
6494 | if (vmcs12->idt_vectoring_info_field & | ||
6495 | VECTORING_INFO_VALID_MASK) { | ||
6496 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
6497 | vmcs12->idt_vectoring_info_field); | ||
6498 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
6499 | vmcs12->vm_exit_instruction_len); | ||
6500 | if (vmcs12->idt_vectoring_info_field & | ||
6501 | VECTORING_INFO_DELIVER_CODE_MASK) | ||
6502 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | ||
6503 | vmcs12->idt_vectoring_error_code); | ||
6504 | } | ||
6505 | } | ||
6506 | |||
6507 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 6945 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
6508 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 6946 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
6509 | vmx->entry_time = ktime_get(); | 6947 | vmx->entry_time = ktime_get(); |
@@ -6513,6 +6951,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6513 | if (vmx->emulation_required) | 6951 | if (vmx->emulation_required) |
6514 | return; | 6952 | return; |
6515 | 6953 | ||
6954 | if (vmx->nested.sync_shadow_vmcs) { | ||
6955 | copy_vmcs12_to_shadow(vmx); | ||
6956 | vmx->nested.sync_shadow_vmcs = false; | ||
6957 | } | ||
6958 | |||
6516 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6959 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
6517 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 6960 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
6518 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | 6961 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) |
@@ -6662,17 +7105,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
6662 | 7105 | ||
6663 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 7106 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
6664 | 7107 | ||
6665 | if (is_guest_mode(vcpu)) { | ||
6666 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
6667 | vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info; | ||
6668 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | ||
6669 | vmcs12->idt_vectoring_error_code = | ||
6670 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | ||
6671 | vmcs12->vm_exit_instruction_len = | ||
6672 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
6673 | } | ||
6674 | } | ||
6675 | |||
6676 | vmx->loaded_vmcs->launched = 1; | 7108 | vmx->loaded_vmcs->launched = 1; |
6677 | 7109 | ||
6678 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | 7110 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); |
@@ -6734,10 +7166,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
6734 | put_cpu(); | 7166 | put_cpu(); |
6735 | if (err) | 7167 | if (err) |
6736 | goto free_vmcs; | 7168 | goto free_vmcs; |
6737 | if (vm_need_virtualize_apic_accesses(kvm)) | 7169 | if (vm_need_virtualize_apic_accesses(kvm)) { |
6738 | err = alloc_apic_access_page(kvm); | 7170 | err = alloc_apic_access_page(kvm); |
6739 | if (err) | 7171 | if (err) |
6740 | goto free_vmcs; | 7172 | goto free_vmcs; |
7173 | } | ||
6741 | 7174 | ||
6742 | if (enable_ept) { | 7175 | if (enable_ept) { |
6743 | if (!kvm->arch.ept_identity_map_addr) | 7176 | if (!kvm->arch.ept_identity_map_addr) |
@@ -6931,9 +7364,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
6931 | vmcs12->vm_entry_instruction_len); | 7364 | vmcs12->vm_entry_instruction_len); |
6932 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 7365 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, |
6933 | vmcs12->guest_interruptibility_info); | 7366 | vmcs12->guest_interruptibility_info); |
6934 | vmcs_write32(GUEST_ACTIVITY_STATE, vmcs12->guest_activity_state); | ||
6935 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7367 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
6936 | vmcs_writel(GUEST_DR7, vmcs12->guest_dr7); | 7368 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); |
6937 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); | 7369 | vmcs_writel(GUEST_RFLAGS, vmcs12->guest_rflags); |
6938 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7370 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
6939 | vmcs12->guest_pending_dbg_exceptions); | 7371 | vmcs12->guest_pending_dbg_exceptions); |
@@ -6946,6 +7378,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
6946 | (vmcs_config.pin_based_exec_ctrl | | 7378 | (vmcs_config.pin_based_exec_ctrl | |
6947 | vmcs12->pin_based_vm_exec_control)); | 7379 | vmcs12->pin_based_vm_exec_control)); |
6948 | 7380 | ||
7381 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | ||
7382 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, | ||
7383 | vmcs12->vmx_preemption_timer_value); | ||
7384 | |||
6949 | /* | 7385 | /* |
6950 | * Whether page-faults are trapped is determined by a combination of | 7386 | * Whether page-faults are trapped is determined by a combination of |
6951 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. | 7387 | * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. |
@@ -7016,7 +7452,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7016 | * Other fields are different per CPU, and will be set later when | 7452 | * Other fields are different per CPU, and will be set later when |
7017 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. | 7453 | * vmx_vcpu_load() is called, and when vmx_save_host_state() is called. |
7018 | */ | 7454 | */ |
7019 | vmx_set_constant_host_state(); | 7455 | vmx_set_constant_host_state(vmx); |
7020 | 7456 | ||
7021 | /* | 7457 | /* |
7022 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before | 7458 | * HOST_RSP is normally set correctly in vmx_vcpu_run() just before |
@@ -7082,7 +7518,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7082 | 7518 | ||
7083 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) | 7519 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) |
7084 | vcpu->arch.efer = vmcs12->guest_ia32_efer; | 7520 | vcpu->arch.efer = vmcs12->guest_ia32_efer; |
7085 | if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) | 7521 | else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) |
7086 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 7522 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
7087 | else | 7523 | else |
7088 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 7524 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
@@ -7121,6 +7557,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7121 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7557 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7122 | int cpu; | 7558 | int cpu; |
7123 | struct loaded_vmcs *vmcs02; | 7559 | struct loaded_vmcs *vmcs02; |
7560 | bool ia32e; | ||
7124 | 7561 | ||
7125 | if (!nested_vmx_check_permission(vcpu) || | 7562 | if (!nested_vmx_check_permission(vcpu) || |
7126 | !nested_vmx_check_vmcs12(vcpu)) | 7563 | !nested_vmx_check_vmcs12(vcpu)) |
@@ -7129,6 +7566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7129 | skip_emulated_instruction(vcpu); | 7566 | skip_emulated_instruction(vcpu); |
7130 | vmcs12 = get_vmcs12(vcpu); | 7567 | vmcs12 = get_vmcs12(vcpu); |
7131 | 7568 | ||
7569 | if (enable_shadow_vmcs) | ||
7570 | copy_shadow_to_vmcs12(vmx); | ||
7571 | |||
7132 | /* | 7572 | /* |
7133 | * The nested entry process starts with enforcing various prerequisites | 7573 | * The nested entry process starts with enforcing various prerequisites |
7134 | * on vmcs12 as required by the Intel SDM, and act appropriately when | 7574 | * on vmcs12 as required by the Intel SDM, and act appropriately when |
@@ -7146,6 +7586,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7146 | return 1; | 7586 | return 1; |
7147 | } | 7587 | } |
7148 | 7588 | ||
7589 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { | ||
7590 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | ||
7591 | return 1; | ||
7592 | } | ||
7593 | |||
7149 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && | 7594 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && |
7150 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { | 7595 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { |
7151 | /*TODO: Also verify bits beyond physical address width are 0*/ | 7596 | /*TODO: Also verify bits beyond physical address width are 0*/ |
@@ -7204,6 +7649,45 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7204 | } | 7649 | } |
7205 | 7650 | ||
7206 | /* | 7651 | /* |
7652 | * If the load IA32_EFER VM-entry control is 1, the following checks | ||
7653 | * are performed on the field for the IA32_EFER MSR: | ||
7654 | * - Bits reserved in the IA32_EFER MSR must be 0. | ||
7655 | * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of | ||
7656 | * the IA-32e mode guest VM-exit control. It must also be identical | ||
7657 | * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to | ||
7658 | * CR0.PG) is 1. | ||
7659 | */ | ||
7660 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { | ||
7661 | ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; | ||
7662 | if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || | ||
7663 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || | ||
7664 | ((vmcs12->guest_cr0 & X86_CR0_PG) && | ||
7665 | ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { | ||
7666 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
7667 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
7668 | return 1; | ||
7669 | } | ||
7670 | } | ||
7671 | |||
7672 | /* | ||
7673 | * If the load IA32_EFER VM-exit control is 1, bits reserved in the | ||
7674 | * IA32_EFER MSR must be 0 in the field for that register. In addition, | ||
7675 | * the values of the LMA and LME bits in the field must each be that of | ||
7676 | * the host address-space size VM-exit control. | ||
7677 | */ | ||
7678 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { | ||
7679 | ia32e = (vmcs12->vm_exit_controls & | ||
7680 | VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; | ||
7681 | if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || | ||
7682 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || | ||
7683 | ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { | ||
7684 | nested_vmx_entry_failure(vcpu, vmcs12, | ||
7685 | EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); | ||
7686 | return 1; | ||
7687 | } | ||
7688 | } | ||
7689 | |||
7690 | /* | ||
7207 | * We're finally done with prerequisite checking, and can start with | 7691 | * We're finally done with prerequisite checking, and can start with |
7208 | * the nested entry. | 7692 | * the nested entry. |
7209 | */ | 7693 | */ |
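The two EFER checks added above encode the same SDM consistency rule for guest and host state: LMA must track the relevant long-mode control, and LME must match it whenever paging is on. A condensed sketch of the shared predicate (the helper name is illustrative, not part of the patch):

	/* Sketch only: the rule both hunks enforce on an EFER image */
	static bool efer_matches_long_mode(u64 efer, bool long_mode, u64 cr0)
	{
		if (long_mode != !!(efer & EFER_LMA))
			return false;
		if ((cr0 & X86_CR0_PG) && long_mode != !!(efer & EFER_LME))
			return false;
		return true;
	}

For the host-state variant the CR0.PG qualification falls away: the host always runs with paging enabled, so LME must match unconditionally.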
@@ -7223,6 +7707,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7223 | vcpu->cpu = cpu; | 7707 | vcpu->cpu = cpu; |
7224 | put_cpu(); | 7708 | put_cpu(); |
7225 | 7709 | ||
7710 | vmx_segment_cache_clear(vmx); | ||
7711 | |||
7226 | vmcs12->launch_state = 1; | 7712 | vmcs12->launch_state = 1; |
7227 | 7713 | ||
7228 | prepare_vmcs02(vcpu, vmcs12); | 7714 | prepare_vmcs02(vcpu, vmcs12); |
@@ -7273,6 +7759,48 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7273 | vcpu->arch.cr4_guest_owned_bits)); | 7759 | vcpu->arch.cr4_guest_owned_bits)); |
7274 | } | 7760 | } |
7275 | 7761 | ||
7762 | static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | ||
7763 | struct vmcs12 *vmcs12) | ||
7764 | { | ||
7765 | u32 idt_vectoring; | ||
7766 | unsigned int nr; | ||
7767 | |||
7768 | if (vcpu->arch.exception.pending) { | ||
7769 | nr = vcpu->arch.exception.nr; | ||
7770 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | ||
7771 | |||
7772 | if (kvm_exception_is_soft(nr)) { | ||
7773 | vmcs12->vm_exit_instruction_len = | ||
7774 | vcpu->arch.event_exit_inst_len; | ||
7775 | idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION; | ||
7776 | } else | ||
7777 | idt_vectoring |= INTR_TYPE_HARD_EXCEPTION; | ||
7778 | |||
7779 | if (vcpu->arch.exception.has_error_code) { | ||
7780 | idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK; | ||
7781 | vmcs12->idt_vectoring_error_code = | ||
7782 | vcpu->arch.exception.error_code; | ||
7783 | } | ||
7784 | |||
7785 | vmcs12->idt_vectoring_info_field = idt_vectoring; | ||
7786 | } else if (vcpu->arch.nmi_pending) { | ||
7787 | vmcs12->idt_vectoring_info_field = | ||
7788 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; | ||
7789 | } else if (vcpu->arch.interrupt.pending) { | ||
7790 | nr = vcpu->arch.interrupt.nr; | ||
7791 | idt_vectoring = nr | VECTORING_INFO_VALID_MASK; | ||
7792 | |||
7793 | if (vcpu->arch.interrupt.soft) { | ||
7794 | idt_vectoring |= INTR_TYPE_SOFT_INTR; | ||
7795 | vmcs12->vm_entry_instruction_len = | ||
7796 | vcpu->arch.event_exit_inst_len; | ||
7797 | } else | ||
7798 | idt_vectoring |= INTR_TYPE_EXT_INTR; | ||
7799 | |||
7800 | vmcs12->idt_vectoring_info_field = idt_vectoring; | ||
7801 | } | ||
7802 | } | ||
7803 | |||
7276 | /* | 7804 | /* |
7277 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | 7805 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits |
7278 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | 7806 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), |
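vmcs12_save_pending_event() above emits idt_vectoring_info_field in the architectural interruption-information layout: bits 7:0 vector, bits 10:8 type, bit 11 deliver-error-code, bit 31 valid. A worked example (illustrative values) for a pending #GP with an error code:

	u32 idt_vectoring = GP_VECTOR				/* vector 13 */
			  | INTR_TYPE_HARD_EXCEPTION		/* type 3 << 8 */
			  | VECTORING_INFO_DELIVER_CODE_MASK	/* bit 11 */
			  | VECTORING_INFO_VALID_MASK;		/* bit 31 */
	/* == 0x80000b0d; the error code itself goes in
	 * idt_vectoring_error_code, as in the function above. */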
@@ -7284,7 +7812,7 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7284 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | 7812 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, |
7285 | * which already writes to vmcs12 directly. | 7813 | * which already writes to vmcs12 directly. |
7286 | */ | 7814 | */ |
7287 | void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 7815 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
7288 | { | 7816 | { |
7289 | /* update guest state fields: */ | 7817 | /* update guest state fields: */ |
7290 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 7818 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
@@ -7332,16 +7860,19 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7332 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); | 7860 | vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); |
7333 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); | 7861 | vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); |
7334 | 7862 | ||
7335 | vmcs12->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE); | ||
7336 | vmcs12->guest_interruptibility_info = | 7863 | vmcs12->guest_interruptibility_info = |
7337 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 7864 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
7338 | vmcs12->guest_pending_dbg_exceptions = | 7865 | vmcs12->guest_pending_dbg_exceptions = |
7339 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 7866 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
7340 | 7867 | ||
7868 | vmcs12->vm_entry_controls = | ||
7869 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | ||
7870 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | ||
7871 | |||
7341 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 7872 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
7342 | * the relevant bit asks not to trap the change */ | 7873 | * the relevant bit asks not to trap the change */ |
7343 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | 7874 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
7344 | if (vmcs12->vm_entry_controls & VM_EXIT_SAVE_IA32_PAT) | 7875 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) |
7345 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | 7876 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); |
7346 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | 7877 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
7347 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | 7878 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
@@ -7349,21 +7880,38 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7349 | 7880 | ||
7350 | /* update exit information fields: */ | 7881 | /* update exit information fields: */ |
7351 | 7882 | ||
7352 | vmcs12->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); | 7883 | vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; |
7353 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 7884 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
7354 | 7885 | ||
7355 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 7886 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
7356 | vmcs12->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 7887 | if ((vmcs12->vm_exit_intr_info & |
7357 | vmcs12->idt_vectoring_info_field = | 7888 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == |
7358 | vmcs_read32(IDT_VECTORING_INFO_FIELD); | 7889 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) |
7359 | vmcs12->idt_vectoring_error_code = | 7890 | vmcs12->vm_exit_intr_error_code = |
7360 | vmcs_read32(IDT_VECTORING_ERROR_CODE); | 7891 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
7892 | vmcs12->idt_vectoring_info_field = 0; | ||
7361 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 7893 | vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
7362 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 7894 | vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
7363 | 7895 | ||
7364 | /* clear vm-entry fields which are to be cleared on exit */ | 7896 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { |
7365 | if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) | 7897 | /* vm_entry_intr_info_field is cleared on exit. Emulate this |
7898 | * instead of reading the real value. */ | ||
7366 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; | 7899 | vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK; |
7900 | |||
7901 | /* | ||
7902 | * Transfer the event that L0 or L1 may have wanted to inject into | ||
7903 | * L2 to IDT_VECTORING_INFO_FIELD. | ||
7904 | */ | ||
7905 | vmcs12_save_pending_event(vcpu, vmcs12); | ||
7906 | } | ||
7907 | |||
7908 | /* | ||
7909 | * Drop what we picked up for L2 via vmx_complete_interrupts. It is | ||
7910 | * preserved above and would only end up incorrectly in L1. | ||
7911 | */ | ||
7912 | vcpu->arch.nmi_injected = false; | ||
7913 | kvm_clear_exception_queue(vcpu); | ||
7914 | kvm_clear_interrupt_queue(vcpu); | ||
7367 | } | 7915 | } |
7368 | 7916 | ||
7369 | /* | 7917 | /* |
@@ -7375,11 +7923,12 @@ void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7375 | * Failures During or After Loading Guest State"). | 7923 | * Failures During or After Loading Guest State"). |
7376 | * This function should be called when the active VMCS is L1's (vmcs01). | 7924 | * This function should be called when the active VMCS is L1's (vmcs01). |
7377 | */ | 7925 | */ |
7378 | void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 7926 | static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
7927 | struct vmcs12 *vmcs12) | ||
7379 | { | 7928 | { |
7380 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) | 7929 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) |
7381 | vcpu->arch.efer = vmcs12->host_ia32_efer; | 7930 | vcpu->arch.efer = vmcs12->host_ia32_efer; |
7382 | if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) | 7931 | else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) |
7383 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); | 7932 | vcpu->arch.efer |= (EFER_LMA | EFER_LME); |
7384 | else | 7933 | else |
7385 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); | 7934 | vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); |
@@ -7387,6 +7936,7 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7387 | 7936 | ||
7388 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); | 7937 | kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); |
7389 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); | 7938 | kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); |
7939 | vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); | ||
7390 | /* | 7940 | /* |
7391 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't | 7941 | * Note that calling vmx_set_cr0 is important, even if cr0 hasn't |
7392 | * actually changed, because it depends on the current state of | 7942 | * actually changed, because it depends on the current state of |
@@ -7445,6 +7995,9 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7445 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) | 7995 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) |
7446 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, | 7996 | vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, |
7447 | vmcs12->host_ia32_perf_global_ctrl); | 7997 | vmcs12->host_ia32_perf_global_ctrl); |
7998 | |||
7999 | kvm_set_dr(vcpu, 7, 0x400); | ||
8000 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
7448 | } | 8001 | } |
7449 | 8002 | ||
7450 | /* | 8003 | /* |
@@ -7458,6 +8011,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
7458 | int cpu; | 8011 | int cpu; |
7459 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 8012 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
7460 | 8013 | ||
8014 | /* trying to cancel vmlaunch/vmresume is a bug */ | ||
8015 | WARN_ON_ONCE(vmx->nested.nested_run_pending); | ||
8016 | |||
7461 | leave_guest_mode(vcpu); | 8017 | leave_guest_mode(vcpu); |
7462 | prepare_vmcs12(vcpu, vmcs12); | 8018 | prepare_vmcs12(vcpu, vmcs12); |
7463 | 8019 | ||
@@ -7468,6 +8024,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
7468 | vcpu->cpu = cpu; | 8024 | vcpu->cpu = cpu; |
7469 | put_cpu(); | 8025 | put_cpu(); |
7470 | 8026 | ||
8027 | vmx_segment_cache_clear(vmx); | ||
8028 | |||
7471 | /* if no vmcs02 cache requested, remove the one we used */ | 8029 | /* if no vmcs02 cache requested, remove the one we used */ |
7472 | if (VMCS02_POOL_SIZE == 0) | 8030 | if (VMCS02_POOL_SIZE == 0) |
7473 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | 8031 | nested_free_vmcs02(vmx, vmx->nested.current_vmptr); |
@@ -7496,6 +8054,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
7496 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); | 8054 | nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR)); |
7497 | } else | 8055 | } else |
7498 | nested_vmx_succeed(vcpu); | 8056 | nested_vmx_succeed(vcpu); |
8057 | if (enable_shadow_vmcs) | ||
8058 | vmx->nested.sync_shadow_vmcs = true; | ||
7499 | } | 8059 | } |
7500 | 8060 | ||
7501 | /* | 8061 | /* |
@@ -7513,6 +8073,8 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | |||
7513 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; | 8073 | vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; |
7514 | vmcs12->exit_qualification = qualification; | 8074 | vmcs12->exit_qualification = qualification; |
7515 | nested_vmx_succeed(vcpu); | 8075 | nested_vmx_succeed(vcpu); |
8076 | if (enable_shadow_vmcs) | ||
8077 | to_vmx(vcpu)->nested.sync_shadow_vmcs = true; | ||
7516 | } | 8078 | } |
7517 | 8079 | ||
7518 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, | 8080 | static int vmx_check_intercept(struct kvm_vcpu *vcpu, |
@@ -7590,6 +8152,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7590 | .load_eoi_exitmap = vmx_load_eoi_exitmap, | 8152 | .load_eoi_exitmap = vmx_load_eoi_exitmap, |
7591 | .hwapic_irr_update = vmx_hwapic_irr_update, | 8153 | .hwapic_irr_update = vmx_hwapic_irr_update, |
7592 | .hwapic_isr_update = vmx_hwapic_isr_update, | 8154 | .hwapic_isr_update = vmx_hwapic_isr_update, |
8155 | .sync_pir_to_irr = vmx_sync_pir_to_irr, | ||
8156 | .deliver_posted_interrupt = vmx_deliver_posted_interrupt, | ||
7593 | 8157 | ||
7594 | .set_tss_addr = vmx_set_tss_addr, | 8158 | .set_tss_addr = vmx_set_tss_addr, |
7595 | .get_tdp_level = get_ept_level, | 8159 | .get_tdp_level = get_ept_level, |
@@ -7618,6 +8182,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
7618 | .set_tdp_cr3 = vmx_set_cr3, | 8182 | .set_tdp_cr3 = vmx_set_cr3, |
7619 | 8183 | ||
7620 | .check_intercept = vmx_check_intercept, | 8184 | .check_intercept = vmx_check_intercept, |
8185 | .handle_external_intr = vmx_handle_external_intr, | ||
7621 | }; | 8186 | }; |
7622 | 8187 | ||
7623 | static int __init vmx_init(void) | 8188 | static int __init vmx_init(void) |
@@ -7656,6 +8221,24 @@ static int __init vmx_init(void) | |||
7656 | (unsigned long *)__get_free_page(GFP_KERNEL); | 8221 | (unsigned long *)__get_free_page(GFP_KERNEL); |
7657 | if (!vmx_msr_bitmap_longmode_x2apic) | 8222 | if (!vmx_msr_bitmap_longmode_x2apic) |
7658 | goto out4; | 8223 | goto out4; |
8224 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
8225 | if (!vmx_vmread_bitmap) | ||
8226 | goto out5; | ||
8227 | |||
8228 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
8229 | if (!vmx_vmwrite_bitmap) | ||
8230 | goto out6; | ||
8231 | |||
8232 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); | ||
8233 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); | ||
8234 | /* shadowed read/write fields */ | ||
8235 | for (i = 0; i < max_shadow_read_write_fields; i++) { | ||
8236 | clear_bit(shadow_read_write_fields[i], vmx_vmwrite_bitmap); | ||
8237 | clear_bit(shadow_read_write_fields[i], vmx_vmread_bitmap); | ||
8238 | } | ||
8239 | /* shadowed read-only fields */ | ||
8240 | for (i = 0; i < max_shadow_read_only_fields; i++) | ||
8241 | clear_bit(shadow_read_only_fields[i], vmx_vmread_bitmap); | ||
7659 | 8242 | ||
7660 | /* | 8243 | /* |
7661 | * Allow direct access to the PC debug port (it is often used for I/O | 8244 | * Allow direct access to the PC debug port (it is often used for I/O |
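Both bitmaps start as all-ones, so every VMREAD/VMWRITE executed by L1 traps by default; clearing a bit (bit index == VMCS field encoding) lets the CPU satisfy that access from the shadow VMCS without an exit. A sketch of the same idiom with a hypothetical two-field list:

	/* Sketch: hypothetical shadow-field list, same pattern as above */
	static const unsigned long demo_shadow_fields[] = {
		GUEST_RIP, GUEST_RSP,
	};

	static void demo_init_bitmap(unsigned long *bitmap)
	{
		int i;

		memset(bitmap, 0xff, PAGE_SIZE);	/* trap everything */
		for (i = 0; i < ARRAY_SIZE(demo_shadow_fields); i++)
			clear_bit(demo_shadow_fields[i], bitmap);
	}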
@@ -7674,7 +8257,7 @@ static int __init vmx_init(void) | |||
7674 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), | 8257 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), |
7675 | __alignof__(struct vcpu_vmx), THIS_MODULE); | 8258 | __alignof__(struct vcpu_vmx), THIS_MODULE); |
7676 | if (r) | 8259 | if (r) |
7677 | goto out3; | 8260 | goto out7; |
7678 | 8261 | ||
7679 | #ifdef CONFIG_KEXEC | 8262 | #ifdef CONFIG_KEXEC |
7680 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, | 8263 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, |
@@ -7692,7 +8275,7 @@ static int __init vmx_init(void) | |||
7692 | memcpy(vmx_msr_bitmap_longmode_x2apic, | 8275 | memcpy(vmx_msr_bitmap_longmode_x2apic, |
7693 | vmx_msr_bitmap_longmode, PAGE_SIZE); | 8276 | vmx_msr_bitmap_longmode, PAGE_SIZE); |
7694 | 8277 | ||
7695 | if (enable_apicv_reg_vid) { | 8278 | if (enable_apicv) { |
7696 | for (msr = 0x800; msr <= 0x8ff; msr++) | 8279 | for (msr = 0x800; msr <= 0x8ff; msr++) |
7697 | vmx_disable_intercept_msr_read_x2apic(msr); | 8280 | vmx_disable_intercept_msr_read_x2apic(msr); |
7698 | 8281 | ||
@@ -7722,6 +8305,12 @@ static int __init vmx_init(void) | |||
7722 | 8305 | ||
7723 | return 0; | 8306 | return 0; |
7724 | 8307 | ||
8308 | out7: | ||
8309 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
8310 | out6: | ||
8311 | free_page((unsigned long)vmx_vmread_bitmap); | ||
8312 | out5: | ||
8313 | free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
7725 | out4: | 8314 | out4: |
7726 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 8315 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7727 | out3: | 8316 | out3: |
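The renumbered error path keeps the usual goto-unwind shape: each later allocation jumps to a label that frees only what already exists, in reverse order, which is why the kvm_init() failure farther down now has to target out7 instead of out3. Generic form of the idiom:

	r = -ENOMEM;
	a = alloc_a();
	if (!a)
		goto out;
	b = alloc_b();
	if (!b)
		goto free_a;	/* only a exists at this point */
	return 0;
free_a:
	free_a(a);
out:
	return r;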
@@ -7743,6 +8332,8 @@ static void __exit vmx_exit(void) | |||
7743 | free_page((unsigned long)vmx_msr_bitmap_longmode); | 8332 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
7744 | free_page((unsigned long)vmx_io_bitmap_b); | 8333 | free_page((unsigned long)vmx_io_bitmap_b); |
7745 | free_page((unsigned long)vmx_io_bitmap_a); | 8334 | free_page((unsigned long)vmx_io_bitmap_a); |
8335 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
8336 | free_page((unsigned long)vmx_vmread_bitmap); | ||
7746 | 8337 | ||
7747 | #ifdef CONFIG_KEXEC | 8338 | #ifdef CONFIG_KEXEC |
7748 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); | 8339 | rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1721324c271..05a8b1a2300d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -162,8 +162,6 @@ u64 __read_mostly host_xcr0; | |||
162 | 162 | ||
163 | static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); | 163 | static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); |
164 | 164 | ||
165 | static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); | ||
166 | |||
167 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) | 165 | static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) |
168 | { | 166 | { |
169 | int i; | 167 | int i; |
@@ -263,6 +261,13 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
263 | } | 261 | } |
264 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 262 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
265 | 263 | ||
264 | asmlinkage void kvm_spurious_fault(void) | ||
265 | { | ||
266 | /* Fault while not rebooting. We want the trace. */ | ||
267 | BUG(); | ||
268 | } | ||
269 | EXPORT_SYMBOL_GPL(kvm_spurious_fault); | ||
270 | |||
266 | #define EXCPT_BENIGN 0 | 271 | #define EXCPT_BENIGN 0 |
267 | #define EXCPT_CONTRIBUTORY 1 | 272 | #define EXCPT_CONTRIBUTORY 1 |
268 | #define EXCPT_PF 2 | 273 | #define EXCPT_PF 2 |
@@ -840,23 +845,17 @@ static const u32 emulated_msrs[] = { | |||
840 | MSR_IA32_MCG_CTL, | 845 | MSR_IA32_MCG_CTL, |
841 | }; | 846 | }; |
842 | 847 | ||
843 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | 848 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) |
844 | { | 849 | { |
845 | u64 old_efer = vcpu->arch.efer; | ||
846 | |||
847 | if (efer & efer_reserved_bits) | 850 | if (efer & efer_reserved_bits) |
848 | return 1; | 851 | return false; |
849 | |||
850 | if (is_paging(vcpu) | ||
851 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) | ||
852 | return 1; | ||
853 | 852 | ||
854 | if (efer & EFER_FFXSR) { | 853 | if (efer & EFER_FFXSR) { |
855 | struct kvm_cpuid_entry2 *feat; | 854 | struct kvm_cpuid_entry2 *feat; |
856 | 855 | ||
857 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 856 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
858 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) | 857 | if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) |
859 | return 1; | 858 | return false; |
860 | } | 859 | } |
861 | 860 | ||
862 | if (efer & EFER_SVME) { | 861 | if (efer & EFER_SVME) { |
@@ -864,9 +863,24 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
864 | 863 | ||
865 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | 864 | feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); |
866 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) | 865 | if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) |
867 | return 1; | 866 | return false; |
868 | } | 867 | } |
869 | 868 | ||
869 | return true; | ||
870 | } | ||
871 | EXPORT_SYMBOL_GPL(kvm_valid_efer); | ||
872 | |||
873 | static int set_efer(struct kvm_vcpu *vcpu, u64 efer) | ||
874 | { | ||
875 | u64 old_efer = vcpu->arch.efer; | ||
876 | |||
877 | if (!kvm_valid_efer(vcpu, efer)) | ||
878 | return 1; | ||
879 | |||
880 | if (is_paging(vcpu) | ||
881 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) | ||
882 | return 1; | ||
883 | |||
870 | efer &= ~EFER_LMA; | 884 | efer &= ~EFER_LMA; |
871 | efer |= vcpu->arch.efer & EFER_LMA; | 885 | efer |= vcpu->arch.efer & EFER_LMA; |
872 | 886 | ||
@@ -1079,6 +1093,10 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) | |||
1079 | u32 thresh_lo, thresh_hi; | 1093 | u32 thresh_lo, thresh_hi; |
1080 | int use_scaling = 0; | 1094 | int use_scaling = 0; |
1081 | 1095 | ||
1096 | /* tsc_khz can be zero if TSC calibration fails */ | ||
1097 | if (this_tsc_khz == 0) | ||
1098 | return; | ||
1099 | |||
1082 | /* Compute a scale to convert nanoseconds in TSC cycles */ | 1100 | /* Compute a scale to convert nanoseconds in TSC cycles */ |
1083 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, | 1101 | kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, |
1084 | &vcpu->arch.virtual_tsc_shift, | 1102 | &vcpu->arch.virtual_tsc_shift, |
@@ -1156,20 +1174,23 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1156 | ns = get_kernel_ns(); | 1174 | ns = get_kernel_ns(); |
1157 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1175 | elapsed = ns - kvm->arch.last_tsc_nsec; |
1158 | 1176 | ||
1159 | /* n.b - signed multiplication and division required */ | 1177 | if (vcpu->arch.virtual_tsc_khz) { |
1160 | usdiff = data - kvm->arch.last_tsc_write; | 1178 | /* n.b - signed multiplication and division required */ |
1179 | usdiff = data - kvm->arch.last_tsc_write; | ||
1161 | #ifdef CONFIG_X86_64 | 1180 | #ifdef CONFIG_X86_64 |
1162 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; | 1181 | usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; |
1163 | #else | 1182 | #else |
1164 | /* do_div() only does unsigned */ | 1183 | /* do_div() only does unsigned */ |
1165 | asm("idivl %2; xor %%edx, %%edx" | 1184 | asm("idivl %2; xor %%edx, %%edx" |
1166 | : "=A"(usdiff) | 1185 | : "=A"(usdiff) |
1167 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); | 1186 | : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz)); |
1168 | #endif | 1187 | #endif |
1169 | do_div(elapsed, 1000); | 1188 | do_div(elapsed, 1000); |
1170 | usdiff -= elapsed; | 1189 | usdiff -= elapsed; |
1171 | if (usdiff < 0) | 1190 | if (usdiff < 0) |
1172 | usdiff = -usdiff; | 1191 | usdiff = -usdiff; |
1192 | } else | ||
1193 | usdiff = USEC_PER_SEC; /* disable TSC match window below */ | ||
1173 | 1194 | ||
1174 | /* | 1195 | /* |
1175 | * Special case: TSC write with a small delta (1 second) of virtual | 1196 | * Special case: TSC write with a small delta (1 second) of virtual |
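The zero-rate guard matters because this block converts the cycle delta into microseconds at the vCPU's virtual TSC frequency (usecs = cycles * 1000 / khz) so the one-second window below compares like units; pinning usdiff to a full second when calibration failed simply disables the match. An equivalent sketch without the 32-bit inline-asm branch (div_s64 is from linux/math64.h):

	/* Sketch: cycle delta at 'khz' kHz -> elapsed microseconds */
	static s64 tsc_cycles_to_usec(s64 cycles, u32 khz)
	{
		if (!khz)
			return USEC_PER_SEC;	/* forces 'no match' below */
		return div_s64(cycles * 1000, khz);
	}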
@@ -2034,7 +2055,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2034 | case MSR_P6_EVNTSEL0: | 2055 | case MSR_P6_EVNTSEL0: |
2035 | case MSR_P6_EVNTSEL1: | 2056 | case MSR_P6_EVNTSEL1: |
2036 | if (kvm_pmu_msr(vcpu, msr)) | 2057 | if (kvm_pmu_msr(vcpu, msr)) |
2037 | return kvm_pmu_set_msr(vcpu, msr, data); | 2058 | return kvm_pmu_set_msr(vcpu, msr_info); |
2038 | 2059 | ||
2039 | if (pr || data != 0) | 2060 | if (pr || data != 0) |
2040 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " | 2061 | vcpu_unimpl(vcpu, "disabled perfctr wrmsr: " |
@@ -2080,7 +2101,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2080 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 2101 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
2081 | return xen_hvm_config(vcpu, data); | 2102 | return xen_hvm_config(vcpu, data); |
2082 | if (kvm_pmu_msr(vcpu, msr)) | 2103 | if (kvm_pmu_msr(vcpu, msr)) |
2083 | return kvm_pmu_set_msr(vcpu, msr, data); | 2104 | return kvm_pmu_set_msr(vcpu, msr_info); |
2084 | if (!ignore_msrs) { | 2105 | if (!ignore_msrs) { |
2085 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 2106 | vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
2086 | msr, data); | 2107 | msr, data); |
@@ -2479,7 +2500,6 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2479 | case KVM_CAP_USER_NMI: | 2500 | case KVM_CAP_USER_NMI: |
2480 | case KVM_CAP_REINJECT_CONTROL: | 2501 | case KVM_CAP_REINJECT_CONTROL: |
2481 | case KVM_CAP_IRQ_INJECT_STATUS: | 2502 | case KVM_CAP_IRQ_INJECT_STATUS: |
2482 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
2483 | case KVM_CAP_IRQFD: | 2503 | case KVM_CAP_IRQFD: |
2484 | case KVM_CAP_IOEVENTFD: | 2504 | case KVM_CAP_IOEVENTFD: |
2485 | case KVM_CAP_PIT2: | 2505 | case KVM_CAP_PIT2: |
@@ -2497,10 +2517,12 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2497 | case KVM_CAP_XSAVE: | 2517 | case KVM_CAP_XSAVE: |
2498 | case KVM_CAP_ASYNC_PF: | 2518 | case KVM_CAP_ASYNC_PF: |
2499 | case KVM_CAP_GET_TSC_KHZ: | 2519 | case KVM_CAP_GET_TSC_KHZ: |
2500 | case KVM_CAP_PCI_2_3: | ||
2501 | case KVM_CAP_KVMCLOCK_CTRL: | 2520 | case KVM_CAP_KVMCLOCK_CTRL: |
2502 | case KVM_CAP_READONLY_MEM: | 2521 | case KVM_CAP_READONLY_MEM: |
2503 | case KVM_CAP_IRQFD_RESAMPLE: | 2522 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2523 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
2524 | case KVM_CAP_PCI_2_3: | ||
2525 | #endif | ||
2504 | r = 1; | 2526 | r = 1; |
2505 | break; | 2527 | break; |
2506 | case KVM_CAP_COALESCED_MMIO: | 2528 | case KVM_CAP_COALESCED_MMIO: |
@@ -2521,9 +2543,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2521 | case KVM_CAP_PV_MMU: /* obsolete */ | 2543 | case KVM_CAP_PV_MMU: /* obsolete */ |
2522 | r = 0; | 2544 | r = 0; |
2523 | break; | 2545 | break; |
2546 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | ||
2524 | case KVM_CAP_IOMMU: | 2547 | case KVM_CAP_IOMMU: |
2525 | r = iommu_present(&pci_bus_type); | 2548 | r = iommu_present(&pci_bus_type); |
2526 | break; | 2549 | break; |
2550 | #endif | ||
2527 | case KVM_CAP_MCE: | 2551 | case KVM_CAP_MCE: |
2528 | r = KVM_MAX_MCE_BANKS; | 2552 | r = KVM_MAX_MCE_BANKS; |
2529 | break; | 2553 | break; |
@@ -2679,6 +2703,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | |||
2679 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, | 2703 | static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, |
2680 | struct kvm_lapic_state *s) | 2704 | struct kvm_lapic_state *s) |
2681 | { | 2705 | { |
2706 | kvm_x86_ops->sync_pir_to_irr(vcpu); | ||
2682 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); | 2707 | memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s); |
2683 | 2708 | ||
2684 | return 0; | 2709 | return 0; |
@@ -2696,7 +2721,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, | |||
2696 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | 2721 | static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, |
2697 | struct kvm_interrupt *irq) | 2722 | struct kvm_interrupt *irq) |
2698 | { | 2723 | { |
2699 | if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) | 2724 | if (irq->irq >= KVM_NR_INTERRUPTS) |
2700 | return -EINVAL; | 2725 | return -EINVAL; |
2701 | if (irqchip_in_kernel(vcpu->kvm)) | 2726 | if (irqchip_in_kernel(vcpu->kvm)) |
2702 | return -ENXIO; | 2727 | return -ENXIO; |
@@ -2819,10 +2844,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2819 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | 2844 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); |
2820 | events->nmi.pad = 0; | 2845 | events->nmi.pad = 0; |
2821 | 2846 | ||
2822 | events->sipi_vector = vcpu->arch.sipi_vector; | 2847 | events->sipi_vector = 0; /* never valid when reporting to user space */ |
2823 | 2848 | ||
2824 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | 2849 | events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING |
2825 | | KVM_VCPUEVENT_VALID_SIPI_VECTOR | ||
2826 | | KVM_VCPUEVENT_VALID_SHADOW); | 2850 | | KVM_VCPUEVENT_VALID_SHADOW); |
2827 | memset(&events->reserved, 0, sizeof(events->reserved)); | 2851 | memset(&events->reserved, 0, sizeof(events->reserved)); |
2828 | } | 2852 | } |
@@ -2853,8 +2877,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2853 | vcpu->arch.nmi_pending = events->nmi.pending; | 2877 | vcpu->arch.nmi_pending = events->nmi.pending; |
2854 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | 2878 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); |
2855 | 2879 | ||
2856 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) | 2880 | if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR && |
2857 | vcpu->arch.sipi_vector = events->sipi_vector; | 2881 | kvm_vcpu_has_lapic(vcpu)) |
2882 | vcpu->arch.apic->sipi_vector = events->sipi_vector; | ||
2858 | 2883 | ||
2859 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 2884 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
2860 | 2885 | ||
@@ -3478,13 +3503,15 @@ out: | |||
3478 | return r; | 3503 | return r; |
3479 | } | 3504 | } |
3480 | 3505 | ||
3481 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) | 3506 | int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, |
3507 | bool line_status) | ||
3482 | { | 3508 | { |
3483 | if (!irqchip_in_kernel(kvm)) | 3509 | if (!irqchip_in_kernel(kvm)) |
3484 | return -ENXIO; | 3510 | return -ENXIO; |
3485 | 3511 | ||
3486 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 3512 | irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
3487 | irq_event->irq, irq_event->level); | 3513 | irq_event->irq, irq_event->level, |
3514 | line_status); | ||
3488 | return 0; | 3515 | return 0; |
3489 | } | 3516 | } |
3490 | 3517 | ||
@@ -4752,11 +4779,15 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) | |||
4752 | } | 4779 | } |
4753 | 4780 | ||
4754 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, | 4781 | static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, |
4755 | bool write_fault_to_shadow_pgtable) | 4782 | bool write_fault_to_shadow_pgtable, |
4783 | int emulation_type) | ||
4756 | { | 4784 | { |
4757 | gpa_t gpa = cr2; | 4785 | gpa_t gpa = cr2; |
4758 | pfn_t pfn; | 4786 | pfn_t pfn; |
4759 | 4787 | ||
4788 | if (emulation_type & EMULTYPE_NO_REEXECUTE) | ||
4789 | return false; | ||
4790 | |||
4760 | if (!vcpu->arch.mmu.direct_map) { | 4791 | if (!vcpu->arch.mmu.direct_map) { |
4761 | /* | 4792 | /* |
4762 | * Write permission should be allowed since only | 4793 | * Write permission should be allowed since only |
@@ -4899,8 +4930,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4899 | if (r != EMULATION_OK) { | 4930 | if (r != EMULATION_OK) { |
4900 | if (emulation_type & EMULTYPE_TRAP_UD) | 4931 | if (emulation_type & EMULTYPE_TRAP_UD) |
4901 | return EMULATE_FAIL; | 4932 | return EMULATE_FAIL; |
4902 | if (reexecute_instruction(vcpu, cr2, | 4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, |
4903 | write_fault_to_spt)) | 4934 | emulation_type)) |
4904 | return EMULATE_DONE; | 4935 | return EMULATE_DONE; |
4905 | if (emulation_type & EMULTYPE_SKIP) | 4936 | if (emulation_type & EMULTYPE_SKIP) |
4906 | return EMULATE_FAIL; | 4937 | return EMULATE_FAIL; |
@@ -4930,7 +4961,8 @@ restart: | |||
4930 | return EMULATE_DONE; | 4961 | return EMULATE_DONE; |
4931 | 4962 | ||
4932 | if (r == EMULATION_FAILED) { | 4963 | if (r == EMULATION_FAILED) { |
4933 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) | 4964 | if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, |
4965 | emulation_type)) | ||
4934 | return EMULATE_DONE; | 4966 | return EMULATE_DONE; |
4935 | 4967 | ||
4936 | return handle_emulation_failure(vcpu); | 4968 | return handle_emulation_failure(vcpu); |
@@ -5641,14 +5673,20 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) | |||
5641 | #endif | 5673 | #endif |
5642 | } | 5674 | } |
5643 | 5675 | ||
5644 | static void update_eoi_exitmap(struct kvm_vcpu *vcpu) | 5676 | static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) |
5645 | { | 5677 | { |
5646 | u64 eoi_exit_bitmap[4]; | 5678 | u64 eoi_exit_bitmap[4]; |
5679 | u32 tmr[8]; | ||
5680 | |||
5681 | if (!kvm_apic_hw_enabled(vcpu->arch.apic)) | ||
5682 | return; | ||
5647 | 5683 | ||
5648 | memset(eoi_exit_bitmap, 0, 32); | 5684 | memset(eoi_exit_bitmap, 0, 32); |
5685 | memset(tmr, 0, 32); | ||
5649 | 5686 | ||
5650 | kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); | 5687 | kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); |
5651 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); | 5688 | kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); |
5689 | kvm_apic_update_tmr(vcpu, tmr); | ||
5652 | } | 5690 | } |
5653 | 5691 | ||
5654 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5692 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
@@ -5656,7 +5694,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5656 | int r; | 5694 | int r; |
5657 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 5695 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
5658 | vcpu->run->request_interrupt_window; | 5696 | vcpu->run->request_interrupt_window; |
5659 | bool req_immediate_exit = 0; | 5697 | bool req_immediate_exit = false; |
5660 | 5698 | ||
5661 | if (vcpu->requests) { | 5699 | if (vcpu->requests) { |
5662 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) | 5700 | if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) |
@@ -5698,24 +5736,30 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5698 | record_steal_time(vcpu); | 5736 | record_steal_time(vcpu); |
5699 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | 5737 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) |
5700 | process_nmi(vcpu); | 5738 | process_nmi(vcpu); |
5701 | req_immediate_exit = | ||
5702 | kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu); | ||
5703 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) | 5739 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) |
5704 | kvm_handle_pmu_event(vcpu); | 5740 | kvm_handle_pmu_event(vcpu); |
5705 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) | 5741 | if (kvm_check_request(KVM_REQ_PMI, vcpu)) |
5706 | kvm_deliver_pmi(vcpu); | 5742 | kvm_deliver_pmi(vcpu); |
5707 | if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) | 5743 | if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) |
5708 | update_eoi_exitmap(vcpu); | 5744 | vcpu_scan_ioapic(vcpu); |
5709 | } | 5745 | } |
5710 | 5746 | ||
5711 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { | 5747 | if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { |
5748 | kvm_apic_accept_events(vcpu); | ||
5749 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | ||
5750 | r = 1; | ||
5751 | goto out; | ||
5752 | } | ||
5753 | |||
5712 | inject_pending_event(vcpu); | 5754 | inject_pending_event(vcpu); |
5713 | 5755 | ||
5714 | /* enable NMI/IRQ window open exits if needed */ | 5756 | /* enable NMI/IRQ window open exits if needed */ |
5715 | if (vcpu->arch.nmi_pending) | 5757 | if (vcpu->arch.nmi_pending) |
5716 | kvm_x86_ops->enable_nmi_window(vcpu); | 5758 | req_immediate_exit = |
5759 | kvm_x86_ops->enable_nmi_window(vcpu) != 0; | ||
5717 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) | 5760 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
5718 | kvm_x86_ops->enable_irq_window(vcpu); | 5761 | req_immediate_exit = |
5762 | kvm_x86_ops->enable_irq_window(vcpu) != 0; | ||
5719 | 5763 | ||
5720 | if (kvm_lapic_enabled(vcpu)) { | 5764 | if (kvm_lapic_enabled(vcpu)) { |
5721 | /* | 5765 | /* |
@@ -5794,7 +5838,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5794 | 5838 | ||
5795 | vcpu->mode = OUTSIDE_GUEST_MODE; | 5839 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5796 | smp_wmb(); | 5840 | smp_wmb(); |
5797 | local_irq_enable(); | 5841 | |
5842 | /* Interrupts are enabled by handle_external_intr() */ | ||
5843 | kvm_x86_ops->handle_external_intr(vcpu); | ||
5798 | 5844 | ||
5799 | ++vcpu->stat.exits; | 5845 | ++vcpu->stat.exits; |
5800 | 5846 | ||
@@ -5843,16 +5889,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5843 | int r; | 5889 | int r; |
5844 | struct kvm *kvm = vcpu->kvm; | 5890 | struct kvm *kvm = vcpu->kvm; |
5845 | 5891 | ||
5846 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | ||
5847 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
5848 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | ||
5849 | kvm_lapic_reset(vcpu); | ||
5850 | r = kvm_vcpu_reset(vcpu); | ||
5851 | if (r) | ||
5852 | return r; | ||
5853 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
5854 | } | ||
5855 | |||
5856 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 5892 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
5857 | r = vapic_enter(vcpu); | 5893 | r = vapic_enter(vcpu); |
5858 | if (r) { | 5894 | if (r) { |
@@ -5869,8 +5905,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5869 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 5905 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
5870 | kvm_vcpu_block(vcpu); | 5906 | kvm_vcpu_block(vcpu); |
5871 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 5907 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
5872 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) | 5908 | if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { |
5873 | { | 5909 | kvm_apic_accept_events(vcpu); |
5874 | switch(vcpu->arch.mp_state) { | 5910 | switch(vcpu->arch.mp_state) { |
5875 | case KVM_MP_STATE_HALTED: | 5911 | case KVM_MP_STATE_HALTED: |
5876 | vcpu->arch.mp_state = | 5912 | vcpu->arch.mp_state = |
@@ -5878,7 +5914,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
5878 | case KVM_MP_STATE_RUNNABLE: | 5914 | case KVM_MP_STATE_RUNNABLE: |
5879 | vcpu->arch.apf.halted = false; | 5915 | vcpu->arch.apf.halted = false; |
5880 | break; | 5916 | break; |
5881 | case KVM_MP_STATE_SIPI_RECEIVED: | 5917 | case KVM_MP_STATE_INIT_RECEIVED: |
5918 | break; | ||
5882 | default: | 5919 | default: |
5883 | r = -EINTR; | 5920 | r = -EINTR; |
5884 | break; | 5921 | break; |
@@ -6013,6 +6050,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
6013 | 6050 | ||
6014 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | 6051 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
6015 | kvm_vcpu_block(vcpu); | 6052 | kvm_vcpu_block(vcpu); |
6053 | kvm_apic_accept_events(vcpu); | ||
6016 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | 6054 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); |
6017 | r = -EAGAIN; | 6055 | r = -EAGAIN; |
6018 | goto out; | 6056 | goto out; |
@@ -6169,6 +6207,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
6169 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 6207 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
6170 | struct kvm_mp_state *mp_state) | 6208 | struct kvm_mp_state *mp_state) |
6171 | { | 6209 | { |
6210 | kvm_apic_accept_events(vcpu); | ||
6172 | mp_state->mp_state = vcpu->arch.mp_state; | 6211 | mp_state->mp_state = vcpu->arch.mp_state; |
6173 | return 0; | 6212 | return 0; |
6174 | } | 6213 | } |
@@ -6176,7 +6215,15 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | |||
6176 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 6215 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
6177 | struct kvm_mp_state *mp_state) | 6216 | struct kvm_mp_state *mp_state) |
6178 | { | 6217 | { |
6179 | vcpu->arch.mp_state = mp_state->mp_state; | 6218 | if (!kvm_vcpu_has_lapic(vcpu) && |
6219 | mp_state->mp_state != KVM_MP_STATE_RUNNABLE) | ||
6220 | return -EINVAL; | ||
6221 | |||
6222 | if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { | ||
6223 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | ||
6224 | set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); | ||
6225 | } else | ||
6226 | vcpu->arch.mp_state = mp_state->mp_state; | ||
6180 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 6227 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
6181 | return 0; | 6228 | return 0; |
6182 | } | 6229 | } |
@@ -6475,9 +6522,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
6475 | r = vcpu_load(vcpu); | 6522 | r = vcpu_load(vcpu); |
6476 | if (r) | 6523 | if (r) |
6477 | return r; | 6524 | return r; |
6478 | r = kvm_vcpu_reset(vcpu); | 6525 | kvm_vcpu_reset(vcpu); |
6479 | if (r == 0) | 6526 | r = kvm_mmu_setup(vcpu); |
6480 | r = kvm_mmu_setup(vcpu); | ||
6481 | vcpu_put(vcpu); | 6527 | vcpu_put(vcpu); |
6482 | 6528 | ||
6483 | return r; | 6529 | return r; |
@@ -6514,7 +6560,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
6514 | kvm_x86_ops->vcpu_free(vcpu); | 6560 | kvm_x86_ops->vcpu_free(vcpu); |
6515 | } | 6561 | } |
6516 | 6562 | ||
6517 | static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) | 6563 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu) |
6518 | { | 6564 | { |
6519 | atomic_set(&vcpu->arch.nmi_queued, 0); | 6565 | atomic_set(&vcpu->arch.nmi_queued, 0); |
6520 | vcpu->arch.nmi_pending = 0; | 6566 | vcpu->arch.nmi_pending = 0; |
@@ -6541,7 +6587,18 @@ static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6541 | vcpu->arch.regs_avail = ~0; | 6587 | vcpu->arch.regs_avail = ~0; |
6542 | vcpu->arch.regs_dirty = ~0; | 6588 | vcpu->arch.regs_dirty = ~0; |
6543 | 6589 | ||
6544 | return kvm_x86_ops->vcpu_reset(vcpu); | 6590 | kvm_x86_ops->vcpu_reset(vcpu); |
6591 | } | ||
6592 | |||
6593 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) | ||
6594 | { | ||
6595 | struct kvm_segment cs; | ||
6596 | |||
6597 | kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
6598 | cs.selector = vector << 8; | ||
6599 | cs.base = vector << 12; | ||
6600 | kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); | ||
6601 | kvm_rip_write(vcpu, 0); | ||
6545 | } | 6602 | } |
6546 | 6603 | ||
6547 | int kvm_arch_hardware_enable(void *garbage) | 6604 | int kvm_arch_hardware_enable(void *garbage) |
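kvm_vcpu_deliver_sipi_vector() above implements the architectural SIPI startup state: the AP begins in real mode at physical address vector << 12, and since a real-mode segment base is selector * 16, that address is expressed as CS.selector = vector << 8 with IP = 0. Worked example with an illustrative vector:

	/* SIPI vector 0x9a: the AP starts executing at 0x9a000 */
	unsigned int vector = 0x9a;
	u16 selector = vector << 8;	/* 0x9a00 */
	u32 base     = vector << 12;	/* 0x9a00 * 16 == 0x9a000 */
	u32 ip       = 0;		/* first instruction at base + 0 */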
@@ -6706,8 +6763,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
6706 | } | 6763 | } |
6707 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; | 6764 | vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; |
6708 | 6765 | ||
6709 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) | 6766 | if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) { |
6767 | r = -ENOMEM; | ||
6710 | goto fail_free_mce_banks; | 6768 | goto fail_free_mce_banks; |
6769 | } | ||
6711 | 6770 | ||
6712 | r = fx_init(vcpu); | 6771 | r = fx_init(vcpu); |
6713 | if (r) | 6772 | if (r) |
@@ -6811,6 +6870,23 @@ void kvm_arch_sync_events(struct kvm *kvm) | |||
6811 | 6870 | ||
6812 | void kvm_arch_destroy_vm(struct kvm *kvm) | 6871 | void kvm_arch_destroy_vm(struct kvm *kvm) |
6813 | { | 6872 | { |
6873 | if (current->mm == kvm->mm) { | ||
6874 | /* | ||
6875 | * Free memory regions allocated on behalf of userspace, | ||
6876 | * unless the memory map has changed due to process exit | ||
6877 | * or fd copying. | ||
6878 | */ | ||
6879 | struct kvm_userspace_memory_region mem; | ||
6880 | memset(&mem, 0, sizeof(mem)); | ||
6881 | mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | ||
6882 | kvm_set_memory_region(kvm, &mem); | ||
6883 | |||
6884 | mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | ||
6885 | kvm_set_memory_region(kvm, &mem); | ||
6886 | |||
6887 | mem.slot = TSS_PRIVATE_MEMSLOT; | ||
6888 | kvm_set_memory_region(kvm, &mem); | ||
6889 | } | ||
6814 | kvm_iommu_unmap_guest(kvm); | 6890 | kvm_iommu_unmap_guest(kvm); |
6815 | kfree(kvm->arch.vpic); | 6891 | kfree(kvm->arch.vpic); |
6816 | kfree(kvm->arch.vioapic); | 6892 | kfree(kvm->arch.vioapic); |
@@ -6903,24 +6979,21 @@ out_free: | |||
6903 | 6979 | ||
6904 | int kvm_arch_prepare_memory_region(struct kvm *kvm, | 6980 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
6905 | struct kvm_memory_slot *memslot, | 6981 | struct kvm_memory_slot *memslot, |
6906 | struct kvm_memory_slot old, | ||
6907 | struct kvm_userspace_memory_region *mem, | 6982 | struct kvm_userspace_memory_region *mem, |
6908 | bool user_alloc) | 6983 | enum kvm_mr_change change) |
6909 | { | 6984 | { |
6910 | int npages = memslot->npages; | ||
6911 | |||
6912 | /* | 6985 | /* |
6913 | * Only private memory slots need to be mapped here since | 6986 | * Only private memory slots need to be mapped here since |
6914 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. | 6987 | * KVM_SET_MEMORY_REGION ioctl is no longer supported. |
6915 | */ | 6988 | */ |
6916 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { | 6989 | if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { |
6917 | unsigned long userspace_addr; | 6990 | unsigned long userspace_addr; |
6918 | 6991 | ||
6919 | /* | 6992 | /* |
6920 | * MAP_SHARED to prevent internal slot pages from being moved | 6993 | * MAP_SHARED to prevent internal slot pages from being moved |
6921 | * by fork()/COW. | 6994 | * by fork()/COW. |
6922 | */ | 6995 | */ |
6923 | userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, | 6996 | userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, |
6924 | PROT_READ | PROT_WRITE, | 6997 | PROT_READ | PROT_WRITE, |
6925 | MAP_SHARED | MAP_ANONYMOUS, 0); | 6998 | MAP_SHARED | MAP_ANONYMOUS, 0); |
6926 | 6999 | ||
@@ -6935,17 +7008,17 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
6935 | 7008 | ||
6936 | void kvm_arch_commit_memory_region(struct kvm *kvm, | 7009 | void kvm_arch_commit_memory_region(struct kvm *kvm, |
6937 | struct kvm_userspace_memory_region *mem, | 7010 | struct kvm_userspace_memory_region *mem, |
6938 | struct kvm_memory_slot old, | 7011 | const struct kvm_memory_slot *old, |
6939 | bool user_alloc) | 7012 | enum kvm_mr_change change) |
6940 | { | 7013 | { |
6941 | 7014 | ||
6942 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; | 7015 | int nr_mmu_pages = 0; |
6943 | 7016 | ||
6944 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { | 7017 | if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) { |
6945 | int ret; | 7018 | int ret; |
6946 | 7019 | ||
6947 | ret = vm_munmap(old.userspace_addr, | 7020 | ret = vm_munmap(old->userspace_addr, |
6948 | old.npages * PAGE_SIZE); | 7021 | old->npages * PAGE_SIZE); |
6949 | if (ret < 0) | 7022 | if (ret < 0) |
6950 | printk(KERN_WARNING | 7023 | printk(KERN_WARNING |
6951 | "kvm_vm_ioctl_set_memory_region: " | 7024 | "kvm_vm_ioctl_set_memory_region: " |
@@ -6962,14 +7035,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6962 | * Existing largepage mappings are destroyed here and new ones will | 7035 | * Existing largepage mappings are destroyed here and new ones will |
6963 | * not be created until the end of the logging. | 7036 | * not be created until the end of the logging. |
6964 | */ | 7037 | */ |
6965 | if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) | 7038 | if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) |
6966 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 7039 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
6967 | /* | 7040 | /* |
6968 | * If memory slot is created, or moved, we need to clear all | 7041 | * If memory slot is created, or moved, we need to clear all |
6969 | * mmio sptes. | 7042 | * mmio sptes. |
6970 | */ | 7043 | */ |
6971 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { | 7044 | if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { |
6972 | kvm_mmu_zap_all(kvm); | 7045 | kvm_mmu_zap_mmio_sptes(kvm); |
6973 | kvm_reload_remote_mmus(kvm); | 7046 | kvm_reload_remote_mmus(kvm); |
6974 | } | 7047 | } |
6975 | } | 7048 | } |
@@ -6991,7 +7064,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
6991 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 7064 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
6992 | !vcpu->arch.apf.halted) | 7065 | !vcpu->arch.apf.halted) |
6993 | || !list_empty_careful(&vcpu->async_pf.done) | 7066 | || !list_empty_careful(&vcpu->async_pf.done) |
6994 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED | 7067 | || kvm_apic_has_events(vcpu) |
6995 | || atomic_read(&vcpu->arch.nmi_queued) || | 7068 | || atomic_read(&vcpu->arch.nmi_queued) || |
6996 | (kvm_arch_interrupt_allowed(vcpu) && | 7069 | (kvm_arch_interrupt_allowed(vcpu) && |
6997 | kvm_cpu_has_interrupt(vcpu)); | 7070 | kvm_cpu_has_interrupt(vcpu)); |