author     Paolo Bonzini <pbonzini@redhat.com>    2015-01-23 07:39:51 -0500
committer  Paolo Bonzini <pbonzini@redhat.com>    2015-01-23 07:39:51 -0500
commit     1c6007d59a20762052cc92c0a2889ff11030d23a (patch)
tree       40bd72fe4e4d38a811312e5ae35bafd04c995d40
parent     c6156df9d32141e5f1abb43078c56f2e5a0cb294 (diff)
parent     4b990589952f0e30aa860184ac6c76219a74632e (diff)
Merge tag 'kvm-arm-for-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-next
KVM/ARM changes for v3.20, including GICv3 emulation, dirty page logging, new
trace symbols, and an explicit VGIC init device control IOCTL.

Conflicts:
	arch/arm64/include/asm/kvm_arm.h
	arch/arm64/kvm/handle_exit.c
-rw-r--r--  Documentation/virtual/kvm/api.txt | 13
-rw-r--r--  Documentation/virtual/kvm/devices/arm-vgic.txt | 37
-rw-r--r--  arch/arm/include/asm/kvm_asm.h | 1
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 5
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 5
-rw-r--r--  arch/arm/include/asm/kvm_mmio.h | 1
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 21
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h | 1
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h | 2
-rw-r--r--  arch/arm/kvm/Kconfig | 2
-rw-r--r--  arch/arm/kvm/Makefile | 1
-rw-r--r--  arch/arm/kvm/arm.c | 55
-rw-r--r--  arch/arm/kvm/handle_exit.c | 8
-rw-r--r--  arch/arm/kvm/interrupts.S | 11
-rw-r--r--  arch/arm/kvm/mmu.c | 257
-rw-r--r--  arch/arm/kvm/psci.c | 17
-rw-r--r--  arch/arm/kvm/trace.h | 11
-rw-r--r--  arch/arm64/include/asm/esr.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 10
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_mmio.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 21
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 1
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h | 9
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 1
-rw-r--r--  arch/arm64/kvm/Kconfig | 2
-rw-r--r--  arch/arm64/kvm/Makefile | 2
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 13
-rw-r--r--  arch/arm64/kvm/hyp.S | 22
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 40
-rw-r--r--  arch/arm64/kvm/trace.h | 55
-rw-r--r--  arch/arm64/kvm/vgic-v3-switch.S | 14
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 3
-rw-r--r--  arch/x86/kvm/Kconfig | 1
-rw-r--r--  arch/x86/kvm/mmu.c | 4
-rw-r--r--  arch/x86/kvm/x86.c | 72
-rw-r--r--  drivers/irqchip/irq-gic-v3.c | 14
-rw-r--r--  include/kvm/arm_vgic.h | 43
-rw-r--r--  include/linux/irqchip/arm-gic-v3.h | 44
-rw-r--r--  include/linux/kvm_host.h | 11
-rw-r--r--  include/uapi/linux/kvm.h | 2
-rw-r--r--  virt/kvm/Kconfig | 6
-rw-r--r--  virt/kvm/arm/vgic-v2-emul.c | 847
-rw-r--r--  virt/kvm/arm/vgic-v2.c | 4
-rw-r--r--  virt/kvm/arm/vgic-v3-emul.c | 1036
-rw-r--r--  virt/kvm/arm/vgic-v3.c | 82
-rw-r--r--  virt/kvm/arm/vgic.c | 1127
-rw-r--r--  virt/kvm/arm/vgic.h | 123
-rw-r--r--  virt/kvm/kvm_main.c | 82
50 files changed, 3152 insertions(+), 996 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0007fef4ed81..f4b19d78782b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -612,11 +612,14 @@ Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
 
-Creates an interrupt controller model in the kernel. On x86, creates a virtual
-ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
-local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC. On ARM/arm64, a GIC is
-created. On s390, a dummy irq routing table is created.
+Creates an interrupt controller model in the kernel.
+On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
+future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both
+PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
+On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
+KVM_CREATE_DEVICE, which also supports creating a GICv2. Using
+KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
+On s390, a dummy irq routing table is created.
 
 Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
 before KVM_CREATE_IRQCHIP can be used.
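
The api.txt change above steers ARM users toward KVM_CREATE_DEVICE rather than the legacy KVM_CREATE_IRQCHIP. As a rough userspace sketch of that path (not part of the patch: the create_vgic_device() helper name and the minimal error handling are illustrative, and vm_fd is assumed to come from an earlier KVM_CREATE_VM):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

/* Sketch: instantiate an in-kernel vGIC through the device API. The legacy
 * ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0) remains available but only ever
 * creates a GICv2. */
static int create_vgic_device(int vm_fd, unsigned int gic_type)
{
	struct kvm_create_device dev = {
		.type  = gic_type, /* KVM_DEV_TYPE_ARM_VGIC_V2 or KVM_DEV_TYPE_ARM_VGIC_V3 */
		.flags = 0,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev) < 0) {
		perror("KVM_CREATE_DEVICE");
		return -1;
	}
	return dev.fd; /* device fd, used with KVM_SET_DEVICE_ATTR below */
}

The returned device fd is what the arm-vgic attribute groups documented in the next file operate on.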
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index df8b0c7540b6..3fb905429e8a 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -3,22 +3,42 @@ ARM Virtual Generic Interrupt Controller (VGIC)
 
 Device types supported:
   KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
+  KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
 
 Only one VGIC instance may be instantiated through either this API or the
 legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
 controller, requiring emulated user-space devices to inject interrupts to the
 VGIC instead of directly to CPUs.
 
+Creating a guest GICv3 device requires a host GICv3 as well.
+GICv3 implementations with hardware compatibility support allow a guest GICv2
+as well.
+
 Groups:
   KVM_DEV_ARM_VGIC_GRP_ADDR
    Attributes:
      KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
        Base address in the guest physical address space of the GIC distributor
-       register mappings.
+       register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+       This address needs to be 4K aligned and the region covers 4 KByte.
 
      KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
        Base address in the guest physical address space of the GIC virtual cpu
-       interface register mappings.
+       interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+       This address needs to be 4K aligned and the region covers 4 KByte.
+
+     KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
+       Base address in the guest physical address space of the GICv3 distributor
+       register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+       This address needs to be 64K aligned and the region covers 64 KByte.
+
+     KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit)
+       Base address in the guest physical address space of the GICv3
+       redistributor register mappings. There are two 64K pages for each
+       VCPU and all of the redistributor pages are contiguous.
+       Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+       This address needs to be 64K aligned.
+
 
   KVM_DEV_ARM_VGIC_GRP_DIST_REGS
    Attributes:
@@ -36,6 +56,7 @@ Groups:
     the register.
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
+    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
@@ -68,6 +89,7 @@ Groups:
 
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
+    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
@@ -81,3 +103,14 @@ Groups:
     -EINVAL: Value set is out of the expected range
     -EBUSY: Value has already be set, or GIC has already been initialized
             with default values.
+
+  KVM_DEV_ARM_VGIC_GRP_CTRL
+   Attributes:
+     KVM_DEV_ARM_VGIC_CTRL_INIT
+       request the initialization of the VGIC, no additional parameter in
+       kvm_device_attr.addr.
+   Errors:
+     -ENXIO: VGIC not properly configured as required prior to calling
+      this attribute
+     -ENODEV: no online VCPU
+     -ENOMEM: memory shortage when allocating vgic internal data
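
To illustrate the attribute groups documented above, here is a hedged sketch that places the GICv3 distributor and redistributor regions and then requests initialization via KVM_DEV_ARM_VGIC_CTRL_INIT. The vgic_fd is assumed to be the fd returned by KVM_CREATE_DEVICE for KVM_DEV_TYPE_ARM_VGIC_V3, and the two guest-physical base addresses are arbitrary example values chosen to satisfy the 64K alignment rule.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

static int configure_vgic_v3(int vgic_fd)
{
	uint64_t dist_base   = 0x08000000ULL; /* example, 64K aligned */
	uint64_t redist_base = 0x080a0000ULL; /* example, 64K aligned, 2 * 64K per VCPU */

	struct kvm_device_attr dist = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V3_ADDR_TYPE_DIST,
		.addr  = (uint64_t)(uintptr_t)&dist_base,
	};
	struct kvm_device_attr redist = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V3_ADDR_TYPE_REDIST,
		.addr  = (uint64_t)(uintptr_t)&redist_base,
	};
	struct kvm_device_attr init = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,
		/* no payload: kvm_device_attr.addr is not used for CTRL_INIT */
	};

	if (ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &dist) ||
	    ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &redist) ||
	    ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &init))
		return -1; /* -ENXIO/-ENODEV/-ENOMEM as listed above */
	return 0;
}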
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 3a67bec72d0c..25410b2d8bc1 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -96,6 +96,7 @@ extern char __kvm_hyp_code_end[];
96 96
97extern void __kvm_flush_vm_context(void); 97extern void __kvm_flush_vm_context(void);
98extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); 98extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
99extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
99 100
100extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 101extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
101#endif 102#endif
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 66ce17655bb9..c52861577567 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@
23#include <asm/kvm_asm.h> 23#include <asm/kvm_asm.h>
24#include <asm/kvm_mmio.h> 24#include <asm/kvm_mmio.h>
25#include <asm/kvm_arm.h> 25#include <asm/kvm_arm.h>
26#include <asm/cputype.h>
26 27
27unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); 28unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
28unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); 29unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
@@ -167,9 +168,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
167 return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; 168 return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
168} 169}
169 170
170static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) 171static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
171{ 172{
172 return vcpu->arch.cp15[c0_MPIDR]; 173 return vcpu->arch.cp15[c0_MPIDR] & MPIDR_HWID_BITMASK;
173} 174}
174 175
175static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 176static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 254e0650e48b..bde494654bcc 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -68,6 +68,7 @@ struct kvm_arch {
68 68
69 /* Interrupt controller */ 69 /* Interrupt controller */
70 struct vgic_dist vgic; 70 struct vgic_dist vgic;
71 int max_vcpus;
71}; 72};
72 73
73#define KVM_NR_MEM_OBJS 40 74#define KVM_NR_MEM_OBJS 40
@@ -234,6 +235,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
234int kvm_perf_init(void); 235int kvm_perf_init(void);
235int kvm_perf_teardown(void); 236int kvm_perf_teardown(void);
236 237
238void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
239
240struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
241
237static inline void kvm_arch_hardware_disable(void) {} 242static inline void kvm_arch_hardware_disable(void) {}
238static inline void kvm_arch_hardware_unsetup(void) {} 243static inline void kvm_arch_hardware_unsetup(void) {}
239static inline void kvm_arch_sync_events(struct kvm *kvm) {} 244static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index adcc0d7d3175..3f83db2f6cf0 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -37,6 +37,7 @@ struct kvm_exit_mmio {
37 u8 data[8]; 37 u8 data[8];
38 u32 len; 38 u32 len;
39 bool is_write; 39 bool is_write;
40 void *private;
40}; 41};
41 42
42static inline void kvm_prepare_mmio(struct kvm_run *run, 43static inline void kvm_prepare_mmio(struct kvm_run *run,
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 63e0ecc04901..2672cf84afd1 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -114,6 +114,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
114 pmd_val(*pmd) |= L_PMD_S2_RDWR; 114 pmd_val(*pmd) |= L_PMD_S2_RDWR;
115} 115}
116 116
117static inline void kvm_set_s2pte_readonly(pte_t *pte)
118{
119 pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
120}
121
122static inline bool kvm_s2pte_readonly(pte_t *pte)
123{
124 return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
125}
126
127static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
128{
129 pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
130}
131
132static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
133{
134 return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
135}
136
137
117/* Open coded p*d_addr_end that can deal with 64bit addresses */ 138/* Open coded p*d_addr_end that can deal with 64bit addresses */
118#define kvm_pgd_addr_end(addr, end) \ 139#define kvm_pgd_addr_end(addr, end) \
119({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ 140({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index a31ecdad4b59..ae1d30a1aaae 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -130,6 +130,7 @@
130#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ 130#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
131#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 131#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
132 132
133#define L_PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[1] */
133#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 134#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
134 135
135/* 136/*
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 09ee408c1a67..0db25bc32864 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -175,6 +175,8 @@ struct kvm_arch_memory_slot {
175#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 175#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
176#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) 176#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
177#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 177#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
178#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
179#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
178 180
179/* KVM_IRQ_LINE irq field index values */ 181/* KVM_IRQ_LINE irq field index values */
180#define KVM_ARM_IRQ_TYPE_SHIFT 24 182#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd299b1a8..a8d1ace3ea51 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -21,8 +21,10 @@ config KVM
21 select PREEMPT_NOTIFIERS 21 select PREEMPT_NOTIFIERS
22 select ANON_INODES 22 select ANON_INODES
23 select HAVE_KVM_CPU_RELAX_INTERCEPT 23 select HAVE_KVM_CPU_RELAX_INTERCEPT
24 select HAVE_KVM_ARCH_TLB_FLUSH_ALL
24 select KVM_MMIO 25 select KVM_MMIO
25 select KVM_ARM_HOST 26 select KVM_ARM_HOST
27 select KVM_GENERIC_DIRTYLOG_READ_PROTECT
26 depends on ARM_VIRT_EXT && ARM_LPAE 28 depends on ARM_VIRT_EXT && ARM_LPAE
27 ---help--- 29 ---help---
28 Support hosting virtualized guest machines. You will also 30 Support hosting virtualized guest machines. You will also
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f7057ed045b6..443b8bea43e9 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -22,4 +22,5 @@ obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
22obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o 22obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
23obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o 23obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
24obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o 24obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
25obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
25obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o 26obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2d6d91001062..6fbfa5fff05d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -132,6 +132,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
132 /* Mark the initial VMID generation invalid */ 132 /* Mark the initial VMID generation invalid */
133 kvm->arch.vmid_gen = 0; 133 kvm->arch.vmid_gen = 0;
134 134
135 /* The maximum number of VCPUs is limited by the host's GIC model */
136 kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus();
137
135 return ret; 138 return ret;
136out_free_stage2_pgd: 139out_free_stage2_pgd:
137 kvm_free_stage2_pgd(kvm); 140 kvm_free_stage2_pgd(kvm);
@@ -218,6 +221,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
218 goto out; 221 goto out;
219 } 222 }
220 223
224 if (id >= kvm->arch.max_vcpus) {
225 err = -EINVAL;
226 goto out;
227 }
228
221 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 229 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
222 if (!vcpu) { 230 if (!vcpu) {
223 err = -ENOMEM; 231 err = -ENOMEM;
@@ -787,9 +795,39 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
787 } 795 }
788} 796}
789 797
798/**
799 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
800 * @kvm: kvm instance
801 * @log: slot id and address to which we copy the log
802 *
803 * Steps 1-4 below provide general overview of dirty page logging. See
804 * kvm_get_dirty_log_protect() function description for additional details.
805 *
806 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
807 * always flush the TLB (step 4) even if previous step failed and the dirty
808 * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
809 * does not preclude user space subsequent dirty log read. Flushing TLB ensures
810 * writes will be marked dirty for next log read.
811 *
812 * 1. Take a snapshot of the bit and clear it if needed.
813 * 2. Write protect the corresponding page.
814 * 3. Copy the snapshot to the userspace.
815 * 4. Flush TLB's if needed.
816 */
790int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 817int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
791{ 818{
792 return -EINVAL; 819 bool is_dirty = false;
820 int r;
821
822 mutex_lock(&kvm->slots_lock);
823
824 r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
825
826 if (is_dirty)
827 kvm_flush_remote_tlbs(kvm);
828
829 mutex_unlock(&kvm->slots_lock);
830 return r;
793} 831}
794 832
795static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, 833static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
@@ -821,7 +859,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
821 switch (ioctl) { 859 switch (ioctl) {
822 case KVM_CREATE_IRQCHIP: { 860 case KVM_CREATE_IRQCHIP: {
823 if (vgic_present) 861 if (vgic_present)
824 return kvm_vgic_create(kvm); 862 return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
825 else 863 else
826 return -ENXIO; 864 return -ENXIO;
827 } 865 }
@@ -1045,6 +1083,19 @@ static void check_kvm_target_cpu(void *ret)
1045 *(int *)ret = kvm_target_cpu(); 1083 *(int *)ret = kvm_target_cpu();
1046} 1084}
1047 1085
1086struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
1087{
1088 struct kvm_vcpu *vcpu;
1089 int i;
1090
1091 mpidr &= MPIDR_HWID_BITMASK;
1092 kvm_for_each_vcpu(i, vcpu, kvm) {
1093 if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
1094 return vcpu;
1095 }
1096 return NULL;
1097}
1098
1048/** 1099/**
1049 * Initialize Hyp-mode and memory mappings on all CPUs. 1100 * Initialize Hyp-mode and memory mappings on all CPUs.
1050 */ 1101 */
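
The kvm_vm_ioctl_get_dirty_log() implementation added above plugs ARM into the generic dirty-log path. For context, a hedged userspace sketch of the matching flow follows; the read_dirty_log() helper name, the slot layout and the 4K page-size assumption are illustrative, not part of the patch.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <stdlib.h>

/* Sketch: turn on dirty logging for an existing memslot, then fetch and
 * consume the dirty bitmap (one bit per 4K guest page). */
static int read_dirty_log(int vm_fd, uint32_t slot, uint64_t guest_phys,
			  uint64_t size, void *host_va)
{
	struct kvm_userspace_memory_region region = {
		.slot            = slot,
		.flags           = KVM_MEM_LOG_DIRTY_PAGES, /* start logging */
		.guest_phys_addr = guest_phys,
		.memory_size     = size,
		.userspace_addr  = (uint64_t)(uintptr_t)host_va,
	};
	size_t bitmap_size = ((size / 4096) + 7) / 8;
	unsigned long *bitmap = calloc(1, bitmap_size);
	struct kvm_dirty_log log = {
		.slot         = slot,
		.dirty_bitmap = bitmap, /* filled, then cleared, by the kernel */
	};

	if (!bitmap)
		return -1;
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0 ||
	    ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(bitmap);
		return -1;
	}
	/* ... walk the bitmap and re-send the dirty pages here ... */
	free(bitmap);
	return 0;
}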
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a96a8043277c..95f12b2ccdcb 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -87,11 +87,13 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
87 */ 87 */
88static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 88static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
89{ 89{
90 trace_kvm_wfi(*vcpu_pc(vcpu)); 90 if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
91 if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) 91 trace_kvm_wfx(*vcpu_pc(vcpu), true);
92 kvm_vcpu_on_spin(vcpu); 92 kvm_vcpu_on_spin(vcpu);
93 else 93 } else {
94 trace_kvm_wfx(*vcpu_pc(vcpu), false);
94 kvm_vcpu_block(vcpu); 95 kvm_vcpu_block(vcpu);
96 }
95 97
96 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); 98 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
97 99
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 01dcb0e752d9..79caf79b304a 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -66,6 +66,17 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
66 bx lr 66 bx lr
67ENDPROC(__kvm_tlb_flush_vmid_ipa) 67ENDPROC(__kvm_tlb_flush_vmid_ipa)
68 68
69/**
70 * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
71 *
72 * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address
73 * parameter
74 */
75
76ENTRY(__kvm_tlb_flush_vmid)
77 b __kvm_tlb_flush_vmid_ipa
78ENDPROC(__kvm_tlb_flush_vmid)
79
69/******************************************************************** 80/********************************************************************
70 * Flush TLBs and instruction caches of all CPUs inside the inner-shareable 81 * Flush TLBs and instruction caches of all CPUs inside the inner-shareable
71 * domain, for all VMIDs 82 * domain, for all VMIDs
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 1dc9778a00af..74aeabaa3c4d 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -45,6 +45,26 @@ static phys_addr_t hyp_idmap_vector;
45#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) 45#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
46 46
47#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) 47#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
48#define kvm_pud_huge(_x) pud_huge(_x)
49
50#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
51#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
52
53static bool memslot_is_logging(struct kvm_memory_slot *memslot)
54{
55 return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
56}
57
58/**
59 * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
60 * @kvm: pointer to kvm structure.
61 *
62 * Interface to HYP function to flush all VM TLB entries
63 */
64void kvm_flush_remote_tlbs(struct kvm *kvm)
65{
66 kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
67}
48 68
49static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 69static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
50{ 70{
@@ -58,6 +78,25 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
58 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 78 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
59} 79}
60 80
81/**
82 * stage2_dissolve_pmd() - clear and flush huge PMD entry
83 * @kvm: pointer to kvm structure.
84 * @addr: IPA
85 * @pmd: pmd pointer for IPA
86 *
87 * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all
88 * pages in the range dirty.
89 */
90static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
91{
92 if (!kvm_pmd_huge(*pmd))
93 return;
94
95 pmd_clear(pmd);
96 kvm_tlb_flush_vmid_ipa(kvm, addr);
97 put_page(virt_to_page(pmd));
98}
99
61static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, 100static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
62 int min, int max) 101 int min, int max)
63{ 102{
@@ -767,10 +806,15 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
767} 806}
768 807
769static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, 808static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
770 phys_addr_t addr, const pte_t *new_pte, bool iomap) 809 phys_addr_t addr, const pte_t *new_pte,
810 unsigned long flags)
771{ 811{
772 pmd_t *pmd; 812 pmd_t *pmd;
773 pte_t *pte, old_pte; 813 pte_t *pte, old_pte;
814 bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
815 bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
816
817 VM_BUG_ON(logging_active && !cache);
774 818
775 /* Create stage-2 page table mapping - Levels 0 and 1 */ 819 /* Create stage-2 page table mapping - Levels 0 and 1 */
776 pmd = stage2_get_pmd(kvm, cache, addr); 820 pmd = stage2_get_pmd(kvm, cache, addr);
@@ -782,6 +826,13 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
782 return 0; 826 return 0;
783 } 827 }
784 828
829 /*
830 * While dirty page logging - dissolve huge PMD, then continue on to
831 * allocate page.
832 */
833 if (logging_active)
834 stage2_dissolve_pmd(kvm, addr, pmd);
835
785 /* Create stage-2 page mappings - Level 2 */ 836 /* Create stage-2 page mappings - Level 2 */
786 if (pmd_none(*pmd)) { 837 if (pmd_none(*pmd)) {
787 if (!cache) 838 if (!cache)
@@ -838,7 +889,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
838 if (ret) 889 if (ret)
839 goto out; 890 goto out;
840 spin_lock(&kvm->mmu_lock); 891 spin_lock(&kvm->mmu_lock);
841 ret = stage2_set_pte(kvm, &cache, addr, &pte, true); 892 ret = stage2_set_pte(kvm, &cache, addr, &pte,
893 KVM_S2PTE_FLAG_IS_IOMAP);
842 spin_unlock(&kvm->mmu_lock); 894 spin_unlock(&kvm->mmu_lock);
843 if (ret) 895 if (ret)
844 goto out; 896 goto out;
@@ -905,6 +957,151 @@ static bool kvm_is_device_pfn(unsigned long pfn)
905 return !pfn_valid(pfn); 957 return !pfn_valid(pfn);
906} 958}
907 959
960/**
961 * stage2_wp_ptes - write protect PMD range
962 * @pmd: pointer to pmd entry
963 * @addr: range start address
964 * @end: range end address
965 */
966static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
967{
968 pte_t *pte;
969
970 pte = pte_offset_kernel(pmd, addr);
971 do {
972 if (!pte_none(*pte)) {
973 if (!kvm_s2pte_readonly(pte))
974 kvm_set_s2pte_readonly(pte);
975 }
976 } while (pte++, addr += PAGE_SIZE, addr != end);
977}
978
979/**
980 * stage2_wp_pmds - write protect PUD range
981 * @pud: pointer to pud entry
982 * @addr: range start address
983 * @end: range end address
984 */
985static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
986{
987 pmd_t *pmd;
988 phys_addr_t next;
989
990 pmd = pmd_offset(pud, addr);
991
992 do {
993 next = kvm_pmd_addr_end(addr, end);
994 if (!pmd_none(*pmd)) {
995 if (kvm_pmd_huge(*pmd)) {
996 if (!kvm_s2pmd_readonly(pmd))
997 kvm_set_s2pmd_readonly(pmd);
998 } else {
999 stage2_wp_ptes(pmd, addr, next);
1000 }
1001 }
1002 } while (pmd++, addr = next, addr != end);
1003}
1004
1005/**
1006 * stage2_wp_puds - write protect PGD range
1007 * @pgd: pointer to pgd entry
1008 * @addr: range start address
1009 * @end: range end address
1010 *
1011 * Process PUD entries, for a huge PUD we cause a panic.
1012 */
1013static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
1014{
1015 pud_t *pud;
1016 phys_addr_t next;
1017
1018 pud = pud_offset(pgd, addr);
1019 do {
1020 next = kvm_pud_addr_end(addr, end);
1021 if (!pud_none(*pud)) {
1022 /* TODO:PUD not supported, revisit later if supported */
1023 BUG_ON(kvm_pud_huge(*pud));
1024 stage2_wp_pmds(pud, addr, next);
1025 }
1026 } while (pud++, addr = next, addr != end);
1027}
1028
1029/**
1030 * stage2_wp_range() - write protect stage2 memory region range
1031 * @kvm: The KVM pointer
1032 * @addr: Start address of range
1033 * @end: End address of range
1034 */
1035static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1036{
1037 pgd_t *pgd;
1038 phys_addr_t next;
1039
1040 pgd = kvm->arch.pgd + pgd_index(addr);
1041 do {
1042 /*
1043 * Release kvm_mmu_lock periodically if the memory region is
1044 * large. Otherwise, we may see kernel panics with
1045 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
1046 * CONFIG_LOCKDEP. Additionally, holding the lock too long
1047 * will also starve other vCPUs.
1048 */
1049 if (need_resched() || spin_needbreak(&kvm->mmu_lock))
1050 cond_resched_lock(&kvm->mmu_lock);
1051
1052 next = kvm_pgd_addr_end(addr, end);
1053 if (pgd_present(*pgd))
1054 stage2_wp_puds(pgd, addr, next);
1055 } while (pgd++, addr = next, addr != end);
1056}
1057
1058/**
1059 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
1060 * @kvm: The KVM pointer
1061 * @slot: The memory slot to write protect
1062 *
1063 * Called to start logging dirty pages after memory region
1064 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
1065 * all present PMD and PTEs are write protected in the memory region.
1066 * Afterwards read of dirty page log can be called.
1067 *
1068 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
1069 * serializing operations for VM memory regions.
1070 */
1071void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
1072{
1073 struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot);
1074 phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
1075 phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
1076
1077 spin_lock(&kvm->mmu_lock);
1078 stage2_wp_range(kvm, start, end);
1079 spin_unlock(&kvm->mmu_lock);
1080 kvm_flush_remote_tlbs(kvm);
1081}
1082
1083/**
1084 * kvm_arch_mmu_write_protect_pt_masked() - write protect dirty pages
1085 * @kvm: The KVM pointer
1086 * @slot: The memory slot associated with mask
1087 * @gfn_offset: The gfn offset in memory slot
1088 * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
1089 * slot to be write protected
1090 *
1091 * Walks bits set in mask write protects the associated pte's. Caller must
1092 * acquire kvm_mmu_lock.
1093 */
1094void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm,
1095 struct kvm_memory_slot *slot,
1096 gfn_t gfn_offset, unsigned long mask)
1097{
1098 phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
1099 phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
1100 phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
1101
1102 stage2_wp_range(kvm, start, end);
1103}
1104
908static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, 1105static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
909 struct kvm_memory_slot *memslot, unsigned long hva, 1106 struct kvm_memory_slot *memslot, unsigned long hva,
910 unsigned long fault_status) 1107 unsigned long fault_status)
@@ -919,6 +1116,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
919 pfn_t pfn; 1116 pfn_t pfn;
920 pgprot_t mem_type = PAGE_S2; 1117 pgprot_t mem_type = PAGE_S2;
921 bool fault_ipa_uncached; 1118 bool fault_ipa_uncached;
1119 bool logging_active = memslot_is_logging(memslot);
1120 unsigned long flags = 0;
922 1121
923 write_fault = kvm_is_write_fault(vcpu); 1122 write_fault = kvm_is_write_fault(vcpu);
924 if (fault_status == FSC_PERM && !write_fault) { 1123 if (fault_status == FSC_PERM && !write_fault) {
@@ -935,7 +1134,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
935 return -EFAULT; 1134 return -EFAULT;
936 } 1135 }
937 1136
938 if (is_vm_hugetlb_page(vma)) { 1137 if (is_vm_hugetlb_page(vma) && !logging_active) {
939 hugetlb = true; 1138 hugetlb = true;
940 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; 1139 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
941 } else { 1140 } else {
@@ -976,12 +1175,30 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
976 if (is_error_pfn(pfn)) 1175 if (is_error_pfn(pfn))
977 return -EFAULT; 1176 return -EFAULT;
978 1177
979 if (kvm_is_device_pfn(pfn)) 1178 if (kvm_is_device_pfn(pfn)) {
980 mem_type = PAGE_S2_DEVICE; 1179 mem_type = PAGE_S2_DEVICE;
1180 flags |= KVM_S2PTE_FLAG_IS_IOMAP;
1181 } else if (logging_active) {
1182 /*
1183 * Faults on pages in a memslot with logging enabled
1184 * should not be mapped with huge pages (it introduces churn
1185 * and performance degradation), so force a pte mapping.
1186 */
1187 force_pte = true;
1188 flags |= KVM_S2_FLAG_LOGGING_ACTIVE;
1189
1190 /*
1191 * Only actually map the page as writable if this was a write
1192 * fault.
1193 */
1194 if (!write_fault)
1195 writable = false;
1196 }
981 1197
982 spin_lock(&kvm->mmu_lock); 1198 spin_lock(&kvm->mmu_lock);
983 if (mmu_notifier_retry(kvm, mmu_seq)) 1199 if (mmu_notifier_retry(kvm, mmu_seq))
984 goto out_unlock; 1200 goto out_unlock;
1201
985 if (!hugetlb && !force_pte) 1202 if (!hugetlb && !force_pte)
986 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa); 1203 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
987 1204
@@ -999,17 +1216,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
999 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); 1216 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1000 } else { 1217 } else {
1001 pte_t new_pte = pfn_pte(pfn, mem_type); 1218 pte_t new_pte = pfn_pte(pfn, mem_type);
1219
1002 if (writable) { 1220 if (writable) {
1003 kvm_set_s2pte_writable(&new_pte); 1221 kvm_set_s2pte_writable(&new_pte);
1004 kvm_set_pfn_dirty(pfn); 1222 kvm_set_pfn_dirty(pfn);
1223 mark_page_dirty(kvm, gfn);
1005 } 1224 }
1006 coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, 1225 coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
1007 fault_ipa_uncached); 1226 fault_ipa_uncached);
1008 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, 1227 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
1009 pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
1010 } 1228 }
1011 1229
1012
1013out_unlock: 1230out_unlock:
1014 spin_unlock(&kvm->mmu_lock); 1231 spin_unlock(&kvm->mmu_lock);
1015 kvm_release_pfn_clean(pfn); 1232 kvm_release_pfn_clean(pfn);
@@ -1159,7 +1376,14 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
1159{ 1376{
1160 pte_t *pte = (pte_t *)data; 1377 pte_t *pte = (pte_t *)data;
1161 1378
1162 stage2_set_pte(kvm, NULL, gpa, pte, false); 1379 /*
1380 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
1381 * flag clear because MMU notifiers will have unmapped a huge PMD before
1382 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and
1383 * therefore stage2_set_pte() never needs to clear out a huge PMD
1384 * through this calling path.
1385 */
1386 stage2_set_pte(kvm, NULL, gpa, pte, 0);
1163} 1387}
1164 1388
1165 1389
@@ -1292,6 +1516,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
1292 const struct kvm_memory_slot *old, 1516 const struct kvm_memory_slot *old,
1293 enum kvm_mr_change change) 1517 enum kvm_mr_change change)
1294{ 1518{
1519 /*
1520 * At this point memslot has been committed and there is an
1521 * allocated dirty_bitmap[], dirty pages will be be tracked while the
1522 * memory slot is write protected.
1523 */
1524 if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
1525 kvm_mmu_wp_memory_region(kvm, mem->slot);
1295} 1526}
1296 1527
1297int kvm_arch_prepare_memory_region(struct kvm *kvm, 1528int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1304,7 +1535,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
1304 bool writable = !(mem->flags & KVM_MEM_READONLY); 1535 bool writable = !(mem->flags & KVM_MEM_READONLY);
1305 int ret = 0; 1536 int ret = 0;
1306 1537
1307 if (change != KVM_MR_CREATE && change != KVM_MR_MOVE) 1538 if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
1539 change != KVM_MR_FLAGS_ONLY)
1308 return 0; 1540 return 0;
1309 1541
1310 /* 1542 /*
@@ -1355,6 +1587,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
1355 phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + 1587 phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
1356 vm_start - vma->vm_start; 1588 vm_start - vma->vm_start;
1357 1589
1590 /* IO region dirty page logging not allowed */
1591 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
1592 return -EINVAL;
1593
1358 ret = kvm_phys_addr_ioremap(kvm, gpa, pa, 1594 ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
1359 vm_end - vm_start, 1595 vm_end - vm_start,
1360 writable); 1596 writable);
@@ -1364,6 +1600,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
1364 hva = vm_end; 1600 hva = vm_end;
1365 } while (hva < reg_end); 1601 } while (hva < reg_end);
1366 1602
1603 if (change == KVM_MR_FLAGS_ONLY)
1604 return ret;
1605
1367 spin_lock(&kvm->mmu_lock); 1606 spin_lock(&kvm->mmu_lock);
1368 if (ret) 1607 if (ret)
1369 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); 1608 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
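
kvm_arch_mmu_write_protect_pt_masked() above converts one dirty-bitmap word back into an IPA range with __ffs()/__fls(). A small standalone illustration of that arithmetic follows (plain C; GCC bit builtins stand in for the kernel helpers, and a 64-bit unsigned long plus 4K pages are assumed).

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumption: 4K pages, as in the ARM code above */

/* The lowest and highest set bits of 'mask' pick the first and last gfn,
 * relative to base_gfn + gfn_offset, that get write protected. */
static void mask_to_range(uint64_t base_gfn, uint64_t gfn_offset,
			  unsigned long mask)
{
	uint64_t first = base_gfn + gfn_offset + __builtin_ctzl(mask);
	uint64_t last  = base_gfn + gfn_offset + (63 - __builtin_clzl(mask));
	uint64_t start = first << PAGE_SHIFT;
	uint64_t end   = (last + 1) << PAGE_SHIFT;

	printf("write-protect IPA range [0x%llx, 0x%llx)\n",
	       (unsigned long long)start, (unsigned long long)end);
}

int main(void)
{
	/* Pages 1, 3 and 5 past gfn 0x100 are dirty in this example word:
	 * prints [0x101000, 0x106000). */
	mask_to_range(0x100, 0, (1UL << 1) | (1UL << 3) | (1UL << 5));
	return 0;
}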
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 58cb3248d277..02fa8eff6ae1 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -22,6 +22,7 @@
22#include <asm/cputype.h> 22#include <asm/cputype.h>
23#include <asm/kvm_emulate.h> 23#include <asm/kvm_emulate.h>
24#include <asm/kvm_psci.h> 24#include <asm/kvm_psci.h>
25#include <asm/kvm_host.h>
25 26
26/* 27/*
27 * This is an implementation of the Power State Coordination Interface 28 * This is an implementation of the Power State Coordination Interface
@@ -66,25 +67,17 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
66static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) 67static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
67{ 68{
68 struct kvm *kvm = source_vcpu->kvm; 69 struct kvm *kvm = source_vcpu->kvm;
69 struct kvm_vcpu *vcpu = NULL, *tmp; 70 struct kvm_vcpu *vcpu = NULL;
70 wait_queue_head_t *wq; 71 wait_queue_head_t *wq;
71 unsigned long cpu_id; 72 unsigned long cpu_id;
72 unsigned long context_id; 73 unsigned long context_id;
73 unsigned long mpidr;
74 phys_addr_t target_pc; 74 phys_addr_t target_pc;
75 int i;
76 75
77 cpu_id = *vcpu_reg(source_vcpu, 1); 76 cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
78 if (vcpu_mode_is_32bit(source_vcpu)) 77 if (vcpu_mode_is_32bit(source_vcpu))
79 cpu_id &= ~((u32) 0); 78 cpu_id &= ~((u32) 0);
80 79
81 kvm_for_each_vcpu(i, tmp, kvm) { 80 vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
82 mpidr = kvm_vcpu_get_mpidr(tmp);
83 if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
84 vcpu = tmp;
85 break;
86 }
87 }
88 81
89 /* 82 /*
90 * Make sure the caller requested a valid CPU and that the CPU is 83 * Make sure the caller requested a valid CPU and that the CPU is
@@ -155,7 +148,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
155 * then ON else OFF 148 * then ON else OFF
156 */ 149 */
157 kvm_for_each_vcpu(i, tmp, kvm) { 150 kvm_for_each_vcpu(i, tmp, kvm) {
158 mpidr = kvm_vcpu_get_mpidr(tmp); 151 mpidr = kvm_vcpu_get_mpidr_aff(tmp);
159 if (((mpidr & target_affinity_mask) == target_affinity) && 152 if (((mpidr & target_affinity_mask) == target_affinity) &&
160 !tmp->arch.pause) { 153 !tmp->arch.pause) {
161 return PSCI_0_2_AFFINITY_LEVEL_ON; 154 return PSCI_0_2_AFFINITY_LEVEL_ON;
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index b1d640f78623..f741449121f3 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -140,19 +140,22 @@ TRACE_EVENT(kvm_emulate_cp15_imp,
140 __entry->CRm, __entry->Op2) 140 __entry->CRm, __entry->Op2)
141); 141);
142 142
143TRACE_EVENT(kvm_wfi, 143TRACE_EVENT(kvm_wfx,
144 TP_PROTO(unsigned long vcpu_pc), 144 TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
145 TP_ARGS(vcpu_pc), 145 TP_ARGS(vcpu_pc, is_wfe),
146 146
147 TP_STRUCT__entry( 147 TP_STRUCT__entry(
148 __field( unsigned long, vcpu_pc ) 148 __field( unsigned long, vcpu_pc )
149 __field( bool, is_wfe )
149 ), 150 ),
150 151
151 TP_fast_assign( 152 TP_fast_assign(
152 __entry->vcpu_pc = vcpu_pc; 153 __entry->vcpu_pc = vcpu_pc;
154 __entry->is_wfe = is_wfe;
153 ), 155 ),
154 156
155 TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc) 157 TP_printk("guest executed wf%c at: 0x%08lx",
158 __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
156); 159);
157 160
158TRACE_EVENT(kvm_unmap_hva, 161TRACE_EVENT(kvm_unmap_hva,
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 62167090937d..92bbae381598 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -96,6 +96,7 @@
96#define ESR_ELx_COND_SHIFT (20) 96#define ESR_ELx_COND_SHIFT (20)
97#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) 97#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
98#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) 98#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
99#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
99 100
100#ifndef __ASSEMBLY__ 101#ifndef __ASSEMBLY__
101#include <asm/types.h> 102#include <asm/types.h>
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 483842180f8f..4f7310fa77f0 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -126,6 +126,7 @@ extern char __kvm_hyp_vector[];
126 126
127extern void __kvm_flush_vm_context(void); 127extern void __kvm_flush_vm_context(void);
128extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); 128extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
129extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
129 130
130extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); 131extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
131 132
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5c56c0d2cef1..c3baa971edab 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -29,6 +29,7 @@
29#include <asm/kvm_asm.h> 29#include <asm/kvm_asm.h>
30#include <asm/kvm_mmio.h> 30#include <asm/kvm_mmio.h>
31#include <asm/ptrace.h> 31#include <asm/ptrace.h>
32#include <asm/cputype.h>
32 33
33unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); 34unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
34unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu); 35unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
@@ -128,6 +129,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
128 return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; 129 return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
129} 130}
130 131
132static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
133{
134 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
135}
136
131static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) 137static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
132{ 138{
133 return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); 139 return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
@@ -189,9 +195,9 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
189 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; 195 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
190} 196}
191 197
192static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) 198static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
193{ 199{
194 return vcpu_sys_reg(vcpu, MPIDR_EL1); 200 return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
195} 201}
196 202
197static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) 203static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0b7dfdb931df..2c49aa4ac818 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -59,6 +59,9 @@ struct kvm_arch {
59 /* VTTBR value associated with above pgd and vmid */ 59 /* VTTBR value associated with above pgd and vmid */
60 u64 vttbr; 60 u64 vttbr;
61 61
62 /* The maximum number of vCPUs depends on the used GIC model */
63 int max_vcpus;
64
62 /* Interrupt controller */ 65 /* Interrupt controller */
63 struct vgic_dist vgic; 66 struct vgic_dist vgic;
64 67
@@ -199,6 +202,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
199 202
200u64 kvm_call_hyp(void *hypfn, ...); 203u64 kvm_call_hyp(void *hypfn, ...);
201void force_vm_exit(const cpumask_t *mask); 204void force_vm_exit(const cpumask_t *mask);
205void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
202 206
203int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, 207int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
204 int exception_index); 208 int exception_index);
@@ -206,6 +210,8 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
206int kvm_perf_init(void); 210int kvm_perf_init(void);
207int kvm_perf_teardown(void); 211int kvm_perf_teardown(void);
208 212
213struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
214
209static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, 215static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
210 phys_addr_t pgd_ptr, 216 phys_addr_t pgd_ptr,
211 unsigned long hyp_stack_ptr, 217 unsigned long hyp_stack_ptr,
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index fc2f689c0694..9f52beb7cb13 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -40,6 +40,7 @@ struct kvm_exit_mmio {
40 u8 data[8]; 40 u8 data[8];
41 u32 len; 41 u32 len;
42 bool is_write; 42 bool is_write;
43 void *private;
43}; 44};
44 45
45static inline void kvm_prepare_mmio(struct kvm_run *run, 46static inline void kvm_prepare_mmio(struct kvm_run *run,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 14a74f136272..66577581ce68 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -118,6 +118,27 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
118 pmd_val(*pmd) |= PMD_S2_RDWR; 118 pmd_val(*pmd) |= PMD_S2_RDWR;
119} 119}
120 120
121static inline void kvm_set_s2pte_readonly(pte_t *pte)
122{
123 pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
124}
125
126static inline bool kvm_s2pte_readonly(pte_t *pte)
127{
128 return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
129}
130
131static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
132{
133 pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY;
134}
135
136static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
137{
138 return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY;
139}
140
141
121#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) 142#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
122#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) 143#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
123#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) 144#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 88174e0bfafe..5f930cc9ea83 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -119,6 +119,7 @@
119#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ 119#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
120#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ 120#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
121 121
122#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */
122#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ 123#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
123 124
124/* 125/*
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 8e38878c87c6..3ef77a466018 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -78,6 +78,13 @@ struct kvm_regs {
78#define KVM_VGIC_V2_DIST_SIZE 0x1000 78#define KVM_VGIC_V2_DIST_SIZE 0x1000
79#define KVM_VGIC_V2_CPU_SIZE 0x2000 79#define KVM_VGIC_V2_CPU_SIZE 0x2000
80 80
81/* Supported VGICv3 address types */
82#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
83#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
84
85#define KVM_VGIC_V3_DIST_SIZE SZ_64K
86#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
87
81#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ 88#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
82#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ 89#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
83#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ 90#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
@@ -161,6 +168,8 @@ struct kvm_arch_memory_slot {
161#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 168#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
162#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) 169#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
163#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 170#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
171#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
172#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
164 173
165/* KVM_IRQ_LINE irq field index values */ 174/* KVM_IRQ_LINE irq field index values */
166#define KVM_ARM_IRQ_TYPE_SHIFT 24 175#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9a9fce090d58..9d34486985fd 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -140,6 +140,7 @@ int main(void)
140 DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); 140 DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
141 DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); 141 DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
142 DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); 142 DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
143 DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
143 DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); 144 DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
144 DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); 145 DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
145 DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); 146 DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8ba85e9ea388..3ce389b3c21c 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,10 +22,12 @@ config KVM
22 select PREEMPT_NOTIFIERS 22 select PREEMPT_NOTIFIERS
23 select ANON_INODES 23 select ANON_INODES
24 select HAVE_KVM_CPU_RELAX_INTERCEPT 24 select HAVE_KVM_CPU_RELAX_INTERCEPT
25 select HAVE_KVM_ARCH_TLB_FLUSH_ALL
25 select KVM_MMIO 26 select KVM_MMIO
26 select KVM_ARM_HOST 27 select KVM_ARM_HOST
27 select KVM_ARM_VGIC 28 select KVM_ARM_VGIC
28 select KVM_ARM_TIMER 29 select KVM_ARM_TIMER
30 select KVM_GENERIC_DIRTYLOG_READ_PROTECT
29 ---help--- 31 ---help---
30 Support hosting virtualized guest machines. 32 Support hosting virtualized guest machines.
31 33
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 32a096174b94..4e6e09ee4033 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -21,7 +21,9 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
21 21
22kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o 22kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
23kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o 23kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
24kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
24kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o 25kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
25kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o 26kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
27kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
26kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o 28kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
27kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o 29kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 29b184a8f3f8..524fa25671fc 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,12 +28,18 @@
28#include <asm/kvm_mmu.h> 28#include <asm/kvm_mmu.h>
29#include <asm/kvm_psci.h> 29#include <asm/kvm_psci.h>
30 30
31#define CREATE_TRACE_POINTS
32#include "trace.h"
33
31typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); 34typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
32 35
33static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) 36static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
34{ 37{
35 int ret; 38 int ret;
36 39
40 trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
41 kvm_vcpu_hvc_get_imm(vcpu));
42
37 ret = kvm_psci_call(vcpu); 43 ret = kvm_psci_call(vcpu);
38 if (ret < 0) { 44 if (ret < 0) {
39 kvm_inject_undefined(vcpu); 45 kvm_inject_undefined(vcpu);
@@ -63,10 +69,13 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
63 */ 69 */
64static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) 70static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
65{ 71{
66 if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) 72 if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
73 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
67 kvm_vcpu_on_spin(vcpu); 74 kvm_vcpu_on_spin(vcpu);
68 else 75 } else {
76 trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
69 kvm_vcpu_block(vcpu); 77 kvm_vcpu_block(vcpu);
78 }
70 79
71 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); 80 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
72 81
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index c0d820280a5e..31b4911b8522 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -1031,6 +1031,28 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
1031 ret 1031 ret
1032ENDPROC(__kvm_tlb_flush_vmid_ipa) 1032ENDPROC(__kvm_tlb_flush_vmid_ipa)
1033 1033
1034/**
1035 * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
1036 * @struct kvm *kvm - pointer to kvm structure
1037 *
1038 * Invalidates all Stage 1 and 2 TLB entries for current VMID.
1039 */
1040ENTRY(__kvm_tlb_flush_vmid)
1041 dsb ishst
1042
1043 kern_hyp_va x0
1044 ldr x2, [x0, #KVM_VTTBR]
1045 msr vttbr_el2, x2
1046 isb
1047
1048 tlbi vmalls12e1is
1049 dsb ish
1050 isb
1051
1052 msr vttbr_el2, xzr
1053 ret
1054ENDPROC(__kvm_tlb_flush_vmid)
1055
1034ENTRY(__kvm_flush_vm_context) 1056ENTRY(__kvm_flush_vm_context)
1035 dsb ishst 1057 dsb ishst
1036 tlbi alle1is 1058 tlbi alle1is
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 6b859d7a48e7..7ad7af51856f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -168,6 +168,27 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
168 return true; 168 return true;
169} 169}
170 170
171/*
172 * Trap handler for the GICv3 SGI generation system register.
173 * Forward the request to the VGIC emulation.
174 * The cp15_64 code makes sure this automatically works
175 * for both AArch64 and AArch32 accesses.
176 */
177static bool access_gic_sgi(struct kvm_vcpu *vcpu,
178 const struct sys_reg_params *p,
179 const struct sys_reg_desc *r)
180{
181 u64 val;
182
183 if (!p->is_write)
184 return read_from_write_only(vcpu, p);
185
186 val = *vcpu_reg(vcpu, p->Rt);
187 vgic_v3_dispatch_sgi(vcpu, val);
188
189 return true;
190}
191
171static bool trap_raz_wi(struct kvm_vcpu *vcpu, 192static bool trap_raz_wi(struct kvm_vcpu *vcpu,
172 const struct sys_reg_params *p, 193 const struct sys_reg_params *p,
173 const struct sys_reg_desc *r) 194 const struct sys_reg_desc *r)
@@ -255,10 +276,19 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
255 276
256static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) 277static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
257{ 278{
279 u64 mpidr;
280
258 /* 281 /*
259 * Simply map the vcpu_id into the Aff0 field of the MPIDR. 282 * Map the vcpu_id into the first three affinity level fields of
283 * the MPIDR. We limit the number of VCPUs in level 0 due to a
284 * limitation to 16 CPUs in that level in the ICC_SGIxR registers
285 * of the GICv3 to be able to address each CPU directly when
286 * sending IPIs.
260 */ 287 */
261 vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff); 288 mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
289 mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
290 mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
291 vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
262} 292}
263 293
264/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ 294/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
@@ -428,6 +458,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
428 { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), 458 { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
429 NULL, reset_val, VBAR_EL1, 0 }, 459 NULL, reset_val, VBAR_EL1, 0 },
430 460
461 /* ICC_SGI1R_EL1 */
462 { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101),
463 access_gic_sgi },
431 /* ICC_SRE_EL1 */ 464 /* ICC_SRE_EL1 */
432 { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), 465 { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101),
433 trap_raz_wi }, 466 trap_raz_wi },
@@ -660,6 +693,8 @@ static const struct sys_reg_desc cp14_64_regs[] = {
660 * register). 693 * register).
661 */ 694 */
662static const struct sys_reg_desc cp15_regs[] = { 695static const struct sys_reg_desc cp15_regs[] = {
696 { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
697
663 { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, 698 { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
664 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, 699 { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
665 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, 700 { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -707,6 +742,7 @@ static const struct sys_reg_desc cp15_regs[] = {
707 742
708static const struct sys_reg_desc cp15_64_regs[] = { 743static const struct sys_reg_desc cp15_64_regs[] = {
709 { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, 744 { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
745 { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
710 { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, 746 { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
711}; 747};
712 748
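The reset_mpidr() rework above spreads the vcpu_id over Aff0..Aff2 and caps Aff0 at 16 so each CPU stays individually addressable through the ICC_SGI1R target list. A self-contained illustration of the resulting packing (plain userspace C, vcpu_id value hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the affinity packing done by reset_mpidr() above:
 * Aff0 in bits [7:0] (at most 16 CPUs), Aff1 in [15:8], Aff2 in [23:16]. */
static uint64_t vcpu_id_to_mpidr(uint32_t vcpu_id)
{
	uint64_t mpidr;

	mpidr  = vcpu_id & 0x0f;                            /* Aff0 */
	mpidr |= (uint64_t)((vcpu_id >> 4) & 0xff) << 8;    /* Aff1 */
	mpidr |= (uint64_t)((vcpu_id >> 12) & 0xff) << 16;  /* Aff2 */
	return (1ULL << 31) | mpidr;
}

int main(void)
{
	/* vcpu_id 21 -> Aff2=0, Aff1=1, Aff0=5 -> MPIDR_EL1 = 0x80000105 */
	printf("0x%llx\n", (unsigned long long)vcpu_id_to_mpidr(21));
	return 0;
}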
diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h
new file mode 100644
index 000000000000..157416e963f2
--- /dev/null
+++ b/arch/arm64/kvm/trace.h
@@ -0,0 +1,55 @@
1#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_ARM64_KVM_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm
8
9TRACE_EVENT(kvm_wfx_arm64,
10 TP_PROTO(unsigned long vcpu_pc, bool is_wfe),
11 TP_ARGS(vcpu_pc, is_wfe),
12
13 TP_STRUCT__entry(
14 __field(unsigned long, vcpu_pc)
15 __field(bool, is_wfe)
16 ),
17
18 TP_fast_assign(
19 __entry->vcpu_pc = vcpu_pc;
20 __entry->is_wfe = is_wfe;
21 ),
22
23 TP_printk("guest executed wf%c at: 0x%08lx",
24 __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
25);
26
27TRACE_EVENT(kvm_hvc_arm64,
28 TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
29 TP_ARGS(vcpu_pc, r0, imm),
30
31 TP_STRUCT__entry(
32 __field(unsigned long, vcpu_pc)
33 __field(unsigned long, r0)
34 __field(unsigned long, imm)
35 ),
36
37 TP_fast_assign(
38 __entry->vcpu_pc = vcpu_pc;
39 __entry->r0 = r0;
40 __entry->imm = imm;
41 ),
42
43 TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)",
44 __entry->vcpu_pc, __entry->r0, __entry->imm)
45);
46
47#endif /* _TRACE_ARM64_KVM_H */
48
49#undef TRACE_INCLUDE_PATH
50#define TRACE_INCLUDE_PATH .
51#undef TRACE_INCLUDE_FILE
52#define TRACE_INCLUDE_FILE trace
53
54/* This part must be outside protection */
55#include <trace/define_trace.h>
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index d16046999e06..617a012a0107 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -148,17 +148,18 @@
148 * x0: Register pointing to VCPU struct 148 * x0: Register pointing to VCPU struct
149 */ 149 */
150.macro restore_vgic_v3_state 150.macro restore_vgic_v3_state
151 // Disable SRE_EL1 access. Necessary, otherwise
152 // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
153 msr_s ICC_SRE_EL1, xzr
154 isb
155
156 // Compute the address of struct vgic_cpu 151 // Compute the address of struct vgic_cpu
157 add x3, x0, #VCPU_VGIC_CPU 152 add x3, x0, #VCPU_VGIC_CPU
158 153
159 // Restore all interesting registers 154 // Restore all interesting registers
160 ldr w4, [x3, #VGIC_V3_CPU_HCR] 155 ldr w4, [x3, #VGIC_V3_CPU_HCR]
161 ldr w5, [x3, #VGIC_V3_CPU_VMCR] 156 ldr w5, [x3, #VGIC_V3_CPU_VMCR]
157 ldr w25, [x3, #VGIC_V3_CPU_SRE]
158
159 msr_s ICC_SRE_EL1, x25
160
161 // make sure SRE is valid before writing the other registers
162 isb
162 163
163 msr_s ICH_HCR_EL2, x4 164 msr_s ICH_HCR_EL2, x4
164 msr_s ICH_VMCR_EL2, x5 165 msr_s ICH_VMCR_EL2, x5
@@ -244,9 +245,12 @@
244 dsb sy 245 dsb sy
245 246
246 // Prevent the guest from touching the GIC system registers 247 // Prevent the guest from touching the GIC system registers
248 // if SRE isn't enabled for GICv3 emulation
249 cbnz x25, 1f
247 mrs_s x5, ICC_SRE_EL2 250 mrs_s x5, ICC_SRE_EL2
248 and x5, x5, #~ICC_SRE_EL2_ENABLE 251 and x5, x5, #~ICC_SRE_EL2_ENABLE
249 msr_s ICC_SRE_EL2, x5 252 msr_s ICC_SRE_EL2, x5
2531:
250.endm 254.endm
251 255
252ENTRY(__save_vgic_v3_state) 256ENTRY(__save_vgic_v3_state)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4327af53e544..843bea0e70fd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -835,9 +835,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
835 835
836void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 836void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
837void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 837void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
838void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
839 struct kvm_memory_slot *slot,
840 gfn_t gfn_offset, unsigned long mask);
841void kvm_mmu_zap_all(struct kvm *kvm); 838void kvm_mmu_zap_all(struct kvm *kvm);
842void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm); 839void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
843unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); 840unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f9d16ff56c6b..d07359466d5d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -39,6 +39,7 @@ config KVM
39 select PERF_EVENTS 39 select PERF_EVENTS
40 select HAVE_KVM_MSI 40 select HAVE_KVM_MSI
41 select HAVE_KVM_CPU_RELAX_INTERCEPT 41 select HAVE_KVM_CPU_RELAX_INTERCEPT
42 select KVM_GENERIC_DIRTYLOG_READ_PROTECT
42 select KVM_VFIO 43 select KVM_VFIO
43 ---help--- 44 ---help---
44 Support hosting fully virtualized guest machines using hardware 45 Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 97898abe8386..0ed9f795e4f0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1216,7 +1216,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
1216} 1216}
1217 1217
1218/** 1218/**
1219 * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages 1219 * kvm_arch_mmu_write_protect_pt_masked - write protect selected PT level pages
1220 * @kvm: kvm instance 1220 * @kvm: kvm instance
1221 * @slot: slot to protect 1221 * @slot: slot to protect
1222 * @gfn_offset: start of the BITS_PER_LONG pages we care about 1222 * @gfn_offset: start of the BITS_PER_LONG pages we care about
@@ -1225,7 +1225,7 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
1225 * Used when we do not need to care about huge page mappings: e.g. during dirty 1225 * Used when we do not need to care about huge page mappings: e.g. during dirty
1226 * logging we do not have any such mappings. 1226 * logging we do not have any such mappings.
1227 */ 1227 */
1228void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, 1228void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm,
1229 struct kvm_memory_slot *slot, 1229 struct kvm_memory_slot *slot,
1230 gfn_t gfn_offset, unsigned long mask) 1230 gfn_t gfn_offset, unsigned long mask)
1231{ 1231{
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 917672f8034a..d2bbb2d86610 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3759,83 +3759,37 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3759 * @kvm: kvm instance 3759 * @kvm: kvm instance
3760 * @log: slot id and address to which we copy the log 3760 * @log: slot id and address to which we copy the log
3761 * 3761 *
3762 * We need to keep it in mind that VCPU threads can write to the bitmap 3762 * Steps 1-4 below provide general overview of dirty page logging. See
3763 * concurrently. So, to avoid losing data, we keep the following order for 3763 * kvm_get_dirty_log_protect() function description for additional details.
3764 * each bit: 3764 *
3765 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we
3766 * always flush the TLB (step 4) even if previous step failed and the dirty
3767 * bitmap may be corrupt. Regardless of previous outcome the KVM logging API
3768 * does not preclude user space subsequent dirty log read. Flushing TLB ensures
3769 * writes will be marked dirty for next log read.
3765 * 3770 *
3766 * 1. Take a snapshot of the bit and clear it if needed. 3771 * 1. Take a snapshot of the bit and clear it if needed.
3767 * 2. Write protect the corresponding page. 3772 * 2. Write protect the corresponding page.
3768 * 3. Flush TLB's if needed. 3773 * 3. Copy the snapshot to the userspace.
3769 * 4. Copy the snapshot to the userspace. 3774 * 4. Flush TLB's if needed.
3770 *
3771 * Between 2 and 3, the guest may write to the page using the remaining TLB
3772 * entry. This is not a problem because the page will be reported dirty at
3773 * step 4 using the snapshot taken before and step 3 ensures that successive
3774 * writes will be logged for the next call.
3775 */ 3775 */
3776int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) 3776int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3777{ 3777{
3778 int r;
3779 struct kvm_memory_slot *memslot;
3780 unsigned long n, i;
3781 unsigned long *dirty_bitmap;
3782 unsigned long *dirty_bitmap_buffer;
3783 bool is_dirty = false; 3778 bool is_dirty = false;
3779 int r;
3784 3780
3785 mutex_lock(&kvm->slots_lock); 3781 mutex_lock(&kvm->slots_lock);
3786 3782
3787 r = -EINVAL; 3783 r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
3788 if (log->slot >= KVM_USER_MEM_SLOTS)
3789 goto out;
3790
3791 memslot = id_to_memslot(kvm->memslots, log->slot);
3792
3793 dirty_bitmap = memslot->dirty_bitmap;
3794 r = -ENOENT;
3795 if (!dirty_bitmap)
3796 goto out;
3797
3798 n = kvm_dirty_bitmap_bytes(memslot);
3799
3800 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3801 memset(dirty_bitmap_buffer, 0, n);
3802
3803 spin_lock(&kvm->mmu_lock);
3804
3805 for (i = 0; i < n / sizeof(long); i++) {
3806 unsigned long mask;
3807 gfn_t offset;
3808
3809 if (!dirty_bitmap[i])
3810 continue;
3811
3812 is_dirty = true;
3813
3814 mask = xchg(&dirty_bitmap[i], 0);
3815 dirty_bitmap_buffer[i] = mask;
3816
3817 offset = i * BITS_PER_LONG;
3818 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3819 }
3820
3821 spin_unlock(&kvm->mmu_lock);
3822
3823 /* See the comments in kvm_mmu_slot_remove_write_access(). */
3824 lockdep_assert_held(&kvm->slots_lock);
3825 3784
3826 /* 3785 /*
3827 * All the TLBs can be flushed out of mmu lock, see the comments in 3786 * All the TLBs can be flushed out of mmu lock, see the comments in
3828 * kvm_mmu_slot_remove_write_access(). 3787 * kvm_mmu_slot_remove_write_access().
3829 */ 3788 */
3789 lockdep_assert_held(&kvm->slots_lock);
3830 if (is_dirty) 3790 if (is_dirty)
3831 kvm_flush_remote_tlbs(kvm); 3791 kvm_flush_remote_tlbs(kvm);
3832 3792
3833 r = -EFAULT;
3834 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3835 goto out;
3836
3837 r = 0;
3838out:
3839 mutex_unlock(&kvm->slots_lock); 3793 mutex_unlock(&kvm->slots_lock);
3840 return r; 3794 return r;
3841} 3795}
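The userspace side of KVM_GET_DIRTY_LOG is unchanged by the move to kvm_get_dirty_log_protect(); only the kernel-side ordering of the four steps now lives in common code. A minimal sketch of a caller, where vm_fd, slot and slot_npages are assumed to come from the VM's own memslot setup (the slot must have been registered with KVM_MEM_LOG_DIRTY_PAGES):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: fetch the dirty bitmap for one memslot. */
static int fetch_dirty_log(int vm_fd, unsigned int slot, unsigned long slot_npages)
{
	size_t bitmap_bytes = ((slot_npages + 63) / 64) * 8;
	void *bitmap = calloc(1, bitmap_bytes);
	struct kvm_dirty_log log = { .slot = slot, .dirty_bitmap = bitmap };
	int ret;

	if (!bitmap)
		return -1;

	/* Steps 1-4 from the comment above all happen inside this call. */
	ret = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);

	/* On success, set bits in 'bitmap' mark pages dirtied since the
	 * previous call; a live-migration loop would now transfer them. */
	free(bitmap);
	return ret;
}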
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 1a146ccee701..2ab290bec655 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -481,15 +481,19 @@ out:
481 return tlist; 481 return tlist;
482} 482}
483 483
484#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \
485 (MPIDR_AFFINITY_LEVEL(cluster_id, level) \
486 << ICC_SGI1R_AFFINITY_## level ##_SHIFT)
487
484static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) 488static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
485{ 489{
486 u64 val; 490 u64 val;
487 491
488 val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48 | 492 val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) |
489 MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32 | 493 MPIDR_TO_SGI_AFFINITY(cluster_id, 2) |
490 irq << 24 | 494 irq << ICC_SGI1R_SGI_ID_SHIFT |
491 MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16 | 495 MPIDR_TO_SGI_AFFINITY(cluster_id, 1) |
492 tlist); 496 tlist << ICC_SGI1R_TARGET_LIST_SHIFT);
493 497
494 pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); 498 pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
495 gic_write_sgi1r(val); 499 gic_write_sgi1r(val);
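With MPIDR_TO_SGI_AFFINITY() and the named shifts, the layout of the value written to ICC_SGI1R_EL1 becomes explicit: target list at bit 0, Aff1 at 16, the SGI number at 24, Aff2 at 32 and Aff3 at 48 (as defined in arm-gic-v3.h below). A worked example with illustrative values:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: compose ICC_SGI1R the way gic_send_sgi() above now
 * does, using the field positions from arm-gic-v3.h. */
int main(void)
{
	uint64_t aff3 = 0, aff2 = 0, aff1 = 1;	/* cluster 0.0.1 */
	uint64_t irq = 3;			/* SGI 3 */
	uint64_t tlist = 0x5;			/* CPUs 0 and 2 of that cluster */
	uint64_t val = (aff3 << 48) | (aff2 << 32) | (irq << 24) |
		       (aff1 << 16) | tlist;

	printf("ICC_SGI1R_EL1 = 0x%llx\n", (unsigned long long)val); /* 0x3010005 */
	return 0;
}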
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ac4888dc86bc..7c55dd5dd2c9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -33,10 +33,11 @@
33#define VGIC_V2_MAX_LRS (1 << 6) 33#define VGIC_V2_MAX_LRS (1 << 6)
34#define VGIC_V3_MAX_LRS 16 34#define VGIC_V3_MAX_LRS 16
35#define VGIC_MAX_IRQS 1024 35#define VGIC_MAX_IRQS 1024
36#define VGIC_V2_MAX_CPUS 8
36 37
37/* Sanity checks... */ 38/* Sanity checks... */
38#if (KVM_MAX_VCPUS > 8) 39#if (KVM_MAX_VCPUS > 255)
39#error Invalid number of CPU interfaces 40#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now
40#endif 41#endif
41 42
42#if (VGIC_NR_IRQS_LEGACY & 31) 43#if (VGIC_NR_IRQS_LEGACY & 31)
@@ -132,6 +133,18 @@ struct vgic_params {
132 unsigned int maint_irq; 133 unsigned int maint_irq;
133 /* Virtual control interface base address */ 134 /* Virtual control interface base address */
134 void __iomem *vctrl_base; 135 void __iomem *vctrl_base;
136 int max_gic_vcpus;
137 /* Only needed for the legacy KVM_CREATE_IRQCHIP */
138 bool can_emulate_gicv2;
139};
140
141struct vgic_vm_ops {
142 bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
143 struct kvm_exit_mmio *);
144 bool (*queue_sgi)(struct kvm_vcpu *, int irq);
145 void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
146 int (*init_model)(struct kvm *);
147 int (*map_resources)(struct kvm *, const struct vgic_params *);
135}; 148};
136 149
137struct vgic_dist { 150struct vgic_dist {
@@ -140,6 +153,9 @@ struct vgic_dist {
140 bool in_kernel; 153 bool in_kernel;
141 bool ready; 154 bool ready;
142 155
156 /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
157 u32 vgic_model;
158
143 int nr_cpus; 159 int nr_cpus;
144 int nr_irqs; 160 int nr_irqs;
145 161
@@ -148,7 +164,11 @@ struct vgic_dist {
148 164
149 /* Distributor and vcpu interface mapping in the guest */ 165 /* Distributor and vcpu interface mapping in the guest */
150 phys_addr_t vgic_dist_base; 166 phys_addr_t vgic_dist_base;
151 phys_addr_t vgic_cpu_base; 167 /* GICv2 and GICv3 use different mapped register blocks */
168 union {
169 phys_addr_t vgic_cpu_base;
170 phys_addr_t vgic_redist_base;
171 };
152 172
153 /* Distributor enabled */ 173 /* Distributor enabled */
154 u32 enabled; 174 u32 enabled;
@@ -210,8 +230,13 @@ struct vgic_dist {
210 */ 230 */
211 struct vgic_bitmap *irq_spi_target; 231 struct vgic_bitmap *irq_spi_target;
212 232
233 /* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */
234 u32 *irq_spi_mpidr;
235
213 /* Bitmap indicating which CPU has something pending */ 236 /* Bitmap indicating which CPU has something pending */
214 unsigned long *irq_pending_on_cpu; 237 unsigned long *irq_pending_on_cpu;
238
239 struct vgic_vm_ops vm_ops;
215#endif 240#endif
216}; 241};
217 242
@@ -229,6 +254,7 @@ struct vgic_v3_cpu_if {
229#ifdef CONFIG_ARM_GIC_V3 254#ifdef CONFIG_ARM_GIC_V3
230 u32 vgic_hcr; 255 u32 vgic_hcr;
231 u32 vgic_vmcr; 256 u32 vgic_vmcr;
257 u32 vgic_sre; /* Restored only, change ignored */
232 u32 vgic_misr; /* Saved only */ 258 u32 vgic_misr; /* Saved only */
233 u32 vgic_eisr; /* Saved only */ 259 u32 vgic_eisr; /* Saved only */
234 u32 vgic_elrsr; /* Saved only */ 260 u32 vgic_elrsr; /* Saved only */
@@ -275,13 +301,15 @@ struct kvm_exit_mmio;
275int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); 301int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
276int kvm_vgic_hyp_init(void); 302int kvm_vgic_hyp_init(void);
277int kvm_vgic_map_resources(struct kvm *kvm); 303int kvm_vgic_map_resources(struct kvm *kvm);
278int kvm_vgic_create(struct kvm *kvm); 304int kvm_vgic_get_max_vcpus(void);
305int kvm_vgic_create(struct kvm *kvm, u32 type);
279void kvm_vgic_destroy(struct kvm *kvm); 306void kvm_vgic_destroy(struct kvm *kvm);
280void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); 307void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
281void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); 308void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
282void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); 309void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
283int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 310int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
284 bool level); 311 bool level);
312void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
285int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); 313int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
286bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, 314bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
287 struct kvm_exit_mmio *mmio); 315 struct kvm_exit_mmio *mmio);
@@ -327,7 +355,7 @@ static inline int kvm_vgic_map_resources(struct kvm *kvm)
327 return 0; 355 return 0;
328} 356}
329 357
330static inline int kvm_vgic_create(struct kvm *kvm) 358static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
331{ 359{
332 return 0; 360 return 0;
333} 361}
@@ -379,6 +407,11 @@ static inline bool vgic_ready(struct kvm *kvm)
379{ 407{
380 return true; 408 return true;
381} 409}
410
411static inline int kvm_vgic_get_max_vcpus(void)
412{
413 return KVM_MAX_VCPUS;
414}
382#endif 415#endif
383 416
384#endif 417#endif
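The new vgic_vm_ops table is what keeps the common VGIC code model-agnostic: vgic_v2_init_emulation() (later in this series) and its GICv3 counterpart fill in the pointers, and the shared paths then dispatch through them. A sketch of that indirection, not the literal vgic.c code:

#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>

/* Sketch only: once the per-model init has populated dist->vm_ops, common
 * code can forward an MMIO trap without knowing which GIC is emulated. */
static bool vgic_dispatch_handle_mmio(struct kvm_vcpu *vcpu,
				      struct kvm_run *run,
				      struct kvm_exit_mmio *mmio)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	return dist->vm_ops.handle_mmio(vcpu, run, mmio);
}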
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 1e8b0cf30792..800544bc7bfd 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -33,6 +33,7 @@
33#define GICD_SETSPI_SR 0x0050 33#define GICD_SETSPI_SR 0x0050
34#define GICD_CLRSPI_SR 0x0058 34#define GICD_CLRSPI_SR 0x0058
35#define GICD_SEIR 0x0068 35#define GICD_SEIR 0x0068
36#define GICD_IGROUPR 0x0080
36#define GICD_ISENABLER 0x0100 37#define GICD_ISENABLER 0x0100
37#define GICD_ICENABLER 0x0180 38#define GICD_ICENABLER 0x0180
38#define GICD_ISPENDR 0x0200 39#define GICD_ISPENDR 0x0200
@@ -41,14 +42,37 @@
41#define GICD_ICACTIVER 0x0380 42#define GICD_ICACTIVER 0x0380
42#define GICD_IPRIORITYR 0x0400 43#define GICD_IPRIORITYR 0x0400
43#define GICD_ICFGR 0x0C00 44#define GICD_ICFGR 0x0C00
45#define GICD_IGRPMODR 0x0D00
46#define GICD_NSACR 0x0E00
44#define GICD_IROUTER 0x6000 47#define GICD_IROUTER 0x6000
48#define GICD_IDREGS 0xFFD0
45#define GICD_PIDR2 0xFFE8 49#define GICD_PIDR2 0xFFE8
46 50
51/*
52 * Those registers are actually from GICv2, but the spec demands that they
53 * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
54 */
55#define GICD_ITARGETSR 0x0800
56#define GICD_SGIR 0x0F00
57#define GICD_CPENDSGIR 0x0F10
58#define GICD_SPENDSGIR 0x0F20
59
47#define GICD_CTLR_RWP (1U << 31) 60#define GICD_CTLR_RWP (1U << 31)
61#define GICD_CTLR_DS (1U << 6)
48#define GICD_CTLR_ARE_NS (1U << 4) 62#define GICD_CTLR_ARE_NS (1U << 4)
49#define GICD_CTLR_ENABLE_G1A (1U << 1) 63#define GICD_CTLR_ENABLE_G1A (1U << 1)
50#define GICD_CTLR_ENABLE_G1 (1U << 0) 64#define GICD_CTLR_ENABLE_G1 (1U << 0)
51 65
66/*
67 * In systems with a single security state (what we emulate in KVM)
68 * the meaning of the interrupt group enable bits is slightly different
69 */
70#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
71#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
72
73#define GICD_TYPER_LPIS (1U << 17)
74#define GICD_TYPER_MBIS (1U << 16)
75
52#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) 76#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
53#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32) 77#define GICD_TYPER_IRQS(typer) ((((typer) & 0x1f) + 1) * 32)
54#define GICD_TYPER_LPIS (1U << 17) 78#define GICD_TYPER_LPIS (1U << 17)
@@ -60,6 +84,8 @@
60#define GIC_PIDR2_ARCH_GICv3 0x30 84#define GIC_PIDR2_ARCH_GICv3 0x30
61#define GIC_PIDR2_ARCH_GICv4 0x40 85#define GIC_PIDR2_ARCH_GICv4 0x40
62 86
87#define GIC_V3_DIST_SIZE 0x10000
88
63/* 89/*
64 * Re-Distributor registers, offsets from RD_base 90 * Re-Distributor registers, offsets from RD_base
65 */ 91 */
@@ -78,6 +104,7 @@
78#define GICR_SYNCR 0x00C0 104#define GICR_SYNCR 0x00C0
79#define GICR_MOVLPIR 0x0100 105#define GICR_MOVLPIR 0x0100
80#define GICR_MOVALLR 0x0110 106#define GICR_MOVALLR 0x0110
107#define GICR_IDREGS GICD_IDREGS
81#define GICR_PIDR2 GICD_PIDR2 108#define GICR_PIDR2 GICD_PIDR2
82 109
83#define GICR_CTLR_ENABLE_LPIS (1UL << 0) 110#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
@@ -104,6 +131,7 @@
104/* 131/*
105 * Re-Distributor registers, offsets from SGI_base 132 * Re-Distributor registers, offsets from SGI_base
106 */ 133 */
134#define GICR_IGROUPR0 GICD_IGROUPR
107#define GICR_ISENABLER0 GICD_ISENABLER 135#define GICR_ISENABLER0 GICD_ISENABLER
108#define GICR_ICENABLER0 GICD_ICENABLER 136#define GICR_ICENABLER0 GICD_ICENABLER
109#define GICR_ISPENDR0 GICD_ISPENDR 137#define GICR_ISPENDR0 GICD_ISPENDR
@@ -112,11 +140,15 @@
112#define GICR_ICACTIVER0 GICD_ICACTIVER 140#define GICR_ICACTIVER0 GICD_ICACTIVER
113#define GICR_IPRIORITYR0 GICD_IPRIORITYR 141#define GICR_IPRIORITYR0 GICD_IPRIORITYR
114#define GICR_ICFGR0 GICD_ICFGR 142#define GICR_ICFGR0 GICD_ICFGR
143#define GICR_IGRPMODR0 GICD_IGRPMODR
144#define GICR_NSACR GICD_NSACR
115 145
116#define GICR_TYPER_PLPIS (1U << 0) 146#define GICR_TYPER_PLPIS (1U << 0)
117#define GICR_TYPER_VLPIS (1U << 1) 147#define GICR_TYPER_VLPIS (1U << 1)
118#define GICR_TYPER_LAST (1U << 4) 148#define GICR_TYPER_LAST (1U << 4)
119 149
150#define GIC_V3_REDIST_SIZE 0x20000
151
120#define LPI_PROP_GROUP1 (1 << 1) 152#define LPI_PROP_GROUP1 (1 << 1)
121#define LPI_PROP_ENABLED (1 << 0) 153#define LPI_PROP_ENABLED (1 << 0)
122 154
@@ -248,6 +280,18 @@
248#define ICC_SRE_EL2_SRE (1 << 0) 280#define ICC_SRE_EL2_SRE (1 << 0)
249#define ICC_SRE_EL2_ENABLE (1 << 3) 281#define ICC_SRE_EL2_ENABLE (1 << 3)
250 282
283#define ICC_SGI1R_TARGET_LIST_SHIFT 0
284#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
285#define ICC_SGI1R_AFFINITY_1_SHIFT 16
286#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
287#define ICC_SGI1R_SGI_ID_SHIFT 24
288#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT)
289#define ICC_SGI1R_AFFINITY_2_SHIFT 32
 290#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
291#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
292#define ICC_SGI1R_AFFINITY_3_SHIFT 48
 293#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
294
251/* 295/*
252 * System register definitions 296 * System register definitions
253 */ 297 */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 26f106022c88..0ef2daa199d8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -611,6 +611,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
611 611
612int kvm_get_dirty_log(struct kvm *kvm, 612int kvm_get_dirty_log(struct kvm *kvm,
613 struct kvm_dirty_log *log, int *is_dirty); 613 struct kvm_dirty_log *log, int *is_dirty);
614
615int kvm_get_dirty_log_protect(struct kvm *kvm,
616 struct kvm_dirty_log *log, bool *is_dirty);
617
618void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm,
619 struct kvm_memory_slot *slot,
620 gfn_t gfn_offset,
621 unsigned long mask);
622
614int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 623int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
615 struct kvm_dirty_log *log); 624 struct kvm_dirty_log *log);
616 625
@@ -1042,6 +1051,8 @@ void kvm_unregister_device_ops(u32 type);
1042 1051
1043extern struct kvm_device_ops kvm_mpic_ops; 1052extern struct kvm_device_ops kvm_mpic_ops;
1044extern struct kvm_device_ops kvm_xics_ops; 1053extern struct kvm_device_ops kvm_xics_ops;
1054extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1055extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
1045 1056
1046#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT 1057#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
1047 1058
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a37fd1224f36..b4e6f1e70f03 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -952,6 +952,8 @@ enum kvm_device_type {
952#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 952#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
953 KVM_DEV_TYPE_FLIC, 953 KVM_DEV_TYPE_FLIC,
954#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC 954#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
955 KVM_DEV_TYPE_ARM_VGIC_V3,
956#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3
955 KVM_DEV_TYPE_MAX, 957 KVM_DEV_TYPE_MAX,
956}; 958};
957 959
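Userspace opts into the GICv3 emulation through the generic device API with the new type. A minimal sketch, assuming vm_fd is an already-created KVM VM file descriptor:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: instantiate the in-kernel GICv3 for this VM. The returned
 * device fd is later used with KVM_SET_DEVICE_ATTR to set its addresses. */
static int create_vgic_v3(int vm_fd)
{
	struct kvm_create_device dev = {
		.type = KVM_DEV_TYPE_ARM_VGIC_V3,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &dev) < 0)
		return -1;

	return dev.fd;
}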
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fc0c5e603eb4..50d110654b42 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -37,3 +37,9 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
37 37
38config KVM_VFIO 38config KVM_VFIO
39 bool 39 bool
40
41config HAVE_KVM_ARCH_TLB_FLUSH_ALL
42 bool
43
44config KVM_GENERIC_DIRTYLOG_READ_PROTECT
45 bool
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
new file mode 100644
index 000000000000..19c6210f02cf
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -0,0 +1,847 @@
1/*
2 * Contains GICv2 specific emulation code, was in vgic.c before.
3 *
4 * Copyright (C) 2012 ARM Ltd.
5 * Author: Marc Zyngier <marc.zyngier@arm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/cpu.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/interrupt.h>
24#include <linux/io.h>
25#include <linux/uaccess.h>
26
27#include <linux/irqchip/arm-gic.h>
28
29#include <asm/kvm_emulate.h>
30#include <asm/kvm_arm.h>
31#include <asm/kvm_mmu.h>
32
33#include "vgic.h"
34
35#define GICC_ARCH_VERSION_V2 0x2
36
37static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
38static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
39{
40 return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
41}
42
43static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
44 struct kvm_exit_mmio *mmio, phys_addr_t offset)
45{
46 u32 reg;
47 u32 word_offset = offset & 3;
48
49 switch (offset & ~3) {
50 case 0: /* GICD_CTLR */
51 reg = vcpu->kvm->arch.vgic.enabled;
52 vgic_reg_access(mmio, &reg, word_offset,
53 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
54 if (mmio->is_write) {
55 vcpu->kvm->arch.vgic.enabled = reg & 1;
56 vgic_update_state(vcpu->kvm);
57 return true;
58 }
59 break;
60
61 case 4: /* GICD_TYPER */
62 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
63 reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
64 vgic_reg_access(mmio, &reg, word_offset,
65 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
66 break;
67
68 case 8: /* GICD_IIDR */
69 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
70 vgic_reg_access(mmio, &reg, word_offset,
71 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
72 break;
73 }
74
75 return false;
76}
77
78static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
79 struct kvm_exit_mmio *mmio,
80 phys_addr_t offset)
81{
82 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
83 vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
84}
85
86static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
87 struct kvm_exit_mmio *mmio,
88 phys_addr_t offset)
89{
90 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
91 vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
92}
93
94static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
95 struct kvm_exit_mmio *mmio,
96 phys_addr_t offset)
97{
98 return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
99 vcpu->vcpu_id);
100}
101
102static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
103 struct kvm_exit_mmio *mmio,
104 phys_addr_t offset)
105{
106 return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
107 vcpu->vcpu_id);
108}
109
110static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
111 struct kvm_exit_mmio *mmio,
112 phys_addr_t offset)
113{
114 u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
115 vcpu->vcpu_id, offset);
116 vgic_reg_access(mmio, reg, offset,
117 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
118 return false;
119}
120
121#define GICD_ITARGETSR_SIZE 32
122#define GICD_CPUTARGETS_BITS 8
123#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
124static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
125{
126 struct vgic_dist *dist = &kvm->arch.vgic;
127 int i;
128 u32 val = 0;
129
130 irq -= VGIC_NR_PRIVATE_IRQS;
131
132 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
133 val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
134
135 return val;
136}
137
138static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
139{
140 struct vgic_dist *dist = &kvm->arch.vgic;
141 struct kvm_vcpu *vcpu;
142 int i, c;
143 unsigned long *bmap;
144 u32 target;
145
146 irq -= VGIC_NR_PRIVATE_IRQS;
147
148 /*
149 * Pick the LSB in each byte. This ensures we target exactly
150 * one vcpu per IRQ. If the byte is null, assume we target
151 * CPU0.
152 */
153 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
154 int shift = i * GICD_CPUTARGETS_BITS;
155
156 target = ffs((val >> shift) & 0xffU);
157 target = target ? (target - 1) : 0;
158 dist->irq_spi_cpu[irq + i] = target;
159 kvm_for_each_vcpu(c, vcpu, kvm) {
160 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
161 if (c == target)
162 set_bit(irq + i, bmap);
163 else
164 clear_bit(irq + i, bmap);
165 }
166 }
167}
168
169static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
170 struct kvm_exit_mmio *mmio,
171 phys_addr_t offset)
172{
173 u32 reg;
174
175 /* We treat the banked interrupts targets as read-only */
176 if (offset < 32) {
177 u32 roreg;
178
179 roreg = 1 << vcpu->vcpu_id;
180 roreg |= roreg << 8;
181 roreg |= roreg << 16;
182
183 vgic_reg_access(mmio, &roreg, offset,
184 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
185 return false;
186 }
187
188 reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
189 vgic_reg_access(mmio, &reg, offset,
190 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
191 if (mmio->is_write) {
192 vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
193 vgic_update_state(vcpu->kvm);
194 return true;
195 }
196
197 return false;
198}
199
200static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
201 struct kvm_exit_mmio *mmio, phys_addr_t offset)
202{
203 u32 *reg;
204
205 reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
206 vcpu->vcpu_id, offset >> 1);
207
208 return vgic_handle_cfg_reg(reg, mmio, offset);
209}
210
211static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
212 struct kvm_exit_mmio *mmio, phys_addr_t offset)
213{
214 u32 reg;
215
216 vgic_reg_access(mmio, &reg, offset,
217 ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
218 if (mmio->is_write) {
219 vgic_dispatch_sgi(vcpu, reg);
220 vgic_update_state(vcpu->kvm);
221 return true;
222 }
223
224 return false;
225}
226
227/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
228static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
229 struct kvm_exit_mmio *mmio,
230 phys_addr_t offset)
231{
232 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
233 int sgi;
234 int min_sgi = (offset & ~0x3);
235 int max_sgi = min_sgi + 3;
236 int vcpu_id = vcpu->vcpu_id;
237 u32 reg = 0;
238
239 /* Copy source SGIs from distributor side */
240 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
241 u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi);
242
243 reg |= ((u32)sources) << (8 * (sgi - min_sgi));
244 }
245
246 mmio_data_write(mmio, ~0, reg);
247 return false;
248}
249
250static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
251 struct kvm_exit_mmio *mmio,
252 phys_addr_t offset, bool set)
253{
254 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
255 int sgi;
256 int min_sgi = (offset & ~0x3);
257 int max_sgi = min_sgi + 3;
258 int vcpu_id = vcpu->vcpu_id;
259 u32 reg;
260 bool updated = false;
261
262 reg = mmio_data_read(mmio, ~0);
263
264 /* Clear pending SGIs on the distributor */
265 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
266 u8 mask = reg >> (8 * (sgi - min_sgi));
267 u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
268
269 if (set) {
270 if ((*src & mask) != mask)
271 updated = true;
272 *src |= mask;
273 } else {
274 if (*src & mask)
275 updated = true;
276 *src &= ~mask;
277 }
278 }
279
280 if (updated)
281 vgic_update_state(vcpu->kvm);
282
283 return updated;
284}
285
286static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
287 struct kvm_exit_mmio *mmio,
288 phys_addr_t offset)
289{
290 if (!mmio->is_write)
291 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
292 else
293 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
294}
295
296static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
297 struct kvm_exit_mmio *mmio,
298 phys_addr_t offset)
299{
300 if (!mmio->is_write)
301 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
302 else
303 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
304}
305
306static const struct kvm_mmio_range vgic_dist_ranges[] = {
307 {
308 .base = GIC_DIST_CTRL,
309 .len = 12,
310 .bits_per_irq = 0,
311 .handle_mmio = handle_mmio_misc,
312 },
313 {
314 .base = GIC_DIST_IGROUP,
315 .len = VGIC_MAX_IRQS / 8,
316 .bits_per_irq = 1,
317 .handle_mmio = handle_mmio_raz_wi,
318 },
319 {
320 .base = GIC_DIST_ENABLE_SET,
321 .len = VGIC_MAX_IRQS / 8,
322 .bits_per_irq = 1,
323 .handle_mmio = handle_mmio_set_enable_reg,
324 },
325 {
326 .base = GIC_DIST_ENABLE_CLEAR,
327 .len = VGIC_MAX_IRQS / 8,
328 .bits_per_irq = 1,
329 .handle_mmio = handle_mmio_clear_enable_reg,
330 },
331 {
332 .base = GIC_DIST_PENDING_SET,
333 .len = VGIC_MAX_IRQS / 8,
334 .bits_per_irq = 1,
335 .handle_mmio = handle_mmio_set_pending_reg,
336 },
337 {
338 .base = GIC_DIST_PENDING_CLEAR,
339 .len = VGIC_MAX_IRQS / 8,
340 .bits_per_irq = 1,
341 .handle_mmio = handle_mmio_clear_pending_reg,
342 },
343 {
344 .base = GIC_DIST_ACTIVE_SET,
345 .len = VGIC_MAX_IRQS / 8,
346 .bits_per_irq = 1,
347 .handle_mmio = handle_mmio_raz_wi,
348 },
349 {
350 .base = GIC_DIST_ACTIVE_CLEAR,
351 .len = VGIC_MAX_IRQS / 8,
352 .bits_per_irq = 1,
353 .handle_mmio = handle_mmio_raz_wi,
354 },
355 {
356 .base = GIC_DIST_PRI,
357 .len = VGIC_MAX_IRQS,
358 .bits_per_irq = 8,
359 .handle_mmio = handle_mmio_priority_reg,
360 },
361 {
362 .base = GIC_DIST_TARGET,
363 .len = VGIC_MAX_IRQS,
364 .bits_per_irq = 8,
365 .handle_mmio = handle_mmio_target_reg,
366 },
367 {
368 .base = GIC_DIST_CONFIG,
369 .len = VGIC_MAX_IRQS / 4,
370 .bits_per_irq = 2,
371 .handle_mmio = handle_mmio_cfg_reg,
372 },
373 {
374 .base = GIC_DIST_SOFTINT,
375 .len = 4,
376 .handle_mmio = handle_mmio_sgi_reg,
377 },
378 {
379 .base = GIC_DIST_SGI_PENDING_CLEAR,
380 .len = VGIC_NR_SGIS,
381 .handle_mmio = handle_mmio_sgi_clear,
382 },
383 {
384 .base = GIC_DIST_SGI_PENDING_SET,
385 .len = VGIC_NR_SGIS,
386 .handle_mmio = handle_mmio_sgi_set,
387 },
388 {}
389};
390
391static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
392 struct kvm_exit_mmio *mmio)
393{
394 unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
395
396 if (!is_in_range(mmio->phys_addr, mmio->len, base,
397 KVM_VGIC_V2_DIST_SIZE))
398 return false;
399
400 /* GICv2 does not support accesses wider than 32 bits */
401 if (mmio->len > 4) {
402 kvm_inject_dabt(vcpu, mmio->phys_addr);
403 return true;
404 }
405
406 return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
407}
408
409static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
410{
411 struct kvm *kvm = vcpu->kvm;
412 struct vgic_dist *dist = &kvm->arch.vgic;
413 int nrcpus = atomic_read(&kvm->online_vcpus);
414 u8 target_cpus;
415 int sgi, mode, c, vcpu_id;
416
417 vcpu_id = vcpu->vcpu_id;
418
419 sgi = reg & 0xf;
420 target_cpus = (reg >> 16) & 0xff;
421 mode = (reg >> 24) & 3;
422
423 switch (mode) {
424 case 0:
425 if (!target_cpus)
426 return;
427 break;
428
429 case 1:
430 target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
431 break;
432
433 case 2:
434 target_cpus = 1 << vcpu_id;
435 break;
436 }
437
438 kvm_for_each_vcpu(c, vcpu, kvm) {
439 if (target_cpus & 1) {
440 /* Flag the SGI as pending */
441 vgic_dist_irq_set_pending(vcpu, sgi);
442 *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
443 kvm_debug("SGI%d from CPU%d to CPU%d\n",
444 sgi, vcpu_id, c);
445 }
446
447 target_cpus >>= 1;
448 }
449}
450
451static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
452{
453 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
454 unsigned long sources;
455 int vcpu_id = vcpu->vcpu_id;
456 int c;
457
458 sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
459
460 for_each_set_bit(c, &sources, dist->nr_cpus) {
461 if (vgic_queue_irq(vcpu, c, irq))
462 clear_bit(c, &sources);
463 }
464
465 *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
466
467 /*
468 * If the sources bitmap has been cleared it means that we
469 * could queue all the SGIs onto link registers (see the
470 * clear_bit above), and therefore we are done with them in
471 * our emulated gic and can get rid of them.
472 */
473 if (!sources) {
474 vgic_dist_irq_clear_pending(vcpu, irq);
475 vgic_cpu_irq_clear(vcpu, irq);
476 return true;
477 }
478
479 return false;
480}
481
482/**
483 * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
484 * @kvm: pointer to the kvm struct
485 *
486 * Map the virtual CPU interface into the VM before running any VCPUs. We
487 * can't do this at creation time, because user space must first set the
488 * virtual CPU interface address in the guest physical address space.
489 */
490static int vgic_v2_map_resources(struct kvm *kvm,
491 const struct vgic_params *params)
492{
493 int ret = 0;
494
495 if (!irqchip_in_kernel(kvm))
496 return 0;
497
498 mutex_lock(&kvm->lock);
499
500 if (vgic_ready(kvm))
501 goto out;
502
503 if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
504 IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
505 kvm_err("Need to set vgic cpu and dist addresses first\n");
506 ret = -ENXIO;
507 goto out;
508 }
509
510 /*
511 * Initialize the vgic if this hasn't already been done on demand by
512 * accessing the vgic state from userspace.
513 */
514 ret = vgic_init(kvm);
515 if (ret) {
516 kvm_err("Unable to allocate maps\n");
517 goto out;
518 }
519
520 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
521 params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
522 true);
523 if (ret) {
524 kvm_err("Unable to remap VGIC CPU to VCPU\n");
525 goto out;
526 }
527
528 kvm->arch.vgic.ready = true;
529out:
530 if (ret)
531 kvm_vgic_destroy(kvm);
532 mutex_unlock(&kvm->lock);
533 return ret;
534}
535
536static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
537{
538 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
539
540 *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source;
541}
542
543static int vgic_v2_init_model(struct kvm *kvm)
544{
545 int i;
546
547 for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
548 vgic_set_target_reg(kvm, 0, i);
549
550 return 0;
551}
552
553void vgic_v2_init_emulation(struct kvm *kvm)
554{
555 struct vgic_dist *dist = &kvm->arch.vgic;
556
557 dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
558 dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
559 dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
560 dist->vm_ops.init_model = vgic_v2_init_model;
561 dist->vm_ops.map_resources = vgic_v2_map_resources;
562
563 kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
564}
565
566static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
567 struct kvm_exit_mmio *mmio, phys_addr_t offset)
568{
569 bool updated = false;
570 struct vgic_vmcr vmcr;
571 u32 *vmcr_field;
572 u32 reg;
573
574 vgic_get_vmcr(vcpu, &vmcr);
575
576 switch (offset & ~0x3) {
577 case GIC_CPU_CTRL:
578 vmcr_field = &vmcr.ctlr;
579 break;
580 case GIC_CPU_PRIMASK:
581 vmcr_field = &vmcr.pmr;
582 break;
583 case GIC_CPU_BINPOINT:
584 vmcr_field = &vmcr.bpr;
585 break;
586 case GIC_CPU_ALIAS_BINPOINT:
587 vmcr_field = &vmcr.abpr;
588 break;
589 default:
590 BUG();
591 }
592
593 if (!mmio->is_write) {
594 reg = *vmcr_field;
595 mmio_data_write(mmio, ~0, reg);
596 } else {
597 reg = mmio_data_read(mmio, ~0);
598 if (reg != *vmcr_field) {
599 *vmcr_field = reg;
600 vgic_set_vmcr(vcpu, &vmcr);
601 updated = true;
602 }
603 }
604 return updated;
605}
606
607static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
608 struct kvm_exit_mmio *mmio, phys_addr_t offset)
609{
610 return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
611}
612
613static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
614 struct kvm_exit_mmio *mmio,
615 phys_addr_t offset)
616{
617 u32 reg;
618
619 if (mmio->is_write)
620 return false;
621
622 /* GICC_IIDR */
623 reg = (PRODUCT_ID_KVM << 20) |
624 (GICC_ARCH_VERSION_V2 << 16) |
625 (IMPLEMENTER_ARM << 0);
626 mmio_data_write(mmio, ~0, reg);
627 return false;
628}
629
630/*
631 * CPU Interface Register accesses - these are not accessed by the VM, but by
632 * user space for saving and restoring VGIC state.
633 */
634static const struct kvm_mmio_range vgic_cpu_ranges[] = {
635 {
636 .base = GIC_CPU_CTRL,
637 .len = 12,
638 .handle_mmio = handle_cpu_mmio_misc,
639 },
640 {
641 .base = GIC_CPU_ALIAS_BINPOINT,
642 .len = 4,
643 .handle_mmio = handle_mmio_abpr,
644 },
645 {
646 .base = GIC_CPU_ACTIVEPRIO,
647 .len = 16,
648 .handle_mmio = handle_mmio_raz_wi,
649 },
650 {
651 .base = GIC_CPU_IDENT,
652 .len = 4,
653 .handle_mmio = handle_cpu_mmio_ident,
654 },
655};
656
657static int vgic_attr_regs_access(struct kvm_device *dev,
658 struct kvm_device_attr *attr,
659 u32 *reg, bool is_write)
660{
661 const struct kvm_mmio_range *r = NULL, *ranges;
662 phys_addr_t offset;
663 int ret, cpuid, c;
664 struct kvm_vcpu *vcpu, *tmp_vcpu;
665 struct vgic_dist *vgic;
666 struct kvm_exit_mmio mmio;
667
668 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
669 cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
670 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
671
672 mutex_lock(&dev->kvm->lock);
673
674 ret = vgic_init(dev->kvm);
675 if (ret)
676 goto out;
677
678 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
679 ret = -EINVAL;
680 goto out;
681 }
682
683 vcpu = kvm_get_vcpu(dev->kvm, cpuid);
684 vgic = &dev->kvm->arch.vgic;
685
686 mmio.len = 4;
687 mmio.is_write = is_write;
688 if (is_write)
689 mmio_data_write(&mmio, ~0, *reg);
690 switch (attr->group) {
691 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
692 mmio.phys_addr = vgic->vgic_dist_base + offset;
693 ranges = vgic_dist_ranges;
694 break;
695 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
696 mmio.phys_addr = vgic->vgic_cpu_base + offset;
697 ranges = vgic_cpu_ranges;
698 break;
699 default:
700 BUG();
701 }
702 r = vgic_find_range(ranges, &mmio, offset);
703
704 if (unlikely(!r || !r->handle_mmio)) {
705 ret = -ENXIO;
706 goto out;
707 }
708
709
710 spin_lock(&vgic->lock);
711
712 /*
713 * Ensure that no other VCPU is running by checking the vcpu->cpu
 714 * field. If no other VCPUs are running we can safely access the VGIC
 715 * state, because even if another VCPU is run after this point, that
716 * VCPU will not touch the vgic state, because it will block on
717 * getting the vgic->lock in kvm_vgic_sync_hwstate().
718 */
719 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
720 if (unlikely(tmp_vcpu->cpu != -1)) {
721 ret = -EBUSY;
722 goto out_vgic_unlock;
723 }
724 }
725
726 /*
727 * Move all pending IRQs from the LRs on all VCPUs so the pending
728 * state can be properly represented in the register state accessible
729 * through this API.
730 */
731 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
732 vgic_unqueue_irqs(tmp_vcpu);
733
734 offset -= r->base;
735 r->handle_mmio(vcpu, &mmio, offset);
736
737 if (!is_write)
738 *reg = mmio_data_read(&mmio, ~0);
739
740 ret = 0;
741out_vgic_unlock:
742 spin_unlock(&vgic->lock);
743out:
744 mutex_unlock(&dev->kvm->lock);
745 return ret;
746}
747
748static int vgic_v2_create(struct kvm_device *dev, u32 type)
749{
750 return kvm_vgic_create(dev->kvm, type);
751}
752
753static void vgic_v2_destroy(struct kvm_device *dev)
754{
755 kfree(dev);
756}
757
758static int vgic_v2_set_attr(struct kvm_device *dev,
759 struct kvm_device_attr *attr)
760{
761 int ret;
762
763 ret = vgic_set_common_attr(dev, attr);
764 if (ret != -ENXIO)
765 return ret;
766
767 switch (attr->group) {
768 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
769 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
770 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
771 u32 reg;
772
773 if (get_user(reg, uaddr))
774 return -EFAULT;
775
776 return vgic_attr_regs_access(dev, attr, &reg, true);
777 }
778
779 }
780
781 return -ENXIO;
782}
783
784static int vgic_v2_get_attr(struct kvm_device *dev,
785 struct kvm_device_attr *attr)
786{
787 int ret;
788
789 ret = vgic_get_common_attr(dev, attr);
790 if (ret != -ENXIO)
791 return ret;
792
793 switch (attr->group) {
794 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
795 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
796 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
797 u32 reg = 0;
798
799 ret = vgic_attr_regs_access(dev, attr, &reg, false);
800 if (ret)
801 return ret;
802 return put_user(reg, uaddr);
803 }
804
805 }
806
807 return -ENXIO;
808}
809
810static int vgic_v2_has_attr(struct kvm_device *dev,
811 struct kvm_device_attr *attr)
812{
813 phys_addr_t offset;
814
815 switch (attr->group) {
816 case KVM_DEV_ARM_VGIC_GRP_ADDR:
817 switch (attr->attr) {
818 case KVM_VGIC_V2_ADDR_TYPE_DIST:
819 case KVM_VGIC_V2_ADDR_TYPE_CPU:
820 return 0;
821 }
822 break;
823 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
824 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
825 return vgic_has_attr_regs(vgic_dist_ranges, offset);
826 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
827 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
828 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
829 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
830 return 0;
831 case KVM_DEV_ARM_VGIC_GRP_CTRL:
832 switch (attr->attr) {
833 case KVM_DEV_ARM_VGIC_CTRL_INIT:
834 return 0;
835 }
836 }
837 return -ENXIO;
838}
839
840struct kvm_device_ops kvm_arm_vgic_v2_ops = {
841 .name = "kvm-arm-vgic-v2",
842 .create = vgic_v2_create,
843 .destroy = vgic_v2_destroy,
844 .set_attr = vgic_v2_set_attr,
845 .get_attr = vgic_v2_get_attr,
846 .has_attr = vgic_v2_has_attr,
847};
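vgic_dispatch_sgi() above decodes the GICv2 GICD_SGIR layout: SGI number in bits [3:0], CPU target list in [23:16], target filter mode in [25:24]. A worked example with an illustrative register value:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: the field split vgic_dispatch_sgi() applies to a
 * guest write of 0x000a0003 to GICD_SGIR. */
int main(void)
{
	uint32_t reg = 0x000a0003;
	uint32_t sgi = reg & 0xf;			/* SGI 3 */
	uint32_t target_cpus = (reg >> 16) & 0xff;	/* 0x0a -> vCPUs 1 and 3 */
	uint32_t mode = (reg >> 24) & 3;		/* 0 -> use the target list */

	printf("SGI%u, target mask 0x%02x, mode %u\n", sgi, target_cpus, mode);
	return 0;
}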
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 2935405ad22f..a0a7b5d1a070 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -229,12 +229,16 @@ int vgic_v2_probe(struct device_node *vgic_node,
229 goto out_unmap; 229 goto out_unmap;
230 } 230 }
231 231
232 vgic->can_emulate_gicv2 = true;
233 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
234
232 vgic->vcpu_base = vcpu_res.start; 235 vgic->vcpu_base = vcpu_res.start;
233 236
234 kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 237 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
235 vctrl_res.start, vgic->maint_irq); 238 vctrl_res.start, vgic->maint_irq);
236 239
237 vgic->type = VGIC_V2; 240 vgic->type = VGIC_V2;
241 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
238 *ops = &vgic_v2_ops; 242 *ops = &vgic_v2_ops;
239 *params = vgic; 243 *params = vgic;
240 goto out; 244 goto out;
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
new file mode 100644
index 000000000000..b3f154631515
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -0,0 +1,1036 @@
1/*
2 * GICv3 distributor and redistributor emulation
3 *
4 * GICv3 emulation is currently only supported on a GICv3 host (because
5 * we rely on the hardware's CPU interface virtualization support), but
6 * supports both hardware with or without the optional GICv2 backwards
7 * compatibility features.
8 *
9 * Limitations of the emulation:
10 * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore)
11 * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI.
12 * - We do not support the message based interrupts (MBIs) triggered by
13 * writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0.
14 * - We do not support the (optional) backwards compatibility feature.
15 * GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports
 16 * the compatibility feature, you can use a GICv2 in the guest, though.
17 * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI.
18 * - Priorities are not emulated (same as the GICv2 emulation). Linux
19 * as a guest is fine with this, because it does not use priorities.
20 * - We only support Group1 interrupts. Again Linux uses only those.
21 *
22 * Copyright (C) 2014 ARM Ltd.
23 * Author: Andre Przywara <andre.przywara@arm.com>
24 *
25 * This program is free software; you can redistribute it and/or modify
26 * it under the terms of the GNU General Public License version 2 as
27 * published by the Free Software Foundation.
28 *
29 * This program is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU General Public License for more details.
33 *
34 * You should have received a copy of the GNU General Public License
35 * along with this program. If not, see <http://www.gnu.org/licenses/>.
36 */
37
38#include <linux/cpu.h>
39#include <linux/kvm.h>
40#include <linux/kvm_host.h>
41#include <linux/interrupt.h>
42
43#include <linux/irqchip/arm-gic-v3.h>
44#include <kvm/arm_vgic.h>
45
46#include <asm/kvm_emulate.h>
47#include <asm/kvm_arm.h>
48#include <asm/kvm_mmu.h>
49
50#include "vgic.h"
51
52static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu,
53 struct kvm_exit_mmio *mmio, phys_addr_t offset)
54{
55 u32 reg = 0xffffffff;
56
57 vgic_reg_access(mmio, &reg, offset,
58 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
59
60 return false;
61}
62
63static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu,
64 struct kvm_exit_mmio *mmio, phys_addr_t offset)
65{
66 u32 reg = 0;
67
68 /*
69 * Force ARE and DS to 1, the guest cannot change this.
70 * For the time being we only support Group1 interrupts.
71 */
72 if (vcpu->kvm->arch.vgic.enabled)
73 reg = GICD_CTLR_ENABLE_SS_G1;
74 reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
75
76 vgic_reg_access(mmio, &reg, offset,
77 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
78 if (mmio->is_write) {
79 if (reg & GICD_CTLR_ENABLE_SS_G0)
80 kvm_info("guest tried to enable unsupported Group0 interrupts\n");
81 vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1);
82 vgic_update_state(vcpu->kvm);
83 return true;
84 }
85 return false;
86}
87
88/*
89 * As this implementation does not provide compatibility
90 * with GICv2 (ARE==1), we report zero CPUs in bits [5..7].
91 * Also LPIs and MBIs are not supported, so we set the respective bits to 0.
92 * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs).
93 */
94#define INTERRUPT_ID_BITS 10
95static bool handle_mmio_typer(struct kvm_vcpu *vcpu,
96 struct kvm_exit_mmio *mmio, phys_addr_t offset)
97{
98 u32 reg;
99
100 reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1;
101
102 reg |= (INTERRUPT_ID_BITS - 1) << 19;
103
104 vgic_reg_access(mmio, &reg, offset,
105 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
106
107 return false;
108}
109
110static bool handle_mmio_iidr(struct kvm_vcpu *vcpu,
111 struct kvm_exit_mmio *mmio, phys_addr_t offset)
112{
113 u32 reg;
114
115 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
116 vgic_reg_access(mmio, &reg, offset,
117 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
118
119 return false;
120}
121
122static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu,
123 struct kvm_exit_mmio *mmio,
124 phys_addr_t offset)
125{
126 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
127 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
128 vcpu->vcpu_id,
129 ACCESS_WRITE_SETBIT);
130
131 vgic_reg_access(mmio, NULL, offset,
132 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
133 return false;
134}
135
136static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu,
137 struct kvm_exit_mmio *mmio,
138 phys_addr_t offset)
139{
140 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
141 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
142 vcpu->vcpu_id,
143 ACCESS_WRITE_CLEARBIT);
144
145 vgic_reg_access(mmio, NULL, offset,
146 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
147 return false;
148}
149
150static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu,
151 struct kvm_exit_mmio *mmio,
152 phys_addr_t offset)
153{
154 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
155 return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
156 vcpu->vcpu_id);
157
158 vgic_reg_access(mmio, NULL, offset,
159 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
160 return false;
161}
162
163static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu,
164 struct kvm_exit_mmio *mmio,
165 phys_addr_t offset)
166{
167 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
168 return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
169 vcpu->vcpu_id);
170
171 vgic_reg_access(mmio, NULL, offset,
172 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
173 return false;
174}
175
176static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu,
177 struct kvm_exit_mmio *mmio,
178 phys_addr_t offset)
179{
180 u32 *reg;
181
182 if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) {
183 vgic_reg_access(mmio, NULL, offset,
184 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
185 return false;
186 }
187
188 reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
189 vcpu->vcpu_id, offset);
190 vgic_reg_access(mmio, reg, offset,
191 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
192 return false;
193}
194
195static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu,
196 struct kvm_exit_mmio *mmio,
197 phys_addr_t offset)
198{
199 u32 *reg;
200
201 if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) {
202 vgic_reg_access(mmio, NULL, offset,
203 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
204 return false;
205 }
206
207 reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
208 vcpu->vcpu_id, offset >> 1);
209
210 return vgic_handle_cfg_reg(reg, mmio, offset);
211}
212
213/*
214 * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word)
215 * when we store the target MPIDR written by the guest.
216 */
217static u32 compress_mpidr(unsigned long mpidr)
218{
219 u32 ret;
220
221 ret = MPIDR_AFFINITY_LEVEL(mpidr, 0);
222 ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8;
223 ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16;
224 ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24;
225
226 return ret;
227}
228
229static unsigned long uncompress_mpidr(u32 value)
230{
231 unsigned long mpidr;
232
233 mpidr = ((value >> 0) & 0xFF) << MPIDR_LEVEL_SHIFT(0);
234 mpidr |= ((value >> 8) & 0xFF) << MPIDR_LEVEL_SHIFT(1);
235 mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2);
236 mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3);
237
238 return mpidr;
239}
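compress_mpidr()/uncompress_mpidr() pack the four affinity bytes into a single 32-bit word so a target MPIDR can be stored per SPI for the IROUTERn emulation. A self-contained round-trip illustration (plain userspace C; on arm64 the raw MPIDR keeps Aff0..Aff2 in bits [23:0] and Aff3 in bits [39:32]):

#include <stdint.h>
#include <stdio.h>

/* Illustration only: pack a raw MPIDR with Aff3=1, Aff2=2, Aff1=3, Aff0=4
 * (0x0000000100020304) the way compress_mpidr() does; uncompress_mpidr()
 * is the exact inverse. */
int main(void)
{
	uint64_t mpidr = (1ULL << 32) | (2 << 16) | (3 << 8) | 4;
	uint32_t packed = (uint32_t)((mpidr >> 0) & 0xff)
			| (uint32_t)((mpidr >> 8) & 0xff) << 8
			| (uint32_t)((mpidr >> 16) & 0xff) << 16
			| (uint32_t)((mpidr >> 32) & 0xff) << 24;

	printf("packed = 0x%08x\n", packed);	/* 0x01020304 */
	return 0;
}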
240
241/*
242 * Lookup the given MPIDR value to get the vcpu_id (if there is one)
243 * and store that in the irq_spi_cpu[] array.
244 * This limits the number of VCPUs to 255 for now; extending the data
245 * type (or storing kvm_vcpu pointers) should lift the limit.
246 * Store the original MPIDR value in an extra array to support read-as-written.
247 * Unallocated MPIDRs are translated to a special value and caught
248 * before any array accesses.
249 */
250static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu,
251 struct kvm_exit_mmio *mmio,
252 phys_addr_t offset)
253{
254 struct kvm *kvm = vcpu->kvm;
255 struct vgic_dist *dist = &kvm->arch.vgic;
256 int spi;
257 u32 reg;
258 int vcpu_id;
259 unsigned long *bmap, mpidr;
260
261 /*
262 * The upper 32 bits of each 64-bit register are zero,
263 * as we don't support Aff3.
264 */
265 if ((offset & 4)) {
266 vgic_reg_access(mmio, NULL, offset,
267 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
268 return false;
269 }
270
271 /* This region only covers SPIs, so no handling of private IRQs here. */
272 spi = offset / 8;
273
274 /* get the stored MPIDR for this IRQ */
275 mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]);
276 reg = mpidr;
277
278 vgic_reg_access(mmio, &reg, offset,
279 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
280
281 if (!mmio->is_write)
282 return false;
283
284 /*
285 * Now clear the currently assigned vCPU from the map, making room
286 * for the new one to be written below
287 */
288 vcpu = kvm_mpidr_to_vcpu(kvm, mpidr);
289 if (likely(vcpu)) {
290 vcpu_id = vcpu->vcpu_id;
291 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
292 __clear_bit(spi, bmap);
293 }
294
295 dist->irq_spi_mpidr[spi] = compress_mpidr(reg);
296 vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK);
297
298 /*
299 * The spec says that non-existent MPIDR values should not be
300 * forwarded to any existing (v)CPU, but should be able to become
301 * pending anyway. We simply keep the irq_spi_target[] array empty, so
302 * the interrupt will never be injected.
303 * irq_spi_cpu[irq] gets a magic value in this case.
304 */
305 if (likely(vcpu)) {
306 vcpu_id = vcpu->vcpu_id;
307 dist->irq_spi_cpu[spi] = vcpu_id;
308 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
309 __set_bit(spi, bmap);
310 } else {
311 dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED;
312 }
313
314 vgic_update_state(kvm);
315
316 return true;
317}
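
The bookkeeping above boils down to two small data structures: one vcpu_id (or a sentinel) per SPI, plus a per-vCPU bitmap of the SPIs routed to it. A minimal sketch under those assumptions (toy MPIDR table, NO_VCPU standing in for VCPU_NOT_ALLOCATED; all names here are illustrative, not part of the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NR_VCPUS	4
#define NR_SPIS		32
#define NO_VCPU		0xff	/* stand-in for the VCPU_NOT_ALLOCATED magic value */

static uint8_t  spi_cpu[NR_SPIS];	/* resolved vcpu_id per SPI */
static uint32_t spi_target[NR_VCPUS];	/* per-vCPU bitmap of SPIs routed to it */

/* Re-route one SPI to whichever vCPU owns 'mpidr' (linear lookup, as above). */
static void route_spi(int spi, uint32_t mpidr, const uint32_t *vcpu_mpidr)
{
	int c, target = NO_VCPU;

	/* drop the SPI from the vCPU that currently owns it, if any */
	if (spi_cpu[spi] != NO_VCPU)
		spi_target[spi_cpu[spi]] &= ~(1u << spi);

	for (c = 0; c < NR_VCPUS; c++)
		if (vcpu_mpidr[c] == mpidr)
			target = c;

	spi_cpu[spi] = target;
	if (target != NO_VCPU)
		spi_target[target] |= 1u << spi;
	/* unknown MPIDR: no target bit is set, so the SPI is never injected */
}

int main(void)
{
	const uint32_t vcpu_mpidr[NR_VCPUS] = { 0x000, 0x001, 0x100, 0x101 };

	memset(spi_cpu, NO_VCPU, sizeof(spi_cpu));
	route_spi(5, 0x100, vcpu_mpidr);	/* SPI 5 -> vCPU 2 */
	printf("SPI5 -> vCPU %u\n", spi_cpu[5]);
	route_spi(5, 0xdead, vcpu_mpidr);	/* unknown MPIDR: SPI 5 parked */
	printf("SPI5 -> %s\n", spi_cpu[5] == NO_VCPU ? "nobody" : "some vCPU");
	return 0;
}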
318
319/*
320 * We should be careful about promising too much when a guest reads
321 * this register. Don't claim to be like any hardware implementation,
322 * but just report the GIC as version 3, which is what a Linux guest
323 * would check.
324 */
325static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
326 struct kvm_exit_mmio *mmio,
327 phys_addr_t offset)
328{
329 u32 reg = 0;
330
331 switch (offset + GICD_IDREGS) {
332 case GICD_PIDR2:
333 reg = 0x3b;
334 break;
335 }
336
337 vgic_reg_access(mmio, &reg, offset,
338 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
339
340 return false;
341}
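
As a quick sanity check on the 0x3b value: PIDR2 carries the architecture revision in bits [7:4] (assumed here from the GICv3 spec), so a guest reading it extracts ArchRev 3. A tiny illustrative snippet, not part of the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t pidr2 = 0x3b;			/* value returned above for GICD_PIDR2 */
	uint32_t archrev = (pidr2 >> 4) & 0xf;	/* bits [7:4]: architecture revision */

	printf("ArchRev = %u\n", archrev);	/* prints 3, i.e. "GICv3" */
	return 0;
}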
342
343static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
344 {
345 .base = GICD_CTLR,
346 .len = 0x04,
347 .bits_per_irq = 0,
348 .handle_mmio = handle_mmio_ctlr,
349 },
350 {
351 .base = GICD_TYPER,
352 .len = 0x04,
353 .bits_per_irq = 0,
354 .handle_mmio = handle_mmio_typer,
355 },
356 {
357 .base = GICD_IIDR,
358 .len = 0x04,
359 .bits_per_irq = 0,
360 .handle_mmio = handle_mmio_iidr,
361 },
362 {
363 /* this register is optional; it is RAZ/WI if not implemented */
364 .base = GICD_STATUSR,
365 .len = 0x04,
366 .bits_per_irq = 0,
367 .handle_mmio = handle_mmio_raz_wi,
368 },
369 {
370 /* this write-only register is WI when TYPER.MBIS=0 */
371 .base = GICD_SETSPI_NSR,
372 .len = 0x04,
373 .bits_per_irq = 0,
374 .handle_mmio = handle_mmio_raz_wi,
375 },
376 {
377 /* this write-only register is WI when TYPER.MBIS=0 */
378 .base = GICD_CLRSPI_NSR,
379 .len = 0x04,
380 .bits_per_irq = 0,
381 .handle_mmio = handle_mmio_raz_wi,
382 },
383 {
384 /* this is RAZ/WI when DS=1 */
385 .base = GICD_SETSPI_SR,
386 .len = 0x04,
387 .bits_per_irq = 0,
388 .handle_mmio = handle_mmio_raz_wi,
389 },
390 {
391 /* this is RAZ/WI when DS=1 */
392 .base = GICD_CLRSPI_SR,
393 .len = 0x04,
394 .bits_per_irq = 0,
395 .handle_mmio = handle_mmio_raz_wi,
396 },
397 {
398 .base = GICD_IGROUPR,
399 .len = 0x80,
400 .bits_per_irq = 1,
401 .handle_mmio = handle_mmio_rao_wi,
402 },
403 {
404 .base = GICD_ISENABLER,
405 .len = 0x80,
406 .bits_per_irq = 1,
407 .handle_mmio = handle_mmio_set_enable_reg_dist,
408 },
409 {
410 .base = GICD_ICENABLER,
411 .len = 0x80,
412 .bits_per_irq = 1,
413 .handle_mmio = handle_mmio_clear_enable_reg_dist,
414 },
415 {
416 .base = GICD_ISPENDR,
417 .len = 0x80,
418 .bits_per_irq = 1,
419 .handle_mmio = handle_mmio_set_pending_reg_dist,
420 },
421 {
422 .base = GICD_ICPENDR,
423 .len = 0x80,
424 .bits_per_irq = 1,
425 .handle_mmio = handle_mmio_clear_pending_reg_dist,
426 },
427 {
428 .base = GICD_ISACTIVER,
429 .len = 0x80,
430 .bits_per_irq = 1,
431 .handle_mmio = handle_mmio_raz_wi,
432 },
433 {
434 .base = GICD_ICACTIVER,
435 .len = 0x80,
436 .bits_per_irq = 1,
437 .handle_mmio = handle_mmio_raz_wi,
438 },
439 {
440 .base = GICD_IPRIORITYR,
441 .len = 0x400,
442 .bits_per_irq = 8,
443 .handle_mmio = handle_mmio_priority_reg_dist,
444 },
445 {
446 /* TARGETSRn is RES0 when ARE=1 */
447 .base = GICD_ITARGETSR,
448 .len = 0x400,
449 .bits_per_irq = 8,
450 .handle_mmio = handle_mmio_raz_wi,
451 },
452 {
453 .base = GICD_ICFGR,
454 .len = 0x100,
455 .bits_per_irq = 2,
456 .handle_mmio = handle_mmio_cfg_reg_dist,
457 },
458 {
459 /* this is RAZ/WI when DS=1 */
460 .base = GICD_IGRPMODR,
461 .len = 0x80,
462 .bits_per_irq = 1,
463 .handle_mmio = handle_mmio_raz_wi,
464 },
465 {
466 /* this is RAZ/WI when DS=1 */
467 .base = GICD_NSACR,
468 .len = 0x100,
469 .bits_per_irq = 2,
470 .handle_mmio = handle_mmio_raz_wi,
471 },
472 {
473 /* this is RAZ/WI when ARE=1 */
474 .base = GICD_SGIR,
475 .len = 0x04,
476 .handle_mmio = handle_mmio_raz_wi,
477 },
478 {
479 /* this is RAZ/WI when ARE=1 */
480 .base = GICD_CPENDSGIR,
481 .len = 0x10,
482 .handle_mmio = handle_mmio_raz_wi,
483 },
484 {
485 /* this is RAZ/WI when ARE=1 */
486 .base = GICD_SPENDSGIR,
487 .len = 0x10,
488 .handle_mmio = handle_mmio_raz_wi,
489 },
490 {
491 .base = GICD_IROUTER + 0x100,
492 .len = 0x1ee0,
493 .bits_per_irq = 64,
494 .handle_mmio = handle_mmio_route_reg,
495 },
496 {
497 .base = GICD_IDREGS,
498 .len = 0x30,
499 .bits_per_irq = 0,
500 .handle_mmio = handle_mmio_idregs,
501 },
502 {},
503};
504
505static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
506 struct kvm_exit_mmio *mmio,
507 phys_addr_t offset)
508{
509 struct kvm_vcpu *redist_vcpu = mmio->private;
510
511 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
512 redist_vcpu->vcpu_id,
513 ACCESS_WRITE_SETBIT);
514}
515
516static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu,
517 struct kvm_exit_mmio *mmio,
518 phys_addr_t offset)
519{
520 struct kvm_vcpu *redist_vcpu = mmio->private;
521
522 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
523 redist_vcpu->vcpu_id,
524 ACCESS_WRITE_CLEARBIT);
525}
526
527static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu,
528 struct kvm_exit_mmio *mmio,
529 phys_addr_t offset)
530{
531 struct kvm_vcpu *redist_vcpu = mmio->private;
532
533 return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
534 redist_vcpu->vcpu_id);
535}
536
537static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu,
538 struct kvm_exit_mmio *mmio,
539 phys_addr_t offset)
540{
541 struct kvm_vcpu *redist_vcpu = mmio->private;
542
543 return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
544 redist_vcpu->vcpu_id);
545}
546
547static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu,
548 struct kvm_exit_mmio *mmio,
549 phys_addr_t offset)
550{
551 struct kvm_vcpu *redist_vcpu = mmio->private;
552 u32 *reg;
553
554 reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
555 redist_vcpu->vcpu_id, offset);
556 vgic_reg_access(mmio, reg, offset,
557 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
558 return false;
559}
560
561static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
562 struct kvm_exit_mmio *mmio,
563 phys_addr_t offset)
564{
565 struct kvm_vcpu *redist_vcpu = mmio->private;
566
567 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
568 redist_vcpu->vcpu_id, offset >> 1);
569
570 return vgic_handle_cfg_reg(reg, mmio, offset);
571}
572
573static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
574 {
575 .base = GICR_IGROUPR0,
576 .len = 0x04,
577 .bits_per_irq = 1,
578 .handle_mmio = handle_mmio_rao_wi,
579 },
580 {
581 .base = GICR_ISENABLER0,
582 .len = 0x04,
583 .bits_per_irq = 1,
584 .handle_mmio = handle_mmio_set_enable_reg_redist,
585 },
586 {
587 .base = GICR_ICENABLER0,
588 .len = 0x04,
589 .bits_per_irq = 1,
590 .handle_mmio = handle_mmio_clear_enable_reg_redist,
591 },
592 {
593 .base = GICR_ISPENDR0,
594 .len = 0x04,
595 .bits_per_irq = 1,
596 .handle_mmio = handle_mmio_set_pending_reg_redist,
597 },
598 {
599 .base = GICR_ICPENDR0,
600 .len = 0x04,
601 .bits_per_irq = 1,
602 .handle_mmio = handle_mmio_clear_pending_reg_redist,
603 },
604 {
605 .base = GICR_ISACTIVER0,
606 .len = 0x04,
607 .bits_per_irq = 1,
608 .handle_mmio = handle_mmio_raz_wi,
609 },
610 {
611 .base = GICR_ICACTIVER0,
612 .len = 0x04,
613 .bits_per_irq = 1,
614 .handle_mmio = handle_mmio_raz_wi,
615 },
616 {
617 .base = GICR_IPRIORITYR0,
618 .len = 0x20,
619 .bits_per_irq = 8,
620 .handle_mmio = handle_mmio_priority_reg_redist,
621 },
622 {
623 .base = GICR_ICFGR0,
624 .len = 0x08,
625 .bits_per_irq = 2,
626 .handle_mmio = handle_mmio_cfg_reg_redist,
627 },
628 {
629 .base = GICR_IGRPMODR0,
630 .len = 0x04,
631 .bits_per_irq = 1,
632 .handle_mmio = handle_mmio_raz_wi,
633 },
634 {
635 .base = GICR_NSACR,
636 .len = 0x04,
637 .handle_mmio = handle_mmio_raz_wi,
638 },
639 {},
640};
641
642static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
643 struct kvm_exit_mmio *mmio,
644 phys_addr_t offset)
645{
646 /* since we don't support LPIs, this register is zero for now */
647 vgic_reg_access(mmio, NULL, offset,
648 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
649 return false;
650}
651
652static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
653 struct kvm_exit_mmio *mmio,
654 phys_addr_t offset)
655{
656 u32 reg;
657 u64 mpidr;
658 struct kvm_vcpu *redist_vcpu = mmio->private;
659 int target_vcpu_id = redist_vcpu->vcpu_id;
660
661 /* the upper 32 bits contain the affinity value */
662 if ((offset & ~3) == 4) {
663 mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
664 reg = compress_mpidr(mpidr);
665
666 vgic_reg_access(mmio, &reg, offset,
667 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
668 return false;
669 }
670
671 reg = redist_vcpu->vcpu_id << 8;
672 if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
673 reg |= GICR_TYPER_LAST;
674 vgic_reg_access(mmio, &reg, offset,
675 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
676 return false;
677}
678
679static const struct kvm_mmio_range vgic_redist_ranges[] = {
680 {
681 .base = GICR_CTLR,
682 .len = 0x04,
683 .bits_per_irq = 0,
684 .handle_mmio = handle_mmio_ctlr_redist,
685 },
686 {
687 .base = GICR_TYPER,
688 .len = 0x08,
689 .bits_per_irq = 0,
690 .handle_mmio = handle_mmio_typer_redist,
691 },
692 {
693 .base = GICR_IIDR,
694 .len = 0x04,
695 .bits_per_irq = 0,
696 .handle_mmio = handle_mmio_iidr,
697 },
698 {
699 .base = GICR_WAKER,
700 .len = 0x04,
701 .bits_per_irq = 0,
702 .handle_mmio = handle_mmio_raz_wi,
703 },
704 {
705 .base = GICR_IDREGS,
706 .len = 0x30,
707 .bits_per_irq = 0,
708 .handle_mmio = handle_mmio_idregs,
709 },
710 {},
711};
712
713/*
714 * This function splits accesses between the distributor and the two
715 * redistributor parts (private/SPI). As each redistributor is accessible
716 * from any CPU, we have to determine the affected VCPU by taking the faulting
717 * address into account. We then pass this VCPU to the handler function via
718 * the private parameter.
719 */
720#define SGI_BASE_OFFSET SZ_64K
721static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
722 struct kvm_exit_mmio *mmio)
723{
724 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
725 unsigned long dbase = dist->vgic_dist_base;
726 unsigned long rdbase = dist->vgic_redist_base;
727 int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
728 int vcpu_id;
729 const struct kvm_mmio_range *mmio_range;
730
731 if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
732 return vgic_handle_mmio_range(vcpu, run, mmio,
733 vgic_v3_dist_ranges, dbase);
734 }
735
736 if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
737 GIC_V3_REDIST_SIZE * nrcpus))
738 return false;
739
740 vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
741 rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
742 mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
743
744 if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
745 rdbase += SGI_BASE_OFFSET;
746 mmio_range = vgic_redist_sgi_ranges;
747 } else {
748 mmio_range = vgic_redist_ranges;
749 }
750 return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
751}
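
To make the address arithmetic concrete, here is a standalone sketch of the same decoding, assuming the two-frame layout implied above (a 64KiB RD frame followed by a 64KiB SGI frame per vCPU, i.e. 128KiB per redistributor); the addresses are invented for the example and the snippet is not part of the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FRAME_SIZE	0x10000UL		/* SZ_64K */
#define REDIST_SIZE	(2 * FRAME_SIZE)	/* RD frame + SGI frame per vCPU */

int main(void)
{
	uint64_t rdbase = 0x080a0000UL;		/* hypothetical redistributor base */
	uint64_t fault  = 0x080d0100UL;		/* faulting guest-physical address */

	uint64_t off     = fault - rdbase;
	unsigned vcpu_id = off / REDIST_SIZE;
	bool sgi_frame   = (off % REDIST_SIZE) >= FRAME_SIZE;
	uint64_t reg_off = off % FRAME_SIZE;

	/* -> vCPU 1, SGI frame, offset 0x100 (GICR_ISENABLER0 in the table above) */
	printf("vcpu=%u frame=%s offset=0x%llx\n", vcpu_id,
	       sgi_frame ? "SGI" : "RD", (unsigned long long)reg_off);
	return 0;
}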
752
753static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
754{
755 if (vgic_queue_irq(vcpu, 0, irq)) {
756 vgic_dist_irq_clear_pending(vcpu, irq);
757 vgic_cpu_irq_clear(vcpu, irq);
758 return true;
759 }
760
761 return false;
762}
763
764static int vgic_v3_map_resources(struct kvm *kvm,
765 const struct vgic_params *params)
766{
767 int ret = 0;
768 struct vgic_dist *dist = &kvm->arch.vgic;
769
770 if (!irqchip_in_kernel(kvm))
771 return 0;
772
773 mutex_lock(&kvm->lock);
774
775 if (vgic_ready(kvm))
776 goto out;
777
778 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
779 IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
780 kvm_err("Need to set vgic distributor addresses first\n");
781 ret = -ENXIO;
782 goto out;
783 }
784
785 /*
786 * For a VGICv3 we require userland to explicitly initialize
787 * the VGIC before we can use it.
788 */
789 if (!vgic_initialized(kvm)) {
790 ret = -EBUSY;
791 goto out;
792 }
793
794 kvm->arch.vgic.ready = true;
795out:
796 if (ret)
797 kvm_vgic_destroy(kvm);
798 mutex_unlock(&kvm->lock);
799 return ret;
800}
801
802static int vgic_v3_init_model(struct kvm *kvm)
803{
804 int i;
805 u32 mpidr;
806 struct vgic_dist *dist = &kvm->arch.vgic;
807 int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
808
809 dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]),
810 GFP_KERNEL);
811
812 if (!dist->irq_spi_mpidr)
813 return -ENOMEM;
814
815 /* Initialize the target VCPUs for each IRQ to VCPU 0 */
816 mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0)));
817 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) {
818 dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0;
819 dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr;
820 vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1);
821 }
822
823 return 0;
824}
825
826/* GICv3 does not keep track of SGI sources anymore. */
827static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
828{
829}
830
831void vgic_v3_init_emulation(struct kvm *kvm)
832{
833 struct vgic_dist *dist = &kvm->arch.vgic;
834
835 dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
836 dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
837 dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
838 dist->vm_ops.init_model = vgic_v3_init_model;
839 dist->vm_ops.map_resources = vgic_v3_map_resources;
840
841 kvm->arch.max_vcpus = KVM_MAX_VCPUS;
842}
843
844/*
845 * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
846 * generation register ICC_SGI1R_EL1) with a given VCPU.
847 * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
848 * return -1.
849 */
850static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
851{
852 unsigned long affinity;
853 int level0;
854
855 /*
856 * Split the current VCPU's MPIDR into affinity level 0 and the
857 * rest as this is what we have to compare against.
858 */
859 affinity = kvm_vcpu_get_mpidr_aff(vcpu);
860 level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
861 affinity &= ~MPIDR_LEVEL_MASK;
862
863 /* bail out if the upper three levels don't match */
864 if (sgi_aff != affinity)
865 return -1;
866
867 /* Is this VCPU's bit set in the mask? */
868 if (!(sgi_cpu_mask & BIT(level0)))
869 return -1;
870
871 return level0;
872}
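
A small worked example of that comparison, with the affinity split done by hand (values invented for the illustration, field widths assumed to be 8 bits as elsewhere in this file; not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Return the Aff0 value if the vCPU is addressed by (aff123, aff0_mask), else -1. */
static int match(uint64_t vcpu_aff, uint64_t sgi_aff123, uint16_t aff0_mask)
{
	int level0 = vcpu_aff & 0xff;		/* this vCPU's Aff0 */
	uint64_t upper = vcpu_aff & ~0xffULL;	/* Aff1..Aff3 of this vCPU */

	if (upper != sgi_aff123)
		return -1;			/* upper levels differ: not addressed */
	if (!(aff0_mask & (1u << level0)))
		return -1;			/* Aff0 bit not set in the target list */
	return level0;
}

int main(void)
{
	uint64_t vcpu_aff = 0x010204;	/* Aff2=1, Aff1=2, Aff0=4 (Aff3=0) */

	/* SGI aimed at cluster Aff2=1/Aff1=2, target list 0b10000 -> hits Aff0=4 */
	printf("match: %d\n", match(vcpu_aff, 0x010200, 1u << 4));
	/* same cluster but target list only covers Aff0=0..3 -> no match */
	printf("match: %d\n", match(vcpu_aff, 0x010200, 0x000f));
	return 0;
}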
873
874#define SGI_AFFINITY_LEVEL(reg, level) \
875 ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
876 >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
877
878/**
879 * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
880 * @vcpu: The VCPU requesting a SGI
881 * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
882 *
883 * With GICv3 (and ARE=1), CPUs trigger SGIs by writing to a system register.
884 * This will trap in sys_regs.c and call this function.
885 * The ICC_SGI1R_EL1 register contains the upper three affinity levels of the
886 * target processors as well as a bitmask of 16 Aff0 CPUs.
887 * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
888 * check for matching ones. If this bit is set, we signal all VCPUs except the
889 * calling one.
890 */
891void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
892{
893 struct kvm *kvm = vcpu->kvm;
894 struct kvm_vcpu *c_vcpu;
895 struct vgic_dist *dist = &kvm->arch.vgic;
896 u16 target_cpus;
897 u64 mpidr;
898 int sgi, c;
899 int vcpu_id = vcpu->vcpu_id;
900 bool broadcast;
901 int updated = 0;
902
903 sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
904 broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
905 target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
906 mpidr = SGI_AFFINITY_LEVEL(reg, 3);
907 mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
908 mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
909
910 /*
911 * We take the dist lock here, because we come from the sysregs
912 * code path and not from the MMIO one (which already takes the lock).
913 */
914 spin_lock(&dist->lock);
915
916 /*
917 * We iterate over all VCPUs to find the MPIDRs matching the request.
918 * If we have handled one CPU, we clear its bit to detect early
919 * if we are already finished. This avoids iterating through all
920 * VCPUs when most of the time we just signal a single VCPU.
921 */
922 kvm_for_each_vcpu(c, c_vcpu, kvm) {
923
924 /* Exit early if we have dealt with all requested CPUs */
925 if (!broadcast && target_cpus == 0)
926 break;
927
928 /* Don't signal the calling VCPU */
929 if (broadcast && c == vcpu_id)
930 continue;
931
932 if (!broadcast) {
933 int level0;
934
935 level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
936 if (level0 == -1)
937 continue;
938
939 /* remove this matching VCPU from the mask */
940 target_cpus &= ~BIT(level0);
941 }
942
943 /* Flag the SGI as pending */
944 vgic_dist_irq_set_pending(c_vcpu, sgi);
945 updated = 1;
946 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
947 }
948 if (updated)
949 vgic_update_state(vcpu->kvm);
950 spin_unlock(&dist->lock);
951 if (updated)
952 vgic_kick_vcpus(vcpu->kvm);
953}
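
The early-exit behaviour described above can be condensed into a few lines; a purely illustrative sketch (not part of the patch) with the vCPUs reduced to an array of Aff0 values in one cluster:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Aff0 value of each vCPU; pretend they all share the upper affinity levels. */
	const int vcpu_aff0[] = { 0, 1, 2, 3, 4, 5 };
	uint16_t target_list = 0x000a;		/* guest asked for Aff0 = 1 and 3 */
	int c;

	for (c = 0; c < 6; c++) {
		if (!target_list)		/* all requested CPUs handled: stop early */
			break;
		if (!(target_list & (1u << vcpu_aff0[c])))
			continue;		/* this vCPU is not in the target list */

		target_list &= ~(1u << vcpu_aff0[c]);	/* handled: drop its bit */
		printf("signal vCPU %d\n", c);		/* here: mark the SGI pending */
	}
	return 0;
}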
954
955static int vgic_v3_create(struct kvm_device *dev, u32 type)
956{
957 return kvm_vgic_create(dev->kvm, type);
958}
959
960static void vgic_v3_destroy(struct kvm_device *dev)
961{
962 kfree(dev);
963}
964
965static int vgic_v3_set_attr(struct kvm_device *dev,
966 struct kvm_device_attr *attr)
967{
968 int ret;
969
970 ret = vgic_set_common_attr(dev, attr);
971 if (ret != -ENXIO)
972 return ret;
973
974 switch (attr->group) {
975 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
976 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
977 return -ENXIO;
978 }
979
980 return -ENXIO;
981}
982
983static int vgic_v3_get_attr(struct kvm_device *dev,
984 struct kvm_device_attr *attr)
985{
986 int ret;
987
988 ret = vgic_get_common_attr(dev, attr);
989 if (ret != -ENXIO)
990 return ret;
991
992 switch (attr->group) {
993 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
994 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
995 return -ENXIO;
996 }
997
998 return -ENXIO;
999}
1000
1001static int vgic_v3_has_attr(struct kvm_device *dev,
1002 struct kvm_device_attr *attr)
1003{
1004 switch (attr->group) {
1005 case KVM_DEV_ARM_VGIC_GRP_ADDR:
1006 switch (attr->attr) {
1007 case KVM_VGIC_V2_ADDR_TYPE_DIST:
1008 case KVM_VGIC_V2_ADDR_TYPE_CPU:
1009 return -ENXIO;
1010 case KVM_VGIC_V3_ADDR_TYPE_DIST:
1011 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
1012 return 0;
1013 }
1014 break;
1015 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1016 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1017 return -ENXIO;
1018 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
1019 return 0;
1020 case KVM_DEV_ARM_VGIC_GRP_CTRL:
1021 switch (attr->attr) {
1022 case KVM_DEV_ARM_VGIC_CTRL_INIT:
1023 return 0;
1024 }
1025 }
1026 return -ENXIO;
1027}
1028
1029struct kvm_device_ops kvm_arm_vgic_v3_ops = {
1030 .name = "kvm-arm-vgic-v3",
1031 .create = vgic_v3_create,
1032 .destroy = vgic_v3_destroy,
1033 .set_attr = vgic_v3_set_attr,
1034 .get_attr = vgic_v3_get_attr,
1035 .has_attr = vgic_v3_has_attr,
1036};
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 1c2c8eef0599..3a62d8a9a2c6 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -34,6 +34,7 @@
34#define GICH_LR_VIRTUALID (0x3ffUL << 0) 34#define GICH_LR_VIRTUALID (0x3ffUL << 0)
35#define GICH_LR_PHYSID_CPUID_SHIFT (10) 35#define GICH_LR_PHYSID_CPUID_SHIFT (10)
36#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) 36#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
37#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1)
37 38
38/* 39/*
39 * LRs are stored in reverse order in memory. make sure we index them 40 * LRs are stored in reverse order in memory. make sure we index them
@@ -48,12 +49,17 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
48 struct vgic_lr lr_desc; 49 struct vgic_lr lr_desc;
49 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; 50 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
50 51
51 lr_desc.irq = val & GICH_LR_VIRTUALID; 52 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
52 if (lr_desc.irq <= 15) 53 lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
53 lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
54 else 54 else
55 lr_desc.source = 0; 55 lr_desc.irq = val & GICH_LR_VIRTUALID;
56 lr_desc.state = 0; 56
57 lr_desc.source = 0;
58 if (lr_desc.irq <= 15 &&
59 vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
60 lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
61
62 lr_desc.state = 0;
57 63
58 if (val & ICH_LR_PENDING_BIT) 64 if (val & ICH_LR_PENDING_BIT)
59 lr_desc.state |= LR_STATE_PENDING; 65 lr_desc.state |= LR_STATE_PENDING;
@@ -68,8 +74,20 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
68static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, 74static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
69 struct vgic_lr lr_desc) 75 struct vgic_lr lr_desc)
70{ 76{
71 u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | 77 u64 lr_val;
72 lr_desc.irq); 78
79 lr_val = lr_desc.irq;
80
81 /*
82 * Currently all guest IRQs are Group1, as Group0 would result
83 * in a FIQ in the guest, which it wouldn't expect.
84 * Eventually we want to make this configurable, so we may revisit
85 * this in the future.
86 */
87 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
88 lr_val |= ICH_LR_GROUP;
89 else
90 lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
73 91
74 if (lr_desc.state & LR_STATE_PENDING) 92 if (lr_desc.state & LR_STATE_PENDING)
75 lr_val |= ICH_LR_PENDING_BIT; 93 lr_val |= ICH_LR_PENDING_BIT;
@@ -145,15 +163,27 @@ static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
145 163
146static void vgic_v3_enable(struct kvm_vcpu *vcpu) 164static void vgic_v3_enable(struct kvm_vcpu *vcpu)
147{ 165{
166 struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
167
148 /* 168 /*
149 * By forcing VMCR to zero, the GIC will restore the binary 169 * By forcing VMCR to zero, the GIC will restore the binary
150 * points to their reset values. Anything else resets to zero 170 * points to their reset values. Anything else resets to zero
151 * anyway. 171 * anyway.
152 */ 172 */
153 vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0; 173 vgic_v3->vgic_vmcr = 0;
174
175 /*
176 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
177 * way, so we force SRE to 1 to demonstrate this to the guest.
178 * This goes with the spec allowing the value to be RAO/WI.
179 */
180 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
181 vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
182 else
183 vgic_v3->vgic_sre = 0;
154 184
155 /* Get the show on the road... */ 185 /* Get the show on the road... */
156 vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN; 186 vgic_v3->vgic_hcr = ICH_HCR_EN;
157} 187}
158 188
159static const struct vgic_ops vgic_v3_ops = { 189static const struct vgic_ops vgic_v3_ops = {
@@ -205,35 +235,37 @@ int vgic_v3_probe(struct device_node *vgic_node,
205 * maximum of 16 list registers. Just ignore bit 4... 235 * maximum of 16 list registers. Just ignore bit 4...
206 */ 236 */
207 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; 237 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
238 vgic->can_emulate_gicv2 = false;
208 239
209 if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) 240 if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
210 gicv_idx = 1; 241 gicv_idx = 1;
211 242
212 gicv_idx += 3; /* Also skip GICD, GICC, GICH */ 243 gicv_idx += 3; /* Also skip GICD, GICC, GICH */
213 if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { 244 if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
214 kvm_err("Cannot obtain GICV region\n"); 245 kvm_info("GICv3: no GICV resource entry\n");
215 ret = -ENXIO; 246 vgic->vcpu_base = 0;
216 goto out; 247 } else if (!PAGE_ALIGNED(vcpu_res.start)) {
217 } 248 pr_warn("GICV physical address 0x%llx not page aligned\n",
218
219 if (!PAGE_ALIGNED(vcpu_res.start)) {
220 kvm_err("GICV physical address 0x%llx not page aligned\n",
221 (unsigned long long)vcpu_res.start); 249 (unsigned long long)vcpu_res.start);
222 ret = -ENXIO; 250 vgic->vcpu_base = 0;
223 goto out; 251 } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
224 } 252 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
225
226 if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
227 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
228 (unsigned long long)resource_size(&vcpu_res), 253 (unsigned long long)resource_size(&vcpu_res),
229 PAGE_SIZE); 254 PAGE_SIZE);
230 ret = -ENXIO; 255 vgic->vcpu_base = 0;
231 goto out; 256 } else {
257 vgic->vcpu_base = vcpu_res.start;
258 vgic->can_emulate_gicv2 = true;
259 kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
260 KVM_DEV_TYPE_ARM_VGIC_V2);
232 } 261 }
262 if (vgic->vcpu_base == 0)
263 kvm_info("disabling GICv2 emulation\n");
264 kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
233 265
234 vgic->vcpu_base = vcpu_res.start;
235 vgic->vctrl_base = NULL; 266 vgic->vctrl_base = NULL;
236 vgic->type = VGIC_V3; 267 vgic->type = VGIC_V3;
268 vgic->max_gic_vcpus = KVM_MAX_VCPUS;
237 269
238 kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 270 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
239 vcpu_res.start, vgic->maint_irq); 271 vcpu_res.start, vgic->maint_irq);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 03affc7bf453..0cc6ab6005a0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -75,37 +75,31 @@
75 * inactive as long as the external input line is held high. 75 * inactive as long as the external input line is held high.
76 */ 76 */
77 77
78#define VGIC_ADDR_UNDEF (-1) 78#include "vgic.h"
79#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) 79
80
81#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
82#define IMPLEMENTER_ARM 0x43b
83#define GICC_ARCH_VERSION_V2 0x2
84
85#define ACCESS_READ_VALUE (1 << 0)
86#define ACCESS_READ_RAZ (0 << 0)
87#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
88#define ACCESS_WRITE_IGNORED (0 << 1)
89#define ACCESS_WRITE_SETBIT (1 << 1)
90#define ACCESS_WRITE_CLEARBIT (2 << 1)
91#define ACCESS_WRITE_VALUE (3 << 1)
92#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
93
94static int vgic_init(struct kvm *kvm);
95static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); 80static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
96static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 81static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
97static void vgic_update_state(struct kvm *kvm);
98static void vgic_kick_vcpus(struct kvm *kvm);
99static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
100static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
101static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 82static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
102static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 83static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
103static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
104static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
105 84
106static const struct vgic_ops *vgic_ops; 85static const struct vgic_ops *vgic_ops;
107static const struct vgic_params *vgic; 86static const struct vgic_params *vgic;
108 87
88static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
89{
90 vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
91}
92
93static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
94{
95 return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
96}
97
98int kvm_vgic_map_resources(struct kvm *kvm)
99{
100 return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
101}
102
109/* 103/*
110 * struct vgic_bitmap contains a bitmap made of unsigned longs, but 104 * struct vgic_bitmap contains a bitmap made of unsigned longs, but
111 * extracts u32s out of them. 105 * extracts u32s out of them.
@@ -160,8 +154,7 @@ static unsigned long *u64_to_bitmask(u64 *val)
160 return (unsigned long *)val; 154 return (unsigned long *)val;
161} 155}
162 156
163static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, 157u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset)
164 int cpuid, u32 offset)
165{ 158{
166 offset >>= 2; 159 offset >>= 2;
167 if (!offset) 160 if (!offset)
@@ -179,8 +172,8 @@ static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
179 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); 172 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
180} 173}
181 174
182static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, 175void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
183 int irq, int val) 176 int irq, int val)
184{ 177{
185 unsigned long *reg; 178 unsigned long *reg;
186 179
@@ -202,7 +195,7 @@ static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
202 return x->private + cpuid; 195 return x->private + cpuid;
203} 196}
204 197
205static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) 198unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
206{ 199{
207 return x->shared; 200 return x->shared;
208} 201}
@@ -229,7 +222,7 @@ static void vgic_free_bytemap(struct vgic_bytemap *b)
229 b->shared = NULL; 222 b->shared = NULL;
230} 223}
231 224
232static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) 225u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
233{ 226{
234 u32 *reg; 227 u32 *reg;
235 228
@@ -326,14 +319,14 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
326 return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); 319 return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
327} 320}
328 321
329static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) 322void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
330{ 323{
331 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 324 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
332 325
333 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); 326 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
334} 327}
335 328
336static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) 329void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
337{ 330{
338 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 331 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
339 332
@@ -349,7 +342,7 @@ static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
349 vcpu->arch.vgic_cpu.pending_shared); 342 vcpu->arch.vgic_cpu.pending_shared);
350} 343}
351 344
352static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) 345void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
353{ 346{
354 if (irq < VGIC_NR_PRIVATE_IRQS) 347 if (irq < VGIC_NR_PRIVATE_IRQS)
355 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); 348 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
@@ -363,16 +356,6 @@ static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
363 return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); 356 return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
364} 357}
365 358
366static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
367{
368 return le32_to_cpu(*((u32 *)mmio->data)) & mask;
369}
370
371static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
372{
373 *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
374}
375
376/** 359/**
377 * vgic_reg_access - access vgic register 360 * vgic_reg_access - access vgic register
378 * @mmio: pointer to the data describing the mmio access 361 * @mmio: pointer to the data describing the mmio access
@@ -384,8 +367,8 @@ static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
384 * modes defined for vgic register access 367 * modes defined for vgic register access
385 * (read,raz,write-ignored,setbit,clearbit,write) 368 * (read,raz,write-ignored,setbit,clearbit,write)
386 */ 369 */
387static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, 370void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
388 phys_addr_t offset, int mode) 371 phys_addr_t offset, int mode)
389{ 372{
390 int word_offset = (offset & 3) * 8; 373 int word_offset = (offset & 3) * 8;
391 u32 mask = (1UL << (mmio->len * 8)) - 1; 374 u32 mask = (1UL << (mmio->len * 8)) - 1;
@@ -434,107 +417,58 @@ static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
434 } 417 }
435} 418}
436 419
437static bool handle_mmio_misc(struct kvm_vcpu *vcpu, 420bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
438 struct kvm_exit_mmio *mmio, phys_addr_t offset) 421 phys_addr_t offset)
439{
440 u32 reg;
441 u32 word_offset = offset & 3;
442
443 switch (offset & ~3) {
444 case 0: /* GICD_CTLR */
445 reg = vcpu->kvm->arch.vgic.enabled;
446 vgic_reg_access(mmio, &reg, word_offset,
447 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
448 if (mmio->is_write) {
449 vcpu->kvm->arch.vgic.enabled = reg & 1;
450 vgic_update_state(vcpu->kvm);
451 return true;
452 }
453 break;
454
455 case 4: /* GICD_TYPER */
456 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
457 reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
458 vgic_reg_access(mmio, &reg, word_offset,
459 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
460 break;
461
462 case 8: /* GICD_IIDR */
463 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
464 vgic_reg_access(mmio, &reg, word_offset,
465 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
466 break;
467 }
468
469 return false;
470}
471
472static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
473 struct kvm_exit_mmio *mmio, phys_addr_t offset)
474{ 422{
475 vgic_reg_access(mmio, NULL, offset, 423 vgic_reg_access(mmio, NULL, offset,
476 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); 424 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
477 return false; 425 return false;
478} 426}
479 427
480static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, 428bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
481 struct kvm_exit_mmio *mmio, 429 phys_addr_t offset, int vcpu_id, int access)
482 phys_addr_t offset)
483{ 430{
484 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, 431 u32 *reg;
485 vcpu->vcpu_id, offset); 432 int mode = ACCESS_READ_VALUE | access;
486 vgic_reg_access(mmio, reg, offset, 433 struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);
487 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
488 if (mmio->is_write) {
489 vgic_update_state(vcpu->kvm);
490 return true;
491 }
492
493 return false;
494}
495 434
496static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, 435 reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
497 struct kvm_exit_mmio *mmio, 436 vgic_reg_access(mmio, reg, offset, mode);
498 phys_addr_t offset)
499{
500 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
501 vcpu->vcpu_id, offset);
502 vgic_reg_access(mmio, reg, offset,
503 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
504 if (mmio->is_write) { 437 if (mmio->is_write) {
505 if (offset < 4) /* Force SGI enabled */ 438 if (access & ACCESS_WRITE_CLEARBIT) {
506 *reg |= 0xffff; 439 if (offset < 4) /* Force SGI enabled */
507 vgic_retire_disabled_irqs(vcpu); 440 *reg |= 0xffff;
508 vgic_update_state(vcpu->kvm); 441 vgic_retire_disabled_irqs(target_vcpu);
442 }
443 vgic_update_state(kvm);
509 return true; 444 return true;
510 } 445 }
511 446
512 return false; 447 return false;
513} 448}
514 449
515static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, 450bool vgic_handle_set_pending_reg(struct kvm *kvm,
516 struct kvm_exit_mmio *mmio, 451 struct kvm_exit_mmio *mmio,
517 phys_addr_t offset) 452 phys_addr_t offset, int vcpu_id)
518{ 453{
519 u32 *reg, orig; 454 u32 *reg, orig;
520 u32 level_mask; 455 u32 level_mask;
521 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 456 int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;
457 struct vgic_dist *dist = &kvm->arch.vgic;
522 458
523 reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); 459 reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset);
524 level_mask = (~(*reg)); 460 level_mask = (~(*reg));
525 461
526 /* Mark both level and edge triggered irqs as pending */ 462 /* Mark both level and edge triggered irqs as pending */
527 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); 463 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
528 orig = *reg; 464 orig = *reg;
529 vgic_reg_access(mmio, reg, offset, 465 vgic_reg_access(mmio, reg, offset, mode);
530 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
531 466
532 if (mmio->is_write) { 467 if (mmio->is_write) {
533 /* Set the soft-pending flag only for level-triggered irqs */ 468 /* Set the soft-pending flag only for level-triggered irqs */
534 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, 469 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
535 vcpu->vcpu_id, offset); 470 vcpu_id, offset);
536 vgic_reg_access(mmio, reg, offset, 471 vgic_reg_access(mmio, reg, offset, mode);
537 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
538 *reg &= level_mask; 472 *reg &= level_mask;
539 473
540 /* Ignore writes to SGIs */ 474 /* Ignore writes to SGIs */
@@ -543,31 +477,30 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
543 *reg |= orig & 0xffff; 477 *reg |= orig & 0xffff;
544 } 478 }
545 479
546 vgic_update_state(vcpu->kvm); 480 vgic_update_state(kvm);
547 return true; 481 return true;
548 } 482 }
549 483
550 return false; 484 return false;
551} 485}
552 486
553static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, 487bool vgic_handle_clear_pending_reg(struct kvm *kvm,
554 struct kvm_exit_mmio *mmio, 488 struct kvm_exit_mmio *mmio,
555 phys_addr_t offset) 489 phys_addr_t offset, int vcpu_id)
556{ 490{
557 u32 *level_active; 491 u32 *level_active;
558 u32 *reg, orig; 492 u32 *reg, orig;
559 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 493 int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
494 struct vgic_dist *dist = &kvm->arch.vgic;
560 495
561 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); 496 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
562 orig = *reg; 497 orig = *reg;
563 vgic_reg_access(mmio, reg, offset, 498 vgic_reg_access(mmio, reg, offset, mode);
564 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
565 if (mmio->is_write) { 499 if (mmio->is_write) {
566 /* Re-set level triggered level-active interrupts */ 500 /* Re-set level triggered level-active interrupts */
567 level_active = vgic_bitmap_get_reg(&dist->irq_level, 501 level_active = vgic_bitmap_get_reg(&dist->irq_level,
568 vcpu->vcpu_id, offset); 502 vcpu_id, offset);
569 reg = vgic_bitmap_get_reg(&dist->irq_pending, 503 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
570 vcpu->vcpu_id, offset);
571 *reg |= *level_active; 504 *reg |= *level_active;
572 505
573 /* Ignore writes to SGIs */ 506 /* Ignore writes to SGIs */
@@ -578,101 +511,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
578 511
579 /* Clear soft-pending flags */ 512 /* Clear soft-pending flags */
580 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, 513 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
581 vcpu->vcpu_id, offset); 514 vcpu_id, offset);
582 vgic_reg_access(mmio, reg, offset, 515 vgic_reg_access(mmio, reg, offset, mode);
583 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
584 516
585 vgic_update_state(vcpu->kvm); 517 vgic_update_state(kvm);
586 return true; 518 return true;
587 } 519 }
588
589 return false;
590}
591
592static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
593 struct kvm_exit_mmio *mmio,
594 phys_addr_t offset)
595{
596 u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
597 vcpu->vcpu_id, offset);
598 vgic_reg_access(mmio, reg, offset,
599 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
600 return false;
601}
602
603#define GICD_ITARGETSR_SIZE 32
604#define GICD_CPUTARGETS_BITS 8
605#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
606static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
607{
608 struct vgic_dist *dist = &kvm->arch.vgic;
609 int i;
610 u32 val = 0;
611
612 irq -= VGIC_NR_PRIVATE_IRQS;
613
614 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
615 val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
616
617 return val;
618}
619
620static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
621{
622 struct vgic_dist *dist = &kvm->arch.vgic;
623 struct kvm_vcpu *vcpu;
624 int i, c;
625 unsigned long *bmap;
626 u32 target;
627
628 irq -= VGIC_NR_PRIVATE_IRQS;
629
630 /*
631 * Pick the LSB in each byte. This ensures we target exactly
632 * one vcpu per IRQ. If the byte is null, assume we target
633 * CPU0.
634 */
635 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
636 int shift = i * GICD_CPUTARGETS_BITS;
637 target = ffs((val >> shift) & 0xffU);
638 target = target ? (target - 1) : 0;
639 dist->irq_spi_cpu[irq + i] = target;
640 kvm_for_each_vcpu(c, vcpu, kvm) {
641 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
642 if (c == target)
643 set_bit(irq + i, bmap);
644 else
645 clear_bit(irq + i, bmap);
646 }
647 }
648}
649
650static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
651 struct kvm_exit_mmio *mmio,
652 phys_addr_t offset)
653{
654 u32 reg;
655
656 /* We treat the banked interrupts targets as read-only */
657 if (offset < 32) {
658 u32 roreg = 1 << vcpu->vcpu_id;
659 roreg |= roreg << 8;
660 roreg |= roreg << 16;
661
662 vgic_reg_access(mmio, &roreg, offset,
663 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
664 return false;
665 }
666
667 reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
668 vgic_reg_access(mmio, &reg, offset,
669 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
670 if (mmio->is_write) {
671 vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
672 vgic_update_state(vcpu->kvm);
673 return true;
674 }
675
676 return false; 520 return false;
677} 521}
678 522
@@ -711,14 +555,10 @@ static u16 vgic_cfg_compress(u32 val)
711 * LSB is always 0. As such, we only keep the upper bit, and use the 555 * LSB is always 0. As such, we only keep the upper bit, and use the
712 * two above functions to compress/expand the bits 556 * two above functions to compress/expand the bits
713 */ 557 */
714static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, 558bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
715 struct kvm_exit_mmio *mmio, phys_addr_t offset) 559 phys_addr_t offset)
716{ 560{
717 u32 val; 561 u32 val;
718 u32 *reg;
719
720 reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
721 vcpu->vcpu_id, offset >> 1);
722 562
723 if (offset & 4) 563 if (offset & 4)
724 val = *reg >> 16; 564 val = *reg >> 16;
@@ -747,21 +587,6 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
747 return false; 587 return false;
748} 588}
749 589
750static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
751 struct kvm_exit_mmio *mmio, phys_addr_t offset)
752{
753 u32 reg;
754 vgic_reg_access(mmio, &reg, offset,
755 ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
756 if (mmio->is_write) {
757 vgic_dispatch_sgi(vcpu, reg);
758 vgic_update_state(vcpu->kvm);
759 return true;
760 }
761
762 return false;
763}
764
765/** 590/**
766 * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor 591 * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
767 * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs 592 * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@ -774,11 +599,9 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
774 * to the distributor but the active state stays in the LRs, because we don't 599 * to the distributor but the active state stays in the LRs, because we don't
775 * track the active state on the distributor side. 600 * track the active state on the distributor side.
776 */ 601 */
777static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) 602void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
778{ 603{
779 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
780 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 604 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
781 int vcpu_id = vcpu->vcpu_id;
782 int i; 605 int i;
783 606
784 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { 607 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
@@ -805,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
805 */ 628 */
806 vgic_dist_irq_set_pending(vcpu, lr.irq); 629 vgic_dist_irq_set_pending(vcpu, lr.irq);
807 if (lr.irq < VGIC_NR_SGIS) 630 if (lr.irq < VGIC_NR_SGIS)
808 *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; 631 add_sgi_source(vcpu, lr.irq, lr.source);
809 lr.state &= ~LR_STATE_PENDING; 632 lr.state &= ~LR_STATE_PENDING;
810 vgic_set_lr(vcpu, i, lr); 633 vgic_set_lr(vcpu, i, lr);
811 634
@@ -824,188 +647,12 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
824 } 647 }
825} 648}
826 649
827/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ 650const
828static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, 651struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
829 struct kvm_exit_mmio *mmio,
830 phys_addr_t offset)
831{
832 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
833 int sgi;
834 int min_sgi = (offset & ~0x3);
835 int max_sgi = min_sgi + 3;
836 int vcpu_id = vcpu->vcpu_id;
837 u32 reg = 0;
838
839 /* Copy source SGIs from distributor side */
840 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
841 int shift = 8 * (sgi - min_sgi);
842 reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
843 }
844
845 mmio_data_write(mmio, ~0, reg);
846 return false;
847}
848
849static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
850 struct kvm_exit_mmio *mmio,
851 phys_addr_t offset, bool set)
852{
853 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
854 int sgi;
855 int min_sgi = (offset & ~0x3);
856 int max_sgi = min_sgi + 3;
857 int vcpu_id = vcpu->vcpu_id;
858 u32 reg;
859 bool updated = false;
860
861 reg = mmio_data_read(mmio, ~0);
862
863 /* Clear pending SGIs on the distributor */
864 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
865 u8 mask = reg >> (8 * (sgi - min_sgi));
866 u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
867 if (set) {
868 if ((*src & mask) != mask)
869 updated = true;
870 *src |= mask;
871 } else {
872 if (*src & mask)
873 updated = true;
874 *src &= ~mask;
875 }
876 }
877
878 if (updated)
879 vgic_update_state(vcpu->kvm);
880
881 return updated;
882}
883
884static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
885 struct kvm_exit_mmio *mmio,
886 phys_addr_t offset)
887{
888 if (!mmio->is_write)
889 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
890 else
891 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
892}
893
894static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
895 struct kvm_exit_mmio *mmio,
896 phys_addr_t offset)
897{
898 if (!mmio->is_write)
899 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
900 else
901 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
902}
903
904/*
905 * I would have liked to use the kvm_bus_io_*() API instead, but it
906 * cannot cope with banked registers (only the VM pointer is passed
907 * around, and we need the vcpu). One of these days, someone please
908 * fix it!
909 */
910struct mmio_range {
911 phys_addr_t base;
912 unsigned long len;
913 int bits_per_irq;
914 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
915 phys_addr_t offset);
916};
917
918static const struct mmio_range vgic_dist_ranges[] = {
919 {
920 .base = GIC_DIST_CTRL,
921 .len = 12,
922 .bits_per_irq = 0,
923 .handle_mmio = handle_mmio_misc,
924 },
925 {
926 .base = GIC_DIST_IGROUP,
927 .len = VGIC_MAX_IRQS / 8,
928 .bits_per_irq = 1,
929 .handle_mmio = handle_mmio_raz_wi,
930 },
931 {
932 .base = GIC_DIST_ENABLE_SET,
933 .len = VGIC_MAX_IRQS / 8,
934 .bits_per_irq = 1,
935 .handle_mmio = handle_mmio_set_enable_reg,
936 },
937 {
938 .base = GIC_DIST_ENABLE_CLEAR,
939 .len = VGIC_MAX_IRQS / 8,
940 .bits_per_irq = 1,
941 .handle_mmio = handle_mmio_clear_enable_reg,
942 },
943 {
944 .base = GIC_DIST_PENDING_SET,
945 .len = VGIC_MAX_IRQS / 8,
946 .bits_per_irq = 1,
947 .handle_mmio = handle_mmio_set_pending_reg,
948 },
949 {
950 .base = GIC_DIST_PENDING_CLEAR,
951 .len = VGIC_MAX_IRQS / 8,
952 .bits_per_irq = 1,
953 .handle_mmio = handle_mmio_clear_pending_reg,
954 },
955 {
956 .base = GIC_DIST_ACTIVE_SET,
957 .len = VGIC_MAX_IRQS / 8,
958 .bits_per_irq = 1,
959 .handle_mmio = handle_mmio_raz_wi,
960 },
961 {
962 .base = GIC_DIST_ACTIVE_CLEAR,
963 .len = VGIC_MAX_IRQS / 8,
964 .bits_per_irq = 1,
965 .handle_mmio = handle_mmio_raz_wi,
966 },
967 {
968 .base = GIC_DIST_PRI,
969 .len = VGIC_MAX_IRQS,
970 .bits_per_irq = 8,
971 .handle_mmio = handle_mmio_priority_reg,
972 },
973 {
974 .base = GIC_DIST_TARGET,
975 .len = VGIC_MAX_IRQS,
976 .bits_per_irq = 8,
977 .handle_mmio = handle_mmio_target_reg,
978 },
979 {
980 .base = GIC_DIST_CONFIG,
981 .len = VGIC_MAX_IRQS / 4,
982 .bits_per_irq = 2,
983 .handle_mmio = handle_mmio_cfg_reg,
984 },
985 {
986 .base = GIC_DIST_SOFTINT,
987 .len = 4,
988 .handle_mmio = handle_mmio_sgi_reg,
989 },
990 {
991 .base = GIC_DIST_SGI_PENDING_CLEAR,
992 .len = VGIC_NR_SGIS,
993 .handle_mmio = handle_mmio_sgi_clear,
994 },
995 {
996 .base = GIC_DIST_SGI_PENDING_SET,
997 .len = VGIC_NR_SGIS,
998 .handle_mmio = handle_mmio_sgi_set,
999 },
1000 {}
1001};
1002
1003static const
1004struct mmio_range *find_matching_range(const struct mmio_range *ranges,
1005 struct kvm_exit_mmio *mmio, 652 struct kvm_exit_mmio *mmio,
1006 phys_addr_t offset) 653 phys_addr_t offset)
1007{ 654{
1008 const struct mmio_range *r = ranges; 655 const struct kvm_mmio_range *r = ranges;
1009 656
1010 while (r->len) { 657 while (r->len) {
1011 if (offset >= r->base && 658 if (offset >= r->base &&
@@ -1018,7 +665,7 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
1018} 665}
1019 666
1020static bool vgic_validate_access(const struct vgic_dist *dist, 667static bool vgic_validate_access(const struct vgic_dist *dist,
1021 const struct mmio_range *range, 668 const struct kvm_mmio_range *range,
1022 unsigned long offset) 669 unsigned long offset)
1023{ 670{
1024 int irq; 671 int irq;
@@ -1033,37 +680,76 @@ static bool vgic_validate_access(const struct vgic_dist *dist,
1033 return true; 680 return true;
1034} 681}
1035 682
683/*
684 * Call the respective handler function for the given range.
685 * We split up any 64-bit accesses into two consecutive 32-bit
686 * handler calls and merge the result afterwards.
687 * We do this in a little-endian fashion regardless of the host's
688 * or guest's endianness, because the GIC is always LE and the rest of
689 * the code (vgic_reg_access) already treats the data as LE.
690 * At this point we have already identified the handler function, so
691 * range points to that one entry and the offset is relative to it.
692 */
693static bool call_range_handler(struct kvm_vcpu *vcpu,
694 struct kvm_exit_mmio *mmio,
695 unsigned long offset,
696 const struct kvm_mmio_range *range)
697{
698 u32 *data32 = (void *)mmio->data;
699 struct kvm_exit_mmio mmio32;
700 bool ret;
701
702 if (likely(mmio->len <= 4))
703 return range->handle_mmio(vcpu, mmio, offset);
704
705 /*
706 * Any access bigger than 4 bytes (that we currently handle in KVM)
707 * is actually 8 bytes long, caused by a 64-bit access
708 */
709
710 mmio32.len = 4;
711 mmio32.is_write = mmio->is_write;
712 mmio32.private = mmio->private;
713
714 mmio32.phys_addr = mmio->phys_addr + 4;
715 if (mmio->is_write)
716 *(u32 *)mmio32.data = data32[1];
717 ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
718 if (!mmio->is_write)
719 data32[1] = *(u32 *)mmio32.data;
720
721 mmio32.phys_addr = mmio->phys_addr;
722 if (mmio->is_write)
723 *(u32 *)mmio32.data = data32[0];
724 ret |= range->handle_mmio(vcpu, &mmio32, offset);
725 if (!mmio->is_write)
726 data32[0] = *(u32 *)mmio32.data;
727
728 return ret;
729}
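
The split is just two 32-bit views onto the same 8-byte buffer, low word at offset +0 and high word at offset +4. A user-space sketch of that reassembly (handle32 is a dummy standing in for range->handle_mmio; not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Dummy 32-bit register handler: just reports what it was asked to read. */
static bool handle32(uint64_t offset, uint32_t *data)
{
	*data = 0x11110000u + (uint32_t)offset;	/* fake register contents */
	return false;
}

/* Split an 8-byte read into two 4-byte handler calls and merge the halves. */
static bool read64(uint64_t offset, uint8_t data[8])
{
	uint32_t lo, hi;
	bool ret;

	ret  = handle32(offset + 4, &hi);	/* upper word first, as above */
	ret |= handle32(offset, &lo);		/* then the lower word */

	memcpy(data, &lo, 4);			/* bytes 0..3: low word (LE) */
	memcpy(data + 4, &hi, 4);		/* bytes 4..7: high word */
	return ret;
}

int main(void)
{
	uint8_t buf[8];
	uint64_t val;

	read64(0x6100, buf);			/* e.g. one GICD_IROUTERn register */
	memcpy(&val, buf, 8);			/* little-endian host assumed here */
	printf("value = 0x%016llx\n", (unsigned long long)val);
	return 0;
}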
730
1036/** 731/**
1037 * vgic_handle_mmio - handle an in-kernel MMIO access 732 * vgic_handle_mmio_range - handle an in-kernel MMIO access
1038 * @vcpu: pointer to the vcpu performing the access 733 * @vcpu: pointer to the vcpu performing the access
1039 * @run: pointer to the kvm_run structure 734 * @run: pointer to the kvm_run structure
1040 * @mmio: pointer to the data describing the access 735 * @mmio: pointer to the data describing the access
736 * @ranges: array of MMIO ranges in a given region
737 * @mmio_base: base address of that region
1041 * 738 *
1042 * returns true if the MMIO access has been performed in kernel space, 739 * returns true if the MMIO access could be performed
1043 * and false if it needs to be emulated in user space.
1044 */ 740 */
1045bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, 741bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
1046 struct kvm_exit_mmio *mmio) 742 struct kvm_exit_mmio *mmio,
743 const struct kvm_mmio_range *ranges,
744 unsigned long mmio_base)
1047{ 745{
1048 const struct mmio_range *range; 746 const struct kvm_mmio_range *range;
1049 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 747 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1050 unsigned long base = dist->vgic_dist_base;
1051 bool updated_state; 748 bool updated_state;
1052 unsigned long offset; 749 unsigned long offset;
1053 750
1054 if (!irqchip_in_kernel(vcpu->kvm) || 751 offset = mmio->phys_addr - mmio_base;
1055 mmio->phys_addr < base || 752 range = vgic_find_range(ranges, mmio, offset);
1056 (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
1057 return false;
1058
1059 /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
1060 if (mmio->len > 4) {
1061 kvm_inject_dabt(vcpu, mmio->phys_addr);
1062 return true;
1063 }
1064
1065 offset = mmio->phys_addr - base;
1066 range = find_matching_range(vgic_dist_ranges, mmio, offset);
1067 if (unlikely(!range || !range->handle_mmio)) { 753 if (unlikely(!range || !range->handle_mmio)) {
1068 pr_warn("Unhandled access %d %08llx %d\n", 754 pr_warn("Unhandled access %d %08llx %d\n",
1069 mmio->is_write, mmio->phys_addr, mmio->len); 755 mmio->is_write, mmio->phys_addr, mmio->len);
@@ -1071,12 +757,12 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
1071 } 757 }
1072 758
1073 spin_lock(&vcpu->kvm->arch.vgic.lock); 759 spin_lock(&vcpu->kvm->arch.vgic.lock);
1074 offset = mmio->phys_addr - range->base - base; 760 offset -= range->base;
1075 if (vgic_validate_access(dist, range, offset)) { 761 if (vgic_validate_access(dist, range, offset)) {
1076 updated_state = range->handle_mmio(vcpu, mmio, offset); 762 updated_state = call_range_handler(vcpu, mmio, offset, range);
1077 } else { 763 } else {
1078 vgic_reg_access(mmio, NULL, offset, 764 if (!mmio->is_write)
1079 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); 765 memset(mmio->data, 0, mmio->len);
1080 updated_state = false; 766 updated_state = false;
1081 } 767 }
1082 spin_unlock(&vcpu->kvm->arch.vgic.lock); 768 spin_unlock(&vcpu->kvm->arch.vgic.lock);
@@ -1089,50 +775,28 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
1089 return true; 775 return true;
1090} 776}
1091 777
1092static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) 778/**
1093{ 779 * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
1094 return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; 780 * @vcpu: pointer to the vcpu performing the access
1095} 781 * @run: pointer to the kvm_run structure
1096 782 * @mmio: pointer to the data describing the access
1097static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) 783 *
784 * returns true if the MMIO access has been performed in kernel space,
785 * and false if it needs to be emulated in user space.
786 * Calls the actual handling routine for the selected VGIC model.
787 */
788bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
789 struct kvm_exit_mmio *mmio)
1098{ 790{
1099 struct kvm *kvm = vcpu->kvm; 791 if (!irqchip_in_kernel(vcpu->kvm))
1100 struct vgic_dist *dist = &kvm->arch.vgic; 792 return false;
1101 int nrcpus = atomic_read(&kvm->online_vcpus);
1102 u8 target_cpus;
1103 int sgi, mode, c, vcpu_id;
1104
1105 vcpu_id = vcpu->vcpu_id;
1106
1107 sgi = reg & 0xf;
1108 target_cpus = (reg >> 16) & 0xff;
1109 mode = (reg >> 24) & 3;
1110
1111 switch (mode) {
1112 case 0:
1113 if (!target_cpus)
1114 return;
1115 break;
1116
1117 case 1:
1118 target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
1119 break;
1120
1121 case 2:
1122 target_cpus = 1 << vcpu_id;
1123 break;
1124 }
1125
1126 kvm_for_each_vcpu(c, vcpu, kvm) {
1127 if (target_cpus & 1) {
1128 /* Flag the SGI as pending */
1129 vgic_dist_irq_set_pending(vcpu, sgi);
1130 *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
1131 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
1132 }
1133 793
1134 target_cpus >>= 1; 794 /*
1135 } 795 * This will currently call either vgic_v2_handle_mmio() or
796 * vgic_v3_handle_mmio(), which in turn will call
797 * vgic_handle_mmio_range() defined above.
798 */
799 return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
1136} 800}
1137 801
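The vm_ops indirection referenced in the comment above is what keeps this file model-agnostic: init_vgic_model() (further down in this patch) picks GICv2 or GICv3 emulation, and that model fills in the callback table that vgic_handle_mmio() and vgic_init() then call through. A compilable sketch of that shape, with the struct layout assumed for illustration (the real definitions live in include/kvm/arm_vgic.h and the vgic-v2/v3 emulation files):

#include <stdbool.h>
#include <stdio.h>

/* Assumed shape of the per-model callback table, for illustration only. */
struct vgic_vm_ops {
        bool (*handle_mmio)(void *vcpu, void *run, void *mmio);
        int  (*init_model)(void *kvm);
};

static bool v2_handle_mmio(void *vcpu, void *run, void *mmio)
{
        puts("GICv2 emulation handles this access");
        return true;
}

static int v2_init_model(void *kvm)
{
        puts("GICv2-specific initialisation");
        return 0;
}

int main(void)
{
        /* The model-specific init fills the table; the generic code then only
         * ever makes indirect calls like the one in vgic_handle_mmio() above. */
        struct vgic_vm_ops vm_ops = {
                .handle_mmio = v2_handle_mmio,
                .init_model  = v2_init_model,
        };

        vm_ops.init_model(NULL);
        vm_ops.handle_mmio(NULL, NULL, NULL);
        return 0;
}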
1138static int vgic_nr_shared_irqs(struct vgic_dist *dist) 802static int vgic_nr_shared_irqs(struct vgic_dist *dist)
@@ -1173,7 +837,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
1173 * Update the interrupt state and determine which CPUs have pending 837 * Update the interrupt state and determine which CPUs have pending
1174 * interrupts. Must be called with distributor lock held. 838 * interrupts. Must be called with distributor lock held.
1175 */ 839 */
1176static void vgic_update_state(struct kvm *kvm) 840void vgic_update_state(struct kvm *kvm)
1177{ 841{
1178 struct vgic_dist *dist = &kvm->arch.vgic; 842 struct vgic_dist *dist = &kvm->arch.vgic;
1179 struct kvm_vcpu *vcpu; 843 struct kvm_vcpu *vcpu;
@@ -1234,12 +898,12 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
1234 vgic_ops->disable_underflow(vcpu); 898 vgic_ops->disable_underflow(vcpu);
1235} 899}
1236 900
1237static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 901void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1238{ 902{
1239 vgic_ops->get_vmcr(vcpu, vmcr); 903 vgic_ops->get_vmcr(vcpu, vmcr);
1240} 904}
1241 905
1242static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 906void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1243{ 907{
1244 vgic_ops->set_vmcr(vcpu, vmcr); 908 vgic_ops->set_vmcr(vcpu, vmcr);
1245} 909}
@@ -1288,8 +952,9 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1288/* 952/*
1289 * Queue an interrupt to a CPU virtual interface. Return true on success, 953 * Queue an interrupt to a CPU virtual interface. Return true on success,
1290 * or false if it wasn't possible to queue it. 954 * or false if it wasn't possible to queue it.
955 * sgi_source must be zero for any non-SGI interrupts.
1291 */ 956 */
1292static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 957bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1293{ 958{
1294 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 959 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1295 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 960 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -1338,37 +1003,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1338 return true; 1003 return true;
1339} 1004}
1340 1005
1341static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1342{
1343 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1344 unsigned long sources;
1345 int vcpu_id = vcpu->vcpu_id;
1346 int c;
1347
1348 sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
1349
1350 for_each_set_bit(c, &sources, dist->nr_cpus) {
1351 if (vgic_queue_irq(vcpu, c, irq))
1352 clear_bit(c, &sources);
1353 }
1354
1355 *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
1356
1357 /*
1358 * If the sources bitmap has been cleared it means that we
1359 * could queue all the SGIs onto link registers (see the
1360 * clear_bit above), and therefore we are done with them in
1361 * our emulated gic and can get rid of them.
1362 */
1363 if (!sources) {
1364 vgic_dist_irq_clear_pending(vcpu, irq);
1365 vgic_cpu_irq_clear(vcpu, irq);
1366 return true;
1367 }
1368
1369 return false;
1370}
1371
1372static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) 1006static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1373{ 1007{
1374 if (!vgic_can_sample_irq(vcpu, irq)) 1008 if (!vgic_can_sample_irq(vcpu, irq))
@@ -1413,7 +1047,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1413 1047
1414 /* SGIs */ 1048 /* SGIs */
1415 for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { 1049 for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
1416 if (!vgic_queue_sgi(vcpu, i)) 1050 if (!queue_sgi(vcpu, i))
1417 overflow = 1; 1051 overflow = 1;
1418 } 1052 }
1419 1053
@@ -1575,7 +1209,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1575 return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); 1209 return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1576} 1210}
1577 1211
1578static void vgic_kick_vcpus(struct kvm *kvm) 1212void vgic_kick_vcpus(struct kvm *kvm)
1579{ 1213{
1580 struct kvm_vcpu *vcpu; 1214 struct kvm_vcpu *vcpu;
1581 int c; 1215 int c;
@@ -1615,7 +1249,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1615 struct kvm_vcpu *vcpu; 1249 struct kvm_vcpu *vcpu;
1616 int edge_triggered, level_triggered; 1250 int edge_triggered, level_triggered;
1617 int enabled; 1251 int enabled;
1618 bool ret = true; 1252 bool ret = true, can_inject = true;
1619 1253
1620 spin_lock(&dist->lock); 1254 spin_lock(&dist->lock);
1621 1255
@@ -1630,6 +1264,11 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1630 1264
1631 if (irq_num >= VGIC_NR_PRIVATE_IRQS) { 1265 if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1632 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; 1266 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1267 if (cpuid == VCPU_NOT_ALLOCATED) {
1268 /* Pretend we use CPU0, and prevent injection */
1269 cpuid = 0;
1270 can_inject = false;
1271 }
1633 vcpu = kvm_get_vcpu(kvm, cpuid); 1272 vcpu = kvm_get_vcpu(kvm, cpuid);
1634 } 1273 }
1635 1274
@@ -1652,7 +1291,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1652 1291
1653 enabled = vgic_irq_is_enabled(vcpu, irq_num); 1292 enabled = vgic_irq_is_enabled(vcpu, irq_num);
1654 1293
1655 if (!enabled) { 1294 if (!enabled || !can_inject) {
1656 ret = false; 1295 ret = false;
1657 goto out; 1296 goto out;
1658 } 1297 }
@@ -1698,6 +1337,16 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1698 int vcpu_id; 1337 int vcpu_id;
1699 1338
1700 if (unlikely(!vgic_initialized(kvm))) { 1339 if (unlikely(!vgic_initialized(kvm))) {
1340 /*
1341 * We only provide the automatic initialization of the VGIC
1342 * for the legacy case of a GICv2. Any other type must
 1343 * be explicitly initialized once set up with the respective
1344 * KVM device call.
1345 */
1346 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
1347 ret = -EBUSY;
1348 goto out;
1349 }
1701 mutex_lock(&kvm->lock); 1350 mutex_lock(&kvm->lock);
1702 ret = vgic_init(kvm); 1351 ret = vgic_init(kvm);
1703 mutex_unlock(&kvm->lock); 1352 mutex_unlock(&kvm->lock);
@@ -1762,6 +1411,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1762 return 0; 1411 return 0;
1763} 1412}
1764 1413
1414/**
1415 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
1416 *
 1417 * The host's GIC naturally limits the maximum number of VCPUs a guest
1418 * can use.
1419 */
1420int kvm_vgic_get_max_vcpus(void)
1421{
1422 return vgic->max_gic_vcpus;
1423}
1424
1765void kvm_vgic_destroy(struct kvm *kvm) 1425void kvm_vgic_destroy(struct kvm *kvm)
1766{ 1426{
1767 struct vgic_dist *dist = &kvm->arch.vgic; 1427 struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1784,6 +1444,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
1784 } 1444 }
1785 kfree(dist->irq_sgi_sources); 1445 kfree(dist->irq_sgi_sources);
1786 kfree(dist->irq_spi_cpu); 1446 kfree(dist->irq_spi_cpu);
1447 kfree(dist->irq_spi_mpidr);
1787 kfree(dist->irq_spi_target); 1448 kfree(dist->irq_spi_target);
1788 kfree(dist->irq_pending_on_cpu); 1449 kfree(dist->irq_pending_on_cpu);
1789 dist->irq_sgi_sources = NULL; 1450 dist->irq_sgi_sources = NULL;
@@ -1797,7 +1458,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
1797 * Allocate and initialize the various data structures. Must be called 1458 * Allocate and initialize the various data structures. Must be called
1798 * with kvm->lock held! 1459 * with kvm->lock held!
1799 */ 1460 */
1800static int vgic_init(struct kvm *kvm) 1461int vgic_init(struct kvm *kvm)
1801{ 1462{
1802 struct vgic_dist *dist = &kvm->arch.vgic; 1463 struct vgic_dist *dist = &kvm->arch.vgic;
1803 struct kvm_vcpu *vcpu; 1464 struct kvm_vcpu *vcpu;
@@ -1809,7 +1470,7 @@ static int vgic_init(struct kvm *kvm)
1809 1470
1810 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); 1471 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
1811 if (!nr_cpus) /* No vcpus? Can't be good... */ 1472 if (!nr_cpus) /* No vcpus? Can't be good... */
1812 return -EINVAL; 1473 return -ENODEV;
1813 1474
1814 /* 1475 /*
1815 * If nobody configured the number of interrupts, use the 1476 * If nobody configured the number of interrupts, use the
@@ -1852,8 +1513,9 @@ static int vgic_init(struct kvm *kvm)
1852 if (ret) 1513 if (ret)
1853 goto out; 1514 goto out;
1854 1515
1855 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) 1516 ret = kvm->arch.vgic.vm_ops.init_model(kvm);
1856 vgic_set_target_reg(kvm, 0, i); 1517 if (ret)
1518 goto out;
1857 1519
1858 kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { 1520 kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
1859 ret = vgic_vcpu_init_maps(vcpu, nr_irqs); 1521 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
@@ -1882,72 +1544,49 @@ out:
1882 return ret; 1544 return ret;
1883} 1545}
1884 1546
1885/** 1547static int init_vgic_model(struct kvm *kvm, int type)
1886 * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
1887 * @kvm: pointer to the kvm struct
1888 *
1889 * Map the virtual CPU interface into the VM before running any VCPUs. We
1890 * can't do this at creation time, because user space must first set the
1891 * virtual CPU interface address in the guest physical address space.
1892 */
1893int kvm_vgic_map_resources(struct kvm *kvm)
1894{ 1548{
1895 int ret = 0; 1549 switch (type) {
1896 1550 case KVM_DEV_TYPE_ARM_VGIC_V2:
1897 if (!irqchip_in_kernel(kvm)) 1551 vgic_v2_init_emulation(kvm);
1898 return 0; 1552 break;
1899 1553#ifdef CONFIG_ARM_GIC_V3
1900 mutex_lock(&kvm->lock); 1554 case KVM_DEV_TYPE_ARM_VGIC_V3:
1901 1555 vgic_v3_init_emulation(kvm);
1902 if (vgic_ready(kvm)) 1556 break;
1903 goto out; 1557#endif
1904 1558 default:
1905 if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || 1559 return -ENODEV;
1906 IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
1907 kvm_err("Need to set vgic cpu and dist addresses first\n");
1908 ret = -ENXIO;
1909 goto out;
1910 }
1911
1912 /*
1913 * Initialize the vgic if this hasn't already been done on demand by
1914 * accessing the vgic state from userspace.
1915 */
1916 ret = vgic_init(kvm);
1917 if (ret) {
1918 kvm_err("Unable to allocate maps\n");
1919 goto out;
1920 } 1560 }
1921 1561
1922 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, 1562 if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus)
1923 vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE, 1563 return -E2BIG;
1924 true);
1925 if (ret) {
1926 kvm_err("Unable to remap VGIC CPU to VCPU\n");
1927 goto out;
1928 }
1929 1564
1930 kvm->arch.vgic.ready = true; 1565 return 0;
1931out:
1932 if (ret)
1933 kvm_vgic_destroy(kvm);
1934 mutex_unlock(&kvm->lock);
1935 return ret;
1936} 1566}
1937 1567
1938int kvm_vgic_create(struct kvm *kvm) 1568int kvm_vgic_create(struct kvm *kvm, u32 type)
1939{ 1569{
1940 int i, vcpu_lock_idx = -1, ret; 1570 int i, vcpu_lock_idx = -1, ret;
1941 struct kvm_vcpu *vcpu; 1571 struct kvm_vcpu *vcpu;
1942 1572
1943 mutex_lock(&kvm->lock); 1573 mutex_lock(&kvm->lock);
1944 1574
1945 if (kvm->arch.vgic.vctrl_base) { 1575 if (irqchip_in_kernel(kvm)) {
1946 ret = -EEXIST; 1576 ret = -EEXIST;
1947 goto out; 1577 goto out;
1948 } 1578 }
1949 1579
1950 /* 1580 /*
1581 * This function is also called by the KVM_CREATE_IRQCHIP handler,
 1582 * which has not yet had a chance to check the availability of the GICv2
1583 * emulation. So check this here again. KVM_CREATE_DEVICE does
1584 * the proper checks already.
1585 */
1586 if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
1587 return -ENODEV;
1588
1589 /*
1951 * Any time a vcpu is run, vcpu_load is called which tries to grab the 1590 * Any time a vcpu is run, vcpu_load is called which tries to grab the
1952 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure 1591 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
1953 * that no other VCPUs are run while we create the vgic. 1592 * that no other VCPUs are run while we create the vgic.
@@ -1965,11 +1604,17 @@ int kvm_vgic_create(struct kvm *kvm)
1965 } 1604 }
1966 ret = 0; 1605 ret = 0;
1967 1606
1607 ret = init_vgic_model(kvm, type);
1608 if (ret)
1609 goto out_unlock;
1610
1968 spin_lock_init(&kvm->arch.vgic.lock); 1611 spin_lock_init(&kvm->arch.vgic.lock);
1969 kvm->arch.vgic.in_kernel = true; 1612 kvm->arch.vgic.in_kernel = true;
1613 kvm->arch.vgic.vgic_model = type;
1970 kvm->arch.vgic.vctrl_base = vgic->vctrl_base; 1614 kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
1971 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; 1615 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
1972 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; 1616 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
1617 kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
1973 1618
1974out_unlock: 1619out_unlock:
1975 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { 1620 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
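The vcpu->mutex comment earlier in this function describes a reusable pattern: try-lock every VCPU, and if any of them is currently busy, back out in reverse order instead of blocking. A plain-pthreads sketch of that pattern (illustration only; the kernel side uses mutex_trylock() on each vcpu->mutex and unwinds at the out_unlock label shown above):

#include <pthread.h>
#include <stdbool.h>

#define NR_VCPUS 4
static pthread_mutex_t vcpu_mutex[NR_VCPUS];

static void unlock_vcpus(int lock_idx)
{
        for (; lock_idx >= 0; lock_idx--)        /* mirrors the out_unlock loop */
                pthread_mutex_unlock(&vcpu_mutex[lock_idx]);
}

static bool lock_all_vcpus(void)
{
        int lock_idx = -1;

        for (int i = 0; i < NR_VCPUS; i++) {
                if (pthread_mutex_trylock(&vcpu_mutex[i]) != 0) {
                        unlock_vcpus(lock_idx);  /* a VCPU is running: back off */
                        return false;
                }
                lock_idx = i;                    /* remember how far we got     */
        }
        return true;
}

int main(void)
{
        for (int i = 0; i < NR_VCPUS; i++)
                pthread_mutex_init(&vcpu_mutex[i], NULL);

        if (lock_all_vcpus()) {
                /* ...safe to mutate VM-global state here... */
                unlock_vcpus(NR_VCPUS - 1);
        }
        return 0;
}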
@@ -2022,7 +1667,7 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
2022/** 1667/**
2023 * kvm_vgic_addr - set or get vgic VM base addresses 1668 * kvm_vgic_addr - set or get vgic VM base addresses
2024 * @kvm: pointer to the vm struct 1669 * @kvm: pointer to the vm struct
2025 * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX 1670 * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
2026 * @addr: pointer to address value 1671 * @addr: pointer to address value
2027 * @write: if true set the address in the VM address space, if false read the 1672 * @write: if true set the address in the VM address space, if false read the
2028 * address 1673 * address
@@ -2036,216 +1681,64 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
2036{ 1681{
2037 int r = 0; 1682 int r = 0;
2038 struct vgic_dist *vgic = &kvm->arch.vgic; 1683 struct vgic_dist *vgic = &kvm->arch.vgic;
1684 int type_needed;
1685 phys_addr_t *addr_ptr, block_size;
1686 phys_addr_t alignment;
2039 1687
2040 mutex_lock(&kvm->lock); 1688 mutex_lock(&kvm->lock);
2041 switch (type) { 1689 switch (type) {
2042 case KVM_VGIC_V2_ADDR_TYPE_DIST: 1690 case KVM_VGIC_V2_ADDR_TYPE_DIST:
2043 if (write) { 1691 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
2044 r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, 1692 addr_ptr = &vgic->vgic_dist_base;
2045 *addr, KVM_VGIC_V2_DIST_SIZE); 1693 block_size = KVM_VGIC_V2_DIST_SIZE;
2046 } else { 1694 alignment = SZ_4K;
2047 *addr = vgic->vgic_dist_base;
2048 }
2049 break; 1695 break;
2050 case KVM_VGIC_V2_ADDR_TYPE_CPU: 1696 case KVM_VGIC_V2_ADDR_TYPE_CPU:
2051 if (write) { 1697 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
2052 r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, 1698 addr_ptr = &vgic->vgic_cpu_base;
2053 *addr, KVM_VGIC_V2_CPU_SIZE); 1699 block_size = KVM_VGIC_V2_CPU_SIZE;
2054 } else { 1700 alignment = SZ_4K;
2055 *addr = vgic->vgic_cpu_base;
2056 }
2057 break; 1701 break;
2058 default: 1702#ifdef CONFIG_ARM_GIC_V3
2059 r = -ENODEV; 1703 case KVM_VGIC_V3_ADDR_TYPE_DIST:
2060 } 1704 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
2061 1705 addr_ptr = &vgic->vgic_dist_base;
2062 mutex_unlock(&kvm->lock); 1706 block_size = KVM_VGIC_V3_DIST_SIZE;
2063 return r; 1707 alignment = SZ_64K;
2064}
2065
2066static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
2067 struct kvm_exit_mmio *mmio, phys_addr_t offset)
2068{
2069 bool updated = false;
2070 struct vgic_vmcr vmcr;
2071 u32 *vmcr_field;
2072 u32 reg;
2073
2074 vgic_get_vmcr(vcpu, &vmcr);
2075
2076 switch (offset & ~0x3) {
2077 case GIC_CPU_CTRL:
2078 vmcr_field = &vmcr.ctlr;
2079 break;
2080 case GIC_CPU_PRIMASK:
2081 vmcr_field = &vmcr.pmr;
2082 break; 1708 break;
2083 case GIC_CPU_BINPOINT: 1709 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
2084 vmcr_field = &vmcr.bpr; 1710 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
2085 break; 1711 addr_ptr = &vgic->vgic_redist_base;
2086 case GIC_CPU_ALIAS_BINPOINT: 1712 block_size = KVM_VGIC_V3_REDIST_SIZE;
2087 vmcr_field = &vmcr.abpr; 1713 alignment = SZ_64K;
2088 break; 1714 break;
1715#endif
2089 default: 1716 default:
2090 BUG(); 1717 r = -ENODEV;
2091 }
2092
2093 if (!mmio->is_write) {
2094 reg = *vmcr_field;
2095 mmio_data_write(mmio, ~0, reg);
2096 } else {
2097 reg = mmio_data_read(mmio, ~0);
2098 if (reg != *vmcr_field) {
2099 *vmcr_field = reg;
2100 vgic_set_vmcr(vcpu, &vmcr);
2101 updated = true;
2102 }
2103 }
2104 return updated;
2105}
2106
2107static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
2108 struct kvm_exit_mmio *mmio, phys_addr_t offset)
2109{
2110 return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
2111}
2112
2113static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
2114 struct kvm_exit_mmio *mmio,
2115 phys_addr_t offset)
2116{
2117 u32 reg;
2118
2119 if (mmio->is_write)
2120 return false;
2121
2122 /* GICC_IIDR */
2123 reg = (PRODUCT_ID_KVM << 20) |
2124 (GICC_ARCH_VERSION_V2 << 16) |
2125 (IMPLEMENTER_ARM << 0);
2126 mmio_data_write(mmio, ~0, reg);
2127 return false;
2128}
2129
2130/*
2131 * CPU Interface Register accesses - these are not accessed by the VM, but by
2132 * user space for saving and restoring VGIC state.
2133 */
2134static const struct mmio_range vgic_cpu_ranges[] = {
2135 {
2136 .base = GIC_CPU_CTRL,
2137 .len = 12,
2138 .handle_mmio = handle_cpu_mmio_misc,
2139 },
2140 {
2141 .base = GIC_CPU_ALIAS_BINPOINT,
2142 .len = 4,
2143 .handle_mmio = handle_mmio_abpr,
2144 },
2145 {
2146 .base = GIC_CPU_ACTIVEPRIO,
2147 .len = 16,
2148 .handle_mmio = handle_mmio_raz_wi,
2149 },
2150 {
2151 .base = GIC_CPU_IDENT,
2152 .len = 4,
2153 .handle_mmio = handle_cpu_mmio_ident,
2154 },
2155};
2156
2157static int vgic_attr_regs_access(struct kvm_device *dev,
2158 struct kvm_device_attr *attr,
2159 u32 *reg, bool is_write)
2160{
2161 const struct mmio_range *r = NULL, *ranges;
2162 phys_addr_t offset;
2163 int ret, cpuid, c;
2164 struct kvm_vcpu *vcpu, *tmp_vcpu;
2165 struct vgic_dist *vgic;
2166 struct kvm_exit_mmio mmio;
2167
2168 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2169 cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
2170 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
2171
2172 mutex_lock(&dev->kvm->lock);
2173
2174 ret = vgic_init(dev->kvm);
2175 if (ret)
2176 goto out;
2177
2178 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
2179 ret = -EINVAL;
2180 goto out; 1718 goto out;
2181 } 1719 }
2182 1720
2183 vcpu = kvm_get_vcpu(dev->kvm, cpuid); 1721 if (vgic->vgic_model != type_needed) {
2184 vgic = &dev->kvm->arch.vgic; 1722 r = -ENODEV;
2185
2186 mmio.len = 4;
2187 mmio.is_write = is_write;
2188 if (is_write)
2189 mmio_data_write(&mmio, ~0, *reg);
2190 switch (attr->group) {
2191 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2192 mmio.phys_addr = vgic->vgic_dist_base + offset;
2193 ranges = vgic_dist_ranges;
2194 break;
2195 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2196 mmio.phys_addr = vgic->vgic_cpu_base + offset;
2197 ranges = vgic_cpu_ranges;
2198 break;
2199 default:
2200 BUG();
2201 }
2202 r = find_matching_range(ranges, &mmio, offset);
2203
2204 if (unlikely(!r || !r->handle_mmio)) {
2205 ret = -ENXIO;
2206 goto out; 1723 goto out;
2207 } 1724 }
2208 1725
2209 1726 if (write) {
2210 spin_lock(&vgic->lock); 1727 if (!IS_ALIGNED(*addr, alignment))
2211 1728 r = -EINVAL;
2212 /* 1729 else
2213 * Ensure that no other VCPU is running by checking the vcpu->cpu 1730 r = vgic_ioaddr_assign(kvm, addr_ptr, *addr,
2214 * field. If no other VCPUs are running we can safely access the VGIC 1732 } else {
2215 * state, because even if another VCPU is run after this point, that 1733 *addr = *addr_ptr;
2216 * VCPU will not touch the vgic state, because it will block on 1733 *addr = *addr_ptr;
2217 * getting the vgic->lock in kvm_vgic_sync_hwstate().
2218 */
2219 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
2220 if (unlikely(tmp_vcpu->cpu != -1)) {
2221 ret = -EBUSY;
2222 goto out_vgic_unlock;
2223 }
2224 } 1734 }
2225 1735
2226 /*
2227 * Move all pending IRQs from the LRs on all VCPUs so the pending
2228 * state can be properly represented in the register state accessible
2229 * through this API.
2230 */
2231 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
2232 vgic_unqueue_irqs(tmp_vcpu);
2233
2234 offset -= r->base;
2235 r->handle_mmio(vcpu, &mmio, offset);
2236
2237 if (!is_write)
2238 *reg = mmio_data_read(&mmio, ~0);
2239
2240 ret = 0;
2241out_vgic_unlock:
2242 spin_unlock(&vgic->lock);
2243out: 1736out:
2244 mutex_unlock(&dev->kvm->lock); 1737 mutex_unlock(&kvm->lock);
2245 return ret; 1738 return r;
2246} 1739}
2247 1740
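From userspace, kvm_vgic_addr() is reached through the KVM device attribute API rather than a dedicated ioctl. A minimal sketch of programming the GICv3 region bases on an arm64 host (set_vgic_addr() and the guest physical addresses are illustrative; vgic_fd is assumed to come from a prior KVM_CREATE_DEVICE call):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Illustrative helper: write one VGIC base address via KVM_SET_DEVICE_ATTR. */
static int set_vgic_addr(int vgic_fd, uint64_t attr_type, uint64_t gpa)
{
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_ADDR,
                .attr  = attr_type,                  /* e.g. KVM_VGIC_V3_ADDR_TYPE_DIST */
                .addr  = (uint64_t)(uintptr_t)&gpa,  /* pointer to the guest address    */
        };

        return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

/*
 * Example use, with made-up (but 64K-aligned, as required above) addresses:
 *
 *   set_vgic_addr(vgic_fd, KVM_VGIC_V3_ADDR_TYPE_DIST,   0x08000000);
 *   set_vgic_addr(vgic_fd, KVM_VGIC_V3_ADDR_TYPE_REDIST, 0x080a0000);
 */

Routing everything through the same attribute interface is also what lets kvm_vgic_addr() reject a GICv2 address type on a GICv3 guest, via the type_needed check above.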
2248static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1741int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2249{ 1742{
2250 int r; 1743 int r;
2251 1744
@@ -2261,17 +1754,6 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2261 r = kvm_vgic_addr(dev->kvm, type, &addr, true); 1754 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
2262 return (r == -ENODEV) ? -ENXIO : r; 1755 return (r == -ENODEV) ? -ENXIO : r;
2263 } 1756 }
2264
2265 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2266 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
2267 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2268 u32 reg;
2269
2270 if (get_user(reg, uaddr))
2271 return -EFAULT;
2272
2273 return vgic_attr_regs_access(dev, attr, &reg, true);
2274 }
2275 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { 1757 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2276 u32 __user *uaddr = (u32 __user *)(long)attr->addr; 1758 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2277 u32 val; 1759 u32 val;
@@ -2302,13 +1784,20 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2302 1784
2303 return ret; 1785 return ret;
2304 } 1786 }
2305 1787 case KVM_DEV_ARM_VGIC_GRP_CTRL: {
1788 switch (attr->attr) {
1789 case KVM_DEV_ARM_VGIC_CTRL_INIT:
1790 r = vgic_init(dev->kvm);
1791 return r;
1792 }
1793 break;
1794 }
2306 } 1795 }
2307 1796
2308 return -ENXIO; 1797 return -ENXIO;
2309} 1798}
2310 1799
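The KVM_DEV_ARM_VGIC_GRP_CTRL group handled just above is what the explicit VGIC init device control boils down to: once all VCPUs exist, userspace pokes KVM_DEV_ARM_VGIC_CTRL_INIT and vgic_init() runs. Continuing the hypothetical vgic_fd from the previous sketch:

struct kvm_device_attr init_attr = {
        .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
        .attr  = KVM_DEV_ARM_VGIC_CTRL_INIT,   /* no payload; .addr is unused */
};

/* GICv3 guests must do this explicitly; GICv2 still gets lazy init as a fallback. */
int ret = ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &init_attr);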
2311static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1800int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2312{ 1801{
2313 int r = -ENXIO; 1802 int r = -ENXIO;
2314 1803
@@ -2326,20 +1815,9 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2326 return -EFAULT; 1815 return -EFAULT;
2327 break; 1816 break;
2328 } 1817 }
2329
2330 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2331 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
2332 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2333 u32 reg = 0;
2334
2335 r = vgic_attr_regs_access(dev, attr, &reg, false);
2336 if (r)
2337 return r;
2338 r = put_user(reg, uaddr);
2339 break;
2340 }
2341 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { 1818 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2342 u32 __user *uaddr = (u32 __user *)(long)attr->addr; 1819 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1820
2343 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); 1821 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
2344 break; 1822 break;
2345 } 1823 }
@@ -2349,61 +1827,17 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2349 return r; 1827 return r;
2350} 1828}
2351 1829
2352static int vgic_has_attr_regs(const struct mmio_range *ranges, 1830int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset)
2353 phys_addr_t offset)
2354{ 1831{
2355 struct kvm_exit_mmio dev_attr_mmio; 1832 struct kvm_exit_mmio dev_attr_mmio;
2356 1833
2357 dev_attr_mmio.len = 4; 1834 dev_attr_mmio.len = 4;
2358 if (find_matching_range(ranges, &dev_attr_mmio, offset)) 1835 if (vgic_find_range(ranges, &dev_attr_mmio, offset))
2359 return 0; 1836 return 0;
2360 else 1837 else
2361 return -ENXIO; 1838 return -ENXIO;
2362} 1839}
2363 1840
2364static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2365{
2366 phys_addr_t offset;
2367
2368 switch (attr->group) {
2369 case KVM_DEV_ARM_VGIC_GRP_ADDR:
2370 switch (attr->attr) {
2371 case KVM_VGIC_V2_ADDR_TYPE_DIST:
2372 case KVM_VGIC_V2_ADDR_TYPE_CPU:
2373 return 0;
2374 }
2375 break;
2376 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2377 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2378 return vgic_has_attr_regs(vgic_dist_ranges, offset);
2379 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2380 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2381 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
2382 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
2383 return 0;
2384 }
2385 return -ENXIO;
2386}
2387
2388static void vgic_destroy(struct kvm_device *dev)
2389{
2390 kfree(dev);
2391}
2392
2393static int vgic_create(struct kvm_device *dev, u32 type)
2394{
2395 return kvm_vgic_create(dev->kvm);
2396}
2397
2398static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2399 .name = "kvm-arm-vgic",
2400 .create = vgic_create,
2401 .destroy = vgic_destroy,
2402 .set_attr = vgic_set_attr,
2403 .get_attr = vgic_get_attr,
2404 .has_attr = vgic_has_attr,
2405};
2406
2407static void vgic_init_maintenance_interrupt(void *info) 1841static void vgic_init_maintenance_interrupt(void *info)
2408{ 1842{
2409 enable_percpu_irq(vgic->maint_irq, 0); 1843 enable_percpu_irq(vgic->maint_irq, 0);
@@ -2474,8 +1908,7 @@ int kvm_vgic_hyp_init(void)
2474 1908
2475 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); 1909 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2476 1910
2477 return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, 1911 return 0;
2478 KVM_DEV_TYPE_ARM_VGIC_V2);
2479 1912
2480out_free_irq: 1913out_free_irq:
2481 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); 1914 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
new file mode 100644
index 000000000000..1e83bdf5f499
--- /dev/null
+++ b/virt/kvm/arm/vgic.h
@@ -0,0 +1,123 @@
1/*
2 * Copyright (C) 2012-2014 ARM Ltd.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * Derived from virt/kvm/arm/vgic.c
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#ifndef __KVM_VGIC_H__
21#define __KVM_VGIC_H__
22
23#define VGIC_ADDR_UNDEF (-1)
24#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
25
26#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
27#define IMPLEMENTER_ARM 0x43b
28
29#define ACCESS_READ_VALUE (1 << 0)
30#define ACCESS_READ_RAZ (0 << 0)
31#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
32#define ACCESS_WRITE_IGNORED (0 << 1)
33#define ACCESS_WRITE_SETBIT (1 << 1)
34#define ACCESS_WRITE_CLEARBIT (2 << 1)
35#define ACCESS_WRITE_VALUE (3 << 1)
36#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
37
38#define VCPU_NOT_ALLOCATED ((u8)-1)
39
40unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x);
41
42void vgic_update_state(struct kvm *kvm);
43int vgic_init_common_maps(struct kvm *kvm);
44
45u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset);
46u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset);
47
48void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq);
49void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq);
50void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq);
51void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
52 int irq, int val);
53
54void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
55void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
56
57bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
58void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
59
60void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
61 phys_addr_t offset, int mode);
62bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
63 phys_addr_t offset);
64
65static inline
66u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
67{
68 return le32_to_cpu(*((u32 *)mmio->data)) & mask;
69}
70
71static inline
72void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
73{
74 *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
75}
76
77struct kvm_mmio_range {
78 phys_addr_t base;
79 unsigned long len;
80 int bits_per_irq;
81 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
82 phys_addr_t offset);
83};
84
85static inline bool is_in_range(phys_addr_t addr, unsigned long len,
86 phys_addr_t baseaddr, unsigned long size)
87{
88 return (addr >= baseaddr) && (addr + len <= baseaddr + size);
89}
90
91const
92struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
93 struct kvm_exit_mmio *mmio,
94 phys_addr_t offset);
95
96bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
97 struct kvm_exit_mmio *mmio,
98 const struct kvm_mmio_range *ranges,
99 unsigned long mmio_base);
100
101bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
102 phys_addr_t offset, int vcpu_id, int access);
103
104bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
105 phys_addr_t offset, int vcpu_id);
106
107bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
108 phys_addr_t offset, int vcpu_id);
109
110bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
111 phys_addr_t offset);
112
113void vgic_kick_vcpus(struct kvm *kvm);
114
115int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
116int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
117int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
118
119int vgic_init(struct kvm *kvm);
120void vgic_v2_init_emulation(struct kvm *kvm);
121void vgic_v3_init_emulation(struct kvm *kvm);
122
123#endif
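vgic_find_range() is only declared in this header; the patch keeps its body in vgic.c. Its job is a linear scan of a kvm_mmio_range table using the window check from is_in_range() above. A rough sketch of what such a lookup looks like (an illustration of the idea, not the function body from the patch; it assumes the table is terminated by an entry with .len == 0):

static const struct kvm_mmio_range *
find_range_sketch(const struct kvm_mmio_range *ranges,
                  struct kvm_exit_mmio *mmio, phys_addr_t offset)
{
        const struct kvm_mmio_range *r;

        for (r = ranges; r->len; r++) {
                /* does [offset, offset + len) fall inside this register block? */
                if (is_in_range(offset, mmio->len, r->base, r->len))
                        return r;
        }

        return NULL;    /* no match: the caller warns about the unhandled access */
}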
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 167e8c14b143..246cf291c6fd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -176,6 +176,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
176 return called; 176 return called;
177} 177}
178 178
179#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
179void kvm_flush_remote_tlbs(struct kvm *kvm) 180void kvm_flush_remote_tlbs(struct kvm *kvm)
180{ 181{
181 long dirty_count = kvm->tlbs_dirty; 182 long dirty_count = kvm->tlbs_dirty;
@@ -186,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
186 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 187 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
187} 188}
188EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); 189EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
190#endif
189 191
190void kvm_reload_remote_mmus(struct kvm *kvm) 192void kvm_reload_remote_mmus(struct kvm *kvm)
191{ 193{
@@ -993,6 +995,86 @@ out:
993} 995}
994EXPORT_SYMBOL_GPL(kvm_get_dirty_log); 996EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
995 997
998#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
999/**
1000 * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
1001 * are dirty write protect them for next write.
1002 * @kvm: pointer to kvm instance
1003 * @log: slot id and address to which we copy the log
1004 * @is_dirty: flag set if any page is dirty
1005 *
1006 * We need to keep in mind that VCPU threads can write to the bitmap
1007 * concurrently. So, to avoid losing track of dirty pages we keep the
1008 * following order:
1009 *
1010 * 1. Take a snapshot of the bit and clear it if needed.
1011 * 2. Write protect the corresponding page.
1012 * 3. Copy the snapshot to the userspace.
1013 * 4. Upon return caller flushes TLB's if needed.
1014 *
1015 * Between 2 and 4, the guest may write to the page using the remaining TLB
1016 * entry. This is not a problem because the page is reported dirty using
1017 * the snapshot taken before and step 4 ensures that writes done after
1018 * exiting to userspace will be logged for the next call.
1019 *
1020 */
1021int kvm_get_dirty_log_protect(struct kvm *kvm,
1022 struct kvm_dirty_log *log, bool *is_dirty)
1023{
1024 struct kvm_memory_slot *memslot;
1025 int r, i;
1026 unsigned long n;
1027 unsigned long *dirty_bitmap;
1028 unsigned long *dirty_bitmap_buffer;
1029
1030 r = -EINVAL;
1031 if (log->slot >= KVM_USER_MEM_SLOTS)
1032 goto out;
1033
1034 memslot = id_to_memslot(kvm->memslots, log->slot);
1035
1036 dirty_bitmap = memslot->dirty_bitmap;
1037 r = -ENOENT;
1038 if (!dirty_bitmap)
1039 goto out;
1040
1041 n = kvm_dirty_bitmap_bytes(memslot);
1042
1043 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
1044 memset(dirty_bitmap_buffer, 0, n);
1045
1046 spin_lock(&kvm->mmu_lock);
1047 *is_dirty = false;
1048 for (i = 0; i < n / sizeof(long); i++) {
1049 unsigned long mask;
1050 gfn_t offset;
1051
1052 if (!dirty_bitmap[i])
1053 continue;
1054
1055 *is_dirty = true;
1056
1057 mask = xchg(&dirty_bitmap[i], 0);
1058 dirty_bitmap_buffer[i] = mask;
1059
1060 offset = i * BITS_PER_LONG;
1061 kvm_arch_mmu_write_protect_pt_masked(kvm, memslot, offset,
1062 mask);
1063 }
1064
1065 spin_unlock(&kvm->mmu_lock);
1066
1067 r = -EFAULT;
1068 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
1069 goto out;
1070
1071 r = 0;
1072out:
1073 return r;
1074}
1075EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
1076#endif
1077
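Step 4 of the ordering above is deliberately left to the caller. An architecture's KVM_GET_DIRTY_LOG handler is expected to use the helper roughly like this (a sketch of the calling pattern, not a quote of any arch file in this merge):

int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
        bool is_dirty = false;
        int r;

        mutex_lock(&kvm->slots_lock);

        r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);

        /* Step 4: pages were write-protected under mmu_lock, so any stale
         * writable TLB entries must go before userspace relies on the log. */
        if (is_dirty)
                kvm_flush_remote_tlbs(kvm);

        mutex_unlock(&kvm->slots_lock);
        return r;
}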
996bool kvm_largepages_enabled(void) 1078bool kvm_largepages_enabled(void)
997{ 1079{
998 return largepages_enabled; 1080 return largepages_enabled;