author     Linus Torvalds <torvalds@linux-foundation.org>  2017-05-10 14:29:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-05-10 14:29:23 -0400
commit     5ccd414080822d5257c3569f4aeca74f63f4a257 (patch)
tree       4b567bfc0a8b29dbac9712821062b28337eb3408
parent     29250d301b0c75ef142b51eebee6b7403cc79624 (diff)
parent     36c344f3f1ffc0b1b20abd237b7401dc6687ee8f (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
 "ARM:
   - bugfixes
   - moved shared 32-bit/64-bit files to virt/kvm/arm
   - support for saving/restoring virtual ITS state to userspace

  PPC:
   - XIVE (eXternal Interrupt Virtualization Engine) support

  x86:
   - nVMX improvements, including emulated page modification logging
     (PML) which brings nice performance improvements on some workloads"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (45 commits)
  KVM: arm/arm64: vgic-its: Cleanup after failed ITT restore
  KVM: arm/arm64: Don't call map_resources when restoring ITS tables
  KVM: arm/arm64: Register ITS iodev when setting base address
  KVM: arm/arm64: Get rid of its->initialized field
  KVM: arm/arm64: Register iodevs when setting redist base and creating VCPUs
  KVM: arm/arm64: Slightly rework kvm_vgic_addr
  KVM: arm/arm64: Make vgic_v3_check_base more broadly usable
  KVM: arm/arm64: Refactor vgic_register_redist_iodevs
  KVM: Add kvm_vcpu_get_idx to get vcpu index in kvm->vcpus
  nVMX: Advertise PML to L1 hypervisor
  nVMX: Implement emulated Page Modification Logging
  kvm: x86: Add a hook for arch specific dirty logging emulation
  kvm: nVMX: Validate CR3 target count on nested VM-entry
  KVM: set no_llseek in stat_fops_per_vm
  KVM: arm/arm64: vgic: Rename kvm_vgic_vcpu_init to kvm_vgic_vcpu_enable
  KVM: arm/arm64: Clarification and relaxation to ITS save/restore ABI
  KVM: arm64: vgic-v3: KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
  KVM: arm64: vgic-its: Fix pending table sync
  KVM: arm64: vgic-its: ITT save and restore
  KVM: arm64: vgic-its: Device table save/restore
  ...
-rw-r--r--  Documentation/virtual/kvm/devices/arm-vgic-its.txt  121
-rw-r--r--  Documentation/virtual/kvm/devices/arm-vgic-v3.txt  6
-rw-r--r--  arch/arm/include/uapi/asm/kvm.h  6
-rw-r--r--  arch/arm/kvm/Makefile  7
-rw-r--r--  arch/arm/kvm/trace.h  247
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h  6
-rw-r--r--  arch/arm64/include/uapi/asm/kvm.h  6
-rw-r--r--  arch/arm64/kvm/Makefile  5
-rw-r--r--  arch/arm64/kvm/sys_regs.c  8
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h  2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h  28
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h  74
-rw-r--r--  arch/powerpc/include/asm/xive.h  9
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c  10
-rw-r--r--  arch/powerpc/kvm/Kconfig  5
-rw-r--r--  arch/powerpc/kvm/Makefile  4
-rw-r--r--  arch/powerpc/kvm/book3s.c  75
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c  51
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c  103
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c  10
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xive.c  47
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S  62
-rw-r--r--  arch/powerpc/kvm/book3s_rtas.c  21
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c  35
-rw-r--r--  arch/powerpc/kvm/book3s_xics.h  7
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c  1894
-rw-r--r--  arch/powerpc/kvm/book3s_xive.h  256
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c  503
-rw-r--r--  arch/powerpc/kvm/irq.h  1
-rw-r--r--  arch/powerpc/kvm/powerpc.c  17
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c  1
-rw-r--r--  arch/powerpc/sysdev/xive/common.c  142
-rw-r--r--  arch/powerpc/sysdev/xive/native.c  86
-rw-r--r--  arch/x86/include/asm/kvm_host.h  2
-rw-r--r--  arch/x86/kvm/mmu.c  15
-rw-r--r--  arch/x86/kvm/mmu.h  1
-rw-r--r--  arch/x86/kvm/paging_tmpl.h  4
-rw-r--r--  arch/x86/kvm/vmx.c  105
-rw-r--r--  include/kvm/arm_vgic.h  5
-rw-r--r--  include/linux/irqchip/arm-gic-v3.h  14
-rw-r--r--  include/linux/kvm_host.h  12
-rw-r--r--  virt/kvm/arm/arm.c (renamed from arch/arm/kvm/arm.c)  2
-rw-r--r--  virt/kvm/arm/mmio.c (renamed from arch/arm/kvm/mmio.c)  0
-rw-r--r--  virt/kvm/arm/mmu.c (renamed from arch/arm/kvm/mmu.c)  0
-rw-r--r--  virt/kvm/arm/perf.c (renamed from arch/arm/kvm/perf.c)  0
-rw-r--r--  virt/kvm/arm/psci.c (renamed from arch/arm/kvm/psci.c)  0
-rw-r--r--  virt/kvm/arm/trace.h  246
-rw-r--r--  virt/kvm/arm/vgic/trace.h  37
-rw-r--r--  virt/kvm/arm/vgic/vgic-init.c  25
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c  1234
-rw-r--r--  virt/kvm/arm/vgic/vgic-kvm-device.c  53
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio-v3.c  147
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.c  11
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.h  14
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c  128
-rw-r--r--  virt/kvm/arm/vgic/vgic.c  2
-rw-r--r--  virt/kvm/arm/vgic/vgic.h  33
-rw-r--r--  virt/kvm/kvm_main.c  8
58 files changed, 5318 insertions, 635 deletions
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 6081a5b7fc1e..eb06beb75960 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -32,7 +32,128 @@ Groups:
32 KVM_DEV_ARM_VGIC_CTRL_INIT 32 KVM_DEV_ARM_VGIC_CTRL_INIT
33 request the initialization of the ITS, no additional parameter in 33 request the initialization of the ITS, no additional parameter in
34 kvm_device_attr.addr. 34 kvm_device_attr.addr.
35
36 KVM_DEV_ARM_ITS_SAVE_TABLES
37 save the ITS table data into guest RAM, at the location provisioned
38 by the guest in corresponding registers/table entries.
39
40 The layout of the tables in guest memory defines an ABI. The entries
41 are laid out in little endian format as described in the last paragraph.
42
43 KVM_DEV_ARM_ITS_RESTORE_TABLES
44 restore the ITS tables from guest RAM to ITS internal structures.
45
46 The GICV3 must be restored before the ITS and all ITS registers but
47 the GITS_CTLR must be restored before restoring the ITS tables.
48
49 The GITS_IIDR read-only register must also be restored before
50 calling KVM_DEV_ARM_ITS_RESTORE_TABLES as the IIDR revision field
51 encodes the ABI revision.
52
53 The expected ordering when restoring the GICv3/ITS is described in section
54 "ITS Restore Sequence".
55
35 Errors: 56 Errors:
36 -ENXIO: ITS not properly configured as required prior to setting 57 -ENXIO: ITS not properly configured as required prior to setting
37 this attribute 58 this attribute
38 -ENOMEM: Memory shortage when allocating ITS internal data 59 -ENOMEM: Memory shortage when allocating ITS internal data
60 -EINVAL: Inconsistent restored data
61 -EFAULT: Invalid guest ram access
62 -EBUSY: One or more VCPUS are running
63
64 KVM_DEV_ARM_VGIC_GRP_ITS_REGS
65 Attributes:
66 The attr field of kvm_device_attr encodes the offset of the
67 ITS register, relative to the ITS control frame base address
68 (ITS_base).
69
70 kvm_device_attr.addr points to a __u64 value whatever the width
71 of the addressed register (32/64 bits). 64 bit registers can only
72 be accessed with full length.
73
74 Writes to read-only registers are ignored by the kernel except for:
75 - GITS_CREADR. It must be restored otherwise commands in the queue
76 will be re-executed after restoring CWRITER. GITS_CREADR must be
77 restored before restoring the GITS_CTLR which is likely to enable the
78 ITS. Also it must be restored after GITS_CBASER since a write to
79 GITS_CBASER resets GITS_CREADR.
80 - GITS_IIDR. The Revision field encodes the table layout ABI revision.
81 In the future we might implement direct injection of virtual LPIs.
82 This will require an upgrade of the table layout and an evolution of
83 the ABI. GITS_IIDR must be restored before calling
84 KVM_DEV_ARM_ITS_RESTORE_TABLES.
85
86 For other registers, getting or setting a register has the same
87 effect as reading/writing the register on real hardware.
88 Errors:
89 -ENXIO: Offset does not correspond to any supported register
90 -EFAULT: Invalid user pointer for attr->addr
91 -EINVAL: Offset is not 64-bit aligned
92 -EBUSY: one or more VCPUS are running
93
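
   As an illustration of this register group, a minimal userspace sketch
   (assuming `its_fd` is an ITS device fd obtained from KVM_CREATE_DEVICE and
   using the architectural GITS_CREADR offset; error handling is left to the
   caller):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define GITS_CREADR 0x0090  /* GICv3 architectural offset from ITS_base */

    /* kvm_device_attr.addr always points to a __u64, even for 32-bit registers */
    static int its_reg_read(int its_fd, uint64_t offset, uint64_t *val)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
                    .attr  = offset,                 /* offset relative to ITS_base */
                    .addr  = (uint64_t)(uintptr_t)val,
            };

            return ioctl(its_fd, KVM_GET_DEVICE_ATTR, &attr);
    }

    static int its_reg_write(int its_fd, uint64_t offset, uint64_t val)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
                    .attr  = offset,
                    .addr  = (uint64_t)(uintptr_t)&val,
            };

            return ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr);
    }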
94 ITS Restore Sequence:
95 -------------------------
96
97The following ordering must be followed when restoring the GIC and the ITS:
98a) restore all guest memory and create vcpus
99b) restore all redistributors
100c) provide the its base address
101 (KVM_DEV_ARM_VGIC_GRP_ADDR)
102d) restore the ITS in the following order:
103 1. Restore GITS_CBASER
104 2. Restore all other GITS_ registers, except GITS_CTLR!
105 3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
106 4. Restore GITS_CTLR
107
108Then vcpus can be started.
109
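
   Step (d) above maps onto the device-attribute helpers sketched earlier. A
   hedged sketch of the ordering follows, where `struct saved_its` and its
   fields are illustrative names for state captured at save time; the
   GITS_BASER<n> restores are omitted for brevity:

    /* GICv3 architectural offsets from ITS_base */
    #define GITS_CTLR    0x0000
    #define GITS_IIDR    0x0004
    #define GITS_CBASER  0x0080
    #define GITS_CWRITER 0x0088
    /* GITS_CREADR defined in the earlier sketch */

    struct saved_its {   /* illustrative container for saved register state */
            uint64_t cbaser, iidr, creadr, cwriter, ctlr;
    };

    static int its_restore(int its_fd, const struct saved_its *s)
    {
            struct kvm_device_attr tables = {
                    .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
                    .attr  = KVM_DEV_ARM_ITS_RESTORE_TABLES,
            };
            int ret;

            /* 1. GITS_CBASER first: writing it resets GITS_CREADR */
            ret = its_reg_write(its_fd, GITS_CBASER, s->cbaser);

            /* 2. all other registers except GITS_CTLR; GITS_IIDR and
             *    GITS_CREADR must land before the table restore below */
            if (!ret)
                    ret = its_reg_write(its_fd, GITS_IIDR, s->iidr);
            if (!ret)
                    ret = its_reg_write(its_fd, GITS_CREADR, s->creadr);
            if (!ret)
                    ret = its_reg_write(its_fd, GITS_CWRITER, s->cwriter);

            /* 3. load the ITS table data from guest RAM */
            if (!ret)
                    ret = ioctl(its_fd, KVM_SET_DEVICE_ATTR, &tables);

            /* 4. GITS_CTLR last, which may enable the ITS */
            if (!ret)
                    ret = its_reg_write(its_fd, GITS_CTLR, s->ctlr);

            return ret;
    }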
110 ITS Table ABI REV0:
111 -------------------
112
113 Revision 0 of the ABI only supports the features of a virtual GICv3, and does
114 not support a virtual GICv4 with support for direct injection of virtual
115 interrupts for nested hypervisors.
116
117 The device table and ITT are indexed by the DeviceID and EventID,
118 respectively. The collection table is not indexed by CollectionID, and the
119 entries in the collection are listed in no particular order.
120 All entries are 8 bytes.
121
122 Device Table Entry (DTE):
123
124 bits: | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
125 values: | V | next | ITT_addr | Size |
126
 127 where:
128 - V indicates whether the entry is valid. If not, other fields
129 are not meaningful.
130 - next: equals to 0 if this entry is the last one; otherwise it
131 corresponds to the DeviceID offset to the next DTE, capped by
132 2^14 -1.
133 - ITT_addr matches bits [51:8] of the ITT address (256 Byte aligned).
134 - Size specifies the supported number of bits for the EventID,
135 minus one
136
137 Collection Table Entry (CTE):
138
139 bits: | 63| 62 .. 52 | 51 ... 16 | 15 ... 0 |
140 values: | V | RES0 | RDBase | ICID |
141
142 where:
143 - V indicates whether the entry is valid. If not, other fields are
144 not meaningful.
145 - RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
146 - RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
147 - ICID is the collection ID
148
149 Interrupt Translation Entry (ITE):
150
151 bits: | 63 ... 48 | 47 ... 16 | 15 ... 0 |
152 values: | next | pINTID | ICID |
153
154 where:
155 - next: equals to 0 if this entry is the last one; otherwise it corresponds
156 to the EventID offset to the next ITE capped by 2^16 -1.
157 - pINTID is the physical LPI ID; if zero, it means the entry is not valid
158 and other fields are not meaningful.
159 - ICID is the collection ID
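
   The three entry formats above decode with plain shifts and masks once an
   8-byte entry has been read from guest RAM and converted from little endian
   (for example with le64toh()). A sketch, with field names following the
   tables:

    #include <stdbool.h>
    #include <stdint.h>

    /* DTE: | V(63) | next(62:49) | ITT_addr(48:5) | Size(4:0) | */
    struct dte {
            bool     valid;
            uint16_t next;      /* DeviceID offset to the next DTE, 0 = last */
            uint64_t itt_addr;  /* bits [51:8] of the ITT address (256 B aligned) */
            uint8_t  size;      /* number of EventID bits, minus one */
    };

    static struct dte decode_dte(uint64_t e)
    {
            struct dte d = {
                    .valid    = e >> 63,
                    .next     = (e >> 49) & 0x3fff,
                    .itt_addr = ((e >> 5) & ((1ULL << 44) - 1)) << 8,
                    .size     = e & 0x1f,
            };
            return d;
    }

    /* CTE: | V(63) | RES0(62:52) | RDBase(51:16) | ICID(15:0) | */
    struct cte {
            bool     valid;
            uint64_t rdbase;    /* PE number, GICR_TYPER.Processor_Number semantic */
            uint16_t icid;
    };

    static struct cte decode_cte(uint64_t e)
    {
            struct cte c = {
                    .valid  = e >> 63,
                    .rdbase = (e >> 16) & ((1ULL << 36) - 1),
                    .icid   = e & 0xffff,
            };
            return c;
    }

    /* ITE: | next(63:48) | pINTID(47:16) | ICID(15:0) | */
    struct ite {
            uint16_t next;      /* EventID offset to the next ITE, 0 = last */
            uint32_t pintid;    /* physical LPI ID, 0 = entry not valid */
            uint16_t icid;
    };

    static struct ite decode_ite(uint64_t e)
    {
            struct ite i = {
                    .next   = e >> 48,
                    .pintid = (e >> 16) & 0xffffffff,
                    .icid   = e & 0xffff,
            };
            return i;
    }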
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
index c1a24612c198..9293b45abdb9 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
@@ -167,11 +167,17 @@ Groups:
167 KVM_DEV_ARM_VGIC_CTRL_INIT 167 KVM_DEV_ARM_VGIC_CTRL_INIT
168 request the initialization of the VGIC, no additional parameter in 168 request the initialization of the VGIC, no additional parameter in
169 kvm_device_attr.addr. 169 kvm_device_attr.addr.
170 KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
171 save all LPI pending bits into guest RAM pending tables.
172
173 The first kB of the pending table is not altered by this operation.
170 Errors: 174 Errors:
171 -ENXIO: VGIC not properly configured as required prior to calling 175 -ENXIO: VGIC not properly configured as required prior to calling
172 this attribute 176 this attribute
173 -ENODEV: no online VCPU 177 -ENODEV: no online VCPU
174 -ENOMEM: memory shortage when allocating vgic internal data 178 -ENOMEM: memory shortage when allocating vgic internal data
179 -EFAULT: Invalid guest ram access
180 -EBUSY: One or more VCPUS are running
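
   A short userspace sketch of triggering this flush, assuming `vgic_fd` is
   the GICv3 device fd returned by KVM_CREATE_DEVICE; all VCPUs must be
   stopped first, or the call fails with -EBUSY as listed above:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int vgic_save_pending_tables(int vgic_fd)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
                    .attr  = KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES,
            };

            /* write every LPI's pending bit into the guest RAM pending tables */
            return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }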
175 181
176 182
177 KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 183 KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index a88726359e5f..5e3c673fa3f4 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -196,13 +196,17 @@ struct kvm_arch_memory_slot {
196#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 196#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
197#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 197#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
198#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 198#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
199#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
199#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 200#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
200#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ 201#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
201 (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) 202 (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
202#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff 203#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
203#define VGIC_LEVEL_INFO_LINE_LEVEL 0 204#define VGIC_LEVEL_INFO_LINE_LEVEL 0
204 205
205#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 206#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
207#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
208#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
209#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
206 210
207/* KVM_IRQ_LINE irq field index values */ 211/* KVM_IRQ_LINE irq field index values */
208#define KVM_ARM_IRQ_TYPE_SHIFT 24 212#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 7b3670c2ae7b..d9beee652d36 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -18,9 +18,12 @@ KVM := ../../../virt/kvm
18kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o 18kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
19 19
20obj-$(CONFIG_KVM_ARM_HOST) += hyp/ 20obj-$(CONFIG_KVM_ARM_HOST) += hyp/
21
21obj-y += kvm-arm.o init.o interrupts.o 22obj-y += kvm-arm.o init.o interrupts.o
22obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o 23obj-y += handle_exit.o guest.o emulate.o reset.o
23obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o vgic-v3-coproc.o 24obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
25obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
26obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
24obj-y += $(KVM)/arm/aarch32.o 27obj-y += $(KVM)/arm/aarch32.o
25 28
26obj-y += $(KVM)/arm/vgic/vgic.o 29obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index c25a88598eb0..fc0943776db2 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -6,133 +6,6 @@
6#undef TRACE_SYSTEM 6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm 7#define TRACE_SYSTEM kvm
8 8
9/*
10 * Tracepoints for entry/exit to guest
11 */
12TRACE_EVENT(kvm_entry,
13 TP_PROTO(unsigned long vcpu_pc),
14 TP_ARGS(vcpu_pc),
15
16 TP_STRUCT__entry(
17 __field( unsigned long, vcpu_pc )
18 ),
19
20 TP_fast_assign(
21 __entry->vcpu_pc = vcpu_pc;
22 ),
23
24 TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
25);
26
27TRACE_EVENT(kvm_exit,
28 TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
29 TP_ARGS(idx, exit_reason, vcpu_pc),
30
31 TP_STRUCT__entry(
32 __field( int, idx )
33 __field( unsigned int, exit_reason )
34 __field( unsigned long, vcpu_pc )
35 ),
36
37 TP_fast_assign(
38 __entry->idx = idx;
39 __entry->exit_reason = exit_reason;
40 __entry->vcpu_pc = vcpu_pc;
41 ),
42
43 TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
44 __print_symbolic(__entry->idx, kvm_arm_exception_type),
45 __entry->exit_reason,
46 __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
47 __entry->vcpu_pc)
48);
49
50TRACE_EVENT(kvm_guest_fault,
51 TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
52 unsigned long hxfar,
53 unsigned long long ipa),
54 TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
55
56 TP_STRUCT__entry(
57 __field( unsigned long, vcpu_pc )
58 __field( unsigned long, hsr )
59 __field( unsigned long, hxfar )
60 __field( unsigned long long, ipa )
61 ),
62
63 TP_fast_assign(
64 __entry->vcpu_pc = vcpu_pc;
65 __entry->hsr = hsr;
66 __entry->hxfar = hxfar;
67 __entry->ipa = ipa;
68 ),
69
70 TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
71 __entry->ipa, __entry->hsr,
72 __entry->hxfar, __entry->vcpu_pc)
73);
74
75TRACE_EVENT(kvm_access_fault,
76 TP_PROTO(unsigned long ipa),
77 TP_ARGS(ipa),
78
79 TP_STRUCT__entry(
80 __field( unsigned long, ipa )
81 ),
82
83 TP_fast_assign(
84 __entry->ipa = ipa;
85 ),
86
87 TP_printk("IPA: %lx", __entry->ipa)
88);
89
90TRACE_EVENT(kvm_irq_line,
91 TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
92 TP_ARGS(type, vcpu_idx, irq_num, level),
93
94 TP_STRUCT__entry(
95 __field( unsigned int, type )
96 __field( int, vcpu_idx )
97 __field( int, irq_num )
98 __field( int, level )
99 ),
100
101 TP_fast_assign(
102 __entry->type = type;
103 __entry->vcpu_idx = vcpu_idx;
104 __entry->irq_num = irq_num;
105 __entry->level = level;
106 ),
107
108 TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
109 (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
110 (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
111 (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
112 __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
113);
114
115TRACE_EVENT(kvm_mmio_emulate,
116 TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
117 unsigned long cpsr),
118 TP_ARGS(vcpu_pc, instr, cpsr),
119
120 TP_STRUCT__entry(
121 __field( unsigned long, vcpu_pc )
122 __field( unsigned long, instr )
123 __field( unsigned long, cpsr )
124 ),
125
126 TP_fast_assign(
127 __entry->vcpu_pc = vcpu_pc;
128 __entry->instr = instr;
129 __entry->cpsr = cpsr;
130 ),
131
132 TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
133 __entry->vcpu_pc, __entry->instr, __entry->cpsr)
134);
135
136/* Architecturally implementation defined CP15 register access */ 9/* Architecturally implementation defined CP15 register access */
137TRACE_EVENT(kvm_emulate_cp15_imp, 10TRACE_EVENT(kvm_emulate_cp15_imp,
138 TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn, 11 TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
@@ -181,87 +54,6 @@ TRACE_EVENT(kvm_wfx,
181 __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) 54 __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc)
182); 55);
183 56
184TRACE_EVENT(kvm_unmap_hva,
185 TP_PROTO(unsigned long hva),
186 TP_ARGS(hva),
187
188 TP_STRUCT__entry(
189 __field( unsigned long, hva )
190 ),
191
192 TP_fast_assign(
193 __entry->hva = hva;
194 ),
195
196 TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
197);
198
199TRACE_EVENT(kvm_unmap_hva_range,
200 TP_PROTO(unsigned long start, unsigned long end),
201 TP_ARGS(start, end),
202
203 TP_STRUCT__entry(
204 __field( unsigned long, start )
205 __field( unsigned long, end )
206 ),
207
208 TP_fast_assign(
209 __entry->start = start;
210 __entry->end = end;
211 ),
212
213 TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
214 __entry->start, __entry->end)
215);
216
217TRACE_EVENT(kvm_set_spte_hva,
218 TP_PROTO(unsigned long hva),
219 TP_ARGS(hva),
220
221 TP_STRUCT__entry(
222 __field( unsigned long, hva )
223 ),
224
225 TP_fast_assign(
226 __entry->hva = hva;
227 ),
228
229 TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
230);
231
232TRACE_EVENT(kvm_age_hva,
233 TP_PROTO(unsigned long start, unsigned long end),
234 TP_ARGS(start, end),
235
236 TP_STRUCT__entry(
237 __field( unsigned long, start )
238 __field( unsigned long, end )
239 ),
240
241 TP_fast_assign(
242 __entry->start = start;
243 __entry->end = end;
244 ),
245
246 TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
247 __entry->start, __entry->end)
248);
249
250TRACE_EVENT(kvm_test_age_hva,
251 TP_PROTO(unsigned long hva),
252 TP_ARGS(hva),
253
254 TP_STRUCT__entry(
255 __field( unsigned long, hva )
256 ),
257
258 TP_fast_assign(
259 __entry->hva = hva;
260 ),
261
262 TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
263);
264
265TRACE_EVENT(kvm_hvc, 57TRACE_EVENT(kvm_hvc,
266 TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), 58 TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
267 TP_ARGS(vcpu_pc, r0, imm), 59 TP_ARGS(vcpu_pc, r0, imm),
@@ -282,45 +74,6 @@ TRACE_EVENT(kvm_hvc,
282 __entry->vcpu_pc, __entry->r0, __entry->imm) 74 __entry->vcpu_pc, __entry->r0, __entry->imm)
283); 75);
284 76
285TRACE_EVENT(kvm_set_way_flush,
286 TP_PROTO(unsigned long vcpu_pc, bool cache),
287 TP_ARGS(vcpu_pc, cache),
288
289 TP_STRUCT__entry(
290 __field( unsigned long, vcpu_pc )
291 __field( bool, cache )
292 ),
293
294 TP_fast_assign(
295 __entry->vcpu_pc = vcpu_pc;
296 __entry->cache = cache;
297 ),
298
299 TP_printk("S/W flush at 0x%016lx (cache %s)",
300 __entry->vcpu_pc, __entry->cache ? "on" : "off")
301);
302
303TRACE_EVENT(kvm_toggle_cache,
304 TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
305 TP_ARGS(vcpu_pc, was, now),
306
307 TP_STRUCT__entry(
308 __field( unsigned long, vcpu_pc )
309 __field( bool, was )
310 __field( bool, now )
311 ),
312
313 TP_fast_assign(
314 __entry->vcpu_pc = vcpu_pc;
315 __entry->was = was;
316 __entry->now = now;
317 ),
318
319 TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
320 __entry->vcpu_pc, __entry->was ? "on" : "off",
321 __entry->now ? "on" : "off")
322);
323
324#endif /* _TRACE_KVM_H */ 77#endif /* _TRACE_KVM_H */
325 78
326#undef TRACE_INCLUDE_PATH 79#undef TRACE_INCLUDE_PATH
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f5ea0ba70f07..fe39e6841326 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -240,6 +240,12 @@ static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
240 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; 240 return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE;
241} 241}
242 242
243static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
244{
245 u32 esr = kvm_vcpu_get_hsr(vcpu);
246 return (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
247}
248
243static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) 249static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
244{ 250{
245 return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; 251 return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 869ee480deed..70eea2ecc663 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -216,13 +216,17 @@ struct kvm_arch_memory_slot {
216#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 216#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
217#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 217#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
218#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 218#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
219#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
219#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 220#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
220#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ 221#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
221 (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) 222 (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
222#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff 223#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
223#define VGIC_LEVEL_INFO_LINE_LEVEL 0 224#define VGIC_LEVEL_INFO_LINE_LEVEL 0
224 225
225#define KVM_DEV_ARM_VGIC_CTRL_INIT 0 226#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
227#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
228#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
229#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
226 230
227/* Device Control API on vcpu fd */ 231/* Device Control API on vcpu fd */
228#define KVM_ARM_VCPU_PMU_V3_CTRL 0 232#define KVM_ARM_VCPU_PMU_V3_CTRL 0
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index afd51bebb9c5..5d9810086c25 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -7,14 +7,13 @@ CFLAGS_arm.o := -I.
7CFLAGS_mmu.o := -I. 7CFLAGS_mmu.o := -I.
8 8
9KVM=../../../virt/kvm 9KVM=../../../virt/kvm
10ARM=../../../arch/arm/kvm
11 10
12obj-$(CONFIG_KVM_ARM_HOST) += kvm.o 11obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
13obj-$(CONFIG_KVM_ARM_HOST) += hyp/ 12obj-$(CONFIG_KVM_ARM_HOST) += hyp/
14 13
15kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o 14kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
16kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o 15kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
17kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o 16kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
18 17
19kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o 18kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
20kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o 19kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index efbe9e8e7a78..0fe27024a2e1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1529,8 +1529,8 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
1529{ 1529{
1530 struct sys_reg_params params; 1530 struct sys_reg_params params;
1531 u32 hsr = kvm_vcpu_get_hsr(vcpu); 1531 u32 hsr = kvm_vcpu_get_hsr(vcpu);
1532 int Rt = (hsr >> 5) & 0xf; 1532 int Rt = kvm_vcpu_sys_get_rt(vcpu);
1533 int Rt2 = (hsr >> 10) & 0xf; 1533 int Rt2 = (hsr >> 10) & 0x1f;
1534 1534
1535 params.is_aarch32 = true; 1535 params.is_aarch32 = true;
1536 params.is_32bit = false; 1536 params.is_32bit = false;
@@ -1586,7 +1586,7 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
1586{ 1586{
1587 struct sys_reg_params params; 1587 struct sys_reg_params params;
1588 u32 hsr = kvm_vcpu_get_hsr(vcpu); 1588 u32 hsr = kvm_vcpu_get_hsr(vcpu);
1589 int Rt = (hsr >> 5) & 0xf; 1589 int Rt = kvm_vcpu_sys_get_rt(vcpu);
1590 1590
1591 params.is_aarch32 = true; 1591 params.is_aarch32 = true;
1592 params.is_32bit = true; 1592 params.is_32bit = true;
@@ -1688,7 +1688,7 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
1688{ 1688{
1689 struct sys_reg_params params; 1689 struct sys_reg_params params;
1690 unsigned long esr = kvm_vcpu_get_hsr(vcpu); 1690 unsigned long esr = kvm_vcpu_get_hsr(vcpu);
1691 int Rt = (esr >> 5) & 0x1f; 1691 int Rt = kvm_vcpu_sys_get_rt(vcpu);
1692 int ret; 1692 int ret;
1693 1693
1694 trace_kvm_handle_sys_reg(esr); 1694 trace_kvm_handle_sys_reg(esr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 0593d9479f74..b148496ffe36 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -111,6 +111,8 @@ struct kvmppc_host_state {
111 struct kvm_vcpu *kvm_vcpu; 111 struct kvm_vcpu *kvm_vcpu;
112 struct kvmppc_vcore *kvm_vcore; 112 struct kvmppc_vcore *kvm_vcore;
113 void __iomem *xics_phys; 113 void __iomem *xics_phys;
114 void __iomem *xive_tima_phys;
115 void __iomem *xive_tima_virt;
114 u32 saved_xirr; 116 u32 saved_xirr;
115 u64 dabr; 117 u64 dabr;
116 u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */ 118 u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 77c60826d145..9c51ac4b8f36 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -210,6 +210,12 @@ struct kvmppc_spapr_tce_table {
210/* XICS components, defined in book3s_xics.c */ 210/* XICS components, defined in book3s_xics.c */
211struct kvmppc_xics; 211struct kvmppc_xics;
212struct kvmppc_icp; 212struct kvmppc_icp;
213extern struct kvm_device_ops kvm_xics_ops;
214
215/* XIVE components, defined in book3s_xive.c */
216struct kvmppc_xive;
217struct kvmppc_xive_vcpu;
218extern struct kvm_device_ops kvm_xive_ops;
213 219
214struct kvmppc_passthru_irqmap; 220struct kvmppc_passthru_irqmap;
215 221
@@ -298,6 +304,7 @@ struct kvm_arch {
298#endif 304#endif
299#ifdef CONFIG_KVM_XICS 305#ifdef CONFIG_KVM_XICS
300 struct kvmppc_xics *xics; 306 struct kvmppc_xics *xics;
307 struct kvmppc_xive *xive;
301 struct kvmppc_passthru_irqmap *pimap; 308 struct kvmppc_passthru_irqmap *pimap;
302#endif 309#endif
303 struct kvmppc_ops *kvm_ops; 310 struct kvmppc_ops *kvm_ops;
@@ -427,7 +434,7 @@ struct kvmppc_passthru_irqmap {
427 434
428#define KVMPPC_IRQ_DEFAULT 0 435#define KVMPPC_IRQ_DEFAULT 0
429#define KVMPPC_IRQ_MPIC 1 436#define KVMPPC_IRQ_MPIC 1
430#define KVMPPC_IRQ_XICS 2 437#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
431 438
432#define MMIO_HPTE_CACHE_SIZE 4 439#define MMIO_HPTE_CACHE_SIZE 4
433 440
@@ -454,6 +461,21 @@ struct mmio_hpte_cache {
454 461
455struct openpic; 462struct openpic;
456 463
464/* W0 and W1 of a XIVE thread management context */
465union xive_tma_w01 {
466 struct {
467 u8 nsr;
468 u8 cppr;
469 u8 ipb;
470 u8 lsmfb;
471 u8 ack;
472 u8 inc;
473 u8 age;
474 u8 pipr;
475 };
476 __be64 w01;
477};
478
457struct kvm_vcpu_arch { 479struct kvm_vcpu_arch {
458 ulong host_stack; 480 ulong host_stack;
459 u32 host_pid; 481 u32 host_pid;
@@ -714,6 +736,10 @@ struct kvm_vcpu_arch {
714 struct openpic *mpic; /* KVM_IRQ_MPIC */ 736 struct openpic *mpic; /* KVM_IRQ_MPIC */
715#ifdef CONFIG_KVM_XICS 737#ifdef CONFIG_KVM_XICS
716 struct kvmppc_icp *icp; /* XICS presentation controller */ 738 struct kvmppc_icp *icp; /* XICS presentation controller */
739 struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
740 __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
741 u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
742 union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
717#endif 743#endif
718 744
719#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 745#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 76e940a3c145..e0d88c38602b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -240,6 +240,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
240extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); 240extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
241extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); 241extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
242extern void kvmppc_rtas_tokens_free(struct kvm *kvm); 242extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
243
243extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, 244extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
244 u32 priority); 245 u32 priority);
245extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, 246extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
@@ -428,6 +429,14 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
428 paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr; 429 paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
429} 430}
430 431
432static inline void kvmppc_set_xive_tima(int cpu,
433 unsigned long phys_addr,
434 void __iomem *virt_addr)
435{
436 paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
437 paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
438}
439
431static inline u32 kvmppc_get_xics_latch(void) 440static inline u32 kvmppc_get_xics_latch(void)
432{ 441{
433 u32 xirr; 442 u32 xirr;
@@ -458,6 +467,11 @@ static inline void __init kvm_cma_reserve(void)
458static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 467static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
459{} 468{}
460 469
470static inline void kvmppc_set_xive_tima(int cpu,
471 unsigned long phys_addr,
472 void __iomem *virt_addr)
473{}
474
461static inline u32 kvmppc_get_xics_latch(void) 475static inline u32 kvmppc_get_xics_latch(void)
462{ 476{
463 return 0; 477 return 0;
@@ -508,6 +522,10 @@ extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
508 struct kvmppc_irq_map *irq_map, 522 struct kvmppc_irq_map *irq_map,
509 struct kvmppc_passthru_irqmap *pimap, 523 struct kvmppc_passthru_irqmap *pimap,
510 bool *again); 524 bool *again);
525
526extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
527 int level, bool line_status);
528
511extern int h_ipi_redirect; 529extern int h_ipi_redirect;
512#else 530#else
513static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( 531static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@@ -525,6 +543,60 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
525 { return 0; } 543 { return 0; }
526#endif 544#endif
527 545
546#ifdef CONFIG_KVM_XIVE
547/*
548 * Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
549 * ie. P9 new interrupt controller, while the second "xive" is the legacy
550 * "eXternal Interrupt Vector Entry" which is the configuration of an
551 * interrupt on the "xics" interrupt controller on P8 and earlier. Those
552 * two function consume or produce a legacy "XIVE" state from the
553 * new "XIVE" interrupt controller.
554 */
555extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
556 u32 priority);
557extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
558 u32 *priority);
559extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
560extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
561extern void kvmppc_xive_init_module(void);
562extern void kvmppc_xive_exit_module(void);
563
564extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
565 struct kvm_vcpu *vcpu, u32 cpu);
566extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
567extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
568 struct irq_desc *host_desc);
569extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
570 struct irq_desc *host_desc);
571extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
572extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
573
574extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
575 int level, bool line_status);
576#else
577static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
578 u32 priority) { return -1; }
579static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
580 u32 *priority) { return -1; }
581static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
582static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
583static inline void kvmppc_xive_init_module(void) { }
584static inline void kvmppc_xive_exit_module(void) { }
585
586static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
587 struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
588static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
589static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
590 struct irq_desc *host_desc) { return -ENODEV; }
591static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
592 struct irq_desc *host_desc) { return -ENODEV; }
593static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
594static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
595
596static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
597 int level, bool line_status) { return -ENODEV; }
598#endif /* CONFIG_KVM_XIVE */
599
528/* 600/*
529 * Prototypes for functions called only from assembler code. 601 * Prototypes for functions called only from assembler code.
530 * Having prototypes reduces sparse errors. 602 * Having prototypes reduces sparse errors.
@@ -562,6 +634,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
562long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, 634long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
563 unsigned long slb_v, unsigned int status, bool data); 635 unsigned long slb_v, unsigned int status, bool data);
564unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); 636unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
637unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu);
638unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
565int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, 639int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
566 unsigned long mfrr); 640 unsigned long mfrr);
567int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); 641int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3cdbeaeac397..c8a822acf962 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -99,7 +99,6 @@ struct xive_q {
99#define XIVE_ESB_SET_PQ_01 0xd00 99#define XIVE_ESB_SET_PQ_01 0xd00
100#define XIVE_ESB_SET_PQ_10 0xe00 100#define XIVE_ESB_SET_PQ_10 0xe00
101#define XIVE_ESB_SET_PQ_11 0xf00 101#define XIVE_ESB_SET_PQ_11 0xf00
102#define XIVE_ESB_MASK XIVE_ESB_SET_PQ_01
103 102
104#define XIVE_ESB_VAL_P 0x2 103#define XIVE_ESB_VAL_P 0x2
105#define XIVE_ESB_VAL_Q 0x1 104#define XIVE_ESB_VAL_Q 0x1
@@ -136,11 +135,11 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
136 __be32 *qpage, u32 order, bool can_escalate); 135 __be32 *qpage, u32 order, bool can_escalate);
137extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); 136extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
138 137
139extern bool __xive_irq_trigger(struct xive_irq_data *xd); 138extern void xive_native_sync_source(u32 hw_irq);
140extern bool __xive_irq_retrigger(struct xive_irq_data *xd);
141extern void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd);
142
143extern bool is_xive_irq(struct irq_chip *chip); 139extern bool is_xive_irq(struct irq_chip *chip);
140extern int xive_native_enable_vp(u32 vp_id);
141extern int xive_native_disable_vp(u32 vp_id);
142extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
144 143
145#else 144#else
146 145
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 439c257dec4a..709e23425317 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -634,6 +634,8 @@ int main(void)
634 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); 634 HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
635 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); 635 HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
636 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); 636 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
637 HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
638 HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
637 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); 639 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
638 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); 640 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
639 HSTATE_FIELD(HSTATE_PTID, ptid); 641 HSTATE_FIELD(HSTATE_PTID, ptid);
@@ -719,6 +721,14 @@ int main(void)
719 OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); 721 OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
720#endif 722#endif
721 723
724#ifdef CONFIG_KVM_XICS
725 DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
726 arch.xive_saved_state));
727 DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
728 arch.xive_cam_word));
729 DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
730#endif
731
722#ifdef CONFIG_KVM_EXIT_TIMING 732#ifdef CONFIG_KVM_EXIT_TIMING
723 OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); 733 OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
724 OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); 734 OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 65a471de96de..24de532c1736 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -197,6 +197,11 @@ config KVM_XICS
197 Specification) interrupt controller architecture used on 197 Specification) interrupt controller architecture used on
198 IBM POWER (pSeries) servers. 198 IBM POWER (pSeries) servers.
199 199
200config KVM_XIVE
201 bool
202 default y
203 depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
204
200source drivers/vhost/Kconfig 205source drivers/vhost/Kconfig
201 206
202endif # VIRTUALIZATION 207endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b87ccde2137a..d91a2604c496 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -74,7 +74,7 @@ kvm-hv-y += \
74 book3s_64_mmu_radix.o 74 book3s_64_mmu_radix.o
75 75
76kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 76kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
77 book3s_hv_rm_xics.o 77 book3s_hv_rm_xics.o book3s_hv_rm_xive.o
78 78
79ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 79ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
80kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ 80kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
@@ -89,6 +89,8 @@ endif
89kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ 89kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
90 book3s_xics.o 90 book3s_xics.o
91 91
92kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
93
92kvm-book3s_64-module-objs := \ 94kvm-book3s_64-module-objs := \
93 $(common-objs-y) \ 95 $(common-objs-y) \
94 book3s.o \ 96 book3s.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 8c4d7e9d27d2..72d977e30952 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -35,6 +35,7 @@
35#include <asm/kvm_book3s.h> 35#include <asm/kvm_book3s.h>
36#include <asm/mmu_context.h> 36#include <asm/mmu_context.h>
37#include <asm/page.h> 37#include <asm/page.h>
38#include <asm/xive.h>
38 39
39#include "book3s.h" 40#include "book3s.h"
40#include "trace.h" 41#include "trace.h"
@@ -596,11 +597,14 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
596 break; 597 break;
597#ifdef CONFIG_KVM_XICS 598#ifdef CONFIG_KVM_XICS
598 case KVM_REG_PPC_ICP_STATE: 599 case KVM_REG_PPC_ICP_STATE:
599 if (!vcpu->arch.icp) { 600 if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
600 r = -ENXIO; 601 r = -ENXIO;
601 break; 602 break;
602 } 603 }
603 *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); 604 if (xive_enabled())
605 *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
606 else
607 *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
604 break; 608 break;
605#endif /* CONFIG_KVM_XICS */ 609#endif /* CONFIG_KVM_XICS */
606 case KVM_REG_PPC_FSCR: 610 case KVM_REG_PPC_FSCR:
@@ -666,12 +670,14 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
666#endif /* CONFIG_VSX */ 670#endif /* CONFIG_VSX */
667#ifdef CONFIG_KVM_XICS 671#ifdef CONFIG_KVM_XICS
668 case KVM_REG_PPC_ICP_STATE: 672 case KVM_REG_PPC_ICP_STATE:
669 if (!vcpu->arch.icp) { 673 if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
670 r = -ENXIO; 674 r = -ENXIO;
671 break; 675 break;
672 } 676 }
673 r = kvmppc_xics_set_icp(vcpu, 677 if (xive_enabled())
674 set_reg_val(id, *val)); 678 r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
679 else
680 r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
675 break; 681 break;
676#endif /* CONFIG_KVM_XICS */ 682#endif /* CONFIG_KVM_XICS */
677 case KVM_REG_PPC_FSCR: 683 case KVM_REG_PPC_FSCR:
@@ -942,6 +948,50 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
942 return kvm->arch.kvm_ops->hcall_implemented(hcall); 948 return kvm->arch.kvm_ops->hcall_implemented(hcall);
943} 949}
944 950
951#ifdef CONFIG_KVM_XICS
952int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
953 bool line_status)
954{
955 if (xive_enabled())
956 return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
957 line_status);
958 else
959 return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
960 line_status);
961}
962
963int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
964 struct kvm *kvm, int irq_source_id,
965 int level, bool line_status)
966{
967 return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
968 level, line_status);
969}
970static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,
971 struct kvm *kvm, int irq_source_id, int level,
972 bool line_status)
973{
974 return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
975}
976
977int kvm_irq_map_gsi(struct kvm *kvm,
978 struct kvm_kernel_irq_routing_entry *entries, int gsi)
979{
980 entries->gsi = gsi;
981 entries->type = KVM_IRQ_ROUTING_IRQCHIP;
982 entries->set = kvmppc_book3s_set_irq;
983 entries->irqchip.irqchip = 0;
984 entries->irqchip.pin = gsi;
985 return 1;
986}
987
988int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
989{
990 return pin;
991}
992
993#endif /* CONFIG_KVM_XICS */
994
945static int kvmppc_book3s_init(void) 995static int kvmppc_book3s_init(void)
946{ 996{
947 int r; 997 int r;
@@ -952,12 +1002,25 @@ static int kvmppc_book3s_init(void)
952#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1002#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
953 r = kvmppc_book3s_init_pr(); 1003 r = kvmppc_book3s_init_pr();
954#endif 1004#endif
955 return r;
956 1005
1006#ifdef CONFIG_KVM_XICS
1007#ifdef CONFIG_KVM_XIVE
1008 if (xive_enabled()) {
1009 kvmppc_xive_init_module();
1010 kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
1011 } else
1012#endif
1013 kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
1014#endif
1015 return r;
957} 1016}
958 1017
959static void kvmppc_book3s_exit(void) 1018static void kvmppc_book3s_exit(void)
960{ 1019{
1020#ifdef CONFIG_KVM_XICS
1021 if (xive_enabled())
1022 kvmppc_xive_exit_module();
1023#endif
961#ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1024#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
962 kvmppc_book3s_exit_pr(); 1025 kvmppc_book3s_exit_pr();
963#endif 1026#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 549dd6070dee..42b7a4fd57d9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -67,6 +67,7 @@
67#include <asm/mmu.h> 67#include <asm/mmu.h>
68#include <asm/opal.h> 68#include <asm/opal.h>
69#include <asm/xics.h> 69#include <asm/xics.h>
70#include <asm/xive.h>
70 71
71#include "book3s.h" 72#include "book3s.h"
72 73
@@ -837,6 +838,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
837 case H_IPOLL: 838 case H_IPOLL:
838 case H_XIRR_X: 839 case H_XIRR_X:
839 if (kvmppc_xics_enabled(vcpu)) { 840 if (kvmppc_xics_enabled(vcpu)) {
841 if (xive_enabled()) {
842 ret = H_NOT_AVAILABLE;
843 return RESUME_GUEST;
844 }
840 ret = kvmppc_xics_hcall(vcpu, req); 845 ret = kvmppc_xics_hcall(vcpu, req);
841 break; 846 break;
842 } 847 }
@@ -2947,8 +2952,12 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
2947 r = kvmppc_book3s_hv_page_fault(run, vcpu, 2952 r = kvmppc_book3s_hv_page_fault(run, vcpu,
2948 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); 2953 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
2949 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 2954 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2950 } else if (r == RESUME_PASSTHROUGH) 2955 } else if (r == RESUME_PASSTHROUGH) {
2951 r = kvmppc_xics_rm_complete(vcpu, 0); 2956 if (WARN_ON(xive_enabled()))
2957 r = H_SUCCESS;
2958 else
2959 r = kvmppc_xics_rm_complete(vcpu, 0);
2960 }
2952 } while (is_kvmppc_resume_guest(r)); 2961 } while (is_kvmppc_resume_guest(r));
2953 2962
2954 out: 2963 out:
@@ -3400,10 +3409,20 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
3400 /* 3409 /*
3401 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) 3410 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
3402 * Set HVICE bit to enable hypervisor virtualization interrupts. 3411 * Set HVICE bit to enable hypervisor virtualization interrupts.
3412 * Set HEIC to prevent OS interrupts to go to hypervisor (should
3413 * be unnecessary but better safe than sorry in case we re-enable
3414 * EE in HV mode with this LPCR still set)
3403 */ 3415 */
3404 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 3416 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
3405 lpcr &= ~LPCR_VPM0; 3417 lpcr &= ~LPCR_VPM0;
3406 lpcr |= LPCR_HVICE; 3418 lpcr |= LPCR_HVICE | LPCR_HEIC;
3419
3420 /*
3421 * If xive is enabled, we route 0x500 interrupts directly
3422 * to the guest.
3423 */
3424 if (xive_enabled())
3425 lpcr |= LPCR_LPES;
3407 } 3426 }
3408 3427
3409 /* 3428 /*
@@ -3533,7 +3552,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3533 struct kvmppc_irq_map *irq_map; 3552 struct kvmppc_irq_map *irq_map;
3534 struct kvmppc_passthru_irqmap *pimap; 3553 struct kvmppc_passthru_irqmap *pimap;
3535 struct irq_chip *chip; 3554 struct irq_chip *chip;
3536 int i; 3555 int i, rc = 0;
3537 3556
3538 if (!kvm_irq_bypass) 3557 if (!kvm_irq_bypass)
3539 return 1; 3558 return 1;
@@ -3558,10 +3577,10 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3558 /* 3577 /*
3559 * For now, we only support interrupts for which the EOI operation 3578 * For now, we only support interrupts for which the EOI operation
3560 * is an OPAL call followed by a write to XIRR, since that's 3579 * is an OPAL call followed by a write to XIRR, since that's
3561 * what our real-mode EOI code does. 3580 * what our real-mode EOI code does, or a XIVE interrupt
3562 */ 3581 */
3563 chip = irq_data_get_irq_chip(&desc->irq_data); 3582 chip = irq_data_get_irq_chip(&desc->irq_data);
3564 if (!chip || !is_pnv_opal_msi(chip)) { 3583 if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
3565 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", 3584 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
3566 host_irq, guest_gsi); 3585 host_irq, guest_gsi);
3567 mutex_unlock(&kvm->lock); 3586 mutex_unlock(&kvm->lock);
@@ -3603,7 +3622,12 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3603 if (i == pimap->n_mapped) 3622 if (i == pimap->n_mapped)
3604 pimap->n_mapped++; 3623 pimap->n_mapped++;
3605 3624
3606 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); 3625 if (xive_enabled())
3626 rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
3627 else
3628 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
3629 if (rc)
3630 irq_map->r_hwirq = 0;
3607 3631
3608 mutex_unlock(&kvm->lock); 3632 mutex_unlock(&kvm->lock);
3609 3633
@@ -3614,7 +3638,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3614{ 3638{
3615 struct irq_desc *desc; 3639 struct irq_desc *desc;
3616 struct kvmppc_passthru_irqmap *pimap; 3640 struct kvmppc_passthru_irqmap *pimap;
3617 int i; 3641 int i, rc = 0;
3618 3642
3619 if (!kvm_irq_bypass) 3643 if (!kvm_irq_bypass)
3620 return 0; 3644 return 0;
@@ -3639,9 +3663,12 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3639 return -ENODEV; 3663 return -ENODEV;
3640 } 3664 }
3641 3665
3642 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); 3666 if (xive_enabled())
3667 rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
3668 else
3669 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
3643 3670
3644 /* invalidate the entry */ 3671 /* invalidate the entry (what do do on error from the above ?) */
3645 pimap->mapped[i].r_hwirq = 0; 3672 pimap->mapped[i].r_hwirq = 0;
3646 3673
3647 /* 3674 /*
@@ -3650,7 +3677,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3650 */ 3677 */
3651 unlock: 3678 unlock:
3652 mutex_unlock(&kvm->lock); 3679 mutex_unlock(&kvm->lock);
3653 return 0; 3680 return rc;
3654} 3681}
3655 3682
3656static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons, 3683static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
@@ -3928,7 +3955,7 @@ static int kvmppc_book3s_init_hv(void)
3928 * indirectly, via OPAL. 3955 * indirectly, via OPAL.
3929 */ 3956 */
3930#ifdef CONFIG_SMP 3957#ifdef CONFIG_SMP
3931 if (!get_paca()->kvm_hstate.xics_phys) { 3958 if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
3932 struct device_node *np; 3959 struct device_node *np;
3933 3960
3934 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); 3961 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 9c71c72e65ce..88a65923c649 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -32,6 +32,24 @@
32 32
33#define KVM_CMA_CHUNK_ORDER 18 33#define KVM_CMA_CHUNK_ORDER 18
34 34
35#include "book3s_xics.h"
36#include "book3s_xive.h"
37
38/*
39 * The XIVE module will populate these when it loads
40 */
41unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
42unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
43int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
44 unsigned long mfrr);
45int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
46int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
47EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
48EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
49EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
50EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
51EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
52
35/* 53/*
36 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) 54 * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
37 * should be power of 2. 55 * should be power of 2.
@@ -211,6 +229,7 @@ void kvmhv_rm_send_ipi(int cpu)
211 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); 229 __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
212 return; 230 return;
213 } 231 }
232
214 /* On POWER8 for IPIs to threads in the same core, use msgsnd. */ 233 /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
215 if (cpu_has_feature(CPU_FTR_ARCH_207S) && 234 if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
216 cpu_first_thread_sibling(cpu) == 235 cpu_first_thread_sibling(cpu) ==
@@ -407,6 +426,9 @@ static long kvmppc_read_one_intr(bool *again)
407 u8 host_ipi; 426 u8 host_ipi;
408 int64_t rc; 427 int64_t rc;
409 428
429 if (xive_enabled())
430 return 1;
431
410 /* see if a host IPI is pending */ 432 /* see if a host IPI is pending */
411 host_ipi = local_paca->kvm_hstate.host_ipi; 433 host_ipi = local_paca->kvm_hstate.host_ipi;
412 if (host_ipi) 434 if (host_ipi)
@@ -491,3 +513,84 @@ static long kvmppc_read_one_intr(bool *again)
491 513
492 return kvmppc_check_passthru(xisr, xirr, again); 514 return kvmppc_check_passthru(xisr, xirr, again);
493} 515}
516
517#ifdef CONFIG_KVM_XICS
518static inline bool is_rm(void)
519{
520 return !(mfmsr() & MSR_DR);
521}
522
523unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
524{
525 if (xive_enabled()) {
526 if (is_rm())
527 return xive_rm_h_xirr(vcpu);
528 if (unlikely(!__xive_vm_h_xirr))
529 return H_NOT_AVAILABLE;
530 return __xive_vm_h_xirr(vcpu);
531 } else
532 return xics_rm_h_xirr(vcpu);
533}
534
535unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
536{
537 vcpu->arch.gpr[5] = get_tb();
538 if (xive_enabled()) {
539 if (is_rm())
540 return xive_rm_h_xirr(vcpu);
541 if (unlikely(!__xive_vm_h_xirr))
542 return H_NOT_AVAILABLE;
543 return __xive_vm_h_xirr(vcpu);
544 } else
545 return xics_rm_h_xirr(vcpu);
546}
547
548unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
549{
550 if (xive_enabled()) {
551 if (is_rm())
552 return xive_rm_h_ipoll(vcpu, server);
553 if (unlikely(!__xive_vm_h_ipoll))
554 return H_NOT_AVAILABLE;
555 return __xive_vm_h_ipoll(vcpu, server);
556 } else
557 return H_TOO_HARD;
558}
559
560int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
561 unsigned long mfrr)
562{
563 if (xive_enabled()) {
564 if (is_rm())
565 return xive_rm_h_ipi(vcpu, server, mfrr);
566 if (unlikely(!__xive_vm_h_ipi))
567 return H_NOT_AVAILABLE;
568 return __xive_vm_h_ipi(vcpu, server, mfrr);
569 } else
570 return xics_rm_h_ipi(vcpu, server, mfrr);
571}
572
573int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
574{
575 if (xive_enabled()) {
576 if (is_rm())
577 return xive_rm_h_cppr(vcpu, cppr);
578 if (unlikely(!__xive_vm_h_cppr))
579 return H_NOT_AVAILABLE;
580 return __xive_vm_h_cppr(vcpu, cppr);
581 } else
582 return xics_rm_h_cppr(vcpu, cppr);
583}
584
585int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
586{
587 if (xive_enabled()) {
588 if (is_rm())
589 return xive_rm_h_eoi(vcpu, xirr);
590 if (unlikely(!__xive_vm_h_eoi))
591 return H_NOT_AVAILABLE;
592 return __xive_vm_h_eoi(vcpu, xirr);
593 } else
594 return xics_rm_h_eoi(vcpu, xirr);
595}
596#endif /* CONFIG_KVM_XICS */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index ffde4507ddfd..2a862618f072 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -484,7 +484,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
484} 484}
485 485
486 486
487unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) 487unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
488{ 488{
489 union kvmppc_icp_state old_state, new_state; 489 union kvmppc_icp_state old_state, new_state;
490 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 490 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -522,8 +522,8 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
522 return check_too_hard(xics, icp); 522 return check_too_hard(xics, icp);
523} 523}
524 524
525int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, 525int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
526 unsigned long mfrr) 526 unsigned long mfrr)
527{ 527{
528 union kvmppc_icp_state old_state, new_state; 528 union kvmppc_icp_state old_state, new_state;
529 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 529 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -609,7 +609,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
609 return check_too_hard(xics, this_icp); 609 return check_too_hard(xics, this_icp);
610} 610}
611 611
612int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) 612int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
613{ 613{
614 union kvmppc_icp_state old_state, new_state; 614 union kvmppc_icp_state old_state, new_state;
615 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 615 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -729,7 +729,7 @@ static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
729 return check_too_hard(xics, icp); 729 return check_too_hard(xics, icp);
730} 730}
731 731
732int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) 732int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
733{ 733{
734 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 734 struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
735 struct kvmppc_icp *icp = vcpu->arch.icp; 735 struct kvmppc_icp *icp = vcpu->arch.icp;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
new file mode 100644
index 000000000000..abf5f01b6eb1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c
@@ -0,0 +1,47 @@
1#include <linux/kernel.h>
2#include <linux/kvm_host.h>
3#include <linux/err.h>
4#include <linux/kernel_stat.h>
5
6#include <asm/kvm_book3s.h>
7#include <asm/kvm_ppc.h>
8#include <asm/hvcall.h>
9#include <asm/xics.h>
10#include <asm/debug.h>
11#include <asm/synch.h>
12#include <asm/cputhreads.h>
13#include <asm/pgtable.h>
14#include <asm/ppc-opcode.h>
15#include <asm/pnv-pci.h>
16#include <asm/opal.h>
17#include <asm/smp.h>
18#include <asm/asm-prototypes.h>
19#include <asm/xive.h>
20#include <asm/xive-regs.h>
21
22#include "book3s_xive.h"
23
24/* XXX */
25#include <asm/udbg.h>
26//#define DBG(fmt...) udbg_printf(fmt)
27#define DBG(fmt...) do { } while(0)
28
29static inline void __iomem *get_tima_phys(void)
30{
31 return local_paca->kvm_hstate.xive_tima_phys;
32}
33
34#undef XIVE_RUNTIME_CHECKS
35#define X_PFX xive_rm_
36#define X_STATIC
37#define X_STAT_PFX stat_rm_
38#define __x_tima get_tima_phys()
39#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page))
40#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page))
41#define __x_readb __raw_rm_readb
42#define __x_writeb __raw_rm_writeb
43#define __x_readw __raw_rm_readw
44#define __x_readq __raw_rm_readq
45#define __x_writeq __raw_rm_writeq
46
47#include "book3s_xive_template.c"
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7c6477d1840a..bdb3f76ceb6b 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -30,6 +30,7 @@
30#include <asm/book3s/64/mmu-hash.h> 30#include <asm/book3s/64/mmu-hash.h>
31#include <asm/tm.h> 31#include <asm/tm.h>
32#include <asm/opal.h> 32#include <asm/opal.h>
33#include <asm/xive-regs.h>
33 34
34#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) 35#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
35 36
@@ -970,6 +971,23 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
970 cmpwi r3, 512 /* 1 microsecond */ 971 cmpwi r3, 512 /* 1 microsecond */
971 blt hdec_soon 972 blt hdec_soon
972 973
974#ifdef CONFIG_KVM_XICS
975 /* We are entering the guest on that thread, push VCPU to XIVE */
976 ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
977 cmpldi cr0, r10, r0
978 beq no_xive
979 ld r11, VCPU_XIVE_SAVED_STATE(r4)
980 li r9, TM_QW1_OS
981 stdcix r11,r9,r10
982 eieio
983 lwz r11, VCPU_XIVE_CAM_WORD(r4)
984 li r9, TM_QW1_OS + TM_WORD2
985 stwcix r11,r9,r10
986 li r9, 1
987 stw r9, VCPU_XIVE_PUSHED(r4)
988no_xive:
989#endif /* CONFIG_KVM_XICS */
990
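A rough C-level sketch of the "push" sequence above, for readability only: the real code is the assembly, which uses cache-inhibited stores (stdcix/stwcix) to the physical TIMA, so write64()/write32() below are placeholder names, not real kernel helpers.

	/* Sketch only: push the VCPU's OS CAM line into the XIVE TIMA */
	void __iomem *tima = local_paca->kvm_hstate.xive_tima_phys;

	if (tima) {
		/* Words 0-1 of the OS ring, then word 2 (the CAM word) */
		write64(tima + TM_QW1_OS, vcpu->arch.xive_saved_state.w01);
		write32(tima + TM_QW1_OS + TM_WORD2, vcpu->arch.xive_cam_word);
		vcpu->arch.xive_pushed = 1;
	}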
973deliver_guest_interrupt: 991deliver_guest_interrupt:
974 ld r6, VCPU_CTR(r4) 992 ld r6, VCPU_CTR(r4)
975 ld r7, VCPU_XER(r4) 993 ld r7, VCPU_XER(r4)
@@ -1307,6 +1325,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1307 blt deliver_guest_interrupt 1325 blt deliver_guest_interrupt
1308 1326
1309guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 1327guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1328#ifdef CONFIG_KVM_XICS
1329 /* We are exiting, pull the VP from the XIVE */
1330 lwz r0, VCPU_XIVE_PUSHED(r9)
1331 cmpwi cr0, r0, 0
1332 beq 1f
1333 li r7, TM_SPC_PULL_OS_CTX
1334 li r6, TM_QW1_OS
1335 mfmsr r0
1336 andi. r0, r0, MSR_IR /* in real mode? */
1337 beq 2f
1338 ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
1339 cmpldi cr0, r10, 0
1340 beq 1f
1341 /* First load to pull the context, we ignore the value */
1342 lwzx r11, r7, r10
1343 eieio
1344 /* Second load to recover the context state (Words 0 and 1) */
1345 ldx r11, r6, r10
1346 b 3f
13472: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
1348 cmpldi cr0, r10, 0
1349 beq 1f
1350 /* First load to pull the context, we ignore the value */
1351 lwzcix r11, r7, r10
1352 eieio
1353 /* Second load to recover the context state (Words 0 and 1) */
1354 ldcix r11, r6, r10
13553: std r11, VCPU_XIVE_SAVED_STATE(r9)
1356 /* Fixup some of the state for the next load */
1357 li r10, 0
1358 li r0, 0xff
1359 stw r10, VCPU_XIVE_PUSHED(r9)
1360 stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
1361 stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
13621:
1363#endif /* CONFIG_KVM_XICS */
1310 /* Save more register state */ 1364 /* Save more register state */
1311 mfdar r6 1365 mfdar r6
1312 mfdsisr r7 1366 mfdsisr r7
@@ -2011,7 +2065,7 @@ hcall_real_table:
2011 .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table 2065 .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
2012 .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table 2066 .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
2013 .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table 2067 .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
2014 .long 0 /* 0x70 - H_IPOLL */ 2068 .long DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
2015 .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table 2069 .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
2016#else 2070#else
2017 .long 0 /* 0x64 - H_EOI */ 2071 .long 0 /* 0x64 - H_EOI */
@@ -2181,7 +2235,11 @@ hcall_real_table:
2181 .long 0 /* 0x2f0 */ 2235 .long 0 /* 0x2f0 */
2182 .long 0 /* 0x2f4 */ 2236 .long 0 /* 0x2f4 */
2183 .long 0 /* 0x2f8 */ 2237 .long 0 /* 0x2f8 */
2184 .long 0 /* 0x2fc */ 2238#ifdef CONFIG_KVM_XICS
2239 .long DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
2240#else
2241 .long 0 /* 0x2fc - H_XIRR_X*/
2242#endif
2185 .long DOTSYM(kvmppc_h_random) - hcall_real_table 2243 .long DOTSYM(kvmppc_h_random) - hcall_real_table
2186 .globl hcall_real_table_end 2244 .globl hcall_real_table_end
2187hcall_real_table_end: 2245hcall_real_table_end:
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 20528701835b..2d3b2b1cc272 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -16,6 +16,7 @@
16#include <asm/kvm_ppc.h> 16#include <asm/kvm_ppc.h>
17#include <asm/hvcall.h> 17#include <asm/hvcall.h>
18#include <asm/rtas.h> 18#include <asm/rtas.h>
19#include <asm/xive.h>
19 20
20#ifdef CONFIG_KVM_XICS 21#ifdef CONFIG_KVM_XICS
21static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) 22static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -32,7 +33,10 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
32 server = be32_to_cpu(args->args[1]); 33 server = be32_to_cpu(args->args[1]);
33 priority = be32_to_cpu(args->args[2]); 34 priority = be32_to_cpu(args->args[2]);
34 35
35 rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); 36 if (xive_enabled())
37 rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
38 else
39 rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
36 if (rc) 40 if (rc)
37 rc = -3; 41 rc = -3;
38out: 42out:
@@ -52,7 +56,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
52 irq = be32_to_cpu(args->args[0]); 56 irq = be32_to_cpu(args->args[0]);
53 57
54 server = priority = 0; 58 server = priority = 0;
55 rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); 59 if (xive_enabled())
60 rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
61 else
62 rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
56 if (rc) { 63 if (rc) {
57 rc = -3; 64 rc = -3;
58 goto out; 65 goto out;
@@ -76,7 +83,10 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
76 83
77 irq = be32_to_cpu(args->args[0]); 84 irq = be32_to_cpu(args->args[0]);
78 85
79 rc = kvmppc_xics_int_off(vcpu->kvm, irq); 86 if (xive_enabled())
87 rc = kvmppc_xive_int_off(vcpu->kvm, irq);
88 else
89 rc = kvmppc_xics_int_off(vcpu->kvm, irq);
80 if (rc) 90 if (rc)
81 rc = -3; 91 rc = -3;
82out: 92out:
@@ -95,7 +105,10 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
95 105
96 irq = be32_to_cpu(args->args[0]); 106 irq = be32_to_cpu(args->args[0]);
97 107
98 rc = kvmppc_xics_int_on(vcpu->kvm, irq); 108 if (xive_enabled())
109 rc = kvmppc_xive_int_on(vcpu->kvm, irq);
110 else
111 rc = kvmppc_xics_int_on(vcpu->kvm, irq);
99 if (rc) 112 if (rc)
100 rc = -3; 113 rc = -3;
101out: 114out:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 459b72cb617a..d329b2add7e2 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1306,8 +1306,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
1306 return 0; 1306 return 0;
1307} 1307}
1308 1308
1309int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, 1309int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1310 bool line_status) 1310 bool line_status)
1311{ 1311{
1312 struct kvmppc_xics *xics = kvm->arch.xics; 1312 struct kvmppc_xics *xics = kvm->arch.xics;
1313 1313
@@ -1316,14 +1316,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1316 return ics_deliver_irq(xics, irq, level); 1316 return ics_deliver_irq(xics, irq, level);
1317} 1317}
1318 1318
1319int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
1320 struct kvm *kvm, int irq_source_id,
1321 int level, bool line_status)
1322{
1323 return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
1324 level, line_status);
1325}
1326
1327static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1319static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1328{ 1320{
1329 struct kvmppc_xics *xics = dev->private; 1321 struct kvmppc_xics *xics = dev->private;
@@ -1457,29 +1449,6 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
1457 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; 1449 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
1458} 1450}
1459 1451
1460static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
1461 struct kvm *kvm, int irq_source_id, int level,
1462 bool line_status)
1463{
1464 return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
1465}
1466
1467int kvm_irq_map_gsi(struct kvm *kvm,
1468 struct kvm_kernel_irq_routing_entry *entries, int gsi)
1469{
1470 entries->gsi = gsi;
1471 entries->type = KVM_IRQ_ROUTING_IRQCHIP;
1472 entries->set = xics_set_irq;
1473 entries->irqchip.irqchip = 0;
1474 entries->irqchip.pin = gsi;
1475 return 1;
1476}
1477
1478int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
1479{
1480 return pin;
1481}
1482
1483void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq, 1452void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
1484 unsigned long host_irq) 1453 unsigned long host_irq)
1485{ 1454{
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index ec5474cf70c6..453c9e518c19 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -10,6 +10,7 @@
10#ifndef _KVM_PPC_BOOK3S_XICS_H 10#ifndef _KVM_PPC_BOOK3S_XICS_H
11#define _KVM_PPC_BOOK3S_XICS_H 11#define _KVM_PPC_BOOK3S_XICS_H
12 12
13#ifdef CONFIG_KVM_XICS
13/* 14/*
14 * We use a two-level tree to store interrupt source information. 15 * We use a two-level tree to store interrupt source information.
15 * There are up to 1024 ICS nodes, each of which can represent 16 * There are up to 1024 ICS nodes, each of which can represent
@@ -144,5 +145,11 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
144 return ics; 145 return ics;
145} 146}
146 147
148extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
149extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
150 unsigned long mfrr);
151extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
152extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
147 153
154#endif /* CONFIG_KVM_XICS */
148#endif /* _KVM_PPC_BOOK3S_XICS_H */ 155#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
new file mode 100644
index 000000000000..ffe1da95033a
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -0,0 +1,1894 @@
1/*
2 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#define pr_fmt(fmt) "xive-kvm: " fmt
10
11#include <linux/kernel.h>
12#include <linux/kvm_host.h>
13#include <linux/err.h>
14#include <linux/gfp.h>
15#include <linux/spinlock.h>
16#include <linux/delay.h>
17#include <linux/percpu.h>
18#include <linux/cpumask.h>
19#include <asm/uaccess.h>
20#include <asm/kvm_book3s.h>
21#include <asm/kvm_ppc.h>
22#include <asm/hvcall.h>
23#include <asm/xics.h>
24#include <asm/xive.h>
25#include <asm/xive-regs.h>
26#include <asm/debug.h>
27#include <asm/debugfs.h>
28#include <asm/time.h>
29#include <asm/opal.h>
30
31#include <linux/debugfs.h>
32#include <linux/seq_file.h>
33
34#include "book3s_xive.h"
35
36
37/*
38 * Virtual mode variants of the hcalls for use on radix/radix
39 * with AIL. They require the VCPU's VP to be "pushed"
40 *
41 * We still instantiate them here because we use some of the
42 * generated utility functions as well in this file.
43 */
44#define XIVE_RUNTIME_CHECKS
45#define X_PFX xive_vm_
46#define X_STATIC static
47#define X_STAT_PFX stat_vm_
48#define __x_tima xive_tima
49#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
50#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
51#define __x_readb __raw_readb
52#define __x_writeb __raw_writeb
53#define __x_readw __raw_readw
54#define __x_readq __raw_readq
55#define __x_writeq __raw_writeq
56
57#include "book3s_xive_template.c"
58
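To make the template mechanism easier to follow: book3s_xive_template.c is compiled twice, once here and once from book3s_hv_rm_xive.c, with different X_PFX and accessor definitions. A simplified sketch of the token-pasting idiom is below; the XIVE_GLUE() helper is an illustrative assumption, not the template's actual internal name.

#define XIVE_XGLUE(a, b)	a##b
#define XIVE_GLUE(a, b)		XIVE_XGLUE(a, b)

/* Inside the template, handlers are declared roughly like this ... */
X_STATIC unsigned long XIVE_GLUE(X_PFX, h_xirr)(struct kvm_vcpu *vcpu);

/* ... so this file generates xive_vm_h_xirr() using __raw_readq() on the
 * virtual TIMA, while the real-mode build generates xive_rm_h_xirr()
 * using __raw_rm_readq() on the physical TIMA. */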
59/*
60 * We leave a gap of a couple of interrupts in the queue to
61 * account for the IPI and additional safety guard.
62 */
63#define XIVE_Q_GAP 2
64
65/*
66 * This is a simple trigger for a generic XIVE IRQ. This must
67 * only be called for interrupts that support a trigger page
68 */
69static bool xive_irq_trigger(struct xive_irq_data *xd)
70{
71 /* This should be only for MSIs */
72 if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
73 return false;
74
75 /* Those interrupts should always have a trigger page */
76 if (WARN_ON(!xd->trig_mmio))
77 return false;
78
79 out_be64(xd->trig_mmio, 0);
80
81 return true;
82}
83
84static irqreturn_t xive_esc_irq(int irq, void *data)
85{
86 struct kvm_vcpu *vcpu = data;
87
88 /* We use the existing H_PROD mechanism to wake up the target */
89 vcpu->arch.prodded = 1;
90 smp_mb();
91 if (vcpu->arch.ceded)
92 kvmppc_fast_vcpu_kick(vcpu);
93
94 return IRQ_HANDLED;
95}
96
97static int xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio)
98{
99 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
100 struct xive_q *q = &xc->queues[prio];
101 char *name = NULL;
102 int rc;
103
104 /* Already there ? */
105 if (xc->esc_virq[prio])
106 return 0;
107
108 /* Hook up the escalation interrupt */
109 xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
110 if (!xc->esc_virq[prio]) {
111 pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
112 prio, xc->server_num);
113 return -EIO;
114 }
115
116 /*
117 * Future improvement: start with them disabled
118 * and handle DD2 and later scheme of merged escalation
119 * interrupts
120 */
121 name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
122 vcpu->kvm->arch.lpid, xc->server_num, prio);
123 if (!name) {
124 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
125 prio, xc->server_num);
126 rc = -ENOMEM;
127 goto error;
128 }
129 rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
130 IRQF_NO_THREAD, name, vcpu);
131 if (rc) {
132 pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
133 prio, xc->server_num);
134 goto error;
135 }
136 xc->esc_virq_names[prio] = name;
137 return 0;
138error:
139 irq_dispose_mapping(xc->esc_virq[prio]);
140 xc->esc_virq[prio] = 0;
141 kfree(name);
142 return rc;
143}
144
145static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
146{
147 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
148 struct kvmppc_xive *xive = xc->xive;
149 struct xive_q *q = &xc->queues[prio];
150 void *qpage;
151 int rc;
152
153 if (WARN_ON(q->qpage))
154 return 0;
155
156 /* Allocate the queue and retrieve info on the current node for now */
157 qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
158 if (!qpage) {
159 pr_err("Failed to allocate queue %d for VCPU %d\n",
160 prio, xc->server_num);
161 return -ENOMEM;
162 }
163 memset(qpage, 0, 1 << xive->q_order);
164
165 /*
166 * Reconfigure the queue. This will set q->qpage only once the
167 * queue is fully configured. This is a requirement for prio 0
168 * as we will stop doing EOIs for every IPI as soon as we observe
169 * qpage being non-NULL, and instead will only EOI when we receive
170 * corresponding queue 0 entries
171 */
172 rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
173 xive->q_order, true);
174 if (rc)
175 pr_err("Failed to configure queue %d for VCPU %d\n",
176 prio, xc->server_num);
177 return rc;
178}
179
180/* Called with kvm_lock held */
181static int xive_check_provisioning(struct kvm *kvm, u8 prio)
182{
183 struct kvmppc_xive *xive = kvm->arch.xive;
184 struct kvm_vcpu *vcpu;
185 int i, rc;
186
187 lockdep_assert_held(&kvm->lock);
188
189 /* Already provisioned ? */
190 if (xive->qmap & (1 << prio))
191 return 0;
192
193 pr_devel("Provisioning prio... %d\n", prio);
194
195 /* Provision each VCPU and enable escalations */
196 kvm_for_each_vcpu(i, vcpu, kvm) {
197 if (!vcpu->arch.xive_vcpu)
198 continue;
199 rc = xive_provision_queue(vcpu, prio);
200 if (rc == 0)
201 xive_attach_escalation(vcpu, prio);
202 if (rc)
203 return rc;
204 }
205
206 /* Order previous stores and mark it as provisioned */
207 mb();
208 xive->qmap |= (1 << prio);
209 return 0;
210}
211
212static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
213{
214 struct kvm_vcpu *vcpu;
215 struct kvmppc_xive_vcpu *xc;
216 struct xive_q *q;
217
218 /* Locate target server */
219 vcpu = kvmppc_xive_find_server(kvm, server);
220 if (!vcpu) {
221 pr_warn("%s: Can't find server %d\n", __func__, server);
222 return;
223 }
224 xc = vcpu->arch.xive_vcpu;
225 if (WARN_ON(!xc))
226 return;
227
228 q = &xc->queues[prio];
229 atomic_inc(&q->pending_count);
230}
231
232static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
233{
234 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
235 struct xive_q *q;
236 u32 max;
237
238 if (WARN_ON(!xc))
239 return -ENXIO;
240 if (!xc->valid)
241 return -ENXIO;
242
243 q = &xc->queues[prio];
244 if (WARN_ON(!q->qpage))
245 return -ENXIO;
246
247 /* Calculate max number of interrupts in that queue. */
248 max = (q->msk + 1) - XIVE_Q_GAP;
249 return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
250}
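As a worked example of the capacity check above (assuming q->msk is the usual entry-count-minus-one index mask): for a 256-entry queue, max = (255 + 1) - XIVE_Q_GAP = 254, so at most 254 interrupts can be accounted against that queue; the next attempt returns -EBUSY and xive_select_target() falls back to another VCPU, keeping two slots free for the IPI and a safety margin.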
251
252static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
253{
254 struct kvm_vcpu *vcpu;
255 int i, rc;
256
257 /* Locate target server */
258 vcpu = kvmppc_xive_find_server(kvm, *server);
259 if (!vcpu) {
260 pr_devel("Can't find server %d\n", *server);
261 return -EINVAL;
262 }
263
264 pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
265
266 /* Try pick it */
267 rc = xive_try_pick_queue(vcpu, prio);
268 if (rc == 0)
269 return rc;
270
271 pr_devel(" .. failed, looking up candidate...\n");
272
273 /* Failed, pick another VCPU */
274 kvm_for_each_vcpu(i, vcpu, kvm) {
275 if (!vcpu->arch.xive_vcpu)
276 continue;
277 rc = xive_try_pick_queue(vcpu, prio);
278 if (rc == 0) {
279 *server = vcpu->arch.xive_vcpu->server_num;
280 pr_devel(" found on 0x%x/%d\n", *server, prio);
281 return rc;
282 }
283 }
284 pr_devel(" no available target !\n");
285
286 /* No available target ! */
287 return -EBUSY;
288}
289
290static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
291 struct kvmppc_xive_src_block *sb,
292 struct kvmppc_xive_irq_state *state)
293{
294 struct xive_irq_data *xd;
295 u32 hw_num;
296 u8 old_prio;
297 u64 val;
298
299 /*
300 * Take the lock, set masked, try again if racing
301 * with H_EOI
302 */
303 for (;;) {
304 arch_spin_lock(&sb->lock);
305 old_prio = state->guest_priority;
306 state->guest_priority = MASKED;
307 mb();
308 if (!state->in_eoi)
309 break;
310 state->guest_priority = old_prio;
311 arch_spin_unlock(&sb->lock);
312 }
313
314 /* No change ? Bail */
315 if (old_prio == MASKED)
316 return old_prio;
317
318 /* Get the right irq */
319 kvmppc_xive_select_irq(state, &hw_num, &xd);
320
321 /*
322 * If the interrupt is marked as needing masking via
323 * firmware, we do it here. Firmware masking however
324 * is "lossy", it won't return the old p and q bits
325 * and won't set the interrupt to a state where it will
326 * record queued ones. If this is an issue we should do
327 * lazy masking instead.
328 *
329 * For now, we work around this in unmask by forcing
330 * an interrupt whenever we unmask a non-LSI via FW
331 * (if ever).
332 */
333 if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
334 xive_native_configure_irq(hw_num,
335 xive->vp_base + state->act_server,
336 MASKED, state->number);
337 /* set old_p so we can track if an H_EOI was done */
338 state->old_p = true;
339 state->old_q = false;
340 } else {
341 /* Set PQ to 10, return old P and old Q and remember them */
342 val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
343 state->old_p = !!(val & 2);
344 state->old_q = !!(val & 1);
345
346 /*
347 * Synchronize hardware to ensure the queues are updated
348 * when masking
349 */
350 xive_native_sync_source(hw_num);
351 }
352
353 return old_prio;
354}
355
356static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
357 struct kvmppc_xive_irq_state *state)
358{
359 /*
360 * Take the lock try again if racing with H_EOI
361 */
362 for (;;) {
363 arch_spin_lock(&sb->lock);
364 if (!state->in_eoi)
365 break;
366 arch_spin_unlock(&sb->lock);
367 }
368}
369
370static void xive_finish_unmask(struct kvmppc_xive *xive,
371 struct kvmppc_xive_src_block *sb,
372 struct kvmppc_xive_irq_state *state,
373 u8 prio)
374{
375 struct xive_irq_data *xd;
376 u32 hw_num;
377
378 /* If we aren't changing a thing, move on */
379 if (state->guest_priority != MASKED)
380 goto bail;
381
382 /* Get the right irq */
383 kvmppc_xive_select_irq(state, &hw_num, &xd);
384
385 /*
386 * See comment in xive_lock_and_mask() concerning masking
387 * via firmware.
388 */
389 if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
390 xive_native_configure_irq(hw_num,
391 xive->vp_base + state->act_server,
392 state->act_priority, state->number);
393 /* If an EOI is needed, do it here */
394 if (!state->old_p)
395 xive_vm_source_eoi(hw_num, xd);
396 /* If this is not an LSI, force a trigger */
397 if (!(xd->flags & OPAL_XIVE_IRQ_LSI))
398 xive_irq_trigger(xd);
399 goto bail;
400 }
401
402 /* Old Q set, set PQ to 11 */
403 if (state->old_q)
404 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
405
406 /*
407 * If old P is not set, then perform an "effective" EOI
408 * on the source. This will handle the cases where
409 * FW EOI is needed.
410 */
411 if (!state->old_p)
412 xive_vm_source_eoi(hw_num, xd);
413
414 /* Synchronize ordering and mark unmasked */
415 mb();
416bail:
417 state->guest_priority = prio;
418}
419
420/*
421 * Target an interrupt to a given server/prio; this will fall back
422 * to another server if necessary and perform the HW targeting
423 * updates as needed
424 *
425 * NOTE: Must be called with the state lock held
426 */
427static int xive_target_interrupt(struct kvm *kvm,
428 struct kvmppc_xive_irq_state *state,
429 u32 server, u8 prio)
430{
431 struct kvmppc_xive *xive = kvm->arch.xive;
432 u32 hw_num;
433 int rc;
434
435 /*
436 * This will return a tentative server and actual
437 * priority. The count for that new target will have
438 * already been incremented.
439 */
440 rc = xive_select_target(kvm, &server, prio);
441
442 /*
443 * We failed to find a target ? Not much we can do
444 * at least until we support the GIQ.
445 */
446 if (rc)
447 return rc;
448
449 /*
450 * Increment the old queue pending count if there
451 * was one so that the old queue count gets adjusted later
452 * when observed to be empty.
453 */
454 if (state->act_priority != MASKED)
455 xive_inc_q_pending(kvm,
456 state->act_server,
457 state->act_priority);
458 /*
459 * Update state and HW
460 */
461 state->act_priority = prio;
462 state->act_server = server;
463
464 /* Get the right irq */
465 kvmppc_xive_select_irq(state, &hw_num, NULL);
466
467 return xive_native_configure_irq(hw_num,
468 xive->vp_base + server,
469 prio, state->number);
470}
471
472/*
473 * Targeting rules: In order to avoid losing track of
474 * pending interrupts across mask and unmask, which would
475 * allow queue overflows, we implement the following rules:
476 *
477 * - Unless it was never enabled (or we run out of capacity)
478 * an interrupt is always targeted at a valid server/queue
479 * pair even when "masked" by the guest. This pair tends to
480 * be the last one used but it can be changed under some
481 * circumstances. That allows us to separate targeting
482 * from masking: we only handle accounting during (re)targeting,
483 * which also allows us to let an interrupt drain into its target
484 * queue after masking, avoiding complex schemes to remove
485 * interrupts from remote processor queues.
486 *
487 * - When masking, we set PQ to 10 and save the previous value
488 * of P and Q.
489 *
490 * - When unmasking, if saved Q was set, we set PQ to 11
491 * otherwise we leave PQ to the HW state which will be either
492 * 10 if nothing happened or 11 if the interrupt fired while
493 * masked. Effectively we are OR'ing the previous Q into the
494 * HW Q.
495 *
496 * Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
497 * which will unmask the interrupt and shoot a new one if Q was
498 * set.
499 *
500 * Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
501 * effectively meaning an H_EOI from the guest is still expected
502 * for that interrupt).
503 *
504 * - If H_EOI occurs while masked, we clear the saved P.
505 *
506 * - When changing target, we account on the new target and
507 * increment a separate "pending" counter on the old one.
508 * This pending counter will be used to decrement the old
509 * target's count when its queue has been observed empty.
510 */
511
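As a compact restatement of the rules above, here is a small standalone model (illustration only, not kernel code) of how the saved P/Q bits drive mask and unmask; the real transitions happen through ESB loads in xive_lock_and_mask() and xive_finish_unmask().

#include <stdbool.h>
#include <stdio.h>

/* PQ modelled as two bits: P = bit 1, Q = bit 0 */
struct pq_model { unsigned pq; bool old_p, old_q; };

static void model_mask(struct pq_model *s)
{
	s->old_p = s->pq & 2;			/* save previous P */
	s->old_q = s->pq & 1;			/* save previous Q */
	s->pq = 0x2;				/* PQ <- 10: masked, still latches Q */
}

static void model_unmask(struct pq_model *s)
{
	if (s->old_q)
		s->pq = 0x3;			/* OR the saved Q back in: PQ <- 11 */
	if (!s->old_p)				/* effective EOI: re-fires if Q was set */
		s->pq = (s->pq & 1) ? 0x2 : 0x0;
	/* if old_p was set, PQ stays 10/11: the guest still owes an H_EOI */
}

int main(void)
{
	struct pq_model s = { .pq = 0x2 };	/* presented to the guest, awaiting H_EOI */
	model_mask(&s);
	model_unmask(&s);
	printf("PQ=%x\n", s.pq);		/* 0x2: still presented, guest's H_EOI completes it */
	return 0;
}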
512int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
513 u32 priority)
514{
515 struct kvmppc_xive *xive = kvm->arch.xive;
516 struct kvmppc_xive_src_block *sb;
517 struct kvmppc_xive_irq_state *state;
518 u8 new_act_prio;
519 int rc = 0;
520 u16 idx;
521
522 if (!xive)
523 return -ENODEV;
524
525 pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
526 irq, server, priority);
527
528 /* First, check provisioning of queues */
529 if (priority != MASKED)
530 rc = xive_check_provisioning(xive->kvm,
531 xive_prio_from_guest(priority));
532 if (rc) {
533 pr_devel(" provisioning failure %d !\n", rc);
534 return rc;
535 }
536
537 sb = kvmppc_xive_find_source(xive, irq, &idx);
538 if (!sb)
539 return -EINVAL;
540 state = &sb->irq_state[idx];
541
542 /*
543 * We first handle masking/unmasking since the locking
544 * might need to be retried due to EOIs, we'll handle
545 * targeting changes later. These functions will return
546 * with the SB lock held.
547 *
548 * xive_lock_and_mask() will also set state->guest_priority
549 * but won't otherwise change other fields of the state.
550 *
551 * xive_lock_for_unmask will not actually unmask, this will
552 * be done later by xive_finish_unmask() once the targeting
553 * has been done, so we don't try to unmask an interrupt
554 * that hasn't yet been targeted.
555 */
556 if (priority == MASKED)
557 xive_lock_and_mask(xive, sb, state);
558 else
559 xive_lock_for_unmask(sb, state);
560
561
562 /*
563 * Then we handle targeting.
564 *
565 * First calculate a new "actual priority"
566 */
567 new_act_prio = state->act_priority;
568 if (priority != MASKED)
569 new_act_prio = xive_prio_from_guest(priority);
570
571 pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
572 new_act_prio, state->act_server, state->act_priority);
573
574 /*
575 * Then check if we actually need to change anything,
576 *
577 * The condition for re-targeting the interrupt is that
578 * we have a valid new priority (new_act_prio is not 0xff)
579 * and either the server or the priority changed.
580 *
581 * Note: If act_priority was ff and the new priority is
582 * also ff, we don't do anything and leave the interrupt
583 * untargeted. An attempt to do an int_on on an
584 * untargeted interrupt will fail. If that is a problem
585 * we could initialize interrupts with a valid default priority.
586 */
587
588 if (new_act_prio != MASKED &&
589 (state->act_server != server ||
590 state->act_priority != new_act_prio))
591 rc = xive_target_interrupt(kvm, state, server, new_act_prio);
592
593 /*
594 * Perform the final unmasking of the interrupt source
595 * if necessary
596 */
597 if (priority != MASKED)
598 xive_finish_unmask(xive, sb, state, priority);
599
600 /*
601 * Finally Update saved_priority to match. Only int_on/off
602 * set this field to a different value.
603 */
604 state->saved_priority = priority;
605
606 arch_spin_unlock(&sb->lock);
607 return rc;
608}
609
610int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
611 u32 *priority)
612{
613 struct kvmppc_xive *xive = kvm->arch.xive;
614 struct kvmppc_xive_src_block *sb;
615 struct kvmppc_xive_irq_state *state;
616 u16 idx;
617
618 if (!xive)
619 return -ENODEV;
620
621 sb = kvmppc_xive_find_source(xive, irq, &idx);
622 if (!sb)
623 return -EINVAL;
624 state = &sb->irq_state[idx];
625 arch_spin_lock(&sb->lock);
626 *server = state->guest_server;
627 *priority = state->guest_priority;
628 arch_spin_unlock(&sb->lock);
629
630 return 0;
631}
632
633int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
634{
635 struct kvmppc_xive *xive = kvm->arch.xive;
636 struct kvmppc_xive_src_block *sb;
637 struct kvmppc_xive_irq_state *state;
638 u16 idx;
639
640 if (!xive)
641 return -ENODEV;
642
643 sb = kvmppc_xive_find_source(xive, irq, &idx);
644 if (!sb)
645 return -EINVAL;
646 state = &sb->irq_state[idx];
647
648 pr_devel("int_on(irq=0x%x)\n", irq);
649
650 /*
651 * Check if the interrupt was not targeted
652 */
653 if (state->act_priority == MASKED) {
654 pr_devel("int_on on untargeted interrupt\n");
655 return -EINVAL;
656 }
657
658 /* If saved_priority is 0xff, do nothing */
659 if (state->saved_priority == MASKED)
660 return 0;
661
662 /*
663 * Lock and unmask it.
664 */
665 xive_lock_for_unmask(sb, state);
666 xive_finish_unmask(xive, sb, state, state->saved_priority);
667 arch_spin_unlock(&sb->lock);
668
669 return 0;
670}
671
672int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
673{
674 struct kvmppc_xive *xive = kvm->arch.xive;
675 struct kvmppc_xive_src_block *sb;
676 struct kvmppc_xive_irq_state *state;
677 u16 idx;
678
679 if (!xive)
680 return -ENODEV;
681
682 sb = kvmppc_xive_find_source(xive, irq, &idx);
683 if (!sb)
684 return -EINVAL;
685 state = &sb->irq_state[idx];
686
687 pr_devel("int_off(irq=0x%x)\n", irq);
688
689 /*
690 * Lock and mask
691 */
692 state->saved_priority = xive_lock_and_mask(xive, sb, state);
693 arch_spin_unlock(&sb->lock);
694
695 return 0;
696}
697
698static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
699{
700 struct kvmppc_xive_src_block *sb;
701 struct kvmppc_xive_irq_state *state;
702 u16 idx;
703
704 sb = kvmppc_xive_find_source(xive, irq, &idx);
705 if (!sb)
706 return false;
707 state = &sb->irq_state[idx];
708 if (!state->valid)
709 return false;
710
711 /*
712 * Trigger the IPI. This assumes we never restore a pass-through
713 * interrupt which should be safe enough
714 */
715 xive_irq_trigger(&state->ipi_data);
716
717 return true;
718}
719
720u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
721{
722 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
723
724 if (!xc)
725 return 0;
726
727 /* Return the per-cpu state for state saving/migration */
728 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
729 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
730}
731
732int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
733{
734 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
735 struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
736 u8 cppr, mfrr;
737 u32 xisr;
738
739 if (!xc || !xive)
740 return -ENOENT;
741
742 /* Grab individual state fields. We don't use pending_pri */
743 cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
744 xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
745 KVM_REG_PPC_ICP_XISR_MASK;
746 mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
747
748 pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
749 xc->server_num, cppr, mfrr, xisr);
750
751 /*
752 * We can't update the state of a "pushed" VCPU, but that
753 * shouldn't happen.
754 */
755 if (WARN_ON(vcpu->arch.xive_pushed))
756 return -EIO;
757
758 /* Update VCPU HW saved state */
759 vcpu->arch.xive_saved_state.cppr = cppr;
760 xc->hw_cppr = xc->cppr = cppr;
761
762 /*
763 * Update MFRR state. If it's not 0xff, we mark the VCPU as
764 * having a pending MFRR change, which will re-evaluate the
765 * target. The VCPU will thus potentially get a spurious
766 * interrupt but that's not a big deal.
767 */
768 xc->mfrr = mfrr;
769 if (mfrr < cppr)
770 xive_irq_trigger(&xc->vp_ipi_data);
771
772 /*
773 * Now saved XIRR is "interesting". It means there's something in
774 * the legacy "1 element" queue... for an IPI we simply ignore it,
775 * as the MFRR restore will handle that. For anything else we need
776 * to force a resend of the source.
777 * However, the source may not have been set up yet. If that's the
778 * case, we keep that info and increment a counter in the xive to
779 * tell subsequent xive_set_source() to go look.
780 */
781 if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
782 xc->delayed_irq = xisr;
783 xive->delayed_irqs++;
784 pr_devel(" xisr restore delayed\n");
785 }
786
787 return 0;
788}
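For reference, a sketch of the 64-bit ICP value packed by kvmppc_xive_get_icp() and unpacked above; the shift values come from the KVM PPC uapi header and are quoted from memory here, so treat the exact positions as assumptions.

/*
 *  63      56 55            32 31     24 23     16 15      0
 * +----------+----------------+---------+---------+---------+
 * |   CPPR   |      XISR      |  MFRR   |  PPRI   | unused  |
 * +----------+----------------+---------+---------+---------+
 *  CPPR_SHIFT=56, XISR_SHIFT=32 (24-bit mask), MFRR_SHIFT=24
 *
 * The XIVE emulation only consumes CPPR, MFRR and XISR; pending_pri
 * (PPRI) is ignored on set and not reported by get_icp().
 */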
789
790int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
791 struct irq_desc *host_desc)
792{
793 struct kvmppc_xive *xive = kvm->arch.xive;
794 struct kvmppc_xive_src_block *sb;
795 struct kvmppc_xive_irq_state *state;
796 struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
797 unsigned int host_irq = irq_desc_get_irq(host_desc);
798 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
799 u16 idx;
800 u8 prio;
801 int rc;
802
803 if (!xive)
804 return -ENODEV;
805
806 pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n", guest_irq, hw_irq);
807
808 sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
809 if (!sb)
810 return -EINVAL;
811 state = &sb->irq_state[idx];
812
813 /*
814 * Mark the passed-through interrupt as going to a VCPU,
815 * this will prevent further EOIs and similar operations
816 * from the XIVE code. It will also mask the interrupt
817 * to either PQ=10 or 11 state, the latter if the interrupt
818 * is pending. This will allow us to unmask or retrigger it
819 * after routing it to the guest with a simple EOI.
820 *
821 * The "state" argument is a "token", all it needs is to be
822 * non-NULL to switch to passed-through or NULL for the
823 * other way around. We may not yet have an actual VCPU
824 * target here and we don't really care.
825 */
826 rc = irq_set_vcpu_affinity(host_irq, state);
827 if (rc) {
828 pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
829 return rc;
830 }
831
832 /*
833 * Mask and read state of IPI. We need to know if its P bit
834 * is set as that means it's potentially already using a
835 * queue entry in the target
836 */
837 prio = xive_lock_and_mask(xive, sb, state);
838 pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
839 state->old_p, state->old_q);
840
841 /* Turn the IPI hard off */
842 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
843
844 /* Grab info about irq */
845 state->pt_number = hw_irq;
846 state->pt_data = irq_data_get_irq_handler_data(host_data);
847
848 /*
849 * Configure the IRQ to match the existing configuration of
850 * the IPI if it was already targeted. Otherwise this will
851 * mask the interrupt in a lossy way (act_priority is 0xff)
852 * which is fine for a never started interrupt.
853 */
854 xive_native_configure_irq(hw_irq,
855 xive->vp_base + state->act_server,
856 state->act_priority, state->number);
857
858 /*
859 * We do an EOI to enable the interrupt (and retrigger if needed)
860 * if the guest has the interrupt unmasked and the P bit was *not*
861 * set in the IPI. If it was set, we know a slot may still be in
862 * use in the target queue thus we have to wait for a guest
863 * originated EOI
864 */
865 if (prio != MASKED && !state->old_p)
866 xive_vm_source_eoi(hw_irq, state->pt_data);
867
868 /* Clear old_p/old_q as they are no longer relevant */
869 state->old_p = state->old_q = false;
870
871 /* Restore guest prio (unlocks EOI) */
872 mb();
873 state->guest_priority = prio;
874 arch_spin_unlock(&sb->lock);
875
876 return 0;
877}
878EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
879
880int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
881 struct irq_desc *host_desc)
882{
883 struct kvmppc_xive *xive = kvm->arch.xive;
884 struct kvmppc_xive_src_block *sb;
885 struct kvmppc_xive_irq_state *state;
886 unsigned int host_irq = irq_desc_get_irq(host_desc);
887 u16 idx;
888 u8 prio;
889 int rc;
890
891 if (!xive)
892 return -ENODEV;
893
894 pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
895
896 sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
897 if (!sb)
898 return -EINVAL;
899 state = &sb->irq_state[idx];
900
901 /*
902 * Mask and read state of IRQ. We need to know if its P bit
903 * is set as that means it's potentially already using a
904 * queue entry in the target
905 */
906 prio = xive_lock_and_mask(xive, sb, state);
907 pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
908 state->old_p, state->old_q);
909
910 /*
911 * If old_p is set, the interrupt is pending, we switch it to
912 * PQ=11. This will force a resend in the host so the interrupt
913 * isn't lost to whatever host driver may pick it up
914 */
915 if (state->old_p)
916 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
917
918 /* Release the passed-through interrupt to the host */
919 rc = irq_set_vcpu_affinity(host_irq, NULL);
920 if (rc) {
921 pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
922 return rc;
923 }
924
925 /* Forget about the IRQ */
926 state->pt_number = 0;
927 state->pt_data = NULL;
928
929 /* Reconfigure the IPI */
930 xive_native_configure_irq(state->ipi_number,
931 xive->vp_base + state->act_server,
932 state->act_priority, state->number);
933
934 /*
935 * If old_p is set (we have a queue entry potentially
936 * occupied) or the interrupt is masked, we set the IPI
937 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
938 */
939 if (prio == MASKED || state->old_p)
940 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
941 else
942 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
943
944 /* Restore guest prio (unlocks EOI) */
945 mb();
946 state->guest_priority = prio;
947 arch_spin_unlock(&sb->lock);
948
949 return 0;
950}
951EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
952
953static void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
954{
955 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
956 struct kvm *kvm = vcpu->kvm;
957 struct kvmppc_xive *xive = kvm->arch.xive;
958 int i, j;
959
960 for (i = 0; i <= xive->max_sbid; i++) {
961 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
962
963 if (!sb)
964 continue;
965 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
966 struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
967
968 if (!state->valid)
969 continue;
970 if (state->act_priority == MASKED)
971 continue;
972 if (state->act_server != xc->server_num)
973 continue;
974
975 /* Clean it up */
976 arch_spin_lock(&sb->lock);
977 state->act_priority = MASKED;
978 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
979 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
980 if (state->pt_number) {
981 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
982 xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
983 }
984 arch_spin_unlock(&sb->lock);
985 }
986 }
987}
988
989void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
990{
991 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
992 struct kvmppc_xive *xive = xc->xive;
993 int i;
994
995 pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
996
997 /* Ensure no interrupt is still routed to that VP */
998 xc->valid = false;
999 kvmppc_xive_disable_vcpu_interrupts(vcpu);
1000
1001 /* Mask the VP IPI */
1002 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
1003
1004 /* Disable the VP */
1005 xive_native_disable_vp(xc->vp_id);
1006
1007 /* Free the queues & associated interrupts */
1008 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1009 struct xive_q *q = &xc->queues[i];
1010
1011 /* Free the escalation irq */
1012 if (xc->esc_virq[i]) {
1013 free_irq(xc->esc_virq[i], vcpu);
1014 irq_dispose_mapping(xc->esc_virq[i]);
1015 kfree(xc->esc_virq_names[i]);
1016 }
1017 /* Free the queue */
1018 xive_native_disable_queue(xc->vp_id, q, i);
1019 if (q->qpage) {
1020 free_pages((unsigned long)q->qpage,
1021 xive->q_page_order);
1022 q->qpage = NULL;
1023 }
1024 }
1025
1026 /* Free the IPI */
1027 if (xc->vp_ipi) {
1028 xive_cleanup_irq_data(&xc->vp_ipi_data);
1029 xive_native_free_irq(xc->vp_ipi);
1030 }
1031 /* Free the VP */
1032 kfree(xc);
1033}
1034
1035int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
1036 struct kvm_vcpu *vcpu, u32 cpu)
1037{
1038 struct kvmppc_xive *xive = dev->private;
1039 struct kvmppc_xive_vcpu *xc;
1040 int i, r = -EBUSY;
1041
1042 pr_devel("connect_vcpu(cpu=%d)\n", cpu);
1043
1044 if (dev->ops != &kvm_xive_ops) {
1045 pr_devel("Wrong ops !\n");
1046 return -EPERM;
1047 }
1048 if (xive->kvm != vcpu->kvm)
1049 return -EPERM;
1050 if (vcpu->arch.irq_type)
1051 return -EBUSY;
1052 if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
1053 pr_devel("Duplicate !\n");
1054 return -EEXIST;
1055 }
1056 if (cpu >= KVM_MAX_VCPUS) {
1057 pr_devel("Out of bounds !\n");
1058 return -EINVAL;
1059 }
1060 xc = kzalloc(sizeof(*xc), GFP_KERNEL);
1061 if (!xc)
1062 return -ENOMEM;
1063
1064 /* We need to synchronize with queue provisioning */
1065 mutex_lock(&vcpu->kvm->lock);
1066 vcpu->arch.xive_vcpu = xc;
1067 xc->xive = xive;
1068 xc->vcpu = vcpu;
1069 xc->server_num = cpu;
1070 xc->vp_id = xive->vp_base + cpu;
1071 xc->mfrr = 0xff;
1072 xc->valid = true;
1073
1074 r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
1075 if (r)
1076 goto bail;
1077
1078 /* Configure VCPU fields for use by assembly push/pull */
1079 vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
1080 vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
1081
1082 /* Allocate IPI */
1083 xc->vp_ipi = xive_native_alloc_irq();
1084 if (!xc->vp_ipi) {
1085 r = -EIO;
1086 goto bail;
1087 }
1088 pr_devel(" IPI=0x%x\n", xc->vp_ipi);
1089
1090 r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
1091 if (r)
1092 goto bail;
1093
1094 /*
1095 * Initialize queues. Initially we set them all for no queueing
1096 * and we enable escalation for queue 0 only which we'll use for
1097 * our mfrr change notifications. If the VCPU is hot-plugged, we
1098 * do handle provisioning however.
1099 */
1100 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1101 struct xive_q *q = &xc->queues[i];
1102
1103 /* Is queue already enabled ? Provision it */
1104 if (xive->qmap & (1 << i)) {
1105 r = xive_provision_queue(vcpu, i);
1106 if (r == 0)
1107 xive_attach_escalation(vcpu, i);
1108 if (r)
1109 goto bail;
1110 } else {
1111 r = xive_native_configure_queue(xc->vp_id,
1112 q, i, NULL, 0, true);
1113 if (r) {
1114 pr_err("Failed to configure queue %d for VCPU %d\n",
1115 i, cpu);
1116 goto bail;
1117 }
1118 }
1119 }
1120
1121 /* If not done above, attach priority 0 escalation */
1122 r = xive_attach_escalation(vcpu, 0);
1123 if (r)
1124 goto bail;
1125
1126 /* Enable the VP */
1127 r = xive_native_enable_vp(xc->vp_id);
1128 if (r)
1129 goto bail;
1130
1131 /* Route the IPI */
1132 r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
1133 if (!r)
1134 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
1135
1136bail:
1137 mutex_unlock(&vcpu->kvm->lock);
1138 if (r) {
1139 kvmppc_xive_cleanup_vcpu(vcpu);
1140 return r;
1141 }
1142
1143 vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
1144 return 0;
1145}
1146
1147/*
1148 * Scanning of queues before/after migration save
1149 */
1150static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
1151{
1152 struct kvmppc_xive_src_block *sb;
1153 struct kvmppc_xive_irq_state *state;
1154 u16 idx;
1155
1156 sb = kvmppc_xive_find_source(xive, irq, &idx);
1157 if (!sb)
1158 return;
1159
1160 state = &sb->irq_state[idx];
1161
1162 /* Some sanity checking */
1163 if (!state->valid) {
1164 pr_err("invalid irq 0x%x in cpu queue!\n", irq);
1165 return;
1166 }
1167
1168 /*
1169 * If the interrupt is in a queue it should have P set.
1170 * We warn so that it gets reported. A backtrace isn't useful
1171 * so no need to use a WARN_ON.
1172 */
1173 if (!state->saved_p)
1174 pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
1175
1176 /* Set flag */
1177 state->in_queue = true;
1178}
1179
1180static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
1181 struct kvmppc_xive_src_block *sb,
1182 u32 irq)
1183{
1184 struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
1185
1186 if (!state->valid)
1187 return;
1188
1189 /* Mask and save state, this will also sync HW queues */
1190 state->saved_scan_prio = xive_lock_and_mask(xive, sb, state);
1191
1192 /* Transfer P and Q */
1193 state->saved_p = state->old_p;
1194 state->saved_q = state->old_q;
1195
1196 /* Unlock */
1197 arch_spin_unlock(&sb->lock);
1198}
1199
1200static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
1201 struct kvmppc_xive_src_block *sb,
1202 u32 irq)
1203{
1204 struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
1205
1206 if (!state->valid)
1207 return;
1208
1209 /*
1210 * Lock / exclude EOI (not technically necessary if the
1211 * guest isn't running concurrently). If this becomes a
1212 * performance issue we can probably remove the lock.
1213 */
1214 xive_lock_for_unmask(sb, state);
1215
1216 /* Restore mask/prio if it wasn't masked */
1217 if (state->saved_scan_prio != MASKED)
1218 xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
1219
1220 /* Unlock */
1221 arch_spin_unlock(&sb->lock);
1222}
1223
1224static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
1225{
1226 u32 idx = q->idx;
1227 u32 toggle = q->toggle;
1228 u32 irq;
1229
1230 do {
1231 irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle);
1232 if (irq > XICS_IPI)
1233 xive_pre_save_set_queued(xive, irq);
1234 } while(irq);
1235}
1236
1237static void xive_pre_save_scan(struct kvmppc_xive *xive)
1238{
1239 struct kvm_vcpu *vcpu = NULL;
1240 int i, j;
1241
1242 /*
1243 * See comment in xive_get_source() about how this
1244 * works. Collect a stable state for all interrupts
1245 */
1246 for (i = 0; i <= xive->max_sbid; i++) {
1247 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1248 if (!sb)
1249 continue;
1250 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1251 xive_pre_save_mask_irq(xive, sb, j);
1252 }
1253
1254 /* Then scan the queues and update the "in_queue" flag */
1255 kvm_for_each_vcpu(i, vcpu, xive->kvm) {
1256 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1257 if (!xc)
1258 continue;
1259 for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
1260 if (xc->queues[j].qpage)
1261 xive_pre_save_queue(xive, &xc->queues[j]);
1262 }
1263 }
1264
1265 /* Finally restore interrupt states */
1266 for (i = 0; i <= xive->max_sbid; i++) {
1267 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1268 if (!sb)
1269 continue;
1270 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1271 xive_pre_save_unmask_irq(xive, sb, j);
1272 }
1273}
1274
1275static void xive_post_save_scan(struct kvmppc_xive *xive)
1276{
1277 u32 i, j;
1278
1279 /* Clear all the in_queue flags */
1280 for (i = 0; i <= xive->max_sbid; i++) {
1281 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1282 if (!sb)
1283 continue;
1284 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1285 sb->irq_state[j].in_queue = false;
1286 }
1287
1288 /* Next get_source() will do a new scan */
1289 xive->saved_src_count = 0;
1290}
1291
1292/*
1293 * This returns the source configuration and state to user space.
1294 */
1295static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
1296{
1297 struct kvmppc_xive_src_block *sb;
1298 struct kvmppc_xive_irq_state *state;
1299 u64 __user *ubufp = (u64 __user *) addr;
1300 u64 val, prio;
1301 u16 idx;
1302
1303 sb = kvmppc_xive_find_source(xive, irq, &idx);
1304 if (!sb)
1305 return -ENOENT;
1306
1307 state = &sb->irq_state[idx];
1308
1309 if (!state->valid)
1310 return -ENOENT;
1311
1312 pr_devel("get_source(%ld)...\n", irq);
1313
1314 /*
1315 * So to properly save the state into something that looks like a
1316 * XICS migration stream we cannot treat interrupts individually.
1317 *
1318 * We need, instead, to mask them all (& save their previous PQ state)
1319 * to get a stable state in the HW, then sync them to ensure that
1320 * any interrupt that had already fired hits its queue, and finally
1321 * scan all the queues to collect which interrupts are still present
1322 * in the queues, so we can set the "pending" flag on them and
1323 * they can be resent on restore.
1324 *
1325 * So we do it all when the "first" interrupt gets saved: all the
1326 * state is collected at that point, and the rest of xive_get_source()
1327 * will merely collect and convert that state to the expected
1328 * userspace bit mask.
1329 */
1330 if (xive->saved_src_count == 0)
1331 xive_pre_save_scan(xive);
1332 xive->saved_src_count++;
1333
1334 /* Convert saved state into something compatible with xics */
1335 val = state->guest_server;
1336 prio = state->saved_scan_prio;
1337
1338 if (prio == MASKED) {
1339 val |= KVM_XICS_MASKED;
1340 prio = state->saved_priority;
1341 }
1342 val |= prio << KVM_XICS_PRIORITY_SHIFT;
1343 if (state->lsi) {
1344 val |= KVM_XICS_LEVEL_SENSITIVE;
1345 if (state->saved_p)
1346 val |= KVM_XICS_PENDING;
1347 } else {
1348 if (state->saved_p)
1349 val |= KVM_XICS_PRESENTED;
1350
1351 if (state->saved_q)
1352 val |= KVM_XICS_QUEUED;
1353
1354 /*
1355 * We mark it pending (which will attempt a re-delivery)
1356 * if we are in a queue *or* we were masked and had
1357 * Q set which is equivalent to the XICS "masked pending"
1358 * state
1359 */
1360 if (state->in_queue || (prio == MASKED && state->saved_q))
1361 val |= KVM_XICS_PENDING;
1362 }
1363
1364 /*
1365 * If that was the last interrupt saved, reset the
1366 * in_queue flags
1367 */
1368 if (xive->saved_src_count == xive->src_count)
1369 xive_post_save_scan(xive);
1370
1371 /* Copy the result to userspace */
1372 if (put_user(val, ubufp))
1373 return -EFAULT;
1374
1375 return 0;
1376}
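For reference, a sketch of the per-source u64 exchanged with userspace by xive_get_source()/xive_set_source(); bit positions are quoted from the KVM uapi header from memory and should be treated as assumptions.

/*
 *  bits 31..0   server number        (KVM_XICS_DESTINATION_MASK)
 *  bits 39..32  guest priority       (KVM_XICS_PRIORITY_SHIFT)
 *  bit  40      KVM_XICS_LEVEL_SENSITIVE
 *  bit  41      KVM_XICS_MASKED      (saved_priority holds the prio)
 *  bit  42      KVM_XICS_PENDING     (forces a resend on restore)
 *  bit  43      KVM_XICS_PRESENTED   (saved P)
 *  bit  44      KVM_XICS_QUEUED      (saved Q)
 */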
1377
1378static struct kvmppc_xive_src_block *xive_create_src_block(struct kvmppc_xive *xive,
1379 int irq)
1380{
1381 struct kvm *kvm = xive->kvm;
1382 struct kvmppc_xive_src_block *sb;
1383 int i, bid;
1384
1385 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
1386
1387 mutex_lock(&kvm->lock);
1388
1389 /* block already exists - somebody else got here first */
1390 if (xive->src_blocks[bid])
1391 goto out;
1392
1393 /* Create the ICS */
1394 sb = kzalloc(sizeof(*sb), GFP_KERNEL);
1395 if (!sb)
1396 goto out;
1397
1398 sb->id = bid;
1399
1400 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
1401 sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
1402 sb->irq_state[i].guest_priority = MASKED;
1403 sb->irq_state[i].saved_priority = MASKED;
1404 sb->irq_state[i].act_priority = MASKED;
1405 }
1406 smp_wmb();
1407 xive->src_blocks[bid] = sb;
1408
1409 if (bid > xive->max_sbid)
1410 xive->max_sbid = bid;
1411
1412out:
1413 mutex_unlock(&kvm->lock);
1414 return xive->src_blocks[bid];
1415}
1416
1417static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
1418{
1419 struct kvm *kvm = xive->kvm;
1420 struct kvm_vcpu *vcpu = NULL;
1421 int i;
1422
1423 kvm_for_each_vcpu(i, vcpu, kvm) {
1424 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1425
1426 if (!xc)
1427 continue;
1428
1429 if (xc->delayed_irq == irq) {
1430 xc->delayed_irq = 0;
1431 xive->delayed_irqs--;
1432 return true;
1433 }
1434 }
1435 return false;
1436}
1437
1438static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1439{
1440 struct kvmppc_xive_src_block *sb;
1441 struct kvmppc_xive_irq_state *state;
1442 u64 __user *ubufp = (u64 __user *) addr;
1443 u16 idx;
1444 u64 val;
1445 u8 act_prio, guest_prio;
1446 u32 server;
1447 int rc = 0;
1448
1449 if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
1450 return -ENOENT;
1451
1452 pr_devel("set_source(irq=0x%lx)\n", irq);
1453
1454 /* Find the source */
1455 sb = kvmppc_xive_find_source(xive, irq, &idx);
1456 if (!sb) {
1457 pr_devel("No source, creating source block...\n");
1458 sb = xive_create_src_block(xive, irq);
1459 if (!sb) {
1460 pr_devel("Failed to create block...\n");
1461 return -ENOMEM;
1462 }
1463 }
1464 state = &sb->irq_state[idx];
1465
1466 /* Read user passed data */
1467 if (get_user(val, ubufp)) {
1468 pr_devel("fault getting user info !\n");
1469 return -EFAULT;
1470 }
1471
1472 server = val & KVM_XICS_DESTINATION_MASK;
1473 guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
1474
1475 pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n",
1476 val, server, guest_prio);
1477 /*
1478 * If the source doesn't already have an IPI, allocate
1479 * one and get the corresponding data
1480 */
1481 if (!state->ipi_number) {
1482 state->ipi_number = xive_native_alloc_irq();
1483 if (state->ipi_number == 0) {
1484 pr_devel("Failed to allocate IPI !\n");
1485 return -ENOMEM;
1486 }
1487 xive_native_populate_irq_data(state->ipi_number, &state->ipi_data);
1488 pr_devel(" src_ipi=0x%x\n", state->ipi_number);
1489 }
1490
1491 /*
1492 * We use lock_and_mask() to set us in the right masked
1493 * state. We will override that state from the saved state
1494 * further down, but this will handle the cases of interrupts
1495 * that need FW masking. We set the initial guest_priority to
1496 * 0 before calling it to ensure it actually performs the masking.
1497 */
1498 state->guest_priority = 0;
1499 xive_lock_and_mask(xive, sb, state);
1500
1501 /*
1502 * Now, we select a target if we have one. If we don't we
1503 * leave the interrupt untargeted. It means that an interrupt
1504 * can become "untargeted" across migration if it was masked
1505 * by set_xive() but there is little we can do about it.
1506 */
1507
1508	/* First convert prio and mark interrupt as untargeted */
1509 act_prio = xive_prio_from_guest(guest_prio);
1510 state->act_priority = MASKED;
1511 state->guest_server = server;
1512
1513 /*
1514 * We need to drop the lock due to the mutex below. Hopefully
1515	 * nothing is touching that interrupt since it hasn't yet been
1516	 * advertised to a running guest.
1517 */
1518 arch_spin_unlock(&sb->lock);
1519
1520	/* If we have a priority, target the interrupt */
1521 if (act_prio != MASKED) {
1522 /* First, check provisioning of queues */
1523 mutex_lock(&xive->kvm->lock);
1524 rc = xive_check_provisioning(xive->kvm, act_prio);
1525 mutex_unlock(&xive->kvm->lock);
1526
1527 /* Target interrupt */
1528 if (rc == 0)
1529 rc = xive_target_interrupt(xive->kvm, state,
1530 server, act_prio);
1531 /*
1532		 * If provisioning or targeting failed, leave it
1533 * alone and masked. It will remain disabled until
1534 * the guest re-targets it.
1535 */
1536 }
1537
1538 /*
1539 * Find out if this was a delayed irq stashed in an ICP,
1540 * in which case, treat it as pending
1541 */
1542 if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
1543 val |= KVM_XICS_PENDING;
1544 pr_devel(" Found delayed ! forcing PENDING !\n");
1545 }
1546
1547 /* Cleanup the SW state */
1548 state->old_p = false;
1549 state->old_q = false;
1550 state->lsi = false;
1551 state->asserted = false;
1552
1553 /* Restore LSI state */
1554 if (val & KVM_XICS_LEVEL_SENSITIVE) {
1555 state->lsi = true;
1556 if (val & KVM_XICS_PENDING)
1557 state->asserted = true;
1558 pr_devel(" LSI ! Asserted=%d\n", state->asserted);
1559 }
1560
1561 /*
1562 * Restore P and Q. If the interrupt was pending, we
1563 * force both P and Q, which will trigger a resend.
1564 *
1565 * That means that a guest that had both an interrupt
1566 * pending (queued) and Q set will restore with only
1567 * one instance of that interrupt instead of 2, but that
1568 * is perfectly fine as coalescing interrupts that haven't
1569 * been presented yet is always allowed.
1570 */
1571 if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
1572 state->old_p = true;
1573 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1574 state->old_q = true;
1575
1576 pr_devel(" P=%d, Q=%d\n", state->old_p, state->old_q);
1577
1578 /*
1579	 * Restore the mask state: if masked, just save the priority;
1580	 * if unmasked, update the guest priority, perform the
1581	 * appropriate state transition and re-trigger if necessary.
1582 */
1583 if (val & KVM_XICS_MASKED) {
1584 pr_devel(" masked, saving prio\n");
1585 state->guest_priority = MASKED;
1586 state->saved_priority = guest_prio;
1587 } else {
1588 pr_devel(" unmasked, restoring to prio %d\n", guest_prio);
1589 xive_finish_unmask(xive, sb, state, guest_prio);
1590 state->saved_priority = guest_prio;
1591 }
1592
1593 /* Increment the number of valid sources and mark this one valid */
1594 if (!state->valid)
1595 xive->src_count++;
1596 state->valid = true;
1597
1598 return 0;
1599}
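
The attribute format consumed above is the one the existing XICS device already exposes, so a VMM restoring interrupt state would, roughly, pack the target server and priority into a u64 together with the KVM_XICS_* state flags and hand it to KVM_SET_DEVICE_ATTR on the device fd. A minimal userspace sketch, not part of this patch (the device fd and the saved state values are assumptions; the defines come from the powerpc KVM uapi headers):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>	/* pulls in KVM_DEV_XICS_GRP_SOURCES and the KVM_XICS_* flags on powerpc */

/* Hypothetical helper: restore one interrupt source on the in-kernel device */
static int restore_source(int xive_dev_fd, uint32_t irq, uint32_t server,
			  uint8_t prio, int masked, int pending)
{
	uint64_t val = server | ((uint64_t)prio << KVM_XICS_PRIORITY_SHIFT);
	struct kvm_device_attr attr;

	if (masked)
		val |= KVM_XICS_MASKED;
	if (pending)
		val |= KVM_XICS_PENDING;

	attr.flags = 0;
	attr.group = KVM_DEV_XICS_GRP_SOURCES;
	attr.attr  = irq;				/* guest interrupt number */
	attr.addr  = (uint64_t)(uintptr_t)&val;		/* read back by xive_set_source() */

	return ioctl(xive_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}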
1600
1601int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1602 bool line_status)
1603{
1604 struct kvmppc_xive *xive = kvm->arch.xive;
1605 struct kvmppc_xive_src_block *sb;
1606 struct kvmppc_xive_irq_state *state;
1607 u16 idx;
1608
1609 if (!xive)
1610 return -ENODEV;
1611
1612 sb = kvmppc_xive_find_source(xive, irq, &idx);
1613 if (!sb)
1614 return -EINVAL;
1615
1616 /* Perform locklessly .... (we need to do some RCUisms here...) */
1617 state = &sb->irq_state[idx];
1618 if (!state->valid)
1619 return -EINVAL;
1620
1621 /* We don't allow a trigger on a passed-through interrupt */
1622 if (state->pt_number)
1623 return -EINVAL;
1624
1625 if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
1626 state->asserted = 1;
1627 else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
1628 state->asserted = 0;
1629 return 0;
1630 }
1631
1632 /* Trigger the IPI */
1633 xive_irq_trigger(&state->ipi_data);
1634
1635 return 0;
1636}
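
This handler sits behind the generic irqchip path, so from userspace an assertion is just an ordinary KVM_IRQ_LINE call on the VM fd; the powerpc-specific level encodings checked above (KVM_INTERRUPT_SET_LEVEL / KVM_INTERRUPT_UNSET) are accepted as well. A rough sketch, with vm_fd and the interrupt number as placeholders:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: assert (level != 0) or deassert a guest interrupt line */
static int set_guest_irq(int vm_fd, uint32_t irq, int asserted)
{
	struct kvm_irq_level args = {
		.irq   = irq,			/* guest IRQ number, as used above */
		.level = asserted ? 1 : 0,
	};

	return ioctl(vm_fd, KVM_IRQ_LINE, &args);
}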
1637
1638static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1639{
1640 struct kvmppc_xive *xive = dev->private;
1641
1642 /* We honor the existing XICS ioctl */
1643 switch (attr->group) {
1644 case KVM_DEV_XICS_GRP_SOURCES:
1645 return xive_set_source(xive, attr->attr, attr->addr);
1646 }
1647 return -ENXIO;
1648}
1649
1650static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1651{
1652 struct kvmppc_xive *xive = dev->private;
1653
1654 /* We honor the existing XICS ioctl */
1655 switch (attr->group) {
1656 case KVM_DEV_XICS_GRP_SOURCES:
1657 return xive_get_source(xive, attr->attr, attr->addr);
1658 }
1659 return -ENXIO;
1660}
1661
1662static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1663{
1664 /* We honor the same limits as XICS, at least for now */
1665 switch (attr->group) {
1666 case KVM_DEV_XICS_GRP_SOURCES:
1667 if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
1668 attr->attr < KVMPPC_XICS_NR_IRQS)
1669 return 0;
1670 break;
1671 }
1672 return -ENXIO;
1673}
1674
1675static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
1676{
1677 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
1678 xive_native_configure_irq(hw_num, 0, MASKED, 0);
1679 xive_cleanup_irq_data(xd);
1680}
1681
1682static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
1683{
1684 int i;
1685
1686 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
1687 struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
1688
1689 if (!state->valid)
1690 continue;
1691
1692 kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data);
1693 xive_native_free_irq(state->ipi_number);
1694
1695 /* Pass-through, cleanup too */
1696 if (state->pt_number)
1697 kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data);
1698
1699 state->valid = false;
1700 }
1701}
1702
1703static void kvmppc_xive_free(struct kvm_device *dev)
1704{
1705 struct kvmppc_xive *xive = dev->private;
1706 struct kvm *kvm = xive->kvm;
1707 int i;
1708
1709 debugfs_remove(xive->dentry);
1710
1711 if (kvm)
1712 kvm->arch.xive = NULL;
1713
1714 /* Mask and free interrupts */
1715 for (i = 0; i <= xive->max_sbid; i++) {
1716 if (xive->src_blocks[i])
1717 kvmppc_xive_free_sources(xive->src_blocks[i]);
1718 kfree(xive->src_blocks[i]);
1719 xive->src_blocks[i] = NULL;
1720 }
1721
1722 if (xive->vp_base != XIVE_INVALID_VP)
1723 xive_native_free_vp_block(xive->vp_base);
1724
1725
1726 kfree(xive);
1727 kfree(dev);
1728}
1729
1730static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
1731{
1732 struct kvmppc_xive *xive;
1733 struct kvm *kvm = dev->kvm;
1734 int ret = 0;
1735
1736 pr_devel("Creating xive for partition\n");
1737
1738 xive = kzalloc(sizeof(*xive), GFP_KERNEL);
1739 if (!xive)
1740 return -ENOMEM;
1741
1742 dev->private = xive;
1743 xive->dev = dev;
1744 xive->kvm = kvm;
1745
1746 /* Already there ? */
1747 if (kvm->arch.xive)
1748 ret = -EEXIST;
1749 else
1750 kvm->arch.xive = xive;
1751
1752 /* We use the default queue size set by the host */
1753 xive->q_order = xive_native_default_eq_shift();
1754 if (xive->q_order < PAGE_SHIFT)
1755 xive->q_page_order = 0;
1756 else
1757 xive->q_page_order = xive->q_order - PAGE_SHIFT;
1758
1759 /* Allocate a bunch of VPs */
1760 xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
1761 pr_devel("VP_Base=%x\n", xive->vp_base);
1762
1763 if (xive->vp_base == XIVE_INVALID_VP)
1764 ret = -ENOMEM;
1765
1766 if (ret) {
1767 kfree(xive);
1768 return ret;
1769 }
1770
1771 return 0;
1772}
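
For context, the device fd that this create callback backs is obtained through the usual KVM_CREATE_DEVICE ioctl; elsewhere in this series (not shown here) the XIVE device is registered under the existing XICS device type so current userspace keeps working unchanged. A hedged sketch, with vm_fd as a placeholder:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical: create the in-kernel interrupt controller device */
static int create_xics_device(int vm_fd)
{
	struct kvm_create_device cd = {
		.type  = KVM_DEV_TYPE_XICS,	/* assumed: XIVE reuses the XICS device type */
		.flags = 0,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	return cd.fd;	/* used with KVM_SET_DEVICE_ATTR, vcpu connect, etc. */
}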
1773
1774
1775static int xive_debug_show(struct seq_file *m, void *private)
1776{
1777 struct kvmppc_xive *xive = m->private;
1778 struct kvm *kvm = xive->kvm;
1779 struct kvm_vcpu *vcpu;
1780 u64 t_rm_h_xirr = 0;
1781 u64 t_rm_h_ipoll = 0;
1782 u64 t_rm_h_cppr = 0;
1783 u64 t_rm_h_eoi = 0;
1784 u64 t_rm_h_ipi = 0;
1785 u64 t_vm_h_xirr = 0;
1786 u64 t_vm_h_ipoll = 0;
1787 u64 t_vm_h_cppr = 0;
1788 u64 t_vm_h_eoi = 0;
1789 u64 t_vm_h_ipi = 0;
1790 unsigned int i;
1791
1792 if (!kvm)
1793 return 0;
1794
1795 seq_printf(m, "=========\nVCPU state\n=========\n");
1796
1797 kvm_for_each_vcpu(i, vcpu, kvm) {
1798 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1799
1800 if (!xc)
1801 continue;
1802
1803 seq_printf(m, "cpu server %#x CPPR:%#x HWCPPR:%#x"
1804 " MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
1805 xc->server_num, xc->cppr, xc->hw_cppr,
1806 xc->mfrr, xc->pending,
1807 xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
1808
1809 t_rm_h_xirr += xc->stat_rm_h_xirr;
1810 t_rm_h_ipoll += xc->stat_rm_h_ipoll;
1811 t_rm_h_cppr += xc->stat_rm_h_cppr;
1812 t_rm_h_eoi += xc->stat_rm_h_eoi;
1813 t_rm_h_ipi += xc->stat_rm_h_ipi;
1814 t_vm_h_xirr += xc->stat_vm_h_xirr;
1815 t_vm_h_ipoll += xc->stat_vm_h_ipoll;
1816 t_vm_h_cppr += xc->stat_vm_h_cppr;
1817 t_vm_h_eoi += xc->stat_vm_h_eoi;
1818 t_vm_h_ipi += xc->stat_vm_h_ipi;
1819 }
1820
1821 seq_printf(m, "Hcalls totals\n");
1822 seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
1823 seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
1824 seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
1825 seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
1826 seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
1827
1828 return 0;
1829}
1830
1831static int xive_debug_open(struct inode *inode, struct file *file)
1832{
1833 return single_open(file, xive_debug_show, inode->i_private);
1834}
1835
1836static const struct file_operations xive_debug_fops = {
1837 .open = xive_debug_open,
1838 .read = seq_read,
1839 .llseek = seq_lseek,
1840 .release = single_release,
1841};
1842
1843static void xive_debugfs_init(struct kvmppc_xive *xive)
1844{
1845 char *name;
1846
1847 name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
1848 if (!name) {
1849 pr_err("%s: no memory for name\n", __func__);
1850 return;
1851 }
1852
1853 xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
1854 xive, &xive_debug_fops);
1855
1856 pr_debug("%s: created %s\n", __func__, name);
1857 kfree(name);
1858}
1859
1860static void kvmppc_xive_init(struct kvm_device *dev)
1861{
1862 struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
1863
1864 /* Register some debug interfaces */
1865 xive_debugfs_init(xive);
1866}
1867
1868struct kvm_device_ops kvm_xive_ops = {
1869 .name = "kvm-xive",
1870 .create = kvmppc_xive_create,
1871 .init = kvmppc_xive_init,
1872 .destroy = kvmppc_xive_free,
1873 .set_attr = xive_set_attr,
1874 .get_attr = xive_get_attr,
1875 .has_attr = xive_has_attr,
1876};
1877
1878void kvmppc_xive_init_module(void)
1879{
1880 __xive_vm_h_xirr = xive_vm_h_xirr;
1881 __xive_vm_h_ipoll = xive_vm_h_ipoll;
1882 __xive_vm_h_ipi = xive_vm_h_ipi;
1883 __xive_vm_h_cppr = xive_vm_h_cppr;
1884 __xive_vm_h_eoi = xive_vm_h_eoi;
1885}
1886
1887void kvmppc_xive_exit_module(void)
1888{
1889 __xive_vm_h_xirr = NULL;
1890 __xive_vm_h_ipoll = NULL;
1891 __xive_vm_h_ipi = NULL;
1892 __xive_vm_h_cppr = NULL;
1893 __xive_vm_h_eoi = NULL;
1894}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
new file mode 100644
index 000000000000..5938f7644dc1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -0,0 +1,256 @@
1/*
2 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef _KVM_PPC_BOOK3S_XIVE_H
10#define _KVM_PPC_BOOK3S_XIVE_H
11
12#ifdef CONFIG_KVM_XICS
13#include "book3s_xics.h"
14
15/*
16 * State for one guest irq source.
17 *
18 * For each guest source we allocate a HW interrupt in the XIVE
19 * which we use for all SW triggers. It will be unused for
20 * pass-through, but it's easier to keep around as the same
21 * guest interrupt can alternatively be emulated or passed through
22 * if a physical device is hot unplugged and replaced with an
23 * emulated one.
24 *
25 * This state structure is very similar to the XICS one with
26 * additional XIVE specific tracking.
27 */
28struct kvmppc_xive_irq_state {
29 bool valid; /* Interrupt entry is valid */
30
31 u32 number; /* Guest IRQ number */
32 u32 ipi_number; /* XIVE IPI HW number */
33 struct xive_irq_data ipi_data; /* XIVE IPI associated data */
34 u32 pt_number; /* XIVE Pass-through number if any */
35 struct xive_irq_data *pt_data; /* XIVE Pass-through associated data */
36
37	/* Targeting as set by guest */
38 u32 guest_server; /* Current guest selected target */
39 u8 guest_priority; /* Guest set priority */
40 u8 saved_priority; /* Saved priority when masking */
41
42	/* Actual targeting */
43 u32 act_server; /* Actual server */
44 u8 act_priority; /* Actual priority */
45
46 /* Various state bits */
47 bool in_eoi; /* Synchronize with H_EOI */
48 bool old_p; /* P bit state when masking */
49 bool old_q; /* Q bit state when masking */
50 bool lsi; /* level-sensitive interrupt */
51 bool asserted; /* Only for emulated LSI: current state */
52
53 /* Saved for migration state */
54 bool in_queue;
55 bool saved_p;
56 bool saved_q;
57 u8 saved_scan_prio;
58};
59
60/* Select the "right" interrupt (IPI vs. passthrough) */
61static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
62 u32 *out_hw_irq,
63 struct xive_irq_data **out_xd)
64{
65 if (state->pt_number) {
66 if (out_hw_irq)
67 *out_hw_irq = state->pt_number;
68 if (out_xd)
69 *out_xd = state->pt_data;
70 } else {
71 if (out_hw_irq)
72 *out_hw_irq = state->ipi_number;
73 if (out_xd)
74 *out_xd = &state->ipi_data;
75 }
76}
77
78/*
79 * This corresponds to an "ICS" in XICS terminology; we use it
80 * as a means to break up source information into multiple structures.
81 */
82struct kvmppc_xive_src_block {
83 arch_spinlock_t lock;
84 u16 id;
85 struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
86};
87
88
89struct kvmppc_xive {
90 struct kvm *kvm;
91 struct kvm_device *dev;
92 struct dentry *dentry;
93
94 /* VP block associated with the VM */
95 u32 vp_base;
96
97 /* Blocks of sources */
98 struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
99 u32 max_sbid;
100
101 /*
102 * For state save, we lazily scan the queues on the first interrupt
103	 * being migrated. We don't have a clean way to reset those flags,
104	 * so we keep track of the number of valid sources and how many of
105	 * them have been migrated, and reset once all of them have been
106 * processed.
107 */
108 u32 src_count;
109 u32 saved_src_count;
110
111 /*
112 * Some irqs are delayed on restore until the source is created,
113 * keep track here of how many of them
114 */
115 u32 delayed_irqs;
116
117 /* Which queues (priorities) are in use by the guest */
118 u8 qmap;
119
120 /* Queue orders */
121 u32 q_order;
122 u32 q_page_order;
123
124};
125
126#define KVMPPC_XIVE_Q_COUNT 8
127
128struct kvmppc_xive_vcpu {
129 struct kvmppc_xive *xive;
130 struct kvm_vcpu *vcpu;
131 bool valid;
132
133 /* Server number. This is the HW CPU ID from a guest perspective */
134 u32 server_num;
135
136 /*
137 * HW VP corresponding to this VCPU. This is the base of the VP
138 * block plus the server number.
139 */
140 u32 vp_id;
141 u32 vp_chip_id;
142 u32 vp_cam;
143
144 /* IPI used for sending ... IPIs */
145 u32 vp_ipi;
146 struct xive_irq_data vp_ipi_data;
147
148 /* Local emulation state */
149 uint8_t cppr; /* guest CPPR */
150 uint8_t hw_cppr;/* Hardware CPPR */
151 uint8_t mfrr;
152 uint8_t pending;
153
154 /* Each VP has 8 queues though we only provision some */
155 struct xive_q queues[KVMPPC_XIVE_Q_COUNT];
156 u32 esc_virq[KVMPPC_XIVE_Q_COUNT];
157 char *esc_virq_names[KVMPPC_XIVE_Q_COUNT];
158
159 /* Stash a delayed irq on restore from migration (see set_icp) */
160 u32 delayed_irq;
161
162 /* Stats */
163 u64 stat_rm_h_xirr;
164 u64 stat_rm_h_ipoll;
165 u64 stat_rm_h_cppr;
166 u64 stat_rm_h_eoi;
167 u64 stat_rm_h_ipi;
168 u64 stat_vm_h_xirr;
169 u64 stat_vm_h_ipoll;
170 u64 stat_vm_h_cppr;
171 u64 stat_vm_h_eoi;
172 u64 stat_vm_h_ipi;
173};
174
175static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
176{
177 struct kvm_vcpu *vcpu = NULL;
178 int i;
179
180 kvm_for_each_vcpu(i, vcpu, kvm) {
181 if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
182 return vcpu;
183 }
184 return NULL;
185}
186
187static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
188 u32 irq, u16 *source)
189{
190 u32 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
191 u16 src = irq & KVMPPC_XICS_SRC_MASK;
192
193 if (source)
194 *source = src;
195 if (bid > KVMPPC_XICS_MAX_ICS_ID)
196 return NULL;
197 return xive->src_blocks[bid];
198}
199
200/*
201 * The mapping between guest priorities and host priorities
202 * is as follows.
203 *
204 * Guest requests for 0...6 are honored. A guest request for anything
205 * higher results in a priority of 7 being applied.
206 *
207 * However, when the XIRR is returned via H_XIRR, 7 is translated to
208 * 0xb in order to match AIX expectations.
209 *
210 * A similar mapping is done for CPPR values.
211 */
212static inline u8 xive_prio_from_guest(u8 prio)
213{
214 if (prio == 0xff || prio < 8)
215 return prio;
216 return 7;
217}
218
219static inline u8 xive_prio_to_guest(u8 prio)
220{
221 if (prio == 0xff || prio < 7)
222 return prio;
223 return 0xb;
224}
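
As a quick illustration of the two helpers above, worked out by hand from the code rather than taken from the patch:

/*
 * xive_prio_from_guest(5)    == 5     guest priorities 0..6 pass through
 * xive_prio_from_guest(0x0a) == 7     anything higher is clamped to 7
 * xive_prio_from_guest(0xff) == 0xff  0xff (masked) is preserved
 *
 * xive_prio_to_guest(5)      == 5     host priorities 0..6 pass through
 * xive_prio_to_guest(7)      == 0xb   7 is presented to the guest as 0xb
 * xive_prio_to_guest(0xff)   == 0xff  masked stays masked
 */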
225
226static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
227{
228 u32 cur;
229
230 if (!qpage)
231 return 0;
232 cur = be32_to_cpup(qpage + *idx);
233 if ((cur >> 31) == *toggle)
234 return 0;
235 *idx = (*idx + 1) & msk;
236 if (*idx == 0)
237 (*toggle) ^= 1;
238 return cur & 0x7fffffff;
239}
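
To make the toggle-bit convention above concrete, a caller draining one event queue would look roughly like the sketch below. This is illustrative only; the struct xive_q fields used here are the same ones the scan loop in book3s_xive_template.c relies on.

/*
 * Hypothetical sketch: drain every entry currently visible in one event
 * queue. Each 32-bit EQ entry holds the interrupt number in its low 31
 * bits; the top bit is the generation (toggle) bit, which flips every
 * time the producer wraps around the queue.
 */
static inline void xive_drain_queue_example(struct xive_q *q)
{
	u32 idx = q->idx;
	u32 toggle = q->toggle;
	u32 hirq;

	while ((hirq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle)) != 0)
		pr_devel("queued irq 0x%x\n", hirq);

	/* Commit the new position only once the entries are really consumed */
	q->idx = idx;
	q->toggle = toggle;
}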
240
241extern unsigned long xive_rm_h_xirr(struct kvm_vcpu *vcpu);
242extern unsigned long xive_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server);
243extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
244 unsigned long mfrr);
245extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
246extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
247
248extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
249extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
250extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
251 unsigned long mfrr);
252extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
253extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
254
255#endif /* CONFIG_KVM_XICS */
256#endif /* _KVM_PPC_BOOK3S_XIVE_H */
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
new file mode 100644
index 000000000000..023a31133c37
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -0,0 +1,503 @@
1/*
2 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2, as
6 * published by the Free Software Foundation.
7 */
8
9/* File to be included by other .c files */
10
11#define XGLUE(a,b) a##b
12#define GLUE(a,b) XGLUE(a,b)
13
14static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
15{
16 u8 cppr;
17 u16 ack;
18
19 /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
20
21 /* Perform the acknowledge OS to register cycle. */
22 ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
23
24 /* Synchronize subsequent queue accesses */
25 mb();
26
27 /* XXX Check grouping level */
28
29 /* Anything ? */
30 if (!((ack >> 8) & TM_QW1_NSR_EO))
31 return;
32
33 /* Grab CPPR of the most favored pending interrupt */
34 cppr = ack & 0xff;
35 if (cppr < 8)
36 xc->pending |= 1 << cppr;
37
38#ifdef XIVE_RUNTIME_CHECKS
39 /* Check consistency */
40 if (cppr >= xc->hw_cppr)
41 pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
42 smp_processor_id(), cppr, xc->hw_cppr);
43#endif
44
45 /*
46 * Update our image of the HW CPPR. We don't yet modify
47 * xc->cppr, this will be done as we scan for interrupts
48 * in the queues.
49 */
50 xc->hw_cppr = cppr;
51}
52
53static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset)
54{
55 u64 val;
56
57 if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
58 offset |= offset << 4;
59
60	val = __x_readq(__x_eoi_page(xd) + offset);
61#ifdef __LITTLE_ENDIAN__
62 val >>= 64-8;
63#endif
64 return (u8)val;
65}
66
67
68static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
69{
70	/* If the XIVE supports the new "store EOI" facility, use it */
71 if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
72 __x_writeq(0, __x_eoi_page(xd));
73 else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
74 opal_int_eoi(hw_irq);
75 } else {
76 uint64_t eoi_val;
77
78 /*
79 * Otherwise for EOI, we use the special MMIO that does
80 * a clear of both P and Q and returns the old Q,
81 * except for LSIs where we use the "EOI cycle" special
82 * load.
83 *
84		 * This allows us to then do a re-trigger if Q was set,
85		 * rather than synthesizing an interrupt in software.
86 *
87 * For LSIs, using the HW EOI cycle works around a problem
88 * on P9 DD1 PHBs where the other ESB accesses don't work
89 * properly.
90 */
91 if (xd->flags & XIVE_IRQ_FLAG_LSI)
92 __x_readq(__x_eoi_page(xd));
93 else {
94 eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
95
96 /* Re-trigger if needed */
97 if ((eoi_val & 1) && __x_trig_page(xd))
98 __x_writeq(0, __x_trig_page(xd));
99 }
100 }
101}
102
103enum {
104 scan_fetch,
105 scan_poll,
106 scan_eoi,
107};
108
109static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
110 u8 pending, int scan_type)
111{
112 u32 hirq = 0;
113 u8 prio = 0xff;
114
115 /* Find highest pending priority */
116 while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
117 struct xive_q *q;
118 u32 idx, toggle;
119 __be32 *qpage;
120
121 /*
122		 * If pending is 0, ffs() returns 0 and prio becomes 0xff
123		 * (as a u8), which is what we want
124 */
125 prio = ffs(pending) - 1;
126
127 /*
128		 * If the most favored prio we found pending is less
129		 * favored than (or equal to) a pending IPI, we return
130		 * the IPI instead.
131		 *
132		 * Note: If pending was 0 and mfrr is 0xff, we will
133		 * not spuriously take an IPI because mfrr cannot
134 * then be smaller than cppr.
135 */
136 if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
137 prio = xc->mfrr;
138 hirq = XICS_IPI;
139 break;
140 }
141
142 /* Don't scan past the guest cppr */
143 if (prio >= xc->cppr || prio > 7)
144 break;
145
146 /* Grab queue and pointers */
147 q = &xc->queues[prio];
148 idx = q->idx;
149 toggle = q->toggle;
150
151 /*
152 * Snapshot the queue page. The test further down for EOI
153 * must use the same "copy" that was used by __xive_read_eq
154 * since qpage can be set concurrently and we don't want
155 * to miss an EOI.
156 */
157 qpage = READ_ONCE(q->qpage);
158
159skip_ipi:
160 /*
161 * Try to fetch from the queue. Will return 0 for a
162 * non-queueing priority (ie, qpage = 0).
163 */
164 hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
165
166 /*
167		 * If this was a signal for an MFRR change done by
168		 * H_IPI, we skip it. Additionally, if we were fetching,
169		 * we EOI it now, thus re-enabling reception of a new
170		 * such signal.
171		 *
172		 * We also need to do that if prio is 0 and we had no
173		 * page for the queue. In this case, we have a non-queued
174		 * IPI that needs to be EOId.
175 *
176 * This is safe because if we have another pending MFRR
177 * change that wasn't observed above, the Q bit will have
178 * been set and another occurrence of the IPI will trigger.
179 */
180 if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
181 if (scan_type == scan_fetch)
182 GLUE(X_PFX,source_eoi)(xc->vp_ipi,
183 &xc->vp_ipi_data);
184 /* Loop back on same queue with updated idx/toggle */
185#ifdef XIVE_RUNTIME_CHECKS
186 WARN_ON(hirq && hirq != XICS_IPI);
187#endif
188 if (hirq)
189 goto skip_ipi;
190 }
191
192 /* If fetching, update queue pointers */
193 if (scan_type == scan_fetch) {
194 q->idx = idx;
195 q->toggle = toggle;
196 }
197
198 /* Something found, stop searching */
199 if (hirq)
200 break;
201
202 /* Clear the pending bit on the now empty queue */
203 pending &= ~(1 << prio);
204
205 /*
206 * Check if the queue count needs adjusting due to
207 * interrupts being moved away.
208 */
209 if (atomic_read(&q->pending_count)) {
210 int p = atomic_xchg(&q->pending_count, 0);
211 if (p) {
212#ifdef XIVE_RUNTIME_CHECKS
213 WARN_ON(p > atomic_read(&q->count));
214#endif
215 atomic_sub(p, &q->count);
216 }
217 }
218 }
219
220 /* If we are just taking a "peek", do nothing else */
221 if (scan_type == scan_poll)
222 return hirq;
223
224 /* Update the pending bits */
225 xc->pending = pending;
226
227 /*
228	 * If this is an EOI, that's it: no CPPR adjustment is done here;
229	 * all we needed was to clean up the stale pending bits and check
230	 * whether there's anything left.
231 */
232 if (scan_type == scan_eoi)
233 return hirq;
234
235 /*
236 * If we found an interrupt, adjust what the guest CPPR should
237 * be as if we had just fetched that interrupt from HW.
238 */
239 if (hirq)
240 xc->cppr = prio;
241 /*
242	 * If it was an IPI, the HW CPPR might have been lowered too much
243 * as the HW interrupt we use for IPIs is routed to priority 0.
244 *
245 * We re-sync it here.
246 */
247 if (xc->cppr != xc->hw_cppr) {
248 xc->hw_cppr = xc->cppr;
249 __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
250 }
251
252 return hirq;
253}
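
The priority pick at the top of the scan loop relies on ffs() returning the 1-based index of the least significant set bit (and 0 for an empty mask); since lower priority numbers are more favored, that bit is exactly the next priority to service. A small worked example, not part of the patch:

/*
 * pending has bit N set when priority N has queued interrupts.
 *
 *   pending = 0x28  (priorities 3 and 5 pending)
 *     ffs(0x28) == 4  ->  prio = 3              serve priority 3 first
 *   pending = 0x00  (nothing pending)
 *     ffs(0)    == 0  ->  prio = (u8)-1 = 0xff  treated as "none pending"
 */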
254
255X_STATIC unsigned long GLUE(X_PFX,h_xirr)(struct kvm_vcpu *vcpu)
256{
257 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
258 u8 old_cppr;
259 u32 hirq;
260
261 pr_devel("H_XIRR\n");
262
263 xc->GLUE(X_STAT_PFX,h_xirr)++;
264
265 /* First collect pending bits from HW */
266 GLUE(X_PFX,ack_pending)(xc);
267
268 /*
269	 * Clean up the old-style bits if needed (they may have been
270	 * set by a pull or an escalation interrupt).
271 */
272 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions))
273 clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
274 &vcpu->arch.pending_exceptions);
275
276 pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
277 xc->pending, xc->hw_cppr, xc->cppr);
278
279 /* Grab previous CPPR and reverse map it */
280 old_cppr = xive_prio_to_guest(xc->cppr);
281
282 /* Scan for actual interrupts */
283 hirq = GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_fetch);
284
285 pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
286 hirq, xc->hw_cppr, xc->cppr);
287
288#ifdef XIVE_RUNTIME_CHECKS
289 /* That should never hit */
290 if (hirq & 0xff000000)
291 pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
292#endif
293
294 /*
295 * XXX We could check if the interrupt is masked here and
296 * filter it. If we chose to do so, we would need to do:
297 *
298 * if (masked) {
299 * lock();
300 * if (masked) {
301 * old_Q = true;
302 * hirq = 0;
303 * }
304 * unlock();
305 * }
306 */
307
308 /* Return interrupt and old CPPR in GPR4 */
309 vcpu->arch.gpr[4] = hirq | (old_cppr << 24);
310
311 return H_SUCCESS;
312}
313
314X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server)
315{
316 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
317 u8 pending = xc->pending;
318 u32 hirq;
319 u8 pipr;
320
321 pr_devel("H_IPOLL(server=%ld)\n", server);
322
323 xc->GLUE(X_STAT_PFX,h_ipoll)++;
324
325 /* Grab the target VCPU if not the current one */
326 if (xc->server_num != server) {
327 vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
328 if (!vcpu)
329 return H_PARAMETER;
330 xc = vcpu->arch.xive_vcpu;
331
332 /* Scan all priorities */
333 pending = 0xff;
334 } else {
335 /* Grab pending interrupt if any */
336 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
337 if (pipr < 8)
338 pending |= 1 << pipr;
339 }
340
341 hirq = GLUE(X_PFX,scan_interrupts)(xc, pending, scan_poll);
342
343 /* Return interrupt and old CPPR in GPR4 */
344 vcpu->arch.gpr[4] = hirq | (xc->cppr << 24);
345
346 return H_SUCCESS;
347}
348
349static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
350{
351 u8 pending, prio;
352
353 pending = xc->pending;
354 if (xc->mfrr != 0xff) {
355 if (xc->mfrr < 8)
356 pending |= 1 << xc->mfrr;
357 else
358 pending |= 0x80;
359 }
360 if (!pending)
361 return;
362 prio = ffs(pending) - 1;
363
364 __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
365}
366
367X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
368{
369 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
370 u8 old_cppr;
371
372 pr_devel("H_CPPR(cppr=%ld)\n", cppr);
373
374 xc->GLUE(X_STAT_PFX,h_cppr)++;
375
376 /* Map CPPR */
377 cppr = xive_prio_from_guest(cppr);
378
379 /* Remember old and update SW state */
380 old_cppr = xc->cppr;
381 xc->cppr = cppr;
382
383 /*
384	 * We are masking less, so we need to look for pending things
385	 * to deliver and set the VP pending bits accordingly to trigger
386	 * a new interrupt; otherwise we might miss MFRR changes for
387 * which we have optimized out sending an IPI signal.
388 */
389 if (cppr > old_cppr)
390 GLUE(X_PFX,push_pending_to_hw)(xc);
391
392 /* Apply new CPPR */
393 xc->hw_cppr = cppr;
394 __x_writeb(cppr, __x_tima + TM_QW1_OS + TM_CPPR);
395
396 return H_SUCCESS;
397}
398
399X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
400{
401 struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
402 struct kvmppc_xive_src_block *sb;
403 struct kvmppc_xive_irq_state *state;
404 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
405 struct xive_irq_data *xd;
406 u8 new_cppr = xirr >> 24;
407 u32 irq = xirr & 0x00ffffff, hw_num;
408 u16 src;
409 int rc = 0;
410
411 pr_devel("H_EOI(xirr=%08lx)\n", xirr);
412
413 xc->GLUE(X_STAT_PFX,h_eoi)++;
414
415 xc->cppr = xive_prio_from_guest(new_cppr);
416
417 /*
418	 * IPIs are synthesized from MFRR and thus don't need
419 * any special EOI handling. The underlying interrupt
420 * used to signal MFRR changes is EOId when fetched from
421 * the queue.
422 */
423 if (irq == XICS_IPI || irq == 0)
424 goto bail;
425
426 /* Find interrupt source */
427 sb = kvmppc_xive_find_source(xive, irq, &src);
428 if (!sb) {
429 pr_devel(" source not found !\n");
430 rc = H_PARAMETER;
431 goto bail;
432 }
433 state = &sb->irq_state[src];
434 kvmppc_xive_select_irq(state, &hw_num, &xd);
435
436 state->in_eoi = true;
437 mb();
438
439again:
440 if (state->guest_priority == MASKED) {
441 arch_spin_lock(&sb->lock);
442 if (state->guest_priority != MASKED) {
443 arch_spin_unlock(&sb->lock);
444 goto again;
445 }
446 pr_devel(" EOI on saved P...\n");
447
448 /* Clear old_p, that will cause unmask to perform an EOI */
449 state->old_p = false;
450
451 arch_spin_unlock(&sb->lock);
452 } else {
453 pr_devel(" EOI on source...\n");
454
455 /* Perform EOI on the source */
456 GLUE(X_PFX,source_eoi)(hw_num, xd);
457
458 /* If it's an emulated LSI, check level and resend */
459 if (state->lsi && state->asserted)
460 __x_writeq(0, __x_trig_page(xd));
461
462 }
463
464 mb();
465 state->in_eoi = false;
466bail:
467
468 /* Re-evaluate pending IRQs and update HW */
469 GLUE(X_PFX,scan_interrupts)(xc, xc->pending, scan_eoi);
470 GLUE(X_PFX,push_pending_to_hw)(xc);
471 pr_devel(" after scan pending=%02x\n", xc->pending);
472
473 /* Apply new CPPR */
474 xc->hw_cppr = xc->cppr;
475 __x_writeb(xc->cppr, __x_tima + TM_QW1_OS + TM_CPPR);
476
477 return rc;
478}
479
480X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
481 unsigned long mfrr)
482{
483 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
484
485 pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
486
487 xc->GLUE(X_STAT_PFX,h_ipi)++;
488
489 /* Find target */
490 vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
491 if (!vcpu)
492 return H_PARAMETER;
493 xc = vcpu->arch.xive_vcpu;
494
495 /* Locklessly write over MFRR */
496 xc->mfrr = mfrr;
497
498	/* Shoot the IPI if more favored than the target CPPR */
499 if (mfrr < xc->cppr)
500 __x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
501
502 return H_SUCCESS;
503}
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index 5a9a10b90762..3f1be85a83bc 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -12,6 +12,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
12#endif 12#endif
13#ifdef CONFIG_KVM_XICS 13#ifdef CONFIG_KVM_XICS
14 ret = ret || (kvm->arch.xics != NULL); 14 ret = ret || (kvm->arch.xics != NULL);
15 ret = ret || (kvm->arch.xive != NULL);
15#endif 16#endif
16 smp_rmb(); 17 smp_rmb();
17 return ret; 18 return ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1ee22a910074..f7cf2cd564ef 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,6 +38,8 @@
38#include <asm/irqflags.h> 38#include <asm/irqflags.h>
39#include <asm/iommu.h> 39#include <asm/iommu.h>
40#include <asm/switch_to.h> 40#include <asm/switch_to.h>
41#include <asm/xive.h>
42
41#include "timing.h" 43#include "timing.h"
42#include "irq.h" 44#include "irq.h"
43#include "../mm/mmu_decl.h" 45#include "../mm/mmu_decl.h"
@@ -697,7 +699,10 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
697 kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); 699 kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
698 break; 700 break;
699 case KVMPPC_IRQ_XICS: 701 case KVMPPC_IRQ_XICS:
700 kvmppc_xics_free_icp(vcpu); 702 if (xive_enabled())
703 kvmppc_xive_cleanup_vcpu(vcpu);
704 else
705 kvmppc_xics_free_icp(vcpu);
701 break; 706 break;
702 } 707 }
703 708
@@ -1522,8 +1527,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
1522 1527
1523 r = -EPERM; 1528 r = -EPERM;
1524 dev = kvm_device_from_filp(f.file); 1529 dev = kvm_device_from_filp(f.file);
1525 if (dev) 1530 if (dev) {
1526 r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); 1531 if (xive_enabled())
1532 r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
1533 else
1534 r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
1535 }
1527 1536
1528 fdput(f); 1537 fdput(f);
1529 break; 1538 break;
@@ -1547,7 +1556,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
1547 return true; 1556 return true;
1548#endif 1557#endif
1549#ifdef CONFIG_KVM_XICS 1558#ifdef CONFIG_KVM_XICS
1550 if (kvm->arch.xics) 1559 if (kvm->arch.xics || kvm->arch.xive)
1551 return true; 1560 return true;
1552#endif 1561#endif
1553 return false; 1562 return false;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 7925a9d72cca..59684b4af4d1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -967,3 +967,4 @@ EXPORT_SYMBOL_GPL(opal_leds_set_ind);
967EXPORT_SYMBOL_GPL(opal_write_oppanel_async); 967EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
968/* Export this for KVM */ 968/* Export this for KVM */
969EXPORT_SYMBOL_GPL(opal_int_set_mfrr); 969EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
970EXPORT_SYMBOL_GPL(opal_int_eoi);
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 6a98efb14264..913825086b8d 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -46,13 +46,15 @@
46#endif 46#endif
47 47
48bool __xive_enabled; 48bool __xive_enabled;
49EXPORT_SYMBOL_GPL(__xive_enabled);
49bool xive_cmdline_disabled; 50bool xive_cmdline_disabled;
50 51
51/* We use only one priority for now */ 52/* We use only one priority for now */
52static u8 xive_irq_priority; 53static u8 xive_irq_priority;
53 54
54/* TIMA */ 55/* TIMA exported to KVM */
55void __iomem *xive_tima; 56void __iomem *xive_tima;
57EXPORT_SYMBOL_GPL(xive_tima);
56u32 xive_tima_offset; 58u32 xive_tima_offset;
57 59
58/* Backend ops */ 60/* Backend ops */
@@ -345,8 +347,11 @@ static void xive_irq_eoi(struct irq_data *d)
345 DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n", 347 DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
346 d->irq, irqd_to_hwirq(d), xc->pending_prio); 348 d->irq, irqd_to_hwirq(d), xc->pending_prio);
347 349
348 /* EOI the source if it hasn't been disabled */ 350 /*
349 if (!irqd_irq_disabled(d)) 351 * EOI the source if it hasn't been disabled and hasn't
352 * been passed-through to a KVM guest
353 */
354 if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
350 xive_do_source_eoi(irqd_to_hwirq(d), xd); 355 xive_do_source_eoi(irqd_to_hwirq(d), xd);
351 356
352 /* 357 /*
@@ -689,9 +694,14 @@ static int xive_irq_set_affinity(struct irq_data *d,
689 694
690 old_target = xd->target; 695 old_target = xd->target;
691 696
692 rc = xive_ops->configure_irq(hw_irq, 697 /*
693 get_hard_smp_processor_id(target), 698 * Only configure the irq if it's not currently passed-through to
694 xive_irq_priority, d->irq); 699 * a KVM guest
700 */
701 if (!irqd_is_forwarded_to_vcpu(d))
702 rc = xive_ops->configure_irq(hw_irq,
703 get_hard_smp_processor_id(target),
704 xive_irq_priority, d->irq);
695 if (rc < 0) { 705 if (rc < 0) {
696 pr_err("Error %d reconfiguring irq %d\n", rc, d->irq); 706 pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
697 return rc; 707 return rc;
@@ -771,6 +781,123 @@ static int xive_irq_retrigger(struct irq_data *d)
771 return 1; 781 return 1;
772} 782}
773 783
784static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
785{
786 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
787 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
788 int rc;
789 u8 pq;
790
791 /*
792 * We only support this on interrupts that do not require
793 * firmware calls for masking and unmasking
794 */
795 if (xd->flags & XIVE_IRQ_FLAG_MASK_FW)
796 return -EIO;
797
798 /*
799 * This is called by KVM with state non-NULL for enabling
800 * pass-through or NULL for disabling it
801 */
802 if (state) {
803 irqd_set_forwarded_to_vcpu(d);
804
805 /* Set it to PQ=10 state to prevent further sends */
806 pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_10);
807
808 /* No target ? nothing to do */
809 if (xd->target == XIVE_INVALID_TARGET) {
810 /*
811 * An untargeted interrupt should also have been
812 * masked at the source
813 */
814 WARN_ON(pq & 2);
815
816 return 0;
817 }
818
819 /*
820 * If P was set, adjust state to PQ=11 to indicate
821 * that a resend is needed for the interrupt to reach
822 * the guest. Also remember the value of P.
823 *
824 * This also tells us that it's in flight to a host queue
825 * or has already been fetched but hasn't been EOIed yet
826 * by the host. Thus it's potentially using up a host
827 * queue slot. This is important to know because as long
828 * as this is the case, we must not hard-unmask it when
829 * "returning" that interrupt to the host.
830 *
831 * This saved_p is cleared by the host EOI, when we know
832 * for sure the queue slot is no longer in use.
833 */
834 if (pq & 2) {
835 pq = xive_poke_esb(xd, XIVE_ESB_SET_PQ_11);
836 xd->saved_p = true;
837
838 /*
839 * Sync the XIVE source HW to ensure the interrupt
840 * has gone through the EAS before we change its
841 * target to the guest. That should guarantee us
842 * that we *will* eventually get an EOI for it on
843 * the host. Otherwise there would be a small window
844 * for P to be seen here while the interrupt still goes
845 * to the guest queue.
846 */
847 if (xive_ops->sync_source)
848 xive_ops->sync_source(hw_irq);
849 } else
850 xd->saved_p = false;
851 } else {
852 irqd_clr_forwarded_to_vcpu(d);
853
854 /* No host target ? hard mask and return */
855 if (xd->target == XIVE_INVALID_TARGET) {
856 xive_do_source_set_mask(xd, true);
857 return 0;
858 }
859
860 /*
861 * Sync the XIVE source HW to ensure the interrupt
862 * has gone through the EAS before we change its
863 * target to the host.
864 */
865 if (xive_ops->sync_source)
866 xive_ops->sync_source(hw_irq);
867
868 /*
869 * By convention we are called with the interrupt in
870 * a PQ=10 or PQ=11 state, ie, it won't fire and will
871 * have latched in Q whether there's a pending HW
872 * interrupt or not.
873 *
874 * First reconfigure the target.
875 */
876 rc = xive_ops->configure_irq(hw_irq,
877 get_hard_smp_processor_id(xd->target),
878 xive_irq_priority, d->irq);
879 if (rc)
880 return rc;
881
882 /*
883 * Then if saved_p is not set, effectively re-enable the
884 * interrupt with an EOI. If it is set, we know there is
885 * still a message in a host queue somewhere that will be
886 * EOId eventually.
887 *
888 * Note: We don't check irqd_irq_disabled(). Effectively,
889 * we *will* let the irq get through even if masked if the
890 * HW is still firing it in order to deal with the whole
891 * saved_p business properly. If the interrupt triggers
892 * while masked, the generic code will re-mask it anyway.
893 */
894 if (!xd->saved_p)
895 xive_do_source_eoi(hw_irq, xd);
896
897 }
898 return 0;
899}
900
774static struct irq_chip xive_irq_chip = { 901static struct irq_chip xive_irq_chip = {
775 .name = "XIVE-IRQ", 902 .name = "XIVE-IRQ",
776 .irq_startup = xive_irq_startup, 903 .irq_startup = xive_irq_startup,
@@ -781,12 +908,14 @@ static struct irq_chip xive_irq_chip = {
781 .irq_set_affinity = xive_irq_set_affinity, 908 .irq_set_affinity = xive_irq_set_affinity,
782 .irq_set_type = xive_irq_set_type, 909 .irq_set_type = xive_irq_set_type,
783 .irq_retrigger = xive_irq_retrigger, 910 .irq_retrigger = xive_irq_retrigger,
911 .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
784}; 912};
785 913
786bool is_xive_irq(struct irq_chip *chip) 914bool is_xive_irq(struct irq_chip *chip)
787{ 915{
788 return chip == &xive_irq_chip; 916 return chip == &xive_irq_chip;
789} 917}
918EXPORT_SYMBOL_GPL(is_xive_irq);
790 919
791void xive_cleanup_irq_data(struct xive_irq_data *xd) 920void xive_cleanup_irq_data(struct xive_irq_data *xd)
792{ 921{
@@ -801,6 +930,7 @@ void xive_cleanup_irq_data(struct xive_irq_data *xd)
801 xd->trig_mmio = NULL; 930 xd->trig_mmio = NULL;
802 } 931 }
803} 932}
933EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
804 934
805static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) 935static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
806{ 936{
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1a726229a427..ab9ecce61ee5 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -31,6 +31,7 @@
31#include <asm/xive.h> 31#include <asm/xive.h>
32#include <asm/xive-regs.h> 32#include <asm/xive-regs.h>
33#include <asm/opal.h> 33#include <asm/opal.h>
34#include <asm/kvm_ppc.h>
34 35
35#include "xive-internal.h" 36#include "xive-internal.h"
36 37
@@ -95,6 +96,7 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
95 } 96 }
96 return 0; 97 return 0;
97} 98}
99EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
98 100
99int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq) 101int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
100{ 102{
@@ -108,6 +110,8 @@ int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
108 } 110 }
109 return rc == 0 ? 0 : -ENXIO; 111 return rc == 0 ? 0 : -ENXIO;
110} 112}
113EXPORT_SYMBOL_GPL(xive_native_configure_irq);
114
111 115
112/* This can be called multiple time to change a queue configuration */ 116/* This can be called multiple time to change a queue configuration */
113int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, 117int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
@@ -172,6 +176,7 @@ int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
172fail: 176fail:
173 return rc; 177 return rc;
174} 178}
179EXPORT_SYMBOL_GPL(xive_native_configure_queue);
175 180
176static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio) 181static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
177{ 182{
@@ -192,6 +197,7 @@ void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
192{ 197{
193 __xive_native_disable_queue(vp_id, q, prio); 198 __xive_native_disable_queue(vp_id, q, prio);
194} 199}
200EXPORT_SYMBOL_GPL(xive_native_disable_queue);
195 201
196static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio) 202static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
197{ 203{
@@ -262,6 +268,7 @@ static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
262 } 268 }
263 return 0; 269 return 0;
264} 270}
271#endif /* CONFIG_SMP */
265 272
266u32 xive_native_alloc_irq(void) 273u32 xive_native_alloc_irq(void)
267{ 274{
@@ -277,6 +284,7 @@ u32 xive_native_alloc_irq(void)
277 return 0; 284 return 0;
278 return rc; 285 return rc;
279} 286}
287EXPORT_SYMBOL_GPL(xive_native_alloc_irq);
280 288
281void xive_native_free_irq(u32 irq) 289void xive_native_free_irq(u32 irq)
282{ 290{
@@ -287,7 +295,9 @@ void xive_native_free_irq(u32 irq)
287 msleep(1); 295 msleep(1);
288 } 296 }
289} 297}
298EXPORT_SYMBOL_GPL(xive_native_free_irq);
290 299
300#ifdef CONFIG_SMP
291static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) 301static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
292{ 302{
293 s64 rc; 303 s64 rc;
@@ -383,7 +393,7 @@ static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
383 return; 393 return;
384 394
385 /* Enable the pool VP */ 395 /* Enable the pool VP */
386 vp = xive_pool_vps + get_hard_smp_processor_id(cpu); 396 vp = xive_pool_vps + cpu;
387 pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp); 397 pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
388 for (;;) { 398 for (;;) {
389 rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0); 399 rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
@@ -428,7 +438,7 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
428 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX); 438 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
429 439
430 /* Disable it */ 440 /* Disable it */
431 vp = xive_pool_vps + get_hard_smp_processor_id(cpu); 441 vp = xive_pool_vps + cpu;
432 for (;;) { 442 for (;;) {
433 rc = opal_xive_set_vp_info(vp, 0, 0); 443 rc = opal_xive_set_vp_info(vp, 0, 0);
434 if (rc != OPAL_BUSY) 444 if (rc != OPAL_BUSY)
@@ -437,10 +447,11 @@ static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
437 } 447 }
438} 448}
439 449
440static void xive_native_sync_source(u32 hw_irq) 450void xive_native_sync_source(u32 hw_irq)
441{ 451{
442 opal_xive_sync(XIVE_SYNC_EAS, hw_irq); 452 opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
443} 453}
454EXPORT_SYMBOL_GPL(xive_native_sync_source);
444 455
445static const struct xive_ops xive_native_ops = { 456static const struct xive_ops xive_native_ops = {
446 .populate_irq_data = xive_native_populate_irq_data, 457 .populate_irq_data = xive_native_populate_irq_data,
@@ -501,10 +512,24 @@ static bool xive_parse_provisioning(struct device_node *np)
501 return true; 512 return true;
502} 513}
503 514
515static void xive_native_setup_pools(void)
516{
517 /* Allocate a pool big enough */
518 pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
519
520 xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
521 if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
522 pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
523
524 pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
525 xive_pool_vps, nr_cpu_ids);
526}
527
504u32 xive_native_default_eq_shift(void) 528u32 xive_native_default_eq_shift(void)
505{ 529{
506 return xive_queue_shift; 530 return xive_queue_shift;
507} 531}
532EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
508 533
509bool xive_native_init(void) 534bool xive_native_init(void)
510{ 535{
@@ -514,7 +539,7 @@ bool xive_native_init(void)
514 struct property *prop; 539 struct property *prop;
515 u8 max_prio = 7; 540 u8 max_prio = 7;
516 const __be32 *p; 541 const __be32 *p;
517 u32 val; 542 u32 val, cpu;
518 s64 rc; 543 s64 rc;
519 544
520 if (xive_cmdline_disabled) 545 if (xive_cmdline_disabled)
@@ -550,7 +575,11 @@ bool xive_native_init(void)
550 break; 575 break;
551 } 576 }
552 577
553 /* Grab size of provisioning pages */ 578 /* Configure Thread Management areas for KVM */
579 for_each_possible_cpu(cpu)
580 kvmppc_set_xive_tima(cpu, r.start, tima);
581
582 /* Grab size of provisioning pages */
554 xive_parse_provisioning(np); 583 xive_parse_provisioning(np);
555 584
556 /* Switch the XIVE to exploitation mode */ 585 /* Switch the XIVE to exploitation mode */
@@ -560,6 +589,9 @@ bool xive_native_init(void)
560 return false; 589 return false;
561 } 590 }
562 591
592 /* Setup some dummy HV pool VPs */
593 xive_native_setup_pools();
594
563 /* Initialize XIVE core with our backend */ 595 /* Initialize XIVE core with our backend */
564 if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS, 596 if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
565 max_prio)) { 597 max_prio)) {
@@ -638,3 +670,47 @@ void xive_native_free_vp_block(u32 vp_base)
638 pr_warn("OPAL error %lld freeing VP block\n", rc); 670 pr_warn("OPAL error %lld freeing VP block\n", rc);
639} 671}
640EXPORT_SYMBOL_GPL(xive_native_free_vp_block); 672EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
673
674int xive_native_enable_vp(u32 vp_id)
675{
676 s64 rc;
677
678 for (;;) {
679 rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
680 if (rc != OPAL_BUSY)
681 break;
682 msleep(1);
683 }
684 return rc ? -EIO : 0;
685}
686EXPORT_SYMBOL_GPL(xive_native_enable_vp);
687
688int xive_native_disable_vp(u32 vp_id)
689{
690 s64 rc;
691
692 for (;;) {
693 rc = opal_xive_set_vp_info(vp_id, 0, 0);
694 if (rc != OPAL_BUSY)
695 break;
696 msleep(1);
697 }
698 return rc ? -EIO : 0;
699}
700EXPORT_SYMBOL_GPL(xive_native_disable_vp);
701
702int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
703{
704 __be64 vp_cam_be;
705 __be32 vp_chip_id_be;
706 s64 rc;
707
708 rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
709 if (rc)
710 return -EIO;
711 *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
712 *out_chip_id = be32_to_cpu(vp_chip_id_be);
713
714 return 0;
715}
716EXPORT_SYMBOL_GPL(xive_native_get_vp_info);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f5bddf92faba..9c761fea0c98 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1020,6 +1020,8 @@ struct kvm_x86_ops {
1020 void (*enable_log_dirty_pt_masked)(struct kvm *kvm, 1020 void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
1021 struct kvm_memory_slot *slot, 1021 struct kvm_memory_slot *slot,
1022 gfn_t offset, unsigned long mask); 1022 gfn_t offset, unsigned long mask);
1023 int (*write_log_dirty)(struct kvm_vcpu *vcpu);
1024
1023 /* pmu operations of sub-arch */ 1025 /* pmu operations of sub-arch */
1024 const struct kvm_pmu_ops *pmu_ops; 1026 const struct kvm_pmu_ops *pmu_ops;
1025 1027
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 558676538fca..5d3376f67794 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1498,6 +1498,21 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1498 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); 1498 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1499} 1499}
1500 1500
1501/**
1502 * kvm_arch_write_log_dirty - emulate dirty page logging
1503 * @vcpu: Guest mode vcpu
1504 *
1505 * Emulate arch specific page modification logging for the
1506 * nested hypervisor
1507 */
1508int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
1509{
1510 if (kvm_x86_ops->write_log_dirty)
1511 return kvm_x86_ops->write_log_dirty(vcpu);
1512
1513 return 0;
1514}
1515
1501bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, 1516bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
1502 struct kvm_memory_slot *slot, u64 gfn) 1517 struct kvm_memory_slot *slot, u64 gfn)
1503{ 1518{
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d8ccb32f7308..27975807cc64 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -202,4 +202,5 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
202void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); 202void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
203bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, 203bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
204 struct kvm_memory_slot *slot, u64 gfn); 204 struct kvm_memory_slot *slot, u64 gfn);
205int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
205#endif 206#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 314d2071b337..56241746abbd 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -226,6 +226,10 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
226 if (level == walker->level && write_fault && 226 if (level == walker->level && write_fault &&
227 !(pte & PT_GUEST_DIRTY_MASK)) { 227 !(pte & PT_GUEST_DIRTY_MASK)) {
228 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); 228 trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
229#if PTTYPE == PTTYPE_EPT
230 if (kvm_arch_write_log_dirty(vcpu))
231 return -EINVAL;
232#endif
229 pte |= PT_GUEST_DIRTY_MASK; 233 pte |= PT_GUEST_DIRTY_MASK;
230 } 234 }
231 if (pte == orig_pte) 235 if (pte == orig_pte)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c5fd459c4043..c6f4ad44aa95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -248,6 +248,7 @@ struct __packed vmcs12 {
248 u64 xss_exit_bitmap; 248 u64 xss_exit_bitmap;
249 u64 guest_physical_address; 249 u64 guest_physical_address;
250 u64 vmcs_link_pointer; 250 u64 vmcs_link_pointer;
251 u64 pml_address;
251 u64 guest_ia32_debugctl; 252 u64 guest_ia32_debugctl;
252 u64 guest_ia32_pat; 253 u64 guest_ia32_pat;
253 u64 guest_ia32_efer; 254 u64 guest_ia32_efer;
@@ -369,6 +370,7 @@ struct __packed vmcs12 {
369 u16 guest_ldtr_selector; 370 u16 guest_ldtr_selector;
370 u16 guest_tr_selector; 371 u16 guest_tr_selector;
371 u16 guest_intr_status; 372 u16 guest_intr_status;
373 u16 guest_pml_index;
372 u16 host_es_selector; 374 u16 host_es_selector;
373 u16 host_cs_selector; 375 u16 host_cs_selector;
374 u16 host_ss_selector; 376 u16 host_ss_selector;
@@ -407,6 +409,7 @@ struct nested_vmx {
407 /* Has the level1 guest done vmxon? */ 409 /* Has the level1 guest done vmxon? */
408 bool vmxon; 410 bool vmxon;
409 gpa_t vmxon_ptr; 411 gpa_t vmxon_ptr;
412 bool pml_full;
410 413
411 /* The guest-physical address of the current VMCS L1 keeps for L2 */ 414 /* The guest-physical address of the current VMCS L1 keeps for L2 */
412 gpa_t current_vmptr; 415 gpa_t current_vmptr;
@@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
742 FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector), 745 FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
743 FIELD(GUEST_TR_SELECTOR, guest_tr_selector), 746 FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
744 FIELD(GUEST_INTR_STATUS, guest_intr_status), 747 FIELD(GUEST_INTR_STATUS, guest_intr_status),
748 FIELD(GUEST_PML_INDEX, guest_pml_index),
745 FIELD(HOST_ES_SELECTOR, host_es_selector), 749 FIELD(HOST_ES_SELECTOR, host_es_selector),
746 FIELD(HOST_CS_SELECTOR, host_cs_selector), 750 FIELD(HOST_CS_SELECTOR, host_cs_selector),
747 FIELD(HOST_SS_SELECTOR, host_ss_selector), 751 FIELD(HOST_SS_SELECTOR, host_ss_selector),
@@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
767 FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap), 771 FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
768 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address), 772 FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
769 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer), 773 FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
774 FIELD64(PML_ADDRESS, pml_address),
770 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl), 775 FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
771 FIELD64(GUEST_IA32_PAT, guest_ia32_pat), 776 FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
772 FIELD64(GUEST_IA32_EFER, guest_ia32_efer), 777 FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
@@ -1314,6 +1319,11 @@ static inline bool report_flexpriority(void)
1314 return flexpriority_enabled; 1319 return flexpriority_enabled;
1315} 1320}
1316 1321
1322static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
1323{
1324 return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
1325}
1326
1317static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) 1327static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
1318{ 1328{
1319 return vmcs12->cpu_based_vm_exec_control & bit; 1329 return vmcs12->cpu_based_vm_exec_control & bit;
@@ -1348,6 +1358,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
1348 vmx_xsaves_supported(); 1358 vmx_xsaves_supported();
1349} 1359}
1350 1360
1361static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
1362{
1363 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
1364}
1365
1351static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) 1366static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
1352{ 1367{
1353 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); 1368 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@@ -2751,8 +2766,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
2751 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | 2766 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
2752 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | 2767 VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
2753 VMX_EPT_1GB_PAGE_BIT; 2768 VMX_EPT_1GB_PAGE_BIT;
2754 if (enable_ept_ad_bits) 2769 if (enable_ept_ad_bits) {
2770 vmx->nested.nested_vmx_secondary_ctls_high |=
2771 SECONDARY_EXEC_ENABLE_PML;
2755 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; 2772 vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
2773 }
2756 } else 2774 } else
2757 vmx->nested.nested_vmx_ept_caps = 0; 2775 vmx->nested.nested_vmx_ept_caps = 0;
2758 2776
@@ -8114,7 +8132,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
8114 case EXIT_REASON_PREEMPTION_TIMER: 8132 case EXIT_REASON_PREEMPTION_TIMER:
8115 return false; 8133 return false;
8116 case EXIT_REASON_PML_FULL: 8134 case EXIT_REASON_PML_FULL:
8117 /* We don't expose PML support to L1. */ 8135 /* We emulate PML support to L1. */
8118 return false; 8136 return false;
8119 default: 8137 default:
8120 return true; 8138 return true;
@@ -9364,13 +9382,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
9364 struct x86_exception *fault) 9382 struct x86_exception *fault)
9365{ 9383{
9366 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 9384 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
9385 struct vcpu_vmx *vmx = to_vmx(vcpu);
9367 u32 exit_reason; 9386 u32 exit_reason;
9387 unsigned long exit_qualification = vcpu->arch.exit_qualification;
9368 9388
9369 if (fault->error_code & PFERR_RSVD_MASK) 9389 if (vmx->nested.pml_full) {
9390 exit_reason = EXIT_REASON_PML_FULL;
9391 vmx->nested.pml_full = false;
9392 exit_qualification &= INTR_INFO_UNBLOCK_NMI;
9393 } else if (fault->error_code & PFERR_RSVD_MASK)
9370 exit_reason = EXIT_REASON_EPT_MISCONFIG; 9394 exit_reason = EXIT_REASON_EPT_MISCONFIG;
9371 else 9395 else
9372 exit_reason = EXIT_REASON_EPT_VIOLATION; 9396 exit_reason = EXIT_REASON_EPT_VIOLATION;
9373 nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); 9397
9398 nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
9374 vmcs12->guest_physical_address = fault->address; 9399 vmcs12->guest_physical_address = fault->address;
9375} 9400}
9376 9401
@@ -9713,6 +9738,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
9713 return 0; 9738 return 0;
9714} 9739}
9715 9740
9741static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
9742 struct vmcs12 *vmcs12)
9743{
9744 u64 address = vmcs12->pml_address;
9745 int maxphyaddr = cpuid_maxphyaddr(vcpu);
9746
9747 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
9748 if (!nested_cpu_has_ept(vmcs12) ||
9749 !IS_ALIGNED(address, 4096) ||
9750 address >> maxphyaddr)
9751 return -EINVAL;
9752 }
9753
9754 return 0;
9755}
9756
9716static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, 9757static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
9717 struct vmx_msr_entry *e) 9758 struct vmx_msr_entry *e)
9718{ 9759{
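
nested_vmx_check_pml_controls() above accepts a vmcs12 PML address only when PML is requested together with EPT, the address is 4 KiB aligned, and it fits within the guest's physical-address width. The two arithmetic conditions in isolation, with the address width passed in explicitly instead of being read from CPUID (a sketch, not the kernel helper):

#include <stdbool.h>
#include <stdint.h>

static inline bool pml_address_ok(uint64_t pml_address, unsigned int maxphyaddr)
{
	if (pml_address & 0xfffULL)		/* must be 4 KiB aligned */
		return false;
	if (maxphyaddr < 64 && (pml_address >> maxphyaddr))
		return false;			/* must fit in guest MAXPHYADDR */
	return true;
}
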
@@ -9886,7 +9927,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
9886 bool from_vmentry, u32 *entry_failure_code) 9927 bool from_vmentry, u32 *entry_failure_code)
9887{ 9928{
9888 struct vcpu_vmx *vmx = to_vmx(vcpu); 9929 struct vcpu_vmx *vmx = to_vmx(vcpu);
9889 u32 exec_control; 9930 u32 exec_control, vmcs12_exec_ctrl;
9890 9931
9891 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 9932 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
9892 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 9933 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10017,8 +10058,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
10017 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 10058 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
10018 SECONDARY_EXEC_APIC_REGISTER_VIRT); 10059 SECONDARY_EXEC_APIC_REGISTER_VIRT);
10019 if (nested_cpu_has(vmcs12, 10060 if (nested_cpu_has(vmcs12,
10020 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) 10061 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
10021 exec_control |= vmcs12->secondary_vm_exec_control; 10062 vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
10063 ~SECONDARY_EXEC_ENABLE_PML;
10064 exec_control |= vmcs12_exec_ctrl;
10065 }
10022 10066
10023 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { 10067 if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
10024 vmcs_write64(EOI_EXIT_BITMAP0, 10068 vmcs_write64(EOI_EXIT_BITMAP0,
@@ -10248,6 +10292,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10248 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) 10292 if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
10249 return VMXERR_ENTRY_INVALID_CONTROL_FIELD; 10293 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10250 10294
10295 if (nested_vmx_check_pml_controls(vcpu, vmcs12))
10296 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10297
10251 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, 10298 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
10252 vmx->nested.nested_vmx_procbased_ctls_low, 10299 vmx->nested.nested_vmx_procbased_ctls_low,
10253 vmx->nested.nested_vmx_procbased_ctls_high) || 10300 vmx->nested.nested_vmx_procbased_ctls_high) ||
@@ -10266,6 +10313,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
10266 vmx->nested.nested_vmx_entry_ctls_high)) 10313 vmx->nested.nested_vmx_entry_ctls_high))
10267 return VMXERR_ENTRY_INVALID_CONTROL_FIELD; 10314 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10268 10315
10316 if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
10317 return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
10318
10269 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || 10319 if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
10270 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || 10320 !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
10271 !nested_cr3_valid(vcpu, vmcs12->host_cr3)) 10321 !nested_cr3_valid(vcpu, vmcs12->host_cr3))
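
The cr3_target_count test added above leans on nested_cpu_vmx_misc_cr3_count(), introduced near the top of this patch, which recovers the supported count from the emulated IA32_VMX_MISC value. Assuming the SDM layout in which that count occupies bits 24:16 of the MISC MSR (an assumption of this sketch, not something stated by the patch), the extraction and the entry-time comparison reduce to:

#include <stdbool.h>
#include <stdint.h>

/* Assumed layout: CR3-target count in IA32_VMX_MISC bits 24:16. */
static inline unsigned int misc_cr3_target_count(uint64_t vmx_misc)
{
	return (unsigned int)((vmx_misc >> 16) & 0x1ff);
}

static inline bool cr3_target_count_ok(uint32_t requested, uint64_t vmx_misc)
{
	return requested <= misc_cr3_target_count(vmx_misc);
}
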
@@ -11143,6 +11193,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
11143 kvm_flush_pml_buffers(kvm); 11193 kvm_flush_pml_buffers(kvm);
11144} 11194}
11145 11195
11196static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
11197{
11198 struct vmcs12 *vmcs12;
11199 struct vcpu_vmx *vmx = to_vmx(vcpu);
11200 gpa_t gpa;
11201 struct page *page = NULL;
11202 u64 *pml_address;
11203
11204 if (is_guest_mode(vcpu)) {
11205 WARN_ON_ONCE(vmx->nested.pml_full);
11206
11207 /*
11208 * Check if PML is enabled for the nested guest.
11209 * Whether eptp bit 6 is set is already checked
11210 * as part of A/D emulation.
11211 */
11212 vmcs12 = get_vmcs12(vcpu);
11213 if (!nested_cpu_has_pml(vmcs12))
11214 return 0;
11215
11216 if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
11217 vmx->nested.pml_full = true;
11218 return 1;
11219 }
11220
11221 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
11222
11223 page = nested_get_page(vcpu, vmcs12->pml_address);
11224 if (!page)
11225 return 0;
11226
11227 pml_address = kmap(page);
11228 pml_address[vmcs12->guest_pml_index--] = gpa;
11229 kunmap(page);
11230 nested_release_page_clean(page);
11231 }
11232
11233 return 0;
11234}
11235
11146static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, 11236static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
11147 struct kvm_memory_slot *memslot, 11237 struct kvm_memory_slot *memslot,
11148 gfn_t offset, unsigned long mask) 11238 gfn_t offset, unsigned long mask)
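
vmx_write_pml_buffer() above is the core of the emulation: when running L2 with PML enabled in vmcs12, L0 itself stores the dirtied guest-physical address into L1's PML page at guest_pml_index, decrementing the index, and latches pml_full once the buffer is exhausted so that a PML-full exit can be synthesized for L1 later. A self-contained model of that bookkeeping, where an ordinary array stands in for the mapped guest page (names and the return convention are illustrative):

#include <stdint.h>

#define PML_ENTITY_NUM 512	/* entries in one 4 KiB PML page */

struct pml_log {
	uint64_t entry[PML_ENTITY_NUM];	/* stands in for the guest's PML page */
	int index;			/* plays the role of guest_pml_index */
};

/* Returns 1 when the buffer is exhausted and a PML-full event is due. */
static inline int pml_log_gpa(struct pml_log *log, uint64_t gpa)
{
	if (log->index < 0 || log->index >= PML_ENTITY_NUM)
		return 1;	/* full: caller flags it and injects the exit */

	log->entry[log->index--] = gpa & ~0xfffULL;	/* page-aligned GPA */
	return 0;
}

As with hardware PML, the buffer fills from the highest index downwards, so a freshly reset index of PML_ENTITY_NUM - 1 allows 512 entries before the next write reports a full log.
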
@@ -11502,6 +11592,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
11502 .slot_disable_log_dirty = vmx_slot_disable_log_dirty, 11592 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
11503 .flush_log_dirty = vmx_flush_log_dirty, 11593 .flush_log_dirty = vmx_flush_log_dirty,
11504 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, 11594 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
11595 .write_log_dirty = vmx_write_pml_buffer,
11505 11596
11506 .pre_block = vmx_pre_block, 11597 .pre_block = vmx_pre_block,
11507 .post_block = vmx_post_block, 11598 .post_block = vmx_post_block,
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 581a59ea7e34..97b8d3728b31 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -148,7 +148,6 @@ struct vgic_its {
148 gpa_t vgic_its_base; 148 gpa_t vgic_its_base;
149 149
150 bool enabled; 150 bool enabled;
151 bool initialized;
152 struct vgic_io_device iodev; 151 struct vgic_io_device iodev;
153 struct kvm_device *dev; 152 struct kvm_device *dev;
154 153
@@ -162,6 +161,9 @@ struct vgic_its {
162 u32 creadr; 161 u32 creadr;
163 u32 cwriter; 162 u32 cwriter;
164 163
164 /* migration ABI revision in use */
165 u32 abi_rev;
166
165 /* Protects the device and collection lists */ 167 /* Protects the device and collection lists */
166 struct mutex its_lock; 168 struct mutex its_lock;
167 struct list_head device_list; 169 struct list_head device_list;
@@ -283,6 +285,7 @@ extern struct static_key_false vgic_v2_cpuif_trap;
283 285
284int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); 286int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
285void kvm_vgic_early_init(struct kvm *kvm); 287void kvm_vgic_early_init(struct kvm *kvm);
288int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
286int kvm_vgic_create(struct kvm *kvm, u32 type); 289int kvm_vgic_create(struct kvm *kvm, u32 type);
287void kvm_vgic_destroy(struct kvm *kvm); 290void kvm_vgic_destroy(struct kvm *kvm);
288void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); 291void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 97cbca19430d..fffb91202bc9 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -132,6 +132,9 @@
132#define GIC_BASER_SHAREABILITY(reg, type) \ 132#define GIC_BASER_SHAREABILITY(reg, type) \
133 (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) 133 (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
134 134
135/* encode a size field of width @w containing @n - 1 units */
136#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
137
135#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) 138#define GICR_PROPBASER_SHAREABILITY_SHIFT (10)
136#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) 139#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7)
137#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) 140#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56)
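
GIC_ENCODE_SZ() captures the GIC register convention of storing a size as value-minus-one in a w-bit field. A small user-space sketch of the encode/decode pair (macro names invented for the example):

#include <stdint.h>

#define ENCODE_SZ(n, w)	(((uint64_t)(n) - 1) & ((1ULL << (w)) - 1))
#define DECODE_SZ(v, w)	(((uint64_t)(v) & ((1ULL << (w)) - 1)) + 1)

/* 16 ID bits in a 5-bit field are stored as 15 and read back as 16. */
_Static_assert(ENCODE_SZ(16, 5) == 15, "encode");
_Static_assert(DECODE_SZ(15, 5) == 16, "decode");

The vgic-its changes later in this diff use the new macro when publishing VITS_TYPER_DEVBITS, VITS_TYPER_IDBITS and the ITE entry size in GITS_TYPER.
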
@@ -156,6 +159,8 @@
156#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) 159#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
157 160
158#define GICR_PROPBASER_IDBITS_MASK (0x1f) 161#define GICR_PROPBASER_IDBITS_MASK (0x1f)
162#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12))
163#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16))
159 164
160#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) 165#define GICR_PENDBASER_SHAREABILITY_SHIFT (10)
161#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) 166#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7)
@@ -232,12 +237,18 @@
232#define GITS_CTLR_QUIESCENT (1U << 31) 237#define GITS_CTLR_QUIESCENT (1U << 31)
233 238
234#define GITS_TYPER_PLPIS (1UL << 0) 239#define GITS_TYPER_PLPIS (1UL << 0)
240#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
235#define GITS_TYPER_IDBITS_SHIFT 8 241#define GITS_TYPER_IDBITS_SHIFT 8
236#define GITS_TYPER_DEVBITS_SHIFT 13 242#define GITS_TYPER_DEVBITS_SHIFT 13
237#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) 243#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
238#define GITS_TYPER_PTA (1UL << 19) 244#define GITS_TYPER_PTA (1UL << 19)
239#define GITS_TYPER_HWCOLLCNT_SHIFT 24 245#define GITS_TYPER_HWCOLLCNT_SHIFT 24
240 246
247#define GITS_IIDR_REV_SHIFT 12
248#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT)
249#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
250#define GITS_IIDR_PRODUCTID_SHIFT 24
251
241#define GITS_CBASER_VALID (1ULL << 63) 252#define GITS_CBASER_VALID (1ULL << 63)
242#define GITS_CBASER_SHAREABILITY_SHIFT (10) 253#define GITS_CBASER_SHAREABILITY_SHIFT (10)
243#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) 254#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59)
@@ -290,6 +301,7 @@
290#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) 301#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
291#define GITS_BASER_ENTRY_SIZE_SHIFT (48) 302#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
292#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) 303#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
304#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
293#define GITS_BASER_SHAREABILITY_SHIFT (10) 305#define GITS_BASER_SHAREABILITY_SHIFT (10)
294#define GITS_BASER_InnerShareable \ 306#define GITS_BASER_InnerShareable \
295 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) 307 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
@@ -337,9 +349,11 @@
337#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 349#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
338#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 350#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
339#define E_ITS_MAPD_DEVICE_OOR 0x010801 351#define E_ITS_MAPD_DEVICE_OOR 0x010801
352#define E_ITS_MAPD_ITTSIZE_OOR 0x010802
340#define E_ITS_MAPC_PROCNUM_OOR 0x010902 353#define E_ITS_MAPC_PROCNUM_OOR 0x010902
341#define E_ITS_MAPC_COLLECTION_OOR 0x010903 354#define E_ITS_MAPC_COLLECTION_OOR 0x010903
342#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 355#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04
356#define E_ITS_MAPTI_ID_OOR 0x010a05
343#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 357#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06
344#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 358#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07
345#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 359#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2b12b2683359..8c0664309815 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -499,6 +499,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
499 return NULL; 499 return NULL;
500} 500}
501 501
502static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
503{
504 struct kvm_vcpu *tmp;
505 int idx;
506
507 kvm_for_each_vcpu(idx, tmp, vcpu->kvm)
508 if (tmp == vcpu)
509 return idx;
510 BUG();
511}
512
502#define kvm_for_each_memslot(memslot, slots) \ 513#define kvm_for_each_memslot(memslot, slots) \
503 for (memslot = &slots->memslots[0]; \ 514 for (memslot = &slots->memslots[0]; \
504 memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ 515 memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
@@ -1167,7 +1178,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
1167void kvm_unregister_device_ops(u32 type); 1178void kvm_unregister_device_ops(u32 type);
1168 1179
1169extern struct kvm_device_ops kvm_mpic_ops; 1180extern struct kvm_device_ops kvm_mpic_ops;
1170extern struct kvm_device_ops kvm_xics_ops;
1171extern struct kvm_device_ops kvm_arm_vgic_v2_ops; 1181extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
1172extern struct kvm_device_ops kvm_arm_vgic_v3_ops; 1182extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
1173 1183
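
kvm_vcpu_get_idx(), added to kvm_host.h above, recovers a vcpu's position by walking kvm->vcpus until the pointer matches, treating absence as a bug. The same idea in isolation, over a plain array of pointers (names are illustrative, not kernel API):

#include <assert.h>
#include <stddef.h>

static inline int index_of(void *const *slots, size_t nr, const void *elem)
{
	for (size_t i = 0; i < nr; i++)
		if (slots[i] == elem)
			return (int)i;
	assert(0 && "element not present");	/* mirrors the BUG() */
	return -1;
}
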
diff --git a/arch/arm/kvm/arm.c b/virt/kvm/arm/arm.c
index 8a31906bdc9b..3417e184c8e1 100644
--- a/arch/arm/kvm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -332,7 +332,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
332 332
333 kvm_arm_reset_debug_ptr(vcpu); 333 kvm_arm_reset_debug_ptr(vcpu);
334 334
335 return 0; 335 return kvm_vgic_vcpu_init(vcpu);
336} 336}
337 337
338void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 338void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
diff --git a/arch/arm/kvm/mmio.c b/virt/kvm/arm/mmio.c
index b6e715fd3c90..b6e715fd3c90 100644
--- a/arch/arm/kvm/mmio.c
+++ b/virt/kvm/arm/mmio.c
diff --git a/arch/arm/kvm/mmu.c b/virt/kvm/arm/mmu.c
index 313ee646480f..313ee646480f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/virt/kvm/arm/mmu.c
diff --git a/arch/arm/kvm/perf.c b/virt/kvm/arm/perf.c
index 1a3849da0b4b..1a3849da0b4b 100644
--- a/arch/arm/kvm/perf.c
+++ b/virt/kvm/arm/perf.c
diff --git a/arch/arm/kvm/psci.c b/virt/kvm/arm/psci.c
index a08d7a93aebb..a08d7a93aebb 100644
--- a/arch/arm/kvm/psci.c
+++ b/virt/kvm/arm/psci.c
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 37d8b98867d5..f7dc5ddd6847 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -7,26 +7,250 @@
7#define TRACE_SYSTEM kvm 7#define TRACE_SYSTEM kvm
8 8
9/* 9/*
10 * Tracepoints for vgic 10 * Tracepoints for entry/exit to guest
11 */ 11 */
12TRACE_EVENT(vgic_update_irq_pending, 12TRACE_EVENT(kvm_entry,
13 TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), 13 TP_PROTO(unsigned long vcpu_pc),
14 TP_ARGS(vcpu_id, irq, level), 14 TP_ARGS(vcpu_pc),
15 15
16 TP_STRUCT__entry( 16 TP_STRUCT__entry(
17 __field( unsigned long, vcpu_id ) 17 __field( unsigned long, vcpu_pc )
18 __field( __u32, irq )
19 __field( bool, level )
20 ), 18 ),
21 19
22 TP_fast_assign( 20 TP_fast_assign(
23 __entry->vcpu_id = vcpu_id; 21 __entry->vcpu_pc = vcpu_pc;
24 __entry->irq = irq; 22 ),
23
24 TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
25);
26
27TRACE_EVENT(kvm_exit,
28 TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
29 TP_ARGS(idx, exit_reason, vcpu_pc),
30
31 TP_STRUCT__entry(
32 __field( int, idx )
33 __field( unsigned int, exit_reason )
34 __field( unsigned long, vcpu_pc )
35 ),
36
37 TP_fast_assign(
38 __entry->idx = idx;
39 __entry->exit_reason = exit_reason;
40 __entry->vcpu_pc = vcpu_pc;
41 ),
42
43 TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx",
44 __print_symbolic(__entry->idx, kvm_arm_exception_type),
45 __entry->exit_reason,
46 __print_symbolic(__entry->exit_reason, kvm_arm_exception_class),
47 __entry->vcpu_pc)
48);
49
50TRACE_EVENT(kvm_guest_fault,
51 TP_PROTO(unsigned long vcpu_pc, unsigned long hsr,
52 unsigned long hxfar,
53 unsigned long long ipa),
54 TP_ARGS(vcpu_pc, hsr, hxfar, ipa),
55
56 TP_STRUCT__entry(
57 __field( unsigned long, vcpu_pc )
58 __field( unsigned long, hsr )
59 __field( unsigned long, hxfar )
60 __field( unsigned long long, ipa )
61 ),
62
63 TP_fast_assign(
64 __entry->vcpu_pc = vcpu_pc;
65 __entry->hsr = hsr;
66 __entry->hxfar = hxfar;
67 __entry->ipa = ipa;
68 ),
69
70 TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
71 __entry->ipa, __entry->hsr,
72 __entry->hxfar, __entry->vcpu_pc)
73);
74
75TRACE_EVENT(kvm_access_fault,
76 TP_PROTO(unsigned long ipa),
77 TP_ARGS(ipa),
78
79 TP_STRUCT__entry(
80 __field( unsigned long, ipa )
81 ),
82
83 TP_fast_assign(
84 __entry->ipa = ipa;
85 ),
86
87 TP_printk("IPA: %lx", __entry->ipa)
88);
89
90TRACE_EVENT(kvm_irq_line,
91 TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
92 TP_ARGS(type, vcpu_idx, irq_num, level),
93
94 TP_STRUCT__entry(
95 __field( unsigned int, type )
96 __field( int, vcpu_idx )
97 __field( int, irq_num )
98 __field( int, level )
99 ),
100
101 TP_fast_assign(
102 __entry->type = type;
103 __entry->vcpu_idx = vcpu_idx;
104 __entry->irq_num = irq_num;
25 __entry->level = level; 105 __entry->level = level;
26 ), 106 ),
27 107
28 TP_printk("VCPU: %ld, IRQ %d, level: %d", 108 TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d",
29 __entry->vcpu_id, __entry->irq, __entry->level) 109 (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" :
110 (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" :
111 (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN",
112 __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level)
113);
114
115TRACE_EVENT(kvm_mmio_emulate,
116 TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
117 unsigned long cpsr),
118 TP_ARGS(vcpu_pc, instr, cpsr),
119
120 TP_STRUCT__entry(
121 __field( unsigned long, vcpu_pc )
122 __field( unsigned long, instr )
123 __field( unsigned long, cpsr )
124 ),
125
126 TP_fast_assign(
127 __entry->vcpu_pc = vcpu_pc;
128 __entry->instr = instr;
129 __entry->cpsr = cpsr;
130 ),
131
132 TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
133 __entry->vcpu_pc, __entry->instr, __entry->cpsr)
134);
135
136TRACE_EVENT(kvm_unmap_hva,
137 TP_PROTO(unsigned long hva),
138 TP_ARGS(hva),
139
140 TP_STRUCT__entry(
141 __field( unsigned long, hva )
142 ),
143
144 TP_fast_assign(
145 __entry->hva = hva;
146 ),
147
148 TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
149);
150
151TRACE_EVENT(kvm_unmap_hva_range,
152 TP_PROTO(unsigned long start, unsigned long end),
153 TP_ARGS(start, end),
154
155 TP_STRUCT__entry(
156 __field( unsigned long, start )
157 __field( unsigned long, end )
158 ),
159
160 TP_fast_assign(
161 __entry->start = start;
162 __entry->end = end;
163 ),
164
165 TP_printk("mmu notifier unmap range: %#08lx -- %#08lx",
166 __entry->start, __entry->end)
167);
168
169TRACE_EVENT(kvm_set_spte_hva,
170 TP_PROTO(unsigned long hva),
171 TP_ARGS(hva),
172
173 TP_STRUCT__entry(
174 __field( unsigned long, hva )
175 ),
176
177 TP_fast_assign(
178 __entry->hva = hva;
179 ),
180
181 TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
182);
183
184TRACE_EVENT(kvm_age_hva,
185 TP_PROTO(unsigned long start, unsigned long end),
186 TP_ARGS(start, end),
187
188 TP_STRUCT__entry(
189 __field( unsigned long, start )
190 __field( unsigned long, end )
191 ),
192
193 TP_fast_assign(
194 __entry->start = start;
195 __entry->end = end;
196 ),
197
198 TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
199 __entry->start, __entry->end)
200);
201
202TRACE_EVENT(kvm_test_age_hva,
203 TP_PROTO(unsigned long hva),
204 TP_ARGS(hva),
205
206 TP_STRUCT__entry(
207 __field( unsigned long, hva )
208 ),
209
210 TP_fast_assign(
211 __entry->hva = hva;
212 ),
213
214 TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
215);
216
217TRACE_EVENT(kvm_set_way_flush,
218 TP_PROTO(unsigned long vcpu_pc, bool cache),
219 TP_ARGS(vcpu_pc, cache),
220
221 TP_STRUCT__entry(
222 __field( unsigned long, vcpu_pc )
223 __field( bool, cache )
224 ),
225
226 TP_fast_assign(
227 __entry->vcpu_pc = vcpu_pc;
228 __entry->cache = cache;
229 ),
230
231 TP_printk("S/W flush at 0x%016lx (cache %s)",
232 __entry->vcpu_pc, __entry->cache ? "on" : "off")
233);
234
235TRACE_EVENT(kvm_toggle_cache,
236 TP_PROTO(unsigned long vcpu_pc, bool was, bool now),
237 TP_ARGS(vcpu_pc, was, now),
238
239 TP_STRUCT__entry(
240 __field( unsigned long, vcpu_pc )
241 __field( bool, was )
242 __field( bool, now )
243 ),
244
245 TP_fast_assign(
246 __entry->vcpu_pc = vcpu_pc;
247 __entry->was = was;
248 __entry->now = now;
249 ),
250
251 TP_printk("VM op at 0x%016lx (cache was %s, now %s)",
252 __entry->vcpu_pc, __entry->was ? "on" : "off",
253 __entry->now ? "on" : "off")
30); 254);
31 255
32/* 256/*
diff --git a/virt/kvm/arm/vgic/trace.h b/virt/kvm/arm/vgic/trace.h
new file mode 100644
index 000000000000..ed3229282888
--- /dev/null
+++ b/virt/kvm/arm/vgic/trace.h
@@ -0,0 +1,37 @@
1#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_VGIC_H
3
4#include <linux/tracepoint.h>
5
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM kvm
8
9TRACE_EVENT(vgic_update_irq_pending,
10 TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
11 TP_ARGS(vcpu_id, irq, level),
12
13 TP_STRUCT__entry(
14 __field( unsigned long, vcpu_id )
15 __field( __u32, irq )
16 __field( bool, level )
17 ),
18
19 TP_fast_assign(
20 __entry->vcpu_id = vcpu_id;
21 __entry->irq = irq;
22 __entry->level = level;
23 ),
24
25 TP_printk("VCPU: %ld, IRQ %d, level: %d",
26 __entry->vcpu_id, __entry->irq, __entry->level)
27);
28
29#endif /* _TRACE_VGIC_H */
30
31#undef TRACE_INCLUDE_PATH
32#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm/vgic
33#undef TRACE_INCLUDE_FILE
34#define TRACE_INCLUDE_FILE trace
35
36/* This part must be outside protection */
37#include <trace/define_trace.h>
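
The trailer of this new header (TRACE_INCLUDE_PATH, TRACE_INCLUDE_FILE and the final include of define_trace.h) relies on the kernel tracing convention of describing each event once and expanding the header more than once. A rough user-space analogue of that idea using an ordinary X-macro list rather than the real tracing machinery (everything below is illustrative):

#include <stdio.h>

/* The list plays the role of the trace header: one description... */
#define EVENT_LIST(EVENT)			\
	EVENT(kvm_entry, "guest entry")		\
	EVENT(kvm_exit,  "guest exit")

/* ...expanded once for declarations... */
#define DECLARE_EVENT(name, msg) static void trace_##name(void);
EVENT_LIST(DECLARE_EVENT)
#undef DECLARE_EVENT

/* ...and once more for the bodies. */
#define DEFINE_EVENT(name, msg) \
	static void trace_##name(void) { puts("trace: " msg); }
EVENT_LIST(DEFINE_EVENT)
#undef DEFINE_EVENT

int main(void)
{
	trace_kvm_entry();
	trace_kvm_exit();
	return 0;
}
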
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 25fd1b942c11..dc68e2e424ab 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -227,10 +227,27 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
227} 227}
228 228
229/** 229/**
230 * kvm_vgic_vcpu_init() - Enable the VCPU interface 230 * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs
231 * @vcpu: the VCPU which's VGIC should be enabled 231 * @vcpu: pointer to the VCPU being created and initialized
232 */ 232 */
233static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) 233int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
234{
235 int ret = 0;
236 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
237
238 if (!irqchip_in_kernel(vcpu->kvm))
239 return 0;
240
241 /*
242 * If we are creating a VCPU with a GICv3 we must also register the
243 * KVM io device for the redistributor that belongs to this VCPU.
244 */
245 if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
246 ret = vgic_register_redist_iodev(vcpu);
247 return ret;
248}
249
250static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
234{ 251{
235 if (kvm_vgic_global_state.type == VGIC_V2) 252 if (kvm_vgic_global_state.type == VGIC_V2)
236 vgic_v2_enable(vcpu); 253 vgic_v2_enable(vcpu);
@@ -269,7 +286,7 @@ int vgic_init(struct kvm *kvm)
269 dist->msis_require_devid = true; 286 dist->msis_require_devid = true;
270 287
271 kvm_for_each_vcpu(i, vcpu, kvm) 288 kvm_for_each_vcpu(i, vcpu, kvm)
272 kvm_vgic_vcpu_init(vcpu); 289 kvm_vgic_vcpu_enable(vcpu);
273 290
274 ret = kvm_vgic_setup_default_irq_routing(kvm); 291 ret = kvm_vgic_setup_default_irq_routing(kvm);
275 if (ret) 292 if (ret)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8d1da1af4b09..2dff288b3a66 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -23,6 +23,7 @@
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/list.h> 24#include <linux/list.h>
25#include <linux/uaccess.h> 25#include <linux/uaccess.h>
26#include <linux/list_sort.h>
26 27
27#include <linux/irqchip/arm-gic-v3.h> 28#include <linux/irqchip/arm-gic-v3.h>
28 29
@@ -33,6 +34,12 @@
33#include "vgic.h" 34#include "vgic.h"
34#include "vgic-mmio.h" 35#include "vgic-mmio.h"
35 36
37static int vgic_its_save_tables_v0(struct vgic_its *its);
38static int vgic_its_restore_tables_v0(struct vgic_its *its);
39static int vgic_its_commit_v0(struct vgic_its *its);
40static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
41 struct kvm_vcpu *filter_vcpu);
42
36/* 43/*
37 * Creates a new (reference to a) struct vgic_irq for a given LPI. 44 * Creates a new (reference to a) struct vgic_irq for a given LPI.
38 * If this LPI is already mapped on another ITS, we increase its refcount 45 * If this LPI is already mapped on another ITS, we increase its refcount
@@ -40,10 +47,12 @@
40 * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. 47 * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq.
41 * This function returns a pointer to the _unlocked_ structure. 48 * This function returns a pointer to the _unlocked_ structure.
42 */ 49 */
43static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) 50static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
51 struct kvm_vcpu *vcpu)
44{ 52{
45 struct vgic_dist *dist = &kvm->arch.vgic; 53 struct vgic_dist *dist = &kvm->arch.vgic;
46 struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; 54 struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq;
55 int ret;
47 56
48 /* In this case there is no put, since we keep the reference. */ 57 /* In this case there is no put, since we keep the reference. */
49 if (irq) 58 if (irq)
@@ -60,6 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
60 irq->config = VGIC_CONFIG_EDGE; 69 irq->config = VGIC_CONFIG_EDGE;
61 kref_init(&irq->refcount); 70 kref_init(&irq->refcount);
62 irq->intid = intid; 71 irq->intid = intid;
72 irq->target_vcpu = vcpu;
63 73
64 spin_lock(&dist->lpi_list_lock); 74 spin_lock(&dist->lpi_list_lock);
65 75
@@ -91,6 +101,19 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
91out_unlock: 101out_unlock:
92 spin_unlock(&dist->lpi_list_lock); 102 spin_unlock(&dist->lpi_list_lock);
93 103
104 /*
105 * We "cache" the configuration table entries in our struct vgic_irq's.
106 * However we only have those structs for mapped IRQs, so we read in
107 * the respective config data from memory here upon mapping the LPI.
108 */
109 ret = update_lpi_config(kvm, irq, NULL);
110 if (ret)
111 return ERR_PTR(ret);
112
113 ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
114 if (ret)
115 return ERR_PTR(ret);
116
94 return irq; 117 return irq;
95} 118}
96 119
@@ -99,6 +122,8 @@ struct its_device {
99 122
100 /* the head for the list of ITTEs */ 123 /* the head for the list of ITTEs */
101 struct list_head itt_head; 124 struct list_head itt_head;
125 u32 num_eventid_bits;
126 gpa_t itt_addr;
102 u32 device_id; 127 u32 device_id;
103}; 128};
104 129
@@ -114,8 +139,8 @@ struct its_collection {
114#define its_is_collection_mapped(coll) ((coll) && \ 139#define its_is_collection_mapped(coll) ((coll) && \
115 ((coll)->target_addr != COLLECTION_NOT_MAPPED)) 140 ((coll)->target_addr != COLLECTION_NOT_MAPPED))
116 141
117struct its_itte { 142struct its_ite {
118 struct list_head itte_list; 143 struct list_head ite_list;
119 144
120 struct vgic_irq *irq; 145 struct vgic_irq *irq;
121 struct its_collection *collection; 146 struct its_collection *collection;
@@ -123,6 +148,50 @@ struct its_itte {
123 u32 event_id; 148 u32 event_id;
124}; 149};
125 150
151/**
152 * struct vgic_its_abi - ITS abi ops and settings
153 * @cte_esz: collection table entry size
154 * @dte_esz: device table entry size
155 * @ite_esz: interrupt translation table entry size
 156 * @save_tables: save the ITS tables into guest RAM
157 * @restore_tables: restore the ITS internal structs from tables
158 * stored in guest RAM
159 * @commit: initialize the registers which expose the ABI settings,
160 * especially the entry sizes
161 */
162struct vgic_its_abi {
163 int cte_esz;
164 int dte_esz;
165 int ite_esz;
166 int (*save_tables)(struct vgic_its *its);
167 int (*restore_tables)(struct vgic_its *its);
168 int (*commit)(struct vgic_its *its);
169};
170
171static const struct vgic_its_abi its_table_abi_versions[] = {
172 [0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
173 .save_tables = vgic_its_save_tables_v0,
174 .restore_tables = vgic_its_restore_tables_v0,
175 .commit = vgic_its_commit_v0,
176 },
177};
178
179#define NR_ITS_ABIS ARRAY_SIZE(its_table_abi_versions)
180
181inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
182{
183 return &its_table_abi_versions[its->abi_rev];
184}
185
186int vgic_its_set_abi(struct vgic_its *its, int rev)
187{
188 const struct vgic_its_abi *abi;
189
190 its->abi_rev = rev;
191 abi = vgic_its_get_abi(its);
192 return abi->commit(its);
193}
194
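
The ABI plumbing above is a small versioned-ops table: each revision bundles its table entry sizes with save/restore/commit callbacks, vgic_its_get_abi() indexes the table by its->abi_rev, and vgic_its_set_abi() switches the revision and immediately re-commits the registers that expose it. Reduced to its essentials (the struct, the single dummy revision and all names below are illustrative):

#include <stdio.h>

struct its_state {
	unsigned int abi_rev;
};

struct abi_ops {
	int ite_esz;
	int (*commit)(struct its_state *its);
};

static int commit_v0(struct its_state *its)
{
	printf("ABI rev %u committed\n", its->abi_rev);
	return 0;
}

static const struct abi_ops abis[] = {
	[0] = { .ite_esz = 8, .commit = commit_v0 },
};

#define NR_ABIS (sizeof(abis) / sizeof(abis[0]))

static inline int set_abi(struct its_state *its, unsigned int rev)
{
	if (rev >= NR_ABIS)
		return -1;		/* unknown revision */
	its->abi_rev = rev;
	return abis[rev].commit(its);
}
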
126/* 195/*
127 * Find and returns a device in the device table for an ITS. 196 * Find and returns a device in the device table for an ITS.
128 * Must be called with the its_lock mutex held. 197 * Must be called with the its_lock mutex held.
@@ -143,27 +212,27 @@ static struct its_device *find_its_device(struct vgic_its *its, u32 device_id)
143 * Device ID/Event ID pair on an ITS. 212 * Device ID/Event ID pair on an ITS.
144 * Must be called with the its_lock mutex held. 213 * Must be called with the its_lock mutex held.
145 */ 214 */
146static struct its_itte *find_itte(struct vgic_its *its, u32 device_id, 215static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
147 u32 event_id) 216 u32 event_id)
148{ 217{
149 struct its_device *device; 218 struct its_device *device;
150 struct its_itte *itte; 219 struct its_ite *ite;
151 220
152 device = find_its_device(its, device_id); 221 device = find_its_device(its, device_id);
153 if (device == NULL) 222 if (device == NULL)
154 return NULL; 223 return NULL;
155 224
156 list_for_each_entry(itte, &device->itt_head, itte_list) 225 list_for_each_entry(ite, &device->itt_head, ite_list)
157 if (itte->event_id == event_id) 226 if (ite->event_id == event_id)
158 return itte; 227 return ite;
159 228
160 return NULL; 229 return NULL;
161} 230}
162 231
163/* To be used as an iterator this macro misses the enclosing parentheses */ 232/* To be used as an iterator this macro misses the enclosing parentheses */
164#define for_each_lpi_its(dev, itte, its) \ 233#define for_each_lpi_its(dev, ite, its) \
165 list_for_each_entry(dev, &(its)->device_list, dev_list) \ 234 list_for_each_entry(dev, &(its)->device_list, dev_list) \
166 list_for_each_entry(itte, &(dev)->itt_head, itte_list) 235 list_for_each_entry(ite, &(dev)->itt_head, ite_list)
167 236
168/* 237/*
169 * We only implement 48 bits of PA at the moment, although the ITS 238 * We only implement 48 bits of PA at the moment, although the ITS
@@ -171,11 +240,14 @@ static struct its_itte *find_itte(struct vgic_its *its, u32 device_id,
171 */ 240 */
172#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) 241#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16))
173#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) 242#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12))
174#define PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16))
175#define PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12))
176 243
177#define GIC_LPI_OFFSET 8192 244#define GIC_LPI_OFFSET 8192
178 245
246#define VITS_TYPER_IDBITS 16
247#define VITS_TYPER_DEVBITS 16
248#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1)
249#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1)
250
179/* 251/*
180 * Finds and returns a collection in the ITS collection table. 252 * Finds and returns a collection in the ITS collection table.
181 * Must be called with the its_lock mutex held. 253 * Must be called with the its_lock mutex held.
@@ -204,7 +276,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
204static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, 276static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
205 struct kvm_vcpu *filter_vcpu) 277 struct kvm_vcpu *filter_vcpu)
206{ 278{
207 u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); 279 u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
208 u8 prop; 280 u8 prop;
209 int ret; 281 int ret;
210 282
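
update_lpi_config() above now resolves the property table base with GICR_PROPBASER_ADDRESS() before reading the per-LPI configuration byte. Assuming the usual GICv3 layout, one byte per LPI at propbase + (intid - 8192) with bit 0 as the enable bit and bits 7:2 as the priority (stated here as an assumption, not quoted from the patch), the address and decode steps are:

#include <stdbool.h>
#include <stdint.h>

#define GIC_LPI_OFFSET	8192	/* first LPI INTID */

static inline uint64_t lpi_prop_addr(uint64_t propbase, uint32_t intid)
{
	return propbase + (intid - GIC_LPI_OFFSET);	/* one byte per LPI */
}

static inline bool lpi_prop_enabled(uint8_t prop)
{
	return prop & 0x01;	/* bit 0: enable */
}

static inline uint8_t lpi_prop_priority(uint8_t prop)
{
	return prop & 0xfc;	/* bits 7:2: priority */
}
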
@@ -229,13 +301,13 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
229} 301}
230 302
231/* 303/*
232 * Create a snapshot of the current LPI list, so that we can enumerate all 304 * Create a snapshot of the current LPIs targeting @vcpu, so that we can
233 * LPIs without holding any lock. 305 * enumerate those LPIs without holding any lock.
234 * Returns the array length and puts the kmalloc'ed array into intid_ptr. 306 * Returns their number and puts the kmalloc'ed array into intid_ptr.
235 */ 307 */
236static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) 308static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
237{ 309{
238 struct vgic_dist *dist = &kvm->arch.vgic; 310 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
239 struct vgic_irq *irq; 311 struct vgic_irq *irq;
240 u32 *intids; 312 u32 *intids;
241 int irq_count = dist->lpi_list_count, i = 0; 313 int irq_count = dist->lpi_list_count, i = 0;
@@ -254,14 +326,14 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
254 spin_lock(&dist->lpi_list_lock); 326 spin_lock(&dist->lpi_list_lock);
255 list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { 327 list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
256 /* We don't need to "get" the IRQ, as we hold the list lock. */ 328 /* We don't need to "get" the IRQ, as we hold the list lock. */
257 intids[i] = irq->intid; 329 if (irq->target_vcpu != vcpu)
258 if (++i == irq_count) 330 continue;
259 break; 331 intids[i++] = irq->intid;
260 } 332 }
261 spin_unlock(&dist->lpi_list_lock); 333 spin_unlock(&dist->lpi_list_lock);
262 334
263 *intid_ptr = intids; 335 *intid_ptr = intids;
264 return irq_count; 336 return i;
265} 337}
266 338
267/* 339/*
@@ -270,18 +342,18 @@ static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr)
270 * Needs to be called whenever either the collection for a LPIs has 342 * Needs to be called whenever either the collection for a LPIs has
271 * changed or the collection itself got retargeted. 343 * changed or the collection itself got retargeted.
272 */ 344 */
273static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte) 345static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
274{ 346{
275 struct kvm_vcpu *vcpu; 347 struct kvm_vcpu *vcpu;
276 348
277 if (!its_is_collection_mapped(itte->collection)) 349 if (!its_is_collection_mapped(ite->collection))
278 return; 350 return;
279 351
280 vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); 352 vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
281 353
282 spin_lock(&itte->irq->irq_lock); 354 spin_lock(&ite->irq->irq_lock);
283 itte->irq->target_vcpu = vcpu; 355 ite->irq->target_vcpu = vcpu;
284 spin_unlock(&itte->irq->irq_lock); 356 spin_unlock(&ite->irq->irq_lock);
285} 357}
286 358
287/* 359/*
@@ -292,13 +364,13 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
292 struct its_collection *coll) 364 struct its_collection *coll)
293{ 365{
294 struct its_device *device; 366 struct its_device *device;
295 struct its_itte *itte; 367 struct its_ite *ite;
296 368
297 for_each_lpi_its(device, itte, its) { 369 for_each_lpi_its(device, ite, its) {
298 if (!itte->collection || coll != itte->collection) 370 if (!ite->collection || coll != ite->collection)
299 continue; 371 continue;
300 372
301 update_affinity_itte(kvm, itte); 373 update_affinity_ite(kvm, ite);
302 } 374 }
303} 375}
304 376
@@ -310,20 +382,20 @@ static u32 max_lpis_propbaser(u64 propbaser)
310} 382}
311 383
312/* 384/*
313 * Scan the whole LPI pending table and sync the pending bit in there 385 * Sync the pending table pending bit of LPIs targeting @vcpu
314 * with our own data structures. This relies on the LPI being 386 * with our own data structures. This relies on the LPI being
315 * mapped before. 387 * mapped before.
316 */ 388 */
317static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) 389static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
318{ 390{
319 gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); 391 gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
320 struct vgic_irq *irq; 392 struct vgic_irq *irq;
321 int last_byte_offset = -1; 393 int last_byte_offset = -1;
322 int ret = 0; 394 int ret = 0;
323 u32 *intids; 395 u32 *intids;
324 int nr_irqs, i; 396 int nr_irqs, i;
325 397
326 nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids); 398 nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
327 if (nr_irqs < 0) 399 if (nr_irqs < 0)
328 return nr_irqs; 400 return nr_irqs;
329 401
@@ -364,6 +436,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
364 struct vgic_its *its, 436 struct vgic_its *its,
365 gpa_t addr, unsigned int len) 437 gpa_t addr, unsigned int len)
366{ 438{
439 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
367 u64 reg = GITS_TYPER_PLPIS; 440 u64 reg = GITS_TYPER_PLPIS;
368 441
369 /* 442 /*
@@ -374,8 +447,9 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm,
374 * To avoid memory waste in the guest, we keep the number of IDBits and 447 * To avoid memory waste in the guest, we keep the number of IDBits and
 375 * DevBits low - at least for the time being. 448 * DevBits low - at least for the time being.
376 */ 449 */
377 reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT; 450 reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT;
378 reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT; 451 reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT;
452 reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
379 453
380 return extract_bytes(reg, addr & 7, len); 454 return extract_bytes(reg, addr & 7, len);
381} 455}
@@ -384,7 +458,23 @@ static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm,
384 struct vgic_its *its, 458 struct vgic_its *its,
385 gpa_t addr, unsigned int len) 459 gpa_t addr, unsigned int len)
386{ 460{
387 return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); 461 u32 val;
462
463 val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK;
464 val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM;
465 return val;
466}
467
468static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm,
469 struct vgic_its *its,
470 gpa_t addr, unsigned int len,
471 unsigned long val)
472{
473 u32 rev = GITS_IIDR_REV(val);
474
475 if (rev >= NR_ITS_ABIS)
476 return -EINVAL;
477 return vgic_its_set_abi(its, rev);
388} 478}
389 479
390static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, 480static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
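
The GITS_IIDR accessors above are how the migration ABI revision is exposed and selected: the read packs its->abi_rev into the Revision field, and the uaccess write handler (used for userspace register accesses, judging by its prefix) lets the VMM pin a revision before restoring tables, rejecting anything the kernel does not implement. The field manipulation itself is a 4-bit slot at bit 12, as in this sketch (constants mirror the GITS_IIDR_REV definitions added to arm-gic-v3.h earlier in this diff):

#include <stdint.h>

#define IIDR_REV_SHIFT	12
#define IIDR_REV_MASK	(0xfu << IIDR_REV_SHIFT)

static inline uint32_t iidr_pack_rev(uint32_t iidr, uint32_t rev)
{
	return (iidr & ~IIDR_REV_MASK) |
	       ((rev << IIDR_REV_SHIFT) & IIDR_REV_MASK);
}

static inline uint32_t iidr_unpack_rev(uint32_t iidr)
{
	return (iidr & IIDR_REV_MASK) >> IIDR_REV_SHIFT;
}
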
@@ -425,25 +515,25 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
425 u32 devid, u32 eventid) 515 u32 devid, u32 eventid)
426{ 516{
427 struct kvm_vcpu *vcpu; 517 struct kvm_vcpu *vcpu;
428 struct its_itte *itte; 518 struct its_ite *ite;
429 519
430 if (!its->enabled) 520 if (!its->enabled)
431 return -EBUSY; 521 return -EBUSY;
432 522
433 itte = find_itte(its, devid, eventid); 523 ite = find_ite(its, devid, eventid);
434 if (!itte || !its_is_collection_mapped(itte->collection)) 524 if (!ite || !its_is_collection_mapped(ite->collection))
435 return E_ITS_INT_UNMAPPED_INTERRUPT; 525 return E_ITS_INT_UNMAPPED_INTERRUPT;
436 526
437 vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); 527 vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
438 if (!vcpu) 528 if (!vcpu)
439 return E_ITS_INT_UNMAPPED_INTERRUPT; 529 return E_ITS_INT_UNMAPPED_INTERRUPT;
440 530
441 if (!vcpu->arch.vgic_cpu.lpis_enabled) 531 if (!vcpu->arch.vgic_cpu.lpis_enabled)
442 return -EBUSY; 532 return -EBUSY;
443 533
444 spin_lock(&itte->irq->irq_lock); 534 spin_lock(&ite->irq->irq_lock);
445 itte->irq->pending_latch = true; 535 ite->irq->pending_latch = true;
446 vgic_queue_irq_unlock(kvm, itte->irq); 536 vgic_queue_irq_unlock(kvm, ite->irq);
447 537
448 return 0; 538 return 0;
449} 539}
@@ -511,15 +601,15 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
511} 601}
512 602
513/* Requires the its_lock to be held. */ 603/* Requires the its_lock to be held. */
514static void its_free_itte(struct kvm *kvm, struct its_itte *itte) 604static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
515{ 605{
516 list_del(&itte->itte_list); 606 list_del(&ite->ite_list);
517 607
518 /* This put matches the get in vgic_add_lpi. */ 608 /* This put matches the get in vgic_add_lpi. */
519 if (itte->irq) 609 if (ite->irq)
520 vgic_put_irq(kvm, itte->irq); 610 vgic_put_irq(kvm, ite->irq);
521 611
522 kfree(itte); 612 kfree(ite);
523} 613}
524 614
525static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) 615static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
@@ -529,9 +619,11 @@ static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size)
529 619
530#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) 620#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8)
531#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) 621#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32)
622#define its_cmd_get_size(cmd) (its_cmd_mask_field(cmd, 1, 0, 5) + 1)
532#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) 623#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32)
533#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) 624#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32)
534#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) 625#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16)
626#define its_cmd_get_ittaddr(cmd) (its_cmd_mask_field(cmd, 2, 8, 44) << 8)
535#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) 627#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32)
536#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) 628#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1)
537 629
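
The two new accessors reuse its_cmd_mask_field(), which treats an ITS command as four 64-bit words and extracts an arbitrary bit-field from one of them; its_cmd_get_ittaddr() then shifts the 44-bit field back up by 8 to recover a 256-byte-aligned ITT address. A stand-alone version of the extraction (a sketch that skips the endianness conversion the kernel performs on the command words):

#include <stdint.h>

static inline uint64_t cmd_mask_field(const uint64_t *cmd, int word,
				      int shift, int size)
{
	return (cmd[word] >> shift) &
	       (size < 64 ? (1ULL << size) - 1 : ~0ULL);
}

/* e.g. MAPD's ITT address: bits [51:8] of word 2, re-aligned to 256 bytes. */
static inline uint64_t cmd_get_ittaddr(const uint64_t *cmd)
{
	return cmd_mask_field(cmd, 2, 8, 44) << 8;
}
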
@@ -544,17 +636,17 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
544{ 636{
545 u32 device_id = its_cmd_get_deviceid(its_cmd); 637 u32 device_id = its_cmd_get_deviceid(its_cmd);
546 u32 event_id = its_cmd_get_id(its_cmd); 638 u32 event_id = its_cmd_get_id(its_cmd);
547 struct its_itte *itte; 639 struct its_ite *ite;
548 640
549 641
550 itte = find_itte(its, device_id, event_id); 642 ite = find_ite(its, device_id, event_id);
551 if (itte && itte->collection) { 643 if (ite && ite->collection) {
552 /* 644 /*
553 * Though the spec talks about removing the pending state, we 645 * Though the spec talks about removing the pending state, we
554 * don't bother here since we clear the ITTE anyway and the 646 * don't bother here since we clear the ITTE anyway and the
555 * pending state is a property of the ITTE struct. 647 * pending state is a property of the ITTE struct.
556 */ 648 */
557 its_free_itte(kvm, itte); 649 its_free_ite(kvm, ite);
558 return 0; 650 return 0;
559 } 651 }
560 652
@@ -572,26 +664,26 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
572 u32 event_id = its_cmd_get_id(its_cmd); 664 u32 event_id = its_cmd_get_id(its_cmd);
573 u32 coll_id = its_cmd_get_collection(its_cmd); 665 u32 coll_id = its_cmd_get_collection(its_cmd);
574 struct kvm_vcpu *vcpu; 666 struct kvm_vcpu *vcpu;
575 struct its_itte *itte; 667 struct its_ite *ite;
576 struct its_collection *collection; 668 struct its_collection *collection;
577 669
578 itte = find_itte(its, device_id, event_id); 670 ite = find_ite(its, device_id, event_id);
579 if (!itte) 671 if (!ite)
580 return E_ITS_MOVI_UNMAPPED_INTERRUPT; 672 return E_ITS_MOVI_UNMAPPED_INTERRUPT;
581 673
582 if (!its_is_collection_mapped(itte->collection)) 674 if (!its_is_collection_mapped(ite->collection))
583 return E_ITS_MOVI_UNMAPPED_COLLECTION; 675 return E_ITS_MOVI_UNMAPPED_COLLECTION;
584 676
585 collection = find_collection(its, coll_id); 677 collection = find_collection(its, coll_id);
586 if (!its_is_collection_mapped(collection)) 678 if (!its_is_collection_mapped(collection))
587 return E_ITS_MOVI_UNMAPPED_COLLECTION; 679 return E_ITS_MOVI_UNMAPPED_COLLECTION;
588 680
589 itte->collection = collection; 681 ite->collection = collection;
590 vcpu = kvm_get_vcpu(kvm, collection->target_addr); 682 vcpu = kvm_get_vcpu(kvm, collection->target_addr);
591 683
592 spin_lock(&itte->irq->irq_lock); 684 spin_lock(&ite->irq->irq_lock);
593 itte->irq->target_vcpu = vcpu; 685 ite->irq->target_vcpu = vcpu;
594 spin_unlock(&itte->irq->irq_lock); 686 spin_unlock(&ite->irq->irq_lock);
595 687
596 return 0; 688 return 0;
597} 689}
@@ -600,16 +692,31 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
600 * Check whether an ID can be stored into the corresponding guest table. 692 * Check whether an ID can be stored into the corresponding guest table.
601 * For a direct table this is pretty easy, but gets a bit nasty for 693 * For a direct table this is pretty easy, but gets a bit nasty for
602 * indirect tables. We check whether the resulting guest physical address 694 * indirect tables. We check whether the resulting guest physical address
603 * is actually valid (covered by a memslot and guest accessbible). 695 * is actually valid (covered by a memslot and guest accessible).
604 * For this we have to read the respective first level entry. 696 * For this we have to read the respective first level entry.
605 */ 697 */
606static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) 698static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
699 gpa_t *eaddr)
607{ 700{
608 int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; 701 int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
702 u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
703 int esz = GITS_BASER_ENTRY_SIZE(baser);
609 int index; 704 int index;
610 u64 indirect_ptr;
611 gfn_t gfn; 705 gfn_t gfn;
612 int esz = GITS_BASER_ENTRY_SIZE(baser); 706
707 switch (type) {
708 case GITS_BASER_TYPE_DEVICE:
709 if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
710 return false;
711 break;
712 case GITS_BASER_TYPE_COLLECTION:
713 /* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */
714 if (id >= BIT_ULL(16))
715 return false;
716 break;
717 default:
718 return false;
719 }
613 720
614 if (!(baser & GITS_BASER_INDIRECT)) { 721 if (!(baser & GITS_BASER_INDIRECT)) {
615 phys_addr_t addr; 722 phys_addr_t addr;
@@ -620,6 +727,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
620 addr = BASER_ADDRESS(baser) + id * esz; 727 addr = BASER_ADDRESS(baser) + id * esz;
621 gfn = addr >> PAGE_SHIFT; 728 gfn = addr >> PAGE_SHIFT;
622 729
730 if (eaddr)
731 *eaddr = addr;
623 return kvm_is_visible_gfn(its->dev->kvm, gfn); 732 return kvm_is_visible_gfn(its->dev->kvm, gfn);
624 } 733 }
625 734
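
For a flat (non-indirect) table, vgic_its_check_id() now also returns the computed entry address through the new eaddr parameter; the address is base + id * entry_size, and the ID is acceptable only if that slot lies inside the table and the resulting address is backed by a memslot. The flat-table arithmetic in isolation (a sketch; the memslot visibility test is left as a comment because it needs KVM internals):

#include <stdbool.h>
#include <stdint.h>

static inline bool flat_table_entry(uint64_t base, uint64_t tbl_size,
				    uint32_t id, int esz, uint64_t *eaddr)
{
	if (id >= tbl_size / esz)	/* ID indexes past the end of the table */
		return false;

	*eaddr = base + (uint64_t)id * esz;
	/* the kernel additionally checks kvm_is_visible_gfn() on *eaddr */
	return true;
}
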
@@ -652,6 +761,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id)
652 indirect_ptr += index * esz; 761 indirect_ptr += index * esz;
653 gfn = indirect_ptr >> PAGE_SHIFT; 762 gfn = indirect_ptr >> PAGE_SHIFT;
654 763
764 if (eaddr)
765 *eaddr = indirect_ptr;
655 return kvm_is_visible_gfn(its->dev->kvm, gfn); 766 return kvm_is_visible_gfn(its->dev->kvm, gfn);
656} 767}
657 768
@@ -661,7 +772,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
661{ 772{
662 struct its_collection *collection; 773 struct its_collection *collection;
663 774
664 if (!vgic_its_check_id(its, its->baser_coll_table, coll_id)) 775 if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
665 return E_ITS_MAPC_COLLECTION_OOR; 776 return E_ITS_MAPC_COLLECTION_OOR;
666 777
667 collection = kzalloc(sizeof(*collection), GFP_KERNEL); 778 collection = kzalloc(sizeof(*collection), GFP_KERNEL);
@@ -679,7 +790,7 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
679{ 790{
680 struct its_collection *collection; 791 struct its_collection *collection;
681 struct its_device *device; 792 struct its_device *device;
682 struct its_itte *itte; 793 struct its_ite *ite;
683 794
684 /* 795 /*
685 * Clearing the mapping for that collection ID removes the 796 * Clearing the mapping for that collection ID removes the
@@ -690,15 +801,34 @@ static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id)
690 if (!collection) 801 if (!collection)
691 return; 802 return;
692 803
693 for_each_lpi_its(device, itte, its) 804 for_each_lpi_its(device, ite, its)
694 if (itte->collection && 805 if (ite->collection &&
695 itte->collection->collection_id == coll_id) 806 ite->collection->collection_id == coll_id)
696 itte->collection = NULL; 807 ite->collection = NULL;
697 808
698 list_del(&collection->coll_list); 809 list_del(&collection->coll_list);
699 kfree(collection); 810 kfree(collection);
700} 811}
701 812
813/* Must be called with its_lock mutex held */
814static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
815 struct its_collection *collection,
816 u32 lpi_id, u32 event_id)
817{
818 struct its_ite *ite;
819
820 ite = kzalloc(sizeof(*ite), GFP_KERNEL);
821 if (!ite)
822 return ERR_PTR(-ENOMEM);
823
824 ite->event_id = event_id;
825 ite->collection = collection;
826 ite->lpi = lpi_id;
827
828 list_add_tail(&ite->ite_list, &device->itt_head);
829 return ite;
830}
831
702/* 832/*
703 * The MAPTI and MAPI commands map LPIs to ITTEs. 833 * The MAPTI and MAPI commands map LPIs to ITTEs.
704 * Must be called with its_lock mutex held. 834 * Must be called with its_lock mutex held.
@@ -709,16 +839,20 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
709 u32 device_id = its_cmd_get_deviceid(its_cmd); 839 u32 device_id = its_cmd_get_deviceid(its_cmd);
710 u32 event_id = its_cmd_get_id(its_cmd); 840 u32 event_id = its_cmd_get_id(its_cmd);
711 u32 coll_id = its_cmd_get_collection(its_cmd); 841 u32 coll_id = its_cmd_get_collection(its_cmd);
712 struct its_itte *itte; 842 struct its_ite *ite;
843 struct kvm_vcpu *vcpu = NULL;
713 struct its_device *device; 844 struct its_device *device;
714 struct its_collection *collection, *new_coll = NULL; 845 struct its_collection *collection, *new_coll = NULL;
715 int lpi_nr;
716 struct vgic_irq *irq; 846 struct vgic_irq *irq;
847 int lpi_nr;
717 848
718 device = find_its_device(its, device_id); 849 device = find_its_device(its, device_id);
719 if (!device) 850 if (!device)
720 return E_ITS_MAPTI_UNMAPPED_DEVICE; 851 return E_ITS_MAPTI_UNMAPPED_DEVICE;
721 852
853 if (event_id >= BIT_ULL(device->num_eventid_bits))
854 return E_ITS_MAPTI_ID_OOR;
855
722 if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) 856 if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI)
723 lpi_nr = its_cmd_get_physical_id(its_cmd); 857 lpi_nr = its_cmd_get_physical_id(its_cmd);
724 else 858 else
@@ -728,7 +862,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
728 return E_ITS_MAPTI_PHYSICALID_OOR; 862 return E_ITS_MAPTI_PHYSICALID_OOR;
729 863
730 /* If there is an existing mapping, behavior is UNPREDICTABLE. */ 864 /* If there is an existing mapping, behavior is UNPREDICTABLE. */
731 if (find_itte(its, device_id, event_id)) 865 if (find_ite(its, device_id, event_id))
732 return 0; 866 return 0;
733 867
734 collection = find_collection(its, coll_id); 868 collection = find_collection(its, coll_id);
@@ -739,36 +873,24 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
739 new_coll = collection; 873 new_coll = collection;
740 } 874 }
741 875
742 itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); 876 ite = vgic_its_alloc_ite(device, collection, lpi_nr, event_id);
743 if (!itte) { 877 if (IS_ERR(ite)) {
744 if (new_coll) 878 if (new_coll)
745 vgic_its_free_collection(its, coll_id); 879 vgic_its_free_collection(its, coll_id);
746 return -ENOMEM; 880 return PTR_ERR(ite);
747 } 881 }
748 882
749 itte->event_id = event_id; 883 if (its_is_collection_mapped(collection))
750 list_add_tail(&itte->itte_list, &device->itt_head); 884 vcpu = kvm_get_vcpu(kvm, collection->target_addr);
751
752 itte->collection = collection;
753 itte->lpi = lpi_nr;
754 885
755 irq = vgic_add_lpi(kvm, lpi_nr); 886 irq = vgic_add_lpi(kvm, lpi_nr, vcpu);
756 if (IS_ERR(irq)) { 887 if (IS_ERR(irq)) {
757 if (new_coll) 888 if (new_coll)
758 vgic_its_free_collection(its, coll_id); 889 vgic_its_free_collection(its, coll_id);
759 its_free_itte(kvm, itte); 890 its_free_ite(kvm, ite);
760 return PTR_ERR(irq); 891 return PTR_ERR(irq);
761 } 892 }
762 itte->irq = irq; 893 ite->irq = irq;
763
764 update_affinity_itte(kvm, itte);
765
766 /*
767 * We "cache" the configuration table entries in out struct vgic_irq's.
768 * However we only have those structs for mapped IRQs, so we read in
769 * the respective config data from memory here upon mapping the LPI.
770 */
771 update_lpi_config(kvm, itte->irq, NULL);
772 894
773 return 0; 895 return 0;
774} 896}
@@ -776,20 +898,40 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
776/* Requires the its_lock to be held. */ 898/* Requires the its_lock to be held. */
777static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) 899static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device)
778{ 900{
779 struct its_itte *itte, *temp; 901 struct its_ite *ite, *temp;
780 902
781 /* 903 /*
782 * The spec says that unmapping a device with still valid 904 * The spec says that unmapping a device with still valid
783 * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, 905 * ITTEs associated is UNPREDICTABLE. We remove all ITTEs,
784 * since we cannot leave the memory unreferenced. 906 * since we cannot leave the memory unreferenced.
785 */ 907 */
786 list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list) 908 list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
787 its_free_itte(kvm, itte); 909 its_free_ite(kvm, ite);
788 910
789 list_del(&device->dev_list); 911 list_del(&device->dev_list);
790 kfree(device); 912 kfree(device);
791} 913}
792 914
915/* Must be called with its_lock mutex held */
916static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
917 u32 device_id, gpa_t itt_addr,
918 u8 num_eventid_bits)
919{
920 struct its_device *device;
921
922 device = kzalloc(sizeof(*device), GFP_KERNEL);
923 if (!device)
924 return ERR_PTR(-ENOMEM);
925
926 device->device_id = device_id;
927 device->itt_addr = itt_addr;
928 device->num_eventid_bits = num_eventid_bits;
929 INIT_LIST_HEAD(&device->itt_head);
930
931 list_add_tail(&device->dev_list, &its->device_list);
932 return device;
933}
934
793/* 935/*
794 * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). 936 * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
795 * Must be called with the its_lock mutex held. 937 * Must be called with the its_lock mutex held.
@@ -799,11 +941,16 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
799{ 941{
800 u32 device_id = its_cmd_get_deviceid(its_cmd); 942 u32 device_id = its_cmd_get_deviceid(its_cmd);
801 bool valid = its_cmd_get_validbit(its_cmd); 943 bool valid = its_cmd_get_validbit(its_cmd);
944 u8 num_eventid_bits = its_cmd_get_size(its_cmd);
945 gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
802 struct its_device *device; 946 struct its_device *device;
803 947
804 if (!vgic_its_check_id(its, its->baser_device_table, device_id)) 948 if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL))
805 return E_ITS_MAPD_DEVICE_OOR; 949 return E_ITS_MAPD_DEVICE_OOR;
806 950
951 if (valid && num_eventid_bits > VITS_TYPER_IDBITS)
952 return E_ITS_MAPD_ITTSIZE_OOR;
953
807 device = find_its_device(its, device_id); 954 device = find_its_device(its, device_id);
808 955
809 /* 956 /*
@@ -821,14 +968,10 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
821 if (!valid) 968 if (!valid)
822 return 0; 969 return 0;
823 970
824 device = kzalloc(sizeof(struct its_device), GFP_KERNEL); 971 device = vgic_its_alloc_device(its, device_id, itt_addr,
825 if (!device) 972 num_eventid_bits);
826 return -ENOMEM; 973 if (IS_ERR(device))
827 974 return PTR_ERR(device);
828 device->device_id = device_id;
829 INIT_LIST_HEAD(&device->itt_head);
830
831 list_add_tail(&device->dev_list, &its->device_list);
832 975
833 return 0; 976 return 0;
834} 977}
@@ -883,14 +1026,14 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,
883{ 1026{
884 u32 device_id = its_cmd_get_deviceid(its_cmd); 1027 u32 device_id = its_cmd_get_deviceid(its_cmd);
885 u32 event_id = its_cmd_get_id(its_cmd); 1028 u32 event_id = its_cmd_get_id(its_cmd);
886 struct its_itte *itte; 1029 struct its_ite *ite;
887 1030
888 1031
889 itte = find_itte(its, device_id, event_id); 1032 ite = find_ite(its, device_id, event_id);
890 if (!itte) 1033 if (!ite)
891 return E_ITS_CLEAR_UNMAPPED_INTERRUPT; 1034 return E_ITS_CLEAR_UNMAPPED_INTERRUPT;
892 1035
893 itte->irq->pending_latch = false; 1036 ite->irq->pending_latch = false;
894 1037
895 return 0; 1038 return 0;
896} 1039}
@@ -904,14 +1047,14 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
904{ 1047{
905 u32 device_id = its_cmd_get_deviceid(its_cmd); 1048 u32 device_id = its_cmd_get_deviceid(its_cmd);
906 u32 event_id = its_cmd_get_id(its_cmd); 1049 u32 event_id = its_cmd_get_id(its_cmd);
907 struct its_itte *itte; 1050 struct its_ite *ite;
908 1051
909 1052
910 itte = find_itte(its, device_id, event_id); 1053 ite = find_ite(its, device_id, event_id);
911 if (!itte) 1054 if (!ite)
912 return E_ITS_INV_UNMAPPED_INTERRUPT; 1055 return E_ITS_INV_UNMAPPED_INTERRUPT;
913 1056
914 return update_lpi_config(kvm, itte->irq, NULL); 1057 return update_lpi_config(kvm, ite->irq, NULL);
915} 1058}
916 1059
917/* 1060/*
@@ -938,7 +1081,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
938 1081
939 vcpu = kvm_get_vcpu(kvm, collection->target_addr); 1082 vcpu = kvm_get_vcpu(kvm, collection->target_addr);
940 1083
941 irq_count = vgic_copy_lpi_list(kvm, &intids); 1084 irq_count = vgic_copy_lpi_list(vcpu, &intids);
942 if (irq_count < 0) 1085 if (irq_count < 0)
943 return irq_count; 1086 return irq_count;
944 1087
@@ -1213,6 +1356,33 @@ static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm,
1213 return extract_bytes(its->creadr, addr & 0x7, len); 1356 return extract_bytes(its->creadr, addr & 0x7, len);
1214} 1357}
1215 1358
1359static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm,
1360 struct vgic_its *its,
1361 gpa_t addr, unsigned int len,
1362 unsigned long val)
1363{
1364 u32 cmd_offset;
1365 int ret = 0;
1366
1367 mutex_lock(&its->cmd_lock);
1368
1369 if (its->enabled) {
1370 ret = -EBUSY;
1371 goto out;
1372 }
1373
1374 cmd_offset = ITS_CMD_OFFSET(val);
1375 if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
1376 ret = -EINVAL;
1377 goto out;
1378 }
1379
1380 its->creadr = cmd_offset;
1381out:
1382 mutex_unlock(&its->cmd_lock);
1383 return ret;
1384}
1385
1216#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) 1386#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
1217static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, 1387static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
1218 struct vgic_its *its, 1388 struct vgic_its *its,
@@ -1241,6 +1411,7 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
1241 gpa_t addr, unsigned int len, 1411 gpa_t addr, unsigned int len,
1242 unsigned long val) 1412 unsigned long val)
1243{ 1413{
1414 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
1244 u64 entry_size, device_type; 1415 u64 entry_size, device_type;
1245 u64 reg, *regptr, clearbits = 0; 1416 u64 reg, *regptr, clearbits = 0;
1246 1417
@@ -1251,12 +1422,12 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
1251 switch (BASER_INDEX(addr)) { 1422 switch (BASER_INDEX(addr)) {
1252 case 0: 1423 case 0:
1253 regptr = &its->baser_device_table; 1424 regptr = &its->baser_device_table;
1254 entry_size = 8; 1425 entry_size = abi->dte_esz;
1255 device_type = GITS_BASER_TYPE_DEVICE; 1426 device_type = GITS_BASER_TYPE_DEVICE;
1256 break; 1427 break;
1257 case 1: 1428 case 1:
1258 regptr = &its->baser_coll_table; 1429 regptr = &its->baser_coll_table;
1259 entry_size = 8; 1430 entry_size = abi->cte_esz;
1260 device_type = GITS_BASER_TYPE_COLLECTION; 1431 device_type = GITS_BASER_TYPE_COLLECTION;
1261 clearbits = GITS_BASER_INDIRECT; 1432 clearbits = GITS_BASER_INDIRECT;
1262 break; 1433 break;
@@ -1317,6 +1488,16 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
1317 .its_write = wr, \ 1488 .its_write = wr, \
1318} 1489}
1319 1490
1491#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\
1492{ \
1493 .reg_offset = off, \
1494 .len = length, \
1495 .access_flags = acc, \
1496 .its_read = rd, \
1497 .its_write = wr, \
1498 .uaccess_its_write = uwr, \
1499}
1500
1320static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, 1501static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
1321 gpa_t addr, unsigned int len, unsigned long val) 1502 gpa_t addr, unsigned int len, unsigned long val)
1322{ 1503{
@@ -1327,8 +1508,9 @@ static struct vgic_register_region its_registers[] = {
1327 REGISTER_ITS_DESC(GITS_CTLR, 1508 REGISTER_ITS_DESC(GITS_CTLR,
1328 vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, 1509 vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
1329 VGIC_ACCESS_32bit), 1510 VGIC_ACCESS_32bit),
1330 REGISTER_ITS_DESC(GITS_IIDR, 1511 REGISTER_ITS_DESC_UACCESS(GITS_IIDR,
1331 vgic_mmio_read_its_iidr, its_mmio_write_wi, 4, 1512 vgic_mmio_read_its_iidr, its_mmio_write_wi,
1513 vgic_mmio_uaccess_write_its_iidr, 4,
1332 VGIC_ACCESS_32bit), 1514 VGIC_ACCESS_32bit),
1333 REGISTER_ITS_DESC(GITS_TYPER, 1515 REGISTER_ITS_DESC(GITS_TYPER,
1334 vgic_mmio_read_its_typer, its_mmio_write_wi, 8, 1516 vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
@@ -1339,8 +1521,9 @@ static struct vgic_register_region its_registers[] = {
1339 REGISTER_ITS_DESC(GITS_CWRITER, 1521 REGISTER_ITS_DESC(GITS_CWRITER,
1340 vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, 1522 vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
1341 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), 1523 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1342 REGISTER_ITS_DESC(GITS_CREADR, 1524 REGISTER_ITS_DESC_UACCESS(GITS_CREADR,
1343 vgic_mmio_read_its_creadr, its_mmio_write_wi, 8, 1525 vgic_mmio_read_its_creadr, its_mmio_write_wi,
1526 vgic_mmio_uaccess_write_its_creadr, 8,
1344 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), 1527 VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
1345 REGISTER_ITS_DESC(GITS_BASER, 1528 REGISTER_ITS_DESC(GITS_BASER,
1346 vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, 1529 vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
@@ -1357,17 +1540,19 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
1357 its_sync_lpi_pending_table(vcpu); 1540 its_sync_lpi_pending_table(vcpu);
1358} 1541}
1359 1542
1360static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) 1543static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its,
1544 u64 addr)
1361{ 1545{
1362 struct vgic_io_device *iodev = &its->iodev; 1546 struct vgic_io_device *iodev = &its->iodev;
1363 int ret; 1547 int ret;
1364 1548
1365 if (!its->initialized) 1549 mutex_lock(&kvm->slots_lock);
1366 return -EBUSY; 1550 if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
1367 1551 ret = -EBUSY;
1368 if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) 1552 goto out;
1369 return -ENXIO; 1553 }
1370 1554
1555 its->vgic_its_base = addr;
1371 iodev->regions = its_registers; 1556 iodev->regions = its_registers;
1372 iodev->nr_regions = ARRAY_SIZE(its_registers); 1557 iodev->nr_regions = ARRAY_SIZE(its_registers);
1373 kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); 1558 kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops);
@@ -1375,9 +1560,9 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
1375 iodev->base_addr = its->vgic_its_base; 1560 iodev->base_addr = its->vgic_its_base;
1376 iodev->iodev_type = IODEV_ITS; 1561 iodev->iodev_type = IODEV_ITS;
1377 iodev->its = its; 1562 iodev->its = its;
1378 mutex_lock(&kvm->slots_lock);
1379 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, 1563 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr,
1380 KVM_VGIC_V3_ITS_SIZE, &iodev->dev); 1564 KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
1565out:
1381 mutex_unlock(&kvm->slots_lock); 1566 mutex_unlock(&kvm->slots_lock);
1382 1567
1383 return ret; 1568 return ret;
@@ -1387,7 +1572,6 @@ static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
1387 (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ 1572 (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
1388 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ 1573 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
1389 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ 1574 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \
1390 ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | \
1391 GITS_BASER_PAGE_SIZE_64K) 1575 GITS_BASER_PAGE_SIZE_64K)
1392 1576
1393#define INITIAL_PROPBASER_VALUE \ 1577#define INITIAL_PROPBASER_VALUE \
@@ -1415,7 +1599,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
1415 INIT_LIST_HEAD(&its->collection_list); 1599 INIT_LIST_HEAD(&its->collection_list);
1416 1600
1417 dev->kvm->arch.vgic.has_its = true; 1601 dev->kvm->arch.vgic.has_its = true;
1418 its->initialized = false;
1419 its->enabled = false; 1602 its->enabled = false;
1420 its->dev = dev; 1603 its->dev = dev;
1421 1604
@@ -1427,16 +1610,23 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
1427 1610
1428 dev->private = its; 1611 dev->private = its;
1429 1612
1430 return 0; 1613 return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
1614}
1615
1616static void vgic_its_free_device(struct kvm *kvm, struct its_device *dev)
1617{
1618 struct its_ite *ite, *tmp;
1619
1620 list_for_each_entry_safe(ite, tmp, &dev->itt_head, ite_list)
1621 its_free_ite(kvm, ite);
1622 list_del(&dev->dev_list);
1623 kfree(dev);
1431} 1624}
1432 1625
1433static void vgic_its_destroy(struct kvm_device *kvm_dev) 1626static void vgic_its_destroy(struct kvm_device *kvm_dev)
1434{ 1627{
1435 struct kvm *kvm = kvm_dev->kvm; 1628 struct kvm *kvm = kvm_dev->kvm;
1436 struct vgic_its *its = kvm_dev->private; 1629 struct vgic_its *its = kvm_dev->private;
1437 struct its_device *dev;
1438 struct its_itte *itte;
1439 struct list_head *dev_cur, *dev_temp;
1440 struct list_head *cur, *temp; 1630 struct list_head *cur, *temp;
1441 1631
1442 /* 1632 /*
@@ -1447,25 +1637,710 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
1447 return; 1637 return;
1448 1638
1449 mutex_lock(&its->its_lock); 1639 mutex_lock(&its->its_lock);
1450 list_for_each_safe(dev_cur, dev_temp, &its->device_list) { 1640 list_for_each_safe(cur, temp, &its->device_list) {
1451 dev = container_of(dev_cur, struct its_device, dev_list); 1641 struct its_device *dev;
1452 list_for_each_safe(cur, temp, &dev->itt_head) { 1642
1453 itte = (container_of(cur, struct its_itte, itte_list)); 1643 dev = list_entry(cur, struct its_device, dev_list);
1454 its_free_itte(kvm, itte); 1644 vgic_its_free_device(kvm, dev);
1455 }
1456 list_del(dev_cur);
1457 kfree(dev);
1458 } 1645 }
1459 1646
1460 list_for_each_safe(cur, temp, &its->collection_list) { 1647 list_for_each_safe(cur, temp, &its->collection_list) {
1648 struct its_collection *coll;
1649
1650 coll = list_entry(cur, struct its_collection, coll_list);
1461 list_del(cur); 1651 list_del(cur);
1462 kfree(container_of(cur, struct its_collection, coll_list)); 1652 kfree(coll);
1463 } 1653 }
1464 mutex_unlock(&its->its_lock); 1654 mutex_unlock(&its->its_lock);
1465 1655
1466 kfree(its); 1656 kfree(its);
1467} 1657}
1468 1658
1659int vgic_its_has_attr_regs(struct kvm_device *dev,
1660 struct kvm_device_attr *attr)
1661{
1662 const struct vgic_register_region *region;
1663 gpa_t offset = attr->attr;
1664 int align;
1665
1666 align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7;
1667
1668 if (offset & align)
1669 return -EINVAL;
1670
1671 region = vgic_find_mmio_region(its_registers,
1672 ARRAY_SIZE(its_registers),
1673 offset);
1674 if (!region)
1675 return -ENXIO;
1676
1677 return 0;
1678}
1679
1680int vgic_its_attr_regs_access(struct kvm_device *dev,
1681 struct kvm_device_attr *attr,
1682 u64 *reg, bool is_write)
1683{
1684 const struct vgic_register_region *region;
1685 struct vgic_its *its;
1686 gpa_t addr, offset;
1687 unsigned int len;
1688 int align, ret = 0;
1689
1690 its = dev->private;
1691 offset = attr->attr;
1692
1693 /*
1694 * Although the spec supports upper/lower 32-bit accesses to
1695 * 64-bit ITS registers, the userspace ABI requires 64-bit
1696 * accesses to all 64-bit wide registers. We therefore only
1697 * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID
1698 * registers
1699 */
1700 if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4))
1701 align = 0x3;
1702 else
1703 align = 0x7;
1704
1705 if (offset & align)
1706 return -EINVAL;
1707
1708 mutex_lock(&dev->kvm->lock);
1709
1710 if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) {
1711 ret = -ENXIO;
1712 goto out;
1713 }
1714
1715 region = vgic_find_mmio_region(its_registers,
1716 ARRAY_SIZE(its_registers),
1717 offset);
1718 if (!region) {
1719 ret = -ENXIO;
1720 goto out;
1721 }
1722
1723 if (!lock_all_vcpus(dev->kvm)) {
1724 ret = -EBUSY;
1725 goto out;
1726 }
1727
1728 addr = its->vgic_its_base + offset;
1729
1730 len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4;
1731
1732 if (is_write) {
1733 if (region->uaccess_its_write)
1734 ret = region->uaccess_its_write(dev->kvm, its, addr,
1735 len, *reg);
1736 else
1737 region->its_write(dev->kvm, its, addr, len, *reg);
1738 } else {
1739 *reg = region->its_read(dev->kvm, its, addr, len);
1740 }
1741 unlock_all_vcpus(dev->kvm);
1742out:
1743 mutex_unlock(&dev->kvm->lock);
1744 return ret;
1745}
1746
1747static u32 compute_next_devid_offset(struct list_head *h,
1748 struct its_device *dev)
1749{
1750 struct its_device *next;
1751 u32 next_offset;
1752
1753 if (list_is_last(&dev->dev_list, h))
1754 return 0;
1755 next = list_next_entry(dev, dev_list);
1756 next_offset = next->device_id - dev->device_id;
1757
1758 return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET);
1759}
1760
1761static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
1762{
1763 struct its_ite *next;
1764 u32 next_offset;
1765
1766 if (list_is_last(&ite->ite_list, h))
1767 return 0;
1768 next = list_next_entry(ite, ite_list);
1769 next_offset = next->event_id - ite->event_id;
1770
1771 return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET);
1772}
1773
1774/**
1775 * entry_fn_t - Callback called on a table entry restore path
1776 * @its: its handle
1777 * @id: id of the entry
1778 * @entry: pointer to the entry
 1779 * @opaque: pointer to opaque data passed by the caller
1780 *
1781 * Return: < 0 on error, 0 if last element was identified, id offset to next
1782 * element otherwise
1783 */
1784typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
1785 void *opaque);
1786
1787/**
 1788 * scan_its_table - Scan a contiguous table in guest RAM, applying a function
 1789 * to each entry
1790 *
1791 * @its: its handle
1792 * @base: base gpa of the table
1793 * @size: size of the table in bytes
1794 * @esz: entry size in bytes
1795 * @start_id: the ID of the first entry in the table
 1796 * (non-zero for 2nd level tables)
1797 * @fn: function to apply on each entry
1798 *
1799 * Return: < 0 on error, 0 if last element was identified, 1 otherwise
1800 * (the last element may not be found on second level tables)
1801 */
1802static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
1803 int start_id, entry_fn_t fn, void *opaque)
1804{
1805 void *entry = kzalloc(esz, GFP_KERNEL);
1806 struct kvm *kvm = its->dev->kvm;
1807 unsigned long len = size;
1808 int id = start_id;
1809 gpa_t gpa = base;
1810 int ret;
1811
1812 while (len > 0) {
1813 int next_offset;
1814 size_t byte_offset;
1815
1816 ret = kvm_read_guest(kvm, gpa, entry, esz);
1817 if (ret)
1818 goto out;
1819
1820 next_offset = fn(its, id, entry, opaque);
1821 if (next_offset <= 0) {
1822 ret = next_offset;
1823 goto out;
1824 }
1825
1826 byte_offset = next_offset * esz;
1827 id += next_offset;
1828 gpa += byte_offset;
1829 len -= byte_offset;
1830 }
1831 ret = 1;
1832
1833out:
1834 kfree(entry);
1835 return ret;
1836}
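/*
 * Illustrative only (not part of this patch): a minimal entry_fn_t callback
 * obeying the contract documented above -- return < 0 on error, 0 once the
 * last element has been identified, or the ID offset to the next element.
 * It assumes the same kernel context and the KVM_ITS_DTE_* macros added to
 * vgic.h later in this diff; the name and the counting purpose are made up.
 */
static int count_valid_dte(struct vgic_its *its, u32 id, void *entry,
			   void *opaque)
{
	u64 dte = le64_to_cpu(*(u64 *)entry);
	u32 *count = opaque;

	if (!(dte >> KVM_ITS_DTE_VALID_SHIFT))
		return 1;	/* invalid entry: step to the next one */

	(*count)++;
	/* 0 for the last mapped device, offset to the next DTE otherwise */
	return (dte & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
}
/*
 * Such a callback would be driven with, e.g.:
 *	scan_its_table(its, BASER_ADDRESS(baser), size, abi->dte_esz, 0,
 *		       count_valid_dte, &count);
 */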
1837
1838/**
1839 * vgic_its_save_ite - Save an interrupt translation entry at @gpa
1840 */
1841static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev,
1842 struct its_ite *ite, gpa_t gpa, int ite_esz)
1843{
1844 struct kvm *kvm = its->dev->kvm;
1845 u32 next_offset;
1846 u64 val;
1847
1848 next_offset = compute_next_eventid_offset(&dev->itt_head, ite);
1849 val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) |
1850 ((u64)ite->lpi << KVM_ITS_ITE_PINTID_SHIFT) |
1851 ite->collection->collection_id;
1852 val = cpu_to_le64(val);
1853 return kvm_write_guest(kvm, gpa, &val, ite_esz);
1854}
1855
1856/**
1857 * vgic_its_restore_ite - restore an interrupt translation entry
1858 * @event_id: id used for indexing
1859 * @ptr: pointer to the ITE entry
1860 * @opaque: pointer to the its_device
1861 */
1862static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id,
1863 void *ptr, void *opaque)
1864{
1865 struct its_device *dev = (struct its_device *)opaque;
1866 struct its_collection *collection;
1867 struct kvm *kvm = its->dev->kvm;
1868 struct kvm_vcpu *vcpu = NULL;
1869 u64 val;
1870 u64 *p = (u64 *)ptr;
1871 struct vgic_irq *irq;
1872 u32 coll_id, lpi_id;
1873 struct its_ite *ite;
1874 u32 offset;
1875
1876 val = *p;
1877
1878 val = le64_to_cpu(val);
1879
1880 coll_id = val & KVM_ITS_ITE_ICID_MASK;
1881 lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
1882
1883 if (!lpi_id)
1884 return 1; /* invalid entry, no choice but to scan next entry */
1885
1886 if (lpi_id < VGIC_MIN_LPI)
1887 return -EINVAL;
1888
1889 offset = val >> KVM_ITS_ITE_NEXT_SHIFT;
1890 if (event_id + offset >= BIT_ULL(dev->num_eventid_bits))
1891 return -EINVAL;
1892
1893 collection = find_collection(its, coll_id);
1894 if (!collection)
1895 return -EINVAL;
1896
1897 ite = vgic_its_alloc_ite(dev, collection, lpi_id, event_id);
1898 if (IS_ERR(ite))
1899 return PTR_ERR(ite);
1900
1901 if (its_is_collection_mapped(collection))
1902 vcpu = kvm_get_vcpu(kvm, collection->target_addr);
1903
1904 irq = vgic_add_lpi(kvm, lpi_id, vcpu);
1905 if (IS_ERR(irq))
1906 return PTR_ERR(irq);
1907 ite->irq = irq;
1908
1909 return offset;
1910}
1911
1912static int vgic_its_ite_cmp(void *priv, struct list_head *a,
1913 struct list_head *b)
1914{
1915 struct its_ite *itea = container_of(a, struct its_ite, ite_list);
1916 struct its_ite *iteb = container_of(b, struct its_ite, ite_list);
1917
1918 if (itea->event_id < iteb->event_id)
1919 return -1;
1920 else
1921 return 1;
1922}
1923
1924static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
1925{
1926 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
1927 gpa_t base = device->itt_addr;
1928 struct its_ite *ite;
1929 int ret;
1930 int ite_esz = abi->ite_esz;
1931
1932 list_sort(NULL, &device->itt_head, vgic_its_ite_cmp);
1933
1934 list_for_each_entry(ite, &device->itt_head, ite_list) {
1935 gpa_t gpa = base + ite->event_id * ite_esz;
1936
1937 ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
1938 if (ret)
1939 return ret;
1940 }
1941 return 0;
1942}
1943
1944static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev)
1945{
1946 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
1947 gpa_t base = dev->itt_addr;
1948 int ret;
1949 int ite_esz = abi->ite_esz;
1950 size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz;
1951
1952 ret = scan_its_table(its, base, max_size, ite_esz, 0,
1953 vgic_its_restore_ite, dev);
1954
1955 return ret;
1956}
1957
1958/**
1959 * vgic_its_save_dte - Save a device table entry at a given GPA
1960 *
1961 * @its: ITS handle
1962 * @dev: ITS device
1963 * @ptr: GPA
1964 */
1965static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev,
1966 gpa_t ptr, int dte_esz)
1967{
1968 struct kvm *kvm = its->dev->kvm;
1969 u64 val, itt_addr_field;
1970 u32 next_offset;
1971
1972 itt_addr_field = dev->itt_addr >> 8;
1973 next_offset = compute_next_devid_offset(&its->device_list, dev);
1974 val = (1ULL << KVM_ITS_DTE_VALID_SHIFT |
1975 ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) |
1976 (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) |
1977 (dev->num_eventid_bits - 1));
1978 val = cpu_to_le64(val);
1979 return kvm_write_guest(kvm, ptr, &val, dte_esz);
1980}
1981
1982/**
1983 * vgic_its_restore_dte - restore a device table entry
1984 *
1985 * @its: its handle
1986 * @id: device id the DTE corresponds to
1987 * @ptr: kernel VA where the 8 byte DTE is located
1988 * @opaque: unused
1989 *
1990 * Return: < 0 on error, 0 if the dte is the last one, id offset to the
1991 * next dte otherwise
1992 */
1993static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
1994 void *ptr, void *opaque)
1995{
1996 struct its_device *dev;
1997 gpa_t itt_addr;
1998 u8 num_eventid_bits;
1999 u64 entry = *(u64 *)ptr;
2000 bool valid;
2001 u32 offset;
2002 int ret;
2003
2004 entry = le64_to_cpu(entry);
2005
2006 valid = entry >> KVM_ITS_DTE_VALID_SHIFT;
2007 num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1;
2008 itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK)
2009 >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8;
2010
2011 if (!valid)
2012 return 1;
2013
2014 /* dte entry is valid */
2015 offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT;
2016
2017 dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits);
2018 if (IS_ERR(dev))
2019 return PTR_ERR(dev);
2020
2021 ret = vgic_its_restore_itt(its, dev);
2022 if (ret) {
2023 vgic_its_free_device(its->dev->kvm, dev);
2024 return ret;
2025 }
2026
2027 return offset;
2028}
2029
2030static int vgic_its_device_cmp(void *priv, struct list_head *a,
2031 struct list_head *b)
2032{
2033 struct its_device *deva = container_of(a, struct its_device, dev_list);
2034 struct its_device *devb = container_of(b, struct its_device, dev_list);
2035
2036 if (deva->device_id < devb->device_id)
2037 return -1;
2038 else
2039 return 1;
2040}
2041
2042/**
 2043 * vgic_its_save_device_tables - Save the device table and all ITTs
 2044 * into guest RAM
 2045 *
 2046 * L1/L2 handling is hidden by the vgic_its_check_id() helper, which directly
 2047 * returns the GPA of the device entry
2048 */
2049static int vgic_its_save_device_tables(struct vgic_its *its)
2050{
2051 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2052 struct its_device *dev;
2053 int dte_esz = abi->dte_esz;
2054 u64 baser;
2055
2056 baser = its->baser_device_table;
2057
2058 list_sort(NULL, &its->device_list, vgic_its_device_cmp);
2059
2060 list_for_each_entry(dev, &its->device_list, dev_list) {
2061 int ret;
2062 gpa_t eaddr;
2063
2064 if (!vgic_its_check_id(its, baser,
2065 dev->device_id, &eaddr))
2066 return -EINVAL;
2067
2068 ret = vgic_its_save_itt(its, dev);
2069 if (ret)
2070 return ret;
2071
2072 ret = vgic_its_save_dte(its, dev, eaddr, dte_esz);
2073 if (ret)
2074 return ret;
2075 }
2076 return 0;
2077}
2078
2079/**
2080 * handle_l1_dte - callback used for L1 device table entries (2 stage case)
2081 *
2082 * @its: its handle
2083 * @id: index of the entry in the L1 table
2084 * @addr: kernel VA
2085 * @opaque: unused
2086 *
 2087 * L1 table entries are scanned one entry at a time
2088 * Return < 0 if error, 0 if last dte was found when scanning the L2
2089 * table, +1 otherwise (meaning next L1 entry must be scanned)
2090 */
2091static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr,
2092 void *opaque)
2093{
2094 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2095 int l2_start_id = id * (SZ_64K / abi->dte_esz);
2096 u64 entry = *(u64 *)addr;
2097 int dte_esz = abi->dte_esz;
2098 gpa_t gpa;
2099 int ret;
2100
2101 entry = le64_to_cpu(entry);
2102
2103 if (!(entry & KVM_ITS_L1E_VALID_MASK))
2104 return 1;
2105
2106 gpa = entry & KVM_ITS_L1E_ADDR_MASK;
2107
2108 ret = scan_its_table(its, gpa, SZ_64K, dte_esz,
2109 l2_start_id, vgic_its_restore_dte, NULL);
2110
2111 if (ret <= 0)
2112 return ret;
2113
2114 return 1;
2115}
2116
2117/**
2118 * vgic_its_restore_device_tables - Restore the device table and all ITT
2119 * from guest RAM to internal data structs
2120 */
2121static int vgic_its_restore_device_tables(struct vgic_its *its)
2122{
2123 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2124 u64 baser = its->baser_device_table;
2125 int l1_esz, ret;
2126 int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
2127 gpa_t l1_gpa;
2128
2129 if (!(baser & GITS_BASER_VALID))
2130 return 0;
2131
2132 l1_gpa = BASER_ADDRESS(baser);
2133
2134 if (baser & GITS_BASER_INDIRECT) {
2135 l1_esz = GITS_LVL1_ENTRY_SIZE;
2136 ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
2137 handle_l1_dte, NULL);
2138 } else {
2139 l1_esz = abi->dte_esz;
2140 ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0,
2141 vgic_its_restore_dte, NULL);
2142 }
2143
2144 if (ret > 0)
2145 ret = -EINVAL;
2146
2147 return ret;
2148}
2149
2150static int vgic_its_save_cte(struct vgic_its *its,
2151 struct its_collection *collection,
2152 gpa_t gpa, int esz)
2153{
2154 u64 val;
2155
2156 val = (1ULL << KVM_ITS_CTE_VALID_SHIFT |
2157 ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) |
2158 collection->collection_id);
2159 val = cpu_to_le64(val);
2160 return kvm_write_guest(its->dev->kvm, gpa, &val, esz);
2161}
2162
2163static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz)
2164{
2165 struct its_collection *collection;
2166 struct kvm *kvm = its->dev->kvm;
2167 u32 target_addr, coll_id;
2168 u64 val;
2169 int ret;
2170
2171 BUG_ON(esz > sizeof(val));
2172 ret = kvm_read_guest(kvm, gpa, &val, esz);
2173 if (ret)
2174 return ret;
2175 val = le64_to_cpu(val);
2176 if (!(val & KVM_ITS_CTE_VALID_MASK))
2177 return 0;
2178
2179 target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT);
2180 coll_id = val & KVM_ITS_CTE_ICID_MASK;
2181
2182 if (target_addr >= atomic_read(&kvm->online_vcpus))
2183 return -EINVAL;
2184
2185 collection = find_collection(its, coll_id);
2186 if (collection)
2187 return -EEXIST;
2188 ret = vgic_its_alloc_collection(its, &collection, coll_id);
2189 if (ret)
2190 return ret;
2191 collection->target_addr = target_addr;
2192 return 1;
2193}
2194
2195/**
2196 * vgic_its_save_collection_table - Save the collection table into
2197 * guest RAM
2198 */
2199static int vgic_its_save_collection_table(struct vgic_its *its)
2200{
2201 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2202 struct its_collection *collection;
2203 u64 val;
2204 gpa_t gpa;
2205 size_t max_size, filled = 0;
2206 int ret, cte_esz = abi->cte_esz;
2207
2208 gpa = BASER_ADDRESS(its->baser_coll_table);
2209 if (!gpa)
2210 return 0;
2211
2212 max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
2213
2214 list_for_each_entry(collection, &its->collection_list, coll_list) {
2215 ret = vgic_its_save_cte(its, collection, gpa, cte_esz);
2216 if (ret)
2217 return ret;
2218 gpa += cte_esz;
2219 filled += cte_esz;
2220 }
2221
2222 if (filled == max_size)
2223 return 0;
2224
2225 /*
2226 * table is not fully filled, add a last dummy element
2227 * with valid bit unset
2228 */
2229 val = 0;
2230 BUG_ON(cte_esz > sizeof(val));
2231 ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz);
2232 return ret;
2233}
2234
2235/**
2236 * vgic_its_restore_collection_table - reads the collection table
2237 * in guest memory and restores the ITS internal state. Requires the
2238 * BASER registers to be restored before.
2239 */
2240static int vgic_its_restore_collection_table(struct vgic_its *its)
2241{
2242 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2243 int cte_esz = abi->cte_esz;
2244 size_t max_size, read = 0;
2245 gpa_t gpa;
2246 int ret;
2247
2248 if (!(its->baser_coll_table & GITS_BASER_VALID))
2249 return 0;
2250
2251 gpa = BASER_ADDRESS(its->baser_coll_table);
2252
2253 max_size = GITS_BASER_NR_PAGES(its->baser_coll_table) * SZ_64K;
2254
2255 while (read < max_size) {
2256 ret = vgic_its_restore_cte(its, gpa, cte_esz);
2257 if (ret <= 0)
2258 break;
2259 gpa += cte_esz;
2260 read += cte_esz;
2261 }
2262 return ret;
2263}
2264
2265/**
 2266 * vgic_its_save_tables_v0 - Save the ITS tables into guest RAM
 2267 * according to v0 ABI
2268 */
2269static int vgic_its_save_tables_v0(struct vgic_its *its)
2270{
2271 struct kvm *kvm = its->dev->kvm;
2272 int ret;
2273
2274 mutex_lock(&kvm->lock);
2275 mutex_lock(&its->its_lock);
2276
2277 if (!lock_all_vcpus(kvm)) {
2278 mutex_unlock(&its->its_lock);
2279 mutex_unlock(&kvm->lock);
2280 return -EBUSY;
2281 }
2282
2283 ret = vgic_its_save_device_tables(its);
2284 if (ret)
2285 goto out;
2286
2287 ret = vgic_its_save_collection_table(its);
2288
2289out:
2290 unlock_all_vcpus(kvm);
2291 mutex_unlock(&its->its_lock);
2292 mutex_unlock(&kvm->lock);
2293 return ret;
2294}
2295
2296/**
2297 * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM
2298 * to internal data structs according to V0 ABI
2299 *
2300 */
2301static int vgic_its_restore_tables_v0(struct vgic_its *its)
2302{
2303 struct kvm *kvm = its->dev->kvm;
2304 int ret;
2305
2306 mutex_lock(&kvm->lock);
2307 mutex_lock(&its->its_lock);
2308
2309 if (!lock_all_vcpus(kvm)) {
2310 mutex_unlock(&its->its_lock);
2311 mutex_unlock(&kvm->lock);
2312 return -EBUSY;
2313 }
2314
2315 ret = vgic_its_restore_collection_table(its);
2316 if (ret)
2317 goto out;
2318
2319 ret = vgic_its_restore_device_tables(its);
2320out:
2321 unlock_all_vcpus(kvm);
2322 mutex_unlock(&its->its_lock);
2323 mutex_unlock(&kvm->lock);
2324
2325 return ret;
2326}
2327
2328static int vgic_its_commit_v0(struct vgic_its *its)
2329{
2330 const struct vgic_its_abi *abi;
2331
2332 abi = vgic_its_get_abi(its);
2333 its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
2334 its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK;
2335
2336 its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5)
2337 << GITS_BASER_ENTRY_SIZE_SHIFT);
2338
2339 its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5)
2340 << GITS_BASER_ENTRY_SIZE_SHIFT);
2341 return 0;
2342}
2343
1469static int vgic_its_has_attr(struct kvm_device *dev, 2344static int vgic_its_has_attr(struct kvm_device *dev,
1470 struct kvm_device_attr *attr) 2345 struct kvm_device_attr *attr)
1471{ 2346{
@@ -1480,8 +2355,14 @@ static int vgic_its_has_attr(struct kvm_device *dev,
1480 switch (attr->attr) { 2355 switch (attr->attr) {
1481 case KVM_DEV_ARM_VGIC_CTRL_INIT: 2356 case KVM_DEV_ARM_VGIC_CTRL_INIT:
1482 return 0; 2357 return 0;
2358 case KVM_DEV_ARM_ITS_SAVE_TABLES:
2359 return 0;
2360 case KVM_DEV_ARM_ITS_RESTORE_TABLES:
2361 return 0;
1483 } 2362 }
1484 break; 2363 break;
2364 case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
2365 return vgic_its_has_attr_regs(dev, attr);
1485 } 2366 }
1486 return -ENXIO; 2367 return -ENXIO;
1487} 2368}
@@ -1509,18 +2390,30 @@ static int vgic_its_set_attr(struct kvm_device *dev,
1509 if (ret) 2390 if (ret)
1510 return ret; 2391 return ret;
1511 2392
1512 its->vgic_its_base = addr; 2393 return vgic_register_its_iodev(dev->kvm, its, addr);
1513
1514 return 0;
1515 } 2394 }
1516 case KVM_DEV_ARM_VGIC_GRP_CTRL: 2395 case KVM_DEV_ARM_VGIC_GRP_CTRL: {
2396 const struct vgic_its_abi *abi = vgic_its_get_abi(its);
2397
1517 switch (attr->attr) { 2398 switch (attr->attr) {
1518 case KVM_DEV_ARM_VGIC_CTRL_INIT: 2399 case KVM_DEV_ARM_VGIC_CTRL_INIT:
1519 its->initialized = true; 2400 /* Nothing to do */
1520
1521 return 0; 2401 return 0;
2402 case KVM_DEV_ARM_ITS_SAVE_TABLES:
2403 return abi->save_tables(its);
2404 case KVM_DEV_ARM_ITS_RESTORE_TABLES:
2405 return abi->restore_tables(its);
1522 } 2406 }
1523 break; 2407 }
2408 case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
2409 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2410 u64 reg;
2411
2412 if (get_user(reg, uaddr))
2413 return -EFAULT;
2414
2415 return vgic_its_attr_regs_access(dev, attr, &reg, true);
2416 }
1524 } 2417 }
1525 return -ENXIO; 2418 return -ENXIO;
1526} 2419}
@@ -1541,10 +2434,20 @@ static int vgic_its_get_attr(struct kvm_device *dev,
1541 if (copy_to_user(uaddr, &addr, sizeof(addr))) 2434 if (copy_to_user(uaddr, &addr, sizeof(addr)))
1542 return -EFAULT; 2435 return -EFAULT;
1543 break; 2436 break;
2437 }
2438 case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
2439 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2440 u64 reg;
2441 int ret;
2442
2443 ret = vgic_its_attr_regs_access(dev, attr, &reg, false);
2444 if (ret)
2445 return ret;
2446 return put_user(reg, uaddr);
2447 }
1544 default: 2448 default:
1545 return -ENXIO; 2449 return -ENXIO;
1546 } 2450 }
1547 }
1548 2451
1549 return 0; 2452 return 0;
1550} 2453}
@@ -1563,30 +2466,3 @@ int kvm_vgic_register_its_device(void)
1563 return kvm_register_device_ops(&kvm_arm_vgic_its_ops, 2466 return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
1564 KVM_DEV_TYPE_ARM_VGIC_ITS); 2467 KVM_DEV_TYPE_ARM_VGIC_ITS);
1565} 2468}
1566
1567/*
1568 * Registers all ITSes with the kvm_io_bus framework.
1569 * To follow the existing VGIC initialization sequence, this has to be
1570 * done as late as possible, just before the first VCPU runs.
1571 */
1572int vgic_register_its_iodevs(struct kvm *kvm)
1573{
1574 struct kvm_device *dev;
1575 int ret = 0;
1576
1577 list_for_each_entry(dev, &kvm->devices, vm_node) {
1578 if (dev->ops != &kvm_arm_vgic_its_ops)
1579 continue;
1580
1581 ret = vgic_register_its_iodev(kvm, dev->private);
1582 if (ret)
1583 return ret;
1584 /*
1585 * We don't need to care about tearing down previously
1586 * registered ITSes, as the kvm_io_bus framework removes
1587 * them for us if the VM gets destroyed.
1588 */
1589 }
1590
1591 return ret;
1592}
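As a rough userspace sketch (not part of the diff above), the new ITS state handling is driven through the kvm_device_attr interface: KVM_DEV_ARM_VGIC_GRP_CTRL carries the KVM_DEV_ARM_ITS_SAVE_TABLES / KVM_DEV_ARM_ITS_RESTORE_TABLES controls, and KVM_DEV_ARM_VGIC_GRP_ITS_REGS gives access to the GITS registers. The its_fd below is assumed to be the fd returned by KVM_CREATE_DEVICE for a KVM_DEV_TYPE_ARM_VGIC_ITS device, and 0x90 is the GITS_CREADR offset from the GITS register map (an assumption, since the header value is not quoted here).

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Flush the ITS device/collection tables into guest RAM, e.g. before migration. */
static int its_save_tables(int its_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_ITS_SAVE_TABLES,
	};

	/* Fails with EBUSY if the kernel cannot lock all VCPUs. */
	return ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr);
}

/* Read GITS_CREADR through the new register group; 64-bit registers need 64-bit accesses. */
static int its_get_creadr(int its_fd, uint64_t *val)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
		.attr  = 0x90,			/* GITS_CREADR offset (assumed) */
		.addr  = (uintptr_t)val,
	};

	return ioctl(its_fd, KVM_GET_DEVICE_ATTR, &attr);
}

Restore is symmetric: write the GITS registers back first (the collection table restore above requires the BASER registers to be valid), then issue KVM_DEV_ARM_ITS_RESTORE_TABLES.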
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
index d181d2baee9c..10ae6f394b71 100644
--- a/virt/kvm/arm/vgic/vgic-kvm-device.c
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -37,6 +37,14 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
37 return 0; 37 return 0;
38} 38}
39 39
40static int vgic_check_type(struct kvm *kvm, int type_needed)
41{
42 if (kvm->arch.vgic.vgic_model != type_needed)
43 return -ENODEV;
44 else
45 return 0;
46}
47
40/** 48/**
41 * kvm_vgic_addr - set or get vgic VM base addresses 49 * kvm_vgic_addr - set or get vgic VM base addresses
42 * @kvm: pointer to the vm struct 50 * @kvm: pointer to the vm struct
@@ -57,40 +65,41 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
57{ 65{
58 int r = 0; 66 int r = 0;
59 struct vgic_dist *vgic = &kvm->arch.vgic; 67 struct vgic_dist *vgic = &kvm->arch.vgic;
60 int type_needed;
61 phys_addr_t *addr_ptr, alignment; 68 phys_addr_t *addr_ptr, alignment;
62 69
63 mutex_lock(&kvm->lock); 70 mutex_lock(&kvm->lock);
64 switch (type) { 71 switch (type) {
65 case KVM_VGIC_V2_ADDR_TYPE_DIST: 72 case KVM_VGIC_V2_ADDR_TYPE_DIST:
66 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; 73 r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
67 addr_ptr = &vgic->vgic_dist_base; 74 addr_ptr = &vgic->vgic_dist_base;
68 alignment = SZ_4K; 75 alignment = SZ_4K;
69 break; 76 break;
70 case KVM_VGIC_V2_ADDR_TYPE_CPU: 77 case KVM_VGIC_V2_ADDR_TYPE_CPU:
71 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; 78 r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
72 addr_ptr = &vgic->vgic_cpu_base; 79 addr_ptr = &vgic->vgic_cpu_base;
73 alignment = SZ_4K; 80 alignment = SZ_4K;
74 break; 81 break;
75 case KVM_VGIC_V3_ADDR_TYPE_DIST: 82 case KVM_VGIC_V3_ADDR_TYPE_DIST:
76 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; 83 r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
77 addr_ptr = &vgic->vgic_dist_base; 84 addr_ptr = &vgic->vgic_dist_base;
78 alignment = SZ_64K; 85 alignment = SZ_64K;
79 break; 86 break;
80 case KVM_VGIC_V3_ADDR_TYPE_REDIST: 87 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
81 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; 88 r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
89 if (r)
90 break;
91 if (write) {
92 r = vgic_v3_set_redist_base(kvm, *addr);
93 goto out;
94 }
82 addr_ptr = &vgic->vgic_redist_base; 95 addr_ptr = &vgic->vgic_redist_base;
83 alignment = SZ_64K;
84 break; 96 break;
85 default: 97 default:
86 r = -ENODEV; 98 r = -ENODEV;
87 goto out;
88 } 99 }
89 100
90 if (vgic->vgic_model != type_needed) { 101 if (r)
91 r = -ENODEV;
92 goto out; 102 goto out;
93 }
94 103
95 if (write) { 104 if (write) {
96 r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); 105 r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
@@ -259,13 +268,13 @@ static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
259 } 268 }
260} 269}
261 270
262static void unlock_all_vcpus(struct kvm *kvm) 271void unlock_all_vcpus(struct kvm *kvm)
263{ 272{
264 unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); 273 unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
265} 274}
266 275
267/* Returns true if all vcpus were locked, false otherwise */ 276/* Returns true if all vcpus were locked, false otherwise */
268static bool lock_all_vcpus(struct kvm *kvm) 277bool lock_all_vcpus(struct kvm *kvm)
269{ 278{
270 struct kvm_vcpu *tmp_vcpu; 279 struct kvm_vcpu *tmp_vcpu;
271 int c; 280 int c;
@@ -580,6 +589,24 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
580 reg = tmp32; 589 reg = tmp32;
581 return vgic_v3_attr_regs_access(dev, attr, &reg, true); 590 return vgic_v3_attr_regs_access(dev, attr, &reg, true);
582 } 591 }
592 case KVM_DEV_ARM_VGIC_GRP_CTRL: {
593 int ret;
594
595 switch (attr->attr) {
596 case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
597 mutex_lock(&dev->kvm->lock);
598
599 if (!lock_all_vcpus(dev->kvm)) {
600 mutex_unlock(&dev->kvm->lock);
601 return -EBUSY;
602 }
603 ret = vgic_v3_save_pending_tables(dev->kvm);
604 unlock_all_vcpus(dev->kvm);
605 mutex_unlock(&dev->kvm->lock);
606 return ret;
607 }
608 break;
609 }
583 } 610 }
584 return -ENXIO; 611 return -ENXIO;
585} 612}
@@ -658,6 +685,8 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
658 switch (attr->attr) { 685 switch (attr->attr) {
659 case KVM_DEV_ARM_VGIC_CTRL_INIT: 686 case KVM_DEV_ARM_VGIC_CTRL_INIT:
660 return 0; 687 return 0;
688 case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES:
689 return 0;
661 } 690 }
662 } 691 }
663 return -ENXIO; 692 return -ENXIO;
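The KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES control handled in vgic_v3_set_attr() above can be driven the same way; a minimal sketch, assuming vgic_fd is the KVM_DEV_TYPE_ARM_VGIC_V3 device fd:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Write the LPI pending state back into the guest's pending tables. */
static int vgic_save_pending_tables(int vgic_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr  = KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES,
	};

	/* Fails with EBUSY while any VCPU mutex is held (e.g. a VCPU in KVM_RUN). */
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}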
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 6afb3b484886..99da1a207c19 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -556,67 +556,130 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
556 return SZ_64K; 556 return SZ_64K;
557} 557}
558 558
559int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) 559/**
560 * vgic_register_redist_iodev - register a single redist iodev
561 * @vcpu: The VCPU to which the redistributor belongs
562 *
563 * Register a KVM iodev for this VCPU's redistributor using the address
564 * provided.
565 *
566 * Return 0 on success, -ERRNO otherwise.
567 */
568int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
569{
570 struct kvm *kvm = vcpu->kvm;
571 struct vgic_dist *vgic = &kvm->arch.vgic;
572 struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
573 struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
574 gpa_t rd_base, sgi_base;
575 int ret;
576
577 /*
578 * We may be creating VCPUs before having set the base address for the
579 * redistributor region, in which case we will come back to this
580 * function for all VCPUs when the base address is set. Just return
581 * without doing any work for now.
582 */
583 if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base))
584 return 0;
585
586 if (!vgic_v3_check_base(kvm))
587 return -EINVAL;
588
589 rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2;
590 sgi_base = rd_base + SZ_64K;
591
592 kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
593 rd_dev->base_addr = rd_base;
594 rd_dev->iodev_type = IODEV_REDIST;
595 rd_dev->regions = vgic_v3_rdbase_registers;
596 rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
597 rd_dev->redist_vcpu = vcpu;
598
599 mutex_lock(&kvm->slots_lock);
600 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
601 SZ_64K, &rd_dev->dev);
602 mutex_unlock(&kvm->slots_lock);
603
604 if (ret)
605 return ret;
606
607 kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
608 sgi_dev->base_addr = sgi_base;
609 sgi_dev->iodev_type = IODEV_REDIST;
610 sgi_dev->regions = vgic_v3_sgibase_registers;
611 sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
612 sgi_dev->redist_vcpu = vcpu;
613
614 mutex_lock(&kvm->slots_lock);
615 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
616 SZ_64K, &sgi_dev->dev);
617 mutex_unlock(&kvm->slots_lock);
618 if (ret)
619 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
620 &rd_dev->dev);
621
622 return ret;
623}
624
625static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
626{
627 struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
628 struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
629
630 kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
631 kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
632}
633
634static int vgic_register_all_redist_iodevs(struct kvm *kvm)
560{ 635{
561 struct kvm_vcpu *vcpu; 636 struct kvm_vcpu *vcpu;
562 int c, ret = 0; 637 int c, ret = 0;
563 638
564 kvm_for_each_vcpu(c, vcpu, kvm) { 639 kvm_for_each_vcpu(c, vcpu, kvm) {
565 gpa_t rd_base = redist_base_address + c * SZ_64K * 2; 640 ret = vgic_register_redist_iodev(vcpu);
566 gpa_t sgi_base = rd_base + SZ_64K;
567 struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
568 struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
569
570 kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
571 rd_dev->base_addr = rd_base;
572 rd_dev->iodev_type = IODEV_REDIST;
573 rd_dev->regions = vgic_v3_rdbase_registers;
574 rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
575 rd_dev->redist_vcpu = vcpu;
576
577 mutex_lock(&kvm->slots_lock);
578 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
579 SZ_64K, &rd_dev->dev);
580 mutex_unlock(&kvm->slots_lock);
581
582 if (ret) 641 if (ret)
583 break; 642 break;
584
585 kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
586 sgi_dev->base_addr = sgi_base;
587 sgi_dev->iodev_type = IODEV_REDIST;
588 sgi_dev->regions = vgic_v3_sgibase_registers;
589 sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
590 sgi_dev->redist_vcpu = vcpu;
591
592 mutex_lock(&kvm->slots_lock);
593 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
594 SZ_64K, &sgi_dev->dev);
595 mutex_unlock(&kvm->slots_lock);
596 if (ret) {
597 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
598 &rd_dev->dev);
599 break;
600 }
601 } 643 }
602 644
603 if (ret) { 645 if (ret) {
604 /* The current c failed, so we start with the previous one. */ 646 /* The current c failed, so we start with the previous one. */
605 for (c--; c >= 0; c--) { 647 for (c--; c >= 0; c--) {
606 struct vgic_cpu *vgic_cpu;
607
608 vcpu = kvm_get_vcpu(kvm, c); 648 vcpu = kvm_get_vcpu(kvm, c);
609 vgic_cpu = &vcpu->arch.vgic_cpu; 649 vgic_unregister_redist_iodev(vcpu);
610 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
611 &vgic_cpu->rd_iodev.dev);
612 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
613 &vgic_cpu->sgi_iodev.dev);
614 } 650 }
615 } 651 }
616 652
617 return ret; 653 return ret;
618} 654}
619 655
656int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr)
657{
658 struct vgic_dist *vgic = &kvm->arch.vgic;
659 int ret;
660
661 /* vgic_check_ioaddr makes sure we don't do this twice */
662 ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K);
663 if (ret)
664 return ret;
665
666 vgic->vgic_redist_base = addr;
667 if (!vgic_v3_check_base(kvm)) {
668 vgic->vgic_redist_base = VGIC_ADDR_UNDEF;
669 return -EINVAL;
670 }
671
672 /*
673 * Register iodevs for each existing VCPU. Adding more VCPUs
674 * afterwards will register the iodevs when needed.
675 */
676 ret = vgic_register_all_redist_iodevs(kvm);
677 if (ret)
678 return ret;
679
680 return 0;
681}
682
620int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) 683int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
621{ 684{
622 const struct vgic_register_region *region; 685 const struct vgic_register_region *region;
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 2a5db1352722..1c17b2a2f105 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -446,13 +446,12 @@ static int match_region(const void *key, const void *elt)
446 return 0; 446 return 0;
447} 447}
448 448
449/* Find the proper register handler entry given a certain address offset. */ 449const struct vgic_register_region *
450static const struct vgic_register_region * 450vgic_find_mmio_region(const struct vgic_register_region *regions,
451vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, 451 int nr_regions, unsigned int offset)
452 unsigned int offset)
453{ 452{
454 return bsearch((void *)(uintptr_t)offset, region, nr_regions, 453 return bsearch((void *)(uintptr_t)offset, regions, nr_regions,
455 sizeof(region[0]), match_region); 454 sizeof(regions[0]), match_region);
456} 455}
457 456
458void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) 457void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 98bb566b660a..ea4171acdef3 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -36,8 +36,13 @@ struct vgic_register_region {
36 }; 36 };
37 unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, 37 unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
38 unsigned int len); 38 unsigned int len);
39 void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, 39 union {
40 unsigned int len, unsigned long val); 40 void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
41 unsigned int len, unsigned long val);
42 int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
43 gpa_t addr, unsigned int len,
44 unsigned long val);
45 };
41}; 46};
42 47
43extern struct kvm_io_device_ops kvm_io_gic_ops; 48extern struct kvm_io_device_ops kvm_io_gic_ops;
@@ -192,4 +197,9 @@ u64 vgic_sanitise_shareability(u64 reg);
192u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, 197u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
193 u64 (*sanitise_fn)(u64)); 198 u64 (*sanitise_fn)(u64));
194 199
200/* Find the proper register handler entry given a certain address offset */
201const struct vgic_register_region *
202vgic_find_mmio_region(const struct vgic_register_region *regions,
203 int nr_regions, unsigned int offset);
204
195#endif 205#endif
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index df1503650300..8fa737edde6f 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -234,19 +234,125 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
234 vgic_v3->vgic_hcr = ICH_HCR_EN; 234 vgic_v3->vgic_hcr = ICH_HCR_EN;
235} 235}
236 236
237/* check for overlapping regions and for regions crossing the end of memory */ 237int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
238static bool vgic_v3_check_base(struct kvm *kvm) 238{
239 struct kvm_vcpu *vcpu;
240 int byte_offset, bit_nr;
241 gpa_t pendbase, ptr;
242 bool status;
243 u8 val;
244 int ret;
245
246retry:
247 vcpu = irq->target_vcpu;
248 if (!vcpu)
249 return 0;
250
251 pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
252
253 byte_offset = irq->intid / BITS_PER_BYTE;
254 bit_nr = irq->intid % BITS_PER_BYTE;
255 ptr = pendbase + byte_offset;
256
257 ret = kvm_read_guest(kvm, ptr, &val, 1);
258 if (ret)
259 return ret;
260
261 status = val & (1 << bit_nr);
262
263 spin_lock(&irq->irq_lock);
264 if (irq->target_vcpu != vcpu) {
265 spin_unlock(&irq->irq_lock);
266 goto retry;
267 }
268 irq->pending_latch = status;
269 vgic_queue_irq_unlock(vcpu->kvm, irq);
270
271 if (status) {
272 /* clear consumed data */
273 val &= ~(1 << bit_nr);
274 ret = kvm_write_guest(kvm, ptr, &val, 1);
275 if (ret)
276 return ret;
277 }
278 return 0;
279}
280
281/**
 282 * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
 283 * kvm lock and all vcpu locks must be held
284 */
285int vgic_v3_save_pending_tables(struct kvm *kvm)
286{
287 struct vgic_dist *dist = &kvm->arch.vgic;
288 int last_byte_offset = -1;
289 struct vgic_irq *irq;
290 int ret;
291
292 list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
293 int byte_offset, bit_nr;
294 struct kvm_vcpu *vcpu;
295 gpa_t pendbase, ptr;
296 bool stored;
297 u8 val;
298
299 vcpu = irq->target_vcpu;
300 if (!vcpu)
301 continue;
302
303 pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
304
305 byte_offset = irq->intid / BITS_PER_BYTE;
306 bit_nr = irq->intid % BITS_PER_BYTE;
307 ptr = pendbase + byte_offset;
308
309 if (byte_offset != last_byte_offset) {
310 ret = kvm_read_guest(kvm, ptr, &val, 1);
311 if (ret)
312 return ret;
313 last_byte_offset = byte_offset;
314 }
315
316 stored = val & (1U << bit_nr);
317 if (stored == irq->pending_latch)
318 continue;
319
320 if (irq->pending_latch)
321 val |= 1 << bit_nr;
322 else
323 val &= ~(1 << bit_nr);
324
325 ret = kvm_write_guest(kvm, ptr, &val, 1);
326 if (ret)
327 return ret;
328 }
329 return 0;
330}
331
332/*
333 * Check for overlapping regions and for regions crossing the end of memory
334 * for base addresses which have already been set.
335 */
336bool vgic_v3_check_base(struct kvm *kvm)
239{ 337{
240 struct vgic_dist *d = &kvm->arch.vgic; 338 struct vgic_dist *d = &kvm->arch.vgic;
241 gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE; 339 gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
242 340
243 redist_size *= atomic_read(&kvm->online_vcpus); 341 redist_size *= atomic_read(&kvm->online_vcpus);
244 342
245 if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) 343 if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
344 d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
246 return false; 345 return false;
247 if (d->vgic_redist_base + redist_size < d->vgic_redist_base) 346
347 if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) &&
348 d->vgic_redist_base + redist_size < d->vgic_redist_base)
248 return false; 349 return false;
249 350
351 /* Both base addresses must be set to check if they overlap */
352 if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) ||
353 IS_VGIC_ADDR_UNDEF(d->vgic_redist_base))
354 return true;
355
250 if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base) 356 if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
251 return true; 357 return true;
252 if (d->vgic_redist_base + redist_size <= d->vgic_dist_base) 358 if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
@@ -291,20 +397,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
291 goto out; 397 goto out;
292 } 398 }
293 399
294 ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
295 if (ret) {
296 kvm_err("Unable to register VGICv3 redist MMIO regions\n");
297 goto out;
298 }
299
300 if (vgic_has_its(kvm)) {
301 ret = vgic_register_its_iodevs(kvm);
302 if (ret) {
303 kvm_err("Unable to register VGIC ITS MMIO regions\n");
304 goto out;
305 }
306 }
307
308 dist->ready = true; 400 dist->ready = true;
309 401
310out: 402out:
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 4346bc7d08dc..83b24d20ff8f 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -21,7 +21,7 @@
21#include "vgic.h" 21#include "vgic.h"
22 22
23#define CREATE_TRACE_POINTS 23#define CREATE_TRACE_POINTS
24#include "../trace.h" 24#include "trace.h"
25 25
26#ifdef CONFIG_DEBUG_SPINLOCK 26#ifdef CONFIG_DEBUG_SPINLOCK
27#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) 27#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 799fd651b260..da83e4caa272 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -73,6 +73,29 @@
73 KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ 73 KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
74 KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) 74 KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
75 75
76/*
 77 * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt,
 78 * the macros below define the ITS table entry encoding.
79 */
80#define KVM_ITS_CTE_VALID_SHIFT 63
81#define KVM_ITS_CTE_VALID_MASK BIT_ULL(63)
82#define KVM_ITS_CTE_RDBASE_SHIFT 16
83#define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0)
84#define KVM_ITS_ITE_NEXT_SHIFT 48
85#define KVM_ITS_ITE_PINTID_SHIFT 16
86#define KVM_ITS_ITE_PINTID_MASK GENMASK_ULL(47, 16)
87#define KVM_ITS_ITE_ICID_MASK GENMASK_ULL(15, 0)
88#define KVM_ITS_DTE_VALID_SHIFT 63
89#define KVM_ITS_DTE_VALID_MASK BIT_ULL(63)
90#define KVM_ITS_DTE_NEXT_SHIFT 49
91#define KVM_ITS_DTE_NEXT_MASK GENMASK_ULL(62, 49)
92#define KVM_ITS_DTE_ITTADDR_SHIFT 5
93#define KVM_ITS_DTE_ITTADDR_MASK GENMASK_ULL(48, 5)
94#define KVM_ITS_DTE_SIZE_MASK GENMASK_ULL(4, 0)
95#define KVM_ITS_L1E_VALID_MASK BIT_ULL(63)
96/* we only support 64 kB translation table page size */
97#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16)
98
76static inline bool irq_is_pending(struct vgic_irq *irq) 99static inline bool irq_is_pending(struct vgic_irq *irq)
77{ 100{
78 if (irq->config == VGIC_CONFIG_EDGE) 101 if (irq->config == VGIC_CONFIG_EDGE)
@@ -157,12 +180,15 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
157void vgic_v3_enable(struct kvm_vcpu *vcpu); 180void vgic_v3_enable(struct kvm_vcpu *vcpu);
158int vgic_v3_probe(const struct gic_kvm_info *info); 181int vgic_v3_probe(const struct gic_kvm_info *info);
159int vgic_v3_map_resources(struct kvm *kvm); 182int vgic_v3_map_resources(struct kvm *kvm);
160int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); 183int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
184int vgic_v3_save_pending_tables(struct kvm *kvm);
185int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr);
186int vgic_register_redist_iodev(struct kvm_vcpu *vcpu);
187bool vgic_v3_check_base(struct kvm *kvm);
161 188
162void vgic_v3_load(struct kvm_vcpu *vcpu); 189void vgic_v3_load(struct kvm_vcpu *vcpu);
163void vgic_v3_put(struct kvm_vcpu *vcpu); 190void vgic_v3_put(struct kvm_vcpu *vcpu);
164 191
165int vgic_register_its_iodevs(struct kvm *kvm);
166bool vgic_has_its(struct kvm *kvm); 192bool vgic_has_its(struct kvm *kvm);
167int kvm_vgic_register_its_device(void); 193int kvm_vgic_register_its_device(void);
168void vgic_enable_lpis(struct kvm_vcpu *vcpu); 194void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -187,4 +213,7 @@ int vgic_init(struct kvm *kvm);
187int vgic_debug_init(struct kvm *kvm); 213int vgic_debug_init(struct kvm *kvm);
188int vgic_debug_destroy(struct kvm *kvm); 214int vgic_debug_destroy(struct kvm *kvm);
189 215
216bool lock_all_vcpus(struct kvm *kvm);
217void unlock_all_vcpus(struct kvm *kvm);
218
190#endif 219#endif
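The KVM_ITS_* masks added to vgic.h above define the layout of the table entries saved to guest RAM by vgic_its_save_ite()/vgic_its_restore_ite() earlier in the diff. As a worked illustration only (the values are made up), an ITE with next-event offset 1, pINTID 0x2000 and ICID 5 encodes and decodes as follows:

	/* encode, as vgic_its_save_ite() does (before cpu_to_le64) */
	u64 val = (1ULL << KVM_ITS_ITE_NEXT_SHIFT) |		/* bits 63:48 */
		  (0x2000ULL << KVM_ITS_ITE_PINTID_SHIFT) |	/* bits 47:16 */
		  5;						/* bits 15:0  */
	/* val == 0x0001000020000005 */

	/* decode, as vgic_its_restore_ite() does (after le64_to_cpu) */
	u32 coll_id = val & KVM_ITS_ITE_ICID_MASK;				/* 5      */
	u32 lpi_id  = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;	/* 0x2000 */
	u32 next    = val >> KVM_ITS_ITE_NEXT_SHIFT;				/* 1      */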
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b3d151ee2a67..f0fe9d02f6bb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2836,10 +2836,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
2836 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, 2836 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
2837 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, 2837 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
2838#endif 2838#endif
2839
2840#ifdef CONFIG_KVM_XICS
2841 [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
2842#endif
2843}; 2839};
2844 2840
2845int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) 2841int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
@@ -3715,7 +3711,7 @@ static const struct file_operations vm_stat_get_per_vm_fops = {
3715 .release = kvm_debugfs_release, 3711 .release = kvm_debugfs_release,
3716 .read = simple_attr_read, 3712 .read = simple_attr_read,
3717 .write = simple_attr_write, 3713 .write = simple_attr_write,
3718 .llseek = generic_file_llseek, 3714 .llseek = no_llseek,
3719}; 3715};
3720 3716
3721static int vcpu_stat_get_per_vm(void *data, u64 *val) 3717static int vcpu_stat_get_per_vm(void *data, u64 *val)
@@ -3760,7 +3756,7 @@ static const struct file_operations vcpu_stat_get_per_vm_fops = {
3760 .release = kvm_debugfs_release, 3756 .release = kvm_debugfs_release,
3761 .read = simple_attr_read, 3757 .read = simple_attr_read,
3762 .write = simple_attr_write, 3758 .write = simple_attr_write,
3763 .llseek = generic_file_llseek, 3759 .llseek = no_llseek,
3764}; 3760};
3765 3761
3766static const struct file_operations *stat_fops_per_vm[] = { 3762static const struct file_operations *stat_fops_per_vm[] = {