Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/Kconfig               13
-rw-r--r--  virt/kvm/arm/arch_timer.c      78
-rw-r--r--  virt/kvm/arm/vgic-v2-emul.c   856
-rw-r--r--  virt/kvm/arm/vgic-v2.c        259
-rw-r--r--  virt/kvm/arm/vgic-v3-emul.c  1030
-rw-r--r--  virt/kvm/arm/vgic-v3.c        287
-rw-r--r--  virt/kvm/arm/vgic.c          2216
-rw-r--r--  virt/kvm/arm/vgic.h           140
-rw-r--r--  virt/kvm/assigned-dev.c      1024
-rw-r--r--  virt/kvm/async_pf.c             4
-rw-r--r--  virt/kvm/coalesced_mmio.c       7
-rw-r--r--  virt/kvm/eventfd.c            138
-rw-r--r--  virt/kvm/ioapic.c             646
-rw-r--r--  virt/kvm/ioapic.h             102
-rw-r--r--  virt/kvm/iodev.h               70
-rw-r--r--  virt/kvm/iommu.c              359
-rw-r--r--  virt/kvm/irq_comm.c           373
-rw-r--r--  virt/kvm/irqchip.c            100
-rw-r--r--  virt/kvm/kvm_main.c           659
-rw-r--r--  virt/kvm/vfio.c                27
-rw-r--r--  virt/kvm/vfio.h                17
21 files changed, 4409 insertions, 3996 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 13f2d19793e3..e2c876d5a03b 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
6config HAVE_KVM_IRQCHIP 6config HAVE_KVM_IRQCHIP
7 bool 7 bool
8 8
9config HAVE_KVM_IRQFD
10 bool
11
9config HAVE_KVM_IRQ_ROUTING 12config HAVE_KVM_IRQ_ROUTING
10 bool 13 bool
11 14
@@ -34,3 +37,13 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
34 37
35config KVM_VFIO 38config KVM_VFIO
36 bool 39 bool
40
41config HAVE_KVM_ARCH_TLB_FLUSH_ALL
42 bool
43
44config KVM_GENERIC_DIRTYLOG_READ_PROTECT
45 bool
46
47config KVM_COMPAT
48 def_bool y
49 depends on COMPAT && !S390
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819a9b6a..98c95f2fcba4 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
61 61
62static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) 62static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
63{ 63{
64 int ret;
64 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; 65 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
65 66
66 timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK; 67 timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
67 kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, 68 ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
68 timer->irq->irq, 69 timer->irq->irq,
69 timer->irq->level); 70 timer->irq->level);
71 WARN_ON(ret);
70} 72}
71 73
72static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) 74static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -83,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
83 return IRQ_HANDLED; 85 return IRQ_HANDLED;
84} 86}
85 87
88/*
89 * Work function for handling the backup timer that we schedule when a vcpu is
90 * no longer running, but had a timer programmed to fire in the future.
91 */
86static void kvm_timer_inject_irq_work(struct work_struct *work) 92static void kvm_timer_inject_irq_work(struct work_struct *work)
87{ 93{
88 struct kvm_vcpu *vcpu; 94 struct kvm_vcpu *vcpu;
89 95
90 vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); 96 vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
91 vcpu->arch.timer_cpu.armed = false; 97 vcpu->arch.timer_cpu.armed = false;
92 kvm_timer_inject_irq(vcpu); 98
99 /*
100 * If the vcpu is blocked we want to wake it up so that it will see
101 * the timer has expired when entering the guest.
102 */
103 kvm_vcpu_kick(vcpu);
93} 104}
94 105
95static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) 106static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
@@ -100,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
100 return HRTIMER_NORESTART; 111 return HRTIMER_NORESTART;
101} 112}
102 113
114bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
115{
116 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
117 cycle_t cval, now;
118
119 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
120 !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
121 return false;
122
123 cval = timer->cntv_cval;
124 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
125
126 return cval <= now;
127}
128
103/** 129/**
104 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu 130 * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
105 * @vcpu: The vcpu pointer 131 * @vcpu: The vcpu pointer
@@ -117,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
117 * populate the CPU timer again. 143 * populate the CPU timer again.
118 */ 144 */
119 timer_disarm(timer); 145 timer_disarm(timer);
146
147 /*
148 * If the timer expired while we were not scheduled, now is the time
149 * to inject it.
150 */
151 if (kvm_timer_should_fire(vcpu))
152 kvm_timer_inject_irq(vcpu);
120} 153}
121 154
122/** 155/**
@@ -132,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
132 cycle_t cval, now; 165 cycle_t cval, now;
133 u64 ns; 166 u64 ns;
134 167
135 if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
136 !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
137 return;
138
139 cval = timer->cntv_cval;
140 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
141
142 BUG_ON(timer_is_armed(timer)); 168 BUG_ON(timer_is_armed(timer));
143 169
144 if (cval <= now) { 170 if (kvm_timer_should_fire(vcpu)) {
145 /* 171 /*
146 * Timer has already expired while we were not 172 * Timer has already expired while we were not
147 * looking. Inject the interrupt and carry on. 173 * looking. Inject the interrupt and carry on.
@@ -150,7 +176,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
150 return; 176 return;
151 } 177 }
152 178
153 ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); 179 cval = timer->cntv_cval;
180 now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
181
182 ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
183 &timecounter->frac);
154 timer_arm(timer, ns); 184 timer_arm(timer, ns);
155} 185}
156 186
@@ -307,12 +337,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
307 timer_disarm(timer); 337 timer_disarm(timer);
308} 338}
309 339
310int kvm_timer_init(struct kvm *kvm) 340void kvm_timer_enable(struct kvm *kvm)
311{ 341{
312 if (timecounter && wqueue) { 342 if (kvm->arch.timer.enabled)
313 kvm->arch.timer.cntvoff = kvm_phys_timer_read(); 343 return;
344
345 /*
346 * There is a potential race here between VCPUs starting for the first
347 * time, which may be enabling the timer multiple times. That doesn't
348 * hurt though, because we're just setting a variable to the same
 349 * value that it already had. The important thing is that all
350 * VCPUs have the enabled variable set, before entering the guest, if
351 * the arch timers are enabled.
352 */
353 if (timecounter && wqueue)
314 kvm->arch.timer.enabled = 1; 354 kvm->arch.timer.enabled = 1;
315 } 355}
316 356
317 return 0; 357void kvm_timer_init(struct kvm *kvm)
358{
359 kvm->arch.timer.cntvoff = kvm_phys_timer_read();
318} 360}
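
The kvm_timer_should_fire() helper added above boils down to a predicate on three pieces of state: the timer output must not be masked, the timer must be enabled, and the compare value must have been reached in guest virtual time (physical counter minus the per-VM offset). A minimal standalone model of that check, using plain integers instead of the kernel's struct arch_timer_cpu and cycle_t types; the names and constants below mirror the kernel definitions but are assumptions of this sketch, not kernel API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Control bits modelled after ARCH_TIMER_CTRL_ENABLE and ARCH_TIMER_CTRL_IT_MASK. */
#define TIMER_CTRL_ENABLE	(1u << 0)
#define TIMER_CTRL_IT_MASK	(1u << 1)

/* Standalone version of the condition checked by kvm_timer_should_fire(). */
static bool timer_should_fire(uint32_t cntv_ctl, uint64_t cntv_cval,
			      uint64_t phys_count, uint64_t cntvoff)
{
	if ((cntv_ctl & TIMER_CTRL_IT_MASK) ||
	    !(cntv_ctl & TIMER_CTRL_ENABLE))
		return false;

	/* Guest virtual time is the physical counter minus the VM's offset. */
	return cntv_cval <= (phys_count - cntvoff);
}

int main(void)
{
	/* Enabled, not masked, compare value already in the past: fires. */
	printf("%d\n", timer_should_fire(TIMER_CTRL_ENABLE, 100, 250, 100));
	/* Output masked: does not fire even though the deadline has passed. */
	printf("%d\n", timer_should_fire(TIMER_CTRL_ENABLE | TIMER_CTRL_IT_MASK,
					 100, 250, 100));
	return 0;
}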
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
new file mode 100644
index 000000000000..13907970d11c
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -0,0 +1,856 @@
1/*
2 * Contains GICv2 specific emulation code, was in vgic.c before.
3 *
4 * Copyright (C) 2012 ARM Ltd.
5 * Author: Marc Zyngier <marc.zyngier@arm.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include <linux/cpu.h>
21#include <linux/kvm.h>
22#include <linux/kvm_host.h>
23#include <linux/interrupt.h>
24#include <linux/io.h>
25#include <linux/uaccess.h>
26
27#include <linux/irqchip/arm-gic.h>
28
29#include <asm/kvm_emulate.h>
30#include <asm/kvm_arm.h>
31#include <asm/kvm_mmu.h>
32
33#include "vgic.h"
34
35#define GICC_ARCH_VERSION_V2 0x2
36
37static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
38static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
39{
40 return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
41}
42
43static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
44 struct kvm_exit_mmio *mmio, phys_addr_t offset)
45{
46 u32 reg;
47 u32 word_offset = offset & 3;
48
49 switch (offset & ~3) {
50 case 0: /* GICD_CTLR */
51 reg = vcpu->kvm->arch.vgic.enabled;
52 vgic_reg_access(mmio, &reg, word_offset,
53 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
54 if (mmio->is_write) {
55 vcpu->kvm->arch.vgic.enabled = reg & 1;
56 vgic_update_state(vcpu->kvm);
57 return true;
58 }
59 break;
60
61 case 4: /* GICD_TYPER */
62 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
63 reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
64 vgic_reg_access(mmio, &reg, word_offset,
65 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
66 break;
67
68 case 8: /* GICD_IIDR */
69 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
70 vgic_reg_access(mmio, &reg, word_offset,
71 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
72 break;
73 }
74
75 return false;
76}
77
78static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
79 struct kvm_exit_mmio *mmio,
80 phys_addr_t offset)
81{
82 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
83 vcpu->vcpu_id, ACCESS_WRITE_SETBIT);
84}
85
86static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
87 struct kvm_exit_mmio *mmio,
88 phys_addr_t offset)
89{
90 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
91 vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT);
92}
93
94static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
95 struct kvm_exit_mmio *mmio,
96 phys_addr_t offset)
97{
98 return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
99 vcpu->vcpu_id);
100}
101
102static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
103 struct kvm_exit_mmio *mmio,
104 phys_addr_t offset)
105{
106 return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
107 vcpu->vcpu_id);
108}
109
110static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
111 struct kvm_exit_mmio *mmio,
112 phys_addr_t offset)
113{
114 return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
115 vcpu->vcpu_id);
116}
117
118static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
119 struct kvm_exit_mmio *mmio,
120 phys_addr_t offset)
121{
122 return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
123 vcpu->vcpu_id);
124}
125
126static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
127 struct kvm_exit_mmio *mmio,
128 phys_addr_t offset)
129{
130 u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
131 vcpu->vcpu_id, offset);
132 vgic_reg_access(mmio, reg, offset,
133 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
134 return false;
135}
136
137#define GICD_ITARGETSR_SIZE 32
138#define GICD_CPUTARGETS_BITS 8
139#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
140static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
141{
142 struct vgic_dist *dist = &kvm->arch.vgic;
143 int i;
144 u32 val = 0;
145
146 irq -= VGIC_NR_PRIVATE_IRQS;
147
148 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
149 val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
150
151 return val;
152}
153
154static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
155{
156 struct vgic_dist *dist = &kvm->arch.vgic;
157 struct kvm_vcpu *vcpu;
158 int i, c;
159 unsigned long *bmap;
160 u32 target;
161
162 irq -= VGIC_NR_PRIVATE_IRQS;
163
164 /*
165 * Pick the LSB in each byte. This ensures we target exactly
166 * one vcpu per IRQ. If the byte is null, assume we target
167 * CPU0.
168 */
169 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
170 int shift = i * GICD_CPUTARGETS_BITS;
171
172 target = ffs((val >> shift) & 0xffU);
173 target = target ? (target - 1) : 0;
174 dist->irq_spi_cpu[irq + i] = target;
175 kvm_for_each_vcpu(c, vcpu, kvm) {
176 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
177 if (c == target)
178 set_bit(irq + i, bmap);
179 else
180 clear_bit(irq + i, bmap);
181 }
182 }
183}
184
185static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
186 struct kvm_exit_mmio *mmio,
187 phys_addr_t offset)
188{
189 u32 reg;
190
191 /* We treat the banked interrupts targets as read-only */
192 if (offset < 32) {
193 u32 roreg;
194
195 roreg = 1 << vcpu->vcpu_id;
196 roreg |= roreg << 8;
197 roreg |= roreg << 16;
198
199 vgic_reg_access(mmio, &roreg, offset,
200 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
201 return false;
202 }
203
204 reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
205 vgic_reg_access(mmio, &reg, offset,
206 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
207 if (mmio->is_write) {
208 vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
209 vgic_update_state(vcpu->kvm);
210 return true;
211 }
212
213 return false;
214}
215
216static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
217 struct kvm_exit_mmio *mmio, phys_addr_t offset)
218{
219 u32 *reg;
220
221 reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
222 vcpu->vcpu_id, offset >> 1);
223
224 return vgic_handle_cfg_reg(reg, mmio, offset);
225}
226
227static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
228 struct kvm_exit_mmio *mmio, phys_addr_t offset)
229{
230 u32 reg;
231
232 vgic_reg_access(mmio, &reg, offset,
233 ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
234 if (mmio->is_write) {
235 vgic_dispatch_sgi(vcpu, reg);
236 vgic_update_state(vcpu->kvm);
237 return true;
238 }
239
240 return false;
241}
242
243/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
244static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
245 struct kvm_exit_mmio *mmio,
246 phys_addr_t offset)
247{
248 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
249 int sgi;
250 int min_sgi = (offset & ~0x3);
251 int max_sgi = min_sgi + 3;
252 int vcpu_id = vcpu->vcpu_id;
253 u32 reg = 0;
254
255 /* Copy source SGIs from distributor side */
256 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
257 u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi);
258
259 reg |= ((u32)sources) << (8 * (sgi - min_sgi));
260 }
261
262 mmio_data_write(mmio, ~0, reg);
263 return false;
264}
265
266static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
267 struct kvm_exit_mmio *mmio,
268 phys_addr_t offset, bool set)
269{
270 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
271 int sgi;
272 int min_sgi = (offset & ~0x3);
273 int max_sgi = min_sgi + 3;
274 int vcpu_id = vcpu->vcpu_id;
275 u32 reg;
276 bool updated = false;
277
278 reg = mmio_data_read(mmio, ~0);
279
280 /* Clear pending SGIs on the distributor */
281 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
282 u8 mask = reg >> (8 * (sgi - min_sgi));
283 u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
284
285 if (set) {
286 if ((*src & mask) != mask)
287 updated = true;
288 *src |= mask;
289 } else {
290 if (*src & mask)
291 updated = true;
292 *src &= ~mask;
293 }
294 }
295
296 if (updated)
297 vgic_update_state(vcpu->kvm);
298
299 return updated;
300}
301
302static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
303 struct kvm_exit_mmio *mmio,
304 phys_addr_t offset)
305{
306 if (!mmio->is_write)
307 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
308 else
309 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
310}
311
312static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
313 struct kvm_exit_mmio *mmio,
314 phys_addr_t offset)
315{
316 if (!mmio->is_write)
317 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
318 else
319 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
320}
321
322static const struct vgic_io_range vgic_dist_ranges[] = {
323 {
324 .base = GIC_DIST_CTRL,
325 .len = 12,
326 .bits_per_irq = 0,
327 .handle_mmio = handle_mmio_misc,
328 },
329 {
330 .base = GIC_DIST_IGROUP,
331 .len = VGIC_MAX_IRQS / 8,
332 .bits_per_irq = 1,
333 .handle_mmio = handle_mmio_raz_wi,
334 },
335 {
336 .base = GIC_DIST_ENABLE_SET,
337 .len = VGIC_MAX_IRQS / 8,
338 .bits_per_irq = 1,
339 .handle_mmio = handle_mmio_set_enable_reg,
340 },
341 {
342 .base = GIC_DIST_ENABLE_CLEAR,
343 .len = VGIC_MAX_IRQS / 8,
344 .bits_per_irq = 1,
345 .handle_mmio = handle_mmio_clear_enable_reg,
346 },
347 {
348 .base = GIC_DIST_PENDING_SET,
349 .len = VGIC_MAX_IRQS / 8,
350 .bits_per_irq = 1,
351 .handle_mmio = handle_mmio_set_pending_reg,
352 },
353 {
354 .base = GIC_DIST_PENDING_CLEAR,
355 .len = VGIC_MAX_IRQS / 8,
356 .bits_per_irq = 1,
357 .handle_mmio = handle_mmio_clear_pending_reg,
358 },
359 {
360 .base = GIC_DIST_ACTIVE_SET,
361 .len = VGIC_MAX_IRQS / 8,
362 .bits_per_irq = 1,
363 .handle_mmio = handle_mmio_set_active_reg,
364 },
365 {
366 .base = GIC_DIST_ACTIVE_CLEAR,
367 .len = VGIC_MAX_IRQS / 8,
368 .bits_per_irq = 1,
369 .handle_mmio = handle_mmio_clear_active_reg,
370 },
371 {
372 .base = GIC_DIST_PRI,
373 .len = VGIC_MAX_IRQS,
374 .bits_per_irq = 8,
375 .handle_mmio = handle_mmio_priority_reg,
376 },
377 {
378 .base = GIC_DIST_TARGET,
379 .len = VGIC_MAX_IRQS,
380 .bits_per_irq = 8,
381 .handle_mmio = handle_mmio_target_reg,
382 },
383 {
384 .base = GIC_DIST_CONFIG,
385 .len = VGIC_MAX_IRQS / 4,
386 .bits_per_irq = 2,
387 .handle_mmio = handle_mmio_cfg_reg,
388 },
389 {
390 .base = GIC_DIST_SOFTINT,
391 .len = 4,
392 .handle_mmio = handle_mmio_sgi_reg,
393 },
394 {
395 .base = GIC_DIST_SGI_PENDING_CLEAR,
396 .len = VGIC_NR_SGIS,
397 .handle_mmio = handle_mmio_sgi_clear,
398 },
399 {
400 .base = GIC_DIST_SGI_PENDING_SET,
401 .len = VGIC_NR_SGIS,
402 .handle_mmio = handle_mmio_sgi_set,
403 },
404 {}
405};
406
407static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
408{
409 struct kvm *kvm = vcpu->kvm;
410 struct vgic_dist *dist = &kvm->arch.vgic;
411 int nrcpus = atomic_read(&kvm->online_vcpus);
412 u8 target_cpus;
413 int sgi, mode, c, vcpu_id;
414
415 vcpu_id = vcpu->vcpu_id;
416
417 sgi = reg & 0xf;
418 target_cpus = (reg >> 16) & 0xff;
419 mode = (reg >> 24) & 3;
420
421 switch (mode) {
422 case 0:
423 if (!target_cpus)
424 return;
425 break;
426
427 case 1:
428 target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
429 break;
430
431 case 2:
432 target_cpus = 1 << vcpu_id;
433 break;
434 }
435
436 kvm_for_each_vcpu(c, vcpu, kvm) {
437 if (target_cpus & 1) {
438 /* Flag the SGI as pending */
439 vgic_dist_irq_set_pending(vcpu, sgi);
440 *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
441 kvm_debug("SGI%d from CPU%d to CPU%d\n",
442 sgi, vcpu_id, c);
443 }
444
445 target_cpus >>= 1;
446 }
447}
448
449static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
450{
451 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
452 unsigned long sources;
453 int vcpu_id = vcpu->vcpu_id;
454 int c;
455
456 sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
457
458 for_each_set_bit(c, &sources, dist->nr_cpus) {
459 if (vgic_queue_irq(vcpu, c, irq))
460 clear_bit(c, &sources);
461 }
462
463 *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
464
465 /*
466 * If the sources bitmap has been cleared it means that we
467 * could queue all the SGIs onto link registers (see the
468 * clear_bit above), and therefore we are done with them in
469 * our emulated gic and can get rid of them.
470 */
471 if (!sources) {
472 vgic_dist_irq_clear_pending(vcpu, irq);
473 vgic_cpu_irq_clear(vcpu, irq);
474 return true;
475 }
476
477 return false;
478}
479
480/**
 481 * vgic_v2_map_resources - Configure global VGIC state before running any VCPUs
482 * @kvm: pointer to the kvm struct
483 *
484 * Map the virtual CPU interface into the VM before running any VCPUs. We
485 * can't do this at creation time, because user space must first set the
486 * virtual CPU interface address in the guest physical address space.
487 */
488static int vgic_v2_map_resources(struct kvm *kvm,
489 const struct vgic_params *params)
490{
491 struct vgic_dist *dist = &kvm->arch.vgic;
492 int ret = 0;
493
494 if (!irqchip_in_kernel(kvm))
495 return 0;
496
497 mutex_lock(&kvm->lock);
498
499 if (vgic_ready(kvm))
500 goto out;
501
502 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
503 IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
504 kvm_err("Need to set vgic cpu and dist addresses first\n");
505 ret = -ENXIO;
506 goto out;
507 }
508
509 vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
510 KVM_VGIC_V2_DIST_SIZE,
511 vgic_dist_ranges, -1, &dist->dist_iodev);
512
513 /*
514 * Initialize the vgic if this hasn't already been done on demand by
515 * accessing the vgic state from userspace.
516 */
517 ret = vgic_init(kvm);
518 if (ret) {
519 kvm_err("Unable to allocate maps\n");
520 goto out_unregister;
521 }
522
523 ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
524 params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
525 true);
526 if (ret) {
527 kvm_err("Unable to remap VGIC CPU to VCPU\n");
528 goto out_unregister;
529 }
530
531 dist->ready = true;
532 goto out;
533
534out_unregister:
535 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
536
537out:
538 if (ret)
539 kvm_vgic_destroy(kvm);
540 mutex_unlock(&kvm->lock);
541 return ret;
542}
543
544static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
545{
546 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
547
548 *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source;
549}
550
551static int vgic_v2_init_model(struct kvm *kvm)
552{
553 int i;
554
555 for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
556 vgic_set_target_reg(kvm, 0, i);
557
558 return 0;
559}
560
561void vgic_v2_init_emulation(struct kvm *kvm)
562{
563 struct vgic_dist *dist = &kvm->arch.vgic;
564
565 dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
566 dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
567 dist->vm_ops.init_model = vgic_v2_init_model;
568 dist->vm_ops.map_resources = vgic_v2_map_resources;
569
570 kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
571}
572
573static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
574 struct kvm_exit_mmio *mmio, phys_addr_t offset)
575{
576 bool updated = false;
577 struct vgic_vmcr vmcr;
578 u32 *vmcr_field;
579 u32 reg;
580
581 vgic_get_vmcr(vcpu, &vmcr);
582
583 switch (offset & ~0x3) {
584 case GIC_CPU_CTRL:
585 vmcr_field = &vmcr.ctlr;
586 break;
587 case GIC_CPU_PRIMASK:
588 vmcr_field = &vmcr.pmr;
589 break;
590 case GIC_CPU_BINPOINT:
591 vmcr_field = &vmcr.bpr;
592 break;
593 case GIC_CPU_ALIAS_BINPOINT:
594 vmcr_field = &vmcr.abpr;
595 break;
596 default:
597 BUG();
598 }
599
600 if (!mmio->is_write) {
601 reg = *vmcr_field;
602 mmio_data_write(mmio, ~0, reg);
603 } else {
604 reg = mmio_data_read(mmio, ~0);
605 if (reg != *vmcr_field) {
606 *vmcr_field = reg;
607 vgic_set_vmcr(vcpu, &vmcr);
608 updated = true;
609 }
610 }
611 return updated;
612}
613
614static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
615 struct kvm_exit_mmio *mmio, phys_addr_t offset)
616{
617 return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
618}
619
620static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
621 struct kvm_exit_mmio *mmio,
622 phys_addr_t offset)
623{
624 u32 reg;
625
626 if (mmio->is_write)
627 return false;
628
629 /* GICC_IIDR */
630 reg = (PRODUCT_ID_KVM << 20) |
631 (GICC_ARCH_VERSION_V2 << 16) |
632 (IMPLEMENTER_ARM << 0);
633 mmio_data_write(mmio, ~0, reg);
634 return false;
635}
636
637/*
638 * CPU Interface Register accesses - these are not accessed by the VM, but by
639 * user space for saving and restoring VGIC state.
640 */
641static const struct vgic_io_range vgic_cpu_ranges[] = {
642 {
643 .base = GIC_CPU_CTRL,
644 .len = 12,
645 .handle_mmio = handle_cpu_mmio_misc,
646 },
647 {
648 .base = GIC_CPU_ALIAS_BINPOINT,
649 .len = 4,
650 .handle_mmio = handle_mmio_abpr,
651 },
652 {
653 .base = GIC_CPU_ACTIVEPRIO,
654 .len = 16,
655 .handle_mmio = handle_mmio_raz_wi,
656 },
657 {
658 .base = GIC_CPU_IDENT,
659 .len = 4,
660 .handle_mmio = handle_cpu_mmio_ident,
661 },
662};
663
664static int vgic_attr_regs_access(struct kvm_device *dev,
665 struct kvm_device_attr *attr,
666 u32 *reg, bool is_write)
667{
668 const struct vgic_io_range *r = NULL, *ranges;
669 phys_addr_t offset;
670 int ret, cpuid, c;
671 struct kvm_vcpu *vcpu, *tmp_vcpu;
672 struct vgic_dist *vgic;
673 struct kvm_exit_mmio mmio;
674 u32 data;
675
676 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
677 cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
678 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
679
680 mutex_lock(&dev->kvm->lock);
681
682 ret = vgic_init(dev->kvm);
683 if (ret)
684 goto out;
685
686 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
687 ret = -EINVAL;
688 goto out;
689 }
690
691 vcpu = kvm_get_vcpu(dev->kvm, cpuid);
692 vgic = &dev->kvm->arch.vgic;
693
694 mmio.len = 4;
695 mmio.is_write = is_write;
696 mmio.data = &data;
697 if (is_write)
698 mmio_data_write(&mmio, ~0, *reg);
699 switch (attr->group) {
700 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
701 mmio.phys_addr = vgic->vgic_dist_base + offset;
702 ranges = vgic_dist_ranges;
703 break;
704 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
705 mmio.phys_addr = vgic->vgic_cpu_base + offset;
706 ranges = vgic_cpu_ranges;
707 break;
708 default:
709 BUG();
710 }
711 r = vgic_find_range(ranges, 4, offset);
712
713 if (unlikely(!r || !r->handle_mmio)) {
714 ret = -ENXIO;
715 goto out;
716 }
717
718
719 spin_lock(&vgic->lock);
720
721 /*
722 * Ensure that no other VCPU is running by checking the vcpu->cpu
 723 * field. If no other VCPUs are running we can safely access the VGIC
 724 * state, because even if another VCPU is run after this point, that
725 * VCPU will not touch the vgic state, because it will block on
726 * getting the vgic->lock in kvm_vgic_sync_hwstate().
727 */
728 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
729 if (unlikely(tmp_vcpu->cpu != -1)) {
730 ret = -EBUSY;
731 goto out_vgic_unlock;
732 }
733 }
734
735 /*
736 * Move all pending IRQs from the LRs on all VCPUs so the pending
737 * state can be properly represented in the register state accessible
738 * through this API.
739 */
740 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
741 vgic_unqueue_irqs(tmp_vcpu);
742
743 offset -= r->base;
744 r->handle_mmio(vcpu, &mmio, offset);
745
746 if (!is_write)
747 *reg = mmio_data_read(&mmio, ~0);
748
749 ret = 0;
750out_vgic_unlock:
751 spin_unlock(&vgic->lock);
752out:
753 mutex_unlock(&dev->kvm->lock);
754 return ret;
755}
756
757static int vgic_v2_create(struct kvm_device *dev, u32 type)
758{
759 return kvm_vgic_create(dev->kvm, type);
760}
761
762static void vgic_v2_destroy(struct kvm_device *dev)
763{
764 kfree(dev);
765}
766
767static int vgic_v2_set_attr(struct kvm_device *dev,
768 struct kvm_device_attr *attr)
769{
770 int ret;
771
772 ret = vgic_set_common_attr(dev, attr);
773 if (ret != -ENXIO)
774 return ret;
775
776 switch (attr->group) {
777 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
778 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
779 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
780 u32 reg;
781
782 if (get_user(reg, uaddr))
783 return -EFAULT;
784
785 return vgic_attr_regs_access(dev, attr, &reg, true);
786 }
787
788 }
789
790 return -ENXIO;
791}
792
793static int vgic_v2_get_attr(struct kvm_device *dev,
794 struct kvm_device_attr *attr)
795{
796 int ret;
797
798 ret = vgic_get_common_attr(dev, attr);
799 if (ret != -ENXIO)
800 return ret;
801
802 switch (attr->group) {
803 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
804 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
805 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
806 u32 reg = 0;
807
808 ret = vgic_attr_regs_access(dev, attr, &reg, false);
809 if (ret)
810 return ret;
811 return put_user(reg, uaddr);
812 }
813
814 }
815
816 return -ENXIO;
817}
818
819static int vgic_v2_has_attr(struct kvm_device *dev,
820 struct kvm_device_attr *attr)
821{
822 phys_addr_t offset;
823
824 switch (attr->group) {
825 case KVM_DEV_ARM_VGIC_GRP_ADDR:
826 switch (attr->attr) {
827 case KVM_VGIC_V2_ADDR_TYPE_DIST:
828 case KVM_VGIC_V2_ADDR_TYPE_CPU:
829 return 0;
830 }
831 break;
832 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
833 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
834 return vgic_has_attr_regs(vgic_dist_ranges, offset);
835 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
836 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
837 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
838 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
839 return 0;
840 case KVM_DEV_ARM_VGIC_GRP_CTRL:
841 switch (attr->attr) {
842 case KVM_DEV_ARM_VGIC_CTRL_INIT:
843 return 0;
844 }
845 }
846 return -ENXIO;
847}
848
849struct kvm_device_ops kvm_arm_vgic_v2_ops = {
850 .name = "kvm-arm-vgic-v2",
851 .create = vgic_v2_create,
852 .destroy = vgic_v2_destroy,
853 .set_attr = vgic_v2_set_attr,
854 .get_attr = vgic_v2_get_attr,
855 .has_attr = vgic_v2_has_attr,
856};
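
vgic_get_target_reg() and vgic_set_target_reg() above pack one byte per SPI into each 32-bit GICD_ITARGETSR word; on a guest write, each byte is collapsed to a single target CPU by taking its lowest set bit, with an empty byte defaulting to CPU0. A small self-contained sketch of that byte-per-IRQ encoding; the function names are illustrative, only the layout mirrors the code above:

#include <stdint.h>
#include <stdio.h>
#include <strings.h>

#define IRQS_PER_ITARGETSR	4	/* 32-bit register, 8 target bits per IRQ */

/* Pack per-IRQ target CPU numbers into one ITARGETSR-style word (one bit set per byte). */
static uint32_t pack_targets(const uint8_t cpu[IRQS_PER_ITARGETSR])
{
	uint32_t val = 0;
	int i;

	for (i = 0; i < IRQS_PER_ITARGETSR; i++)
		val |= 1u << (cpu[i] + i * 8);

	return val;
}

/* Unpack a written word, taking the lowest set bit of each byte (CPU0 if the byte is 0). */
static void unpack_targets(uint32_t val, uint8_t cpu[IRQS_PER_ITARGETSR])
{
	int i;

	for (i = 0; i < IRQS_PER_ITARGETSR; i++) {
		int target = ffs((val >> (i * 8)) & 0xff);

		cpu[i] = target ? target - 1 : 0;
	}
}

int main(void)
{
	uint8_t in[IRQS_PER_ITARGETSR] = { 0, 1, 2, 3 };
	uint8_t out[IRQS_PER_ITARGETSR];

	unpack_targets(pack_targets(in), out);
	printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);
	return 0;
}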
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000000000000..f9b9c7c51372
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,259 @@
1/*
2 * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/cpu.h>
19#include <linux/kvm.h>
20#include <linux/kvm_host.h>
21#include <linux/interrupt.h>
22#include <linux/io.h>
23#include <linux/of.h>
24#include <linux/of_address.h>
25#include <linux/of_irq.h>
26
27#include <linux/irqchip/arm-gic.h>
28
29#include <asm/kvm_emulate.h>
30#include <asm/kvm_arm.h>
31#include <asm/kvm_mmu.h>
32
33static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
34{
35 struct vgic_lr lr_desc;
36 u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
37
38 lr_desc.irq = val & GICH_LR_VIRTUALID;
39 if (lr_desc.irq <= 15)
40 lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
41 else
42 lr_desc.source = 0;
43 lr_desc.state = 0;
44
45 if (val & GICH_LR_PENDING_BIT)
46 lr_desc.state |= LR_STATE_PENDING;
47 if (val & GICH_LR_ACTIVE_BIT)
48 lr_desc.state |= LR_STATE_ACTIVE;
49 if (val & GICH_LR_EOI)
50 lr_desc.state |= LR_EOI_INT;
51
52 return lr_desc;
53}
54
55static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
56 struct vgic_lr lr_desc)
57{
58 u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
59
60 if (lr_desc.state & LR_STATE_PENDING)
61 lr_val |= GICH_LR_PENDING_BIT;
62 if (lr_desc.state & LR_STATE_ACTIVE)
63 lr_val |= GICH_LR_ACTIVE_BIT;
64 if (lr_desc.state & LR_EOI_INT)
65 lr_val |= GICH_LR_EOI;
66
67 vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
68}
69
70static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
71 struct vgic_lr lr_desc)
72{
73 if (!(lr_desc.state & LR_STATE_MASK))
74 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
75 else
76 vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
77}
78
79static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
80{
81 return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
82}
83
84static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
85{
86 return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
87}
88
89static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
90{
91 vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
92}
93
94static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
95{
96 u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
97 u32 ret = 0;
98
99 if (misr & GICH_MISR_EOI)
100 ret |= INT_STATUS_EOI;
101 if (misr & GICH_MISR_U)
102 ret |= INT_STATUS_UNDERFLOW;
103
104 return ret;
105}
106
107static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
108{
109 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
110}
111
112static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
113{
114 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
115}
116
117static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
118{
119 u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
120
121 vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
122 vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
123 vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
124 vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
125}
126
127static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
128{
129 u32 vmcr;
130
131 vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
132 vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
133 vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
134 vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
135
136 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
137}
138
139static void vgic_v2_enable(struct kvm_vcpu *vcpu)
140{
141 /*
142 * By forcing VMCR to zero, the GIC will restore the binary
143 * points to their reset values. Anything else resets to zero
144 * anyway.
145 */
146 vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
147
148 /* Get the show on the road... */
149 vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
150}
151
152static const struct vgic_ops vgic_v2_ops = {
153 .get_lr = vgic_v2_get_lr,
154 .set_lr = vgic_v2_set_lr,
155 .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
156 .get_elrsr = vgic_v2_get_elrsr,
157 .get_eisr = vgic_v2_get_eisr,
158 .clear_eisr = vgic_v2_clear_eisr,
159 .get_interrupt_status = vgic_v2_get_interrupt_status,
160 .enable_underflow = vgic_v2_enable_underflow,
161 .disable_underflow = vgic_v2_disable_underflow,
162 .get_vmcr = vgic_v2_get_vmcr,
163 .set_vmcr = vgic_v2_set_vmcr,
164 .enable = vgic_v2_enable,
165};
166
167static struct vgic_params vgic_v2_params;
168
169/**
170 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
171 * @node: pointer to the DT node
172 * @ops: address of a pointer to the GICv2 operations
173 * @params: address of a pointer to HW-specific parameters
174 *
175 * Returns 0 if a GICv2 has been found, with the low level operations
176 * in *ops and the HW parameters in *params. Returns an error code
177 * otherwise.
178 */
179int vgic_v2_probe(struct device_node *vgic_node,
180 const struct vgic_ops **ops,
181 const struct vgic_params **params)
182{
183 int ret;
184 struct resource vctrl_res;
185 struct resource vcpu_res;
186 struct vgic_params *vgic = &vgic_v2_params;
187
188 vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
189 if (!vgic->maint_irq) {
190 kvm_err("error getting vgic maintenance irq from DT\n");
191 ret = -ENXIO;
192 goto out;
193 }
194
195 ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
196 if (ret) {
197 kvm_err("Cannot obtain GICH resource\n");
198 goto out;
199 }
200
201 vgic->vctrl_base = of_iomap(vgic_node, 2);
202 if (!vgic->vctrl_base) {
203 kvm_err("Cannot ioremap GICH\n");
204 ret = -ENOMEM;
205 goto out;
206 }
207
208 vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
209 vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
210
211 ret = create_hyp_io_mappings(vgic->vctrl_base,
212 vgic->vctrl_base + resource_size(&vctrl_res),
213 vctrl_res.start);
214 if (ret) {
215 kvm_err("Cannot map VCTRL into hyp\n");
216 goto out_unmap;
217 }
218
219 if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
220 kvm_err("Cannot obtain GICV resource\n");
221 ret = -ENXIO;
222 goto out_unmap;
223 }
224
225 if (!PAGE_ALIGNED(vcpu_res.start)) {
226 kvm_err("GICV physical address 0x%llx not page aligned\n",
227 (unsigned long long)vcpu_res.start);
228 ret = -ENXIO;
229 goto out_unmap;
230 }
231
232 if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
233 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
234 (unsigned long long)resource_size(&vcpu_res),
235 PAGE_SIZE);
236 ret = -ENXIO;
237 goto out_unmap;
238 }
239
240 vgic->can_emulate_gicv2 = true;
241 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
242
243 vgic->vcpu_base = vcpu_res.start;
244
245 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
246 vctrl_res.start, vgic->maint_irq);
247
248 vgic->type = VGIC_V2;
249 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
250 *ops = &vgic_v2_ops;
251 *params = vgic;
252 goto out;
253
254out_unmap:
255 iounmap(vgic->vctrl_base);
256out:
257 of_node_put(vgic_node);
258 return ret;
259}
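
vgic_v2_get_lr() and vgic_v2_set_lr() above convert between the generic struct vgic_lr and the raw GICv2 list register word: virtual interrupt ID in the low bits, SGI source CPU in the CPUID field, and pending/active/EOI state bits further up. A minimal standalone encode/decode of that layout; the bit positions are meant to mirror the GICH_LR_* definitions, and the struct and function names are illustrative, not the kernel's:

#include <stdint.h>
#include <stdio.h>

/* Bit layout modelled after the GICH_LR_* definitions (an assumption of this sketch). */
#define LR_VIRTUALID_MASK	0x3ffu
#define LR_CPUID_SHIFT		10
#define LR_EOI			(1u << 19)
#define LR_PENDING		(1u << 28)
#define LR_ACTIVE		(1u << 29)

struct lr_desc {
	uint16_t irq;
	uint8_t  source;	/* only meaningful for SGIs (irq <= 15) */
	uint8_t  pending, active, eoi;
};

static uint32_t lr_encode(struct lr_desc d)
{
	uint32_t val = ((uint32_t)d.source << LR_CPUID_SHIFT) | d.irq;

	if (d.pending)
		val |= LR_PENDING;
	if (d.active)
		val |= LR_ACTIVE;
	if (d.eoi)
		val |= LR_EOI;

	return val;
}

static struct lr_desc lr_decode(uint32_t val)
{
	struct lr_desc d = {
		.irq	 = val & LR_VIRTUALID_MASK,
		.pending = !!(val & LR_PENDING),
		.active	 = !!(val & LR_ACTIVE),
		.eoi	 = !!(val & LR_EOI),
	};

	/* Only SGIs carry a source CPU in the CPUID field. */
	d.source = d.irq <= 15 ? (val >> LR_CPUID_SHIFT) & 0x7 : 0;

	return d;
}

int main(void)
{
	struct lr_desc d = { .irq = 3, .source = 2, .pending = 1 };
	struct lr_desc back = lr_decode(lr_encode(d));

	printf("irq=%u source=%u pending=%u active=%u\n",
	       (unsigned)back.irq, (unsigned)back.source,
	       (unsigned)back.pending, (unsigned)back.active);
	return 0;
}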
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
new file mode 100644
index 000000000000..e9c3a7a83833
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -0,0 +1,1030 @@
1/*
2 * GICv3 distributor and redistributor emulation
3 *
4 * GICv3 emulation is currently only supported on a GICv3 host (because
5 * we rely on the hardware's CPU interface virtualization support), but
6 * supports both hardware with or without the optional GICv2 backwards
7 * compatibility features.
8 *
9 * Limitations of the emulation:
10 * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore)
11 * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI.
12 * - We do not support the message based interrupts (MBIs) triggered by
13 * writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0.
14 * - We do not support the (optional) backwards compatibility feature.
15 * GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports
 16 * the compatibility feature, you can use a GICv2 in the guest, though.
17 * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI.
18 * - Priorities are not emulated (same as the GICv2 emulation). Linux
19 * as a guest is fine with this, because it does not use priorities.
20 * - We only support Group1 interrupts. Again Linux uses only those.
21 *
22 * Copyright (C) 2014 ARM Ltd.
23 * Author: Andre Przywara <andre.przywara@arm.com>
24 *
25 * This program is free software; you can redistribute it and/or modify
26 * it under the terms of the GNU General Public License version 2 as
27 * published by the Free Software Foundation.
28 *
29 * This program is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 * GNU General Public License for more details.
33 *
34 * You should have received a copy of the GNU General Public License
35 * along with this program. If not, see <http://www.gnu.org/licenses/>.
36 */
37
38#include <linux/cpu.h>
39#include <linux/kvm.h>
40#include <linux/kvm_host.h>
41#include <linux/interrupt.h>
42
43#include <linux/irqchip/arm-gic-v3.h>
44#include <kvm/arm_vgic.h>
45
46#include <asm/kvm_emulate.h>
47#include <asm/kvm_arm.h>
48#include <asm/kvm_mmu.h>
49
50#include "vgic.h"
51
52static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu,
53 struct kvm_exit_mmio *mmio, phys_addr_t offset)
54{
55 u32 reg = 0xffffffff;
56
57 vgic_reg_access(mmio, &reg, offset,
58 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
59
60 return false;
61}
62
63static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu,
64 struct kvm_exit_mmio *mmio, phys_addr_t offset)
65{
66 u32 reg = 0;
67
68 /*
69 * Force ARE and DS to 1, the guest cannot change this.
70 * For the time being we only support Group1 interrupts.
71 */
72 if (vcpu->kvm->arch.vgic.enabled)
73 reg = GICD_CTLR_ENABLE_SS_G1;
74 reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
75
76 vgic_reg_access(mmio, &reg, offset,
77 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
78 if (mmio->is_write) {
79 if (reg & GICD_CTLR_ENABLE_SS_G0)
80 kvm_info("guest tried to enable unsupported Group0 interrupts\n");
81 vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1);
82 vgic_update_state(vcpu->kvm);
83 return true;
84 }
85 return false;
86}
87
88/*
89 * As this implementation does not provide compatibility
90 * with GICv2 (ARE==1), we report zero CPUs in bits [5..7].
91 * Also LPIs and MBIs are not supported, so we set the respective bits to 0.
92 * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs).
93 */
94#define INTERRUPT_ID_BITS 10
95static bool handle_mmio_typer(struct kvm_vcpu *vcpu,
96 struct kvm_exit_mmio *mmio, phys_addr_t offset)
97{
98 u32 reg;
99
100 reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1;
101
102 reg |= (INTERRUPT_ID_BITS - 1) << 19;
103
104 vgic_reg_access(mmio, &reg, offset,
105 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
106
107 return false;
108}
109
110static bool handle_mmio_iidr(struct kvm_vcpu *vcpu,
111 struct kvm_exit_mmio *mmio, phys_addr_t offset)
112{
113 u32 reg;
114
115 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
116 vgic_reg_access(mmio, &reg, offset,
117 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
118
119 return false;
120}
121
122static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu,
123 struct kvm_exit_mmio *mmio,
124 phys_addr_t offset)
125{
126 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
127 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
128 vcpu->vcpu_id,
129 ACCESS_WRITE_SETBIT);
130
131 vgic_reg_access(mmio, NULL, offset,
132 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
133 return false;
134}
135
136static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu,
137 struct kvm_exit_mmio *mmio,
138 phys_addr_t offset)
139{
140 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
141 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
142 vcpu->vcpu_id,
143 ACCESS_WRITE_CLEARBIT);
144
145 vgic_reg_access(mmio, NULL, offset,
146 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
147 return false;
148}
149
150static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu,
151 struct kvm_exit_mmio *mmio,
152 phys_addr_t offset)
153{
154 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
155 return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
156 vcpu->vcpu_id);
157
158 vgic_reg_access(mmio, NULL, offset,
159 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
160 return false;
161}
162
163static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu,
164 struct kvm_exit_mmio *mmio,
165 phys_addr_t offset)
166{
167 if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
168 return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
169 vcpu->vcpu_id);
170
171 vgic_reg_access(mmio, NULL, offset,
172 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
173 return false;
174}
175
176static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu,
177 struct kvm_exit_mmio *mmio,
178 phys_addr_t offset)
179{
180 u32 *reg;
181
182 if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) {
183 vgic_reg_access(mmio, NULL, offset,
184 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
185 return false;
186 }
187
188 reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
189 vcpu->vcpu_id, offset);
190 vgic_reg_access(mmio, reg, offset,
191 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
192 return false;
193}
194
195static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu,
196 struct kvm_exit_mmio *mmio,
197 phys_addr_t offset)
198{
199 u32 *reg;
200
201 if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) {
202 vgic_reg_access(mmio, NULL, offset,
203 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
204 return false;
205 }
206
207 reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
208 vcpu->vcpu_id, offset >> 1);
209
210 return vgic_handle_cfg_reg(reg, mmio, offset);
211}
212
213/*
214 * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word)
215 * when we store the target MPIDR written by the guest.
216 */
217static u32 compress_mpidr(unsigned long mpidr)
218{
219 u32 ret;
220
221 ret = MPIDR_AFFINITY_LEVEL(mpidr, 0);
222 ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8;
223 ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16;
224 ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24;
225
226 return ret;
227}
228
229static unsigned long uncompress_mpidr(u32 value)
230{
231 unsigned long mpidr;
232
233 mpidr = ((value >> 0) & 0xFF) << MPIDR_LEVEL_SHIFT(0);
234 mpidr |= ((value >> 8) & 0xFF) << MPIDR_LEVEL_SHIFT(1);
235 mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2);
236 mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3);
237
238 return mpidr;
239}
240
241/*
242 * Lookup the given MPIDR value to get the vcpu_id (if there is one)
243 * and store that in the irq_spi_cpu[] array.
 244 * This limits the number of VCPUs to 255 for now; extending the data
245 * type (or storing kvm_vcpu pointers) should lift the limit.
246 * Store the original MPIDR value in an extra array to support read-as-written.
247 * Unallocated MPIDRs are translated to a special value and caught
248 * before any array accesses.
249 */
250static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu,
251 struct kvm_exit_mmio *mmio,
252 phys_addr_t offset)
253{
254 struct kvm *kvm = vcpu->kvm;
255 struct vgic_dist *dist = &kvm->arch.vgic;
256 int spi;
257 u32 reg;
258 int vcpu_id;
259 unsigned long *bmap, mpidr;
260
261 /*
262 * The upper 32 bits of each 64 bit register are zero,
263 * as we don't support Aff3.
264 */
265 if ((offset & 4)) {
266 vgic_reg_access(mmio, NULL, offset,
267 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
268 return false;
269 }
270
271 /* This region only covers SPIs, so no handling of private IRQs here. */
272 spi = offset / 8;
273
274 /* get the stored MPIDR for this IRQ */
275 mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]);
276 reg = mpidr;
277
278 vgic_reg_access(mmio, &reg, offset,
279 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
280
281 if (!mmio->is_write)
282 return false;
283
284 /*
285 * Now clear the currently assigned vCPU from the map, making room
286 * for the new one to be written below
287 */
288 vcpu = kvm_mpidr_to_vcpu(kvm, mpidr);
289 if (likely(vcpu)) {
290 vcpu_id = vcpu->vcpu_id;
291 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
292 __clear_bit(spi, bmap);
293 }
294
295 dist->irq_spi_mpidr[spi] = compress_mpidr(reg);
296 vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK);
297
298 /*
299 * The spec says that non-existent MPIDR values should not be
300 * forwarded to any existent (v)CPU, but should be able to become
301 * pending anyway. We simply keep the irq_spi_target[] array empty, so
302 * the interrupt will never be injected.
303 * irq_spi_cpu[irq] gets a magic value in this case.
304 */
305 if (likely(vcpu)) {
306 vcpu_id = vcpu->vcpu_id;
307 dist->irq_spi_cpu[spi] = vcpu_id;
308 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
309 __set_bit(spi, bmap);
310 } else {
311 dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED;
312 }
313
314 vgic_update_state(kvm);
315
316 return true;
317}
318
319/*
320 * We should be careful about promising too much when a guest reads
321 * this register. Don't claim to be like any hardware implementation,
322 * but just report the GIC as version 3 - which is what a Linux guest
323 * would check.
324 */
325static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
326 struct kvm_exit_mmio *mmio,
327 phys_addr_t offset)
328{
329 u32 reg = 0;
330
331 switch (offset + GICD_IDREGS) {
332 case GICD_PIDR2:
333 reg = 0x3b;
334 break;
335 }
336
337 vgic_reg_access(mmio, &reg, offset,
338 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
339
340 return false;
341}
342
343static const struct vgic_io_range vgic_v3_dist_ranges[] = {
344 {
345 .base = GICD_CTLR,
346 .len = 0x04,
347 .bits_per_irq = 0,
348 .handle_mmio = handle_mmio_ctlr,
349 },
350 {
351 .base = GICD_TYPER,
352 .len = 0x04,
353 .bits_per_irq = 0,
354 .handle_mmio = handle_mmio_typer,
355 },
356 {
357 .base = GICD_IIDR,
358 .len = 0x04,
359 .bits_per_irq = 0,
360 .handle_mmio = handle_mmio_iidr,
361 },
362 {
363 /* this register is optional, it is RAZ/WI if not implemented */
364 .base = GICD_STATUSR,
365 .len = 0x04,
366 .bits_per_irq = 0,
367 .handle_mmio = handle_mmio_raz_wi,
368 },
369 {
370 /* this write only register is WI when TYPER.MBIS=0 */
371 .base = GICD_SETSPI_NSR,
372 .len = 0x04,
373 .bits_per_irq = 0,
374 .handle_mmio = handle_mmio_raz_wi,
375 },
376 {
377 /* this write only register is WI when TYPER.MBIS=0 */
378 .base = GICD_CLRSPI_NSR,
379 .len = 0x04,
380 .bits_per_irq = 0,
381 .handle_mmio = handle_mmio_raz_wi,
382 },
383 {
384 /* this is RAZ/WI when DS=1 */
385 .base = GICD_SETSPI_SR,
386 .len = 0x04,
387 .bits_per_irq = 0,
388 .handle_mmio = handle_mmio_raz_wi,
389 },
390 {
391 /* this is RAZ/WI when DS=1 */
392 .base = GICD_CLRSPI_SR,
393 .len = 0x04,
394 .bits_per_irq = 0,
395 .handle_mmio = handle_mmio_raz_wi,
396 },
397 {
398 .base = GICD_IGROUPR,
399 .len = 0x80,
400 .bits_per_irq = 1,
401 .handle_mmio = handle_mmio_rao_wi,
402 },
403 {
404 .base = GICD_ISENABLER,
405 .len = 0x80,
406 .bits_per_irq = 1,
407 .handle_mmio = handle_mmio_set_enable_reg_dist,
408 },
409 {
410 .base = GICD_ICENABLER,
411 .len = 0x80,
412 .bits_per_irq = 1,
413 .handle_mmio = handle_mmio_clear_enable_reg_dist,
414 },
415 {
416 .base = GICD_ISPENDR,
417 .len = 0x80,
418 .bits_per_irq = 1,
419 .handle_mmio = handle_mmio_set_pending_reg_dist,
420 },
421 {
422 .base = GICD_ICPENDR,
423 .len = 0x80,
424 .bits_per_irq = 1,
425 .handle_mmio = handle_mmio_clear_pending_reg_dist,
426 },
427 {
428 .base = GICD_ISACTIVER,
429 .len = 0x80,
430 .bits_per_irq = 1,
431 .handle_mmio = handle_mmio_raz_wi,
432 },
433 {
434 .base = GICD_ICACTIVER,
435 .len = 0x80,
436 .bits_per_irq = 1,
437 .handle_mmio = handle_mmio_raz_wi,
438 },
439 {
440 .base = GICD_IPRIORITYR,
441 .len = 0x400,
442 .bits_per_irq = 8,
443 .handle_mmio = handle_mmio_priority_reg_dist,
444 },
445 {
446 /* TARGETSRn is RES0 when ARE=1 */
447 .base = GICD_ITARGETSR,
448 .len = 0x400,
449 .bits_per_irq = 8,
450 .handle_mmio = handle_mmio_raz_wi,
451 },
452 {
453 .base = GICD_ICFGR,
454 .len = 0x100,
455 .bits_per_irq = 2,
456 .handle_mmio = handle_mmio_cfg_reg_dist,
457 },
458 {
459 /* this is RAZ/WI when DS=1 */
460 .base = GICD_IGRPMODR,
461 .len = 0x80,
462 .bits_per_irq = 1,
463 .handle_mmio = handle_mmio_raz_wi,
464 },
465 {
466 /* this is RAZ/WI when DS=1 */
467 .base = GICD_NSACR,
468 .len = 0x100,
469 .bits_per_irq = 2,
470 .handle_mmio = handle_mmio_raz_wi,
471 },
472 {
473 /* this is RAZ/WI when ARE=1 */
474 .base = GICD_SGIR,
475 .len = 0x04,
476 .handle_mmio = handle_mmio_raz_wi,
477 },
478 {
479 /* this is RAZ/WI when ARE=1 */
480 .base = GICD_CPENDSGIR,
481 .len = 0x10,
482 .handle_mmio = handle_mmio_raz_wi,
483 },
484 {
485 /* this is RAZ/WI when ARE=1 */
486 .base = GICD_SPENDSGIR,
487 .len = 0x10,
488 .handle_mmio = handle_mmio_raz_wi,
489 },
490 {
491 .base = GICD_IROUTER + 0x100,
492 .len = 0x1ee0,
493 .bits_per_irq = 64,
494 .handle_mmio = handle_mmio_route_reg,
495 },
496 {
497 .base = GICD_IDREGS,
498 .len = 0x30,
499 .bits_per_irq = 0,
500 .handle_mmio = handle_mmio_idregs,
501 },
502 {},
503};
504
505static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
506 struct kvm_exit_mmio *mmio,
507 phys_addr_t offset)
508{
509 /* since we don't support LPIs, this register is zero for now */
510 vgic_reg_access(mmio, NULL, offset,
511 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
512 return false;
513}
514
515static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
516 struct kvm_exit_mmio *mmio,
517 phys_addr_t offset)
518{
519 u32 reg;
520 u64 mpidr;
521 struct kvm_vcpu *redist_vcpu = mmio->private;
522 int target_vcpu_id = redist_vcpu->vcpu_id;
523
524 /* the upper 32 bits contain the affinity value */
525 if ((offset & ~3) == 4) {
526 mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
527 reg = compress_mpidr(mpidr);
528
529 vgic_reg_access(mmio, &reg, offset,
530 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
531 return false;
532 }
533
534 reg = redist_vcpu->vcpu_id << 8;
535 if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
536 reg |= GICR_TYPER_LAST;
537 vgic_reg_access(mmio, &reg, offset,
538 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
539 return false;
540}
541
542static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
543 struct kvm_exit_mmio *mmio,
544 phys_addr_t offset)
545{
546 struct kvm_vcpu *redist_vcpu = mmio->private;
547
548 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
549 redist_vcpu->vcpu_id,
550 ACCESS_WRITE_SETBIT);
551}
552
553static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu,
554 struct kvm_exit_mmio *mmio,
555 phys_addr_t offset)
556{
557 struct kvm_vcpu *redist_vcpu = mmio->private;
558
559 return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
560 redist_vcpu->vcpu_id,
561 ACCESS_WRITE_CLEARBIT);
562}
563
564static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu,
565 struct kvm_exit_mmio *mmio,
566 phys_addr_t offset)
567{
568 struct kvm_vcpu *redist_vcpu = mmio->private;
569
570 return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
571 redist_vcpu->vcpu_id);
572}
573
574static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu,
575 struct kvm_exit_mmio *mmio,
576 phys_addr_t offset)
577{
578 struct kvm_vcpu *redist_vcpu = mmio->private;
579
580 return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
581 redist_vcpu->vcpu_id);
582}
583
584static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu,
585 struct kvm_exit_mmio *mmio,
586 phys_addr_t offset)
587{
588 struct kvm_vcpu *redist_vcpu = mmio->private;
589 u32 *reg;
590
591 reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
592 redist_vcpu->vcpu_id, offset);
593 vgic_reg_access(mmio, reg, offset,
594 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
595 return false;
596}
597
598static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
599 struct kvm_exit_mmio *mmio,
600 phys_addr_t offset)
601{
602 struct kvm_vcpu *redist_vcpu = mmio->private;
603
604 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
605 redist_vcpu->vcpu_id, offset >> 1);
606
607 return vgic_handle_cfg_reg(reg, mmio, offset);
608}
609
610#define SGI_base(x) ((x) + SZ_64K)
611
612static const struct vgic_io_range vgic_redist_ranges[] = {
613 {
614 .base = GICR_CTLR,
615 .len = 0x04,
616 .bits_per_irq = 0,
617 .handle_mmio = handle_mmio_ctlr_redist,
618 },
619 {
620 .base = GICR_TYPER,
621 .len = 0x08,
622 .bits_per_irq = 0,
623 .handle_mmio = handle_mmio_typer_redist,
624 },
625 {
626 .base = GICR_IIDR,
627 .len = 0x04,
628 .bits_per_irq = 0,
629 .handle_mmio = handle_mmio_iidr,
630 },
631 {
632 .base = GICR_WAKER,
633 .len = 0x04,
634 .bits_per_irq = 0,
635 .handle_mmio = handle_mmio_raz_wi,
636 },
637 {
638 .base = GICR_IDREGS,
639 .len = 0x30,
640 .bits_per_irq = 0,
641 .handle_mmio = handle_mmio_idregs,
642 },
643 {
644 .base = SGI_base(GICR_IGROUPR0),
645 .len = 0x04,
646 .bits_per_irq = 1,
647 .handle_mmio = handle_mmio_rao_wi,
648 },
649 {
650 .base = SGI_base(GICR_ISENABLER0),
651 .len = 0x04,
652 .bits_per_irq = 1,
653 .handle_mmio = handle_mmio_set_enable_reg_redist,
654 },
655 {
656 .base = SGI_base(GICR_ICENABLER0),
657 .len = 0x04,
658 .bits_per_irq = 1,
659 .handle_mmio = handle_mmio_clear_enable_reg_redist,
660 },
661 {
662 .base = SGI_base(GICR_ISPENDR0),
663 .len = 0x04,
664 .bits_per_irq = 1,
665 .handle_mmio = handle_mmio_set_pending_reg_redist,
666 },
667 {
668 .base = SGI_base(GICR_ICPENDR0),
669 .len = 0x04,
670 .bits_per_irq = 1,
671 .handle_mmio = handle_mmio_clear_pending_reg_redist,
672 },
673 {
674 .base = SGI_base(GICR_ISACTIVER0),
675 .len = 0x04,
676 .bits_per_irq = 1,
677 .handle_mmio = handle_mmio_raz_wi,
678 },
679 {
680 .base = SGI_base(GICR_ICACTIVER0),
681 .len = 0x04,
682 .bits_per_irq = 1,
683 .handle_mmio = handle_mmio_raz_wi,
684 },
685 {
686 .base = SGI_base(GICR_IPRIORITYR0),
687 .len = 0x20,
688 .bits_per_irq = 8,
689 .handle_mmio = handle_mmio_priority_reg_redist,
690 },
691 {
692 .base = SGI_base(GICR_ICFGR0),
693 .len = 0x08,
694 .bits_per_irq = 2,
695 .handle_mmio = handle_mmio_cfg_reg_redist,
696 },
697 {
698 .base = SGI_base(GICR_IGRPMODR0),
699 .len = 0x04,
700 .bits_per_irq = 1,
701 .handle_mmio = handle_mmio_raz_wi,
702 },
703 {
704 .base = SGI_base(GICR_NSACR),
705 .len = 0x04,
706 .handle_mmio = handle_mmio_raz_wi,
707 },
708 {},
709};
710
711static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
712{
713 if (vgic_queue_irq(vcpu, 0, irq)) {
714 vgic_dist_irq_clear_pending(vcpu, irq);
715 vgic_cpu_irq_clear(vcpu, irq);
716 return true;
717 }
718
719 return false;
720}
721
722static int vgic_v3_map_resources(struct kvm *kvm,
723 const struct vgic_params *params)
724{
725 int ret = 0;
726 struct vgic_dist *dist = &kvm->arch.vgic;
727 gpa_t rdbase = dist->vgic_redist_base;
728 struct vgic_io_device *iodevs = NULL;
729 int i;
730
731 if (!irqchip_in_kernel(kvm))
732 return 0;
733
734 mutex_lock(&kvm->lock);
735
736 if (vgic_ready(kvm))
737 goto out;
738
739 if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
740 IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
741 kvm_err("Need to set vgic distributor addresses first\n");
742 ret = -ENXIO;
743 goto out;
744 }
745
746 /*
747 * For a VGICv3 we require userland to explicitly initialize
748 * the VGIC before we need to use it.
749 */
750 if (!vgic_initialized(kvm)) {
751 ret = -EBUSY;
752 goto out;
753 }
754
755 ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
756 GIC_V3_DIST_SIZE, vgic_v3_dist_ranges,
757 -1, &dist->dist_iodev);
758 if (ret)
759 goto out;
760
761 iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL);
762 if (!iodevs) {
763 ret = -ENOMEM;
764 goto out_unregister;
765 }
766
767 for (i = 0; i < dist->nr_cpus; i++) {
768 ret = vgic_register_kvm_io_dev(kvm, rdbase,
769 SZ_128K, vgic_redist_ranges,
770 i, &iodevs[i]);
771 if (ret)
772 goto out_unregister;
773 rdbase += GIC_V3_REDIST_SIZE;
774 }
775
776 dist->redist_iodevs = iodevs;
777 dist->ready = true;
778 goto out;
779
780out_unregister:
781 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
782 if (iodevs) {
783 for (i = 0; i < dist->nr_cpus; i++) {
784 if (iodevs[i].dev.ops)
785 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
786 &iodevs[i].dev);
787 }
788 }
789
790out:
791 if (ret)
792 kvm_vgic_destroy(kvm);
793 mutex_unlock(&kvm->lock);
794 return ret;
795}
796
797static int vgic_v3_init_model(struct kvm *kvm)
798{
799 int i;
800 u32 mpidr;
801 struct vgic_dist *dist = &kvm->arch.vgic;
802 int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
803
804 dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]),
805 GFP_KERNEL);
806
807 if (!dist->irq_spi_mpidr)
808 return -ENOMEM;
809
810 /* Initialize the target VCPUs for each IRQ to VCPU 0 */
811 mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0)));
812 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) {
813 dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0;
814 dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr;
815 vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1);
816 }
817
818 return 0;
819}
820
821/* GICv3 does not keep track of SGI sources anymore. */
822static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
823{
824}
825
826void vgic_v3_init_emulation(struct kvm *kvm)
827{
828 struct vgic_dist *dist = &kvm->arch.vgic;
829
830 dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
831 dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
832 dist->vm_ops.init_model = vgic_v3_init_model;
833 dist->vm_ops.map_resources = vgic_v3_map_resources;
834
835 kvm->arch.max_vcpus = KVM_MAX_VCPUS;
836}
837
838/*
839 * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
840 * generation register ICC_SGI1R_EL1) with a given VCPU.
841 * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
842 * return -1.
843 */
844static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
845{
846 unsigned long affinity;
847 int level0;
848
849 /*
850 * Split the current VCPU's MPIDR into affinity level 0 and the
851 * rest as this is what we have to compare against.
852 */
853 affinity = kvm_vcpu_get_mpidr_aff(vcpu);
854 level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
855 affinity &= ~MPIDR_LEVEL_MASK;
856
857 /* bail out if the upper three levels don't match */
858 if (sgi_aff != affinity)
859 return -1;
860
861 /* Is this VCPU's bit set in the mask? */
862 if (!(sgi_cpu_mask & BIT(level0)))
863 return -1;
864
865 return level0;
866}
867
868#define SGI_AFFINITY_LEVEL(reg, level) \
869 ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
870 >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
871
872/**
873 * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
874 * @vcpu: The VCPU requesting a SGI
875 * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
876 *
877 * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
878 * This will trap in sys_regs.c and call this function.
879 * The ICC_SGI1R_EL1 register contains the upper three affinity levels of the
880 * target processors as well as a bitmask of 16 Aff0 CPUs.
881 * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
882 * check for matching ones. If this bit is set, we signal all VCPUs except
883 * the calling one.
884 */
885void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
886{
887 struct kvm *kvm = vcpu->kvm;
888 struct kvm_vcpu *c_vcpu;
889 struct vgic_dist *dist = &kvm->arch.vgic;
890 u16 target_cpus;
891 u64 mpidr;
892 int sgi, c;
893 int vcpu_id = vcpu->vcpu_id;
894 bool broadcast;
895 int updated = 0;
896
897 sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
898 broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
899 target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
900 mpidr = SGI_AFFINITY_LEVEL(reg, 3);
901 mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
902 mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
903
904 /*
905 * We take the dist lock here, because we come from the sysregs
906 * code path and not from the MMIO one (which already takes the lock).
907 */
908 spin_lock(&dist->lock);
909
910 /*
911 * We iterate over all VCPUs to find the MPIDRs matching the request.
912 * If we have handled one CPU, we clear its bit to detect early
913 * if we are already finished. This avoids iterating through all
914 * VCPUs when most of the time we just signal a single VCPU.
915 */
916 kvm_for_each_vcpu(c, c_vcpu, kvm) {
917
918 /* Exit early if we have dealt with all requested CPUs */
919 if (!broadcast && target_cpus == 0)
920 break;
921
922 /* Don't signal the calling VCPU */
923 if (broadcast && c == vcpu_id)
924 continue;
925
926 if (!broadcast) {
927 int level0;
928
929 level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
930 if (level0 == -1)
931 continue;
932
933 /* remove this matching VCPU from the mask */
934 target_cpus &= ~BIT(level0);
935 }
936
937 /* Flag the SGI as pending */
938 vgic_dist_irq_set_pending(c_vcpu, sgi);
939 updated = 1;
940 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
941 }
942 if (updated)
943 vgic_update_state(vcpu->kvm);
944 spin_unlock(&dist->lock);
945 if (updated)
946 vgic_kick_vcpus(vcpu->kvm);
947}
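To illustrate the register layout described above, this is how a guest might compose an ICC_SGI1R_EL1 value for a targeted (non-broadcast) SGI. Editor's sketch, not part of this patch; the constants are the ones vgic_v3_dispatch_sgi() already decodes, the function name is made up:

/* Send SGI 5 to Aff0 CPUs 0 and 2 in the cluster Aff3.Aff2.Aff1 = 0.0.1 */
static u64 example_icc_sgi1r_value(void)
{
	u64 reg = 0;

	reg |= (u64)5 << ICC_SGI1R_SGI_ID_SHIFT;	/* SGI number */
	reg |= (u64)0x5 << ICC_SGI1R_TARGET_LIST_SHIFT;	/* Aff0 bits 0 and 2 */
	reg |= (u64)1 << ICC_SGI1R_AFFINITY_1_SHIFT;	/* Aff1 = 1 */
	/* Aff2/Aff3 stay 0; routing mode bit clear => use the target list */

	return reg;
}

vgic_v3_dispatch_sgi() would decode this into sgi == 5, target_cpus == 0x5 and an mpidr with Aff1 == 1, then flag SGI 5 as pending on the two matching VCPUs.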
948
949static int vgic_v3_create(struct kvm_device *dev, u32 type)
950{
951 return kvm_vgic_create(dev->kvm, type);
952}
953
954static void vgic_v3_destroy(struct kvm_device *dev)
955{
956 kfree(dev);
957}
958
959static int vgic_v3_set_attr(struct kvm_device *dev,
960 struct kvm_device_attr *attr)
961{
962 int ret;
963
964 ret = vgic_set_common_attr(dev, attr);
965 if (ret != -ENXIO)
966 return ret;
967
968 switch (attr->group) {
969 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
970 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
971 return -ENXIO;
972 }
973
974 return -ENXIO;
975}
976
977static int vgic_v3_get_attr(struct kvm_device *dev,
978 struct kvm_device_attr *attr)
979{
980 int ret;
981
982 ret = vgic_get_common_attr(dev, attr);
983 if (ret != -ENXIO)
984 return ret;
985
986 switch (attr->group) {
987 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
988 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
989 return -ENXIO;
990 }
991
992 return -ENXIO;
993}
994
995static int vgic_v3_has_attr(struct kvm_device *dev,
996 struct kvm_device_attr *attr)
997{
998 switch (attr->group) {
999 case KVM_DEV_ARM_VGIC_GRP_ADDR:
1000 switch (attr->attr) {
1001 case KVM_VGIC_V2_ADDR_TYPE_DIST:
1002 case KVM_VGIC_V2_ADDR_TYPE_CPU:
1003 return -ENXIO;
1004 case KVM_VGIC_V3_ADDR_TYPE_DIST:
1005 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
1006 return 0;
1007 }
1008 break;
1009 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1010 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1011 return -ENXIO;
1012 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
1013 return 0;
1014 case KVM_DEV_ARM_VGIC_GRP_CTRL:
1015 switch (attr->attr) {
1016 case KVM_DEV_ARM_VGIC_CTRL_INIT:
1017 return 0;
1018 }
1019 }
1020 return -ENXIO;
1021}
1022
1023struct kvm_device_ops kvm_arm_vgic_v3_ops = {
1024 .name = "kvm-arm-vgic-v3",
1025 .create = vgic_v3_create,
1026 .destroy = vgic_v3_destroy,
1027 .set_attr = vgic_v3_set_attr,
1028 .get_attr = vgic_v3_get_attr,
1029 .has_attr = vgic_v3_has_attr,
1030};
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000000000000..dff06021e748
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,287 @@
1/*
2 * Copyright (C) 2013 ARM Limited, All Rights Reserved.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <linux/cpu.h>
19#include <linux/kvm.h>
20#include <linux/kvm_host.h>
21#include <linux/interrupt.h>
22#include <linux/io.h>
23#include <linux/of.h>
24#include <linux/of_address.h>
25#include <linux/of_irq.h>
26
27#include <linux/irqchip/arm-gic-v3.h>
28
29#include <asm/kvm_emulate.h>
30#include <asm/kvm_arm.h>
31#include <asm/kvm_mmu.h>
32
33/* These are for GICv2 emulation only */
34#define GICH_LR_VIRTUALID (0x3ffUL << 0)
35#define GICH_LR_PHYSID_CPUID_SHIFT (10)
36#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
37#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1)
38
39/*
40 * LRs are stored in reverse order in memory. Make sure we index them
41 * correctly.
42 */
43#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
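A small sanity-check sketch of the reverse mapping (editor's illustration, not part of this patch; it only assumes VGIC_V3_MAX_LRS remains a compile-time constant):

static inline void lr_index_example(void)
{
	/* Hardware LR 0 is stored last in vgic_lr[], the highest LR first. */
	BUILD_BUG_ON(LR_INDEX(0) != VGIC_V3_MAX_LRS - 1);
	BUILD_BUG_ON(LR_INDEX(VGIC_V3_MAX_LRS - 1) != 0);
}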
44
45static u32 ich_vtr_el2;
46
47static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
48{
49 struct vgic_lr lr_desc;
50 u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
51
52 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
53 lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
54 else
55 lr_desc.irq = val & GICH_LR_VIRTUALID;
56
57 lr_desc.source = 0;
58 if (lr_desc.irq <= 15 &&
59 vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
60 lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
61
62 lr_desc.state = 0;
63
64 if (val & ICH_LR_PENDING_BIT)
65 lr_desc.state |= LR_STATE_PENDING;
66 if (val & ICH_LR_ACTIVE_BIT)
67 lr_desc.state |= LR_STATE_ACTIVE;
68 if (val & ICH_LR_EOI)
69 lr_desc.state |= LR_EOI_INT;
70
71 return lr_desc;
72}
73
74static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
75 struct vgic_lr lr_desc)
76{
77 u64 lr_val;
78
79 lr_val = lr_desc.irq;
80
81 /*
82 * Currently all guest IRQs are Group1, as Group0 would result
83 * in a FIQ in the guest, which it wouldn't expect.
84 * Eventually we want to make this configurable, so we may revisit
85 * this in the future.
86 */
87 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
88 lr_val |= ICH_LR_GROUP;
89 else
90 lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
91
92 if (lr_desc.state & LR_STATE_PENDING)
93 lr_val |= ICH_LR_PENDING_BIT;
94 if (lr_desc.state & LR_STATE_ACTIVE)
95 lr_val |= ICH_LR_ACTIVE_BIT;
96 if (lr_desc.state & LR_EOI_INT)
97 lr_val |= ICH_LR_EOI;
98
99 vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
100}
101
102static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
103 struct vgic_lr lr_desc)
104{
105 if (!(lr_desc.state & LR_STATE_MASK))
106 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
107 else
108 vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
109}
110
111static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
112{
113 return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
114}
115
116static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
117{
118 return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
119}
120
121static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
122{
123 vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
124}
125
126static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
127{
128 u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
129 u32 ret = 0;
130
131 if (misr & ICH_MISR_EOI)
132 ret |= INT_STATUS_EOI;
133 if (misr & ICH_MISR_U)
134 ret |= INT_STATUS_UNDERFLOW;
135
136 return ret;
137}
138
139static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
140{
141 u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
142
143 vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
144 vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
145 vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
146 vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
147}
148
149static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
150{
151 vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
152}
153
154static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
155{
156 vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
157}
158
159static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
160{
161 u32 vmcr;
162
163 vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
164 vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
165 vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
166 vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
167
168 vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
169}
170
171static void vgic_v3_enable(struct kvm_vcpu *vcpu)
172{
173 struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
174
175 /*
176 * Forcing VMCR to zero makes the GIC restore the binary
177 * points to their reset values. Anything else resets to zero
178 * anyway.
179 */
180 vgic_v3->vgic_vmcr = 0;
181
182 /*
183 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
184 * way, so we force SRE to 1 to demonstrate this to the guest.
185 * This goes with the spec allowing the value to be RAO/WI.
186 */
187 if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
188 vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
189 else
190 vgic_v3->vgic_sre = 0;
191
192 /* Get the show on the road... */
193 vgic_v3->vgic_hcr = ICH_HCR_EN;
194}
195
196static const struct vgic_ops vgic_v3_ops = {
197 .get_lr = vgic_v3_get_lr,
198 .set_lr = vgic_v3_set_lr,
199 .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
200 .get_elrsr = vgic_v3_get_elrsr,
201 .get_eisr = vgic_v3_get_eisr,
202 .clear_eisr = vgic_v3_clear_eisr,
203 .get_interrupt_status = vgic_v3_get_interrupt_status,
204 .enable_underflow = vgic_v3_enable_underflow,
205 .disable_underflow = vgic_v3_disable_underflow,
206 .get_vmcr = vgic_v3_get_vmcr,
207 .set_vmcr = vgic_v3_set_vmcr,
208 .enable = vgic_v3_enable,
209};
210
211static struct vgic_params vgic_v3_params;
212
213/**
214 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
215 * @node: pointer to the DT node
216 * @ops: address of a pointer to the GICv3 operations
217 * @params: address of a pointer to HW-specific parameters
218 *
219 * Returns 0 if a GICv3 has been found, with the low level operations
220 * in *ops and the HW parameters in *params. Returns an error code
221 * otherwise.
222 */
223int vgic_v3_probe(struct device_node *vgic_node,
224 const struct vgic_ops **ops,
225 const struct vgic_params **params)
226{
227 int ret = 0;
228 u32 gicv_idx;
229 struct resource vcpu_res;
230 struct vgic_params *vgic = &vgic_v3_params;
231
232 vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
233 if (!vgic->maint_irq) {
234 kvm_err("error getting vgic maintenance irq from DT\n");
235 ret = -ENXIO;
236 goto out;
237 }
238
239 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
240
241 /*
242 * The ListRegs field is 5 bits, but there is an architectural
243 * maximum of 16 list registers. Just ignore bit 4...
244 */
245 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
246 vgic->can_emulate_gicv2 = false;
247
248 if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
249 gicv_idx = 1;
250
251 gicv_idx += 3; /* Also skip GICD, GICC, GICH */
252 if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
253 kvm_info("GICv3: no GICV resource entry\n");
254 vgic->vcpu_base = 0;
255 } else if (!PAGE_ALIGNED(vcpu_res.start)) {
256 pr_warn("GICV physical address 0x%llx not page aligned\n",
257 (unsigned long long)vcpu_res.start);
258 vgic->vcpu_base = 0;
259 } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
260 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
261 (unsigned long long)resource_size(&vcpu_res),
262 PAGE_SIZE);
263 vgic->vcpu_base = 0;
264 } else {
265 vgic->vcpu_base = vcpu_res.start;
266 vgic->can_emulate_gicv2 = true;
267 kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
268 KVM_DEV_TYPE_ARM_VGIC_V2);
269 }
270 if (vgic->vcpu_base == 0)
271 kvm_info("disabling GICv2 emulation\n");
272 kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3);
273
274 vgic->vctrl_base = NULL;
275 vgic->type = VGIC_V3;
276 vgic->max_gic_vcpus = KVM_MAX_VCPUS;
277
278 kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
279 vcpu_res.start, vgic->maint_irq);
280
281 *ops = &vgic_v3_ops;
282 *params = vgic;
283
284out:
285 of_node_put(vgic_node);
286 return ret;
287}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 476d3bf540a8..8d550ff14700 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -31,26 +31,30 @@
31#include <asm/kvm_emulate.h> 31#include <asm/kvm_emulate.h>
32#include <asm/kvm_arm.h> 32#include <asm/kvm_arm.h>
33#include <asm/kvm_mmu.h> 33#include <asm/kvm_mmu.h>
34#include <trace/events/kvm.h>
35#include <asm/kvm.h>
36#include <kvm/iodev.h>
34 37
35/* 38/*
36 * How the whole thing works (courtesy of Christoffer Dall): 39 * How the whole thing works (courtesy of Christoffer Dall):
37 * 40 *
38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if 41 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
39 * something is pending 42 * something is pending on the CPU interface.
40 * - VGIC pending interrupts are stored on the vgic.irq_state vgic 43 * - Interrupts that are pending on the distributor are stored on the
41 * bitmap (this bitmap is updated by both user land ioctls and guest 44 * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
42 * mmio ops, and other in-kernel peripherals such as the 45 * ioctls and guest mmio ops, and other in-kernel peripherals such as the
43 * arch. timers) and indicate the 'wire' state. 46 * arch. timers).
44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is 47 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
45 * recalculated 48 * recalculated
46 * - To calculate the oracle, we need info for each cpu from 49 * - To calculate the oracle, we need info for each cpu from
47 * compute_pending_for_cpu, which considers: 50 * compute_pending_for_cpu, which considers:
48 * - PPI: dist->irq_state & dist->irq_enable 51 * - PPI: dist->irq_pending & dist->irq_enable
49 * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target 52 * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
50 * - irq_spi_target is a 'formatted' version of the GICD_ICFGR 53 * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
51 * registers, stored on each vcpu. We only keep one bit of 54 * registers, stored on each vcpu. We only keep one bit of
52 * information per interrupt, making sure that only one vcpu can 55 * information per interrupt, making sure that only one vcpu can
53 * accept the interrupt. 56 * accept the interrupt.
57 * - If any of the above state changes, we must recalculate the oracle.
54 * - The same is true when injecting an interrupt, except that we only 58 * - The same is true when injecting an interrupt, except that we only
55 * consider a single interrupt at a time. The irq_spi_cpu array 59 * consider a single interrupt at a time. The irq_spi_cpu array
56 * contains the target CPU for each SPI. 60 * contains the target CPU for each SPI.
@@ -60,75 +64,126 @@
60 * the 'line' again. This is achieved as such: 64 * the 'line' again. This is achieved as such:
61 * 65 *
62 * - When a level interrupt is moved onto a vcpu, the corresponding 66 * - When a level interrupt is moved onto a vcpu, the corresponding
63 * bit in irq_active is set. As long as this bit is set, the line 67 * bit in irq_queued is set. As long as this bit is set, the line
64 * will be ignored for further interrupts. The interrupt is injected 68 * will be ignored for further interrupts. The interrupt is injected
65 * into the vcpu with the GICH_LR_EOI bit set (generate a 69 * into the vcpu with the GICH_LR_EOI bit set (generate a
66 * maintenance interrupt on EOI). 70 * maintenance interrupt on EOI).
67 * - When the interrupt is EOIed, the maintenance interrupt fires, 71 * - When the interrupt is EOIed, the maintenance interrupt fires,
68 * and clears the corresponding bit in irq_active. This allow the 72 * and clears the corresponding bit in irq_queued. This allows the
69 * interrupt line to be sampled again. 73 * interrupt line to be sampled again.
74 * - Note that level-triggered interrupts can also be set to pending from
75 * writes to GICD_ISPENDRn, and lowering the external input line does not
76 * cause the interrupt to become inactive in such a situation.
77 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
78 * inactive as long as the external input line is held high.
70 */ 79 */
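As a reading aid for the rules above, here is a naive per-IRQ version of the oracle (editor's sketch only, not part of the diff; the real compute_pending_for_cpu() operates on whole bitmaps rather than looping, and the function name here is made up):

static bool example_compute_pending_for_cpu(struct kvm_vcpu *vcpu)
{
	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
	int vcpu_id = vcpu->vcpu_id;
	int irq;

	for (irq = 0; irq < dist->nr_irqs; irq++) {
		/* Both private and shared interrupts must be pending ... */
		if (!vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu_id, irq))
			continue;
		/* ... and enabled ... */
		if (!vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu_id, irq))
			continue;
		/* ... and SPIs must additionally target this VCPU. */
		if (irq >= VGIC_NR_PRIVATE_IRQS &&
		    !vgic_bitmap_get_irq_val(&dist->irq_spi_target[vcpu_id],
					     vcpu_id, irq))
			continue;

		return true;
	}

	return false;
}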
71 80
72#define VGIC_ADDR_UNDEF (-1) 81#include "vgic.h"
73#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
74 82
75#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ 83static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
76#define IMPLEMENTER_ARM 0x43b 84static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
77#define GICC_ARCH_VERSION_V2 0x2 85static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
86static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
78 87
79/* Physical address of vgic virtual cpu interface */ 88static const struct vgic_ops *vgic_ops;
80static phys_addr_t vgic_vcpu_base; 89static const struct vgic_params *vgic;
81 90
82/* Virtual control interface base address */ 91static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
83static void __iomem *vgic_vctrl_base; 92{
93 vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
94}
84 95
85static struct device_node *vgic_node; 96static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
97{
98 return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
99}
86 100
87#define ACCESS_READ_VALUE (1 << 0) 101int kvm_vgic_map_resources(struct kvm *kvm)
88#define ACCESS_READ_RAZ (0 << 0) 102{
89#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) 103 return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
90#define ACCESS_WRITE_IGNORED (0 << 1) 104}
91#define ACCESS_WRITE_SETBIT (1 << 1)
92#define ACCESS_WRITE_CLEARBIT (2 << 1)
93#define ACCESS_WRITE_VALUE (3 << 1)
94#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
95 105
96static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); 106/*
97static void vgic_update_state(struct kvm *kvm); 107 * struct vgic_bitmap contains a bitmap made of unsigned longs, but
98static void vgic_kick_vcpus(struct kvm *kvm); 108 * extracts u32s out of them.
99static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); 109 *
100static u32 vgic_nr_lr; 110 * This does not work on 64-bit BE systems, because the bitmap access
111 * will store two consecutive 32-bit words with the higher-addressed
112 * register's bits at the lower index and the lower-addressed register's
113 * bits at the higher index.
114 *
115 * Therefore, swizzle the register index when accessing the 32-bit word
116 * registers to access the right register's value.
117 */
118#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
119#define REG_OFFSET_SWIZZLE 1
120#else
121#define REG_OFFSET_SWIZZLE 0
122#endif
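A short sketch of what the swizzle compensates for (editor's illustration, not part of the diff; example_irq_word() is a made-up helper): on a 64-bit big-endian host the bits for IRQs 0-31 of a bitmap land in the second, higher-addressed u32 of the first unsigned long, so the 32-bit word index has to be flipped to reach them.

static u32 *example_irq_word(unsigned long *bitmap, int word_index)
{
	/*
	 * On 64-bit BE the two u32 halves of each unsigned long are laid
	 * out in the opposite order from a plain u32 array, hence the XOR
	 * with REG_OFFSET_SWIZZLE (1 there, 0 everywhere else).
	 */
	return (u32 *)bitmap + (word_index ^ REG_OFFSET_SWIZZLE);
}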
123
124static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
125{
126 int nr_longs;
127
128 nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
129
130 b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
131 if (!b->private)
132 return -ENOMEM;
133
134 b->shared = b->private + nr_cpus;
135
136 return 0;
137}
138
139static void vgic_free_bitmap(struct vgic_bitmap *b)
140{
141 kfree(b->private);
142 b->private = NULL;
143 b->shared = NULL;
144}
101 145
102static unsigned int vgic_maint_irq; 146/*
147 * Call this function to convert a u64 value to an unsigned long * bitmask
148 * in a way that works on both 32-bit and 64-bit LE and BE platforms.
149 *
150 * Warning: Calling this function may modify *val.
151 */
152static unsigned long *u64_to_bitmask(u64 *val)
153{
154#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
155 *val = (*val >> 32) | (*val << 32);
156#endif
157 return (unsigned long *)val;
158}
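A typical use, sketched by the editor (not part of the diff; process_lr() is a made-up placeholder, the other names come from this file): walk the set bits of a 64-bit status value with the generic bitmap helpers on any host. Note the local copy, since u64_to_bitmask() may modify its argument.

static void example_walk_eisr(struct kvm_vcpu *vcpu)
{
	u64 eisr = vgic_ops->get_eisr(vcpu);	/* work on a local copy */
	int lr;

	for_each_set_bit(lr, u64_to_bitmask(&eisr), vgic->nr_lr)
		process_lr(vcpu, lr);		/* hypothetical per-LR work */
}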
103 159
104static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, 160u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset)
105 int cpuid, u32 offset)
106{ 161{
107 offset >>= 2; 162 offset >>= 2;
108 if (!offset) 163 if (!offset)
109 return x->percpu[cpuid].reg; 164 return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
110 else 165 else
111 return x->shared.reg + offset - 1; 166 return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
112} 167}
113 168
114static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, 169static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
115 int cpuid, int irq) 170 int cpuid, int irq)
116{ 171{
117 if (irq < VGIC_NR_PRIVATE_IRQS) 172 if (irq < VGIC_NR_PRIVATE_IRQS)
118 return test_bit(irq, x->percpu[cpuid].reg_ul); 173 return test_bit(irq, x->private + cpuid);
119 174
120 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); 175 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
121} 176}
122 177
123static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, 178void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
124 int irq, int val) 179 int irq, int val)
125{ 180{
126 unsigned long *reg; 181 unsigned long *reg;
127 182
128 if (irq < VGIC_NR_PRIVATE_IRQS) { 183 if (irq < VGIC_NR_PRIVATE_IRQS) {
129 reg = x->percpu[cpuid].reg_ul; 184 reg = x->private + cpuid;
130 } else { 185 } else {
131 reg = x->shared.reg_ul; 186 reg = x->shared;
132 irq -= VGIC_NR_PRIVATE_IRQS; 187 irq -= VGIC_NR_PRIVATE_IRQS;
133 } 188 }
134 189
@@ -140,24 +195,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
140 195
141static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) 196static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
142{ 197{
143 if (unlikely(cpuid >= VGIC_MAX_CPUS)) 198 return x->private + cpuid;
144 return NULL;
145 return x->percpu[cpuid].reg_ul;
146} 199}
147 200
148static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) 201unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
149{ 202{
150 return x->shared.reg_ul; 203 return x->shared;
151} 204}
152 205
153static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) 206static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
154{ 207{
155 offset >>= 2; 208 int size;
156 BUG_ON(offset > (VGIC_NR_IRQS / 4)); 209
157 if (offset < 8) 210 size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
158 return x->percpu[cpuid] + offset; 211 size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
159 else 212
160 return x->shared + offset - 8; 213 x->private = kzalloc(size, GFP_KERNEL);
214 if (!x->private)
215 return -ENOMEM;
216
217 x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
218 return 0;
219}
220
221static void vgic_free_bytemap(struct vgic_bytemap *b)
222{
223 kfree(b->private);
224 b->private = NULL;
225 b->shared = NULL;
226}
227
228u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
229{
230 u32 *reg;
231
232 if (offset < VGIC_NR_PRIVATE_IRQS) {
233 reg = x->private;
234 offset += cpuid * VGIC_NR_PRIVATE_IRQS;
235 } else {
236 reg = x->shared;
237 offset -= VGIC_NR_PRIVATE_IRQS;
238 }
239
240 return reg + (offset / sizeof(u32));
161} 241}
162 242
163#define VGIC_CFG_LEVEL 0 243#define VGIC_CFG_LEVEL 0
@@ -179,6 +259,13 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
179 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); 259 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
180} 260}
181 261
262static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
263{
264 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
265
266 return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
267}
268
182static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) 269static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
183{ 270{
184 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 271 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -186,6 +273,20 @@ static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
186 return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); 273 return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
187} 274}
188 275
276static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
277{
278 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
279
280 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
281}
282
283static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
284{
285 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
286
287 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
288}
289
189static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) 290static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
190{ 291{
191 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 292 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -200,25 +301,60 @@ static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
200 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); 301 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
201} 302}
202 303
304static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
305{
306 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
307
308 return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
309}
310
311static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
312{
313 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
314
315 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
316}
317
318static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
319{
320 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
321
322 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
323}
324
325static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
326{
327 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
328
329 return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
330}
331
332static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
333{
334 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
335
336 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
337}
338
203static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) 339static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
204{ 340{
205 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 341 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
206 342
207 return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); 343 return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
208} 344}
209 345
210static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) 346void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
211{ 347{
212 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 348 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
213 349
214 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); 350 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
215} 351}
216 352
217static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) 353void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
218{ 354{
219 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 355 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
220 356
221 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); 357 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
222} 358}
223 359
224static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) 360static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -230,7 +366,7 @@ static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
230 vcpu->arch.vgic_cpu.pending_shared); 366 vcpu->arch.vgic_cpu.pending_shared);
231} 367}
232 368
233static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) 369void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
234{ 370{
235 if (irq < VGIC_NR_PRIVATE_IRQS) 371 if (irq < VGIC_NR_PRIVATE_IRQS)
236 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); 372 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
@@ -239,14 +375,9 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
239 vcpu->arch.vgic_cpu.pending_shared); 375 vcpu->arch.vgic_cpu.pending_shared);
240} 376}
241 377
242static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) 378static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
243{
244 return *((u32 *)mmio->data) & mask;
245}
246
247static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
248{ 379{
249 *((u32 *)mmio->data) = value & mask; 380 return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
250} 381}
251 382
252/** 383/**
@@ -260,8 +391,8 @@ static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
260 * modes defined for vgic register access 391 * modes defined for vgic register access
261 * (read,raz,write-ignored,setbit,clearbit,write) 392 * (read,raz,write-ignored,setbit,clearbit,write)
262 */ 393 */
263static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, 394void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
264 phys_addr_t offset, int mode) 395 phys_addr_t offset, int mode)
265{ 396{
266 int word_offset = (offset & 3) * 8; 397 int word_offset = (offset & 3) * 8;
267 u32 mask = (1UL << (mmio->len * 8)) - 1; 398 u32 mask = (1UL << (mmio->len * 8)) - 1;
@@ -310,197 +441,141 @@ static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
310 } 441 }
311} 442}
312 443
313static bool handle_mmio_misc(struct kvm_vcpu *vcpu, 444bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
314 struct kvm_exit_mmio *mmio, phys_addr_t offset) 445 phys_addr_t offset)
315{
316 u32 reg;
317 u32 word_offset = offset & 3;
318
319 switch (offset & ~3) {
320 case 0: /* GICD_CTLR */
321 reg = vcpu->kvm->arch.vgic.enabled;
322 vgic_reg_access(mmio, &reg, word_offset,
323 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
324 if (mmio->is_write) {
325 vcpu->kvm->arch.vgic.enabled = reg & 1;
326 vgic_update_state(vcpu->kvm);
327 return true;
328 }
329 break;
330
331 case 4: /* GICD_TYPER */
332 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
333 reg |= (VGIC_NR_IRQS >> 5) - 1;
334 vgic_reg_access(mmio, &reg, word_offset,
335 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
336 break;
337
338 case 8: /* GICD_IIDR */
339 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
340 vgic_reg_access(mmio, &reg, word_offset,
341 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
342 break;
343 }
344
345 return false;
346}
347
348static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
349 struct kvm_exit_mmio *mmio, phys_addr_t offset)
350{ 446{
351 vgic_reg_access(mmio, NULL, offset, 447 vgic_reg_access(mmio, NULL, offset,
352 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); 448 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
353 return false; 449 return false;
354} 450}
355 451
356static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, 452bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
357 struct kvm_exit_mmio *mmio, 453 phys_addr_t offset, int vcpu_id, int access)
358 phys_addr_t offset)
359{ 454{
360 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, 455 u32 *reg;
361 vcpu->vcpu_id, offset); 456 int mode = ACCESS_READ_VALUE | access;
362 vgic_reg_access(mmio, reg, offset, 457 struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);
363 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
364 if (mmio->is_write) {
365 vgic_update_state(vcpu->kvm);
366 return true;
367 }
368
369 return false;
370}
371 458
372static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, 459 reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
373 struct kvm_exit_mmio *mmio, 460 vgic_reg_access(mmio, reg, offset, mode);
374 phys_addr_t offset)
375{
376 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
377 vcpu->vcpu_id, offset);
378 vgic_reg_access(mmio, reg, offset,
379 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
380 if (mmio->is_write) { 461 if (mmio->is_write) {
381 if (offset < 4) /* Force SGI enabled */ 462 if (access & ACCESS_WRITE_CLEARBIT) {
382 *reg |= 0xffff; 463 if (offset < 4) /* Force SGI enabled */
383 vgic_retire_disabled_irqs(vcpu); 464 *reg |= 0xffff;
384 vgic_update_state(vcpu->kvm); 465 vgic_retire_disabled_irqs(target_vcpu);
466 }
467 vgic_update_state(kvm);
385 return true; 468 return true;
386 } 469 }
387 470
388 return false; 471 return false;
389} 472}
390 473
391static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, 474bool vgic_handle_set_pending_reg(struct kvm *kvm,
392 struct kvm_exit_mmio *mmio, 475 struct kvm_exit_mmio *mmio,
393 phys_addr_t offset) 476 phys_addr_t offset, int vcpu_id)
394{ 477{
395 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 478 u32 *reg, orig;
396 vcpu->vcpu_id, offset); 479 u32 level_mask;
397 vgic_reg_access(mmio, reg, offset, 480 int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;
398 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); 481 struct vgic_dist *dist = &kvm->arch.vgic;
399 if (mmio->is_write) {
400 vgic_update_state(vcpu->kvm);
401 return true;
402 }
403 482
404 return false; 483 reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset);
405} 484 level_mask = (~(*reg));
485
486 /* Mark both level and edge triggered irqs as pending */
487 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
488 orig = *reg;
489 vgic_reg_access(mmio, reg, offset, mode);
406 490
407static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
408 struct kvm_exit_mmio *mmio,
409 phys_addr_t offset)
410{
411 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
412 vcpu->vcpu_id, offset);
413 vgic_reg_access(mmio, reg, offset,
414 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
415 if (mmio->is_write) { 491 if (mmio->is_write) {
416 vgic_update_state(vcpu->kvm); 492 /* Set the soft-pending flag only for level-triggered irqs */
493 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
494 vcpu_id, offset);
495 vgic_reg_access(mmio, reg, offset, mode);
496 *reg &= level_mask;
497
498 /* Ignore writes to SGIs */
499 if (offset < 2) {
500 *reg &= ~0xffff;
501 *reg |= orig & 0xffff;
502 }
503
504 vgic_update_state(kvm);
417 return true; 505 return true;
418 } 506 }
419 507
420 return false; 508 return false;
421} 509}
422 510
423static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, 511bool vgic_handle_clear_pending_reg(struct kvm *kvm,
424 struct kvm_exit_mmio *mmio, 512 struct kvm_exit_mmio *mmio,
425 phys_addr_t offset) 513 phys_addr_t offset, int vcpu_id)
426{
427 u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
428 vcpu->vcpu_id, offset);
429 vgic_reg_access(mmio, reg, offset,
430 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
431 return false;
432}
433
434#define GICD_ITARGETSR_SIZE 32
435#define GICD_CPUTARGETS_BITS 8
436#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
437static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
438{ 514{
515 u32 *level_active;
516 u32 *reg, orig;
517 int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
439 struct vgic_dist *dist = &kvm->arch.vgic; 518 struct vgic_dist *dist = &kvm->arch.vgic;
440 int i;
441 u32 val = 0;
442 519
443 irq -= VGIC_NR_PRIVATE_IRQS; 520 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
521 orig = *reg;
522 vgic_reg_access(mmio, reg, offset, mode);
523 if (mmio->is_write) {
524 /* Re-set level triggered level-active interrupts */
525 level_active = vgic_bitmap_get_reg(&dist->irq_level,
526 vcpu_id, offset);
527 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
528 *reg |= *level_active;
529
530 /* Ignore writes to SGIs */
531 if (offset < 2) {
532 *reg &= ~0xffff;
533 *reg |= orig & 0xffff;
534 }
444 535
445 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) 536 /* Clear soft-pending flags */
446 val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); 537 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
538 vcpu_id, offset);
539 vgic_reg_access(mmio, reg, offset, mode);
447 540
448 return val; 541 vgic_update_state(kvm);
542 return true;
543 }
544 return false;
449} 545}
450 546
451static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) 547bool vgic_handle_set_active_reg(struct kvm *kvm,
548 struct kvm_exit_mmio *mmio,
549 phys_addr_t offset, int vcpu_id)
452{ 550{
551 u32 *reg;
453 struct vgic_dist *dist = &kvm->arch.vgic; 552 struct vgic_dist *dist = &kvm->arch.vgic;
454 struct kvm_vcpu *vcpu;
455 int i, c;
456 unsigned long *bmap;
457 u32 target;
458 553
459 irq -= VGIC_NR_PRIVATE_IRQS; 554 reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
555 vgic_reg_access(mmio, reg, offset,
556 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
460 557
461 /* 558 if (mmio->is_write) {
462 * Pick the LSB in each byte. This ensures we target exactly 559 vgic_update_state(kvm);
463 * one vcpu per IRQ. If the byte is null, assume we target 560 return true;
464 * CPU0.
465 */
466 for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
467 int shift = i * GICD_CPUTARGETS_BITS;
468 target = ffs((val >> shift) & 0xffU);
469 target = target ? (target - 1) : 0;
470 dist->irq_spi_cpu[irq + i] = target;
471 kvm_for_each_vcpu(c, vcpu, kvm) {
472 bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
473 if (c == target)
474 set_bit(irq + i, bmap);
475 else
476 clear_bit(irq + i, bmap);
477 }
478 } 561 }
562
563 return false;
479} 564}
480 565
481static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, 566bool vgic_handle_clear_active_reg(struct kvm *kvm,
482 struct kvm_exit_mmio *mmio, 567 struct kvm_exit_mmio *mmio,
483 phys_addr_t offset) 568 phys_addr_t offset, int vcpu_id)
484{ 569{
485 u32 reg; 570 u32 *reg;
486 571 struct vgic_dist *dist = &kvm->arch.vgic;
487 /* We treat the banked interrupts targets as read-only */
488 if (offset < 32) {
489 u32 roreg = 1 << vcpu->vcpu_id;
490 roreg |= roreg << 8;
491 roreg |= roreg << 16;
492 572
493 vgic_reg_access(mmio, &roreg, offset, 573 reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
494 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); 574 vgic_reg_access(mmio, reg, offset,
495 return false; 575 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
496 }
497 576
498 reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
499 vgic_reg_access(mmio, &reg, offset,
500 ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
501 if (mmio->is_write) { 577 if (mmio->is_write) {
502 vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); 578 vgic_update_state(kvm);
503 vgic_update_state(vcpu->kvm);
504 return true; 579 return true;
505 } 580 }
506 581
@@ -542,14 +617,10 @@ static u16 vgic_cfg_compress(u32 val)
542 * LSB is always 0. As such, we only keep the upper bit, and use the 617 * LSB is always 0. As such, we only keep the upper bit, and use the
543 * two above functions to compress/expand the bits 618 * two above functions to compress/expand the bits
544 */ 619 */
545static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, 620bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
546 struct kvm_exit_mmio *mmio, phys_addr_t offset) 621 phys_addr_t offset)
547{ 622{
548 u32 val; 623 u32 val;
549 u32 *reg;
550
551 reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
552 vcpu->vcpu_id, offset >> 1);
553 624
554 if (offset & 4) 625 if (offset & 4)
555 val = *reg >> 16; 626 val = *reg >> 16;
@@ -578,57 +649,21 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
578 return false; 649 return false;
579} 650}
580 651
581static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
582 struct kvm_exit_mmio *mmio, phys_addr_t offset)
583{
584 u32 reg;
585 vgic_reg_access(mmio, &reg, offset,
586 ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
587 if (mmio->is_write) {
588 vgic_dispatch_sgi(vcpu, reg);
589 vgic_update_state(vcpu->kvm);
590 return true;
591 }
592
593 return false;
594}
595
596#define LR_CPUID(lr) \
597 (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
598#define LR_IRQID(lr) \
599 ((lr) & GICH_LR_VIRTUALID)
600
601static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
602{
603 clear_bit(lr_nr, vgic_cpu->lr_used);
604 vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
605 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
606}
607
608/** 652/**
609 * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor 653 * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
610 * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs 654 * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
611 * 655 *
612 * Move any pending IRQs that have already been assigned to LRs back to the 656 * Move any IRQs that have already been assigned to LRs back to the
613 * emulated distributor state so that the complete emulated state can be read 657 * emulated distributor state so that the complete emulated state can be read
614 * from the main emulation structures without investigating the LRs. 658 * from the main emulation structures without investigating the LRs.
615 *
616 * Note that IRQs in the active state in the LRs get their pending state moved
617 * to the distributor but the active state stays in the LRs, because we don't
618 * track the active state on the distributor side.
619 */ 659 */
620static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) 660void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
621{ 661{
622 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
623 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 662 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
624 int vcpu_id = vcpu->vcpu_id; 663 int i;
625 int i, irq, source_cpu;
626 u32 *lr;
627 664
628 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { 665 for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
629 lr = &vgic_cpu->vgic_lr[i]; 666 struct vgic_lr lr = vgic_get_lr(vcpu, i);
630 irq = LR_IRQID(*lr);
631 source_cpu = LR_CPUID(*lr);
632 667
633 /* 668 /*
634 * There are three options for the state bits: 669 * There are three options for the state bits:
@@ -636,12 +671,22 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
636 * 01: pending 671 * 01: pending
637 * 10: active 672 * 10: active
638 * 11: pending and active 673 * 11: pending and active
639 *
640 * If the LR holds only an active interrupt (not pending) then
641 * just leave it alone.
642 */ 674 */
643 if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) 675 BUG_ON(!(lr.state & LR_STATE_MASK));
644 continue; 676
677 /* Reestablish SGI source for pending and active IRQs */
678 if (lr.irq < VGIC_NR_SGIS)
679 add_sgi_source(vcpu, lr.irq, lr.source);
680
681 /*
682 * If the LR holds an active (10) or a pending and active (11)
683 * interrupt then move the active state to the
684 * distributor tracking bit.
685 */
686 if (lr.state & LR_STATE_ACTIVE) {
687 vgic_irq_set_active(vcpu, lr.irq);
688 lr.state &= ~LR_STATE_ACTIVE;
689 }
645 690
646 /* 691 /*
647 * Reestablish the pending state on the distributor and the 692 * Reestablish the pending state on the distributor and the
@@ -649,293 +694,257 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
649 * is fine, then we are only setting a few bits that were 694 * is fine, then we are only setting a few bits that were
650 * already set. 695 * already set.
651 */ 696 */
652 vgic_dist_irq_set(vcpu, irq); 697 if (lr.state & LR_STATE_PENDING) {
653 if (irq < VGIC_NR_SGIS) 698 vgic_dist_irq_set_pending(vcpu, lr.irq);
654 dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; 699 lr.state &= ~LR_STATE_PENDING;
655 *lr &= ~GICH_LR_PENDING_BIT; 700 }
701
702 vgic_set_lr(vcpu, i, lr);
656 703
657 /* 704 /*
658 * If there's no state left on the LR (it could still be 705 * Mark the LR as free for other use.
659 * active), then the LR does not hold any useful info and can
660 * be marked as free for other use.
661 */ 706 */
662 if (!(*lr & GICH_LR_STATE)) 707 BUG_ON(lr.state & LR_STATE_MASK);
663 vgic_retire_lr(i, irq, vgic_cpu); 708 vgic_retire_lr(i, lr.irq, vcpu);
709 vgic_irq_clear_queued(vcpu, lr.irq);
664 710
665 /* Finally update the VGIC state. */ 711 /* Finally update the VGIC state. */
666 vgic_update_state(vcpu->kvm); 712 vgic_update_state(vcpu->kvm);
667 } 713 }
668} 714}
669 715
670/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ 716const
671static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, 717struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
672 struct kvm_exit_mmio *mmio, 718 int len, gpa_t offset)
673 phys_addr_t offset)
674{ 719{
675 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 720 while (ranges->len) {
676 int sgi; 721 if (offset >= ranges->base &&
677 int min_sgi = (offset & ~0x3) * 4; 722 (offset + len) <= (ranges->base + ranges->len))
678 int max_sgi = min_sgi + 3; 723 return ranges;
679 int vcpu_id = vcpu->vcpu_id; 724 ranges++;
680 u32 reg = 0;
681
682 /* Copy source SGIs from distributor side */
683 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
684 int shift = 8 * (sgi - min_sgi);
685 reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
686 } 725 }
687 726
688 mmio_data_write(mmio, ~0, reg); 727 return NULL;
689 return false;
690} 728}
691 729
692static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, 730static bool vgic_validate_access(const struct vgic_dist *dist,
693 struct kvm_exit_mmio *mmio, 731 const struct vgic_io_range *range,
694 phys_addr_t offset, bool set) 732 unsigned long offset)
695{ 733{
696 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 734 int irq;
697 int sgi;
698 int min_sgi = (offset & ~0x3) * 4;
699 int max_sgi = min_sgi + 3;
700 int vcpu_id = vcpu->vcpu_id;
701 u32 reg;
702 bool updated = false;
703
704 reg = mmio_data_read(mmio, ~0);
705
706 /* Clear pending SGIs on the distributor */
707 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
708 u8 mask = reg >> (8 * (sgi - min_sgi));
709 if (set) {
710 if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
711 updated = true;
712 dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
713 } else {
714 if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
715 updated = true;
716 dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
717 }
718 }
719
720 if (updated)
721 vgic_update_state(vcpu->kvm);
722 735
723 return updated; 736 if (!range->bits_per_irq)
724} 737 return true; /* Not an irq-based access */
725 738
726static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, 739 irq = offset * 8 / range->bits_per_irq;
727 struct kvm_exit_mmio *mmio, 740 if (irq >= dist->nr_irqs)
728 phys_addr_t offset) 741 return false;
729{
730 if (!mmio->is_write)
731 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
732 else
733 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
734}
735 742
736static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, 743 return true;
737 struct kvm_exit_mmio *mmio,
738 phys_addr_t offset)
739{
740 if (!mmio->is_write)
741 return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
742 else
743 return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
744} 744}
745 745
746/* 746/*
747 * I would have liked to use the kvm_bus_io_*() API instead, but it 747 * Call the respective handler function for the given range.
748 * cannot cope with banked registers (only the VM pointer is passed 748 * We split up any 64 bit accesses into two consecutive 32 bit
749 * around, and we need the vcpu). One of these days, someone please 749 * handler calls and merge the result afterwards.
750 * fix it! 750 * We do this in a little endian fashion regardless of the host's
751 * or guest's endianness, because the GIC is always LE and the rest of
752 * the code (vgic_reg_access) also puts it in a LE fashion already.
753 * At this point we have already identified the handle function, so
754 * range points to that one entry and offset is relative to this.
751 */ 755 */
752struct mmio_range { 756static bool call_range_handler(struct kvm_vcpu *vcpu,
753 phys_addr_t base; 757 struct kvm_exit_mmio *mmio,
754 unsigned long len; 758 unsigned long offset,
755 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, 759 const struct vgic_io_range *range)
756 phys_addr_t offset); 760{
757}; 761 struct kvm_exit_mmio mmio32;
762 bool ret;
758 763
759static const struct mmio_range vgic_dist_ranges[] = { 764 if (likely(mmio->len <= 4))
760 { 765 return range->handle_mmio(vcpu, mmio, offset);
761 .base = GIC_DIST_CTRL,
762 .len = 12,
763 .handle_mmio = handle_mmio_misc,
764 },
765 {
766 .base = GIC_DIST_IGROUP,
767 .len = VGIC_NR_IRQS / 8,
768 .handle_mmio = handle_mmio_raz_wi,
769 },
770 {
771 .base = GIC_DIST_ENABLE_SET,
772 .len = VGIC_NR_IRQS / 8,
773 .handle_mmio = handle_mmio_set_enable_reg,
774 },
775 {
776 .base = GIC_DIST_ENABLE_CLEAR,
777 .len = VGIC_NR_IRQS / 8,
778 .handle_mmio = handle_mmio_clear_enable_reg,
779 },
780 {
781 .base = GIC_DIST_PENDING_SET,
782 .len = VGIC_NR_IRQS / 8,
783 .handle_mmio = handle_mmio_set_pending_reg,
784 },
785 {
786 .base = GIC_DIST_PENDING_CLEAR,
787 .len = VGIC_NR_IRQS / 8,
788 .handle_mmio = handle_mmio_clear_pending_reg,
789 },
790 {
791 .base = GIC_DIST_ACTIVE_SET,
792 .len = VGIC_NR_IRQS / 8,
793 .handle_mmio = handle_mmio_raz_wi,
794 },
795 {
796 .base = GIC_DIST_ACTIVE_CLEAR,
797 .len = VGIC_NR_IRQS / 8,
798 .handle_mmio = handle_mmio_raz_wi,
799 },
800 {
801 .base = GIC_DIST_PRI,
802 .len = VGIC_NR_IRQS,
803 .handle_mmio = handle_mmio_priority_reg,
804 },
805 {
806 .base = GIC_DIST_TARGET,
807 .len = VGIC_NR_IRQS,
808 .handle_mmio = handle_mmio_target_reg,
809 },
810 {
811 .base = GIC_DIST_CONFIG,
812 .len = VGIC_NR_IRQS / 4,
813 .handle_mmio = handle_mmio_cfg_reg,
814 },
815 {
816 .base = GIC_DIST_SOFTINT,
817 .len = 4,
818 .handle_mmio = handle_mmio_sgi_reg,
819 },
820 {
821 .base = GIC_DIST_SGI_PENDING_CLEAR,
822 .len = VGIC_NR_SGIS,
823 .handle_mmio = handle_mmio_sgi_clear,
824 },
825 {
826 .base = GIC_DIST_SGI_PENDING_SET,
827 .len = VGIC_NR_SGIS,
828 .handle_mmio = handle_mmio_sgi_set,
829 },
830 {}
831};
832 766
833static const 767 /*
834struct mmio_range *find_matching_range(const struct mmio_range *ranges, 768 * Any access bigger than 4 bytes (that we currently handle in KVM)
835 struct kvm_exit_mmio *mmio, 769 * is actually 8 bytes long, caused by a 64-bit access
836 phys_addr_t offset) 770 */
837{
838 const struct mmio_range *r = ranges;
839 771
840 while (r->len) { 772 mmio32.len = 4;
841 if (offset >= r->base && 773 mmio32.is_write = mmio->is_write;
842 (offset + mmio->len) <= (r->base + r->len)) 774 mmio32.private = mmio->private;
843 return r;
844 r++;
845 }
846 775
847 return NULL; 776 mmio32.phys_addr = mmio->phys_addr + 4;
777 mmio32.data = &((u32 *)mmio->data)[1];
778 ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
779
780 mmio32.phys_addr = mmio->phys_addr;
781 mmio32.data = &((u32 *)mmio->data)[0];
782 ret |= range->handle_mmio(vcpu, &mmio32, offset);
783
784 return ret;
848} 785}
849 786
850/** 787/**
851 * vgic_handle_mmio - handle an in-kernel MMIO access 788 * vgic_handle_mmio_access - handle an in-kernel MMIO access
789 * This is called by the read/write KVM IO device wrappers below.
852 * @vcpu: pointer to the vcpu performing the access 790 * @vcpu: pointer to the vcpu performing the access
853 * @run: pointer to the kvm_run structure 791 * @this: pointer to the KVM IO device in charge
854 * @mmio: pointer to the data describing the access 792 * @addr: guest physical address of the access
793 * @len: size of the access
794 * @val: pointer to the data region
795 * @is_write: read or write access
855 * 796 *
856 * returns true if the MMIO access has been performed in kernel space, 797 * returns true if the MMIO access could be performed
857 * and false if it needs to be emulated in user space.
858 */ 798 */
859bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, 799static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
860 struct kvm_exit_mmio *mmio) 800 struct kvm_io_device *this, gpa_t addr,
801 int len, void *val, bool is_write)
861{ 802{
862 const struct mmio_range *range;
863 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 803 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
864 unsigned long base = dist->vgic_dist_base; 804 struct vgic_io_device *iodev = container_of(this,
805 struct vgic_io_device, dev);
806 struct kvm_run *run = vcpu->run;
807 const struct vgic_io_range *range;
808 struct kvm_exit_mmio mmio;
865 bool updated_state; 809 bool updated_state;
866 unsigned long offset; 810 gpa_t offset;
867
868 if (!irqchip_in_kernel(vcpu->kvm) ||
869 mmio->phys_addr < base ||
870 (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
871 return false;
872 811
873 /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ 812 offset = addr - iodev->addr;
874 if (mmio->len > 4) { 813 range = vgic_find_range(iodev->reg_ranges, len, offset);
875 kvm_inject_dabt(vcpu, mmio->phys_addr); 814 if (unlikely(!range || !range->handle_mmio)) {
876 return true; 815 pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
816 return -ENXIO;
877 } 817 }
878 818
879 offset = mmio->phys_addr - base; 819 mmio.phys_addr = addr;
880 range = find_matching_range(vgic_dist_ranges, mmio, offset); 820 mmio.len = len;
881 if (unlikely(!range || !range->handle_mmio)) { 821 mmio.is_write = is_write;
882 pr_warn("Unhandled access %d %08llx %d\n", 822 mmio.data = val;
883 mmio->is_write, mmio->phys_addr, mmio->len); 823 mmio.private = iodev->redist_vcpu;
884 return false; 824
825 spin_lock(&dist->lock);
826 offset -= range->base;
827 if (vgic_validate_access(dist, range, offset)) {
828 updated_state = call_range_handler(vcpu, &mmio, offset, range);
829 } else {
830 if (!is_write)
831 memset(val, 0, len);
832 updated_state = false;
885 } 833 }
834 spin_unlock(&dist->lock);
835 run->mmio.is_write = is_write;
836 run->mmio.len = len;
837 run->mmio.phys_addr = addr;
838 memcpy(run->mmio.data, val, len);
886 839
887 spin_lock(&vcpu->kvm->arch.vgic.lock);
888 offset = mmio->phys_addr - range->base - base;
889 updated_state = range->handle_mmio(vcpu, mmio, offset);
890 spin_unlock(&vcpu->kvm->arch.vgic.lock);
891 kvm_prepare_mmio(run, mmio);
892 kvm_handle_mmio_return(vcpu, run); 840 kvm_handle_mmio_return(vcpu, run);
893 841
894 if (updated_state) 842 if (updated_state)
895 vgic_kick_vcpus(vcpu->kvm); 843 vgic_kick_vcpus(vcpu->kvm);
896 844
897 return true; 845 return 0;
898} 846}
899 847
900static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) 848static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
849 struct kvm_io_device *this,
850 gpa_t addr, int len, void *val)
901{ 851{
902 struct kvm *kvm = vcpu->kvm; 852 return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
903 struct vgic_dist *dist = &kvm->arch.vgic; 853}
904 int nrcpus = atomic_read(&kvm->online_vcpus);
905 u8 target_cpus;
906 int sgi, mode, c, vcpu_id;
907 854
908 vcpu_id = vcpu->vcpu_id; 855static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
856 struct kvm_io_device *this,
857 gpa_t addr, int len, const void *val)
858{
859 return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
860 true);
861}
909 862
910 sgi = reg & 0xf; 863struct kvm_io_device_ops vgic_io_ops = {
911 target_cpus = (reg >> 16) & 0xff; 864 .read = vgic_handle_mmio_read,
912 mode = (reg >> 24) & 3; 865 .write = vgic_handle_mmio_write,
866};
913 867
914 switch (mode) { 868/**
915 case 0: 869 * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
916 if (!target_cpus) 870 * @kvm: The VM structure pointer
917 return; 871 * @base: The (guest) base address for the register frame
918 break; 872 * @len: Length of the register frame window
873 * @ranges: Describing the handler functions for each register
874 * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
875 * @iodev: Points to memory to be passed on to the handler
876 *
877 * @iodev stores the parameters of this function to be usable by the handler
 878 * and by the dispatcher function (since the KVM I/O bus framework lacks
879 * an opaque parameter). Initialization is done in this function, but the
880 * reference should be valid and unique for the whole VGIC lifetime.
881 * If the register frame is not mapped for a specific VCPU, pass -1 to
882 * @redist_vcpu_id.
883 */
884int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
885 const struct vgic_io_range *ranges,
886 int redist_vcpu_id,
887 struct vgic_io_device *iodev)
888{
889 struct kvm_vcpu *vcpu = NULL;
890 int ret;
919 891
920 case 1: 892 if (redist_vcpu_id >= 0)
921 target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; 893 vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
922 break;
923 894
924 case 2: 895 iodev->addr = base;
925 target_cpus = 1 << vcpu_id; 896 iodev->len = len;
926 break; 897 iodev->reg_ranges = ranges;
927 } 898 iodev->redist_vcpu = vcpu;
928 899
929 kvm_for_each_vcpu(c, vcpu, kvm) { 900 kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
930 if (target_cpus & 1) {
931 /* Flag the SGI as pending */
932 vgic_dist_irq_set(vcpu, sgi);
933 dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
934 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
935 }
936 901
937 target_cpus >>= 1; 902 mutex_lock(&kvm->slots_lock);
938 } 903
904 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
905 &iodev->dev);
906 mutex_unlock(&kvm->slots_lock);
907
908 /* Mark the iodev as invalid if registration fails. */
909 if (ret)
910 iodev->dev.ops = NULL;
911
912 return ret;
913}
914
915static int vgic_nr_shared_irqs(struct vgic_dist *dist)
916{
917 return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
918}
919
920static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
921{
922 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
923 unsigned long *active, *enabled, *act_percpu, *act_shared;
924 unsigned long active_private, active_shared;
925 int nr_shared = vgic_nr_shared_irqs(dist);
926 int vcpu_id;
927
928 vcpu_id = vcpu->vcpu_id;
929 act_percpu = vcpu->arch.vgic_cpu.active_percpu;
930 act_shared = vcpu->arch.vgic_cpu.active_shared;
931
932 active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
933 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
934 bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
935
936 active = vgic_bitmap_get_shared_map(&dist->irq_active);
937 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
938 bitmap_and(act_shared, active, enabled, nr_shared);
939 bitmap_and(act_shared, act_shared,
940 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
941 nr_shared);
942
943 active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
944 active_shared = find_first_bit(act_shared, nr_shared);
945
946 return (active_private < VGIC_NR_PRIVATE_IRQS ||
947 active_shared < nr_shared);
939} 948}
940 949
941static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) 950static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
@@ -943,54 +952,129 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
943 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 952 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
944 unsigned long *pending, *enabled, *pend_percpu, *pend_shared; 953 unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
945 unsigned long pending_private, pending_shared; 954 unsigned long pending_private, pending_shared;
955 int nr_shared = vgic_nr_shared_irqs(dist);
946 int vcpu_id; 956 int vcpu_id;
947 957
948 vcpu_id = vcpu->vcpu_id; 958 vcpu_id = vcpu->vcpu_id;
949 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; 959 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
950 pend_shared = vcpu->arch.vgic_cpu.pending_shared; 960 pend_shared = vcpu->arch.vgic_cpu.pending_shared;
951 961
952 pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); 962 pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
953 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); 963 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
954 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); 964 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
955 965
956 pending = vgic_bitmap_get_shared_map(&dist->irq_state); 966 pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
957 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); 967 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
958 bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); 968 bitmap_and(pend_shared, pending, enabled, nr_shared);
959 bitmap_and(pend_shared, pend_shared, 969 bitmap_and(pend_shared, pend_shared,
960 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), 970 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
961 VGIC_NR_SHARED_IRQS); 971 nr_shared);
962 972
963 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); 973 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
964 pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); 974 pending_shared = find_first_bit(pend_shared, nr_shared);
965 return (pending_private < VGIC_NR_PRIVATE_IRQS || 975 return (pending_private < VGIC_NR_PRIVATE_IRQS ||
966 pending_shared < VGIC_NR_SHARED_IRQS); 976 pending_shared < vgic_nr_shared_irqs(dist));
967} 977}
968 978
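The pending computation above reduces to bitmap ANDs: an interrupt is deliverable when it is pending, enabled and, for shared interrupts, targeted at this VCPU. A standalone sketch of that reduction using a single 64-bit word per bitmap (the kernel keeps separate per-CPU and shared bitmaps sized by dist->nr_irqs; the struct and names here are illustrative):

#include <stdint.h>
#include <stdbool.h>

#define NR_PRIVATE_IRQS 32   /* 16 SGIs + 16 PPIs */

struct cpu_irq_state {
    uint64_t pending;        /* per-IRQ pending bits */
    uint64_t enabled;        /* per-IRQ enable bits */
    uint64_t spi_target_me;  /* shared IRQs routed to this VCPU */
};

/* True when at least one private or shared interrupt can be delivered. */
static bool compute_pending(const struct cpu_irq_state *s)
{
    uint64_t private_mask = (1ULL << NR_PRIVATE_IRQS) - 1;
    uint64_t pend_private = s->pending & s->enabled & private_mask;
    uint64_t pend_shared  = s->pending & s->enabled &
                            s->spi_target_me & ~private_mask;

    return pend_private || pend_shared;
}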
969/* 979/*
970 * Update the interrupt state and determine which CPUs have pending 980 * Update the interrupt state and determine which CPUs have pending
971 * interrupts. Must be called with distributor lock held. 981 * or active interrupts. Must be called with distributor lock held.
972 */ 982 */
973static void vgic_update_state(struct kvm *kvm) 983void vgic_update_state(struct kvm *kvm)
974{ 984{
975 struct vgic_dist *dist = &kvm->arch.vgic; 985 struct vgic_dist *dist = &kvm->arch.vgic;
976 struct kvm_vcpu *vcpu; 986 struct kvm_vcpu *vcpu;
977 int c; 987 int c;
978 988
979 if (!dist->enabled) { 989 if (!dist->enabled) {
980 set_bit(0, &dist->irq_pending_on_cpu); 990 set_bit(0, dist->irq_pending_on_cpu);
981 return; 991 return;
982 } 992 }
983 993
984 kvm_for_each_vcpu(c, vcpu, kvm) { 994 kvm_for_each_vcpu(c, vcpu, kvm) {
985 if (compute_pending_for_cpu(vcpu)) { 995 if (compute_pending_for_cpu(vcpu))
986 pr_debug("CPU%d has pending interrupts\n", c); 996 set_bit(c, dist->irq_pending_on_cpu);
987 set_bit(c, &dist->irq_pending_on_cpu); 997
988 } 998 if (compute_active_for_cpu(vcpu))
999 set_bit(c, dist->irq_active_on_cpu);
1000 else
1001 clear_bit(c, dist->irq_active_on_cpu);
989 } 1002 }
990} 1003}
991 1004
992#define MK_LR_PEND(src, irq) \ 1005static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
993 (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) 1006{
1007 return vgic_ops->get_lr(vcpu, lr);
1008}
1009
1010static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
1011 struct vgic_lr vlr)
1012{
1013 vgic_ops->set_lr(vcpu, lr, vlr);
1014}
1015
1016static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
1017 struct vgic_lr vlr)
1018{
1019 vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
1020}
1021
1022static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
1023{
1024 return vgic_ops->get_elrsr(vcpu);
1025}
1026
1027static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
1028{
1029 return vgic_ops->get_eisr(vcpu);
1030}
1031
1032static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
1033{
1034 vgic_ops->clear_eisr(vcpu);
1035}
1036
1037static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
1038{
1039 return vgic_ops->get_interrupt_status(vcpu);
1040}
1041
1042static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
1043{
1044 vgic_ops->enable_underflow(vcpu);
1045}
1046
1047static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
1048{
1049 vgic_ops->disable_underflow(vcpu);
1050}
1051
1052void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1053{
1054 vgic_ops->get_vmcr(vcpu, vmcr);
1055}
1056
1057void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1058{
1059 vgic_ops->set_vmcr(vcpu, vmcr);
1060}
1061
1062static inline void vgic_enable(struct kvm_vcpu *vcpu)
1063{
1064 vgic_ops->enable(vcpu);
1065}
1066
1067static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
1068{
1069 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1070 struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
1071
1072 vlr.state = 0;
1073 vgic_set_lr(vcpu, lr_nr, vlr);
1074 clear_bit(lr_nr, vgic_cpu->lr_used);
1075 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
1076 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1077}
994 1078
995/* 1079/*
996 * An interrupt may have been disabled after being made pending on the 1080 * An interrupt may have been disabled after being made pending on the
@@ -1006,104 +1090,98 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1006 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1090 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1007 int lr; 1091 int lr;
1008 1092
1009 for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { 1093 for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
1010 int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; 1094 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1011 1095
1012 if (!vgic_irq_is_enabled(vcpu, irq)) { 1096 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
1013 vgic_retire_lr(lr, irq, vgic_cpu); 1097 vgic_retire_lr(lr, vlr.irq, vcpu);
1014 if (vgic_irq_is_active(vcpu, irq)) 1098 if (vgic_irq_is_queued(vcpu, vlr.irq))
1015 vgic_irq_clear_active(vcpu, irq); 1099 vgic_irq_clear_queued(vcpu, vlr.irq);
1016 } 1100 }
1017 } 1101 }
1018} 1102}
1019 1103
1104static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1105 int lr_nr, struct vgic_lr vlr)
1106{
1107 if (vgic_irq_is_active(vcpu, irq)) {
1108 vlr.state |= LR_STATE_ACTIVE;
1109 kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
1110 vgic_irq_clear_active(vcpu, irq);
1111 vgic_update_state(vcpu->kvm);
1112 } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
1113 vlr.state |= LR_STATE_PENDING;
1114 kvm_debug("Set pending: 0x%x\n", vlr.state);
1115 }
1116
1117 if (!vgic_irq_is_edge(vcpu, irq))
1118 vlr.state |= LR_EOI_INT;
1119
1120 vgic_set_lr(vcpu, lr_nr, vlr);
1121 vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
1122}
1123
1020/* 1124/*
1021 * Queue an interrupt to a CPU virtual interface. Return true on success, 1125 * Queue an interrupt to a CPU virtual interface. Return true on success,
1022 * or false if it wasn't possible to queue it. 1126 * or false if it wasn't possible to queue it.
1127 * sgi_source must be zero for any non-SGI interrupts.
1023 */ 1128 */
1024static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 1129bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1025{ 1130{
1026 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1131 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1132 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1133 struct vgic_lr vlr;
1027 int lr; 1134 int lr;
1028 1135
1029 /* Sanitize the input... */ 1136 /* Sanitize the input... */
1030 BUG_ON(sgi_source_id & ~7); 1137 BUG_ON(sgi_source_id & ~7);
1031 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); 1138 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1032 BUG_ON(irq >= VGIC_NR_IRQS); 1139 BUG_ON(irq >= dist->nr_irqs);
1033 1140
1034 kvm_debug("Queue IRQ%d\n", irq); 1141 kvm_debug("Queue IRQ%d\n", irq);
1035 1142
1036 lr = vgic_cpu->vgic_irq_lr_map[irq]; 1143 lr = vgic_cpu->vgic_irq_lr_map[irq];
1037 1144
1038 /* Do we have an active interrupt for the same CPUID? */ 1145 /* Do we have an active interrupt for the same CPUID? */
1039 if (lr != LR_EMPTY && 1146 if (lr != LR_EMPTY) {
1040 (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { 1147 vlr = vgic_get_lr(vcpu, lr);
1041 kvm_debug("LR%d piggyback for IRQ%d %x\n", 1148 if (vlr.source == sgi_source_id) {
1042 lr, irq, vgic_cpu->vgic_lr[lr]); 1149 kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
1043 BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); 1150 BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
1044 vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; 1151 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1045 return true; 1152 return true;
1153 }
1046 } 1154 }
1047 1155
1048 /* Try to use another LR for this interrupt */ 1156 /* Try to use another LR for this interrupt */
1049 lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, 1157 lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
1050 vgic_cpu->nr_lr); 1158 vgic->nr_lr);
1051 if (lr >= vgic_cpu->nr_lr) 1159 if (lr >= vgic->nr_lr)
1052 return false; 1160 return false;
1053 1161
1054 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); 1162 kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1055 vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
1056 vgic_cpu->vgic_irq_lr_map[irq] = lr; 1163 vgic_cpu->vgic_irq_lr_map[irq] = lr;
1057 set_bit(lr, vgic_cpu->lr_used); 1164 set_bit(lr, vgic_cpu->lr_used);
1058 1165
1059 if (!vgic_irq_is_edge(vcpu, irq)) 1166 vlr.irq = irq;
1060 vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; 1167 vlr.source = sgi_source_id;
1168 vlr.state = 0;
1169 vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1061 1170
1062 return true; 1171 return true;
1063} 1172}
1064 1173
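vgic_queue_irq() above either piggybacks on the list register already mapped to the interrupt or allocates the first free one, giving up on overflow. A compact standalone sketch of that allocation policy, with a 64-bit word standing in for the lr_used bitmap (names and sizes are illustrative; the real code also tracks SGI source IDs and LR state):

#include <stdint.h>

#define LR_EMPTY 0xff
#define MAX_IRQS 1024
#define MAX_LRS  64

struct lr_alloc {
    uint8_t  irq_to_lr[MAX_IRQS]; /* per-IRQ LR mapping, LR_EMPTY if none */
    uint64_t lr_used;             /* bit n set when LR n is in use */
    int      nr_lr;               /* number of LRs the hardware provides */
};

/* Return the LR to use for @irq, or -1 if every LR is busy (overflow). */
static int pick_lr(struct lr_alloc *a, int irq)
{
    int lr = a->irq_to_lr[irq];

    if (lr != LR_EMPTY)
        return lr;                        /* piggyback on the existing LR */

    for (lr = 0; lr < a->nr_lr && lr < MAX_LRS; lr++) {
        if (!(a->lr_used & (1ULL << lr))) {
            a->lr_used |= 1ULL << lr;     /* claim a fresh LR */
            a->irq_to_lr[irq] = lr;
            return lr;
        }
    }
    return -1;
}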
1065static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1066{
1067 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1068 unsigned long sources;
1069 int vcpu_id = vcpu->vcpu_id;
1070 int c;
1071
1072 sources = dist->irq_sgi_sources[vcpu_id][irq];
1073
1074 for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
1075 if (vgic_queue_irq(vcpu, c, irq))
1076 clear_bit(c, &sources);
1077 }
1078
1079 dist->irq_sgi_sources[vcpu_id][irq] = sources;
1080
1081 /*
1082 * If the sources bitmap has been cleared it means that we
1083 * could queue all the SGIs onto link registers (see the
1084 * clear_bit above), and therefore we are done with them in
1085 * our emulated gic and can get rid of them.
1086 */
1087 if (!sources) {
1088 vgic_dist_irq_clear(vcpu, irq);
1089 vgic_cpu_irq_clear(vcpu, irq);
1090 return true;
1091 }
1092
1093 return false;
1094}
1095
1096static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) 1174static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1097{ 1175{
1098 if (vgic_irq_is_active(vcpu, irq)) 1176 if (!vgic_can_sample_irq(vcpu, irq))
1099 return true; /* level interrupt, already queued */ 1177 return true; /* level interrupt, already queued */
1100 1178
1101 if (vgic_queue_irq(vcpu, 0, irq)) { 1179 if (vgic_queue_irq(vcpu, 0, irq)) {
1102 if (vgic_irq_is_edge(vcpu, irq)) { 1180 if (vgic_irq_is_edge(vcpu, irq)) {
1103 vgic_dist_irq_clear(vcpu, irq); 1181 vgic_dist_irq_clear_pending(vcpu, irq);
1104 vgic_cpu_irq_clear(vcpu, irq); 1182 vgic_cpu_irq_clear(vcpu, irq);
1105 } else { 1183 } else {
1106 vgic_irq_set_active(vcpu, irq); 1184 vgic_irq_set_queued(vcpu, irq);
1107 } 1185 }
1108 1186
1109 return true; 1187 return true;
@@ -1120,130 +1198,180 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1120{ 1198{
1121 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1199 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1122 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1200 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1201 unsigned long *pa_percpu, *pa_shared;
1123 int i, vcpu_id; 1202 int i, vcpu_id;
1124 int overflow = 0; 1203 int overflow = 0;
1204 int nr_shared = vgic_nr_shared_irqs(dist);
1125 1205
1126 vcpu_id = vcpu->vcpu_id; 1206 vcpu_id = vcpu->vcpu_id;
1127 1207
1208 pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
1209 pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
1210
1211 bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
1212 VGIC_NR_PRIVATE_IRQS);
1213 bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
1214 nr_shared);
1128 /* 1215 /*
1129 * We may not have any pending interrupt, or the interrupts 1216 * We may not have any pending interrupt, or the interrupts
1130 * may have been serviced from another vcpu. In all cases, 1217 * may have been serviced from another vcpu. In all cases,
1131 * move along. 1218 * move along.
1132 */ 1219 */
1133 if (!kvm_vgic_vcpu_pending_irq(vcpu)) { 1220 if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
1134 pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
1135 goto epilog; 1221 goto epilog;
1136 }
1137 1222
1138 /* SGIs */ 1223 /* SGIs */
1139 for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { 1224 for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
1140 if (!vgic_queue_sgi(vcpu, i)) 1225 if (!queue_sgi(vcpu, i))
1141 overflow = 1; 1226 overflow = 1;
1142 } 1227 }
1143 1228
1144 /* PPIs */ 1229 /* PPIs */
1145 for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { 1230 for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
1146 if (!vgic_queue_hwirq(vcpu, i)) 1231 if (!vgic_queue_hwirq(vcpu, i))
1147 overflow = 1; 1232 overflow = 1;
1148 } 1233 }
1149 1234
1150 /* SPIs */ 1235 /* SPIs */
1151 for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { 1236 for_each_set_bit(i, pa_shared, nr_shared) {
1152 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) 1237 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1153 overflow = 1; 1238 overflow = 1;
1154 } 1239 }
1155 1240
1241
1242
1243
1156epilog: 1244epilog:
1157 if (overflow) { 1245 if (overflow) {
1158 vgic_cpu->vgic_hcr |= GICH_HCR_UIE; 1246 vgic_enable_underflow(vcpu);
1159 } else { 1247 } else {
1160 vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; 1248 vgic_disable_underflow(vcpu);
1161 /* 1249 /*
1162 * We're about to run this VCPU, and we've consumed 1250 * We're about to run this VCPU, and we've consumed
1163 * everything the distributor had in store for 1251 * everything the distributor had in store for
1164 * us. Claim we don't have anything pending. We'll 1252 * us. Claim we don't have anything pending. We'll
1165 * adjust that if needed while exiting. 1253 * adjust that if needed while exiting.
1166 */ 1254 */
1167 clear_bit(vcpu_id, &dist->irq_pending_on_cpu); 1255 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1168 } 1256 }
1169} 1257}
1170 1258
1171static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) 1259static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1172{ 1260{
1173 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1261 u32 status = vgic_get_interrupt_status(vcpu);
1262 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1174 bool level_pending = false; 1263 bool level_pending = false;
1264 struct kvm *kvm = vcpu->kvm;
1175 1265
1176 kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); 1266 kvm_debug("STATUS = %08x\n", status);
1177 1267
1178 if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { 1268 if (status & INT_STATUS_EOI) {
1179 /* 1269 /*
1180 * Some level interrupts have been EOIed. Clear their 1270 * Some level interrupts have been EOIed. Clear their
1181 * active bit. 1271 * active bit.
1182 */ 1272 */
1183 int lr, irq; 1273 u64 eisr = vgic_get_eisr(vcpu);
1274 unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
1275 int lr;
1276
1277 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1278 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1279 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1280
1281 spin_lock(&dist->lock);
1282 vgic_irq_clear_queued(vcpu, vlr.irq);
1283 WARN_ON(vlr.state & LR_STATE_MASK);
1284 vlr.state = 0;
1285 vgic_set_lr(vcpu, lr, vlr);
1184 1286
1185 for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, 1287 /*
1186 vgic_cpu->nr_lr) { 1288 * If the IRQ was EOIed it was also ACKed and we
1187 irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; 1289 * therefore assume we can clear the soft pending
 1290 * state (should it have been set) for this interrupt.
1291 *
1292 * Note: if the IRQ soft pending state was set after
1293 * the IRQ was acked, it actually shouldn't be
1294 * cleared, but we have no way of knowing that unless
1295 * we start trapping ACKs when the soft-pending state
1296 * is set.
1297 */
1298 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1299
1300 /*
1301 * kvm_notify_acked_irq calls kvm_set_irq()
1302 * to reset the IRQ level. Need to release the
1303 * lock for kvm_set_irq to grab it.
1304 */
1305 spin_unlock(&dist->lock);
1188 1306
1189 vgic_irq_clear_active(vcpu, irq); 1307 kvm_notify_acked_irq(kvm, 0,
1190 vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; 1308 vlr.irq - VGIC_NR_PRIVATE_IRQS);
1309 spin_lock(&dist->lock);
1191 1310
1192 /* Any additional pending interrupt? */ 1311 /* Any additional pending interrupt? */
1193 if (vgic_dist_irq_is_pending(vcpu, irq)) { 1312 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1194 vgic_cpu_irq_set(vcpu, irq); 1313 vgic_cpu_irq_set(vcpu, vlr.irq);
1195 level_pending = true; 1314 level_pending = true;
1196 } else { 1315 } else {
1197 vgic_cpu_irq_clear(vcpu, irq); 1316 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1317 vgic_cpu_irq_clear(vcpu, vlr.irq);
1198 } 1318 }
1199 1319
1320 spin_unlock(&dist->lock);
1321
1200 /* 1322 /*
1201 * Despite being EOIed, the LR may not have 1323 * Despite being EOIed, the LR may not have
1202 * been marked as empty. 1324 * been marked as empty.
1203 */ 1325 */
1204 set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); 1326 vgic_sync_lr_elrsr(vcpu, lr, vlr);
1205 vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
1206 } 1327 }
1207 } 1328 }
1208 1329
1209 if (vgic_cpu->vgic_misr & GICH_MISR_U) 1330 if (status & INT_STATUS_UNDERFLOW)
1210 vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; 1331 vgic_disable_underflow(vcpu);
1332
1333 /*
1334 * In the next iterations of the vcpu loop, if we sync the vgic state
1335 * after flushing it, but before entering the guest (this happens for
1336 * pending signals and vmid rollovers), then make sure we don't pick
1337 * up any old maintenance interrupts here.
1338 */
1339 vgic_clear_eisr(vcpu);
1211 1340
1212 return level_pending; 1341 return level_pending;
1213} 1342}
1214 1343
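The maintenance path above now reads EISR/ELRSR as a u64 and walks it with u64_to_bitmask()/for_each_set_bit(). The same scan over a 64-bit status word can be sketched standalone as follows (assumes GCC/Clang builtins; purely illustrative):

#include <stdint.h>
#include <stdio.h>

/* Visit every set bit of a 64-bit status word (an EISR-like mask),
 * lowest bit first. */
static void for_each_set_lr(uint64_t status, int nr_lr)
{
    while (status) {
        int lr = __builtin_ctzll(status);  /* index of the lowest set bit */

        if (lr >= nr_lr)
            break;
        printf("LR%d was EOIed\n", lr);
        status &= status - 1;              /* clear that bit and continue */
    }
}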
1215/* 1344/* Sync back the VGIC state after a guest run */
1216 * Sync back the VGIC state after a guest run. The distributor lock is
1217 * needed so we don't get preempted in the middle of the state processing.
1218 */
1219static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1345static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1220{ 1346{
1221 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1347 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1222 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1348 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1349 u64 elrsr;
1350 unsigned long *elrsr_ptr;
1223 int lr, pending; 1351 int lr, pending;
1224 bool level_pending; 1352 bool level_pending;
1225 1353
1226 level_pending = vgic_process_maintenance(vcpu); 1354 level_pending = vgic_process_maintenance(vcpu);
1355 elrsr = vgic_get_elrsr(vcpu);
1356 elrsr_ptr = u64_to_bitmask(&elrsr);
1227 1357
1228 /* Clear mappings for empty LRs */ 1358 /* Clear mappings for empty LRs */
1229 for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, 1359 for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
1230 vgic_cpu->nr_lr) { 1360 struct vgic_lr vlr;
1231 int irq;
1232 1361
1233 if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) 1362 if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
1234 continue; 1363 continue;
1235 1364
1236 irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; 1365 vlr = vgic_get_lr(vcpu, lr);
1237 1366
1238 BUG_ON(irq >= VGIC_NR_IRQS); 1367 BUG_ON(vlr.irq >= dist->nr_irqs);
1239 vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; 1368 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1240 } 1369 }
1241 1370
1242 /* Check if we still have something up our sleeve... */ 1371 /* Check if we still have something up our sleeve... */
1243 pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, 1372 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1244 vgic_cpu->nr_lr); 1373 if (level_pending || pending < vgic->nr_lr)
1245 if (level_pending || pending < vgic_cpu->nr_lr) 1374 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1246 set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
1247} 1375}
1248 1376
1249void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) 1377void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1260,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1260 1388
1261void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) 1389void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1262{ 1390{
1263 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1264
1265 if (!irqchip_in_kernel(vcpu->kvm)) 1391 if (!irqchip_in_kernel(vcpu->kvm))
1266 return; 1392 return;
1267 1393
1268 spin_lock(&dist->lock);
1269 __kvm_vgic_sync_hwstate(vcpu); 1394 __kvm_vgic_sync_hwstate(vcpu);
1270 spin_unlock(&dist->lock);
1271} 1395}
1272 1396
1273int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) 1397int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
@@ -1277,10 +1401,21 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1277 if (!irqchip_in_kernel(vcpu->kvm)) 1401 if (!irqchip_in_kernel(vcpu->kvm))
1278 return 0; 1402 return 0;
1279 1403
1280 return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1404 return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1281} 1405}
1282 1406
1283static void vgic_kick_vcpus(struct kvm *kvm) 1407int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
1408{
1409 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1410
1411 if (!irqchip_in_kernel(vcpu->kvm))
1412 return 0;
1413
1414 return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
1415}
1416
1417
1418void vgic_kick_vcpus(struct kvm *kvm)
1284{ 1419{
1285 struct kvm_vcpu *vcpu; 1420 struct kvm_vcpu *vcpu;
1286 int c; 1421 int c;
@@ -1297,34 +1432,36 @@ static void vgic_kick_vcpus(struct kvm *kvm)
1297 1432
1298static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) 1433static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1299{ 1434{
1300 int is_edge = vgic_irq_is_edge(vcpu, irq); 1435 int edge_triggered = vgic_irq_is_edge(vcpu, irq);
1301 int state = vgic_dist_irq_is_pending(vcpu, irq);
1302 1436
1303 /* 1437 /*
1304 * Only inject an interrupt if: 1438 * Only inject an interrupt if:
1305 * - edge triggered and we have a rising edge 1439 * - edge triggered and we have a rising edge
1306 * - level triggered and we change level 1440 * - level triggered and we change level
1307 */ 1441 */
1308 if (is_edge) 1442 if (edge_triggered) {
1443 int state = vgic_dist_irq_is_pending(vcpu, irq);
1309 return level > state; 1444 return level > state;
1310 else 1445 } else {
1446 int state = vgic_dist_irq_get_level(vcpu, irq);
1311 return level != state; 1447 return level != state;
1448 }
1312} 1449}
1313 1450
1314static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, 1451static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1315 unsigned int irq_num, bool level) 1452 unsigned int irq_num, bool level)
1316{ 1453{
1317 struct vgic_dist *dist = &kvm->arch.vgic; 1454 struct vgic_dist *dist = &kvm->arch.vgic;
1318 struct kvm_vcpu *vcpu; 1455 struct kvm_vcpu *vcpu;
1319 int is_edge, is_level; 1456 int edge_triggered, level_triggered;
1320 int enabled; 1457 int enabled;
1321 bool ret = true; 1458 bool ret = true, can_inject = true;
1322 1459
1323 spin_lock(&dist->lock); 1460 spin_lock(&dist->lock);
1324 1461
1325 vcpu = kvm_get_vcpu(kvm, cpuid); 1462 vcpu = kvm_get_vcpu(kvm, cpuid);
1326 is_edge = vgic_irq_is_edge(vcpu, irq_num); 1463 edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
1327 is_level = !is_edge; 1464 level_triggered = !edge_triggered;
1328 1465
1329 if (!vgic_validate_injection(vcpu, irq_num, level)) { 1466 if (!vgic_validate_injection(vcpu, irq_num, level)) {
1330 ret = false; 1467 ret = false;
@@ -1333,24 +1470,39 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1333 1470
1334 if (irq_num >= VGIC_NR_PRIVATE_IRQS) { 1471 if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1335 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; 1472 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1473 if (cpuid == VCPU_NOT_ALLOCATED) {
1474 /* Pretend we use CPU0, and prevent injection */
1475 cpuid = 0;
1476 can_inject = false;
1477 }
1336 vcpu = kvm_get_vcpu(kvm, cpuid); 1478 vcpu = kvm_get_vcpu(kvm, cpuid);
1337 } 1479 }
1338 1480
1339 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); 1481 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1340 1482
1341 if (level) 1483 if (level) {
1342 vgic_dist_irq_set(vcpu, irq_num); 1484 if (level_triggered)
1343 else 1485 vgic_dist_irq_set_level(vcpu, irq_num);
1344 vgic_dist_irq_clear(vcpu, irq_num); 1486 vgic_dist_irq_set_pending(vcpu, irq_num);
1487 } else {
1488 if (level_triggered) {
1489 vgic_dist_irq_clear_level(vcpu, irq_num);
1490 if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
1491 vgic_dist_irq_clear_pending(vcpu, irq_num);
1492 }
1493
1494 ret = false;
1495 goto out;
1496 }
1345 1497
1346 enabled = vgic_irq_is_enabled(vcpu, irq_num); 1498 enabled = vgic_irq_is_enabled(vcpu, irq_num);
1347 1499
1348 if (!enabled) { 1500 if (!enabled || !can_inject) {
1349 ret = false; 1501 ret = false;
1350 goto out; 1502 goto out;
1351 } 1503 }
1352 1504
1353 if (is_level && vgic_irq_is_active(vcpu, irq_num)) { 1505 if (!vgic_can_sample_irq(vcpu, irq_num)) {
1354 /* 1506 /*
1355 * Level interrupt in progress, will be picked up 1507 * Level interrupt in progress, will be picked up
1356 * when EOIed. 1508 * when EOIed.
@@ -1361,13 +1513,13 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1361 1513
1362 if (level) { 1514 if (level) {
1363 vgic_cpu_irq_set(vcpu, irq_num); 1515 vgic_cpu_irq_set(vcpu, irq_num);
1364 set_bit(cpuid, &dist->irq_pending_on_cpu); 1516 set_bit(cpuid, dist->irq_pending_on_cpu);
1365 } 1517 }
1366 1518
1367out: 1519out:
1368 spin_unlock(&dist->lock); 1520 spin_unlock(&dist->lock);
1369 1521
1370 return ret; 1522 return ret ? cpuid : -EINVAL;
1371} 1523}
1372 1524
1373/** 1525/**
@@ -1387,10 +1539,36 @@ out:
1387int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1539int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1388 bool level) 1540 bool level)
1389{ 1541{
1390 if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) 1542 int ret = 0;
1391 vgic_kick_vcpus(kvm); 1543 int vcpu_id;
1392 1544
1393 return 0; 1545 if (unlikely(!vgic_initialized(kvm))) {
1546 /*
1547 * We only provide the automatic initialization of the VGIC
1548 * for the legacy case of a GICv2. Any other type must
 1549 * be explicitly initialized once set up with the respective
1550 * KVM device call.
1551 */
1552 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
1553 ret = -EBUSY;
1554 goto out;
1555 }
1556 mutex_lock(&kvm->lock);
1557 ret = vgic_init(kvm);
1558 mutex_unlock(&kvm->lock);
1559
1560 if (ret)
1561 goto out;
1562 }
1563
1564 vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
1565 if (vcpu_id >= 0) {
1566 /* kick the specified vcpu */
1567 kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
1568 }
1569
1570out:
1571 return ret;
1394} 1572}
1395 1573
1396static irqreturn_t vgic_maintenance_handler(int irq, void *data) 1574static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1404,227 +1582,239 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1404 return IRQ_HANDLED; 1582 return IRQ_HANDLED;
1405} 1583}
1406 1584
1407/** 1585void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1408 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
1409 * @vcpu: pointer to the vcpu struct
1410 *
1411 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
1412 * this vcpu and enable the VGIC for this VCPU
1413 */
1414int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1415{ 1586{
1416 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1587 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1417 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1418 int i;
1419 1588
1420 if (vcpu->vcpu_id >= VGIC_MAX_CPUS) 1589 kfree(vgic_cpu->pending_shared);
1421 return -EBUSY; 1590 kfree(vgic_cpu->active_shared);
1591 kfree(vgic_cpu->pend_act_shared);
1592 kfree(vgic_cpu->vgic_irq_lr_map);
1593 vgic_cpu->pending_shared = NULL;
1594 vgic_cpu->active_shared = NULL;
1595 vgic_cpu->pend_act_shared = NULL;
1596 vgic_cpu->vgic_irq_lr_map = NULL;
1597}
1598
1599static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1600{
1601 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1422 1602
1423 for (i = 0; i < VGIC_NR_IRQS; i++) { 1603 int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
1424 if (i < VGIC_NR_PPIS) 1604 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1425 vgic_bitmap_set_irq_val(&dist->irq_enabled, 1605 vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
1426 vcpu->vcpu_id, i, 1); 1606 vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
1427 if (i < VGIC_NR_PRIVATE_IRQS) 1607 vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
1428 vgic_bitmap_set_irq_val(&dist->irq_cfg,
1429 vcpu->vcpu_id, i, VGIC_CFG_EDGE);
1430 1608
1431 vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; 1609 if (!vgic_cpu->pending_shared
1610 || !vgic_cpu->active_shared
1611 || !vgic_cpu->pend_act_shared
1612 || !vgic_cpu->vgic_irq_lr_map) {
1613 kvm_vgic_vcpu_destroy(vcpu);
1614 return -ENOMEM;
1432 } 1615 }
1433 1616
1617 memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
1618
1434 /* 1619 /*
1435 * By forcing VMCR to zero, the GIC will restore the binary 1620 * Store the number of LRs per vcpu, so we don't have to go
1436 * points to their reset values. Anything else resets to zero 1621 * all the way to the distributor structure to find out. Only
1437 * anyway. 1622 * assembly code should use this one.
1438 */ 1623 */
1439 vgic_cpu->vgic_vmcr = 0; 1624 vgic_cpu->nr_lr = vgic->nr_lr;
1440
1441 vgic_cpu->nr_lr = vgic_nr_lr;
1442 vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
1443 1625
1444 return 0; 1626 return 0;
1445} 1627}
1446 1628
1447static void vgic_init_maintenance_interrupt(void *info) 1629/**
1630 * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
1631 *
 1632 * The host's GIC naturally limits the maximum number of VCPUs a guest
1633 * can use.
1634 */
1635int kvm_vgic_get_max_vcpus(void)
1448{ 1636{
1449 enable_percpu_irq(vgic_maint_irq, 0); 1637 return vgic->max_gic_vcpus;
1450} 1638}
1451 1639
1452static int vgic_cpu_notify(struct notifier_block *self, 1640void kvm_vgic_destroy(struct kvm *kvm)
1453 unsigned long action, void *cpu)
1454{ 1641{
1455 switch (action) { 1642 struct vgic_dist *dist = &kvm->arch.vgic;
1456 case CPU_STARTING: 1643 struct kvm_vcpu *vcpu;
1457 case CPU_STARTING_FROZEN: 1644 int i;
1458 vgic_init_maintenance_interrupt(NULL);
1459 break;
1460 case CPU_DYING:
1461 case CPU_DYING_FROZEN:
1462 disable_percpu_irq(vgic_maint_irq);
1463 break;
1464 }
1465 1645
1466 return NOTIFY_OK; 1646 kvm_for_each_vcpu(i, vcpu, kvm)
1647 kvm_vgic_vcpu_destroy(vcpu);
1648
1649 vgic_free_bitmap(&dist->irq_enabled);
1650 vgic_free_bitmap(&dist->irq_level);
1651 vgic_free_bitmap(&dist->irq_pending);
1652 vgic_free_bitmap(&dist->irq_soft_pend);
1653 vgic_free_bitmap(&dist->irq_queued);
1654 vgic_free_bitmap(&dist->irq_cfg);
1655 vgic_free_bytemap(&dist->irq_priority);
1656 if (dist->irq_spi_target) {
1657 for (i = 0; i < dist->nr_cpus; i++)
1658 vgic_free_bitmap(&dist->irq_spi_target[i]);
1659 }
1660 kfree(dist->irq_sgi_sources);
1661 kfree(dist->irq_spi_cpu);
1662 kfree(dist->irq_spi_mpidr);
1663 kfree(dist->irq_spi_target);
1664 kfree(dist->irq_pending_on_cpu);
1665 kfree(dist->irq_active_on_cpu);
1666 dist->irq_sgi_sources = NULL;
1667 dist->irq_spi_cpu = NULL;
1668 dist->irq_spi_target = NULL;
1669 dist->irq_pending_on_cpu = NULL;
1670 dist->irq_active_on_cpu = NULL;
1671 dist->nr_cpus = 0;
1467} 1672}
1468 1673
1469static struct notifier_block vgic_cpu_nb = { 1674/*
1470 .notifier_call = vgic_cpu_notify, 1675 * Allocate and initialize the various data structures. Must be called
1471}; 1676 * with kvm->lock held!
1472 1677 */
1473int kvm_vgic_hyp_init(void) 1678int vgic_init(struct kvm *kvm)
1474{ 1679{
1475 int ret; 1680 struct vgic_dist *dist = &kvm->arch.vgic;
1476 struct resource vctrl_res; 1681 struct kvm_vcpu *vcpu;
1477 struct resource vcpu_res; 1682 int nr_cpus, nr_irqs;
1683 int ret, i, vcpu_id;
1478 1684
1479 vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); 1685 if (vgic_initialized(kvm))
1480 if (!vgic_node) { 1686 return 0;
1481 kvm_err("error: no compatible vgic node in DT\n"); 1687
1688 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
1689 if (!nr_cpus) /* No vcpus? Can't be good... */
1482 return -ENODEV; 1690 return -ENODEV;
1483 }
1484 1691
1485 vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); 1692 /*
1486 if (!vgic_maint_irq) { 1693 * If nobody configured the number of interrupts, fall back
1487 kvm_err("error getting vgic maintenance irq from DT\n"); 1694 * to the legacy default.
1488 ret = -ENXIO; 1695 */
1489 goto out; 1696 if (!dist->nr_irqs)
1490 } 1697 dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
1491 1698
1492 ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, 1699 nr_irqs = dist->nr_irqs;
1493 "vgic", kvm_get_running_vcpus());
1494 if (ret) {
1495 kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
1496 goto out;
1497 }
1498 1700
1499 ret = __register_cpu_notifier(&vgic_cpu_nb); 1701 ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
1500 if (ret) { 1702 ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
1501 kvm_err("Cannot register vgic CPU notifier\n"); 1703 ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
1502 goto out_free_irq; 1704 ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
1503 } 1705 ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
1706 ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
1707 ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
1708 ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
1504 1709
1505 ret = of_address_to_resource(vgic_node, 2, &vctrl_res); 1710 if (ret)
1506 if (ret) { 1711 goto out;
1507 kvm_err("Cannot obtain VCTRL resource\n");
1508 goto out_free_irq;
1509 }
1510 1712
1511 vgic_vctrl_base = of_iomap(vgic_node, 2); 1713 dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
1512 if (!vgic_vctrl_base) { 1714 dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
1513 kvm_err("Cannot ioremap VCTRL\n"); 1715 dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
1716 GFP_KERNEL);
1717 dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1718 GFP_KERNEL);
1719 dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1720 GFP_KERNEL);
1721 if (!dist->irq_sgi_sources ||
1722 !dist->irq_spi_cpu ||
1723 !dist->irq_spi_target ||
1724 !dist->irq_pending_on_cpu ||
1725 !dist->irq_active_on_cpu) {
1514 ret = -ENOMEM; 1726 ret = -ENOMEM;
1515 goto out_free_irq; 1727 goto out;
1516 }
1517
1518 vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
1519 vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
1520
1521 ret = create_hyp_io_mappings(vgic_vctrl_base,
1522 vgic_vctrl_base + resource_size(&vctrl_res),
1523 vctrl_res.start);
1524 if (ret) {
1525 kvm_err("Cannot map VCTRL into hyp\n");
1526 goto out_unmap;
1527 } 1728 }
1528 1729
1529 if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { 1730 for (i = 0; i < nr_cpus; i++)
1530 kvm_err("Cannot obtain VCPU resource\n"); 1731 ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
1531 ret = -ENXIO; 1732 nr_cpus, nr_irqs);
1532 goto out_unmap;
1533 }
1534 1733
1535 if (!PAGE_ALIGNED(vcpu_res.start)) { 1734 if (ret)
1536 kvm_err("GICV physical address 0x%llx not page aligned\n", 1735 goto out;
1537 (unsigned long long)vcpu_res.start);
1538 ret = -ENXIO;
1539 goto out_unmap;
1540 }
1541 1736
1542 if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 1737 ret = kvm->arch.vgic.vm_ops.init_model(kvm);
1543 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", 1738 if (ret)
1544 (unsigned long long)resource_size(&vcpu_res), 1739 goto out;
1545 PAGE_SIZE);
1546 ret = -ENXIO;
1547 goto out_unmap;
1548 }
1549 1740
1550 vgic_vcpu_base = vcpu_res.start; 1741 kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
1742 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
1743 if (ret) {
1744 kvm_err("VGIC: Failed to allocate vcpu memory\n");
1745 break;
1746 }
1551 1747
1552 kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 1748 for (i = 0; i < dist->nr_irqs; i++) {
1553 vctrl_res.start, vgic_maint_irq); 1749 if (i < VGIC_NR_PPIS)
1554 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); 1750 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1751 vcpu->vcpu_id, i, 1);
1752 if (i < VGIC_NR_PRIVATE_IRQS)
1753 vgic_bitmap_set_irq_val(&dist->irq_cfg,
1754 vcpu->vcpu_id, i,
1755 VGIC_CFG_EDGE);
1756 }
1555 1757
1556 goto out; 1758 vgic_enable(vcpu);
1759 }
1557 1760
1558out_unmap:
1559 iounmap(vgic_vctrl_base);
1560out_free_irq:
1561 free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
1562out: 1761out:
1563 of_node_put(vgic_node); 1762 if (ret)
1763 kvm_vgic_destroy(kvm);
1764
1564 return ret; 1765 return ret;
1565} 1766}
1566 1767
1567/** 1768static int init_vgic_model(struct kvm *kvm, int type)
1568 * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
1569 * @kvm: pointer to the kvm struct
1570 *
1571 * Map the virtual CPU interface into the VM before running any VCPUs. We
1572 * can't do this at creation time, because user space must first set the
1573 * virtual CPU interface address in the guest physical address space. Also
1574 * initialize the ITARGETSRn regs to 0 on the emulated distributor.
1575 */
1576int kvm_vgic_init(struct kvm *kvm)
1577{ 1769{
1578 int ret = 0, i; 1770 switch (type) {
1579 1771 case KVM_DEV_TYPE_ARM_VGIC_V2:
1580 if (!irqchip_in_kernel(kvm)) 1772 vgic_v2_init_emulation(kvm);
1581 return 0; 1773 break;
1582 1774#ifdef CONFIG_ARM_GIC_V3
1583 mutex_lock(&kvm->lock); 1775 case KVM_DEV_TYPE_ARM_VGIC_V3:
1584 1776 vgic_v3_init_emulation(kvm);
1585 if (vgic_initialized(kvm)) 1777 break;
1586 goto out; 1778#endif
1587 1779 default:
1588 if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || 1780 return -ENODEV;
1589 IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
1590 kvm_err("Need to set vgic cpu and dist addresses first\n");
1591 ret = -ENXIO;
1592 goto out;
1593 }
1594
1595 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
1596 vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
1597 if (ret) {
1598 kvm_err("Unable to remap VGIC CPU to VCPU\n");
1599 goto out;
1600 } 1781 }
1601 1782
1602 for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) 1783 if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus)
1603 vgic_set_target_reg(kvm, 0, i); 1784 return -E2BIG;
1604 1785
1605 kvm->arch.vgic.ready = true; 1786 return 0;
1606out:
1607 mutex_unlock(&kvm->lock);
1608 return ret;
1609} 1787}
1610 1788
1611int kvm_vgic_create(struct kvm *kvm) 1789int kvm_vgic_create(struct kvm *kvm, u32 type)
1612{ 1790{
1613 int i, vcpu_lock_idx = -1, ret = 0; 1791 int i, vcpu_lock_idx = -1, ret;
1614 struct kvm_vcpu *vcpu; 1792 struct kvm_vcpu *vcpu;
1615 1793
1616 mutex_lock(&kvm->lock); 1794 mutex_lock(&kvm->lock);
1617 1795
1618 if (kvm->arch.vgic.vctrl_base) { 1796 if (irqchip_in_kernel(kvm)) {
1619 ret = -EEXIST; 1797 ret = -EEXIST;
1620 goto out; 1798 goto out;
1621 } 1799 }
1622 1800
1623 /* 1801 /*
1802 * This function is also called by the KVM_CREATE_IRQCHIP handler,
 1803 * which has not yet had a chance to check the availability of the GICv2
1804 * emulation. So check this here again. KVM_CREATE_DEVICE does
1805 * the proper checks already.
1806 */
1807 if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
1808 ret = -ENODEV;
1809 goto out;
1810 }
1811
1812 /*
1624 * Any time a vcpu is run, vcpu_load is called which tries to grab the 1813 * Any time a vcpu is run, vcpu_load is called which tries to grab the
1625 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure 1814 * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
1626 * that no other VCPUs are run while we create the vgic. 1815 * that no other VCPUs are run while we create the vgic.
1627 */ 1816 */
1817 ret = -EBUSY;
1628 kvm_for_each_vcpu(i, vcpu, kvm) { 1818 kvm_for_each_vcpu(i, vcpu, kvm) {
1629 if (!mutex_trylock(&vcpu->mutex)) 1819 if (!mutex_trylock(&vcpu->mutex))
1630 goto out_unlock; 1820 goto out_unlock;
@@ -1632,16 +1822,22 @@ int kvm_vgic_create(struct kvm *kvm)
1632 } 1822 }
1633 1823
1634 kvm_for_each_vcpu(i, vcpu, kvm) { 1824 kvm_for_each_vcpu(i, vcpu, kvm) {
1635 if (vcpu->arch.has_run_once) { 1825 if (vcpu->arch.has_run_once)
1636 ret = -EBUSY;
1637 goto out_unlock; 1826 goto out_unlock;
1638 }
1639 } 1827 }
1828 ret = 0;
1829
1830 ret = init_vgic_model(kvm, type);
1831 if (ret)
1832 goto out_unlock;
1640 1833
1641 spin_lock_init(&kvm->arch.vgic.lock); 1834 spin_lock_init(&kvm->arch.vgic.lock);
1642 kvm->arch.vgic.vctrl_base = vgic_vctrl_base; 1835 kvm->arch.vgic.in_kernel = true;
1836 kvm->arch.vgic.vgic_model = type;
1837 kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
1643 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; 1838 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
1644 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; 1839 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
1840 kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
1645 1841
1646out_unlock: 1842out_unlock:
1647 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { 1843 for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
@@ -1654,7 +1850,7 @@ out:
1654 return ret; 1850 return ret;
1655} 1851}
1656 1852
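kvm_vgic_create() above try-locks every VCPU mutex and backs out on the first failure, so the VGIC is never created while a VCPU could be running. The same lock-all-or-bail pattern, sketched generically with pthreads (userspace illustration only, not kernel code):

#include <pthread.h>
#include <stdbool.h>

/* Try to take every lock; on failure release the ones already held.
 * Returns true only when all locks were acquired. */
static bool trylock_all(pthread_mutex_t *locks, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        if (pthread_mutex_trylock(&locks[i]) != 0) {
            while (--i >= 0)
                pthread_mutex_unlock(&locks[i]);
            return false;
        }
    }
    return true;
}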
1657static bool vgic_ioaddr_overlap(struct kvm *kvm) 1853static int vgic_ioaddr_overlap(struct kvm *kvm)
1658{ 1854{
1659 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; 1855 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
1660 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; 1856 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
@@ -1694,7 +1890,7 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
1694/** 1890/**
1695 * kvm_vgic_addr - set or get vgic VM base addresses 1891 * kvm_vgic_addr - set or get vgic VM base addresses
1696 * @kvm: pointer to the vm struct 1892 * @kvm: pointer to the vm struct
1697 * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX 1893 * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
1698 * @addr: pointer to address value 1894 * @addr: pointer to address value
1699 * @write: if true set the address in the VM address space, if false read the 1895 * @write: if true set the address in the VM address space, if false read the
1700 * address 1896 * address
@@ -1708,211 +1904,64 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
1708{ 1904{
1709 int r = 0; 1905 int r = 0;
1710 struct vgic_dist *vgic = &kvm->arch.vgic; 1906 struct vgic_dist *vgic = &kvm->arch.vgic;
1907 int type_needed;
1908 phys_addr_t *addr_ptr, block_size;
1909 phys_addr_t alignment;
1711 1910
1712 mutex_lock(&kvm->lock); 1911 mutex_lock(&kvm->lock);
1713 switch (type) { 1912 switch (type) {
1714 case KVM_VGIC_V2_ADDR_TYPE_DIST: 1913 case KVM_VGIC_V2_ADDR_TYPE_DIST:
1715 if (write) { 1914 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
1716 r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, 1915 addr_ptr = &vgic->vgic_dist_base;
1717 *addr, KVM_VGIC_V2_DIST_SIZE); 1916 block_size = KVM_VGIC_V2_DIST_SIZE;
1718 } else { 1917 alignment = SZ_4K;
1719 *addr = vgic->vgic_dist_base;
1720 }
1721 break; 1918 break;
1722 case KVM_VGIC_V2_ADDR_TYPE_CPU: 1919 case KVM_VGIC_V2_ADDR_TYPE_CPU:
1723 if (write) { 1920 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
1724 r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, 1921 addr_ptr = &vgic->vgic_cpu_base;
1725 *addr, KVM_VGIC_V2_CPU_SIZE); 1922 block_size = KVM_VGIC_V2_CPU_SIZE;
1726 } else { 1923 alignment = SZ_4K;
1727 *addr = vgic->vgic_cpu_base;
1728 }
1729 break;
1730 default:
1731 r = -ENODEV;
1732 }
1733
1734 mutex_unlock(&kvm->lock);
1735 return r;
1736}
1737
1738static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
1739 struct kvm_exit_mmio *mmio, phys_addr_t offset)
1740{
1741 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1742 u32 reg, mask = 0, shift = 0;
1743 bool updated = false;
1744
1745 switch (offset & ~0x3) {
1746 case GIC_CPU_CTRL:
1747 mask = GICH_VMCR_CTRL_MASK;
1748 shift = GICH_VMCR_CTRL_SHIFT;
1749 break; 1924 break;
1750 case GIC_CPU_PRIMASK: 1925#ifdef CONFIG_ARM_GIC_V3
1751 mask = GICH_VMCR_PRIMASK_MASK; 1926 case KVM_VGIC_V3_ADDR_TYPE_DIST:
1752 shift = GICH_VMCR_PRIMASK_SHIFT; 1927 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
1928 addr_ptr = &vgic->vgic_dist_base;
1929 block_size = KVM_VGIC_V3_DIST_SIZE;
1930 alignment = SZ_64K;
1753 break; 1931 break;
1754 case GIC_CPU_BINPOINT: 1932 case KVM_VGIC_V3_ADDR_TYPE_REDIST:
1755 mask = GICH_VMCR_BINPOINT_MASK; 1933 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
1756 shift = GICH_VMCR_BINPOINT_SHIFT; 1934 addr_ptr = &vgic->vgic_redist_base;
1757 break; 1935 block_size = KVM_VGIC_V3_REDIST_SIZE;
1758 case GIC_CPU_ALIAS_BINPOINT: 1936 alignment = SZ_64K;
1759 mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
1760 shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
1761 break;
1762 }
1763
1764 if (!mmio->is_write) {
1765 reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
1766 mmio_data_write(mmio, ~0, reg);
1767 } else {
1768 reg = mmio_data_read(mmio, ~0);
1769 reg = (reg << shift) & mask;
1770 if (reg != (vgic_cpu->vgic_vmcr & mask))
1771 updated = true;
1772 vgic_cpu->vgic_vmcr &= ~mask;
1773 vgic_cpu->vgic_vmcr |= reg;
1774 }
1775 return updated;
1776}
1777
1778static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
1779 struct kvm_exit_mmio *mmio, phys_addr_t offset)
1780{
1781 return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
1782}
1783
1784static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
1785 struct kvm_exit_mmio *mmio,
1786 phys_addr_t offset)
1787{
1788 u32 reg;
1789
1790 if (mmio->is_write)
1791 return false;
1792
1793 /* GICC_IIDR */
1794 reg = (PRODUCT_ID_KVM << 20) |
1795 (GICC_ARCH_VERSION_V2 << 16) |
1796 (IMPLEMENTER_ARM << 0);
1797 mmio_data_write(mmio, ~0, reg);
1798 return false;
1799}
1800
1801/*
1802 * CPU Interface Register accesses - these are not accessed by the VM, but by
1803 * user space for saving and restoring VGIC state.
1804 */
1805static const struct mmio_range vgic_cpu_ranges[] = {
1806 {
1807 .base = GIC_CPU_CTRL,
1808 .len = 12,
1809 .handle_mmio = handle_cpu_mmio_misc,
1810 },
1811 {
1812 .base = GIC_CPU_ALIAS_BINPOINT,
1813 .len = 4,
1814 .handle_mmio = handle_mmio_abpr,
1815 },
1816 {
1817 .base = GIC_CPU_ACTIVEPRIO,
1818 .len = 16,
1819 .handle_mmio = handle_mmio_raz_wi,
1820 },
1821 {
1822 .base = GIC_CPU_IDENT,
1823 .len = 4,
1824 .handle_mmio = handle_cpu_mmio_ident,
1825 },
1826};
1827
1828static int vgic_attr_regs_access(struct kvm_device *dev,
1829 struct kvm_device_attr *attr,
1830 u32 *reg, bool is_write)
1831{
1832 const struct mmio_range *r = NULL, *ranges;
1833 phys_addr_t offset;
1834 int ret, cpuid, c;
1835 struct kvm_vcpu *vcpu, *tmp_vcpu;
1836 struct vgic_dist *vgic;
1837 struct kvm_exit_mmio mmio;
1838
1839 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
1840 cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
1841 KVM_DEV_ARM_VGIC_CPUID_SHIFT;
1842
1843 mutex_lock(&dev->kvm->lock);
1844
1845 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
1846 ret = -EINVAL;
1847 goto out;
1848 }
1849
1850 vcpu = kvm_get_vcpu(dev->kvm, cpuid);
1851 vgic = &dev->kvm->arch.vgic;
1852
1853 mmio.len = 4;
1854 mmio.is_write = is_write;
1855 if (is_write)
1856 mmio_data_write(&mmio, ~0, *reg);
1857 switch (attr->group) {
1858 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1859 mmio.phys_addr = vgic->vgic_dist_base + offset;
1860 ranges = vgic_dist_ranges;
1861 break;
1862 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
1863 mmio.phys_addr = vgic->vgic_cpu_base + offset;
1864 ranges = vgic_cpu_ranges;
1865 break; 1937 break;
1938#endif
1866 default: 1939 default:
1867 BUG(); 1940 r = -ENODEV;
1941 goto out;
1868 } 1942 }
1869 r = find_matching_range(ranges, &mmio, offset);
1870 1943
1871 if (unlikely(!r || !r->handle_mmio)) { 1944 if (vgic->vgic_model != type_needed) {
1872 ret = -ENXIO; 1945 r = -ENODEV;
1873 goto out; 1946 goto out;
1874 } 1947 }
1875 1948
1876 1949 if (write) {
1877 spin_lock(&vgic->lock); 1950 if (!IS_ALIGNED(*addr, alignment))
1878 1951 r = -EINVAL;
1879 /* 1952 else
1880 * Ensure that no other VCPU is running by checking the vcpu->cpu 1953 r = vgic_ioaddr_assign(kvm, addr_ptr, *addr,
1881 * field. If no other VPCUs are running we can safely access the VGIC 1954 block_size);
1882 * state, because even if another VPU is run after this point, that 1955 } else {
1883 * VCPU will not touch the vgic state, because it will block on 1956 *addr = *addr_ptr;
1884 * getting the vgic->lock in kvm_vgic_sync_hwstate().
1885 */
1886 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
1887 if (unlikely(tmp_vcpu->cpu != -1)) {
1888 ret = -EBUSY;
1889 goto out_vgic_unlock;
1890 }
1891 } 1957 }
1892 1958
1893 /*
1894 * Move all pending IRQs from the LRs on all VCPUs so the pending
1895 * state can be properly represented in the register state accessible
1896 * through this API.
1897 */
1898 kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
1899 vgic_unqueue_irqs(tmp_vcpu);
1900
1901 offset -= r->base;
1902 r->handle_mmio(vcpu, &mmio, offset);
1903
1904 if (!is_write)
1905 *reg = mmio_data_read(&mmio, ~0);
1906
1907 ret = 0;
1908out_vgic_unlock:
1909 spin_unlock(&vgic->lock);
1910out: 1959out:
1911 mutex_unlock(&dev->kvm->lock); 1960 mutex_unlock(&kvm->lock);
1912 return ret; 1961 return r;
1913} 1962}
1914 1963
1915static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1964int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1916{ 1965{
1917 int r; 1966 int r;
1918 1967
@@ -1928,24 +1977,50 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1928 r = kvm_vgic_addr(dev->kvm, type, &addr, true); 1977 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
1929 return (r == -ENODEV) ? -ENXIO : r; 1978 return (r == -ENODEV) ? -ENXIO : r;
1930 } 1979 }
1931 1980 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
1932 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1933 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
1934 u32 __user *uaddr = (u32 __user *)(long)attr->addr; 1981 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1935 u32 reg; 1982 u32 val;
1983 int ret = 0;
1936 1984
1937 if (get_user(reg, uaddr)) 1985 if (get_user(val, uaddr))
1938 return -EFAULT; 1986 return -EFAULT;
1939 1987
1940 return vgic_attr_regs_access(dev, attr, &reg, true); 1988 /*
1941 } 1989 * We require:
1990 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
1991 * - at most 1024 interrupts
1992 * - a multiple of 32 interrupts
1993 */
1994 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
1995 val > VGIC_MAX_IRQS ||
1996 (val & 31))
1997 return -EINVAL;
1998
1999 mutex_lock(&dev->kvm->lock);
2000
2001 if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2002 ret = -EBUSY;
2003 else
2004 dev->kvm->arch.vgic.nr_irqs = val;
2005
2006 mutex_unlock(&dev->kvm->lock);
1942 2007
2008 return ret;
2009 }
2010 case KVM_DEV_ARM_VGIC_GRP_CTRL: {
2011 switch (attr->attr) {
2012 case KVM_DEV_ARM_VGIC_CTRL_INIT:
2013 r = vgic_init(dev->kvm);
2014 return r;
2015 }
2016 break;
2017 }
1943 } 2018 }
1944 2019
1945 return -ENXIO; 2020 return -ENXIO;
1946} 2021}
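The KVM_DEV_ARM_VGIC_GRP_NR_IRQS handling above enforces the sizing rules (a multiple of 32, at least 64 counting the 32 private interrupts, at most 1024) and rejects the write once the VGIC is ready or already sized. A hypothetical user-space sketch under the same assumptions as the earlier example (ARM host, vgic_fd obtained via KVM_CREATE_DEVICE):

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Size the distributor before the VGIC is initialised and vcpus run. */
        static int vgic_set_nr_irqs(int vgic_fd, uint32_t nr_irqs)
        {
                struct kvm_device_attr attr = {
                        .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
                        .addr  = (uint64_t)(unsigned long)&nr_irqs,
                };

                /* nr_irqs must be a multiple of 32, >= 64 and <= 1024 (see above). */
                return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
        }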
1947 2022
1948static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 2023int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1949{ 2024{
1950 int r = -ENXIO; 2025 int r = -ENXIO;
1951 2026
@@ -1963,16 +2038,10 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1963 return -EFAULT; 2038 return -EFAULT;
1964 break; 2039 break;
1965 } 2040 }
1966 2041 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
1967 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
1968 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
1969 u32 __user *uaddr = (u32 __user *)(long)attr->addr; 2042 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
1970 u32 reg = 0;
1971 2043
1972 r = vgic_attr_regs_access(dev, attr, &reg, false); 2044 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
1973 if (r)
1974 return r;
1975 r = put_user(reg, uaddr);
1976 break; 2045 break;
1977 } 2046 }
1978 2047
@@ -1981,55 +2050,124 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1981 return r; 2050 return r;
1982} 2051}
1983 2052
1984static int vgic_has_attr_regs(const struct mmio_range *ranges, 2053int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
1985 phys_addr_t offset)
1986{ 2054{
1987 struct kvm_exit_mmio dev_attr_mmio; 2055 if (vgic_find_range(ranges, 4, offset))
1988
1989 dev_attr_mmio.len = 4;
1990 if (find_matching_range(ranges, &dev_attr_mmio, offset))
1991 return 0; 2056 return 0;
1992 else 2057 else
1993 return -ENXIO; 2058 return -ENXIO;
1994} 2059}
1995 2060
1996static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 2061static void vgic_init_maintenance_interrupt(void *info)
1997{ 2062{
1998 phys_addr_t offset; 2063 enable_percpu_irq(vgic->maint_irq, 0);
2064}
1999 2065
2000 switch (attr->group) { 2066static int vgic_cpu_notify(struct notifier_block *self,
2001 case KVM_DEV_ARM_VGIC_GRP_ADDR: 2067 unsigned long action, void *cpu)
2002 switch (attr->attr) { 2068{
2003 case KVM_VGIC_V2_ADDR_TYPE_DIST: 2069 switch (action) {
2004 case KVM_VGIC_V2_ADDR_TYPE_CPU: 2070 case CPU_STARTING:
2005 return 0; 2071 case CPU_STARTING_FROZEN:
2006 } 2072 vgic_init_maintenance_interrupt(NULL);
2073 break;
2074 case CPU_DYING:
2075 case CPU_DYING_FROZEN:
2076 disable_percpu_irq(vgic->maint_irq);
2007 break; 2077 break;
2008 case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
2009 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2010 return vgic_has_attr_regs(vgic_dist_ranges, offset);
2011 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2012 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2013 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
2014 } 2078 }
2015 return -ENXIO; 2079
2080 return NOTIFY_OK;
2081}
2082
2083static struct notifier_block vgic_cpu_nb = {
2084 .notifier_call = vgic_cpu_notify,
2085};
2086
2087static const struct of_device_id vgic_ids[] = {
2088 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2089 { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
2090 { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
2091 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2092 {},
2093};
2094
2095int kvm_vgic_hyp_init(void)
2096{
2097 const struct of_device_id *matched_id;
2098 const int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
2099 const struct vgic_params **);
2100 struct device_node *vgic_node;
2101 int ret;
2102
2103 vgic_node = of_find_matching_node_and_match(NULL,
2104 vgic_ids, &matched_id);
2105 if (!vgic_node) {
2106 kvm_err("error: no compatible GIC node found\n");
2107 return -ENODEV;
2108 }
2109
2110 vgic_probe = matched_id->data;
2111 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
2112 if (ret)
2113 return ret;
2114
2115 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2116 "vgic", kvm_get_running_vcpus());
2117 if (ret) {
2118 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2119 return ret;
2120 }
2121
2122 ret = __register_cpu_notifier(&vgic_cpu_nb);
2123 if (ret) {
2124 kvm_err("Cannot register vgic CPU notifier\n");
2125 goto out_free_irq;
2126 }
2127
2128 /* Callback into arch code for setup */
2129 vgic_arch_setup(vgic);
2130
2131 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2132
2133 return 0;
2134
2135out_free_irq:
2136 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2137 return ret;
2016} 2138}
2017 2139
2018static void vgic_destroy(struct kvm_device *dev) 2140int kvm_irq_map_gsi(struct kvm *kvm,
2141 struct kvm_kernel_irq_routing_entry *entries,
2142 int gsi)
2019{ 2143{
2020 kfree(dev); 2144 return gsi;
2021} 2145}
2022 2146
2023static int vgic_create(struct kvm_device *dev, u32 type) 2147int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
2024{ 2148{
2025 return kvm_vgic_create(dev->kvm); 2149 return pin;
2026} 2150}
2027 2151
2028struct kvm_device_ops kvm_arm_vgic_v2_ops = { 2152int kvm_set_irq(struct kvm *kvm, int irq_source_id,
2029 .name = "kvm-arm-vgic", 2153 u32 irq, int level, bool line_status)
2030 .create = vgic_create, 2154{
2031 .destroy = vgic_destroy, 2155 unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
2032 .set_attr = vgic_set_attr, 2156
2033 .get_attr = vgic_get_attr, 2157 trace_kvm_set_irq(irq, level, irq_source_id);
2034 .has_attr = vgic_has_attr, 2158
2035}; 2159 BUG_ON(!vgic_initialized(kvm));
2160
2161 if (spi > kvm->arch.vgic.nr_irqs)
2162 return -EINVAL;
2163 return kvm_vgic_inject_irq(kvm, 0, spi, level);
2164
2165}
2166
2167/* MSI not implemented yet */
2168int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
2169 struct kvm *kvm, int irq_source_id,
2170 int level, bool line_status)
2171{
2172 return 0;
2173}
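kvm_set_irq() above offsets the routing GSI by VGIC_NR_PRIVATE_IRQS, so with the identity GSI map a gsi of N targets SPI N + 32; the 16 SGIs and 16 PPIs are not reachable this way. A hypothetical user-space sketch, assuming vm_fd is the VM file descriptor and efd an eventfd to be routed through KVM_IRQFD:

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Wire an eventfd to a shared peripheral interrupt (SPI >= 32). */
        static int wire_spi_to_eventfd(int vm_fd, int efd, uint32_t spi)
        {
                struct kvm_irqfd irqfd = {
                        .fd  = (uint32_t)efd,
                        .gsi = spi - 32, /* gsi N maps to SPI N + VGIC_NR_PRIVATE_IRQS */
                };

                return ioctl(vm_fd, KVM_IRQFD, &irqfd);
        }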
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
new file mode 100644
index 000000000000..0df74cbb6200
--- /dev/null
+++ b/virt/kvm/arm/vgic.h
@@ -0,0 +1,140 @@
1/*
2 * Copyright (C) 2012-2014 ARM Ltd.
3 * Author: Marc Zyngier <marc.zyngier@arm.com>
4 *
5 * Derived from virt/kvm/arm/vgic.c
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20#ifndef __KVM_VGIC_H__
21#define __KVM_VGIC_H__
22
23#include <kvm/iodev.h>
24
25#define VGIC_ADDR_UNDEF (-1)
26#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
27
28#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
29#define IMPLEMENTER_ARM 0x43b
30
31#define ACCESS_READ_VALUE (1 << 0)
32#define ACCESS_READ_RAZ (0 << 0)
33#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
34#define ACCESS_WRITE_IGNORED (0 << 1)
35#define ACCESS_WRITE_SETBIT (1 << 1)
36#define ACCESS_WRITE_CLEARBIT (2 << 1)
37#define ACCESS_WRITE_VALUE (3 << 1)
38#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
39
40#define VCPU_NOT_ALLOCATED ((u8)-1)
41
42unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x);
43
44void vgic_update_state(struct kvm *kvm);
45int vgic_init_common_maps(struct kvm *kvm);
46
47u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset);
48u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset);
49
50void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq);
51void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq);
52void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq);
53void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
54 int irq, int val);
55
56void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
57void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
58
59bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
60void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
61
62struct kvm_exit_mmio {
63 phys_addr_t phys_addr;
64 void *data;
65 u32 len;
66 bool is_write;
67 void *private;
68};
69
70void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
71 phys_addr_t offset, int mode);
72bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
73 phys_addr_t offset);
74
75static inline
76u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
77{
78 return le32_to_cpu(*((u32 *)mmio->data)) & mask;
79}
80
81static inline
82void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
83{
84 *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
85}
86
87struct vgic_io_range {
88 phys_addr_t base;
89 unsigned long len;
90 int bits_per_irq;
91 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
92 phys_addr_t offset);
93};
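A sketch of how a handler plugs into vgic_io_range using the little-endian accessors above, modelled on handle_cpu_mmio_ident earlier in this diff; the function name, base offset and ID value are illustrative only:

        /* Read-only ID register: returns a fixed value, ignores writes. */
        static bool handle_mmio_example_ident(struct kvm_vcpu *vcpu,
                                              struct kvm_exit_mmio *mmio,
                                              phys_addr_t offset)
        {
                if (mmio->is_write)
                        return false;           /* write ignored */

                /* publish the value little-endian, full 32-bit mask */
                mmio_data_write(mmio, ~0, (PRODUCT_ID_KVM << 20) | IMPLEMENTER_ARM);
                return false;                   /* no distributor state changed */
        }

        static const struct vgic_io_range example_ranges[] = {
                {
                        .base           = 0xfc, /* illustrative offset */
                        .len            = 4,
                        .handle_mmio    = handle_mmio_example_ident,
                },
        };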
94
95int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
96 const struct vgic_io_range *ranges,
97 int redist_id,
98 struct vgic_io_device *iodev);
99
100static inline bool is_in_range(phys_addr_t addr, unsigned long len,
101 phys_addr_t baseaddr, unsigned long size)
102{
103 return (addr >= baseaddr) && (addr + len <= baseaddr + size);
104}
105
106const
107struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
108 int len, gpa_t offset);
109
110bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
111 phys_addr_t offset, int vcpu_id, int access);
112
113bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
114 phys_addr_t offset, int vcpu_id);
115
116bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
117 phys_addr_t offset, int vcpu_id);
118
119bool vgic_handle_set_active_reg(struct kvm *kvm,
120 struct kvm_exit_mmio *mmio,
121 phys_addr_t offset, int vcpu_id);
122
123bool vgic_handle_clear_active_reg(struct kvm *kvm,
124 struct kvm_exit_mmio *mmio,
125 phys_addr_t offset, int vcpu_id);
126
127bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
128 phys_addr_t offset);
129
130void vgic_kick_vcpus(struct kvm *kvm);
131
132int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset);
133int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
134int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
135
136int vgic_init(struct kvm *kvm);
137void vgic_v2_init_emulation(struct kvm *kvm);
138void vgic_v3_init_emulation(struct kvm *kvm);
139
140#endif
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
deleted file mode 100644
index bf06577fea51..000000000000
--- a/virt/kvm/assigned-dev.c
+++ /dev/null
@@ -1,1024 +0,0 @@
1/*
2 * Kernel-based Virtual Machine - device assignment support
3 *
4 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2. See
7 * the COPYING file in the top-level directory.
8 *
9 */
10
11#include <linux/kvm_host.h>
12#include <linux/kvm.h>
13#include <linux/uaccess.h>
14#include <linux/vmalloc.h>
15#include <linux/errno.h>
16#include <linux/spinlock.h>
17#include <linux/pci.h>
18#include <linux/interrupt.h>
19#include <linux/slab.h>
20#include <linux/namei.h>
21#include <linux/fs.h>
22#include "irq.h"
23
24static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
25 int assigned_dev_id)
26{
27 struct list_head *ptr;
28 struct kvm_assigned_dev_kernel *match;
29
30 list_for_each(ptr, head) {
31 match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
32 if (match->assigned_dev_id == assigned_dev_id)
33 return match;
34 }
35 return NULL;
36}
37
38static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
39 *assigned_dev, int irq)
40{
41 int i, index;
42 struct msix_entry *host_msix_entries;
43
44 host_msix_entries = assigned_dev->host_msix_entries;
45
46 index = -1;
47 for (i = 0; i < assigned_dev->entries_nr; i++)
48 if (irq == host_msix_entries[i].vector) {
49 index = i;
50 break;
51 }
52 if (index < 0)
53 printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");
54
55 return index;
56}
57
58static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
59{
60 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
61 int ret;
62
63 spin_lock(&assigned_dev->intx_lock);
64 if (pci_check_and_mask_intx(assigned_dev->dev)) {
65 assigned_dev->host_irq_disabled = true;
66 ret = IRQ_WAKE_THREAD;
67 } else
68 ret = IRQ_NONE;
69 spin_unlock(&assigned_dev->intx_lock);
70
71 return ret;
72}
73
74static void
75kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
76 int vector)
77{
78 if (unlikely(assigned_dev->irq_requested_type &
79 KVM_DEV_IRQ_GUEST_INTX)) {
80 spin_lock(&assigned_dev->intx_mask_lock);
81 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
82 kvm_set_irq(assigned_dev->kvm,
83 assigned_dev->irq_source_id, vector, 1,
84 false);
85 spin_unlock(&assigned_dev->intx_mask_lock);
86 } else
87 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
88 vector, 1, false);
89}
90
91static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
92{
93 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
94
95 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
96 spin_lock_irq(&assigned_dev->intx_lock);
97 disable_irq_nosync(irq);
98 assigned_dev->host_irq_disabled = true;
99 spin_unlock_irq(&assigned_dev->intx_lock);
100 }
101
102 kvm_assigned_dev_raise_guest_irq(assigned_dev,
103 assigned_dev->guest_irq);
104
105 return IRQ_HANDLED;
106}
107
108#ifdef __KVM_HAVE_MSI
109static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
110{
111 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
112 int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
113 assigned_dev->irq_source_id,
114 assigned_dev->guest_irq, 1);
115 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
116}
117
118static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
119{
120 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
121
122 kvm_assigned_dev_raise_guest_irq(assigned_dev,
123 assigned_dev->guest_irq);
124
125 return IRQ_HANDLED;
126}
127#endif
128
129#ifdef __KVM_HAVE_MSIX
130static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
131{
132 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
133 int index = find_index_from_host_irq(assigned_dev, irq);
134 u32 vector;
135 int ret = 0;
136
137 if (index >= 0) {
138 vector = assigned_dev->guest_msix_entries[index].vector;
139 ret = kvm_set_irq_inatomic(assigned_dev->kvm,
140 assigned_dev->irq_source_id,
141 vector, 1);
142 }
143
144 return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
145}
146
147static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
148{
149 struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
150 int index = find_index_from_host_irq(assigned_dev, irq);
151 u32 vector;
152
153 if (index >= 0) {
154 vector = assigned_dev->guest_msix_entries[index].vector;
155 kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
156 }
157
158 return IRQ_HANDLED;
159}
160#endif
161
162/* Ack the irq line for an assigned device */
163static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
164{
165 struct kvm_assigned_dev_kernel *dev =
166 container_of(kian, struct kvm_assigned_dev_kernel,
167 ack_notifier);
168
169 kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
170
171 spin_lock(&dev->intx_mask_lock);
172
173 if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
174 bool reassert = false;
175
176 spin_lock_irq(&dev->intx_lock);
177 /*
178 * The guest IRQ may be shared so this ack can come from an
179 * IRQ for another guest device.
180 */
181 if (dev->host_irq_disabled) {
182 if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
183 enable_irq(dev->host_irq);
184 else if (!pci_check_and_unmask_intx(dev->dev))
185 reassert = true;
186 dev->host_irq_disabled = reassert;
187 }
188 spin_unlock_irq(&dev->intx_lock);
189
190 if (reassert)
191 kvm_set_irq(dev->kvm, dev->irq_source_id,
192 dev->guest_irq, 1, false);
193 }
194
195 spin_unlock(&dev->intx_mask_lock);
196}
197
198static void deassign_guest_irq(struct kvm *kvm,
199 struct kvm_assigned_dev_kernel *assigned_dev)
200{
201 if (assigned_dev->ack_notifier.gsi != -1)
202 kvm_unregister_irq_ack_notifier(kvm,
203 &assigned_dev->ack_notifier);
204
205 kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
206 assigned_dev->guest_irq, 0, false);
207
208 if (assigned_dev->irq_source_id != -1)
209 kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
210 assigned_dev->irq_source_id = -1;
211 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
212}
213
214/* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
215static void deassign_host_irq(struct kvm *kvm,
216 struct kvm_assigned_dev_kernel *assigned_dev)
217{
218 /*
219 * We disable irq here to prevent further events.
220 *
221 * Note this may result in a nested disable if the interrupt type is
222 * INTx, but that is OK since we are about to free it.
223 *
224 * If this function is called as part of VM destruction, make sure the
225 * kvm state is still valid at this point, because we may also have to
226 * wait on a currently running IRQ handler.
227 */
228 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
229 int i;
230 for (i = 0; i < assigned_dev->entries_nr; i++)
231 disable_irq(assigned_dev->host_msix_entries[i].vector);
232
233 for (i = 0; i < assigned_dev->entries_nr; i++)
234 free_irq(assigned_dev->host_msix_entries[i].vector,
235 assigned_dev);
236
237 assigned_dev->entries_nr = 0;
238 kfree(assigned_dev->host_msix_entries);
239 kfree(assigned_dev->guest_msix_entries);
240 pci_disable_msix(assigned_dev->dev);
241 } else {
242 /* Deal with MSI and INTx */
243 if ((assigned_dev->irq_requested_type &
244 KVM_DEV_IRQ_HOST_INTX) &&
245 (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
246 spin_lock_irq(&assigned_dev->intx_lock);
247 pci_intx(assigned_dev->dev, false);
248 spin_unlock_irq(&assigned_dev->intx_lock);
249 synchronize_irq(assigned_dev->host_irq);
250 } else
251 disable_irq(assigned_dev->host_irq);
252
253 free_irq(assigned_dev->host_irq, assigned_dev);
254
255 if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
256 pci_disable_msi(assigned_dev->dev);
257 }
258
259 assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
260}
261
262static int kvm_deassign_irq(struct kvm *kvm,
263 struct kvm_assigned_dev_kernel *assigned_dev,
264 unsigned long irq_requested_type)
265{
266 unsigned long guest_irq_type, host_irq_type;
267
268 if (!irqchip_in_kernel(kvm))
269 return -EINVAL;
270 /* no irq assignment to deassign */
271 if (!assigned_dev->irq_requested_type)
272 return -ENXIO;
273
274 host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
275 guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
276
277 if (host_irq_type)
278 deassign_host_irq(kvm, assigned_dev);
279 if (guest_irq_type)
280 deassign_guest_irq(kvm, assigned_dev);
281
282 return 0;
283}
284
285static void kvm_free_assigned_irq(struct kvm *kvm,
286 struct kvm_assigned_dev_kernel *assigned_dev)
287{
288 kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
289}
290
291static void kvm_free_assigned_device(struct kvm *kvm,
292 struct kvm_assigned_dev_kernel
293 *assigned_dev)
294{
295 kvm_free_assigned_irq(kvm, assigned_dev);
296
297 pci_reset_function(assigned_dev->dev);
298 if (pci_load_and_free_saved_state(assigned_dev->dev,
299 &assigned_dev->pci_saved_state))
300 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
301 __func__, dev_name(&assigned_dev->dev->dev));
302 else
303 pci_restore_state(assigned_dev->dev);
304
305 assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
306
307 pci_release_regions(assigned_dev->dev);
308 pci_disable_device(assigned_dev->dev);
309 pci_dev_put(assigned_dev->dev);
310
311 list_del(&assigned_dev->list);
312 kfree(assigned_dev);
313}
314
315void kvm_free_all_assigned_devices(struct kvm *kvm)
316{
317 struct list_head *ptr, *ptr2;
318 struct kvm_assigned_dev_kernel *assigned_dev;
319
320 list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
321 assigned_dev = list_entry(ptr,
322 struct kvm_assigned_dev_kernel,
323 list);
324
325 kvm_free_assigned_device(kvm, assigned_dev);
326 }
327}
328
329static int assigned_device_enable_host_intx(struct kvm *kvm,
330 struct kvm_assigned_dev_kernel *dev)
331{
332 irq_handler_t irq_handler;
333 unsigned long flags;
334
335 dev->host_irq = dev->dev->irq;
336
337 /*
338 * We can only share the IRQ line with other host devices if we are
339 * able to disable the IRQ source at device-level - independently of
340 * the guest driver. Otherwise host devices may suffer from unbounded
341 * IRQ latencies when the guest keeps the line asserted.
342 */
343 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
344 irq_handler = kvm_assigned_dev_intx;
345 flags = IRQF_SHARED;
346 } else {
347 irq_handler = NULL;
348 flags = IRQF_ONESHOT;
349 }
350 if (request_threaded_irq(dev->host_irq, irq_handler,
351 kvm_assigned_dev_thread_intx, flags,
352 dev->irq_name, dev))
353 return -EIO;
354
355 if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
356 spin_lock_irq(&dev->intx_lock);
357 pci_intx(dev->dev, true);
358 spin_unlock_irq(&dev->intx_lock);
359 }
360 return 0;
361}
362
363#ifdef __KVM_HAVE_MSI
364static int assigned_device_enable_host_msi(struct kvm *kvm,
365 struct kvm_assigned_dev_kernel *dev)
366{
367 int r;
368
369 if (!dev->dev->msi_enabled) {
370 r = pci_enable_msi(dev->dev);
371 if (r)
372 return r;
373 }
374
375 dev->host_irq = dev->dev->irq;
376 if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
377 kvm_assigned_dev_thread_msi, 0,
378 dev->irq_name, dev)) {
379 pci_disable_msi(dev->dev);
380 return -EIO;
381 }
382
383 return 0;
384}
385#endif
386
387#ifdef __KVM_HAVE_MSIX
388static int assigned_device_enable_host_msix(struct kvm *kvm,
389 struct kvm_assigned_dev_kernel *dev)
390{
391 int i, r = -EINVAL;
392
393 /* host_msix_entries and guest_msix_entries should have been
394 * initialized */
395 if (dev->entries_nr == 0)
396 return r;
397
398 r = pci_enable_msix_exact(dev->dev,
399 dev->host_msix_entries, dev->entries_nr);
400 if (r)
401 return r;
402
403 for (i = 0; i < dev->entries_nr; i++) {
404 r = request_threaded_irq(dev->host_msix_entries[i].vector,
405 kvm_assigned_dev_msix,
406 kvm_assigned_dev_thread_msix,
407 0, dev->irq_name, dev);
408 if (r)
409 goto err;
410 }
411
412 return 0;
413err:
414 for (i -= 1; i >= 0; i--)
415 free_irq(dev->host_msix_entries[i].vector, dev);
416 pci_disable_msix(dev->dev);
417 return r;
418}
419
420#endif
421
422static int assigned_device_enable_guest_intx(struct kvm *kvm,
423 struct kvm_assigned_dev_kernel *dev,
424 struct kvm_assigned_irq *irq)
425{
426 dev->guest_irq = irq->guest_irq;
427 dev->ack_notifier.gsi = irq->guest_irq;
428 return 0;
429}
430
431#ifdef __KVM_HAVE_MSI
432static int assigned_device_enable_guest_msi(struct kvm *kvm,
433 struct kvm_assigned_dev_kernel *dev,
434 struct kvm_assigned_irq *irq)
435{
436 dev->guest_irq = irq->guest_irq;
437 dev->ack_notifier.gsi = -1;
438 return 0;
439}
440#endif
441
442#ifdef __KVM_HAVE_MSIX
443static int assigned_device_enable_guest_msix(struct kvm *kvm,
444 struct kvm_assigned_dev_kernel *dev,
445 struct kvm_assigned_irq *irq)
446{
447 dev->guest_irq = irq->guest_irq;
448 dev->ack_notifier.gsi = -1;
449 return 0;
450}
451#endif
452
453static int assign_host_irq(struct kvm *kvm,
454 struct kvm_assigned_dev_kernel *dev,
455 __u32 host_irq_type)
456{
457 int r = -EEXIST;
458
459 if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
460 return r;
461
462 snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
463 pci_name(dev->dev));
464
465 switch (host_irq_type) {
466 case KVM_DEV_IRQ_HOST_INTX:
467 r = assigned_device_enable_host_intx(kvm, dev);
468 break;
469#ifdef __KVM_HAVE_MSI
470 case KVM_DEV_IRQ_HOST_MSI:
471 r = assigned_device_enable_host_msi(kvm, dev);
472 break;
473#endif
474#ifdef __KVM_HAVE_MSIX
475 case KVM_DEV_IRQ_HOST_MSIX:
476 r = assigned_device_enable_host_msix(kvm, dev);
477 break;
478#endif
479 default:
480 r = -EINVAL;
481 }
482 dev->host_irq_disabled = false;
483
484 if (!r)
485 dev->irq_requested_type |= host_irq_type;
486
487 return r;
488}
489
490static int assign_guest_irq(struct kvm *kvm,
491 struct kvm_assigned_dev_kernel *dev,
492 struct kvm_assigned_irq *irq,
493 unsigned long guest_irq_type)
494{
495 int id;
496 int r = -EEXIST;
497
498 if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
499 return r;
500
501 id = kvm_request_irq_source_id(kvm);
502 if (id < 0)
503 return id;
504
505 dev->irq_source_id = id;
506
507 switch (guest_irq_type) {
508 case KVM_DEV_IRQ_GUEST_INTX:
509 r = assigned_device_enable_guest_intx(kvm, dev, irq);
510 break;
511#ifdef __KVM_HAVE_MSI
512 case KVM_DEV_IRQ_GUEST_MSI:
513 r = assigned_device_enable_guest_msi(kvm, dev, irq);
514 break;
515#endif
516#ifdef __KVM_HAVE_MSIX
517 case KVM_DEV_IRQ_GUEST_MSIX:
518 r = assigned_device_enable_guest_msix(kvm, dev, irq);
519 break;
520#endif
521 default:
522 r = -EINVAL;
523 }
524
525 if (!r) {
526 dev->irq_requested_type |= guest_irq_type;
527 if (dev->ack_notifier.gsi != -1)
528 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
529 } else
530 kvm_free_irq_source_id(kvm, dev->irq_source_id);
531
532 return r;
533}
534
535/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
536static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
537 struct kvm_assigned_irq *assigned_irq)
538{
539 int r = -EINVAL;
540 struct kvm_assigned_dev_kernel *match;
541 unsigned long host_irq_type, guest_irq_type;
542
543 if (!irqchip_in_kernel(kvm))
544 return r;
545
546 mutex_lock(&kvm->lock);
547 r = -ENODEV;
548 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
549 assigned_irq->assigned_dev_id);
550 if (!match)
551 goto out;
552
553 host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
554 guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
555
556 r = -EINVAL;
557 /* can only assign one type at a time */
558 if (hweight_long(host_irq_type) > 1)
559 goto out;
560 if (hweight_long(guest_irq_type) > 1)
561 goto out;
562 if (host_irq_type == 0 && guest_irq_type == 0)
563 goto out;
564
565 r = 0;
566 if (host_irq_type)
567 r = assign_host_irq(kvm, match, host_irq_type);
568 if (r)
569 goto out;
570
571 if (guest_irq_type)
572 r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
573out:
574 mutex_unlock(&kvm->lock);
575 return r;
576}
577
578static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
579 struct kvm_assigned_irq
580 *assigned_irq)
581{
582 int r = -ENODEV;
583 struct kvm_assigned_dev_kernel *match;
584 unsigned long irq_type;
585
586 mutex_lock(&kvm->lock);
587
588 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
589 assigned_irq->assigned_dev_id);
590 if (!match)
591 goto out;
592
593 irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
594 KVM_DEV_IRQ_GUEST_MASK);
595 r = kvm_deassign_irq(kvm, match, irq_type);
596out:
597 mutex_unlock(&kvm->lock);
598 return r;
599}
600
601/*
602 * We want to test whether the caller has been granted permissions to
603 * use this device. To be able to configure and control the device,
604 * the user needs access to PCI configuration space and BAR resources.
605 * These are accessed through PCI sysfs. PCI config space is often
606 * passed to the process calling this ioctl via file descriptor, so we
607 * can't rely on access to that file. We can check for permissions
608 * on each of the BAR resource files, which is a pretty clear
609 * indicator that the user has been granted access to the device.
610 */
611static int probe_sysfs_permissions(struct pci_dev *dev)
612{
613#ifdef CONFIG_SYSFS
614 int i;
615 bool bar_found = false;
616
617 for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
618 char *kpath, *syspath;
619 struct path path;
620 struct inode *inode;
621 int r;
622
623 if (!pci_resource_len(dev, i))
624 continue;
625
626 kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
627 if (!kpath)
628 return -ENOMEM;
629
630 /* Per sysfs-rules, sysfs is always at /sys */
631 syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
632 kfree(kpath);
633 if (!syspath)
634 return -ENOMEM;
635
636 r = kern_path(syspath, LOOKUP_FOLLOW, &path);
637 kfree(syspath);
638 if (r)
639 return r;
640
641 inode = path.dentry->d_inode;
642
643 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
644 path_put(&path);
645 if (r)
646 return r;
647
648 bar_found = true;
649 }
650
651 /* If no resources, probably something special */
652 if (!bar_found)
653 return -EPERM;
654
655 return 0;
656#else
657 return -EINVAL; /* No way to control the device without sysfs */
658#endif
659}
660
661static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
662 struct kvm_assigned_pci_dev *assigned_dev)
663{
664 int r = 0, idx;
665 struct kvm_assigned_dev_kernel *match;
666 struct pci_dev *dev;
667
668 if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
669 return -EINVAL;
670
671 mutex_lock(&kvm->lock);
672 idx = srcu_read_lock(&kvm->srcu);
673
674 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
675 assigned_dev->assigned_dev_id);
676 if (match) {
677 /* device already assigned */
678 r = -EEXIST;
679 goto out;
680 }
681
682 match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
683 if (match == NULL) {
684 printk(KERN_INFO "%s: Couldn't allocate memory\n",
685 __func__);
686 r = -ENOMEM;
687 goto out;
688 }
689 dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
690 assigned_dev->busnr,
691 assigned_dev->devfn);
692 if (!dev) {
693 printk(KERN_INFO "%s: host device not found\n", __func__);
694 r = -EINVAL;
695 goto out_free;
696 }
697
698 /* Don't allow bridges to be assigned */
699 if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
700 r = -EPERM;
701 goto out_put;
702 }
703
704 r = probe_sysfs_permissions(dev);
705 if (r)
706 goto out_put;
707
708 if (pci_enable_device(dev)) {
709 printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
710 r = -EBUSY;
711 goto out_put;
712 }
713 r = pci_request_regions(dev, "kvm_assigned_device");
714 if (r) {
715 printk(KERN_INFO "%s: Could not get access to device regions\n",
716 __func__);
717 goto out_disable;
718 }
719
720 pci_reset_function(dev);
721 pci_save_state(dev);
722 match->pci_saved_state = pci_store_saved_state(dev);
723 if (!match->pci_saved_state)
724 printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
725 __func__, dev_name(&dev->dev));
726
727 if (!pci_intx_mask_supported(dev))
728 assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
729
730 match->assigned_dev_id = assigned_dev->assigned_dev_id;
731 match->host_segnr = assigned_dev->segnr;
732 match->host_busnr = assigned_dev->busnr;
733 match->host_devfn = assigned_dev->devfn;
734 match->flags = assigned_dev->flags;
735 match->dev = dev;
736 spin_lock_init(&match->intx_lock);
737 spin_lock_init(&match->intx_mask_lock);
738 match->irq_source_id = -1;
739 match->kvm = kvm;
740 match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
741
742 list_add(&match->list, &kvm->arch.assigned_dev_head);
743
744 if (!kvm->arch.iommu_domain) {
745 r = kvm_iommu_map_guest(kvm);
746 if (r)
747 goto out_list_del;
748 }
749 r = kvm_assign_device(kvm, match);
750 if (r)
751 goto out_list_del;
752
753out:
754 srcu_read_unlock(&kvm->srcu, idx);
755 mutex_unlock(&kvm->lock);
756 return r;
757out_list_del:
758 if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
759 printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
760 __func__, dev_name(&dev->dev));
761 list_del(&match->list);
762 pci_release_regions(dev);
763out_disable:
764 pci_disable_device(dev);
765out_put:
766 pci_dev_put(dev);
767out_free:
768 kfree(match);
769 srcu_read_unlock(&kvm->srcu, idx);
770 mutex_unlock(&kvm->lock);
771 return r;
772}
773
774static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
775 struct kvm_assigned_pci_dev *assigned_dev)
776{
777 int r = 0;
778 struct kvm_assigned_dev_kernel *match;
779
780 mutex_lock(&kvm->lock);
781
782 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
783 assigned_dev->assigned_dev_id);
784 if (!match) {
785 printk(KERN_INFO "%s: device hasn't been assigned before, "
786 "so cannot be deassigned\n", __func__);
787 r = -EINVAL;
788 goto out;
789 }
790
791 kvm_deassign_device(kvm, match);
792
793 kvm_free_assigned_device(kvm, match);
794
795out:
796 mutex_unlock(&kvm->lock);
797 return r;
798}
799
800
801#ifdef __KVM_HAVE_MSIX
802static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
803 struct kvm_assigned_msix_nr *entry_nr)
804{
805 int r = 0;
806 struct kvm_assigned_dev_kernel *adev;
807
808 mutex_lock(&kvm->lock);
809
810 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
811 entry_nr->assigned_dev_id);
812 if (!adev) {
813 r = -EINVAL;
814 goto msix_nr_out;
815 }
816
817 if (adev->entries_nr == 0) {
818 adev->entries_nr = entry_nr->entry_nr;
819 if (adev->entries_nr == 0 ||
820 adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
821 r = -EINVAL;
822 goto msix_nr_out;
823 }
824
825 adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
826 entry_nr->entry_nr,
827 GFP_KERNEL);
828 if (!adev->host_msix_entries) {
829 r = -ENOMEM;
830 goto msix_nr_out;
831 }
832 adev->guest_msix_entries =
833 kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
834 GFP_KERNEL);
835 if (!adev->guest_msix_entries) {
836 kfree(adev->host_msix_entries);
837 r = -ENOMEM;
838 goto msix_nr_out;
839 }
840 } else /* Not allowed to set the MSI-X number twice */
841 r = -EINVAL;
842msix_nr_out:
843 mutex_unlock(&kvm->lock);
844 return r;
845}
846
847static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
848 struct kvm_assigned_msix_entry *entry)
849{
850 int r = 0, i;
851 struct kvm_assigned_dev_kernel *adev;
852
853 mutex_lock(&kvm->lock);
854
855 adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
856 entry->assigned_dev_id);
857
858 if (!adev) {
859 r = -EINVAL;
860 goto msix_entry_out;
861 }
862
863 for (i = 0; i < adev->entries_nr; i++)
864 if (adev->guest_msix_entries[i].vector == 0 ||
865 adev->guest_msix_entries[i].entry == entry->entry) {
866 adev->guest_msix_entries[i].entry = entry->entry;
867 adev->guest_msix_entries[i].vector = entry->gsi;
868 adev->host_msix_entries[i].entry = entry->entry;
869 break;
870 }
871 if (i == adev->entries_nr) {
872 r = -ENOSPC;
873 goto msix_entry_out;
874 }
875
876msix_entry_out:
877 mutex_unlock(&kvm->lock);
878
879 return r;
880}
881#endif
882
883static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
884 struct kvm_assigned_pci_dev *assigned_dev)
885{
886 int r = 0;
887 struct kvm_assigned_dev_kernel *match;
888
889 mutex_lock(&kvm->lock);
890
891 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
892 assigned_dev->assigned_dev_id);
893 if (!match) {
894 r = -ENODEV;
895 goto out;
896 }
897
898 spin_lock(&match->intx_mask_lock);
899
900 match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
901 match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
902
903 if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
904 if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
905 kvm_set_irq(match->kvm, match->irq_source_id,
906 match->guest_irq, 0, false);
907 /*
908 * Masking at hardware-level is performed on demand,
909 * i.e. when an IRQ actually arrives at the host.
910 */
911 } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
912 /*
913 * Unmask the IRQ line if required. Unmasking at
914 * device level will be performed by user space.
915 */
916 spin_lock_irq(&match->intx_lock);
917 if (match->host_irq_disabled) {
918 enable_irq(match->host_irq);
919 match->host_irq_disabled = false;
920 }
921 spin_unlock_irq(&match->intx_lock);
922 }
923 }
924
925 spin_unlock(&match->intx_mask_lock);
926
927out:
928 mutex_unlock(&kvm->lock);
929 return r;
930}
931
932long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
933 unsigned long arg)
934{
935 void __user *argp = (void __user *)arg;
936 int r;
937
938 switch (ioctl) {
939 case KVM_ASSIGN_PCI_DEVICE: {
940 struct kvm_assigned_pci_dev assigned_dev;
941
942 r = -EFAULT;
943 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
944 goto out;
945 r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
946 if (r)
947 goto out;
948 break;
949 }
950 case KVM_ASSIGN_IRQ: {
951 r = -EOPNOTSUPP;
952 break;
953 }
954 case KVM_ASSIGN_DEV_IRQ: {
955 struct kvm_assigned_irq assigned_irq;
956
957 r = -EFAULT;
958 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
959 goto out;
960 r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
961 if (r)
962 goto out;
963 break;
964 }
965 case KVM_DEASSIGN_DEV_IRQ: {
966 struct kvm_assigned_irq assigned_irq;
967
968 r = -EFAULT;
969 if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
970 goto out;
971 r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
972 if (r)
973 goto out;
974 break;
975 }
976 case KVM_DEASSIGN_PCI_DEVICE: {
977 struct kvm_assigned_pci_dev assigned_dev;
978
979 r = -EFAULT;
980 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
981 goto out;
982 r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
983 if (r)
984 goto out;
985 break;
986 }
987#ifdef __KVM_HAVE_MSIX
988 case KVM_ASSIGN_SET_MSIX_NR: {
989 struct kvm_assigned_msix_nr entry_nr;
990 r = -EFAULT;
991 if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
992 goto out;
993 r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
994 if (r)
995 goto out;
996 break;
997 }
998 case KVM_ASSIGN_SET_MSIX_ENTRY: {
999 struct kvm_assigned_msix_entry entry;
1000 r = -EFAULT;
1001 if (copy_from_user(&entry, argp, sizeof entry))
1002 goto out;
1003 r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
1004 if (r)
1005 goto out;
1006 break;
1007 }
1008#endif
1009 case KVM_ASSIGN_SET_INTX_MASK: {
1010 struct kvm_assigned_pci_dev assigned_dev;
1011
1012 r = -EFAULT;
1013 if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
1014 goto out;
1015 r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
1016 break;
1017 }
1018 default:
1019 r = -ENOTTY;
1020 break;
1021 }
1022out:
1023 return r;
1024}
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..44660aee335f 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
80 80
81 might_sleep(); 81 might_sleep();
82 82
83 down_read(&mm->mmap_sem); 83 get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL);
84 get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
85 up_read(&mm->mmap_sem);
86 kvm_async_page_present_sync(vcpu, apf); 84 kvm_async_page_present_sync(vcpu, apf);
87 85
88 spin_lock(&vcpu->async_pf.lock); 86 spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 00d86427af0f..571c1ce37d15 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -8,7 +8,7 @@
8 * 8 *
9 */ 9 */
10 10
11#include "iodev.h" 11#include <kvm/iodev.h>
12 12
13#include <linux/kvm_host.h> 13#include <linux/kvm_host.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
@@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
60 return 1; 60 return 1;
61} 61}
62 62
63static int coalesced_mmio_write(struct kvm_io_device *this, 63static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
64 gpa_t addr, int len, const void *val) 64 struct kvm_io_device *this, gpa_t addr,
65 int len, const void *val)
65{ 66{
66 struct kvm_coalesced_mmio_dev *dev = to_mmio(this); 67 struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
67 struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; 68 struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 20c3af7692c5..9ff4193dfa49 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -33,10 +33,12 @@
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/srcu.h> 34#include <linux/srcu.h>
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/seqlock.h>
37#include <trace/events/kvm.h>
36 38
37#include "iodev.h" 39#include <kvm/iodev.h>
38 40
39#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 41#ifdef CONFIG_HAVE_KVM_IRQFD
40/* 42/*
41 * -------------------------------------------------------------------- 43 * --------------------------------------------------------------------
42 * irqfd: Allows an fd to be used to inject an interrupt to the guest 44 * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -75,7 +77,8 @@ struct _irqfd {
75 struct kvm *kvm; 77 struct kvm *kvm;
76 wait_queue_t wait; 78 wait_queue_t wait;
77 /* Update side is protected by irqfds.lock */ 79 /* Update side is protected by irqfds.lock */
78 struct kvm_kernel_irq_routing_entry __rcu *irq_entry; 80 struct kvm_kernel_irq_routing_entry irq_entry;
81 seqcount_t irq_entry_sc;
79 /* Used for level IRQ fast-path */ 82 /* Used for level IRQ fast-path */
80 int gsi; 83 int gsi;
81 struct work_struct inject; 84 struct work_struct inject;
@@ -223,16 +226,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
223{ 226{
224 struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); 227 struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
225 unsigned long flags = (unsigned long)key; 228 unsigned long flags = (unsigned long)key;
226 struct kvm_kernel_irq_routing_entry *irq; 229 struct kvm_kernel_irq_routing_entry irq;
227 struct kvm *kvm = irqfd->kvm; 230 struct kvm *kvm = irqfd->kvm;
231 unsigned seq;
228 int idx; 232 int idx;
229 233
230 if (flags & POLLIN) { 234 if (flags & POLLIN) {
231 idx = srcu_read_lock(&kvm->irq_srcu); 235 idx = srcu_read_lock(&kvm->irq_srcu);
232 irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); 236 do {
237 seq = read_seqcount_begin(&irqfd->irq_entry_sc);
238 irq = irqfd->irq_entry;
239 } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
233 /* An event has been signaled, inject an interrupt */ 240 /* An event has been signaled, inject an interrupt */
234 if (irq) 241 if (irq.type == KVM_IRQ_ROUTING_MSI)
235 kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, 242 kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
236 false); 243 false);
237 else 244 else
238 schedule_work(&irqfd->inject); 245 schedule_work(&irqfd->inject);
@@ -272,34 +279,40 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
272} 279}
273 280
274/* Must be called under irqfds.lock */ 281/* Must be called under irqfds.lock */
275static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, 282static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
276 struct kvm_irq_routing_table *irq_rt)
277{ 283{
278 struct kvm_kernel_irq_routing_entry *e; 284 struct kvm_kernel_irq_routing_entry *e;
285 struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
286 int i, n_entries;
279 287
280 if (irqfd->gsi >= irq_rt->nr_rt_entries) { 288 n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
281 rcu_assign_pointer(irqfd->irq_entry, NULL); 289
282 return; 290 write_seqcount_begin(&irqfd->irq_entry_sc);
283 } 291
292 irqfd->irq_entry.type = 0;
284 293
285 hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { 294 e = entries;
295 for (i = 0; i < n_entries; ++i, ++e) {
286 /* Only fast-path MSI. */ 296 /* Only fast-path MSI. */
287 if (e->type == KVM_IRQ_ROUTING_MSI) 297 if (e->type == KVM_IRQ_ROUTING_MSI)
288 rcu_assign_pointer(irqfd->irq_entry, e); 298 irqfd->irq_entry = *e;
289 else
290 rcu_assign_pointer(irqfd->irq_entry, NULL);
291 } 299 }
300
301 write_seqcount_end(&irqfd->irq_entry_sc);
292} 302}
293 303
294static int 304static int
295kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) 305kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
296{ 306{
297 struct kvm_irq_routing_table *irq_rt;
298 struct _irqfd *irqfd, *tmp; 307 struct _irqfd *irqfd, *tmp;
299 struct fd f; 308 struct fd f;
300 struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; 309 struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
301 int ret; 310 int ret;
302 unsigned int events; 311 unsigned int events;
312 int idx;
313
314 if (!kvm_arch_intc_initialized(kvm))
315 return -EAGAIN;
303 316
304 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); 317 irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
305 if (!irqfd) 318 if (!irqfd)
@@ -310,6 +323,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
310 INIT_LIST_HEAD(&irqfd->list); 323 INIT_LIST_HEAD(&irqfd->list);
311 INIT_WORK(&irqfd->inject, irqfd_inject); 324 INIT_WORK(&irqfd->inject, irqfd_inject);
312 INIT_WORK(&irqfd->shutdown, irqfd_shutdown); 325 INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
326 seqcount_init(&irqfd->irq_entry_sc);
313 327
314 f = fdget(args->fd); 328 f = fdget(args->fd);
315 if (!f.file) { 329 if (!f.file) {
@@ -392,9 +406,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
392 goto fail; 406 goto fail;
393 } 407 }
394 408
395 irq_rt = rcu_dereference_protected(kvm->irq_routing, 409 idx = srcu_read_lock(&kvm->irq_srcu);
396 lockdep_is_held(&kvm->irqfds.lock)); 410 irqfd_update(kvm, irqfd);
397 irqfd_update(kvm, irqfd, irq_rt); 411 srcu_read_unlock(&kvm->irq_srcu, idx);
398 412
399 list_add_tail(&irqfd->list, &kvm->irqfds.items); 413 list_add_tail(&irqfd->list, &kvm->irqfds.items);
400 414
@@ -433,12 +447,69 @@ out:
433 kfree(irqfd); 447 kfree(irqfd);
434 return ret; 448 return ret;
435} 449}
450
451bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
452{
453 struct kvm_irq_ack_notifier *kian;
454 int gsi, idx;
455
456 idx = srcu_read_lock(&kvm->irq_srcu);
457 gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
458 if (gsi != -1)
459 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
460 link)
461 if (kian->gsi == gsi) {
462 srcu_read_unlock(&kvm->irq_srcu, idx);
463 return true;
464 }
465
466 srcu_read_unlock(&kvm->irq_srcu, idx);
467
468 return false;
469}
470EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
471
472void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
473{
474 struct kvm_irq_ack_notifier *kian;
475 int gsi, idx;
476
477 trace_kvm_ack_irq(irqchip, pin);
478
479 idx = srcu_read_lock(&kvm->irq_srcu);
480 gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
481 if (gsi != -1)
482 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
483 link)
484 if (kian->gsi == gsi)
485 kian->irq_acked(kian);
486 srcu_read_unlock(&kvm->irq_srcu, idx);
487}
488
489void kvm_register_irq_ack_notifier(struct kvm *kvm,
490 struct kvm_irq_ack_notifier *kian)
491{
492 mutex_lock(&kvm->irq_lock);
493 hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
494 mutex_unlock(&kvm->irq_lock);
495 kvm_vcpu_request_scan_ioapic(kvm);
496}
497
498void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
499 struct kvm_irq_ack_notifier *kian)
500{
501 mutex_lock(&kvm->irq_lock);
502 hlist_del_init_rcu(&kian->link);
503 mutex_unlock(&kvm->irq_lock);
504 synchronize_srcu(&kvm->irq_srcu);
505 kvm_vcpu_request_scan_ioapic(kvm);
506}
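For reference, the consumer side of these notifiers follows the pattern used by the assigned-device code removed earlier in this series: fill in gsi and irq_acked, then register, and the callback runs when the guest EOIs an interrupt routed to that GSI. A hypothetical minimal user, with illustrative names:

        #include <linux/kvm_host.h>

        struct my_source {
                struct kvm_irq_ack_notifier ack;
                /* ... device state ... */
        };

        static void my_source_acked(struct kvm_irq_ack_notifier *kian)
        {
                struct my_source *s = container_of(kian, struct my_source, ack);

                /* re-arm or re-assert the interrupt source here */
                (void)s;
        }

        static void my_source_track(struct kvm *kvm, struct my_source *s, int gsi)
        {
                s->ack.gsi = gsi;
                s->ack.irq_acked = my_source_acked;
                kvm_register_irq_ack_notifier(kvm, &s->ack);
        }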
436#endif 507#endif
437 508
438void 509void
439kvm_eventfd_init(struct kvm *kvm) 510kvm_eventfd_init(struct kvm *kvm)
440{ 511{
441#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 512#ifdef CONFIG_HAVE_KVM_IRQFD
442 spin_lock_init(&kvm->irqfds.lock); 513 spin_lock_init(&kvm->irqfds.lock);
443 INIT_LIST_HEAD(&kvm->irqfds.items); 514 INIT_LIST_HEAD(&kvm->irqfds.items);
444 INIT_LIST_HEAD(&kvm->irqfds.resampler_list); 515 INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -447,7 +518,7 @@ kvm_eventfd_init(struct kvm *kvm)
447 INIT_LIST_HEAD(&kvm->ioeventfds); 518 INIT_LIST_HEAD(&kvm->ioeventfds);
448} 519}
449 520
450#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING 521#ifdef CONFIG_HAVE_KVM_IRQFD
451/* 522/*
452 * shutdown any irqfd's that match fd+gsi 523 * shutdown any irqfd's that match fd+gsi
453 */ 524 */
@@ -466,14 +537,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
466 list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { 537 list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
467 if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { 538 if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
468 /* 539 /*
469 * This rcu_assign_pointer is needed for when 540 * This clearing of irq_entry.type is needed for when
470 * another thread calls kvm_irq_routing_update before 541 * another thread calls kvm_irq_routing_update before
471 * we flush workqueue below (we synchronize with 542 * we flush workqueue below (we synchronize with
472 * kvm_irq_routing_update using irqfds.lock). 543 * kvm_irq_routing_update using irqfds.lock).
473 * It is paired with synchronize_srcu done by caller
474 * of that function.
475 */ 544 */
476 rcu_assign_pointer(irqfd->irq_entry, NULL); 545 write_seqcount_begin(&irqfd->irq_entry_sc);
546 irqfd->irq_entry.type = 0;
547 write_seqcount_end(&irqfd->irq_entry_sc);
477 irqfd_deactivate(irqfd); 548 irqfd_deactivate(irqfd);
478 } 549 }
479 } 550 }
@@ -528,20 +599,17 @@ kvm_irqfd_release(struct kvm *kvm)
528} 599}
529 600
530/* 601/*
531 * Change irq_routing and irqfd. 602 * Take note of a change in irq routing.
532 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. 603 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
533 */ 604 */
534void kvm_irq_routing_update(struct kvm *kvm, 605void kvm_irq_routing_update(struct kvm *kvm)
535 struct kvm_irq_routing_table *irq_rt)
536{ 606{
537 struct _irqfd *irqfd; 607 struct _irqfd *irqfd;
538 608
539 spin_lock_irq(&kvm->irqfds.lock); 609 spin_lock_irq(&kvm->irqfds.lock);
540 610
541 rcu_assign_pointer(kvm->irq_routing, irq_rt);
542
543 list_for_each_entry(irqfd, &kvm->irqfds.items, list) 611 list_for_each_entry(irqfd, &kvm->irqfds.items, list)
544 irqfd_update(kvm, irqfd, irq_rt); 612 irqfd_update(kvm, irqfd);
545 613
546 spin_unlock_irq(&kvm->irqfds.lock); 614 spin_unlock_irq(&kvm->irqfds.lock);
547} 615}
@@ -647,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
647 715
648/* MMIO/PIO writes trigger an event if the addr/val match */ 716/* MMIO/PIO writes trigger an event if the addr/val match */
649static int 717static int
650ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, 718ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
651 const void *val) 719 int len, const void *val)
652{ 720{
653 struct _ioeventfd *p = to_ioeventfd(this); 721 struct _ioeventfd *p = to_ioeventfd(this);
654 722
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
deleted file mode 100644
index 2458a1dc2ba9..000000000000
--- a/virt/kvm/ioapic.c
+++ /dev/null
@@ -1,646 +0,0 @@
1/*
2 * Copyright (C) 2001 MandrakeSoft S.A.
3 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
4 *
5 * MandrakeSoft S.A.
6 * 43, rue d'Aboukir
7 * 75002 Paris - France
8 * http://www.linux-mandrake.com/
9 * http://www.mandrakesoft.com/
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 * Yunhong Jiang <yunhong.jiang@intel.com>
26 * Yaozu (Eddie) Dong <eddie.dong@intel.com>
27 * Based on Xen 3.1 code.
28 */
29
30#include <linux/kvm_host.h>
31#include <linux/kvm.h>
32#include <linux/mm.h>
33#include <linux/highmem.h>
34#include <linux/smp.h>
35#include <linux/hrtimer.h>
36#include <linux/io.h>
37#include <linux/slab.h>
38#include <linux/export.h>
39#include <asm/processor.h>
40#include <asm/page.h>
41#include <asm/current.h>
42#include <trace/events/kvm.h>
43
44#include "ioapic.h"
45#include "lapic.h"
46#include "irq.h"
47
48#if 0
49#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
50#else
51#define ioapic_debug(fmt, arg...)
52#endif
53static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
54 bool line_status);
55
56static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
57 unsigned long addr,
58 unsigned long length)
59{
60 unsigned long result = 0;
61
62 switch (ioapic->ioregsel) {
63 case IOAPIC_REG_VERSION:
64 result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
65 | (IOAPIC_VERSION_ID & 0xff));
66 break;
67
68 case IOAPIC_REG_APIC_ID:
69 case IOAPIC_REG_ARB_ID:
70 result = ((ioapic->id & 0xf) << 24);
71 break;
72
73 default:
74 {
75 u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
76 u64 redir_content;
77
78 if (redir_index < IOAPIC_NUM_PINS)
79 redir_content =
80 ioapic->redirtbl[redir_index].bits;
81 else
82 redir_content = ~0ULL;
83
84 result = (ioapic->ioregsel & 0x1) ?
85 (redir_content >> 32) & 0xffffffff :
86 redir_content & 0xffffffff;
87 break;
88 }
89 }
90
91 return result;
92}
93
94static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
95{
96 ioapic->rtc_status.pending_eoi = 0;
97 bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
98}
99
100static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
101
102static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
103{
104 if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
105 kvm_rtc_eoi_tracking_restore_all(ioapic);
106}
107
108static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
109{
110 bool new_val, old_val;
111 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
112 union kvm_ioapic_redirect_entry *e;
113
114 e = &ioapic->redirtbl[RTC_GSI];
115 if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id,
116 e->fields.dest_mode))
117 return;
118
119 new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
120 old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
121
122 if (new_val == old_val)
123 return;
124
125 if (new_val) {
126 __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
127 ioapic->rtc_status.pending_eoi++;
128 } else {
129 __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
130 ioapic->rtc_status.pending_eoi--;
131 rtc_status_pending_eoi_check_valid(ioapic);
132 }
133}
134
135void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
136{
137 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
138
139 spin_lock(&ioapic->lock);
140 __rtc_irq_eoi_tracking_restore_one(vcpu);
141 spin_unlock(&ioapic->lock);
142}
143
144static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
145{
146 struct kvm_vcpu *vcpu;
147 int i;
148
149 if (RTC_GSI >= IOAPIC_NUM_PINS)
150 return;
151
152 rtc_irq_eoi_tracking_reset(ioapic);
153 kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
154 __rtc_irq_eoi_tracking_restore_one(vcpu);
155}
156
157static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
158{
159 if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
160 --ioapic->rtc_status.pending_eoi;
161 rtc_status_pending_eoi_check_valid(ioapic);
162 }
163}
164
165static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
166{
167 if (ioapic->rtc_status.pending_eoi > 0)
168 return true; /* coalesced */
169
170 return false;
171}
172
173static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
174 int irq_level, bool line_status)
175{
176 union kvm_ioapic_redirect_entry entry;
177 u32 mask = 1 << irq;
178 u32 old_irr;
179 int edge, ret;
180
181 entry = ioapic->redirtbl[irq];
182 edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
183
184 if (!irq_level) {
185 ioapic->irr &= ~mask;
186 ret = 1;
187 goto out;
188 }
189
190 /*
191 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
192 * this only happens if a previous edge has not been delivered due
193 * to masking. For level interrupts, the remote_irr field tells
194 * us if the interrupt is waiting for an EOI.
195 *
196 * RTC is special: it is edge-triggered, but userspace likes to know
197 * if it has been already ack-ed via EOI because coalesced RTC
198 * interrupts lead to time drift in Windows guests. So we track
199 * EOI manually for the RTC interrupt.
200 */
201 if (irq == RTC_GSI && line_status &&
202 rtc_irq_check_coalesced(ioapic)) {
203 ret = 0;
204 goto out;
205 }
206
207 old_irr = ioapic->irr;
208 ioapic->irr |= mask;
209 if ((edge && old_irr == ioapic->irr) ||
210 (!edge && entry.fields.remote_irr)) {
211 ret = 0;
212 goto out;
213 }
214
215 ret = ioapic_service(ioapic, irq, line_status);
216
217out:
218 trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
219 return ret;
220}
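
To make the coalescing rule from the comment above concrete, here is a small self-contained sketch of the same decision; the struct and helper names are invented for illustration and are not part of the kernel sources:

#include <stdbool.h>
#include <stdint.h>

struct pin_state {
	bool edge;          /* redirection entry is edge-triggered */
	bool remote_irr;    /* level interrupt still awaiting EOI  */
};

/* Returns true if asserting the pin again would be reported as coalesced. */
static bool would_coalesce(const struct pin_state *pin,
			   uint32_t irr, uint32_t pin_mask)
{
	if (pin->edge)
		return irr & pin_mask;  /* earlier edge not yet delivered (masked) */
	return pin->remote_irr;         /* level IRQ still waiting for its EOI     */
}
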
221
222static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
223{
224 u32 idx;
225
226 rtc_irq_eoi_tracking_reset(ioapic);
227 for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
228 ioapic_set_irq(ioapic, idx, 1, true);
229
230 kvm_rtc_eoi_tracking_restore_all(ioapic);
231}
232
233
234static void update_handled_vectors(struct kvm_ioapic *ioapic)
235{
236 DECLARE_BITMAP(handled_vectors, 256);
237 int i;
238
239 memset(handled_vectors, 0, sizeof(handled_vectors));
240 for (i = 0; i < IOAPIC_NUM_PINS; ++i)
241 __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
242 memcpy(ioapic->handled_vectors, handled_vectors,
243 sizeof(handled_vectors));
244 smp_wmb();
245}
246
247void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
248 u32 *tmr)
249{
250 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
251 union kvm_ioapic_redirect_entry *e;
252 int index;
253
254 spin_lock(&ioapic->lock);
255 for (index = 0; index < IOAPIC_NUM_PINS; index++) {
256 e = &ioapic->redirtbl[index];
257 if (!e->fields.mask &&
258 (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
259 kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
260 index) || index == RTC_GSI)) {
261 if (kvm_apic_match_dest(vcpu, NULL, 0,
262 e->fields.dest_id, e->fields.dest_mode)) {
263 __set_bit(e->fields.vector,
264 (unsigned long *)eoi_exit_bitmap);
265 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
266 __set_bit(e->fields.vector,
267 (unsigned long *)tmr);
268 }
269 }
270 }
271 spin_unlock(&ioapic->lock);
272}
273
274#ifdef CONFIG_X86
275void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
276{
277 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
278
279 if (!ioapic)
280 return;
281 kvm_make_scan_ioapic_request(kvm);
282}
283#else
284void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
285{
286 return;
287}
288#endif
289
290static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
291{
292 unsigned index;
293 bool mask_before, mask_after;
294 union kvm_ioapic_redirect_entry *e;
295
296 switch (ioapic->ioregsel) {
297 case IOAPIC_REG_VERSION:
298 /* Writes are ignored. */
299 break;
300
301 case IOAPIC_REG_APIC_ID:
302 ioapic->id = (val >> 24) & 0xf;
303 break;
304
305 case IOAPIC_REG_ARB_ID:
306 break;
307
308 default:
309 index = (ioapic->ioregsel - 0x10) >> 1;
310
311 ioapic_debug("change redir index %x val %x\n", index, val);
312 if (index >= IOAPIC_NUM_PINS)
313 return;
314 e = &ioapic->redirtbl[index];
315 mask_before = e->fields.mask;
316 if (ioapic->ioregsel & 1) {
317 e->bits &= 0xffffffff;
318 e->bits |= (u64) val << 32;
319 } else {
320 e->bits &= ~0xffffffffULL;
321 e->bits |= (u32) val;
322 e->fields.remote_irr = 0;
323 }
324 update_handled_vectors(ioapic);
325 mask_after = e->fields.mask;
326 if (mask_before != mask_after)
327 kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
328 if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
329 && ioapic->irr & (1 << index))
330 ioapic_service(ioapic, index, false);
331 kvm_vcpu_request_scan_ioapic(ioapic->kvm);
332 break;
333 }
334}
335
336static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
337{
338 union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
339 struct kvm_lapic_irq irqe;
340 int ret;
341
342 if (entry->fields.mask)
343 return -1;
344
345 ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
346 "vector=%x trig_mode=%x\n",
347 entry->fields.dest_id, entry->fields.dest_mode,
348 entry->fields.delivery_mode, entry->fields.vector,
349 entry->fields.trig_mode);
350
351 irqe.dest_id = entry->fields.dest_id;
352 irqe.vector = entry->fields.vector;
353 irqe.dest_mode = entry->fields.dest_mode;
354 irqe.trig_mode = entry->fields.trig_mode;
355 irqe.delivery_mode = entry->fields.delivery_mode << 8;
356 irqe.level = 1;
357 irqe.shorthand = 0;
358
359 if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
360 ioapic->irr &= ~(1 << irq);
361
362 if (irq == RTC_GSI && line_status) {
363 /*
364 * pending_eoi cannot ever become negative (see
365 * rtc_status_pending_eoi_check_valid) and the caller
 366 * ensures that it is only called if it is >= zero (namely,
 367 * if rtc_irq_check_coalesced returns false).
368 */
369 BUG_ON(ioapic->rtc_status.pending_eoi != 0);
370 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
371 ioapic->rtc_status.dest_map);
372 ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
373 } else
374 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
375
376 if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
377 entry->fields.remote_irr = 1;
378
379 return ret;
380}
381
382int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
383 int level, bool line_status)
384{
385 int ret, irq_level;
386
387 BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
388
389 spin_lock(&ioapic->lock);
390 irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
391 irq_source_id, level);
392 ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
393
394 spin_unlock(&ioapic->lock);
395
396 return ret;
397}
398
399void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
400{
401 int i;
402
403 spin_lock(&ioapic->lock);
404 for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
405 __clear_bit(irq_source_id, &ioapic->irq_states[i]);
406 spin_unlock(&ioapic->lock);
407}
408
409static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
410 struct kvm_ioapic *ioapic, int vector, int trigger_mode)
411{
412 int i;
413
414 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
415 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
416
417 if (ent->fields.vector != vector)
418 continue;
419
420 if (i == RTC_GSI)
421 rtc_irq_eoi(ioapic, vcpu);
422 /*
 423 * We drop the lock while calling the ack notifiers because the ack
 424 * notifier callbacks for assigned devices call back into the IOAPIC
 425 * recursively. Since remote_irr is cleared only after the call to
 426 * the notifiers, if the same vector is delivered while the lock is
 427 * dropped it will be put into irr and will be delivered once the
 428 * ack notifier returns.
429 */
430 spin_unlock(&ioapic->lock);
431 kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
432 spin_lock(&ioapic->lock);
433
434 if (trigger_mode != IOAPIC_LEVEL_TRIG)
435 continue;
436
437 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
438 ent->fields.remote_irr = 0;
439 if (ioapic->irr & (1 << i))
440 ioapic_service(ioapic, i, false);
441 }
442}
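
The unlock/notify/relock sequence above is the classic shape for invoking a callback that may re-enter the object calling it. A user-space sketch of the same pattern, using pthreads purely for illustration (none of these names exist in KVM):

#include <pthread.h>

struct fake_ioapic {
	pthread_mutex_t lock;
	void (*ack_notifier)(struct fake_ioapic *chip, int pin);
};

static void fake_update_eoi(struct fake_ioapic *chip, int pin)
{
	pthread_mutex_lock(&chip->lock);
	/* ... per-pin bookkeeping done under the lock ... */
	pthread_mutex_unlock(&chip->lock);   /* the notifier may call back in */
	chip->ack_notifier(chip, pin);
	pthread_mutex_lock(&chip->lock);
	/* ... re-check state that may have changed while unlocked ... */
	pthread_mutex_unlock(&chip->lock);
}
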
443
444bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
445{
446 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
447 smp_rmb();
448 return test_bit(vector, ioapic->handled_vectors);
449}
450
451void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
452{
453 struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
454
455 spin_lock(&ioapic->lock);
456 __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
457 spin_unlock(&ioapic->lock);
458}
459
460static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
461{
462 return container_of(dev, struct kvm_ioapic, dev);
463}
464
465static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
466{
467 return ((addr >= ioapic->base_address &&
468 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
469}
470
471static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
472 void *val)
473{
474 struct kvm_ioapic *ioapic = to_ioapic(this);
475 u32 result;
476 if (!ioapic_in_range(ioapic, addr))
477 return -EOPNOTSUPP;
478
479 ioapic_debug("addr %lx\n", (unsigned long)addr);
480 ASSERT(!(addr & 0xf)); /* check alignment */
481
482 addr &= 0xff;
483 spin_lock(&ioapic->lock);
484 switch (addr) {
485 case IOAPIC_REG_SELECT:
486 result = ioapic->ioregsel;
487 break;
488
489 case IOAPIC_REG_WINDOW:
490 result = ioapic_read_indirect(ioapic, addr, len);
491 break;
492
493 default:
494 result = 0;
495 break;
496 }
497 spin_unlock(&ioapic->lock);
498
499 switch (len) {
500 case 8:
501 *(u64 *) val = result;
502 break;
503 case 1:
504 case 2:
505 case 4:
506 memcpy(val, (char *)&result, len);
507 break;
508 default:
509 printk(KERN_WARNING "ioapic: wrong length %d\n", len);
510 }
511 return 0;
512}
513
514static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
515 const void *val)
516{
517 struct kvm_ioapic *ioapic = to_ioapic(this);
518 u32 data;
519 if (!ioapic_in_range(ioapic, addr))
520 return -EOPNOTSUPP;
521
522 ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
523 (void*)addr, len, val);
524 ASSERT(!(addr & 0xf)); /* check alignment */
525
526 switch (len) {
527 case 8:
528 case 4:
529 data = *(u32 *) val;
530 break;
531 case 2:
532 data = *(u16 *) val;
533 break;
534 case 1:
535 data = *(u8 *) val;
536 break;
537 default:
538 printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
539 return 0;
540 }
541
542 addr &= 0xff;
543 spin_lock(&ioapic->lock);
544 switch (addr) {
545 case IOAPIC_REG_SELECT:
546 ioapic->ioregsel = data & 0xFF; /* 8-bit register */
547 break;
548
549 case IOAPIC_REG_WINDOW:
550 ioapic_write_indirect(ioapic, data);
551 break;
552#ifdef CONFIG_IA64
553 case IOAPIC_REG_EOI:
554 __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
555 break;
556#endif
557
558 default:
559 break;
560 }
561 spin_unlock(&ioapic->lock);
562 return 0;
563}
564
565static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
566{
567 int i;
568
569 for (i = 0; i < IOAPIC_NUM_PINS; i++)
570 ioapic->redirtbl[i].fields.mask = 1;
571 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
572 ioapic->ioregsel = 0;
573 ioapic->irr = 0;
574 ioapic->id = 0;
575 rtc_irq_eoi_tracking_reset(ioapic);
576 update_handled_vectors(ioapic);
577}
578
579static const struct kvm_io_device_ops ioapic_mmio_ops = {
580 .read = ioapic_mmio_read,
581 .write = ioapic_mmio_write,
582};
583
584int kvm_ioapic_init(struct kvm *kvm)
585{
586 struct kvm_ioapic *ioapic;
587 int ret;
588
589 ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
590 if (!ioapic)
591 return -ENOMEM;
592 spin_lock_init(&ioapic->lock);
593 kvm->arch.vioapic = ioapic;
594 kvm_ioapic_reset(ioapic);
595 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
596 ioapic->kvm = kvm;
597 mutex_lock(&kvm->slots_lock);
598 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
599 IOAPIC_MEM_LENGTH, &ioapic->dev);
600 mutex_unlock(&kvm->slots_lock);
601 if (ret < 0) {
602 kvm->arch.vioapic = NULL;
603 kfree(ioapic);
604 }
605
606 return ret;
607}
608
609void kvm_ioapic_destroy(struct kvm *kvm)
610{
611 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
612
613 if (ioapic) {
614 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
615 kvm->arch.vioapic = NULL;
616 kfree(ioapic);
617 }
618}
619
620int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
621{
622 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
623 if (!ioapic)
624 return -EINVAL;
625
626 spin_lock(&ioapic->lock);
627 memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
628 spin_unlock(&ioapic->lock);
629 return 0;
630}
631
632int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
633{
634 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
635 if (!ioapic)
636 return -EINVAL;
637
638 spin_lock(&ioapic->lock);
639 memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
640 ioapic->irr = 0;
641 update_handled_vectors(ioapic);
642 kvm_vcpu_request_scan_ioapic(kvm);
643 kvm_ioapic_inject_all(ioapic, state->irr);
644 spin_unlock(&ioapic->lock);
645 return 0;
646}
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
deleted file mode 100644
index 90d43e95dcf8..000000000000
--- a/virt/kvm/ioapic.h
+++ /dev/null
@@ -1,102 +0,0 @@
1#ifndef __KVM_IO_APIC_H
2#define __KVM_IO_APIC_H
3
4#include <linux/kvm_host.h>
5
6#include "iodev.h"
7
8struct kvm;
9struct kvm_vcpu;
10
11#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS
12#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
13#define IOAPIC_EDGE_TRIG 0
14#define IOAPIC_LEVEL_TRIG 1
15
16#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
17#define IOAPIC_MEM_LENGTH 0x100
18
19/* Direct registers. */
20#define IOAPIC_REG_SELECT 0x00
21#define IOAPIC_REG_WINDOW 0x10
22#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */
23
24/* Indirect registers. */
25#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */
26#define IOAPIC_REG_VERSION 0x01
27#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */
28
29/*ioapic delivery mode*/
30#define IOAPIC_FIXED 0x0
31#define IOAPIC_LOWEST_PRIORITY 0x1
32#define IOAPIC_PMI 0x2
33#define IOAPIC_NMI 0x4
34#define IOAPIC_INIT 0x5
35#define IOAPIC_EXTINT 0x7
36
37#ifdef CONFIG_X86
38#define RTC_GSI 8
39#else
40#define RTC_GSI -1U
41#endif
42
43struct rtc_status {
44 int pending_eoi;
45 DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
46};
47
48struct kvm_ioapic {
49 u64 base_address;
50 u32 ioregsel;
51 u32 id;
52 u32 irr;
53 u32 pad;
54 union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
55 unsigned long irq_states[IOAPIC_NUM_PINS];
56 struct kvm_io_device dev;
57 struct kvm *kvm;
58 void (*ack_notifier)(void *opaque, int irq);
59 spinlock_t lock;
60 DECLARE_BITMAP(handled_vectors, 256);
61 struct rtc_status rtc_status;
62};
63
64#ifdef DEBUG
65#define ASSERT(x) \
66do { \
67 if (!(x)) { \
68 printk(KERN_EMERG "assertion failed %s: %d: %s\n", \
69 __FILE__, __LINE__, #x); \
70 BUG(); \
71 } \
72} while (0)
73#else
74#define ASSERT(x) do { } while (0)
75#endif
76
77static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
78{
79 return kvm->arch.vioapic;
80}
81
82void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
83int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
84 int short_hand, int dest, int dest_mode);
85int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
86void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
87 int trigger_mode);
88bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
89int kvm_ioapic_init(struct kvm *kvm);
90void kvm_ioapic_destroy(struct kvm *kvm);
91int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
92 int level, bool line_status);
93void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
94int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
95 struct kvm_lapic_irq *irq, unsigned long *dest_map);
96int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
97int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
98void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
99void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
100 u32 *tmr);
101
102#endif
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
deleted file mode 100644
index 12fd3caffd2b..000000000000
--- a/virt/kvm/iodev.h
+++ /dev/null
@@ -1,70 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
14 */
15
16#ifndef __KVM_IODEV_H__
17#define __KVM_IODEV_H__
18
19#include <linux/kvm_types.h>
20#include <asm/errno.h>
21
22struct kvm_io_device;
23
24/**
25 * kvm_io_device_ops are called under kvm slots_lock.
26 * read and write handlers return 0 if the transaction has been handled,
27 * or non-zero to have it passed to the next device.
28 **/
29struct kvm_io_device_ops {
30 int (*read)(struct kvm_io_device *this,
31 gpa_t addr,
32 int len,
33 void *val);
34 int (*write)(struct kvm_io_device *this,
35 gpa_t addr,
36 int len,
37 const void *val);
38 void (*destructor)(struct kvm_io_device *this);
39};
40
41
42struct kvm_io_device {
43 const struct kvm_io_device_ops *ops;
44};
45
46static inline void kvm_iodevice_init(struct kvm_io_device *dev,
47 const struct kvm_io_device_ops *ops)
48{
49 dev->ops = ops;
50}
51
52static inline int kvm_iodevice_read(struct kvm_io_device *dev,
53 gpa_t addr, int l, void *v)
54{
55 return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
56}
57
58static inline int kvm_iodevice_write(struct kvm_io_device *dev,
59 gpa_t addr, int l, const void *v)
60{
61 return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
62}
63
64static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
65{
66 if (dev->ops->destructor)
67 dev->ops->destructor(dev);
68}
69
70#endif /* __KVM_IODEV_H__ */
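
The kvm_io_device_ops contract documented in the header above (return 0 when the transaction was handled, non-zero such as -EOPNOTSUPP to let the bus try the next device) is easiest to see with a trivial device. A hedged sketch that reuses only the types declared in this header; the device itself, its name, and its address window are invented for illustration and assume the usual kernel headers (<linux/kvm_host.h>, <linux/string.h>) are included:

/* A "null" MMIO device: reads as zero, swallows writes, claims a fixed window. */
#define NULL_DEV_BASE   0xd0000000ULL   /* illustrative address, not from KVM */
#define NULL_DEV_LEN    0x100

static int null_dev_read(struct kvm_io_device *this, gpa_t addr, int len,
			 void *val)
{
	if (addr < NULL_DEV_BASE || addr >= NULL_DEV_BASE + NULL_DEV_LEN)
		return -EOPNOTSUPP;     /* not ours: let the bus keep looking */
	memset(val, 0, len);
	return 0;                       /* handled */
}

static int null_dev_write(struct kvm_io_device *this, gpa_t addr, int len,
			  const void *val)
{
	if (addr < NULL_DEV_BASE || addr >= NULL_DEV_BASE + NULL_DEV_LEN)
		return -EOPNOTSUPP;
	return 0;                       /* writes are silently accepted */
}

static const struct kvm_io_device_ops null_dev_ops = {
	.read  = null_dev_read,
	.write = null_dev_write,
};
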
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
deleted file mode 100644
index 0df7d4b34dfe..000000000000
--- a/virt/kvm/iommu.c
+++ /dev/null
@@ -1,359 +0,0 @@
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Copyright IBM Corporation, 2008
19 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
20 *
21 * Author: Allen M. Kay <allen.m.kay@intel.com>
22 * Author: Weidong Han <weidong.han@intel.com>
23 * Author: Ben-Ami Yassour <benami@il.ibm.com>
24 */
25
26#include <linux/list.h>
27#include <linux/kvm_host.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <linux/stat.h>
31#include <linux/dmar.h>
32#include <linux/iommu.h>
33#include <linux/intel-iommu.h>
34
35static bool allow_unsafe_assigned_interrupts;
36module_param_named(allow_unsafe_assigned_interrupts,
37 allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
38MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
39 "Enable device assignment on platforms without interrupt remapping support.");
40
41static int kvm_iommu_unmap_memslots(struct kvm *kvm);
42static void kvm_iommu_put_pages(struct kvm *kvm,
43 gfn_t base_gfn, unsigned long npages);
44
45static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
46 unsigned long size)
47{
48 gfn_t end_gfn;
49 pfn_t pfn;
50
51 pfn = gfn_to_pfn_memslot(slot, gfn);
52 end_gfn = gfn + (size >> PAGE_SHIFT);
53 gfn += 1;
54
55 if (is_error_noslot_pfn(pfn))
56 return pfn;
57
58 while (gfn < end_gfn)
59 gfn_to_pfn_memslot(slot, gfn++);
60
61 return pfn;
62}
63
64int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
65{
66 gfn_t gfn, end_gfn;
67 pfn_t pfn;
68 int r = 0;
69 struct iommu_domain *domain = kvm->arch.iommu_domain;
70 int flags;
71
72 /* check if iommu exists and in use */
73 if (!domain)
74 return 0;
75
76 gfn = slot->base_gfn;
77 end_gfn = gfn + slot->npages;
78
79 flags = IOMMU_READ;
80 if (!(slot->flags & KVM_MEM_READONLY))
81 flags |= IOMMU_WRITE;
82 if (!kvm->arch.iommu_noncoherent)
83 flags |= IOMMU_CACHE;
84
85
86 while (gfn < end_gfn) {
87 unsigned long page_size;
88
89 /* Check if already mapped */
90 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
91 gfn += 1;
92 continue;
93 }
94
95 /* Get the page size we could use to map */
96 page_size = kvm_host_page_size(kvm, gfn);
97
98 /* Make sure the page_size does not exceed the memslot */
99 while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
100 page_size >>= 1;
101
102 /* Make sure gfn is aligned to the page size we want to map */
103 while ((gfn << PAGE_SHIFT) & (page_size - 1))
104 page_size >>= 1;
105
106 /* Make sure hva is aligned to the page size we want to map */
107 while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
108 page_size >>= 1;
109
110 /*
111 * Pin all pages we are about to map in memory. This is
112 * important because we unmap and unpin in 4kb steps later.
113 */
114 pfn = kvm_pin_pages(slot, gfn, page_size);
115 if (is_error_noslot_pfn(pfn)) {
116 gfn += 1;
117 continue;
118 }
119
120 /* Map into IO address space */
121 r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
122 page_size, flags);
123 if (r) {
124 printk(KERN_ERR "kvm_iommu_map_address:"
125 "iommu failed to map pfn=%llx\n", pfn);
126 goto unmap_pages;
127 }
128
129 gfn += page_size >> PAGE_SHIFT;
130
131
132 }
133
134 return 0;
135
136unmap_pages:
137 kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
138 return r;
139}
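
The three while-loops above shrink the candidate page size until the mapping neither crosses the end of the memslot nor misaligns the guest-physical or host-virtual address. The same clamping, pulled out into a standalone helper as a sketch (the names and the fixed shift are illustrative):

#include <stdint.h>

#define SKETCH_PAGE_SHIFT 12

static uint64_t clamp_map_size(uint64_t host_page_size, uint64_t gfn,
			       uint64_t end_gfn, uint64_t hva)
{
	uint64_t size = host_page_size;

	/* Do not map past the end of the memslot. */
	while (gfn + (size >> SKETCH_PAGE_SHIFT) > end_gfn)
		size >>= 1;
	/* Guest-physical address must be size-aligned. */
	while ((gfn << SKETCH_PAGE_SHIFT) & (size - 1))
		size >>= 1;
	/* Host-virtual address must be size-aligned, too. */
	while (hva & (size - 1))
		size >>= 1;

	return size;
}
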
140
141static int kvm_iommu_map_memslots(struct kvm *kvm)
142{
143 int idx, r = 0;
144 struct kvm_memslots *slots;
145 struct kvm_memory_slot *memslot;
146
147 if (kvm->arch.iommu_noncoherent)
148 kvm_arch_register_noncoherent_dma(kvm);
149
150 idx = srcu_read_lock(&kvm->srcu);
151 slots = kvm_memslots(kvm);
152
153 kvm_for_each_memslot(memslot, slots) {
154 r = kvm_iommu_map_pages(kvm, memslot);
155 if (r)
156 break;
157 }
158 srcu_read_unlock(&kvm->srcu, idx);
159
160 return r;
161}
162
163int kvm_assign_device(struct kvm *kvm,
164 struct kvm_assigned_dev_kernel *assigned_dev)
165{
166 struct pci_dev *pdev = NULL;
167 struct iommu_domain *domain = kvm->arch.iommu_domain;
168 int r;
169 bool noncoherent;
170
171 /* check if iommu exists and in use */
172 if (!domain)
173 return 0;
174
175 pdev = assigned_dev->dev;
176 if (pdev == NULL)
177 return -ENODEV;
178
179 r = iommu_attach_device(domain, &pdev->dev);
180 if (r) {
181 dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
182 return r;
183 }
184
185 noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
186 IOMMU_CAP_CACHE_COHERENCY);
187
188 /* Check if need to update IOMMU page table for guest memory */
189 if (noncoherent != kvm->arch.iommu_noncoherent) {
190 kvm_iommu_unmap_memslots(kvm);
191 kvm->arch.iommu_noncoherent = noncoherent;
192 r = kvm_iommu_map_memslots(kvm);
193 if (r)
194 goto out_unmap;
195 }
196
197 pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
198
199 dev_info(&pdev->dev, "kvm assign device\n");
200
201 return 0;
202out_unmap:
203 kvm_iommu_unmap_memslots(kvm);
204 return r;
205}
206
207int kvm_deassign_device(struct kvm *kvm,
208 struct kvm_assigned_dev_kernel *assigned_dev)
209{
210 struct iommu_domain *domain = kvm->arch.iommu_domain;
211 struct pci_dev *pdev = NULL;
212
213 /* check if iommu exists and in use */
214 if (!domain)
215 return 0;
216
217 pdev = assigned_dev->dev;
218 if (pdev == NULL)
219 return -ENODEV;
220
221 iommu_detach_device(domain, &pdev->dev);
222
223 pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
224
225 dev_info(&pdev->dev, "kvm deassign device\n");
226
227 return 0;
228}
229
230int kvm_iommu_map_guest(struct kvm *kvm)
231{
232 int r;
233
234 if (!iommu_present(&pci_bus_type)) {
235 printk(KERN_ERR "%s: iommu not found\n", __func__);
236 return -ENODEV;
237 }
238
239 mutex_lock(&kvm->slots_lock);
240
241 kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
242 if (!kvm->arch.iommu_domain) {
243 r = -ENOMEM;
244 goto out_unlock;
245 }
246
247 if (!allow_unsafe_assigned_interrupts &&
248 !iommu_domain_has_cap(kvm->arch.iommu_domain,
249 IOMMU_CAP_INTR_REMAP)) {
250 printk(KERN_WARNING "%s: No interrupt remapping support,"
251 " disallowing device assignment."
 252 " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
253 " module option.\n", __func__);
254 iommu_domain_free(kvm->arch.iommu_domain);
255 kvm->arch.iommu_domain = NULL;
256 r = -EPERM;
257 goto out_unlock;
258 }
259
260 r = kvm_iommu_map_memslots(kvm);
261 if (r)
262 kvm_iommu_unmap_memslots(kvm);
263
264out_unlock:
265 mutex_unlock(&kvm->slots_lock);
266 return r;
267}
268
269static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
270{
271 unsigned long i;
272
273 for (i = 0; i < npages; ++i)
274 kvm_release_pfn_clean(pfn + i);
275}
276
277static void kvm_iommu_put_pages(struct kvm *kvm,
278 gfn_t base_gfn, unsigned long npages)
279{
280 struct iommu_domain *domain;
281 gfn_t end_gfn, gfn;
282 pfn_t pfn;
283 u64 phys;
284
285 domain = kvm->arch.iommu_domain;
286 end_gfn = base_gfn + npages;
287 gfn = base_gfn;
288
289 /* check if iommu exists and in use */
290 if (!domain)
291 return;
292
293 while (gfn < end_gfn) {
294 unsigned long unmap_pages;
295 size_t size;
296
297 /* Get physical address */
298 phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
299
300 if (!phys) {
301 gfn++;
302 continue;
303 }
304
305 pfn = phys >> PAGE_SHIFT;
306
307 /* Unmap address from IO address space */
308 size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
309 unmap_pages = 1ULL << get_order(size);
310
311 /* Unpin all pages we just unmapped to not leak any memory */
312 kvm_unpin_pages(kvm, pfn, unmap_pages);
313
314 gfn += unmap_pages;
315 }
316}
317
318void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
319{
320 kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
321}
322
323static int kvm_iommu_unmap_memslots(struct kvm *kvm)
324{
325 int idx;
326 struct kvm_memslots *slots;
327 struct kvm_memory_slot *memslot;
328
329 idx = srcu_read_lock(&kvm->srcu);
330 slots = kvm_memslots(kvm);
331
332 kvm_for_each_memslot(memslot, slots)
333 kvm_iommu_unmap_pages(kvm, memslot);
334
335 srcu_read_unlock(&kvm->srcu, idx);
336
337 if (kvm->arch.iommu_noncoherent)
338 kvm_arch_unregister_noncoherent_dma(kvm);
339
340 return 0;
341}
342
343int kvm_iommu_unmap_guest(struct kvm *kvm)
344{
345 struct iommu_domain *domain = kvm->arch.iommu_domain;
346
347 /* check if iommu exists and in use */
348 if (!domain)
349 return 0;
350
351 mutex_lock(&kvm->slots_lock);
352 kvm_iommu_unmap_memslots(kvm);
353 kvm->arch.iommu_domain = NULL;
354 kvm->arch.iommu_noncoherent = false;
355 mutex_unlock(&kvm->slots_lock);
356
357 iommu_domain_free(domain);
358 return 0;
359}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
deleted file mode 100644
index ced4a542a031..000000000000
--- a/virt/kvm/irq_comm.c
+++ /dev/null
@@ -1,373 +0,0 @@
1/*
2 * irq_comm.c: Common API for in kernel interrupt controller
3 * Copyright (c) 2007, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Authors:
18 * Yaozu (Eddie) Dong <Eddie.dong@intel.com>
19 *
20 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
21 */
22
23#include <linux/kvm_host.h>
24#include <linux/slab.h>
25#include <linux/export.h>
26#include <trace/events/kvm.h>
27
28#include <asm/msidef.h>
29#ifdef CONFIG_IA64
30#include <asm/iosapic.h>
31#endif
32
33#include "irq.h"
34
35#include "ioapic.h"
36
37static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
38 struct kvm *kvm, int irq_source_id, int level,
39 bool line_status)
40{
41#ifdef CONFIG_X86
42 struct kvm_pic *pic = pic_irqchip(kvm);
43 return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
44#else
45 return -1;
46#endif
47}
48
49static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
50 struct kvm *kvm, int irq_source_id, int level,
51 bool line_status)
52{
53 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
54 return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
55 line_status);
56}
57
58static inline bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
59{
60#ifdef CONFIG_IA64
61 return irq->delivery_mode ==
62 (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
63#else
64 return irq->delivery_mode == APIC_DM_LOWEST;
65#endif
66}
67
68int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
69 struct kvm_lapic_irq *irq, unsigned long *dest_map)
70{
71 int i, r = -1;
72 struct kvm_vcpu *vcpu, *lowest = NULL;
73
74 if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
75 kvm_is_dm_lowest_prio(irq)) {
76 printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
77 irq->delivery_mode = APIC_DM_FIXED;
78 }
79
80 if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
81 return r;
82
83 kvm_for_each_vcpu(i, vcpu, kvm) {
84 if (!kvm_apic_present(vcpu))
85 continue;
86
87 if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
88 irq->dest_id, irq->dest_mode))
89 continue;
90
91 if (!kvm_is_dm_lowest_prio(irq)) {
92 if (r < 0)
93 r = 0;
94 r += kvm_apic_set_irq(vcpu, irq, dest_map);
95 } else if (kvm_lapic_enabled(vcpu)) {
96 if (!lowest)
97 lowest = vcpu;
98 else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
99 lowest = vcpu;
100 }
101 }
102
103 if (lowest)
104 r = kvm_apic_set_irq(lowest, irq, dest_map);
105
106 return r;
107}
108
109static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
110 struct kvm_lapic_irq *irq)
111{
112 trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
113
114 irq->dest_id = (e->msi.address_lo &
115 MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
116 irq->vector = (e->msi.data &
117 MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
118 irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
119 irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
120 irq->delivery_mode = e->msi.data & 0x700;
121 irq->level = 1;
122 irq->shorthand = 0;
123 /* TODO Deal with RH bit of MSI message address */
124}
125
126int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
127 struct kvm *kvm, int irq_source_id, int level, bool line_status)
128{
129 struct kvm_lapic_irq irq;
130
131 if (!level)
132 return -1;
133
134 kvm_set_msi_irq(e, &irq);
135
136 return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
137}
138
139
140static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
141 struct kvm *kvm)
142{
143 struct kvm_lapic_irq irq;
144 int r;
145
146 kvm_set_msi_irq(e, &irq);
147
148 if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
149 return r;
150 else
151 return -EWOULDBLOCK;
152}
153
154/*
155 * Deliver an IRQ in an atomic context if we can, or return a failure;
156 * the caller can then retry in a process context.
157 * Return value:
158 * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
159 * Other values - No need to retry.
160 */
161int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
162{
163 struct kvm_kernel_irq_routing_entry *e;
164 int ret = -EINVAL;
165 struct kvm_irq_routing_table *irq_rt;
166 int idx;
167
168 trace_kvm_set_irq(irq, level, irq_source_id);
169
170 /*
171 * Injection into either PIC or IOAPIC might need to scan all CPUs,
172 * which would need to be retried from thread context; when same GSI
173 * is connected to both PIC and IOAPIC, we'd have to report a
174 * partial failure here.
175 * Since there's no easy way to do this, we only support injecting MSI
176 * which is limited to 1:1 GSI mapping.
177 */
178 idx = srcu_read_lock(&kvm->irq_srcu);
179 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
180 if (irq < irq_rt->nr_rt_entries)
181 hlist_for_each_entry(e, &irq_rt->map[irq], link) {
182 if (likely(e->type == KVM_IRQ_ROUTING_MSI))
183 ret = kvm_set_msi_inatomic(e, kvm);
184 else
185 ret = -EWOULDBLOCK;
186 break;
187 }
188 srcu_read_unlock(&kvm->irq_srcu, idx);
189 return ret;
190}
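
The comment above defines a simple retry contract: -EWOULDBLOCK means the delivery could not be done atomically and should be redone from process context. A caller-side sketch of that contract, using only the two functions shown in this patch; the wrapper itself is invented for illustration and assumes <linux/kvm_host.h>:

/* Illustrative wrapper: try the atomic path first, fall back if it refuses. */
static int deliver_irq(struct kvm *kvm, int irq_source_id, u32 gsi, int level)
{
	int r = kvm_set_irq_inatomic(kvm, irq_source_id, gsi, level);

	if (r == -EWOULDBLOCK)
		/* Not deliverable atomically: redo it from process context. */
		r = kvm_set_irq(kvm, irq_source_id, gsi, level, true);
	return r;
}
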
191
192int kvm_request_irq_source_id(struct kvm *kvm)
193{
194 unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
195 int irq_source_id;
196
197 mutex_lock(&kvm->irq_lock);
198 irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
199
200 if (irq_source_id >= BITS_PER_LONG) {
201 printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
202 irq_source_id = -EFAULT;
203 goto unlock;
204 }
205
206 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
207#ifdef CONFIG_X86
208 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
209#endif
210 set_bit(irq_source_id, bitmap);
211unlock:
212 mutex_unlock(&kvm->irq_lock);
213
214 return irq_source_id;
215}
216
217void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
218{
219 ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
220#ifdef CONFIG_X86
221 ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
222#endif
223
224 mutex_lock(&kvm->irq_lock);
225 if (irq_source_id < 0 ||
226 irq_source_id >= BITS_PER_LONG) {
227 printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
228 goto unlock;
229 }
230 clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
231 if (!irqchip_in_kernel(kvm))
232 goto unlock;
233
234 kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
235#ifdef CONFIG_X86
236 kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
237#endif
238unlock:
239 mutex_unlock(&kvm->irq_lock);
240}
241
242void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
243 struct kvm_irq_mask_notifier *kimn)
244{
245 mutex_lock(&kvm->irq_lock);
246 kimn->irq = irq;
247 hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
248 mutex_unlock(&kvm->irq_lock);
249}
250
251void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
252 struct kvm_irq_mask_notifier *kimn)
253{
254 mutex_lock(&kvm->irq_lock);
255 hlist_del_rcu(&kimn->link);
256 mutex_unlock(&kvm->irq_lock);
257 synchronize_srcu(&kvm->irq_srcu);
258}
259
260void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
261 bool mask)
262{
263 struct kvm_irq_mask_notifier *kimn;
264 int idx, gsi;
265
266 idx = srcu_read_lock(&kvm->irq_srcu);
267 gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
268 if (gsi != -1)
269 hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
270 if (kimn->irq == gsi)
271 kimn->func(kimn, mask);
272 srcu_read_unlock(&kvm->irq_srcu, idx);
273}
274
275int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
276 struct kvm_kernel_irq_routing_entry *e,
277 const struct kvm_irq_routing_entry *ue)
278{
279 int r = -EINVAL;
280 int delta;
281 unsigned max_pin;
282
283 switch (ue->type) {
284 case KVM_IRQ_ROUTING_IRQCHIP:
285 delta = 0;
286 switch (ue->u.irqchip.irqchip) {
287 case KVM_IRQCHIP_PIC_MASTER:
288 e->set = kvm_set_pic_irq;
289 max_pin = PIC_NUM_PINS;
290 break;
291 case KVM_IRQCHIP_PIC_SLAVE:
292 e->set = kvm_set_pic_irq;
293 max_pin = PIC_NUM_PINS;
294 delta = 8;
295 break;
296 case KVM_IRQCHIP_IOAPIC:
297 max_pin = KVM_IOAPIC_NUM_PINS;
298 e->set = kvm_set_ioapic_irq;
299 break;
300 default:
301 goto out;
302 }
303 e->irqchip.irqchip = ue->u.irqchip.irqchip;
304 e->irqchip.pin = ue->u.irqchip.pin + delta;
305 if (e->irqchip.pin >= max_pin)
306 goto out;
307 rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
308 break;
309 case KVM_IRQ_ROUTING_MSI:
310 e->set = kvm_set_msi;
311 e->msi.address_lo = ue->u.msi.address_lo;
312 e->msi.address_hi = ue->u.msi.address_hi;
313 e->msi.data = ue->u.msi.data;
314 break;
315 default:
316 goto out;
317 }
318
319 r = 0;
320out:
321 return r;
322}
323
324#define IOAPIC_ROUTING_ENTRY(irq) \
325 { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
326 .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
327#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
328
329#ifdef CONFIG_X86
330# define PIC_ROUTING_ENTRY(irq) \
331 { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
332 .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
333# define ROUTING_ENTRY2(irq) \
334 IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
335#else
336# define ROUTING_ENTRY2(irq) \
337 IOAPIC_ROUTING_ENTRY(irq)
338#endif
339
340static const struct kvm_irq_routing_entry default_routing[] = {
341 ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
342 ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
343 ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
344 ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
345 ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
346 ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
347 ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
348 ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
349 ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
350 ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
351 ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
352 ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
353#ifdef CONFIG_IA64
354 ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
355 ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
356 ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
357 ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
358 ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
359 ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
360 ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
361 ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
362 ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
363 ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
364 ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
365 ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
366#endif
367};
368
369int kvm_setup_default_irq_routing(struct kvm *kvm)
370{
371 return kvm_set_irq_routing(kvm, default_routing,
372 ARRAY_SIZE(default_routing), 0);
373}
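
For GSIs 0-15, ROUTING_ENTRY2() above emits one routing entry per interrupt controller, so the same GSI fans out to both the IOAPIC and the PIC. Hand-expanding one macro makes that concrete; this sketch assumes SELECT_PIC() resolves to the master PIC for pins 0-7, as defined in the x86 irq.h, and the array name is invented:

/* What ROUTING_ENTRY2(3) boils down to on x86 (hand-expanded illustration). */
static const struct kvm_irq_routing_entry gsi3_expanded[] = {
	{ .gsi = 3, .type = KVM_IRQ_ROUTING_IRQCHIP,
	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC,     .u.irqchip.pin = 3 },
	{ .gsi = 3, .type = KVM_IRQ_ROUTING_IRQCHIP,
	  .u.irqchip.irqchip = KVM_IRQCHIP_PIC_MASTER, .u.irqchip.pin = 3 },
};
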
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index b43c275775cd..1d56a901e791 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,65 +31,42 @@
31#include <trace/events/kvm.h> 31#include <trace/events/kvm.h>
32#include "irq.h" 32#include "irq.h"
33 33
34bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) 34struct kvm_irq_routing_table {
35{ 35 int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
36 struct kvm_irq_ack_notifier *kian; 36 struct kvm_kernel_irq_routing_entry *rt_entries;
37 int gsi, idx; 37 u32 nr_rt_entries;
38 38 /*
39 idx = srcu_read_lock(&kvm->irq_srcu); 39 * Array indexed by gsi. Each entry contains list of irq chips
40 gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin]; 40 * the gsi is connected to.
41 if (gsi != -1) 41 */
42 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, 42 struct hlist_head map[0];
43 link) 43};
44 if (kian->gsi == gsi) {
45 srcu_read_unlock(&kvm->irq_srcu, idx);
46 return true;
47 }
48
49 srcu_read_unlock(&kvm->irq_srcu, idx);
50
51 return false;
52}
53EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
54 44
55void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) 45int kvm_irq_map_gsi(struct kvm *kvm,
46 struct kvm_kernel_irq_routing_entry *entries, int gsi)
56{ 47{
57 struct kvm_irq_ack_notifier *kian; 48 struct kvm_irq_routing_table *irq_rt;
58 int gsi, idx; 49 struct kvm_kernel_irq_routing_entry *e;
59 50 int n = 0;
60 trace_kvm_ack_irq(irqchip, pin); 51
52 irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
53 lockdep_is_held(&kvm->irq_lock));
54 if (gsi < irq_rt->nr_rt_entries) {
55 hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
56 entries[n] = *e;
57 ++n;
58 }
59 }
61 60
62 idx = srcu_read_lock(&kvm->irq_srcu); 61 return n;
63 gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
64 if (gsi != -1)
65 hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
66 link)
67 if (kian->gsi == gsi)
68 kian->irq_acked(kian);
69 srcu_read_unlock(&kvm->irq_srcu, idx);
70} 62}
71 63
72void kvm_register_irq_ack_notifier(struct kvm *kvm, 64int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
73 struct kvm_irq_ack_notifier *kian)
74{ 65{
75 mutex_lock(&kvm->irq_lock); 66 struct kvm_irq_routing_table *irq_rt;
76 hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
77 mutex_unlock(&kvm->irq_lock);
78#ifdef __KVM_HAVE_IOAPIC
79 kvm_vcpu_request_scan_ioapic(kvm);
80#endif
81}
82 67
83void kvm_unregister_irq_ack_notifier(struct kvm *kvm, 68 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
84 struct kvm_irq_ack_notifier *kian) 69 return irq_rt->chip[irqchip][pin];
85{
86 mutex_lock(&kvm->irq_lock);
87 hlist_del_init_rcu(&kian->link);
88 mutex_unlock(&kvm->irq_lock);
89 synchronize_srcu(&kvm->irq_srcu);
90#ifdef __KVM_HAVE_IOAPIC
91 kvm_vcpu_request_scan_ioapic(kvm);
92#endif
93} 70}
94 71
95int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) 72int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
@@ -115,9 +92,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
115int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, 92int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
116 bool line_status) 93 bool line_status)
117{ 94{
118 struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; 95 struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
119 int ret = -1, i = 0, idx; 96 int ret = -1, i, idx;
120 struct kvm_irq_routing_table *irq_rt;
121 97
122 trace_kvm_set_irq(irq, level, irq_source_id); 98 trace_kvm_set_irq(irq, level, irq_source_id);
123 99
@@ -126,13 +102,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
126 * writes to the unused one. 102 * writes to the unused one.
127 */ 103 */
128 idx = srcu_read_lock(&kvm->irq_srcu); 104 idx = srcu_read_lock(&kvm->irq_srcu);
129 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); 105 i = kvm_irq_map_gsi(kvm, irq_set, irq);
130 if (irq < irq_rt->nr_rt_entries)
131 hlist_for_each_entry(e, &irq_rt->map[irq], link)
132 irq_set[i++] = *e;
133 srcu_read_unlock(&kvm->irq_srcu, idx); 106 srcu_read_unlock(&kvm->irq_srcu, idx);
134 107
135 while(i--) { 108 while (i--) {
136 int r; 109 int r;
137 r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, 110 r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
138 line_status); 111 line_status);
@@ -171,9 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
171 144
172 e->gsi = ue->gsi; 145 e->gsi = ue->gsi;
173 e->type = ue->type; 146 e->type = ue->type;
174 r = kvm_set_routing_entry(rt, e, ue); 147 r = kvm_set_routing_entry(e, ue);
175 if (r) 148 if (r)
176 goto out; 149 goto out;
150 if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
151 rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
177 152
178 hlist_add_head(&e->link, &rt->map[e->gsi]); 153 hlist_add_head(&e->link, &rt->map[e->gsi]);
179 r = 0; 154 r = 0;
@@ -224,7 +199,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
224 199
225 mutex_lock(&kvm->irq_lock); 200 mutex_lock(&kvm->irq_lock);
226 old = kvm->irq_routing; 201 old = kvm->irq_routing;
227 kvm_irq_routing_update(kvm, new); 202 rcu_assign_pointer(kvm->irq_routing, new);
203 kvm_irq_routing_update(kvm);
228 mutex_unlock(&kvm->irq_lock); 204 mutex_unlock(&kvm->irq_lock);
229 205
230 synchronize_srcu_expedited(&kvm->irq_srcu); 206 synchronize_srcu_expedited(&kvm->irq_srcu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4b6c01b477f9..d3fc9399062a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -16,7 +16,7 @@
16 * 16 *
17 */ 17 */
18 18
19#include "iodev.h" 19#include <kvm/iodev.h>
20 20
21#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
22#include <linux/kvm.h> 22#include <linux/kvm.h>
@@ -52,11 +52,13 @@
52 52
53#include <asm/processor.h> 53#include <asm/processor.h>
54#include <asm/io.h> 54#include <asm/io.h>
55#include <asm/ioctl.h>
55#include <asm/uaccess.h> 56#include <asm/uaccess.h>
56#include <asm/pgtable.h> 57#include <asm/pgtable.h>
57 58
58#include "coalesced_mmio.h" 59#include "coalesced_mmio.h"
59#include "async_pf.h" 60#include "async_pf.h"
61#include "vfio.h"
60 62
61#define CREATE_TRACE_POINTS 63#define CREATE_TRACE_POINTS
62#include <trace/events/kvm.h> 64#include <trace/events/kvm.h>
@@ -64,10 +66,13 @@
64MODULE_AUTHOR("Qumranet"); 66MODULE_AUTHOR("Qumranet");
65MODULE_LICENSE("GPL"); 67MODULE_LICENSE("GPL");
66 68
69static unsigned int halt_poll_ns;
70module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
71
67/* 72/*
68 * Ordering of locks: 73 * Ordering of locks:
69 * 74 *
70 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock 75 * kvm->lock --> kvm->slots_lock --> kvm->irq_lock
71 */ 76 */
72 77
73DEFINE_SPINLOCK(kvm_lock); 78DEFINE_SPINLOCK(kvm_lock);
@@ -75,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
75LIST_HEAD(vm_list); 80LIST_HEAD(vm_list);
76 81
77static cpumask_var_t cpus_hardware_enabled; 82static cpumask_var_t cpus_hardware_enabled;
78static int kvm_usage_count = 0; 83static int kvm_usage_count;
79static atomic_t hardware_enable_failed; 84static atomic_t hardware_enable_failed;
80 85
81struct kmem_cache *kvm_vcpu_cache; 86struct kmem_cache *kvm_vcpu_cache;
@@ -87,7 +92,7 @@ struct dentry *kvm_debugfs_dir;
87 92
88static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 93static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
89 unsigned long arg); 94 unsigned long arg);
90#ifdef CONFIG_COMPAT 95#ifdef CONFIG_KVM_COMPAT
91static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, 96static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
92 unsigned long arg); 97 unsigned long arg);
93#endif 98#endif
@@ -95,8 +100,6 @@ static int hardware_enable_all(void);
95static void hardware_disable_all(void); 100static void hardware_disable_all(void);
96 101
97static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 102static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
98static void update_memslots(struct kvm_memslots *slots,
99 struct kvm_memory_slot *new, u64 last_generation);
100 103
101static void kvm_release_pfn_dirty(pfn_t pfn); 104static void kvm_release_pfn_dirty(pfn_t pfn);
102static void mark_page_dirty_in_slot(struct kvm *kvm, 105static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -107,7 +110,7 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
107 110
108static bool largepages_enabled = true; 111static bool largepages_enabled = true;
109 112
110bool kvm_is_mmio_pfn(pfn_t pfn) 113bool kvm_is_reserved_pfn(pfn_t pfn)
111{ 114{
112 if (pfn_valid(pfn)) 115 if (pfn_valid(pfn))
113 return PageReserved(pfn_to_page(pfn)); 116 return PageReserved(pfn_to_page(pfn));
@@ -124,14 +127,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
124 127
125 if (mutex_lock_killable(&vcpu->mutex)) 128 if (mutex_lock_killable(&vcpu->mutex))
126 return -EINTR; 129 return -EINTR;
127 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
128 /* The thread running this VCPU changed. */
129 struct pid *oldpid = vcpu->pid;
130 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
131 rcu_assign_pointer(vcpu->pid, newpid);
132 synchronize_rcu();
133 put_pid(oldpid);
134 }
135 cpu = get_cpu(); 130 cpu = get_cpu();
136 preempt_notifier_register(&vcpu->preempt_notifier); 131 preempt_notifier_register(&vcpu->preempt_notifier);
137 kvm_arch_vcpu_load(vcpu, cpu); 132 kvm_arch_vcpu_load(vcpu, cpu);
@@ -152,7 +147,7 @@ static void ack_flush(void *_completed)
152{ 147{
153} 148}
154 149
155static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) 150bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
156{ 151{
157 int i, cpu, me; 152 int i, cpu, me;
158 cpumask_var_t cpus; 153 cpumask_var_t cpus;
@@ -184,30 +179,32 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
184 return called; 179 return called;
185} 180}
186 181
182#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
187void kvm_flush_remote_tlbs(struct kvm *kvm) 183void kvm_flush_remote_tlbs(struct kvm *kvm)
188{ 184{
189 long dirty_count = kvm->tlbs_dirty; 185 long dirty_count = kvm->tlbs_dirty;
190 186
191 smp_mb(); 187 smp_mb();
192 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 188 if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
193 ++kvm->stat.remote_tlb_flush; 189 ++kvm->stat.remote_tlb_flush;
194 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 190 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
195} 191}
196EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); 192EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
193#endif
197 194
198void kvm_reload_remote_mmus(struct kvm *kvm) 195void kvm_reload_remote_mmus(struct kvm *kvm)
199{ 196{
200 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 197 kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
201} 198}
202 199
203void kvm_make_mclock_inprogress_request(struct kvm *kvm) 200void kvm_make_mclock_inprogress_request(struct kvm *kvm)
204{ 201{
205 make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); 202 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
206} 203}
207 204
208void kvm_make_scan_ioapic_request(struct kvm *kvm) 205void kvm_make_scan_ioapic_request(struct kvm *kvm)
209{ 206{
210 make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); 207 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
211} 208}
212 209
213int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 210int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -295,6 +292,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
295 kvm_flush_remote_tlbs(kvm); 292 kvm_flush_remote_tlbs(kvm);
296 293
297 spin_unlock(&kvm->mmu_lock); 294 spin_unlock(&kvm->mmu_lock);
295
296 kvm_arch_mmu_notifier_invalidate_page(kvm, address);
297
298 srcu_read_unlock(&kvm->srcu, idx); 298 srcu_read_unlock(&kvm->srcu, idx);
299} 299}
300 300
@@ -368,7 +368,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
368 368
369static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, 369static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
370 struct mm_struct *mm, 370 struct mm_struct *mm,
371 unsigned long address) 371 unsigned long start,
372 unsigned long end)
372{ 373{
373 struct kvm *kvm = mmu_notifier_to_kvm(mn); 374 struct kvm *kvm = mmu_notifier_to_kvm(mn);
374 int young, idx; 375 int young, idx;
@@ -376,7 +377,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
376 idx = srcu_read_lock(&kvm->srcu); 377 idx = srcu_read_lock(&kvm->srcu);
377 spin_lock(&kvm->mmu_lock); 378 spin_lock(&kvm->mmu_lock);
378 379
379 young = kvm_age_hva(kvm, address); 380 young = kvm_age_hva(kvm, start, end);
380 if (young) 381 if (young)
381 kvm_flush_remote_tlbs(kvm); 382 kvm_flush_remote_tlbs(kvm);
382 383
@@ -463,17 +464,23 @@ static struct kvm *kvm_create_vm(unsigned long type)
463 if (r) 464 if (r)
464 goto out_err_no_disable; 465 goto out_err_no_disable;
465 466
466#ifdef CONFIG_HAVE_KVM_IRQCHIP 467#ifdef CONFIG_HAVE_KVM_IRQFD
467 INIT_HLIST_HEAD(&kvm->mask_notifier_list);
468 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 468 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
469#endif 469#endif
470 470
471 BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); 471 BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
472 472
473 r = -ENOMEM; 473 r = -ENOMEM;
474 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 474 kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
475 if (!kvm->memslots) 475 if (!kvm->memslots)
476 goto out_err_no_srcu; 476 goto out_err_no_srcu;
477
478 /*
479 * Init kvm generation close to the maximum to easily test the
480 * code of handling generation number wrap-around.
481 */
482 kvm->memslots->generation = -150;
483
477 kvm_init_memslots_id(kvm); 484 kvm_init_memslots_id(kvm);
478 if (init_srcu_struct(&kvm->srcu)) 485 if (init_srcu_struct(&kvm->srcu))
479 goto out_err_no_srcu; 486 goto out_err_no_srcu;
@@ -515,7 +522,7 @@ out_err_no_srcu:
515out_err_no_disable: 522out_err_no_disable:
516 for (i = 0; i < KVM_NR_BUSES; i++) 523 for (i = 0; i < KVM_NR_BUSES; i++)
517 kfree(kvm->buses[i]); 524 kfree(kvm->buses[i]);
518 kfree(kvm->memslots); 525 kvfree(kvm->memslots);
519 kvm_arch_free_vm(kvm); 526 kvm_arch_free_vm(kvm);
520 return ERR_PTR(r); 527 return ERR_PTR(r);
521} 528}
@@ -532,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size)
532 return kzalloc(size, GFP_KERNEL); 539 return kzalloc(size, GFP_KERNEL);
533} 540}
534 541
535void kvm_kvfree(const void *addr)
536{
537 if (is_vmalloc_addr(addr))
538 vfree(addr);
539 else
540 kfree(addr);
541}
542
543static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) 542static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
544{ 543{
545 if (!memslot->dirty_bitmap) 544 if (!memslot->dirty_bitmap)
546 return; 545 return;
547 546
548 kvm_kvfree(memslot->dirty_bitmap); 547 kvfree(memslot->dirty_bitmap);
549 memslot->dirty_bitmap = NULL; 548 memslot->dirty_bitmap = NULL;
550} 549}
551 550
@@ -571,7 +570,7 @@ static void kvm_free_physmem(struct kvm *kvm)
571 kvm_for_each_memslot(memslot, slots) 570 kvm_for_each_memslot(memslot, slots)
572 kvm_free_physmem_slot(kvm, memslot, NULL); 571 kvm_free_physmem_slot(kvm, memslot, NULL);
573 572
574 kfree(kvm->memslots); 573 kvfree(kvm->memslots);
575} 574}
576 575
577static void kvm_destroy_devices(struct kvm *kvm) 576static void kvm_destroy_devices(struct kvm *kvm)
@@ -654,58 +653,68 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
654 return 0; 653 return 0;
655} 654}
656 655
657static int cmp_memslot(const void *slot1, const void *slot2)
658{
659 struct kvm_memory_slot *s1, *s2;
660
661 s1 = (struct kvm_memory_slot *)slot1;
662 s2 = (struct kvm_memory_slot *)slot2;
663
664 if (s1->npages < s2->npages)
665 return 1;
666 if (s1->npages > s2->npages)
667 return -1;
668
669 return 0;
670}
671
672/* 656/*
673 * Sort the memslots base on its size, so the larger slots 657 * Insert memslot and re-sort memslots based on their GFN,
674 * will get better fit. 658 * so binary search could be used to lookup GFN.
659 * Sorting algorithm takes advantage of having initially
660 * sorted array and known changed memslot position.
675 */ 661 */
676static void sort_memslots(struct kvm_memslots *slots)
677{
678 int i;
679
680 sort(slots->memslots, KVM_MEM_SLOTS_NUM,
681 sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
682
683 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
684 slots->id_to_index[slots->memslots[i].id] = i;
685}
686
687static void update_memslots(struct kvm_memslots *slots, 662static void update_memslots(struct kvm_memslots *slots,
688 struct kvm_memory_slot *new, 663 struct kvm_memory_slot *new)
689 u64 last_generation) 664{
690{ 665 int id = new->id;
691 if (new) { 666 int i = slots->id_to_index[id];
692 int id = new->id; 667 struct kvm_memory_slot *mslots = slots->memslots;
693 struct kvm_memory_slot *old = id_to_memslot(slots, id); 668
694 unsigned long npages = old->npages; 669 WARN_ON(mslots[i].id != id);
670 if (!new->npages) {
671 WARN_ON(!mslots[i].npages);
672 new->base_gfn = 0;
673 new->flags = 0;
674 if (mslots[i].npages)
675 slots->used_slots--;
676 } else {
677 if (!mslots[i].npages)
678 slots->used_slots++;
679 }
695 680
696 *old = *new; 681 while (i < KVM_MEM_SLOTS_NUM - 1 &&
697 if (new->npages != npages) 682 new->base_gfn <= mslots[i + 1].base_gfn) {
698 sort_memslots(slots); 683 if (!mslots[i + 1].npages)
684 break;
685 mslots[i] = mslots[i + 1];
686 slots->id_to_index[mslots[i].id] = i;
687 i++;
699 } 688 }
700 689
701 slots->generation = last_generation + 1; 690 /*
691 * The ">=" is needed when creating a slot with base_gfn == 0,
692 * so that it moves before all those with base_gfn == npages == 0.
693 *
694 * On the other hand, if new->npages is zero, the above loop has
695 * already left i pointing to the beginning of the empty part of
696 * mslots, and the ">=" would move the hole backwards in this
697 * case---which is wrong. So skip the loop when deleting a slot.
698 */
699 if (new->npages) {
700 while (i > 0 &&
701 new->base_gfn >= mslots[i - 1].base_gfn) {
702 mslots[i] = mslots[i - 1];
703 slots->id_to_index[mslots[i].id] = i;
704 i--;
705 }
706 } else
707 WARN_ON_ONCE(i != slots->used_slots);
708
709 mslots[i] = *new;
710 slots->id_to_index[mslots[i].id] = i;
702} 711}
703 712
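The rewritten update_memslots() above keeps the memslot array sorted by base_gfn in descending order so that lookups can binary-search, and it restores that invariant with a single insertion pass instead of a full sort. A simplified stand-alone sketch of the pass (assumptions: every slot is in use, all base_gfn values are distinct, and the id_to_index bookkeeping is omitted):

#include <stdio.h>

#define NSLOTS 6

struct slot {
    int id;
    unsigned long base_gfn;
};

/* slide the entry at index i to its new position after its gfn changed */
static void reinsert(struct slot *s, int i, unsigned long new_gfn)
{
    struct slot tmp = s[i];

    tmp.base_gfn = new_gfn;

    /* toward the tail while the next entry has a larger gfn */
    while (i < NSLOTS - 1 && tmp.base_gfn < s[i + 1].base_gfn) {
        s[i] = s[i + 1];
        i++;
    }
    /* toward the head while the previous entry has a smaller gfn */
    while (i > 0 && tmp.base_gfn > s[i - 1].base_gfn) {
        s[i] = s[i - 1];
        i--;
    }
    s[i] = tmp;
}

int main(void)
{
    struct slot s[NSLOTS] = {
        { 0, 0x500 }, { 1, 0x400 }, { 2, 0x300 },
        { 3, 0x200 }, { 4, 0x100 }, { 5, 0x050 },
    };
    int i;

    reinsert(s, 4, 0x450);  /* slot 4 moved from gfn 0x100 to 0x450 */
    for (i = 0; i < NSLOTS; i++)
        printf("id=%d base_gfn=%#lx\n", s[i].id, s[i].base_gfn);
    return 0;
}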
704static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) 713static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
705{ 714{
706 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; 715 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
707 716
708#ifdef KVM_CAP_READONLY_MEM 717#ifdef __KVM_HAVE_READONLY_MEM
709 valid_flags |= KVM_MEM_READONLY; 718 valid_flags |= KVM_MEM_READONLY;
710#endif 719#endif
711 720
@@ -716,14 +725,27 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
716} 725}
717 726
718static struct kvm_memslots *install_new_memslots(struct kvm *kvm, 727static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
719 struct kvm_memslots *slots, struct kvm_memory_slot *new) 728 struct kvm_memslots *slots)
720{ 729{
721 struct kvm_memslots *old_memslots = kvm->memslots; 730 struct kvm_memslots *old_memslots = kvm->memslots;
722 731
723 update_memslots(slots, new, kvm->memslots->generation); 732 /*
733 * Set the low bit in the generation, which disables SPTE caching
734 * until the end of synchronize_srcu_expedited.
735 */
736 WARN_ON(old_memslots->generation & 1);
737 slots->generation = old_memslots->generation + 1;
738
724 rcu_assign_pointer(kvm->memslots, slots); 739 rcu_assign_pointer(kvm->memslots, slots);
725 synchronize_srcu_expedited(&kvm->srcu); 740 synchronize_srcu_expedited(&kvm->srcu);
726 741
742 /*
743 * Increment the new memslot generation a second time. This prevents
744 * vm exits that race with memslot updates from caching a memslot
745 * generation that will (potentially) be valid forever.
746 */
747 slots->generation++;
748
727 kvm_arch_memslots_updated(kvm); 749 kvm_arch_memslots_updated(kvm);
728 750
 729 return old_memslots; 751 return old_memslots;
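A small sketch (not kernel code, names invented) of the generation protocol used by install_new_memslots() above: the counter is bumped to an odd value before the new memslots are published and to an even value after the SRCU grace period, so a reader that samples an odd generation knows an update is in flight and must not cache anything keyed on it:

#include <stdio.h>

struct memslots_demo {
    unsigned long long generation;
    /* ...slot array... */
};

static int generation_is_stable(const struct memslots_demo *slots)
{
    return (slots->generation & 1) == 0;    /* odd means update in flight */
}

int main(void)
{
    struct memslots_demo slots = { .generation = 100 };

    /* updater, step 1: mark in-flux before making the update visible */
    slots.generation++;                                     /* 101, odd */
    printf("stable? %d\n", generation_is_stable(&slots));   /* 0 */

    /* ...publish the new copy, wait out readers of the old one... */

    /* updater, step 2: the new copy may now be cached against */
    slots.generation++;                                     /* 102, even */
    printf("stable? %d\n", generation_is_stable(&slots));   /* 1 */
    return 0;
}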
@@ -735,7 +757,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
735 * 757 *
736 * Discontiguous memory is allowed, mostly for framebuffers. 758 * Discontiguous memory is allowed, mostly for framebuffers.
737 * 759 *
738 * Must be called holding mmap_sem for write. 760 * Must be called holding kvm->slots_lock for write.
739 */ 761 */
740int __kvm_set_memory_region(struct kvm *kvm, 762int __kvm_set_memory_region(struct kvm *kvm,
741 struct kvm_userspace_memory_region *mem) 763 struct kvm_userspace_memory_region *mem)
@@ -774,7 +796,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
774 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 796 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
775 npages = mem->memory_size >> PAGE_SHIFT; 797 npages = mem->memory_size >> PAGE_SHIFT;
776 798
777 r = -EINVAL;
778 if (npages > KVM_MEM_MAX_NR_PAGES) 799 if (npages > KVM_MEM_MAX_NR_PAGES)
779 goto out; 800 goto out;
780 801
@@ -788,7 +809,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
788 new.npages = npages; 809 new.npages = npages;
789 new.flags = mem->flags; 810 new.flags = mem->flags;
790 811
791 r = -EINVAL;
792 if (npages) { 812 if (npages) {
793 if (!old.npages) 813 if (!old.npages)
794 change = KVM_MR_CREATE; 814 change = KVM_MR_CREATE;
@@ -843,16 +863,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
843 goto out_free; 863 goto out_free;
844 } 864 }
845 865
866 slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
867 if (!slots)
868 goto out_free;
869 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
870
846 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { 871 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
847 r = -ENOMEM;
848 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
849 GFP_KERNEL);
850 if (!slots)
851 goto out_free;
852 slot = id_to_memslot(slots, mem->slot); 872 slot = id_to_memslot(slots, mem->slot);
853 slot->flags |= KVM_MEMSLOT_INVALID; 873 slot->flags |= KVM_MEMSLOT_INVALID;
854 874
855 old_memslots = install_new_memslots(kvm, slots, NULL); 875 old_memslots = install_new_memslots(kvm, slots);
856 876
857 /* slot was deleted or moved, clear iommu mapping */ 877 /* slot was deleted or moved, clear iommu mapping */
858 kvm_iommu_unmap_pages(kvm, &old); 878 kvm_iommu_unmap_pages(kvm, &old);
@@ -860,10 +880,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
860 * or moved, memslot will be created. 880 * or moved, memslot will be created.
861 * 881 *
862 * validation of sp->gfn happens in: 882 * validation of sp->gfn happens in:
863 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) 883 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
864 * - kvm_is_visible_gfn (mmu_check_roots) 884 * - kvm_is_visible_gfn (mmu_check_roots)
865 */ 885 */
866 kvm_arch_flush_shadow_memslot(kvm, slot); 886 kvm_arch_flush_shadow_memslot(kvm, slot);
887
888 /*
889 * We can re-use the old_memslots from above, the only difference
890 * from the currently installed memslots is the invalid flag. This
891 * will get overwritten by update_memslots anyway.
892 */
867 slots = old_memslots; 893 slots = old_memslots;
868 } 894 }
869 895
@@ -871,31 +897,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
871 if (r) 897 if (r)
872 goto out_slots; 898 goto out_slots;
873 899
874 r = -ENOMEM;
875 /*
876 * We can re-use the old_memslots from above, the only difference
877 * from the currently installed memslots is the invalid flag. This
878 * will get overwritten by update_memslots anyway.
879 */
880 if (!slots) {
881 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
882 GFP_KERNEL);
883 if (!slots)
884 goto out_free;
885 }
886
887 /* actual memory is freed via old in kvm_free_physmem_slot below */ 900 /* actual memory is freed via old in kvm_free_physmem_slot below */
888 if (change == KVM_MR_DELETE) { 901 if (change == KVM_MR_DELETE) {
889 new.dirty_bitmap = NULL; 902 new.dirty_bitmap = NULL;
890 memset(&new.arch, 0, sizeof(new.arch)); 903 memset(&new.arch, 0, sizeof(new.arch));
891 } 904 }
892 905
893 old_memslots = install_new_memslots(kvm, slots, &new); 906 update_memslots(slots, &new);
907 old_memslots = install_new_memslots(kvm, slots);
894 908
895 kvm_arch_commit_memory_region(kvm, mem, &old, change); 909 kvm_arch_commit_memory_region(kvm, mem, &old, change);
896 910
897 kvm_free_physmem_slot(kvm, &old, &new); 911 kvm_free_physmem_slot(kvm, &old, &new);
898 kfree(old_memslots); 912 kvfree(old_memslots);
899 913
900 /* 914 /*
901 * IOMMU mapping: New slots need to be mapped. Old slots need to be 915 * IOMMU mapping: New slots need to be mapped. Old slots need to be
@@ -914,7 +928,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
914 return 0; 928 return 0;
915 929
916out_slots: 930out_slots:
917 kfree(slots); 931 kvfree(slots);
918out_free: 932out_free:
919 kvm_free_physmem_slot(kvm, &new, &old); 933 kvm_free_physmem_slot(kvm, &new, &old);
920out: 934out:
@@ -977,6 +991,88 @@ out:
977} 991}
978EXPORT_SYMBOL_GPL(kvm_get_dirty_log); 992EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
979 993
994#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
995/**
996 * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages
997 * are dirty write protect them for next write.
998 * @kvm: pointer to kvm instance
999 * @log: slot id and address to which we copy the log
1000 * @is_dirty: flag set if any page is dirty
1001 *
1002 * We need to keep it in mind that VCPU threads can write to the bitmap
1003 * concurrently. So, to avoid losing track of dirty pages we keep the
1004 * following order:
1005 *
1006 * 1. Take a snapshot of the bit and clear it if needed.
1007 * 2. Write protect the corresponding page.
1008 * 3. Copy the snapshot to the userspace.
1009 * 4. Upon return caller flushes TLB's if needed.
1010 *
1011 * Between 2 and 4, the guest may write to the page using the remaining TLB
1012 * entry. This is not a problem because the page is reported dirty using
1013 * the snapshot taken before and step 4 ensures that writes done after
1014 * exiting to userspace will be logged for the next call.
1015 *
1016 */
1017int kvm_get_dirty_log_protect(struct kvm *kvm,
1018 struct kvm_dirty_log *log, bool *is_dirty)
1019{
1020 struct kvm_memory_slot *memslot;
1021 int r, i;
1022 unsigned long n;
1023 unsigned long *dirty_bitmap;
1024 unsigned long *dirty_bitmap_buffer;
1025
1026 r = -EINVAL;
1027 if (log->slot >= KVM_USER_MEM_SLOTS)
1028 goto out;
1029
1030 memslot = id_to_memslot(kvm->memslots, log->slot);
1031
1032 dirty_bitmap = memslot->dirty_bitmap;
1033 r = -ENOENT;
1034 if (!dirty_bitmap)
1035 goto out;
1036
1037 n = kvm_dirty_bitmap_bytes(memslot);
1038
1039 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
1040 memset(dirty_bitmap_buffer, 0, n);
1041
1042 spin_lock(&kvm->mmu_lock);
1043 *is_dirty = false;
1044 for (i = 0; i < n / sizeof(long); i++) {
1045 unsigned long mask;
1046 gfn_t offset;
1047
1048 if (!dirty_bitmap[i])
1049 continue;
1050
1051 *is_dirty = true;
1052
1053 mask = xchg(&dirty_bitmap[i], 0);
1054 dirty_bitmap_buffer[i] = mask;
1055
1056 if (mask) {
1057 offset = i * BITS_PER_LONG;
1058 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1059 offset, mask);
1060 }
1061 }
1062
1063 spin_unlock(&kvm->mmu_lock);
1064
1065 r = -EFAULT;
1066 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
1067 goto out;
1068
1069 r = 0;
1070out:
1071 return r;
1072}
1073EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
1074#endif
1075
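A user-space model (C11 atomics, invented names) of step 1 in the ordering documented above for kvm_get_dirty_log_protect(): snapshotting and clearing each dirty-bitmap word with a single atomic exchange guarantees that bits set concurrently by other threads are either captured in this snapshot or left in place for the next one, never lost:

#include <stdatomic.h>
#include <stdio.h>

#define WORDS 4

static _Atomic unsigned long dirty_bitmap[WORDS];   /* written by "vcpus" */
static unsigned long snapshot[WORDS];               /* copied to "userspace" */

static int snapshot_and_clear(void)
{
    int i, any_dirty = 0;

    for (i = 0; i < WORDS; i++) {
        /* fetch the word and reset it to 0 in one step (like xchg) */
        snapshot[i] = atomic_exchange(&dirty_bitmap[i], 0);
        if (snapshot[i])
            any_dirty = 1;
    }
    return any_dirty;
}

int main(void)
{
    int dirty;

    atomic_fetch_or(&dirty_bitmap[1], 1UL << 5);    /* a page was dirtied */

    dirty = snapshot_and_clear();
    printf("dirty=%d word1=%#lx\n", dirty, snapshot[1]);
    printf("dirty=%d\n", snapshot_and_clear());     /* nothing new since */
    return 0;
}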
980bool kvm_largepages_enabled(void) 1076bool kvm_largepages_enabled(void)
981{ 1077{
982 return largepages_enabled; 1078 return largepages_enabled;
@@ -1073,9 +1169,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
1073 * If writable is set to false, the hva returned by this function is only 1169 * If writable is set to false, the hva returned by this function is only
1074 * allowed to be read. 1170 * allowed to be read.
1075 */ 1171 */
1076unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) 1172unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
1173 gfn_t gfn, bool *writable)
1077{ 1174{
1078 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1079 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); 1175 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
1080 1176
1081 if (!kvm_is_error_hva(hva) && writable) 1177 if (!kvm_is_error_hva(hva) && writable)
@@ -1084,14 +1180,11 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1084 return hva; 1180 return hva;
1085} 1181}
1086 1182
1087static int kvm_read_hva(void *data, void __user *hva, int len) 1183unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1088{ 1184{
1089 return __copy_from_user(data, hva, len); 1185 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1090}
1091 1186
1092static int kvm_read_hva_atomic(void *data, void __user *hva, int len) 1187 return gfn_to_hva_memslot_prot(slot, gfn, writable);
1093{
1094 return __copy_from_user_inatomic(data, hva, len);
1095} 1188}
1096 1189
1097static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, 1190static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
@@ -1168,8 +1261,9 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
1168 addr, write_fault, page); 1261 addr, write_fault, page);
1169 up_read(&current->mm->mmap_sem); 1262 up_read(&current->mm->mmap_sem);
1170 } else 1263 } else
1171 npages = get_user_pages_fast(addr, 1, write_fault, 1264 npages = __get_user_pages_unlocked(current, current->mm, addr, 1,
1172 page); 1265 write_fault, 0, page,
1266 FOLL_TOUCH|FOLL_HWPOISON);
1173 if (npages != 1) 1267 if (npages != 1)
1174 return npages; 1268 return npages;
1175 1269
@@ -1249,7 +1343,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
1249 else if ((vma->vm_flags & VM_PFNMAP)) { 1343 else if ((vma->vm_flags & VM_PFNMAP)) {
1250 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + 1344 pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
1251 vma->vm_pgoff; 1345 vma->vm_pgoff;
1252 BUG_ON(!kvm_is_mmio_pfn(pfn)); 1346 BUG_ON(!kvm_is_reserved_pfn(pfn));
1253 } else { 1347 } else {
1254 if (async && vma_is_valid(vma, write_fault)) 1348 if (async && vma_is_valid(vma, write_fault))
1255 *async = true; 1349 *async = true;
@@ -1355,7 +1449,7 @@ static struct page *kvm_pfn_to_page(pfn_t pfn)
1355 if (is_error_noslot_pfn(pfn)) 1449 if (is_error_noslot_pfn(pfn))
1356 return KVM_ERR_PTR_BAD_PAGE; 1450 return KVM_ERR_PTR_BAD_PAGE;
1357 1451
1358 if (kvm_is_mmio_pfn(pfn)) { 1452 if (kvm_is_reserved_pfn(pfn)) {
1359 WARN_ON(1); 1453 WARN_ON(1);
1360 return KVM_ERR_PTR_BAD_PAGE; 1454 return KVM_ERR_PTR_BAD_PAGE;
1361 } 1455 }
@@ -1371,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
1371 1465
1372 return kvm_pfn_to_page(pfn); 1466 return kvm_pfn_to_page(pfn);
1373} 1467}
1374
1375EXPORT_SYMBOL_GPL(gfn_to_page); 1468EXPORT_SYMBOL_GPL(gfn_to_page);
1376 1469
1377void kvm_release_page_clean(struct page *page) 1470void kvm_release_page_clean(struct page *page)
@@ -1384,7 +1477,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
1384 1477
1385void kvm_release_pfn_clean(pfn_t pfn) 1478void kvm_release_pfn_clean(pfn_t pfn)
1386{ 1479{
1387 if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) 1480 if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
1388 put_page(pfn_to_page(pfn)); 1481 put_page(pfn_to_page(pfn));
1389} 1482}
1390EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); 1483EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@@ -1405,8 +1498,9 @@ static void kvm_release_pfn_dirty(pfn_t pfn)
1405 1498
1406void kvm_set_pfn_dirty(pfn_t pfn) 1499void kvm_set_pfn_dirty(pfn_t pfn)
1407{ 1500{
1408 if (!kvm_is_mmio_pfn(pfn)) { 1501 if (!kvm_is_reserved_pfn(pfn)) {
1409 struct page *page = pfn_to_page(pfn); 1502 struct page *page = pfn_to_page(pfn);
1503
1410 if (!PageReserved(page)) 1504 if (!PageReserved(page))
1411 SetPageDirty(page); 1505 SetPageDirty(page);
1412 } 1506 }
@@ -1415,14 +1509,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
1415 1509
1416void kvm_set_pfn_accessed(pfn_t pfn) 1510void kvm_set_pfn_accessed(pfn_t pfn)
1417{ 1511{
1418 if (!kvm_is_mmio_pfn(pfn)) 1512 if (!kvm_is_reserved_pfn(pfn))
1419 mark_page_accessed(pfn_to_page(pfn)); 1513 mark_page_accessed(pfn_to_page(pfn));
1420} 1514}
1421EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); 1515EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
1422 1516
1423void kvm_get_pfn(pfn_t pfn) 1517void kvm_get_pfn(pfn_t pfn)
1424{ 1518{
1425 if (!kvm_is_mmio_pfn(pfn)) 1519 if (!kvm_is_reserved_pfn(pfn))
1426 get_page(pfn_to_page(pfn)); 1520 get_page(pfn_to_page(pfn));
1427} 1521}
1428EXPORT_SYMBOL_GPL(kvm_get_pfn); 1522EXPORT_SYMBOL_GPL(kvm_get_pfn);
@@ -1444,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
1444 addr = gfn_to_hva_prot(kvm, gfn, NULL); 1538 addr = gfn_to_hva_prot(kvm, gfn, NULL);
1445 if (kvm_is_error_hva(addr)) 1539 if (kvm_is_error_hva(addr))
1446 return -EFAULT; 1540 return -EFAULT;
1447 r = kvm_read_hva(data, (void __user *)addr + offset, len); 1541 r = __copy_from_user(data, (void __user *)addr + offset, len);
1448 if (r) 1542 if (r)
1449 return -EFAULT; 1543 return -EFAULT;
1450 return 0; 1544 return 0;
@@ -1483,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
1483 if (kvm_is_error_hva(addr)) 1577 if (kvm_is_error_hva(addr))
1484 return -EFAULT; 1578 return -EFAULT;
1485 pagefault_disable(); 1579 pagefault_disable();
1486 r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); 1580 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
1487 pagefault_enable(); 1581 pagefault_enable();
1488 if (r) 1582 if (r)
1489 return -EFAULT; 1583 return -EFAULT;
@@ -1527,6 +1621,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
1527 } 1621 }
1528 return 0; 1622 return 0;
1529} 1623}
1624EXPORT_SYMBOL_GPL(kvm_write_guest);
1530 1625
1531int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, 1626int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
1532 gpa_t gpa, unsigned long len) 1627 gpa_t gpa, unsigned long len)
@@ -1542,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
1542 ghc->generation = slots->generation; 1637 ghc->generation = slots->generation;
1543 ghc->len = len; 1638 ghc->len = len;
1544 ghc->memslot = gfn_to_memslot(kvm, start_gfn); 1639 ghc->memslot = gfn_to_memslot(kvm, start_gfn);
1545 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); 1640 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
1546 if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { 1641 if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
1547 ghc->hva += offset; 1642 ghc->hva += offset;
1548 } else { 1643 } else {
1549 /* 1644 /*
@@ -1631,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
1631 int offset = offset_in_page(gpa); 1726 int offset = offset_in_page(gpa);
1632 int ret; 1727 int ret;
1633 1728
1634 while ((seg = next_segment(len, offset)) != 0) { 1729 while ((seg = next_segment(len, offset)) != 0) {
1635 ret = kvm_clear_guest_page(kvm, gfn, offset, seg); 1730 ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
1636 if (ret < 0) 1731 if (ret < 0)
1637 return ret; 1732 return ret;
@@ -1663,29 +1758,61 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1663} 1758}
1664EXPORT_SYMBOL_GPL(mark_page_dirty); 1759EXPORT_SYMBOL_GPL(mark_page_dirty);
1665 1760
1761static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
1762{
1763 if (kvm_arch_vcpu_runnable(vcpu)) {
1764 kvm_make_request(KVM_REQ_UNHALT, vcpu);
1765 return -EINTR;
1766 }
1767 if (kvm_cpu_has_pending_timer(vcpu))
1768 return -EINTR;
1769 if (signal_pending(current))
1770 return -EINTR;
1771
1772 return 0;
1773}
1774
1666/* 1775/*
1667 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1776 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
1668 */ 1777 */
1669void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1778void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1670{ 1779{
1780 ktime_t start, cur;
1671 DEFINE_WAIT(wait); 1781 DEFINE_WAIT(wait);
1782 bool waited = false;
1783
1784 start = cur = ktime_get();
1785 if (halt_poll_ns) {
1786 ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
1787
1788 do {
1789 /*
1790 * This sets KVM_REQ_UNHALT if an interrupt
1791 * arrives.
1792 */
1793 if (kvm_vcpu_check_block(vcpu) < 0) {
1794 ++vcpu->stat.halt_successful_poll;
1795 goto out;
1796 }
1797 cur = ktime_get();
1798 } while (single_task_running() && ktime_before(cur, stop));
1799 }
1672 1800
1673 for (;;) { 1801 for (;;) {
1674 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1802 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1675 1803
1676 if (kvm_arch_vcpu_runnable(vcpu)) { 1804 if (kvm_vcpu_check_block(vcpu) < 0)
1677 kvm_make_request(KVM_REQ_UNHALT, vcpu);
1678 break;
1679 }
1680 if (kvm_cpu_has_pending_timer(vcpu))
1681 break;
1682 if (signal_pending(current))
1683 break; 1805 break;
1684 1806
1807 waited = true;
1685 schedule(); 1808 schedule();
1686 } 1809 }
1687 1810
1688 finish_wait(&vcpu->wq, &wait); 1811 finish_wait(&vcpu->wq, &wait);
1812 cur = ktime_get();
1813
1814out:
1815 trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
1689} 1816}
1690EXPORT_SYMBOL_GPL(kvm_vcpu_block); 1817EXPORT_SYMBOL_GPL(kvm_vcpu_block);
1691 1818
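The reworked kvm_vcpu_block() above polls for up to halt_poll_ns before paying for a real sleep, which helps when the wake-up event arrives within a few microseconds of the halt. A stand-alone sketch of the same wait structure (halt_poll_ns_demo and event_pending() are stand-ins invented for this example; the kernel version also stops polling as soon as other tasks become runnable):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static const long halt_poll_ns_demo = 200000;   /* 200us polling window */
static int pending_calls;

static bool event_pending(void)
{
    /* stand-in for "interrupt or timer pending": fires after a few polls */
    return ++pending_calls > 50;
}

static long long now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void wait_for_event(void)
{
    long long stop = now_ns() + halt_poll_ns_demo;
    bool waited = false;

    /* polling phase: spin briefly without giving up the CPU */
    do {
        if (event_pending())
            goto out;
    } while (now_ns() < stop);

    /* blocking phase: actually sleep between checks */
    while (!event_pending()) {
        waited = true;
        usleep(1000);
    }
out:
    printf("event received, slept=%d\n", waited);
}

int main(void)
{
    wait_for_event();
    return 0;
}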
@@ -1723,14 +1850,10 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
1723 rcu_read_lock(); 1850 rcu_read_lock();
1724 pid = rcu_dereference(target->pid); 1851 pid = rcu_dereference(target->pid);
1725 if (pid) 1852 if (pid)
1726 task = get_pid_task(target->pid, PIDTYPE_PID); 1853 task = get_pid_task(pid, PIDTYPE_PID);
1727 rcu_read_unlock(); 1854 rcu_read_unlock();
1728 if (!task) 1855 if (!task)
1729 return ret; 1856 return ret;
1730 if (task->flags & PF_VCPU) {
1731 put_task_struct(task);
1732 return ret;
1733 }
1734 ret = yield_to(task, 1); 1857 ret = yield_to(task, 1);
1735 put_task_struct(task); 1858 put_task_struct(task);
1736 1859
@@ -1766,8 +1889,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1766 bool eligible; 1889 bool eligible;
1767 1890
1768 eligible = !vcpu->spin_loop.in_spin_loop || 1891 eligible = !vcpu->spin_loop.in_spin_loop ||
1769 (vcpu->spin_loop.in_spin_loop && 1892 vcpu->spin_loop.dy_eligible;
1770 vcpu->spin_loop.dy_eligible);
1771 1893
1772 if (vcpu->spin_loop.in_spin_loop) 1894 if (vcpu->spin_loop.in_spin_loop)
1773 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); 1895 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
@@ -1873,7 +1995,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
1873static struct file_operations kvm_vcpu_fops = { 1995static struct file_operations kvm_vcpu_fops = {
1874 .release = kvm_vcpu_release, 1996 .release = kvm_vcpu_release,
1875 .unlocked_ioctl = kvm_vcpu_ioctl, 1997 .unlocked_ioctl = kvm_vcpu_ioctl,
1876#ifdef CONFIG_COMPAT 1998#ifdef CONFIG_KVM_COMPAT
1877 .compat_ioctl = kvm_vcpu_compat_ioctl, 1999 .compat_ioctl = kvm_vcpu_compat_ioctl,
1878#endif 2000#endif
1879 .mmap = kvm_vcpu_mmap, 2001 .mmap = kvm_vcpu_mmap,
@@ -1973,12 +2095,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
1973 if (vcpu->kvm->mm != current->mm) 2095 if (vcpu->kvm->mm != current->mm)
1974 return -EIO; 2096 return -EIO;
1975 2097
2098 if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
2099 return -EINVAL;
2100
1976#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) 2101#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
1977 /* 2102 /*
1978 * Special cases: vcpu ioctls that are asynchronous to vcpu execution, 2103 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
1979 * so vcpu_load() would break it. 2104 * so vcpu_load() would break it.
1980 */ 2105 */
1981 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) 2106 if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
1982 return kvm_arch_vcpu_ioctl(filp, ioctl, arg); 2107 return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
1983#endif 2108#endif
1984 2109
@@ -1991,6 +2116,16 @@ static long kvm_vcpu_ioctl(struct file *filp,
1991 r = -EINVAL; 2116 r = -EINVAL;
1992 if (arg) 2117 if (arg)
1993 goto out; 2118 goto out;
2119 if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
2120 /* The thread running this VCPU changed. */
2121 struct pid *oldpid = vcpu->pid;
2122 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
2123
2124 rcu_assign_pointer(vcpu->pid, newpid);
2125 if (oldpid)
2126 synchronize_rcu();
2127 put_pid(oldpid);
2128 }
1994 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); 2129 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
1995 trace_kvm_userspace_exit(vcpu->run->exit_reason, r); 2130 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
1996 break; 2131 break;
@@ -2056,7 +2191,7 @@ out_free1:
2056 if (r) 2191 if (r)
2057 goto out; 2192 goto out;
2058 r = -EFAULT; 2193 r = -EFAULT;
2059 if (copy_to_user(argp, &mp_state, sizeof mp_state)) 2194 if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
2060 goto out; 2195 goto out;
2061 r = 0; 2196 r = 0;
2062 break; 2197 break;
@@ -2065,7 +2200,7 @@ out_free1:
2065 struct kvm_mp_state mp_state; 2200 struct kvm_mp_state mp_state;
2066 2201
2067 r = -EFAULT; 2202 r = -EFAULT;
2068 if (copy_from_user(&mp_state, argp, sizeof mp_state)) 2203 if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
2069 goto out; 2204 goto out;
2070 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); 2205 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
2071 break; 2206 break;
@@ -2074,13 +2209,13 @@ out_free1:
2074 struct kvm_translation tr; 2209 struct kvm_translation tr;
2075 2210
2076 r = -EFAULT; 2211 r = -EFAULT;
2077 if (copy_from_user(&tr, argp, sizeof tr)) 2212 if (copy_from_user(&tr, argp, sizeof(tr)))
2078 goto out; 2213 goto out;
2079 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); 2214 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
2080 if (r) 2215 if (r)
2081 goto out; 2216 goto out;
2082 r = -EFAULT; 2217 r = -EFAULT;
2083 if (copy_to_user(argp, &tr, sizeof tr)) 2218 if (copy_to_user(argp, &tr, sizeof(tr)))
2084 goto out; 2219 goto out;
2085 r = 0; 2220 r = 0;
2086 break; 2221 break;
@@ -2089,7 +2224,7 @@ out_free1:
2089 struct kvm_guest_debug dbg; 2224 struct kvm_guest_debug dbg;
2090 2225
2091 r = -EFAULT; 2226 r = -EFAULT;
2092 if (copy_from_user(&dbg, argp, sizeof dbg)) 2227 if (copy_from_user(&dbg, argp, sizeof(dbg)))
2093 goto out; 2228 goto out;
2094 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); 2229 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
2095 break; 2230 break;
@@ -2103,14 +2238,14 @@ out_free1:
2103 if (argp) { 2238 if (argp) {
2104 r = -EFAULT; 2239 r = -EFAULT;
2105 if (copy_from_user(&kvm_sigmask, argp, 2240 if (copy_from_user(&kvm_sigmask, argp,
2106 sizeof kvm_sigmask)) 2241 sizeof(kvm_sigmask)))
2107 goto out; 2242 goto out;
2108 r = -EINVAL; 2243 r = -EINVAL;
2109 if (kvm_sigmask.len != sizeof sigset) 2244 if (kvm_sigmask.len != sizeof(sigset))
2110 goto out; 2245 goto out;
2111 r = -EFAULT; 2246 r = -EFAULT;
2112 if (copy_from_user(&sigset, sigmask_arg->sigset, 2247 if (copy_from_user(&sigset, sigmask_arg->sigset,
2113 sizeof sigset)) 2248 sizeof(sigset)))
2114 goto out; 2249 goto out;
2115 p = &sigset; 2250 p = &sigset;
2116 } 2251 }
@@ -2151,7 +2286,7 @@ out:
2151 return r; 2286 return r;
2152} 2287}
2153 2288
2154#ifdef CONFIG_COMPAT 2289#ifdef CONFIG_KVM_COMPAT
2155static long kvm_vcpu_compat_ioctl(struct file *filp, 2290static long kvm_vcpu_compat_ioctl(struct file *filp,
2156 unsigned int ioctl, unsigned long arg) 2291 unsigned int ioctl, unsigned long arg)
2157{ 2292{
@@ -2172,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
2172 if (argp) { 2307 if (argp) {
2173 r = -EFAULT; 2308 r = -EFAULT;
2174 if (copy_from_user(&kvm_sigmask, argp, 2309 if (copy_from_user(&kvm_sigmask, argp,
2175 sizeof kvm_sigmask)) 2310 sizeof(kvm_sigmask)))
2176 goto out; 2311 goto out;
2177 r = -EINVAL; 2312 r = -EINVAL;
2178 if (kvm_sigmask.len != sizeof csigset) 2313 if (kvm_sigmask.len != sizeof(csigset))
2179 goto out; 2314 goto out;
2180 r = -EFAULT; 2315 r = -EFAULT;
2181 if (copy_from_user(&csigset, sigmask_arg->sigset, 2316 if (copy_from_user(&csigset, sigmask_arg->sigset,
2182 sizeof csigset)) 2317 sizeof(csigset)))
2183 goto out; 2318 goto out;
2184 sigset_from_compat(&sigset, &csigset); 2319 sigset_from_compat(&sigset, &csigset);
2185 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); 2320 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
@@ -2243,7 +2378,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
2243 2378
2244static const struct file_operations kvm_device_fops = { 2379static const struct file_operations kvm_device_fops = {
2245 .unlocked_ioctl = kvm_device_ioctl, 2380 .unlocked_ioctl = kvm_device_ioctl,
2246#ifdef CONFIG_COMPAT 2381#ifdef CONFIG_KVM_COMPAT
2247 .compat_ioctl = kvm_device_ioctl, 2382 .compat_ioctl = kvm_device_ioctl,
2248#endif 2383#endif
2249 .release = kvm_device_release, 2384 .release = kvm_device_release,
@@ -2257,6 +2392,35 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
2257 return filp->private_data; 2392 return filp->private_data;
2258} 2393}
2259 2394
2395static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
2396#ifdef CONFIG_KVM_MPIC
2397 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
2398 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
2399#endif
2400
2401#ifdef CONFIG_KVM_XICS
2402 [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
2403#endif
2404};
2405
2406int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2407{
2408 if (type >= ARRAY_SIZE(kvm_device_ops_table))
2409 return -ENOSPC;
2410
2411 if (kvm_device_ops_table[type] != NULL)
2412 return -EEXIST;
2413
2414 kvm_device_ops_table[type] = ops;
2415 return 0;
2416}
2417
2418void kvm_unregister_device_ops(u32 type)
2419{
2420 if (kvm_device_ops_table[type] != NULL)
2421 kvm_device_ops_table[type] = NULL;
2422}
2423
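kvm_register_device_ops()/kvm_unregister_device_ops() above turn device creation into a bounds check plus a table lookup, replacing the hard-coded switch that kvm_ioctl_create_device() used to carry (removed further down). A stand-alone sketch of the same table-driven registration pattern, with all names invented:

#include <errno.h>
#include <stdio.h>

struct demo_device_ops {
    const char *name;
    int (*create)(void);
};

#define DEMO_DEV_TYPE_MAX 8
static const struct demo_device_ops *ops_table[DEMO_DEV_TYPE_MAX];

static int demo_register_ops(const struct demo_device_ops *ops, unsigned int type)
{
    if (type >= DEMO_DEV_TYPE_MAX)
        return -ENOSPC;
    if (ops_table[type])
        return -EEXIST;     /* each type registers exactly once */
    ops_table[type] = ops;
    return 0;
}

static int demo_create_device(unsigned int type)
{
    if (type >= DEMO_DEV_TYPE_MAX || !ops_table[type])
        return -ENODEV;     /* unknown or unregistered type */
    return ops_table[type]->create();
}

static int vfio_like_create(void)
{
    printf("vfio-like device created\n");
    return 0;
}

static const struct demo_device_ops vfio_like_ops = {
    .name = "demo-vfio",
    .create = vfio_like_create,
};

int main(void)
{
    int r1, r2;

    demo_register_ops(&vfio_like_ops, 3);
    r1 = demo_create_device(3);     /* 0: created via the table */
    r2 = demo_create_device(5);     /* -ENODEV: nothing registered */
    printf("r1=%d r2=%d\n", r1, r2);
    return 0;
}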
2260static int kvm_ioctl_create_device(struct kvm *kvm, 2424static int kvm_ioctl_create_device(struct kvm *kvm,
2261 struct kvm_create_device *cd) 2425 struct kvm_create_device *cd)
2262{ 2426{
@@ -2265,36 +2429,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2265 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 2429 bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
2266 int ret; 2430 int ret;
2267 2431
2268 switch (cd->type) { 2432 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
2269#ifdef CONFIG_KVM_MPIC 2433 return -ENODEV;
2270 case KVM_DEV_TYPE_FSL_MPIC_20: 2434
2271 case KVM_DEV_TYPE_FSL_MPIC_42: 2435 ops = kvm_device_ops_table[cd->type];
2272 ops = &kvm_mpic_ops; 2436 if (ops == NULL)
2273 break;
2274#endif
2275#ifdef CONFIG_KVM_XICS
2276 case KVM_DEV_TYPE_XICS:
2277 ops = &kvm_xics_ops;
2278 break;
2279#endif
2280#ifdef CONFIG_KVM_VFIO
2281 case KVM_DEV_TYPE_VFIO:
2282 ops = &kvm_vfio_ops;
2283 break;
2284#endif
2285#ifdef CONFIG_KVM_ARM_VGIC
2286 case KVM_DEV_TYPE_ARM_VGIC_V2:
2287 ops = &kvm_arm_vgic_v2_ops;
2288 break;
2289#endif
2290#ifdef CONFIG_S390
2291 case KVM_DEV_TYPE_FLIC:
2292 ops = &kvm_flic_ops;
2293 break;
2294#endif
2295 default:
2296 return -ENODEV; 2437 return -ENODEV;
2297 }
2298 2438
2299 if (test) 2439 if (test)
2300 return 0; 2440 return 0;
@@ -2324,6 +2464,35 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2324 return 0; 2464 return 0;
2325} 2465}
2326 2466
2467static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
2468{
2469 switch (arg) {
2470 case KVM_CAP_USER_MEMORY:
2471 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
2472 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
2473#ifdef CONFIG_KVM_APIC_ARCHITECTURE
2474 case KVM_CAP_SET_BOOT_CPU_ID:
2475#endif
2476 case KVM_CAP_INTERNAL_ERROR_DATA:
2477#ifdef CONFIG_HAVE_KVM_MSI
2478 case KVM_CAP_SIGNAL_MSI:
2479#endif
2480#ifdef CONFIG_HAVE_KVM_IRQFD
2481 case KVM_CAP_IRQFD:
2482 case KVM_CAP_IRQFD_RESAMPLE:
2483#endif
2484 case KVM_CAP_CHECK_EXTENSION_VM:
2485 return 1;
2486#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
2487 case KVM_CAP_IRQ_ROUTING:
2488 return KVM_MAX_IRQ_ROUTES;
2489#endif
2490 default:
2491 break;
2492 }
2493 return kvm_vm_ioctl_check_extension(kvm, arg);
2494}
2495
2327static long kvm_vm_ioctl(struct file *filp, 2496static long kvm_vm_ioctl(struct file *filp,
2328 unsigned int ioctl, unsigned long arg) 2497 unsigned int ioctl, unsigned long arg)
2329{ 2498{
@@ -2342,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp,
2342 2511
2343 r = -EFAULT; 2512 r = -EFAULT;
2344 if (copy_from_user(&kvm_userspace_mem, argp, 2513 if (copy_from_user(&kvm_userspace_mem, argp,
2345 sizeof kvm_userspace_mem)) 2514 sizeof(kvm_userspace_mem)))
2346 goto out; 2515 goto out;
2347 2516
2348 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); 2517 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
@@ -2352,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp,
2352 struct kvm_dirty_log log; 2521 struct kvm_dirty_log log;
2353 2522
2354 r = -EFAULT; 2523 r = -EFAULT;
2355 if (copy_from_user(&log, argp, sizeof log)) 2524 if (copy_from_user(&log, argp, sizeof(log)))
2356 goto out; 2525 goto out;
2357 r = kvm_vm_ioctl_get_dirty_log(kvm, &log); 2526 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
2358 break; 2527 break;
@@ -2360,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp,
2360#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 2529#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
2361 case KVM_REGISTER_COALESCED_MMIO: { 2530 case KVM_REGISTER_COALESCED_MMIO: {
2362 struct kvm_coalesced_mmio_zone zone; 2531 struct kvm_coalesced_mmio_zone zone;
2532
2363 r = -EFAULT; 2533 r = -EFAULT;
2364 if (copy_from_user(&zone, argp, sizeof zone)) 2534 if (copy_from_user(&zone, argp, sizeof(zone)))
2365 goto out; 2535 goto out;
2366 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); 2536 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
2367 break; 2537 break;
2368 } 2538 }
2369 case KVM_UNREGISTER_COALESCED_MMIO: { 2539 case KVM_UNREGISTER_COALESCED_MMIO: {
2370 struct kvm_coalesced_mmio_zone zone; 2540 struct kvm_coalesced_mmio_zone zone;
2541
2371 r = -EFAULT; 2542 r = -EFAULT;
2372 if (copy_from_user(&zone, argp, sizeof zone)) 2543 if (copy_from_user(&zone, argp, sizeof(zone)))
2373 goto out; 2544 goto out;
2374 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); 2545 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
2375 break; 2546 break;
@@ -2379,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp,
2379 struct kvm_irqfd data; 2550 struct kvm_irqfd data;
2380 2551
2381 r = -EFAULT; 2552 r = -EFAULT;
2382 if (copy_from_user(&data, argp, sizeof data)) 2553 if (copy_from_user(&data, argp, sizeof(data)))
2383 goto out; 2554 goto out;
2384 r = kvm_irqfd(kvm, &data); 2555 r = kvm_irqfd(kvm, &data);
2385 break; 2556 break;
@@ -2388,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp,
2388 struct kvm_ioeventfd data; 2559 struct kvm_ioeventfd data;
2389 2560
2390 r = -EFAULT; 2561 r = -EFAULT;
2391 if (copy_from_user(&data, argp, sizeof data)) 2562 if (copy_from_user(&data, argp, sizeof(data)))
2392 goto out; 2563 goto out;
2393 r = kvm_ioeventfd(kvm, &data); 2564 r = kvm_ioeventfd(kvm, &data);
2394 break; 2565 break;
@@ -2409,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp,
2409 struct kvm_msi msi; 2580 struct kvm_msi msi;
2410 2581
2411 r = -EFAULT; 2582 r = -EFAULT;
2412 if (copy_from_user(&msi, argp, sizeof msi)) 2583 if (copy_from_user(&msi, argp, sizeof(msi)))
2413 goto out; 2584 goto out;
2414 r = kvm_send_userspace_msi(kvm, &msi); 2585 r = kvm_send_userspace_msi(kvm, &msi);
2415 break; 2586 break;
@@ -2421,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp,
2421 struct kvm_irq_level irq_event; 2592 struct kvm_irq_level irq_event;
2422 2593
2423 r = -EFAULT; 2594 r = -EFAULT;
2424 if (copy_from_user(&irq_event, argp, sizeof irq_event)) 2595 if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
2425 goto out; 2596 goto out;
2426 2597
2427 r = kvm_vm_ioctl_irq_line(kvm, &irq_event, 2598 r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
@@ -2431,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp,
2431 2602
2432 r = -EFAULT; 2603 r = -EFAULT;
2433 if (ioctl == KVM_IRQ_LINE_STATUS) { 2604 if (ioctl == KVM_IRQ_LINE_STATUS) {
2434 if (copy_to_user(argp, &irq_event, sizeof irq_event)) 2605 if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
2435 goto out; 2606 goto out;
2436 } 2607 }
2437 2608
@@ -2464,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp,
2464 goto out_free_irq_routing; 2635 goto out_free_irq_routing;
2465 r = kvm_set_irq_routing(kvm, entries, routing.nr, 2636 r = kvm_set_irq_routing(kvm, entries, routing.nr,
2466 routing.flags); 2637 routing.flags);
2467 out_free_irq_routing: 2638out_free_irq_routing:
2468 vfree(entries); 2639 vfree(entries);
2469 break; 2640 break;
2470 } 2641 }
@@ -2487,16 +2658,17 @@ static long kvm_vm_ioctl(struct file *filp,
2487 r = 0; 2658 r = 0;
2488 break; 2659 break;
2489 } 2660 }
2661 case KVM_CHECK_EXTENSION:
2662 r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
2663 break;
2490 default: 2664 default:
2491 r = kvm_arch_vm_ioctl(filp, ioctl, arg); 2665 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
2492 if (r == -ENOTTY)
2493 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
2494 } 2666 }
2495out: 2667out:
2496 return r; 2668 return r;
2497} 2669}
2498 2670
2499#ifdef CONFIG_COMPAT 2671#ifdef CONFIG_KVM_COMPAT
2500struct compat_kvm_dirty_log { 2672struct compat_kvm_dirty_log {
2501 __u32 slot; 2673 __u32 slot;
2502 __u32 padding1; 2674 __u32 padding1;
@@ -2543,7 +2715,7 @@ out:
2543static struct file_operations kvm_vm_fops = { 2715static struct file_operations kvm_vm_fops = {
2544 .release = kvm_vm_release, 2716 .release = kvm_vm_release,
2545 .unlocked_ioctl = kvm_vm_ioctl, 2717 .unlocked_ioctl = kvm_vm_ioctl,
2546#ifdef CONFIG_COMPAT 2718#ifdef CONFIG_KVM_COMPAT
2547 .compat_ioctl = kvm_vm_compat_ioctl, 2719 .compat_ioctl = kvm_vm_compat_ioctl,
2548#endif 2720#endif
2549 .llseek = noop_llseek, 2721 .llseek = noop_llseek,
@@ -2571,33 +2743,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
2571 return r; 2743 return r;
2572} 2744}
2573 2745
2574static long kvm_dev_ioctl_check_extension_generic(long arg)
2575{
2576 switch (arg) {
2577 case KVM_CAP_USER_MEMORY:
2578 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
2579 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
2580#ifdef CONFIG_KVM_APIC_ARCHITECTURE
2581 case KVM_CAP_SET_BOOT_CPU_ID:
2582#endif
2583 case KVM_CAP_INTERNAL_ERROR_DATA:
2584#ifdef CONFIG_HAVE_KVM_MSI
2585 case KVM_CAP_SIGNAL_MSI:
2586#endif
2587#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
2588 case KVM_CAP_IRQFD_RESAMPLE:
2589#endif
2590 return 1;
2591#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
2592 case KVM_CAP_IRQ_ROUTING:
2593 return KVM_MAX_IRQ_ROUTES;
2594#endif
2595 default:
2596 break;
2597 }
2598 return kvm_dev_ioctl_check_extension(arg);
2599}
2600
2601static long kvm_dev_ioctl(struct file *filp, 2746static long kvm_dev_ioctl(struct file *filp,
2602 unsigned int ioctl, unsigned long arg) 2747 unsigned int ioctl, unsigned long arg)
2603{ 2748{
@@ -2605,7 +2750,6 @@ static long kvm_dev_ioctl(struct file *filp,
2605 2750
2606 switch (ioctl) { 2751 switch (ioctl) {
2607 case KVM_GET_API_VERSION: 2752 case KVM_GET_API_VERSION:
2608 r = -EINVAL;
2609 if (arg) 2753 if (arg)
2610 goto out; 2754 goto out;
2611 r = KVM_API_VERSION; 2755 r = KVM_API_VERSION;
@@ -2614,10 +2758,9 @@ static long kvm_dev_ioctl(struct file *filp,
2614 r = kvm_dev_ioctl_create_vm(arg); 2758 r = kvm_dev_ioctl_create_vm(arg);
2615 break; 2759 break;
2616 case KVM_CHECK_EXTENSION: 2760 case KVM_CHECK_EXTENSION:
2617 r = kvm_dev_ioctl_check_extension_generic(arg); 2761 r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
2618 break; 2762 break;
2619 case KVM_GET_VCPU_MMAP_SIZE: 2763 case KVM_GET_VCPU_MMAP_SIZE:
2620 r = -EINVAL;
2621 if (arg) 2764 if (arg)
2622 goto out; 2765 goto out;
2623 r = PAGE_SIZE; /* struct kvm_run */ 2766 r = PAGE_SIZE; /* struct kvm_run */
@@ -2662,13 +2805,12 @@ static void hardware_enable_nolock(void *junk)
2662 2805
2663 cpumask_set_cpu(cpu, cpus_hardware_enabled); 2806 cpumask_set_cpu(cpu, cpus_hardware_enabled);
2664 2807
2665 r = kvm_arch_hardware_enable(NULL); 2808 r = kvm_arch_hardware_enable();
2666 2809
2667 if (r) { 2810 if (r) {
2668 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2811 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
2669 atomic_inc(&hardware_enable_failed); 2812 atomic_inc(&hardware_enable_failed);
2670 printk(KERN_INFO "kvm: enabling virtualization on " 2813 pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
2671 "CPU%d failed\n", cpu);
2672 } 2814 }
2673} 2815}
2674 2816
@@ -2687,7 +2829,7 @@ static void hardware_disable_nolock(void *junk)
2687 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 2829 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
2688 return; 2830 return;
2689 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2831 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
2690 kvm_arch_hardware_disable(NULL); 2832 kvm_arch_hardware_disable();
2691} 2833}
2692 2834
2693static void hardware_disable(void) 2835static void hardware_disable(void)
@@ -2744,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
2744 val &= ~CPU_TASKS_FROZEN; 2886 val &= ~CPU_TASKS_FROZEN;
2745 switch (val) { 2887 switch (val) {
2746 case CPU_DYING: 2888 case CPU_DYING:
2747 printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", 2889 pr_info("kvm: disabling virtualization on CPU%d\n",
2748 cpu); 2890 cpu);
2749 hardware_disable(); 2891 hardware_disable();
2750 break; 2892 break;
2751 case CPU_STARTING: 2893 case CPU_STARTING:
2752 printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", 2894 pr_info("kvm: enabling virtualization on CPU%d\n",
2753 cpu); 2895 cpu);
2754 hardware_enable(); 2896 hardware_enable();
2755 break; 2897 break;
@@ -2766,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
2766 * 2908 *
2767 * And Intel TXT required VMX off for all cpu when system shutdown. 2909 * And Intel TXT required VMX off for all cpu when system shutdown.
2768 */ 2910 */
2769 printk(KERN_INFO "kvm: exiting hardware virtualization\n"); 2911 pr_info("kvm: exiting hardware virtualization\n");
2770 kvm_rebooting = true; 2912 kvm_rebooting = true;
2771 on_each_cpu(hardware_disable_nolock, NULL, 1); 2913 on_each_cpu(hardware_disable_nolock, NULL, 1);
2772 return NOTIFY_OK; 2914 return NOTIFY_OK;
@@ -2790,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
2790} 2932}
2791 2933
2792static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, 2934static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
2793 const struct kvm_io_range *r2) 2935 const struct kvm_io_range *r2)
2794{ 2936{
2795 if (r1->addr < r2->addr) 2937 if (r1->addr < r2->addr)
2796 return -1; 2938 return -1;
@@ -2843,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
2843 return off; 2985 return off;
2844} 2986}
2845 2987
2846static int __kvm_io_bus_write(struct kvm_io_bus *bus, 2988static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
2847 struct kvm_io_range *range, const void *val) 2989 struct kvm_io_range *range, const void *val)
2848{ 2990{
2849 int idx; 2991 int idx;
@@ -2854,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
2854 2996
2855 while (idx < bus->dev_count && 2997 while (idx < bus->dev_count &&
2856 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { 2998 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
2857 if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, 2999 if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
2858 range->len, val)) 3000 range->len, val))
2859 return idx; 3001 return idx;
2860 idx++; 3002 idx++;
@@ -2864,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
2864} 3006}
2865 3007
2866/* kvm_io_bus_write - called under kvm->slots_lock */ 3008/* kvm_io_bus_write - called under kvm->slots_lock */
2867int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 3009int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
2868 int len, const void *val) 3010 int len, const void *val)
2869{ 3011{
2870 struct kvm_io_bus *bus; 3012 struct kvm_io_bus *bus;
@@ -2876,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2876 .len = len, 3018 .len = len,
2877 }; 3019 };
2878 3020
2879 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 3021 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
2880 r = __kvm_io_bus_write(bus, &range, val); 3022 r = __kvm_io_bus_write(vcpu, bus, &range, val);
2881 return r < 0 ? r : 0; 3023 return r < 0 ? r : 0;
2882} 3024}
2883 3025
2884/* kvm_io_bus_write_cookie - called under kvm->slots_lock */ 3026/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
2885int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 3027int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
2886 int len, const void *val, long cookie) 3028 gpa_t addr, int len, const void *val, long cookie)
2887{ 3029{
2888 struct kvm_io_bus *bus; 3030 struct kvm_io_bus *bus;
2889 struct kvm_io_range range; 3031 struct kvm_io_range range;
@@ -2893,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2893 .len = len, 3035 .len = len,
2894 }; 3036 };
2895 3037
2896 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 3038 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
2897 3039
2898 /* First try the device referenced by cookie. */ 3040 /* First try the device referenced by cookie. */
2899 if ((cookie >= 0) && (cookie < bus->dev_count) && 3041 if ((cookie >= 0) && (cookie < bus->dev_count) &&
2900 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) 3042 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
2901 if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, 3043 if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
2902 val)) 3044 val))
2903 return cookie; 3045 return cookie;
2904 3046
@@ -2906,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2906 * cookie contained garbage; fall back to search and return the 3048 * cookie contained garbage; fall back to search and return the
2907 * correct cookie value. 3049 * correct cookie value.
2908 */ 3050 */
2909 return __kvm_io_bus_write(bus, &range, val); 3051 return __kvm_io_bus_write(vcpu, bus, &range, val);
2910} 3052}
2911 3053
2912static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, 3054static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
2913 void *val) 3055 struct kvm_io_range *range, void *val)
2914{ 3056{
2915 int idx; 3057 int idx;
2916 3058
@@ -2920,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
2920 3062
2921 while (idx < bus->dev_count && 3063 while (idx < bus->dev_count &&
2922 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { 3064 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
2923 if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, 3065 if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
2924 range->len, val)) 3066 range->len, val))
2925 return idx; 3067 return idx;
2926 idx++; 3068 idx++;
@@ -2931,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
2931EXPORT_SYMBOL_GPL(kvm_io_bus_write); 3073EXPORT_SYMBOL_GPL(kvm_io_bus_write);
2932 3074
2933/* kvm_io_bus_read - called under kvm->slots_lock */ 3075/* kvm_io_bus_read - called under kvm->slots_lock */
2934int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, 3076int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
2935 int len, void *val) 3077 int len, void *val)
2936{ 3078{
2937 struct kvm_io_bus *bus; 3079 struct kvm_io_bus *bus;
@@ -2943,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
2943 .len = len, 3085 .len = len,
2944 }; 3086 };
2945 3087
2946 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); 3088 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
2947 r = __kvm_io_bus_read(bus, &range, val); 3089 r = __kvm_io_bus_read(vcpu, bus, &range, val);
2948 return r < 0 ? r : 0; 3090 return r < 0 ? r : 0;
2949} 3091}
2950 3092
@@ -3114,9 +3256,12 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
3114static void kvm_sched_in(struct preempt_notifier *pn, int cpu) 3256static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3115{ 3257{
3116 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); 3258 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
3259
3117 if (vcpu->preempted) 3260 if (vcpu->preempted)
3118 vcpu->preempted = false; 3261 vcpu->preempted = false;
3119 3262
3263 kvm_arch_sched_in(vcpu, cpu);
3264
3120 kvm_arch_vcpu_load(vcpu, cpu); 3265 kvm_arch_vcpu_load(vcpu, cpu);
3121} 3266}
3122 3267
@@ -3193,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
3193 3338
3194 r = misc_register(&kvm_dev); 3339 r = misc_register(&kvm_dev);
3195 if (r) { 3340 if (r) {
3196 printk(KERN_ERR "kvm: misc device register failed\n"); 3341 pr_err("kvm: misc device register failed\n");
3197 goto out_unreg; 3342 goto out_unreg;
3198 } 3343 }
3199 3344
@@ -3204,10 +3349,13 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
3204 3349
3205 r = kvm_init_debug(); 3350 r = kvm_init_debug();
3206 if (r) { 3351 if (r) {
3207 printk(KERN_ERR "kvm: create debugfs files failed\n"); 3352 pr_err("kvm: create debugfs files failed\n");
3208 goto out_undebugfs; 3353 goto out_undebugfs;
3209 } 3354 }
3210 3355
3356 r = kvm_vfio_ops_init();
3357 WARN_ON(r);
3358
3211 return 0; 3359 return 0;
3212 3360
3213out_undebugfs: 3361out_undebugfs:
@@ -3248,5 +3396,6 @@ void kvm_exit(void)
3248 kvm_arch_exit(); 3396 kvm_arch_exit();
3249 kvm_irqfd_exit(); 3397 kvm_irqfd_exit();
3250 free_cpumask_var(cpus_hardware_enabled); 3398 free_cpumask_var(cpus_hardware_enabled);
3399 kvm_vfio_ops_exit();
3251} 3400}
3252EXPORT_SYMBOL_GPL(kvm_exit); 3401EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f935c7..620e37f741b8 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/uaccess.h> 19#include <linux/uaccess.h>
20#include <linux/vfio.h> 20#include <linux/vfio.h>
21#include "vfio.h"
21 22
22struct kvm_vfio_group { 23struct kvm_vfio_group {
23 struct list_head node; 24 struct list_head node;
@@ -246,6 +247,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
246 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ 247 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
247} 248}
248 249
250static int kvm_vfio_create(struct kvm_device *dev, u32 type);
251
252static struct kvm_device_ops kvm_vfio_ops = {
253 .name = "kvm-vfio",
254 .create = kvm_vfio_create,
255 .destroy = kvm_vfio_destroy,
256 .set_attr = kvm_vfio_set_attr,
257 .has_attr = kvm_vfio_has_attr,
258};
259
249static int kvm_vfio_create(struct kvm_device *dev, u32 type) 260static int kvm_vfio_create(struct kvm_device *dev, u32 type)
250{ 261{
251 struct kvm_device *tmp; 262 struct kvm_device *tmp;
@@ -268,10 +279,12 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
268 return 0; 279 return 0;
269} 280}
270 281
271struct kvm_device_ops kvm_vfio_ops = { 282int kvm_vfio_ops_init(void)
272 .name = "kvm-vfio", 283{
273 .create = kvm_vfio_create, 284 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
274 .destroy = kvm_vfio_destroy, 285}
275 .set_attr = kvm_vfio_set_attr, 286
276 .has_attr = kvm_vfio_has_attr, 287void kvm_vfio_ops_exit(void)
277}; 288{
289 kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
290}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..ab88c7dc0514
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,17 @@
1#ifndef __KVM_VFIO_H
2#define __KVM_VFIO_H
3
4#ifdef CONFIG_KVM_VFIO
5int kvm_vfio_ops_init(void);
6void kvm_vfio_ops_exit(void);
7#else
8static inline int kvm_vfio_ops_init(void)
9{
10 return 0;
11}
12static inline void kvm_vfio_ops_exit(void)
13{
14}
15#endif
16
17#endif
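The new vfio.h above follows the usual conditional-stub pattern: when CONFIG_KVM_VFIO is not set, callers such as kvm_init() still compile and link against inline no-ops. A generic, self-contained sketch of the idea (names invented, collapsed into one file so it builds as-is):

#include <stdio.h>

/* #define CONFIG_DEMO_FEATURE 1 */    /* flip on to require a real implementation */

#ifdef CONFIG_DEMO_FEATURE
int demo_feature_init(void);
void demo_feature_exit(void);
#else
/* feature compiled out: callers still build against no-op stubs */
static inline int demo_feature_init(void) { return 0; }
static inline void demo_feature_exit(void) { }
#endif

int main(void)
{
    printf("init: %d\n", demo_feature_init());
    demo_feature_exit();
    return 0;
}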