Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/vgic.c  | 744
-rw-r--r--  virt/kvm/async_pf.c  |   4
-rw-r--r--  virt/kvm/eventfd.c   |   4
-rw-r--r--  virt/kvm/ioapic.c    |  46
-rw-r--r--  virt/kvm/ioapic.h    |   2
-rw-r--r--  virt/kvm/kvm_main.c  | 192
-rw-r--r--  virt/kvm/vfio.c      |  22
-rw-r--r--  virt/kvm/vfio.h      |  13
8 files changed, 779 insertions, 248 deletions
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..862967852d5a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
36 * How the whole thing works (courtesy of Christoffer Dall): 36 * How the whole thing works (courtesy of Christoffer Dall):
37 * 37 *
38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if 38 * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
39 * something is pending 39 * something is pending on the CPU interface.
40 * - VGIC pending interrupts are stored on the vgic.irq_state vgic 40 * - Interrupts that are pending on the distributor are stored on the
41 * bitmap (this bitmap is updated by both user land ioctls and guest 41 * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
42 * mmio ops, and other in-kernel peripherals such as the 42 * ioctls and guest mmio ops, and other in-kernel peripherals such as the
43 * arch. timers) and indicate the 'wire' state. 43 * arch. timers).
44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is 44 * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
45 * recalculated 45 * recalculated
46 * - To calculate the oracle, we need info for each cpu from 46 * - To calculate the oracle, we need info for each cpu from
47 * compute_pending_for_cpu, which considers: 47 * compute_pending_for_cpu, which considers:
48 * - PPI: dist->irq_state & dist->irq_enable 48 * - PPI: dist->irq_pending & dist->irq_enable
49 * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target 49 * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
50 * - irq_spi_target is a 'formatted' version of the GICD_ICFGR 50 * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
51 * registers, stored on each vcpu. We only keep one bit of 51 * registers, stored on each vcpu. We only keep one bit of
52 * information per interrupt, making sure that only one vcpu can 52 * information per interrupt, making sure that only one vcpu can
53 * accept the interrupt. 53 * accept the interrupt.
54 * - If any of the above state changes, we must recalculate the oracle.
54 * - The same is true when injecting an interrupt, except that we only 55 * - The same is true when injecting an interrupt, except that we only
55 * consider a single interrupt at a time. The irq_spi_cpu array 56 * consider a single interrupt at a time. The irq_spi_cpu array
56 * contains the target CPU for each SPI. 57 * contains the target CPU for each SPI.
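To make the oracle recalculation concrete, here is a minimal self-contained sketch of the per-vcpu computation described above (plain 32-bit masks instead of the kernel's vgic_bitmap helpers; the toy_* names and fixed sizes are illustrative assumptions, not the actual vgic structures):

#include <stdbool.h>
#include <stdint.h>

#define TOY_NR_CPUS 8

struct toy_dist {
	uint32_t pending_private[TOY_NR_CPUS]; /* SGIs + PPIs, one word per vcpu */
	uint32_t enabled_private[TOY_NR_CPUS];
	uint32_t pending_shared;               /* SPIs, shared by every vcpu     */
	uint32_t enabled_shared;
	uint32_t spi_target[TOY_NR_CPUS];      /* one bit per SPI, per vcpu      */
};

/* PPI/SGI: pending & enabled.  SPI: pending & enabled & targeted at this vcpu. */
static bool toy_compute_pending_for_cpu(const struct toy_dist *d, int vcpu)
{
	uint32_t priv   = d->pending_private[vcpu] & d->enabled_private[vcpu];
	uint32_t shared = d->pending_shared & d->enabled_shared & d->spi_target[vcpu];

	return priv || shared;
}

/* irq_pending_on_cpu (the oracle) is this predicate evaluated for every vcpu,
 * and it must be recomputed whenever any of the inputs above changes. */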
@@ -60,13 +61,18 @@
60 * the 'line' again. This is achieved as such: 61 * the 'line' again. This is achieved as such:
61 * 62 *
62 * - When a level interrupt is moved onto a vcpu, the corresponding 63 * - When a level interrupt is moved onto a vcpu, the corresponding
63 * bit in irq_active is set. As long as this bit is set, the line 64 * bit in irq_queued is set. As long as this bit is set, the line
64 * will be ignored for further interrupts. The interrupt is injected 65 * will be ignored for further interrupts. The interrupt is injected
65 * into the vcpu with the GICH_LR_EOI bit set (generate a 66 * into the vcpu with the GICH_LR_EOI bit set (generate a
66 * maintenance interrupt on EOI). 67 * maintenance interrupt on EOI).
67 * - When the interrupt is EOIed, the maintenance interrupt fires, 68 * - When the interrupt is EOIed, the maintenance interrupt fires,
68 * and clears the corresponding bit in irq_active. This allow the 69 * and clears the corresponding bit in irq_queued. This allows the
69 * interrupt line to be sampled again. 70 * interrupt line to be sampled again.
71 * - Note that level-triggered interrupts can also be set to pending from
72 * writes to GICD_ISPENDRn and lowering the external input line does not
73 * cause the interrupt to become inactive in such a situation.
74 * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
75 * inactive as long as the external input line is held high.
70 */ 76 */
71 77
72#define VGIC_ADDR_UNDEF (-1) 78#define VGIC_ADDR_UNDEF (-1)
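The level-triggered lifecycle spelled out above reduces to a small amount of state per interrupt; the sketch below (hypothetical toy_* helpers, not the kernel code) captures the rule the comment describes:

#include <stdbool.h>

struct toy_level_irq {
	bool line;       /* external input level (dist->irq_level)          */
	bool soft_pend;  /* latched by a guest write to GICD_ISPENDRn       */
	bool queued;     /* injected into a list register with GICH_LR_EOI  */
};

/* Pending as long as either the wire is high or software latched it; a
 * GICD_ICPENDRn write clears soft_pend but cannot override a high line. */
static bool toy_level_irq_pending(const struct toy_level_irq *irq)
{
	return irq->line || irq->soft_pend;
}

/* The line is only sampled again once the EOI maintenance interrupt has
 * cleared the queued flag. */
static bool toy_can_sample(const struct toy_level_irq *irq)
{
	return !irq->queued;
}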
@@ -89,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
89static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); 95static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
90static void vgic_update_state(struct kvm *kvm); 96static void vgic_update_state(struct kvm *kvm);
91static void vgic_kick_vcpus(struct kvm *kvm); 97static void vgic_kick_vcpus(struct kvm *kvm);
98static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
92static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); 99static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
93static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); 100static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
94static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); 101static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
@@ -99,10 +106,8 @@ static const struct vgic_ops *vgic_ops;
99static const struct vgic_params *vgic; 106static const struct vgic_params *vgic;
100 107
101/* 108/*
102 * struct vgic_bitmap contains unions that provide two views of 109 * struct vgic_bitmap contains a bitmap made of unsigned longs, but
103 * the same data. In one case it is an array of registers of 110 * extracts u32s out of them.
104 * u32's, and in the other case it is a bitmap of unsigned
105 * longs.
106 * 111 *
107 * This does not work on 64-bit BE systems, because the bitmap access 112 * This does not work on 64-bit BE systems, because the bitmap access
108 * will store two consecutive 32-bit words with the higher-addressed 113 * will store two consecutive 32-bit words with the higher-addressed
@@ -118,23 +123,45 @@ static const struct vgic_params *vgic;
118#define REG_OFFSET_SWIZZLE 0 123#define REG_OFFSET_SWIZZLE 0
119#endif 124#endif
120 125
126static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
127{
128 int nr_longs;
129
130 nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
131
132 b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
133 if (!b->private)
134 return -ENOMEM;
135
136 b->shared = b->private + nr_cpus;
137
138 return 0;
139}
140
141static void vgic_free_bitmap(struct vgic_bitmap *b)
142{
143 kfree(b->private);
144 b->private = NULL;
145 b->shared = NULL;
146}
147
121static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, 148static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
122 int cpuid, u32 offset) 149 int cpuid, u32 offset)
123{ 150{
124 offset >>= 2; 151 offset >>= 2;
125 if (!offset) 152 if (!offset)
126 return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); 153 return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
127 else 154 else
128 return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); 155 return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
129} 156}
130 157
131static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, 158static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
132 int cpuid, int irq) 159 int cpuid, int irq)
133{ 160{
134 if (irq < VGIC_NR_PRIVATE_IRQS) 161 if (irq < VGIC_NR_PRIVATE_IRQS)
135 return test_bit(irq, x->percpu[cpuid].reg_ul); 162 return test_bit(irq, x->private + cpuid);
136 163
137 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); 164 return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
138} 165}
139 166
140static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, 167static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
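For orientation, a worked example of the layout vgic_init_bitmap() builds, assuming a 64-bit host with 4 vcpus and 256 interrupts (illustrative numbers only):

/*
 * nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS)
 *          = 4       + BITS_TO_LONGS(256 - 32)
 *          = 4       + 4                  (224 bits, 64 bits per long)
 *          = 8 unsigned longs
 *
 * b->private: longs 0..3, one long of SGI/PPI state per vcpu
 * b->shared : longs 4..7, the SPI state shared by all vcpus
 *
 * vgic_bitmap_get_reg() then hands out u32-sized windows into this array,
 * XOR-ing the index with REG_OFFSET_SWIZZLE so the correct 32-bit half of
 * each long is selected on 64-bit big-endian hosts.
 */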
@@ -143,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
143 unsigned long *reg; 170 unsigned long *reg;
144 171
145 if (irq < VGIC_NR_PRIVATE_IRQS) { 172 if (irq < VGIC_NR_PRIVATE_IRQS) {
146 reg = x->percpu[cpuid].reg_ul; 173 reg = x->private + cpuid;
147 } else { 174 } else {
148 reg = x->shared.reg_ul; 175 reg = x->shared;
149 irq -= VGIC_NR_PRIVATE_IRQS; 176 irq -= VGIC_NR_PRIVATE_IRQS;
150 } 177 }
151 178
@@ -157,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
157 184
158static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) 185static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
159{ 186{
160 if (unlikely(cpuid >= VGIC_MAX_CPUS)) 187 return x->private + cpuid;
161 return NULL;
162 return x->percpu[cpuid].reg_ul;
163} 188}
164 189
165static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) 190static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
166{ 191{
167 return x->shared.reg_ul; 192 return x->shared;
193}
194
195static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
196{
197 int size;
198
199 size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
200 size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
201
202 x->private = kzalloc(size, GFP_KERNEL);
203 if (!x->private)
204 return -ENOMEM;
205
206 x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
207 return 0;
208}
209
210static void vgic_free_bytemap(struct vgic_bytemap *b)
211{
212 kfree(b->private);
213 b->private = NULL;
214 b->shared = NULL;
168} 215}
169 216
170static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) 217static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
171{ 218{
172 offset >>= 2; 219 u32 *reg;
173 BUG_ON(offset > (VGIC_NR_IRQS / 4)); 220
174 if (offset < 8) 221 if (offset < VGIC_NR_PRIVATE_IRQS) {
175 return x->percpu[cpuid] + offset; 222 reg = x->private;
176 else 223 offset += cpuid * VGIC_NR_PRIVATE_IRQS;
177 return x->shared + offset - 8; 224 } else {
225 reg = x->shared;
226 offset -= VGIC_NR_PRIVATE_IRQS;
227 }
228
229 return reg + (offset / sizeof(u32));
178} 230}
179 231
180#define VGIC_CFG_LEVEL 0 232#define VGIC_CFG_LEVEL 0
@@ -196,46 +248,81 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
196 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); 248 return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
197} 249}
198 250
199static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) 251static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
252{
253 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
254
255 return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
256}
257
258static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
259{
260 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
261
262 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
263}
264
265static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
266{
267 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
268
269 vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
270}
271
272static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
200{ 273{
201 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 274 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
202 275
203 return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); 276 return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
204} 277}
205 278
206static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) 279static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
207{ 280{
208 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 281 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
209 282
210 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); 283 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
211} 284}
212 285
213static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) 286static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
214{ 287{
215 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 288 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
216 289
217 vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); 290 vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
291}
292
293static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
294{
295 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
296
297 return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
298}
299
300static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
301{
302 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
303
304 vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
218} 305}
219 306
220static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) 307static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
221{ 308{
222 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 309 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
223 310
224 return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); 311 return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
225} 312}
226 313
227static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) 314static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
228{ 315{
229 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 316 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
230 317
231 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); 318 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
232} 319}
233 320
234static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) 321static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
235{ 322{
236 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 323 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
237 324
238 vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); 325 vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
239} 326}
240 327
241static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) 328static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -256,6 +343,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
256 vcpu->arch.vgic_cpu.pending_shared); 343 vcpu->arch.vgic_cpu.pending_shared);
257} 344}
258 345
346static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
347{
348 return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
349}
350
259static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) 351static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
260{ 352{
261 return le32_to_cpu(*((u32 *)mmio->data)) & mask; 353 return le32_to_cpu(*((u32 *)mmio->data)) & mask;
@@ -347,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
347 439
348 case 4: /* GICD_TYPER */ 440 case 4: /* GICD_TYPER */
349 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; 441 reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
350 reg |= (VGIC_NR_IRQS >> 5) - 1; 442 reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
351 vgic_reg_access(mmio, &reg, word_offset, 443 vgic_reg_access(mmio, &reg, word_offset,
352 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); 444 ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
353 break; 445 break;
@@ -409,11 +501,33 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
409 struct kvm_exit_mmio *mmio, 501 struct kvm_exit_mmio *mmio,
410 phys_addr_t offset) 502 phys_addr_t offset)
411{ 503{
412 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 504 u32 *reg, orig;
413 vcpu->vcpu_id, offset); 505 u32 level_mask;
506 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
507
508 reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
509 level_mask = (~(*reg));
510
511 /* Mark both level and edge triggered irqs as pending */
512 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
513 orig = *reg;
414 vgic_reg_access(mmio, reg, offset, 514 vgic_reg_access(mmio, reg, offset,
415 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); 515 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
516
416 if (mmio->is_write) { 517 if (mmio->is_write) {
518 /* Set the soft-pending flag only for level-triggered irqs */
519 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
520 vcpu->vcpu_id, offset);
521 vgic_reg_access(mmio, reg, offset,
522 ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
523 *reg &= level_mask;
524
525 /* Ignore writes to SGIs */
526 if (offset < 2) {
527 *reg &= ~0xffff;
528 *reg |= orig & 0xffff;
529 }
530
417 vgic_update_state(vcpu->kvm); 531 vgic_update_state(vcpu->kvm);
418 return true; 532 return true;
419 } 533 }
@@ -425,11 +539,34 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
425 struct kvm_exit_mmio *mmio, 539 struct kvm_exit_mmio *mmio,
426 phys_addr_t offset) 540 phys_addr_t offset)
427{ 541{
428 u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, 542 u32 *level_active;
429 vcpu->vcpu_id, offset); 543 u32 *reg, orig;
544 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
545
546 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
547 orig = *reg;
430 vgic_reg_access(mmio, reg, offset, 548 vgic_reg_access(mmio, reg, offset,
431 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); 549 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
432 if (mmio->is_write) { 550 if (mmio->is_write) {
551 /* Re-set level triggered level-active interrupts */
552 level_active = vgic_bitmap_get_reg(&dist->irq_level,
553 vcpu->vcpu_id, offset);
554 reg = vgic_bitmap_get_reg(&dist->irq_pending,
555 vcpu->vcpu_id, offset);
556 *reg |= *level_active;
557
558 /* Ignore writes to SGIs */
559 if (offset < 2) {
560 *reg &= ~0xffff;
561 *reg |= orig & 0xffff;
562 }
563
564 /* Clear soft-pending flags */
565 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
566 vcpu->vcpu_id, offset);
567 vgic_reg_access(mmio, reg, offset,
568 ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
569
433 vgic_update_state(vcpu->kvm); 570 vgic_update_state(vcpu->kvm);
434 return true; 571 return true;
435 } 572 }
@@ -651,9 +788,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
651 * is fine, then we are only setting a few bits that were 788 * is fine, then we are only setting a few bits that were
652 * already set. 789 * already set.
653 */ 790 */
654 vgic_dist_irq_set(vcpu, lr.irq); 791 vgic_dist_irq_set_pending(vcpu, lr.irq);
655 if (lr.irq < VGIC_NR_SGIS) 792 if (lr.irq < VGIC_NR_SGIS)
656 dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; 793 *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
657 lr.state &= ~LR_STATE_PENDING; 794 lr.state &= ~LR_STATE_PENDING;
658 vgic_set_lr(vcpu, i, lr); 795 vgic_set_lr(vcpu, i, lr);
659 796
@@ -662,8 +799,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
662 * active), then the LR does not hold any useful info and can 799 * active), then the LR does not hold any useful info and can
663 * be marked as free for other use. 800 * be marked as free for other use.
664 */ 801 */
665 if (!(lr.state & LR_STATE_MASK)) 802 if (!(lr.state & LR_STATE_MASK)) {
666 vgic_retire_lr(i, lr.irq, vcpu); 803 vgic_retire_lr(i, lr.irq, vcpu);
804 vgic_irq_clear_queued(vcpu, lr.irq);
805 }
667 806
668 /* Finally update the VGIC state. */ 807 /* Finally update the VGIC state. */
669 vgic_update_state(vcpu->kvm); 808 vgic_update_state(vcpu->kvm);
@@ -677,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
677{ 816{
678 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 817 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
679 int sgi; 818 int sgi;
680 int min_sgi = (offset & ~0x3) * 4; 819 int min_sgi = (offset & ~0x3);
681 int max_sgi = min_sgi + 3; 820 int max_sgi = min_sgi + 3;
682 int vcpu_id = vcpu->vcpu_id; 821 int vcpu_id = vcpu->vcpu_id;
683 u32 reg = 0; 822 u32 reg = 0;
@@ -685,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
685 /* Copy source SGIs from distributor side */ 824 /* Copy source SGIs from distributor side */
686 for (sgi = min_sgi; sgi <= max_sgi; sgi++) { 825 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
687 int shift = 8 * (sgi - min_sgi); 826 int shift = 8 * (sgi - min_sgi);
688 reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; 827 reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
689 } 828 }
690 829
691 mmio_data_write(mmio, ~0, reg); 830 mmio_data_write(mmio, ~0, reg);
@@ -698,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
698{ 837{
699 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 838 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
700 int sgi; 839 int sgi;
701 int min_sgi = (offset & ~0x3) * 4; 840 int min_sgi = (offset & ~0x3);
702 int max_sgi = min_sgi + 3; 841 int max_sgi = min_sgi + 3;
703 int vcpu_id = vcpu->vcpu_id; 842 int vcpu_id = vcpu->vcpu_id;
704 u32 reg; 843 u32 reg;
@@ -709,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
709 /* Clear pending SGIs on the distributor */ 848 /* Clear pending SGIs on the distributor */
710 for (sgi = min_sgi; sgi <= max_sgi; sgi++) { 849 for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
711 u8 mask = reg >> (8 * (sgi - min_sgi)); 850 u8 mask = reg >> (8 * (sgi - min_sgi));
851 u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
712 if (set) { 852 if (set) {
713 if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) 853 if ((*src & mask) != mask)
714 updated = true; 854 updated = true;
715 dist->irq_sgi_sources[vcpu_id][sgi] |= mask; 855 *src |= mask;
716 } else { 856 } else {
717 if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) 857 if (*src & mask)
718 updated = true; 858 updated = true;
719 dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; 859 *src &= ~mask;
720 } 860 }
721 } 861 }
722 862
@@ -755,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
755struct mmio_range { 895struct mmio_range {
756 phys_addr_t base; 896 phys_addr_t base;
757 unsigned long len; 897 unsigned long len;
898 int bits_per_irq;
758 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, 899 bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
759 phys_addr_t offset); 900 phys_addr_t offset);
760}; 901};
@@ -763,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
763 { 904 {
764 .base = GIC_DIST_CTRL, 905 .base = GIC_DIST_CTRL,
765 .len = 12, 906 .len = 12,
907 .bits_per_irq = 0,
766 .handle_mmio = handle_mmio_misc, 908 .handle_mmio = handle_mmio_misc,
767 }, 909 },
768 { 910 {
769 .base = GIC_DIST_IGROUP, 911 .base = GIC_DIST_IGROUP,
770 .len = VGIC_NR_IRQS / 8, 912 .len = VGIC_MAX_IRQS / 8,
913 .bits_per_irq = 1,
771 .handle_mmio = handle_mmio_raz_wi, 914 .handle_mmio = handle_mmio_raz_wi,
772 }, 915 },
773 { 916 {
774 .base = GIC_DIST_ENABLE_SET, 917 .base = GIC_DIST_ENABLE_SET,
775 .len = VGIC_NR_IRQS / 8, 918 .len = VGIC_MAX_IRQS / 8,
919 .bits_per_irq = 1,
776 .handle_mmio = handle_mmio_set_enable_reg, 920 .handle_mmio = handle_mmio_set_enable_reg,
777 }, 921 },
778 { 922 {
779 .base = GIC_DIST_ENABLE_CLEAR, 923 .base = GIC_DIST_ENABLE_CLEAR,
780 .len = VGIC_NR_IRQS / 8, 924 .len = VGIC_MAX_IRQS / 8,
925 .bits_per_irq = 1,
781 .handle_mmio = handle_mmio_clear_enable_reg, 926 .handle_mmio = handle_mmio_clear_enable_reg,
782 }, 927 },
783 { 928 {
784 .base = GIC_DIST_PENDING_SET, 929 .base = GIC_DIST_PENDING_SET,
785 .len = VGIC_NR_IRQS / 8, 930 .len = VGIC_MAX_IRQS / 8,
931 .bits_per_irq = 1,
786 .handle_mmio = handle_mmio_set_pending_reg, 932 .handle_mmio = handle_mmio_set_pending_reg,
787 }, 933 },
788 { 934 {
789 .base = GIC_DIST_PENDING_CLEAR, 935 .base = GIC_DIST_PENDING_CLEAR,
790 .len = VGIC_NR_IRQS / 8, 936 .len = VGIC_MAX_IRQS / 8,
937 .bits_per_irq = 1,
791 .handle_mmio = handle_mmio_clear_pending_reg, 938 .handle_mmio = handle_mmio_clear_pending_reg,
792 }, 939 },
793 { 940 {
794 .base = GIC_DIST_ACTIVE_SET, 941 .base = GIC_DIST_ACTIVE_SET,
795 .len = VGIC_NR_IRQS / 8, 942 .len = VGIC_MAX_IRQS / 8,
943 .bits_per_irq = 1,
796 .handle_mmio = handle_mmio_raz_wi, 944 .handle_mmio = handle_mmio_raz_wi,
797 }, 945 },
798 { 946 {
799 .base = GIC_DIST_ACTIVE_CLEAR, 947 .base = GIC_DIST_ACTIVE_CLEAR,
800 .len = VGIC_NR_IRQS / 8, 948 .len = VGIC_MAX_IRQS / 8,
949 .bits_per_irq = 1,
801 .handle_mmio = handle_mmio_raz_wi, 950 .handle_mmio = handle_mmio_raz_wi,
802 }, 951 },
803 { 952 {
804 .base = GIC_DIST_PRI, 953 .base = GIC_DIST_PRI,
805 .len = VGIC_NR_IRQS, 954 .len = VGIC_MAX_IRQS,
955 .bits_per_irq = 8,
806 .handle_mmio = handle_mmio_priority_reg, 956 .handle_mmio = handle_mmio_priority_reg,
807 }, 957 },
808 { 958 {
809 .base = GIC_DIST_TARGET, 959 .base = GIC_DIST_TARGET,
810 .len = VGIC_NR_IRQS, 960 .len = VGIC_MAX_IRQS,
961 .bits_per_irq = 8,
811 .handle_mmio = handle_mmio_target_reg, 962 .handle_mmio = handle_mmio_target_reg,
812 }, 963 },
813 { 964 {
814 .base = GIC_DIST_CONFIG, 965 .base = GIC_DIST_CONFIG,
815 .len = VGIC_NR_IRQS / 4, 966 .len = VGIC_MAX_IRQS / 4,
967 .bits_per_irq = 2,
816 .handle_mmio = handle_mmio_cfg_reg, 968 .handle_mmio = handle_mmio_cfg_reg,
817 }, 969 },
818 { 970 {
@@ -850,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
850 return NULL; 1002 return NULL;
851} 1003}
852 1004
1005static bool vgic_validate_access(const struct vgic_dist *dist,
1006 const struct mmio_range *range,
1007 unsigned long offset)
1008{
1009 int irq;
1010
1011 if (!range->bits_per_irq)
1012 return true; /* Not an irq-based access */
1013
1014 irq = offset * 8 / range->bits_per_irq;
1015 if (irq >= dist->nr_irqs)
1016 return false;
1017
1018 return true;
1019}
1020
853/** 1021/**
854 * vgic_handle_mmio - handle an in-kernel MMIO access 1022 * vgic_handle_mmio - handle an in-kernel MMIO access
855 * @vcpu: pointer to the vcpu performing the access 1023 * @vcpu: pointer to the vcpu performing the access
@@ -889,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
889 1057
890 spin_lock(&vcpu->kvm->arch.vgic.lock); 1058 spin_lock(&vcpu->kvm->arch.vgic.lock);
891 offset = mmio->phys_addr - range->base - base; 1059 offset = mmio->phys_addr - range->base - base;
892 updated_state = range->handle_mmio(vcpu, mmio, offset); 1060 if (vgic_validate_access(dist, range, offset)) {
1061 updated_state = range->handle_mmio(vcpu, mmio, offset);
1062 } else {
1063 vgic_reg_access(mmio, NULL, offset,
1064 ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
1065 updated_state = false;
1066 }
893 spin_unlock(&vcpu->kvm->arch.vgic.lock); 1067 spin_unlock(&vcpu->kvm->arch.vgic.lock);
894 kvm_prepare_mmio(run, mmio); 1068 kvm_prepare_mmio(run, mmio);
895 kvm_handle_mmio_return(vcpu, run); 1069 kvm_handle_mmio_return(vcpu, run);
@@ -900,6 +1074,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
900 return true; 1074 return true;
901} 1075}
902 1076
1077static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
1078{
1079 return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
1080}
1081
903static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) 1082static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
904{ 1083{
905 struct kvm *kvm = vcpu->kvm; 1084 struct kvm *kvm = vcpu->kvm;
@@ -932,8 +1111,8 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
932 kvm_for_each_vcpu(c, vcpu, kvm) { 1111 kvm_for_each_vcpu(c, vcpu, kvm) {
933 if (target_cpus & 1) { 1112 if (target_cpus & 1) {
934 /* Flag the SGI as pending */ 1113 /* Flag the SGI as pending */
935 vgic_dist_irq_set(vcpu, sgi); 1114 vgic_dist_irq_set_pending(vcpu, sgi);
936 dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; 1115 *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
937 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); 1116 kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
938 } 1117 }
939 1118
@@ -941,32 +1120,38 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
941 } 1120 }
942} 1121}
943 1122
1123static int vgic_nr_shared_irqs(struct vgic_dist *dist)
1124{
1125 return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
1126}
1127
944static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) 1128static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
945{ 1129{
946 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1130 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
947 unsigned long *pending, *enabled, *pend_percpu, *pend_shared; 1131 unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
948 unsigned long pending_private, pending_shared; 1132 unsigned long pending_private, pending_shared;
1133 int nr_shared = vgic_nr_shared_irqs(dist);
949 int vcpu_id; 1134 int vcpu_id;
950 1135
951 vcpu_id = vcpu->vcpu_id; 1136 vcpu_id = vcpu->vcpu_id;
952 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; 1137 pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
953 pend_shared = vcpu->arch.vgic_cpu.pending_shared; 1138 pend_shared = vcpu->arch.vgic_cpu.pending_shared;
954 1139
955 pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); 1140 pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
956 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); 1141 enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
957 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); 1142 bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
958 1143
959 pending = vgic_bitmap_get_shared_map(&dist->irq_state); 1144 pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
960 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); 1145 enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
961 bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); 1146 bitmap_and(pend_shared, pending, enabled, nr_shared);
962 bitmap_and(pend_shared, pend_shared, 1147 bitmap_and(pend_shared, pend_shared,
963 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), 1148 vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
964 VGIC_NR_SHARED_IRQS); 1149 nr_shared);
965 1150
966 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); 1151 pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
967 pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); 1152 pending_shared = find_first_bit(pend_shared, nr_shared);
968 return (pending_private < VGIC_NR_PRIVATE_IRQS || 1153 return (pending_private < VGIC_NR_PRIVATE_IRQS ||
969 pending_shared < VGIC_NR_SHARED_IRQS); 1154 pending_shared < vgic_nr_shared_irqs(dist));
970} 1155}
971 1156
972/* 1157/*
@@ -980,14 +1165,14 @@ static void vgic_update_state(struct kvm *kvm)
980 int c; 1165 int c;
981 1166
982 if (!dist->enabled) { 1167 if (!dist->enabled) {
983 set_bit(0, &dist->irq_pending_on_cpu); 1168 set_bit(0, dist->irq_pending_on_cpu);
984 return; 1169 return;
985 } 1170 }
986 1171
987 kvm_for_each_vcpu(c, vcpu, kvm) { 1172 kvm_for_each_vcpu(c, vcpu, kvm) {
988 if (compute_pending_for_cpu(vcpu)) { 1173 if (compute_pending_for_cpu(vcpu)) {
989 pr_debug("CPU%d has pending interrupts\n", c); 1174 pr_debug("CPU%d has pending interrupts\n", c);
990 set_bit(c, &dist->irq_pending_on_cpu); 1175 set_bit(c, dist->irq_pending_on_cpu);
991 } 1176 }
992 } 1177 }
993} 1178}
@@ -1079,8 +1264,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1079 1264
1080 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { 1265 if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
1081 vgic_retire_lr(lr, vlr.irq, vcpu); 1266 vgic_retire_lr(lr, vlr.irq, vcpu);
1082 if (vgic_irq_is_active(vcpu, vlr.irq)) 1267 if (vgic_irq_is_queued(vcpu, vlr.irq))
1083 vgic_irq_clear_active(vcpu, vlr.irq); 1268 vgic_irq_clear_queued(vcpu, vlr.irq);
1084 } 1269 }
1085 } 1270 }
1086} 1271}
@@ -1092,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1092static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) 1277static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1093{ 1278{
1094 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1279 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1280 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1095 struct vgic_lr vlr; 1281 struct vgic_lr vlr;
1096 int lr; 1282 int lr;
1097 1283
1098 /* Sanitize the input... */ 1284 /* Sanitize the input... */
1099 BUG_ON(sgi_source_id & ~7); 1285 BUG_ON(sgi_source_id & ~7);
1100 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); 1286 BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1101 BUG_ON(irq >= VGIC_NR_IRQS); 1287 BUG_ON(irq >= dist->nr_irqs);
1102 1288
1103 kvm_debug("Queue IRQ%d\n", irq); 1289 kvm_debug("Queue IRQ%d\n", irq);
1104 1290
@@ -1144,14 +1330,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1144 int vcpu_id = vcpu->vcpu_id; 1330 int vcpu_id = vcpu->vcpu_id;
1145 int c; 1331 int c;
1146 1332
1147 sources = dist->irq_sgi_sources[vcpu_id][irq]; 1333 sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
1148 1334
1149 for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { 1335 for_each_set_bit(c, &sources, dist->nr_cpus) {
1150 if (vgic_queue_irq(vcpu, c, irq)) 1336 if (vgic_queue_irq(vcpu, c, irq))
1151 clear_bit(c, &sources); 1337 clear_bit(c, &sources);
1152 } 1338 }
1153 1339
1154 dist->irq_sgi_sources[vcpu_id][irq] = sources; 1340 *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
1155 1341
1156 /* 1342 /*
1157 * If the sources bitmap has been cleared it means that we 1343 * If the sources bitmap has been cleared it means that we
@@ -1160,7 +1346,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1160 * our emulated gic and can get rid of them. 1346 * our emulated gic and can get rid of them.
1161 */ 1347 */
1162 if (!sources) { 1348 if (!sources) {
1163 vgic_dist_irq_clear(vcpu, irq); 1349 vgic_dist_irq_clear_pending(vcpu, irq);
1164 vgic_cpu_irq_clear(vcpu, irq); 1350 vgic_cpu_irq_clear(vcpu, irq);
1165 return true; 1351 return true;
1166 } 1352 }
@@ -1170,15 +1356,15 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
1170 1356
1171static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) 1357static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1172{ 1358{
1173 if (vgic_irq_is_active(vcpu, irq)) 1359 if (!vgic_can_sample_irq(vcpu, irq))
1174 return true; /* level interrupt, already queued */ 1360 return true; /* level interrupt, already queued */
1175 1361
1176 if (vgic_queue_irq(vcpu, 0, irq)) { 1362 if (vgic_queue_irq(vcpu, 0, irq)) {
1177 if (vgic_irq_is_edge(vcpu, irq)) { 1363 if (vgic_irq_is_edge(vcpu, irq)) {
1178 vgic_dist_irq_clear(vcpu, irq); 1364 vgic_dist_irq_clear_pending(vcpu, irq);
1179 vgic_cpu_irq_clear(vcpu, irq); 1365 vgic_cpu_irq_clear(vcpu, irq);
1180 } else { 1366 } else {
1181 vgic_irq_set_active(vcpu, irq); 1367 vgic_irq_set_queued(vcpu, irq);
1182 } 1368 }
1183 1369
1184 return true; 1370 return true;
@@ -1223,7 +1409,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1223 } 1409 }
1224 1410
1225 /* SPIs */ 1411 /* SPIs */
1226 for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { 1412 for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
1227 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) 1413 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1228 overflow = 1; 1414 overflow = 1;
1229 } 1415 }
@@ -1239,7 +1425,7 @@ epilog:
1239 * us. Claim we don't have anything pending. We'll 1425 * us. Claim we don't have anything pending. We'll
1240 * adjust that if needed while exiting. 1426 * adjust that if needed while exiting.
1241 */ 1427 */
1242 clear_bit(vcpu_id, &dist->irq_pending_on_cpu); 1428 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1243 } 1429 }
1244} 1430}
1245 1431
@@ -1261,17 +1447,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1261 1447
1262 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { 1448 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1263 struct vgic_lr vlr = vgic_get_lr(vcpu, lr); 1449 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1450 WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1264 1451
1265 vgic_irq_clear_active(vcpu, vlr.irq); 1452 vgic_irq_clear_queued(vcpu, vlr.irq);
1266 WARN_ON(vlr.state & LR_STATE_MASK); 1453 WARN_ON(vlr.state & LR_STATE_MASK);
1267 vlr.state = 0; 1454 vlr.state = 0;
1268 vgic_set_lr(vcpu, lr, vlr); 1455 vgic_set_lr(vcpu, lr, vlr);
1269 1456
1457 /*
1458 * If the IRQ was EOIed it was also ACKed and we
1459 * therefore assume we can clear the soft pending
1460 * state (should it have been set) for this interrupt.
1461 *
1462 * Note: if the IRQ soft pending state was set after
1463 * the IRQ was acked, it actually shouldn't be
1464 * cleared, but we have no way of knowing that unless
1465 * we start trapping ACKs when the soft-pending state
1466 * is set.
1467 */
1468 vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1469
1270 /* Any additional pending interrupt? */ 1470 /* Any additional pending interrupt? */
1271 if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { 1471 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1272 vgic_cpu_irq_set(vcpu, vlr.irq); 1472 vgic_cpu_irq_set(vcpu, vlr.irq);
1273 level_pending = true; 1473 level_pending = true;
1274 } else { 1474 } else {
1475 vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1275 vgic_cpu_irq_clear(vcpu, vlr.irq); 1476 vgic_cpu_irq_clear(vcpu, vlr.irq);
1276 } 1477 }
1277 1478
@@ -1315,14 +1516,14 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1315 1516
1316 vlr = vgic_get_lr(vcpu, lr); 1517 vlr = vgic_get_lr(vcpu, lr);
1317 1518
1318 BUG_ON(vlr.irq >= VGIC_NR_IRQS); 1519 BUG_ON(vlr.irq >= dist->nr_irqs);
1319 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; 1520 vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
1320 } 1521 }
1321 1522
1322 /* Check if we still have something up our sleeve... */ 1523 /* Check if we still have something up our sleeve... */
1323 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); 1524 pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1324 if (level_pending || pending < vgic->nr_lr) 1525 if (level_pending || pending < vgic->nr_lr)
1325 set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1526 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1326} 1527}
1327 1528
1328void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) 1529void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1356,7 +1557,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1356 if (!irqchip_in_kernel(vcpu->kvm)) 1557 if (!irqchip_in_kernel(vcpu->kvm))
1357 return 0; 1558 return 0;
1358 1559
1359 return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); 1560 return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1360} 1561}
1361 1562
1362static void vgic_kick_vcpus(struct kvm *kvm) 1563static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1376,34 +1577,36 @@ static void vgic_kick_vcpus(struct kvm *kvm)
1376 1577
1377static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) 1578static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1378{ 1579{
1379 int is_edge = vgic_irq_is_edge(vcpu, irq); 1580 int edge_triggered = vgic_irq_is_edge(vcpu, irq);
1380 int state = vgic_dist_irq_is_pending(vcpu, irq);
1381 1581
1382 /* 1582 /*
1383 * Only inject an interrupt if: 1583 * Only inject an interrupt if:
1384 * - edge triggered and we have a rising edge 1584 * - edge triggered and we have a rising edge
1385 * - level triggered and we change level 1585 * - level triggered and we change level
1386 */ 1586 */
1387 if (is_edge) 1587 if (edge_triggered) {
1588 int state = vgic_dist_irq_is_pending(vcpu, irq);
1388 return level > state; 1589 return level > state;
1389 else 1590 } else {
1591 int state = vgic_dist_irq_get_level(vcpu, irq);
1390 return level != state; 1592 return level != state;
1593 }
1391} 1594}
1392 1595
1393static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, 1596static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1394 unsigned int irq_num, bool level) 1597 unsigned int irq_num, bool level)
1395{ 1598{
1396 struct vgic_dist *dist = &kvm->arch.vgic; 1599 struct vgic_dist *dist = &kvm->arch.vgic;
1397 struct kvm_vcpu *vcpu; 1600 struct kvm_vcpu *vcpu;
1398 int is_edge, is_level; 1601 int edge_triggered, level_triggered;
1399 int enabled; 1602 int enabled;
1400 bool ret = true; 1603 bool ret = true;
1401 1604
1402 spin_lock(&dist->lock); 1605 spin_lock(&dist->lock);
1403 1606
1404 vcpu = kvm_get_vcpu(kvm, cpuid); 1607 vcpu = kvm_get_vcpu(kvm, cpuid);
1405 is_edge = vgic_irq_is_edge(vcpu, irq_num); 1608 edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
1406 is_level = !is_edge; 1609 level_triggered = !edge_triggered;
1407 1610
1408 if (!vgic_validate_injection(vcpu, irq_num, level)) { 1611 if (!vgic_validate_injection(vcpu, irq_num, level)) {
1409 ret = false; 1612 ret = false;
@@ -1417,10 +1620,19 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1417 1620
1418 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); 1621 kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1419 1622
1420 if (level) 1623 if (level) {
1421 vgic_dist_irq_set(vcpu, irq_num); 1624 if (level_triggered)
1422 else 1625 vgic_dist_irq_set_level(vcpu, irq_num);
1423 vgic_dist_irq_clear(vcpu, irq_num); 1626 vgic_dist_irq_set_pending(vcpu, irq_num);
1627 } else {
1628 if (level_triggered) {
1629 vgic_dist_irq_clear_level(vcpu, irq_num);
1630 if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
1631 vgic_dist_irq_clear_pending(vcpu, irq_num);
1632 } else {
1633 vgic_dist_irq_clear_pending(vcpu, irq_num);
1634 }
1635 }
1424 1636
1425 enabled = vgic_irq_is_enabled(vcpu, irq_num); 1637 enabled = vgic_irq_is_enabled(vcpu, irq_num);
1426 1638
@@ -1429,7 +1641,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1429 goto out; 1641 goto out;
1430 } 1642 }
1431 1643
1432 if (is_level && vgic_irq_is_active(vcpu, irq_num)) { 1644 if (!vgic_can_sample_irq(vcpu, irq_num)) {
1433 /* 1645 /*
1434 * Level interrupt in progress, will be picked up 1646 * Level interrupt in progress, will be picked up
1435 * when EOId. 1647 * when EOId.
@@ -1440,7 +1652,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
1440 1652
1441 if (level) { 1653 if (level) {
1442 vgic_cpu_irq_set(vcpu, irq_num); 1654 vgic_cpu_irq_set(vcpu, irq_num);
1443 set_bit(cpuid, &dist->irq_pending_on_cpu); 1655 set_bit(cpuid, dist->irq_pending_on_cpu);
1444 } 1656 }
1445 1657
1446out: 1658out:
@@ -1466,7 +1678,8 @@ out:
1466int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, 1678int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1467 bool level) 1679 bool level)
1468{ 1680{
1469 if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) 1681 if (likely(vgic_initialized(kvm)) &&
1682 vgic_update_irq_pending(kvm, cpuid, irq_num, level))
1470 vgic_kick_vcpus(kvm); 1683 vgic_kick_vcpus(kvm);
1471 1684
1472 return 0; 1685 return 0;
@@ -1483,6 +1696,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1483 return IRQ_HANDLED; 1696 return IRQ_HANDLED;
1484} 1697}
1485 1698
1699void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1700{
1701 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1702
1703 kfree(vgic_cpu->pending_shared);
1704 kfree(vgic_cpu->vgic_irq_lr_map);
1705 vgic_cpu->pending_shared = NULL;
1706 vgic_cpu->vgic_irq_lr_map = NULL;
1707}
1708
1709static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1710{
1711 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1712
1713 int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
1714 vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1715 vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
1716
1717 if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
1718 kvm_vgic_vcpu_destroy(vcpu);
1719 return -ENOMEM;
1720 }
1721
1722 return 0;
1723}
1724
1486/** 1725/**
1487 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state 1726 * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
1488 * @vcpu: pointer to the vcpu struct 1727 * @vcpu: pointer to the vcpu struct
@@ -1490,16 +1729,13 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1490 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to 1729 * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
1491 * this vcpu and enable the VGIC for this VCPU 1730 * this vcpu and enable the VGIC for this VCPU
1492 */ 1731 */
1493int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) 1732static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1494{ 1733{
1495 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; 1734 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1496 struct vgic_dist *dist = &vcpu->kvm->arch.vgic; 1735 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1497 int i; 1736 int i;
1498 1737
1499 if (vcpu->vcpu_id >= VGIC_MAX_CPUS) 1738 for (i = 0; i < dist->nr_irqs; i++) {
1500 return -EBUSY;
1501
1502 for (i = 0; i < VGIC_NR_IRQS; i++) {
1503 if (i < VGIC_NR_PPIS) 1739 if (i < VGIC_NR_PPIS)
1504 vgic_bitmap_set_irq_val(&dist->irq_enabled, 1740 vgic_bitmap_set_irq_val(&dist->irq_enabled,
1505 vcpu->vcpu_id, i, 1); 1741 vcpu->vcpu_id, i, 1);
@@ -1518,84 +1754,112 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
1518 vgic_cpu->nr_lr = vgic->nr_lr; 1754 vgic_cpu->nr_lr = vgic->nr_lr;
1519 1755
1520 vgic_enable(vcpu); 1756 vgic_enable(vcpu);
1521
1522 return 0;
1523} 1757}
1524 1758
1525static void vgic_init_maintenance_interrupt(void *info) 1759void kvm_vgic_destroy(struct kvm *kvm)
1526{ 1760{
1527 enable_percpu_irq(vgic->maint_irq, 0); 1761 struct vgic_dist *dist = &kvm->arch.vgic;
1762 struct kvm_vcpu *vcpu;
1763 int i;
1764
1765 kvm_for_each_vcpu(i, vcpu, kvm)
1766 kvm_vgic_vcpu_destroy(vcpu);
1767
1768 vgic_free_bitmap(&dist->irq_enabled);
1769 vgic_free_bitmap(&dist->irq_level);
1770 vgic_free_bitmap(&dist->irq_pending);
1771 vgic_free_bitmap(&dist->irq_soft_pend);
1772 vgic_free_bitmap(&dist->irq_queued);
1773 vgic_free_bitmap(&dist->irq_cfg);
1774 vgic_free_bytemap(&dist->irq_priority);
1775 if (dist->irq_spi_target) {
1776 for (i = 0; i < dist->nr_cpus; i++)
1777 vgic_free_bitmap(&dist->irq_spi_target[i]);
1778 }
1779 kfree(dist->irq_sgi_sources);
1780 kfree(dist->irq_spi_cpu);
1781 kfree(dist->irq_spi_target);
1782 kfree(dist->irq_pending_on_cpu);
1783 dist->irq_sgi_sources = NULL;
1784 dist->irq_spi_cpu = NULL;
1785 dist->irq_spi_target = NULL;
1786 dist->irq_pending_on_cpu = NULL;
1528} 1787}
1529 1788
1530static int vgic_cpu_notify(struct notifier_block *self, 1789/*
1531 unsigned long action, void *cpu) 1790 * Allocate and initialize the various data structures. Must be called
1791 * with kvm->lock held!
1792 */
1793static int vgic_init_maps(struct kvm *kvm)
1532{ 1794{
1533 switch (action) { 1795 struct vgic_dist *dist = &kvm->arch.vgic;
1534 case CPU_STARTING: 1796 struct kvm_vcpu *vcpu;
1535 case CPU_STARTING_FROZEN: 1797 int nr_cpus, nr_irqs;
1536 vgic_init_maintenance_interrupt(NULL); 1798 int ret, i;
1537 break;
1538 case CPU_DYING:
1539 case CPU_DYING_FROZEN:
1540 disable_percpu_irq(vgic->maint_irq);
1541 break;
1542 }
1543 1799
1544 return NOTIFY_OK; 1800 if (dist->nr_cpus) /* Already allocated */
1545} 1801 return 0;
1546 1802
1547static struct notifier_block vgic_cpu_nb = { 1803 nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
1548 .notifier_call = vgic_cpu_notify, 1804 if (!nr_cpus) /* No vcpus? Can't be good... */
1549}; 1805 return -EINVAL;
1550 1806
1551static const struct of_device_id vgic_ids[] = { 1807 /*
1552 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, 1808 * If nobody configured the number of interrupts, use the
1553 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, 1809 * legacy one.
1554 {}, 1810 */
1555}; 1811 if (!dist->nr_irqs)
1812 dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
1556 1813
1557int kvm_vgic_hyp_init(void) 1814 nr_irqs = dist->nr_irqs;
1558{
1559 const struct of_device_id *matched_id;
1560 int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
1561 const struct vgic_params **);
1562 struct device_node *vgic_node;
1563 int ret;
1564 1815
1565 vgic_node = of_find_matching_node_and_match(NULL, 1816 ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
1566 vgic_ids, &matched_id); 1817 ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
1567 if (!vgic_node) { 1818 ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
1568 kvm_err("error: no compatible GIC node found\n"); 1819 ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
1569 return -ENODEV; 1820 ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
1570 } 1821 ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
1822 ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
1571 1823
1572 vgic_probe = matched_id->data;
1573 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
1574 if (ret) 1824 if (ret)
1575 return ret; 1825 goto out;
1576 1826
1577 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, 1827 dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
1578 "vgic", kvm_get_running_vcpus()); 1828 dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
1579 if (ret) { 1829 dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
1580 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); 1830 GFP_KERNEL);
1581 return ret; 1831 dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1832 GFP_KERNEL);
1833 if (!dist->irq_sgi_sources ||
1834 !dist->irq_spi_cpu ||
1835 !dist->irq_spi_target ||
1836 !dist->irq_pending_on_cpu) {
1837 ret = -ENOMEM;
1838 goto out;
1582 } 1839 }
1583 1840
1584 ret = __register_cpu_notifier(&vgic_cpu_nb); 1841 for (i = 0; i < nr_cpus; i++)
1585 if (ret) { 1842 ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
1586 kvm_err("Cannot register vgic CPU notifier\n"); 1843 nr_cpus, nr_irqs);
1587 goto out_free_irq;
1588 }
1589 1844
1590 /* Callback into for arch code for setup */ 1845 if (ret)
1591 vgic_arch_setup(vgic); 1846 goto out;
1592 1847
1593 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); 1848 kvm_for_each_vcpu(i, vcpu, kvm) {
1849 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
1850 if (ret) {
1851 kvm_err("VGIC: Failed to allocate vcpu memory\n");
1852 break;
1853 }
1854 }
1594 1855
1595 return 0; 1856 for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
1857 vgic_set_target_reg(kvm, 0, i);
1858
1859out:
1860 if (ret)
1861 kvm_vgic_destroy(kvm);
1596 1862
1597out_free_irq:
1598 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
1599 return ret; 1863 return ret;
1600} 1864}
1601 1865
@@ -1610,6 +1874,7 @@ out_free_irq:
1610 */ 1874 */
1611int kvm_vgic_init(struct kvm *kvm) 1875int kvm_vgic_init(struct kvm *kvm)
1612{ 1876{
1877 struct kvm_vcpu *vcpu;
1613 int ret = 0, i; 1878 int ret = 0, i;
1614 1879
1615 if (!irqchip_in_kernel(kvm)) 1880 if (!irqchip_in_kernel(kvm))
@@ -1627,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm)
1627 goto out; 1892 goto out;
1628 } 1893 }
1629 1894
1895 ret = vgic_init_maps(kvm);
1896 if (ret) {
1897 kvm_err("Unable to allocate maps\n");
1898 goto out;
1899 }
1900
1630 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, 1901 ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
1631 vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); 1902 vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
1632 if (ret) { 1903 if (ret) {
@@ -1634,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm)
1634 goto out; 1905 goto out;
1635 } 1906 }
1636 1907
1637 for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) 1908 kvm_for_each_vcpu(i, vcpu, kvm)
1638 vgic_set_target_reg(kvm, 0, i); 1909 kvm_vgic_vcpu_init(vcpu);
1639 1910
1640 kvm->arch.vgic.ready = true; 1911 kvm->arch.vgic.ready = true;
1641out: 1912out:
1913 if (ret)
1914 kvm_vgic_destroy(kvm);
1642 mutex_unlock(&kvm->lock); 1915 mutex_unlock(&kvm->lock);
1643 return ret; 1916 return ret;
1644} 1917}
@@ -1690,7 +1963,7 @@ out:
1690 return ret; 1963 return ret;
1691} 1964}
1692 1965
1693static bool vgic_ioaddr_overlap(struct kvm *kvm) 1966static int vgic_ioaddr_overlap(struct kvm *kvm)
1694{ 1967{
1695 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; 1968 phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
1696 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; 1969 phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
@@ -1879,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
1879 2152
1880 mutex_lock(&dev->kvm->lock); 2153 mutex_lock(&dev->kvm->lock);
1881 2154
2155 ret = vgic_init_maps(dev->kvm);
2156 if (ret)
2157 goto out;
2158
1882 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { 2159 if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
1883 ret = -EINVAL; 2160 ret = -EINVAL;
1884 goto out; 2161 goto out;
@@ -1976,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1976 2253
1977 return vgic_attr_regs_access(dev, attr, &reg, true); 2254 return vgic_attr_regs_access(dev, attr, &reg, true);
1978 } 2255 }
2256 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2257 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2258 u32 val;
2259 int ret = 0;
2260
2261 if (get_user(val, uaddr))
2262 return -EFAULT;
2263
2264 /*
2265 * We require:
2266 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
2267 * - at most 1024 interrupts
2268 * - a multiple of 32 interrupts
2269 */
2270 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
2271 val > VGIC_MAX_IRQS ||
2272 (val & 31))
2273 return -EINVAL;
2274
2275 mutex_lock(&dev->kvm->lock);
2276
2277 if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2278 ret = -EBUSY;
2279 else
2280 dev->kvm->arch.vgic.nr_irqs = val;
2281
2282 mutex_unlock(&dev->kvm->lock);
2283
2284 return ret;
2285 }
1979 2286
1980 } 2287 }
1981 2288
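For context, here is a minimal userspace sketch of programming the new attribute (error handling trimmed; vgic_fd is assumed to come from KVM_CREATE_DEVICE with KVM_DEV_TYPE_ARM_VGIC_V2, and KVM_DEV_ARM_VGIC_GRP_NR_IRQS is assumed to be visible through the uapi headers):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* nr_irqs must be between 64 and 1024, a multiple of 32, and must be set
 * before the vgic is initialized, otherwise the kernel returns -EINVAL or
 * -EBUSY as per the checks above. */
static int set_vgic_nr_irqs(int vgic_fd, uint32_t nr_irqs)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
		.attr  = 0,
		.addr  = (uint64_t)(unsigned long)&nr_irqs,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

For example, set_vgic_nr_irqs(vgic_fd, 128) gives the guest 96 SPIs on top of the 16 SGIs and 16 PPIs.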
@@ -2012,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2012 r = put_user(reg, uaddr); 2319 r = put_user(reg, uaddr);
2013 break; 2320 break;
2014 } 2321 }
2322 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2323 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2324 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
2325 break;
2326 }
2015 2327
2016 } 2328 }
2017 2329
@@ -2048,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2048 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: 2360 case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
2049 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; 2361 offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
2050 return vgic_has_attr_regs(vgic_cpu_ranges, offset); 2362 return vgic_has_attr_regs(vgic_cpu_ranges, offset);
2363 case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
2364 return 0;
2051 } 2365 }
2052 return -ENXIO; 2366 return -ENXIO;
2053} 2367}
@@ -2062,7 +2376,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
2062 return kvm_vgic_create(dev->kvm); 2376 return kvm_vgic_create(dev->kvm);
2063} 2377}
2064 2378
2065struct kvm_device_ops kvm_arm_vgic_v2_ops = { 2379static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2066 .name = "kvm-arm-vgic", 2380 .name = "kvm-arm-vgic",
2067 .create = vgic_create, 2381 .create = vgic_create,
2068 .destroy = vgic_destroy, 2382 .destroy = vgic_destroy,
@@ -2070,3 +2384,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
2070 .get_attr = vgic_get_attr, 2384 .get_attr = vgic_get_attr,
2071 .has_attr = vgic_has_attr, 2385 .has_attr = vgic_has_attr,
2072}; 2386};
2387
2388static void vgic_init_maintenance_interrupt(void *info)
2389{
2390 enable_percpu_irq(vgic->maint_irq, 0);
2391}
2392
2393static int vgic_cpu_notify(struct notifier_block *self,
2394 unsigned long action, void *cpu)
2395{
2396 switch (action) {
2397 case CPU_STARTING:
2398 case CPU_STARTING_FROZEN:
2399 vgic_init_maintenance_interrupt(NULL);
2400 break;
2401 case CPU_DYING:
2402 case CPU_DYING_FROZEN:
2403 disable_percpu_irq(vgic->maint_irq);
2404 break;
2405 }
2406
2407 return NOTIFY_OK;
2408}
2409
2410static struct notifier_block vgic_cpu_nb = {
2411 .notifier_call = vgic_cpu_notify,
2412};
2413
2414static const struct of_device_id vgic_ids[] = {
2415 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2416 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2417 {},
2418};
2419
2420int kvm_vgic_hyp_init(void)
2421{
2422 const struct of_device_id *matched_id;
2423 const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
2424 const struct vgic_params **);
2425 struct device_node *vgic_node;
2426 int ret;
2427
2428 vgic_node = of_find_matching_node_and_match(NULL,
2429 vgic_ids, &matched_id);
2430 if (!vgic_node) {
2431 kvm_err("error: no compatible GIC node found\n");
2432 return -ENODEV;
2433 }
2434
2435 vgic_probe = matched_id->data;
2436 ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
2437 if (ret)
2438 return ret;
2439
2440 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2441 "vgic", kvm_get_running_vcpus());
2442 if (ret) {
2443 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2444 return ret;
2445 }
2446
2447 ret = __register_cpu_notifier(&vgic_cpu_nb);
2448 if (ret) {
2449 kvm_err("Cannot register vgic CPU notifier\n");
2450 goto out_free_irq;
2451 }
2452
2453 /* Callback into arch code for setup */
2454 vgic_arch_setup(vgic);
2455
2456 on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2457
2458 return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
2459 KVM_DEV_TYPE_ARM_VGIC_V2);
2460
2461out_free_irq:
2462 free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2463 return ret;
2464}
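kvm_vgic_hyp_init() above selects between the GICv2 and GICv3 back ends purely through the .data member of the matched of_device_id: each compatible string carries its own probe callback, and only the one named by the firmware-provided device tree is ever called. A stripped-down sketch of that dispatch pattern follows; the my_* names are hypothetical and not part of this patch.

#include <linux/errno.h>
#include <linux/of.h>

struct my_params { int maint_irq; };

static int my_v2_probe(struct device_node *node, struct my_params *params)
{
	/* GICv2-specific discovery would go here */
	return 0;
}

static int my_v3_probe(struct device_node *node, struct my_params *params)
{
	/* GICv3-specific discovery would go here */
	return 0;
}

static const struct of_device_id my_ids[] = {
	{ .compatible = "arm,cortex-a15-gic", .data = my_v2_probe, },
	{ .compatible = "arm,gic-v3",         .data = my_v3_probe, },
	{},
};

static int my_init(struct my_params *params)
{
	const struct of_device_id *matched;
	int (*probe)(struct device_node *, struct my_params *);
	struct device_node *node;

	node = of_find_matching_node_and_match(NULL, my_ids, &matched);
	if (!node)
		return -ENODEV;		/* no compatible node in the DT */

	probe = matched->data;		/* pick the matched probe callback */
	return probe(node, params);
}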
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
80 80
81 might_sleep(); 81 might_sleep();
82 82
83 down_read(&mm->mmap_sem); 83 kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
84 get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
85 up_read(&mm->mmap_sem);
86 kvm_async_page_present_sync(vcpu, apf); 84 kvm_async_page_present_sync(vcpu, apf);
87 85
88 spin_lock(&vcpu->async_pf.lock); 86 spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 3c5981c87c3f..b0fb390943c6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,9 @@
36#include <linux/seqlock.h> 36#include <linux/seqlock.h>
37#include <trace/events/kvm.h> 37#include <trace/events/kvm.h>
38 38
39#include "irq.h" 39#ifdef __KVM_HAVE_IOAPIC
40#include "ioapic.h"
41#endif
40#include "iodev.h" 42#include "iodev.h"
41 43
42#ifdef CONFIG_HAVE_KVM_IRQFD 44#ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e8ce34c9db32..0ba4057d271b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
405 spin_unlock(&ioapic->lock); 405 spin_unlock(&ioapic->lock);
406} 406}
407 407
408static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
409{
410 int i;
411 struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
412 eoi_inject.work);
413 spin_lock(&ioapic->lock);
414 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
415 union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
416
417 if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
418 continue;
419
420 if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
421 ioapic_service(ioapic, i, false);
422 }
423 spin_unlock(&ioapic->lock);
424}
425
426#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
427
408static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, 428static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
409 struct kvm_ioapic *ioapic, int vector, int trigger_mode) 429 struct kvm_ioapic *ioapic, int vector, int trigger_mode)
410{ 430{
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
435 455
436 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 456 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
437 ent->fields.remote_irr = 0; 457 ent->fields.remote_irr = 0;
438 if (ioapic->irr & (1 << i)) 458 if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
439 ioapic_service(ioapic, i, false); 459 ++ioapic->irq_eoi[i];
460 if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
461 /*
462 * Real hardware does not deliver the interrupt
463 * immediately during eoi broadcast, and this
464 * lets a buggy guest make slow progress
465 * even if it does not correctly handle a
466 * level-triggered interrupt. Emulate this
467 * behavior if we detect an interrupt storm.
468 */
469 schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
470 ioapic->irq_eoi[i] = 0;
471 trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
472 } else {
473 ioapic_service(ioapic, i, false);
474 }
475 } else {
476 ioapic->irq_eoi[i] = 0;
477 }
440 } 478 }
441} 479}
442 480
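The hunk above adds interrupt-storm throttling to the EOI path: while a level-triggered pin stays asserted, each EOI that immediately re-raises it bumps irq_eoi[i], and after IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT back-to-back occurrences the re-injection is deferred through the eoi_inject delayed work instead of being serviced at once. A self-contained sketch of just that decision, in plain C with hypothetical names, is shown below.

#include <stdbool.h>
#include <stdint.h>

#define SUCCESSIVE_IRQ_MAX_COUNT 10000

struct pin_state {
	uint32_t eoi_count;	/* mirrors ioapic->irq_eoi[i]           */
	bool	 still_pending;	/* mirrors the pin's bit in ioapic->irr */
};

/* Returns true when the still-pending interrupt should be re-injected right
 * away; false when nothing is pending or the re-injection should be deferred
 * (schedule_delayed_work() in the patch). */
static bool service_now_on_eoi(struct pin_state *p)
{
	if (!p->still_pending) {
		p->eoi_count = 0;	/* storm over, start counting again */
		return false;
	}
	if (++p->eoi_count == SUCCESSIVE_IRQ_MAX_COUNT) {
		p->eoi_count = 0;	/* throttle: let the guest run a bit */
		return false;
	}
	return true;
}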
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
565{ 603{
566 int i; 604 int i;
567 605
606 cancel_delayed_work_sync(&ioapic->eoi_inject);
568 for (i = 0; i < IOAPIC_NUM_PINS; i++) 607 for (i = 0; i < IOAPIC_NUM_PINS; i++)
569 ioapic->redirtbl[i].fields.mask = 1; 608 ioapic->redirtbl[i].fields.mask = 1;
570 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; 609 ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
571 ioapic->ioregsel = 0; 610 ioapic->ioregsel = 0;
572 ioapic->irr = 0; 611 ioapic->irr = 0;
573 ioapic->id = 0; 612 ioapic->id = 0;
613 memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi));
574 rtc_irq_eoi_tracking_reset(ioapic); 614 rtc_irq_eoi_tracking_reset(ioapic);
575 update_handled_vectors(ioapic); 615 update_handled_vectors(ioapic);
576} 616}
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm)
589 if (!ioapic) 629 if (!ioapic)
590 return -ENOMEM; 630 return -ENOMEM;
591 spin_lock_init(&ioapic->lock); 631 spin_lock_init(&ioapic->lock);
632 INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
592 kvm->arch.vioapic = ioapic; 633 kvm->arch.vioapic = ioapic;
593 kvm_ioapic_reset(ioapic); 634 kvm_ioapic_reset(ioapic);
594 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); 635 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
609{ 650{
610 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 651 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
611 652
612 if (ioapic) { 653 if (ioapic) {
654 cancel_delayed_work_sync(&ioapic->eoi_inject);
613 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); 655 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
614 kvm->arch.vioapic = NULL; 656 kvm->arch.vioapic = NULL;
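The eoi_inject work introduced here follows the usual delayed-work lifecycle: INIT_DELAYED_WORK() once at device creation, schedule_delayed_work() from the EOI path when throttling kicks in, and cancel_delayed_work_sync() on reset and destroy so the handler can never run against torn-down state. A generic sketch of that pattern, with hypothetical my_* names, is shown below.

#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
	struct delayed_work work;
	/* ... device state ... */
};

static void my_work_fn(struct work_struct *w)
{
	struct my_dev *dev = container_of(w, struct my_dev, work.work);

	/* Runs ~10 ms after scheduling; re-check device state under the
	 * device lock here before acting on it. */
	(void)dev;
}

static struct my_dev *my_dev_create(void)
{
	struct my_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (dev)
		INIT_DELAYED_WORK(&dev->work, my_work_fn);	/* once, at init */
	return dev;
}

static void my_dev_kick(struct my_dev *dev)
{
	schedule_delayed_work(&dev->work, HZ / 100);	/* defer ~10 ms */
}

static void my_dev_destroy(struct my_dev *dev)
{
	cancel_delayed_work_sync(&dev->work);	/* before freeing state */
	kfree(dev);
}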
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 90d43e95dcf8..e23b70634f1e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -59,6 +59,8 @@ struct kvm_ioapic {
59 spinlock_t lock; 59 spinlock_t lock;
60 DECLARE_BITMAP(handled_vectors, 256); 60 DECLARE_BITMAP(handled_vectors, 256);
61 struct rtc_status rtc_status; 61 struct rtc_status rtc_status;
62 struct delayed_work eoi_inject;
63 u32 irq_eoi[IOAPIC_NUM_PINS];
62}; 64};
63 65
64#ifdef DEBUG 66#ifdef DEBUG
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 95519bc959ed..384eaa7b02fa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,11 +52,13 @@
52 52
53#include <asm/processor.h> 53#include <asm/processor.h>
54#include <asm/io.h> 54#include <asm/io.h>
55#include <asm/ioctl.h>
55#include <asm/uaccess.h> 56#include <asm/uaccess.h>
56#include <asm/pgtable.h> 57#include <asm/pgtable.h>
57 58
58#include "coalesced_mmio.h" 59#include "coalesced_mmio.h"
59#include "async_pf.h" 60#include "async_pf.h"
61#include "vfio.h"
60 62
61#define CREATE_TRACE_POINTS 63#define CREATE_TRACE_POINTS
62#include <trace/events/kvm.h> 64#include <trace/events/kvm.h>
@@ -95,8 +97,6 @@ static int hardware_enable_all(void);
95static void hardware_disable_all(void); 97static void hardware_disable_all(void);
96 98
97static void kvm_io_bus_destroy(struct kvm_io_bus *bus); 99static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
98static void update_memslots(struct kvm_memslots *slots,
99 struct kvm_memory_slot *new, u64 last_generation);
100 100
101static void kvm_release_pfn_dirty(pfn_t pfn); 101static void kvm_release_pfn_dirty(pfn_t pfn);
102static void mark_page_dirty_in_slot(struct kvm *kvm, 102static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
129 struct pid *oldpid = vcpu->pid; 129 struct pid *oldpid = vcpu->pid;
130 struct pid *newpid = get_task_pid(current, PIDTYPE_PID); 130 struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
131 rcu_assign_pointer(vcpu->pid, newpid); 131 rcu_assign_pointer(vcpu->pid, newpid);
132 synchronize_rcu(); 132 if (oldpid)
133 synchronize_rcu();
133 put_pid(oldpid); 134 put_pid(oldpid);
134 } 135 }
135 cpu = get_cpu(); 136 cpu = get_cpu();
@@ -152,7 +153,7 @@ static void ack_flush(void *_completed)
152{ 153{
153} 154}
154 155
155static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) 156bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
156{ 157{
157 int i, cpu, me; 158 int i, cpu, me;
158 cpumask_var_t cpus; 159 cpumask_var_t cpus;
@@ -189,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
189 long dirty_count = kvm->tlbs_dirty; 190 long dirty_count = kvm->tlbs_dirty;
190 191
191 smp_mb(); 192 smp_mb();
192 if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) 193 if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
193 ++kvm->stat.remote_tlb_flush; 194 ++kvm->stat.remote_tlb_flush;
194 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); 195 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
195} 196}
@@ -197,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
197 198
198void kvm_reload_remote_mmus(struct kvm *kvm) 199void kvm_reload_remote_mmus(struct kvm *kvm)
199{ 200{
200 make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); 201 kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
201} 202}
202 203
203void kvm_make_mclock_inprogress_request(struct kvm *kvm) 204void kvm_make_mclock_inprogress_request(struct kvm *kvm)
204{ 205{
205 make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); 206 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
206} 207}
207 208
208void kvm_make_scan_ioapic_request(struct kvm *kvm) 209void kvm_make_scan_ioapic_request(struct kvm *kvm)
209{ 210{
210 make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); 211 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
211} 212}
212 213
213int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) 214int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -295,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
295 kvm_flush_remote_tlbs(kvm); 296 kvm_flush_remote_tlbs(kvm);
296 297
297 spin_unlock(&kvm->mmu_lock); 298 spin_unlock(&kvm->mmu_lock);
299
300 kvm_arch_mmu_notifier_invalidate_page(kvm, address);
301
298 srcu_read_unlock(&kvm->srcu, idx); 302 srcu_read_unlock(&kvm->srcu, idx);
299} 303}
300 304
@@ -368,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
368 372
369static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, 373static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
370 struct mm_struct *mm, 374 struct mm_struct *mm,
371 unsigned long address) 375 unsigned long start,
376 unsigned long end)
372{ 377{
373 struct kvm *kvm = mmu_notifier_to_kvm(mn); 378 struct kvm *kvm = mmu_notifier_to_kvm(mn);
374 int young, idx; 379 int young, idx;
@@ -376,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
376 idx = srcu_read_lock(&kvm->srcu); 381 idx = srcu_read_lock(&kvm->srcu);
377 spin_lock(&kvm->mmu_lock); 382 spin_lock(&kvm->mmu_lock);
378 383
379 young = kvm_age_hva(kvm, address); 384 young = kvm_age_hva(kvm, start, end);
380 if (young) 385 if (young)
381 kvm_flush_remote_tlbs(kvm); 386 kvm_flush_remote_tlbs(kvm);
382 387
@@ -476,6 +481,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
476 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); 481 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
477 if (!kvm->memslots) 482 if (!kvm->memslots)
478 goto out_err_no_srcu; 483 goto out_err_no_srcu;
484
485 /*
486 * Init kvm generation close to the maximum to easily exercise the
487 * code that handles generation number wrap-around.
488 */
489 kvm->memslots->generation = -150;
490
479 kvm_init_memslots_id(kvm); 491 kvm_init_memslots_id(kvm);
480 if (init_srcu_struct(&kvm->srcu)) 492 if (init_srcu_struct(&kvm->srcu))
481 goto out_err_no_srcu; 493 goto out_err_no_srcu;
@@ -687,8 +699,7 @@ static void sort_memslots(struct kvm_memslots *slots)
687} 699}
688 700
689static void update_memslots(struct kvm_memslots *slots, 701static void update_memslots(struct kvm_memslots *slots,
690 struct kvm_memory_slot *new, 702 struct kvm_memory_slot *new)
691 u64 last_generation)
692{ 703{
693 if (new) { 704 if (new) {
694 int id = new->id; 705 int id = new->id;
@@ -699,15 +710,13 @@ static void update_memslots(struct kvm_memslots *slots,
699 if (new->npages != npages) 710 if (new->npages != npages)
700 sort_memslots(slots); 711 sort_memslots(slots);
701 } 712 }
702
703 slots->generation = last_generation + 1;
704} 713}
705 714
706static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) 715static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
707{ 716{
708 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; 717 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
709 718
710#ifdef KVM_CAP_READONLY_MEM 719#ifdef __KVM_HAVE_READONLY_MEM
711 valid_flags |= KVM_MEM_READONLY; 720 valid_flags |= KVM_MEM_READONLY;
712#endif 721#endif
713 722
@@ -722,10 +731,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
722{ 731{
723 struct kvm_memslots *old_memslots = kvm->memslots; 732 struct kvm_memslots *old_memslots = kvm->memslots;
724 733
725 update_memslots(slots, new, kvm->memslots->generation); 734 /*
735 * Set the low bit in the generation, which disables SPTE caching
736 * until the end of synchronize_srcu_expedited.
737 */
738 WARN_ON(old_memslots->generation & 1);
739 slots->generation = old_memslots->generation + 1;
740
741 update_memslots(slots, new);
726 rcu_assign_pointer(kvm->memslots, slots); 742 rcu_assign_pointer(kvm->memslots, slots);
727 synchronize_srcu_expedited(&kvm->srcu); 743 synchronize_srcu_expedited(&kvm->srcu);
728 744
745 /*
746 * Increment the new memslot generation a second time. This prevents
747 * vm exits that race with memslot updates from caching a memslot
748 * generation that will (potentially) be valid forever.
749 */
750 slots->generation++;
751
729 kvm_arch_memslots_updated(kvm); 752 kvm_arch_memslots_updated(kvm);
730 753
731 return old_memslots; 754 return old_memslots;
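install_new_memslots() now brackets the SRCU grace period with two generation bumps: the pre-update increment makes the generation odd for the whole window in which old and new memslot arrays may both be visible, and the post-update increment guarantees that no reader can have cached the final (even) value while the update was still in flight. A hedged, plain-C illustration of the reader-side check this enables is shown below; the in-tree consumer of the generation is arch code such as the x86 MMIO SPTE cache, and the names here are illustrative.

#include <stdbool.h>
#include <stdint.h>

struct cached_translation {
	uint64_t value;
	uint64_t generation;	/* memslot generation when this was cached */
};

static bool cached_translation_valid(uint64_t slots_generation,
				     const struct cached_translation *c)
{
	/* Low bit set: install_new_memslots() is between its two bumps,
	 * i.e. an update is in flight -- never trust or create cached
	 * entries in this window. */
	if (slots_generation & 1)
		return false;

	/* The second bump after synchronize_srcu_expedited() guarantees a
	 * racing reader cannot have cached the final generation value. */
	return c->generation == slots_generation;
}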
@@ -776,7 +799,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
776 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; 799 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
777 npages = mem->memory_size >> PAGE_SHIFT; 800 npages = mem->memory_size >> PAGE_SHIFT;
778 801
779 r = -EINVAL;
780 if (npages > KVM_MEM_MAX_NR_PAGES) 802 if (npages > KVM_MEM_MAX_NR_PAGES)
781 goto out; 803 goto out;
782 804
@@ -790,7 +812,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
790 new.npages = npages; 812 new.npages = npages;
791 new.flags = mem->flags; 813 new.flags = mem->flags;
792 814
793 r = -EINVAL;
794 if (npages) { 815 if (npages) {
795 if (!old.npages) 816 if (!old.npages)
796 change = KVM_MR_CREATE; 817 change = KVM_MR_CREATE;
@@ -846,7 +867,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
846 } 867 }
847 868
848 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { 869 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
849 r = -ENOMEM;
850 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), 870 slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
851 GFP_KERNEL); 871 GFP_KERNEL);
852 if (!slots) 872 if (!slots)
@@ -1075,9 +1095,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
1075 * If writable is set to false, the hva returned by this function is only 1095 * If writable is set to false, the hva returned by this function is only
1076 * allowed to be read. 1096 * allowed to be read.
1077 */ 1097 */
1078unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) 1098unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
1099 gfn_t gfn, bool *writable)
1079{ 1100{
1080 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1081 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); 1101 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
1082 1102
1083 if (!kvm_is_error_hva(hva) && writable) 1103 if (!kvm_is_error_hva(hva) && writable)
@@ -1086,6 +1106,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1086 return hva; 1106 return hva;
1087} 1107}
1088 1108
1109unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1110{
1111 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1112
1113 return gfn_to_hva_memslot_prot(slot, gfn, writable);
1114}
1115
1089static int kvm_read_hva(void *data, void __user *hva, int len) 1116static int kvm_read_hva(void *data, void __user *hva, int len)
1090{ 1117{
1091 return __copy_from_user(data, hva, len); 1118 return __copy_from_user(data, hva, len);
@@ -1107,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
1107 return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); 1134 return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
1108} 1135}
1109 1136
1137int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
1138 unsigned long addr, bool write_fault,
1139 struct page **pagep)
1140{
1141 int npages;
1142 int locked = 1;
1143 int flags = FOLL_TOUCH | FOLL_HWPOISON |
1144 (pagep ? FOLL_GET : 0) |
1145 (write_fault ? FOLL_WRITE : 0);
1146
1147 /*
1148 * If retrying the fault, we get here *not* having allowed the filemap
1149 * to wait on the page lock. We should now allow waiting on the IO with
1150 * the mmap semaphore released.
1151 */
1152 down_read(&mm->mmap_sem);
1153 npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
1154 &locked);
1155 if (!locked) {
1156 VM_BUG_ON(npages);
1157
1158 if (!pagep)
1159 return 0;
1160
1161 /*
1162 * The previous call has now waited on the IO. Now we can
1163 * retry and complete. Pass TRIED to ensure we do not re
1164 * schedule async IO (see e.g. filemap_fault).
1165 */
1166 down_read(&mm->mmap_sem);
1167 npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
1168 pagep, NULL, NULL);
1169 }
1170 up_read(&mm->mmap_sem);
1171 return npages;
1172}
1173
1110static inline int check_user_page_hwpoison(unsigned long addr) 1174static inline int check_user_page_hwpoison(unsigned long addr)
1111{ 1175{
1112 int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; 1176 int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1169,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
1169 npages = get_user_page_nowait(current, current->mm, 1233 npages = get_user_page_nowait(current, current->mm,
1170 addr, write_fault, page); 1234 addr, write_fault, page);
1171 up_read(&current->mm->mmap_sem); 1235 up_read(&current->mm->mmap_sem);
1172 } else 1236 } else {
1173 npages = get_user_pages_fast(addr, 1, write_fault, 1237 /*
1174 page); 1238 * By now we have tried gup_fast, and possibly async_pf, and we
1239 * are certainly not atomic. Time to retry the gup, allowing
1240 * mmap semaphore to be relinquished in the case of IO.
1241 */
1242 npages = kvm_get_user_page_io(current, current->mm, addr,
1243 write_fault, page);
1244 }
1175 if (npages != 1) 1245 if (npages != 1)
1176 return npages; 1246 return npages;
1177 1247
@@ -1768,8 +1838,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
1768 bool eligible; 1838 bool eligible;
1769 1839
1770 eligible = !vcpu->spin_loop.in_spin_loop || 1840 eligible = !vcpu->spin_loop.in_spin_loop ||
1771 (vcpu->spin_loop.in_spin_loop && 1841 vcpu->spin_loop.dy_eligible;
1772 vcpu->spin_loop.dy_eligible);
1773 1842
1774 if (vcpu->spin_loop.in_spin_loop) 1843 if (vcpu->spin_loop.in_spin_loop)
1775 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); 1844 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
@@ -1975,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
1975 if (vcpu->kvm->mm != current->mm) 2044 if (vcpu->kvm->mm != current->mm)
1976 return -EIO; 2045 return -EIO;
1977 2046
2047 if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
2048 return -EINVAL;
2049
1978#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) 2050#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
1979 /* 2051 /*
1980 * Special cases: vcpu ioctls that are asynchronous to vcpu execution, 2052 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
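With the check above, any ioctl whose type field is not KVMIO is rejected up front, before the vcpu mutex is taken or the command is routed into arch code; stray ioctls issued against a vcpu fd (for example the TCGETS that isatty(3) generates) now fail cleanly with EINVAL. A small userspace illustration, assuming vcpu_fd was obtained with KVM_CREATE_VCPU:

#include <errno.h>
#include <sys/ioctl.h>
#include <termios.h>

/* Illustrative only: a terminal ioctl aimed at a vcpu fd is now refused
 * before KVM does any per-vcpu work. */
static int poke_vcpu_with_tty_ioctl(int vcpu_fd)
{
	struct termios tio;

	if (ioctl(vcpu_fd, TCGETS, &tio) < 0 && errno == EINVAL)
		return 0;	/* rejected up front, as expected */
	return -1;
}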
@@ -2259,6 +2331,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
2259 return filp->private_data; 2331 return filp->private_data;
2260} 2332}
2261 2333
2334static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
2335#ifdef CONFIG_KVM_MPIC
2336 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
2337 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
2338#endif
2339
2340#ifdef CONFIG_KVM_XICS
2341 [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
2342#endif
2343};
2344
2345int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2346{
2347 if (type >= ARRAY_SIZE(kvm_device_ops_table))
2348 return -ENOSPC;
2349
2350 if (kvm_device_ops_table[type] != NULL)
2351 return -EEXIST;
2352
2353 kvm_device_ops_table[type] = ops;
2354 return 0;
2355}
2356
2262static int kvm_ioctl_create_device(struct kvm *kvm, 2357static int kvm_ioctl_create_device(struct kvm *kvm,
2263 struct kvm_create_device *cd) 2358 struct kvm_create_device *cd)
2264{ 2359{
@@ -2267,36 +2362,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
2267 bool test = cd->flags & KVM_CREATE_DEVICE_TEST; 2362 bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
2268 int ret; 2363 int ret;
2269 2364
2270 switch (cd->type) { 2365 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
2271#ifdef CONFIG_KVM_MPIC 2366 return -ENODEV;
2272 case KVM_DEV_TYPE_FSL_MPIC_20: 2367
2273 case KVM_DEV_TYPE_FSL_MPIC_42: 2368 ops = kvm_device_ops_table[cd->type];
2274 ops = &kvm_mpic_ops; 2369 if (ops == NULL)
2275 break;
2276#endif
2277#ifdef CONFIG_KVM_XICS
2278 case KVM_DEV_TYPE_XICS:
2279 ops = &kvm_xics_ops;
2280 break;
2281#endif
2282#ifdef CONFIG_KVM_VFIO
2283 case KVM_DEV_TYPE_VFIO:
2284 ops = &kvm_vfio_ops;
2285 break;
2286#endif
2287#ifdef CONFIG_KVM_ARM_VGIC
2288 case KVM_DEV_TYPE_ARM_VGIC_V2:
2289 ops = &kvm_arm_vgic_v2_ops;
2290 break;
2291#endif
2292#ifdef CONFIG_S390
2293 case KVM_DEV_TYPE_FLIC:
2294 ops = &kvm_flic_ops;
2295 break;
2296#endif
2297 default:
2298 return -ENODEV; 2370 return -ENODEV;
2299 }
2300 2371
2301 if (test) 2372 if (test)
2302 return 0; 2373 return 0;
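Device creation is now a table lookup rather than a switch, but the userspace contract is unchanged: KVM_CREATE_DEVICE with the KVM_CREATE_DEVICE_TEST flag still reports whether a device type is available (i.e. registered in kvm_device_ops_table) without instantiating it, and unknown or unregistered types yield ENODEV. A minimal userspace probe, assuming vm_fd came from KVM_CREATE_VM:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: check for in-kernel vGIC-v2 support without creating
 * the device. */
static int have_vgic_v2(int vm_fd)
{
	struct kvm_create_device cd = {
		.type  = KVM_DEV_TYPE_ARM_VGIC_V2,
		.flags = KVM_CREATE_DEVICE_TEST,	/* check only, no fd */
	};

	/* 0 if the type is registered, -1 with errno ENODEV otherwise. */
	return ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0;
}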
@@ -2611,7 +2682,6 @@ static long kvm_dev_ioctl(struct file *filp,
2611 2682
2612 switch (ioctl) { 2683 switch (ioctl) {
2613 case KVM_GET_API_VERSION: 2684 case KVM_GET_API_VERSION:
2614 r = -EINVAL;
2615 if (arg) 2685 if (arg)
2616 goto out; 2686 goto out;
2617 r = KVM_API_VERSION; 2687 r = KVM_API_VERSION;
@@ -2623,7 +2693,6 @@ static long kvm_dev_ioctl(struct file *filp,
2623 r = kvm_vm_ioctl_check_extension_generic(NULL, arg); 2693 r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
2624 break; 2694 break;
2625 case KVM_GET_VCPU_MMAP_SIZE: 2695 case KVM_GET_VCPU_MMAP_SIZE:
2626 r = -EINVAL;
2627 if (arg) 2696 if (arg)
2628 goto out; 2697 goto out;
2629 r = PAGE_SIZE; /* struct kvm_run */ 2698 r = PAGE_SIZE; /* struct kvm_run */
@@ -2668,7 +2737,7 @@ static void hardware_enable_nolock(void *junk)
2668 2737
2669 cpumask_set_cpu(cpu, cpus_hardware_enabled); 2738 cpumask_set_cpu(cpu, cpus_hardware_enabled);
2670 2739
2671 r = kvm_arch_hardware_enable(NULL); 2740 r = kvm_arch_hardware_enable();
2672 2741
2673 if (r) { 2742 if (r) {
2674 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2743 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2693,7 +2762,7 @@ static void hardware_disable_nolock(void *junk)
2693 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) 2762 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
2694 return; 2763 return;
2695 cpumask_clear_cpu(cpu, cpus_hardware_enabled); 2764 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
2696 kvm_arch_hardware_disable(NULL); 2765 kvm_arch_hardware_disable();
2697} 2766}
2698 2767
2699static void hardware_disable(void) 2768static void hardware_disable(void)
@@ -3123,6 +3192,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
3123 if (vcpu->preempted) 3192 if (vcpu->preempted)
3124 vcpu->preempted = false; 3193 vcpu->preempted = false;
3125 3194
3195 kvm_arch_sched_in(vcpu, cpu);
3196
3126 kvm_arch_vcpu_load(vcpu, cpu); 3197 kvm_arch_vcpu_load(vcpu, cpu);
3127} 3198}
3128 3199
@@ -3214,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
3214 goto out_undebugfs; 3285 goto out_undebugfs;
3215 } 3286 }
3216 3287
3288 r = kvm_vfio_ops_init();
3289 WARN_ON(r);
3290
3217 return 0; 3291 return 0;
3218 3292
3219out_undebugfs: 3293out_undebugfs:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f935c7..281e7cf2b8e5 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/uaccess.h> 19#include <linux/uaccess.h>
20#include <linux/vfio.h> 20#include <linux/vfio.h>
21#include "vfio.h"
21 22
22struct kvm_vfio_group { 23struct kvm_vfio_group {
23 struct list_head node; 24 struct list_head node;
@@ -246,6 +247,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
246 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ 247 kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
247} 248}
248 249
250static int kvm_vfio_create(struct kvm_device *dev, u32 type);
251
252static struct kvm_device_ops kvm_vfio_ops = {
253 .name = "kvm-vfio",
254 .create = kvm_vfio_create,
255 .destroy = kvm_vfio_destroy,
256 .set_attr = kvm_vfio_set_attr,
257 .has_attr = kvm_vfio_has_attr,
258};
259
249static int kvm_vfio_create(struct kvm_device *dev, u32 type) 260static int kvm_vfio_create(struct kvm_device *dev, u32 type)
250{ 261{
251 struct kvm_device *tmp; 262 struct kvm_device *tmp;
@@ -268,10 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
268 return 0; 279 return 0;
269} 280}
270 281
271struct kvm_device_ops kvm_vfio_ops = { 282int kvm_vfio_ops_init(void)
272 .name = "kvm-vfio", 283{
273 .create = kvm_vfio_create, 284 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
274 .destroy = kvm_vfio_destroy, 285}
275 .set_attr = kvm_vfio_set_attr,
276 .has_attr = kvm_vfio_has_attr,
277};
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..92eac75d6b62
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,13 @@
1#ifndef __KVM_VFIO_H
2#define __KVM_VFIO_H
3
4#ifdef CONFIG_KVM_VFIO
5int kvm_vfio_ops_init(void);
6#else
7static inline int kvm_vfio_ops_init(void)
8{
9 return 0;
10}
11#endif
12
13#endif