Diffstat (limited to 'virt')
 -rw-r--r--  virt/kvm/arm/vgic.c  744
 -rw-r--r--  virt/kvm/async_pf.c    4
 -rw-r--r--  virt/kvm/eventfd.c     4
 -rw-r--r--  virt/kvm/ioapic.c     46
 -rw-r--r--  virt/kvm/ioapic.h      2
 -rw-r--r--  virt/kvm/kvm_main.c  192
 -rw-r--r--  virt/kvm/vfio.c       22
 -rw-r--r--  virt/kvm/vfio.h       13
8 files changed, 779 insertions(+), 248 deletions(-)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..862967852d5a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@ | |||
36 | * How the whole thing works (courtesy of Christoffer Dall): | 36 | * How the whole thing works (courtesy of Christoffer Dall): |
37 | * | 37 | * |
38 | * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if | 38 | * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if |
39 | * something is pending | 39 | * something is pending on the CPU interface. |
40 | * - VGIC pending interrupts are stored on the vgic.irq_state vgic | 40 | * - Interrupts that are pending on the distributor are stored on the |
41 | * bitmap (this bitmap is updated by both user land ioctls and guest | 41 | * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land |
42 | * mmio ops, and other in-kernel peripherals such as the | 42 | * ioctls and guest mmio ops, and other in-kernel peripherals such as the |
43 | * arch. timers) and indicate the 'wire' state. | 43 | * arch. timers). |
44 | * - Every time the bitmap changes, the irq_pending_on_cpu oracle is | 44 | * - Every time the bitmap changes, the irq_pending_on_cpu oracle is |
45 | * recalculated | 45 | * recalculated |
46 | * - To calculate the oracle, we need info for each cpu from | 46 | * - To calculate the oracle, we need info for each cpu from |
47 | * compute_pending_for_cpu, which considers: | 47 | * compute_pending_for_cpu, which considers: |
48 | * - PPI: dist->irq_state & dist->irq_enable | 48 | * - PPI: dist->irq_pending & dist->irq_enable |
49 | * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target | 49 | * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target |
50 | * - irq_spi_target is a 'formatted' version of the GICD_ICFGR | 50 | * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn |
51 | * registers, stored on each vcpu. We only keep one bit of | 51 | * registers, stored on each vcpu. We only keep one bit of |
52 | * information per interrupt, making sure that only one vcpu can | 52 | * information per interrupt, making sure that only one vcpu can |
53 | * accept the interrupt. | 53 | * accept the interrupt. |
54 | * - If any of the above state changes, we must recalculate the oracle. | ||
54 | * - The same is true when injecting an interrupt, except that we only | 55 | * - The same is true when injecting an interrupt, except that we only |
55 | * consider a single interrupt at a time. The irq_spi_cpu array | 56 | * consider a single interrupt at a time. The irq_spi_cpu array |
56 | * contains the target CPU for each SPI. | 57 | * contains the target CPU for each SPI. |
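For reference, the per-vcpu "oracle" described above boils down to a couple of bitwise ANDs. A minimal standalone sketch of that computation (the toy_* names are invented for illustration and are not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* One 32-bit word of private (SGI/PPI) state and one word of shared (SPI)
 * state per vcpu; a simplified stand-in for the vgic_bitmap fields. */
struct toy_vcpu_view {
	uint32_t pend_private, enab_private;
	uint32_t pend_shared, enab_shared, spi_target;
};

/* The per-vcpu pending "oracle" bit, mirroring compute_pending_for_cpu():
 *   SGI/PPI: pending & enabled
 *   SPI:     pending & enabled & targeted at this vcpu */
static bool toy_pending_on_cpu(const struct toy_vcpu_view *v)
{
	uint32_t priv = v->pend_private & v->enab_private;
	uint32_t shared = v->pend_shared & v->enab_shared & v->spi_target;

	return priv || shared;
}

int main(void)
{
	/* SPI 35 pending and enabled, but routed to another vcpu:
	 * nothing is deliverable on this CPU interface. */
	struct toy_vcpu_view v = {
		.pend_shared = 1u << (35 - 32),
		.enab_shared = 1u << (35 - 32),
		.spi_target = 0,
	};

	printf("%d\n", toy_pending_on_cpu(&v));	/* 0 */
	return 0;
}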
@@ -60,13 +61,18 @@ | |||
60 | * the 'line' again. This is achieved as such: | 61 | * the 'line' again. This is achieved as such: |
61 | * | 62 | * |
62 | * - When a level interrupt is moved onto a vcpu, the corresponding | 63 | * - When a level interrupt is moved onto a vcpu, the corresponding |
63 | * bit in irq_active is set. As long as this bit is set, the line | 64 | * bit in irq_queued is set. As long as this bit is set, the line |
64 | * will be ignored for further interrupts. The interrupt is injected | 65 | * will be ignored for further interrupts. The interrupt is injected |
65 | * into the vcpu with the GICH_LR_EOI bit set (generate a | 66 | * into the vcpu with the GICH_LR_EOI bit set (generate a |
66 | * maintenance interrupt on EOI). | 67 | * maintenance interrupt on EOI). |
67 | * - When the interrupt is EOIed, the maintenance interrupt fires, | 68 | * - When the interrupt is EOIed, the maintenance interrupt fires, |
68 | * and clears the corresponding bit in irq_active. This allow the | 69 | * and clears the corresponding bit in irq_queued. This allows the |
69 | * interrupt line to be sampled again. | 70 | * interrupt line to be sampled again. |
71 | * - Note that level-triggered interrupts can also be set to pending from | ||
72 | * writes to GICD_ISPENDRn and lowering the external input line does not | ||
73 | * cause the interrupt to become inactive in such a situation. | ||
74 | * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become | ||
75 | * inactive as long as the external input line is held high. | ||
70 | */ | 76 | */ |
71 | 77 | ||
72 | #define VGIC_ADDR_UNDEF (-1) | 78 | #define VGIC_ADDR_UNDEF (-1) |
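The level-triggered life cycle spelled out above (pending follows the wire or the software-pending latch, and EOI re-opens sampling) can be summarised in a small standalone sketch; the toy_* names are illustrative only and do not belong to the patch:

#include <stdbool.h>
#include <stdio.h>

/* Per-interrupt state mirroring the bitmaps this patch introduces. */
struct toy_irq {
	bool line;       /* external input level          (irq_level)     */
	bool soft_pend;  /* set by a GICD_ISPENDRn write  (irq_soft_pend) */
	bool pending;    /* distributor pending state     (irq_pending)   */
	bool queued;     /* occupies a list register      (irq_queued)    */
};

/* A level interrupt stays pending while either the wire or the
 * software-pending latch is high. */
static void toy_sample(struct toy_irq *irq)
{
	irq->pending = irq->line || irq->soft_pend;
}

/* Guest EOI: the interrupt may be sampled again and the soft-pending
 * latch is dropped, as in the maintenance handling further down. */
static void toy_eoi(struct toy_irq *irq)
{
	irq->queued = false;
	irq->soft_pend = false;
	toy_sample(irq);
}

int main(void)
{
	struct toy_irq irq = { .line = true };

	toy_sample(&irq);
	irq.soft_pend = true;		/* GICD_ISPENDRn write         */
	irq.line = false;		/* line drops: still pending   */
	toy_sample(&irq);
	printf("pending after line drop: %d\n", irq.pending);	/* 1 */
	toy_eoi(&irq);
	printf("pending after EOI:       %d\n", irq.pending);	/* 0 */
	return 0;
}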
@@ -89,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); | |||
89 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); | 95 | static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); |
90 | static void vgic_update_state(struct kvm *kvm); | 96 | static void vgic_update_state(struct kvm *kvm); |
91 | static void vgic_kick_vcpus(struct kvm *kvm); | 97 | static void vgic_kick_vcpus(struct kvm *kvm); |
98 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi); | ||
92 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); | 99 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); |
93 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); | 100 | static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); |
94 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); | 101 | static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); |
@@ -99,10 +106,8 @@ static const struct vgic_ops *vgic_ops; | |||
99 | static const struct vgic_params *vgic; | 106 | static const struct vgic_params *vgic; |
100 | 107 | ||
101 | /* | 108 | /* |
102 | * struct vgic_bitmap contains unions that provide two views of | 109 | * struct vgic_bitmap contains a bitmap made of unsigned longs, but |
103 | * the same data. In one case it is an array of registers of | 110 | * extracts u32s out of them. |
104 | * u32's, and in the other case it is a bitmap of unsigned | ||
105 | * longs. | ||
106 | * | 111 | * |
107 | * This does not work on 64-bit BE systems, because the bitmap access | 112 | * This does not work on 64-bit BE systems, because the bitmap access |
108 | * will store two consecutive 32-bit words with the higher-addressed | 113 | * will store two consecutive 32-bit words with the higher-addressed |
@@ -118,23 +123,45 @@ static const struct vgic_params *vgic; | |||
118 | #define REG_OFFSET_SWIZZLE 0 | 123 | #define REG_OFFSET_SWIZZLE 0 |
119 | #endif | 124 | #endif |
120 | 125 | ||
126 | static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) | ||
127 | { | ||
128 | int nr_longs; | ||
129 | |||
130 | nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); | ||
131 | |||
132 | b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); | ||
133 | if (!b->private) | ||
134 | return -ENOMEM; | ||
135 | |||
136 | b->shared = b->private + nr_cpus; | ||
137 | |||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | static void vgic_free_bitmap(struct vgic_bitmap *b) | ||
142 | { | ||
143 | kfree(b->private); | ||
144 | b->private = NULL; | ||
145 | b->shared = NULL; | ||
146 | } | ||
147 | |||
121 | static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, | 148 | static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, |
122 | int cpuid, u32 offset) | 149 | int cpuid, u32 offset) |
123 | { | 150 | { |
124 | offset >>= 2; | 151 | offset >>= 2; |
125 | if (!offset) | 152 | if (!offset) |
126 | return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE); | 153 | return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; |
127 | else | 154 | else |
128 | return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE); | 155 | return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); |
129 | } | 156 | } |
130 | 157 | ||
131 | static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, | 158 | static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, |
132 | int cpuid, int irq) | 159 | int cpuid, int irq) |
133 | { | 160 | { |
134 | if (irq < VGIC_NR_PRIVATE_IRQS) | 161 | if (irq < VGIC_NR_PRIVATE_IRQS) |
135 | return test_bit(irq, x->percpu[cpuid].reg_ul); | 162 | return test_bit(irq, x->private + cpuid); |
136 | 163 | ||
137 | return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); | 164 | return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); |
138 | } | 165 | } |
139 | 166 | ||
140 | static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | 167 | static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, |
@@ -143,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | |||
143 | unsigned long *reg; | 170 | unsigned long *reg; |
144 | 171 | ||
145 | if (irq < VGIC_NR_PRIVATE_IRQS) { | 172 | if (irq < VGIC_NR_PRIVATE_IRQS) { |
146 | reg = x->percpu[cpuid].reg_ul; | 173 | reg = x->private + cpuid; |
147 | } else { | 174 | } else { |
148 | reg = x->shared.reg_ul; | 175 | reg = x->shared; |
149 | irq -= VGIC_NR_PRIVATE_IRQS; | 176 | irq -= VGIC_NR_PRIVATE_IRQS; |
150 | } | 177 | } |
151 | 178 | ||
@@ -157,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, | |||
157 | 184 | ||
158 | static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) | 185 | static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) |
159 | { | 186 | { |
160 | if (unlikely(cpuid >= VGIC_MAX_CPUS)) | 187 | return x->private + cpuid; |
161 | return NULL; | ||
162 | return x->percpu[cpuid].reg_ul; | ||
163 | } | 188 | } |
164 | 189 | ||
165 | static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) | 190 | static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) |
166 | { | 191 | { |
167 | return x->shared.reg_ul; | 192 | return x->shared; |
193 | } | ||
194 | |||
195 | static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) | ||
196 | { | ||
197 | int size; | ||
198 | |||
199 | size = nr_cpus * VGIC_NR_PRIVATE_IRQS; | ||
200 | size += nr_irqs - VGIC_NR_PRIVATE_IRQS; | ||
201 | |||
202 | x->private = kzalloc(size, GFP_KERNEL); | ||
203 | if (!x->private) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | static void vgic_free_bytemap(struct vgic_bytemap *b) | ||
211 | { | ||
212 | kfree(b->private); | ||
213 | b->private = NULL; | ||
214 | b->shared = NULL; | ||
168 | } | 215 | } |
169 | 216 | ||
170 | static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) | 217 | static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) |
171 | { | 218 | { |
172 | offset >>= 2; | 219 | u32 *reg; |
173 | BUG_ON(offset > (VGIC_NR_IRQS / 4)); | 220 | |
174 | if (offset < 8) | 221 | if (offset < VGIC_NR_PRIVATE_IRQS) { |
175 | return x->percpu[cpuid] + offset; | 222 | reg = x->private; |
176 | else | 223 | offset += cpuid * VGIC_NR_PRIVATE_IRQS; |
177 | return x->shared + offset - 8; | 224 | } else { |
225 | reg = x->shared; | ||
226 | offset -= VGIC_NR_PRIVATE_IRQS; | ||
227 | } | ||
228 | |||
229 | return reg + (offset / sizeof(u32)); | ||
178 | } | 230 | } |
179 | 231 | ||
180 | #define VGIC_CFG_LEVEL 0 | 232 | #define VGIC_CFG_LEVEL 0 |
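The bytemap follows the same split, 32 private bytes per vcpu followed by one byte per SPI. A hedged sketch of the offset math in vgic_bytemap_get_reg() (toy_* names are illustrative, not the patch's API):

#include <stdio.h>

/* Mirror of vgic_bytemap_get_reg(): byte offsets below 32 index the
 * per-vcpu private area (32 bytes per vcpu), the rest index the shared
 * area; each returned register packs four 8-bit fields. */
static void toy_bytemap_locate(int cpuid, unsigned int offset)
{
	const char *area;

	if (offset < 32) {
		area = "private";
		offset += cpuid * 32;
	} else {
		area = "shared";
		offset -= 32;
	}
	printf("%s u32 #%u, byte %u\n", area, offset / 4, offset % 4);
}

int main(void)
{
	toy_bytemap_locate(1, 16);	/* PPI priority byte on vcpu 1 */
	toy_bytemap_locate(0, 40);	/* SPI priority byte (irq 40)  */
	return 0;
}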
@@ -196,46 +248,81 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) | |||
196 | return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); | 248 | return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); |
197 | } | 249 | } |
198 | 250 | ||
199 | static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) | 251 | static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) |
252 | { | ||
253 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
254 | |||
255 | return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); | ||
256 | } | ||
257 | |||
258 | static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) | ||
259 | { | ||
260 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
261 | |||
262 | vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); | ||
263 | } | ||
264 | |||
265 | static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) | ||
266 | { | ||
267 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
268 | |||
269 | vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); | ||
270 | } | ||
271 | |||
272 | static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) | ||
200 | { | 273 | { |
201 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 274 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
202 | 275 | ||
203 | return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); | 276 | return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); |
204 | } | 277 | } |
205 | 278 | ||
206 | static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) | 279 | static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) |
207 | { | 280 | { |
208 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 281 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
209 | 282 | ||
210 | vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); | 283 | vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); |
211 | } | 284 | } |
212 | 285 | ||
213 | static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) | 286 | static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) |
214 | { | 287 | { |
215 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 288 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
216 | 289 | ||
217 | vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); | 290 | vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); |
291 | } | ||
292 | |||
293 | static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) | ||
294 | { | ||
295 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
296 | |||
297 | return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); | ||
298 | } | ||
299 | |||
300 | static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) | ||
301 | { | ||
302 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
303 | |||
304 | vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); | ||
218 | } | 305 | } |
219 | 306 | ||
220 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) | 307 | static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) |
221 | { | 308 | { |
222 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 309 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
223 | 310 | ||
224 | return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); | 311 | return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); |
225 | } | 312 | } |
226 | 313 | ||
227 | static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) | 314 | static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) |
228 | { | 315 | { |
229 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 316 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
230 | 317 | ||
231 | vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); | 318 | vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); |
232 | } | 319 | } |
233 | 320 | ||
234 | static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) | 321 | static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) |
235 | { | 322 | { |
236 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 323 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
237 | 324 | ||
238 | vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); | 325 | vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); |
239 | } | 326 | } |
240 | 327 | ||
241 | static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) | 328 | static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) |
@@ -256,6 +343,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) | |||
256 | vcpu->arch.vgic_cpu.pending_shared); | 343 | vcpu->arch.vgic_cpu.pending_shared); |
257 | } | 344 | } |
258 | 345 | ||
346 | static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) | ||
347 | { | ||
348 | return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq); | ||
349 | } | ||
350 | |||
259 | static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) | 351 | static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) |
260 | { | 352 | { |
261 | return le32_to_cpu(*((u32 *)mmio->data)) & mask; | 353 | return le32_to_cpu(*((u32 *)mmio->data)) & mask; |
@@ -347,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, | |||
347 | 439 | ||
348 | case 4: /* GICD_TYPER */ | 440 | case 4: /* GICD_TYPER */ |
349 | reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; | 441 | reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; |
350 | reg |= (VGIC_NR_IRQS >> 5) - 1; | 442 | reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; |
351 | vgic_reg_access(mmio, ®, word_offset, | 443 | vgic_reg_access(mmio, ®, word_offset, |
352 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); | 444 | ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); |
353 | break; | 445 | break; |
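As a quick sanity check of the GICD_TYPER value computed above, a standalone sketch (the toy_* name is illustrative):

#include <stdio.h>

/* GICD_TYPER as built in handle_mmio_misc(): CPUNumber in bits [7:5]
 * (number of vcpus minus one), ITLinesNumber in bits [4:0]
 * (32 * (N + 1) interrupts supported). */
static unsigned int toy_gicd_typer(int online_vcpus, int nr_irqs)
{
	return ((online_vcpus - 1) << 5) | ((nr_irqs >> 5) - 1);
}

int main(void)
{
	/* 4 vcpus, 256 IRQs -> 0x67: CPUNumber = 3, ITLinesNumber = 7 */
	printf("GICD_TYPER = 0x%x\n", toy_gicd_typer(4, 256));
	return 0;
}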
@@ -409,11 +501,33 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, | |||
409 | struct kvm_exit_mmio *mmio, | 501 | struct kvm_exit_mmio *mmio, |
410 | phys_addr_t offset) | 502 | phys_addr_t offset) |
411 | { | 503 | { |
412 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, | 504 | u32 *reg, orig; |
413 | vcpu->vcpu_id, offset); | 505 | u32 level_mask; |
506 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
507 | |||
508 | reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset); | ||
509 | level_mask = (~(*reg)); | ||
510 | |||
511 | /* Mark both level and edge triggered irqs as pending */ | ||
512 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); | ||
513 | orig = *reg; | ||
414 | vgic_reg_access(mmio, reg, offset, | 514 | vgic_reg_access(mmio, reg, offset, |
415 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | 515 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); |
516 | |||
416 | if (mmio->is_write) { | 517 | if (mmio->is_write) { |
518 | /* Set the soft-pending flag only for level-triggered irqs */ | ||
519 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, | ||
520 | vcpu->vcpu_id, offset); | ||
521 | vgic_reg_access(mmio, reg, offset, | ||
522 | ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); | ||
523 | *reg &= level_mask; | ||
524 | |||
525 | /* Ignore writes to SGIs */ | ||
526 | if (offset < 2) { | ||
527 | *reg &= ~0xffff; | ||
528 | *reg |= orig & 0xffff; | ||
529 | } | ||
530 | |||
417 | vgic_update_state(vcpu->kvm); | 531 | vgic_update_state(vcpu->kvm); |
418 | return true; | 532 | return true; |
419 | } | 533 | } |
@@ -425,11 +539,34 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, | |||
425 | struct kvm_exit_mmio *mmio, | 539 | struct kvm_exit_mmio *mmio, |
426 | phys_addr_t offset) | 540 | phys_addr_t offset) |
427 | { | 541 | { |
428 | u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, | 542 | u32 *level_active; |
429 | vcpu->vcpu_id, offset); | 543 | u32 *reg, orig; |
544 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
545 | |||
546 | reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset); | ||
547 | orig = *reg; | ||
430 | vgic_reg_access(mmio, reg, offset, | 548 | vgic_reg_access(mmio, reg, offset, |
431 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | 549 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); |
432 | if (mmio->is_write) { | 550 | if (mmio->is_write) { |
551 | /* Re-set level triggered level-active interrupts */ | ||
552 | level_active = vgic_bitmap_get_reg(&dist->irq_level, | ||
553 | vcpu->vcpu_id, offset); | ||
554 | reg = vgic_bitmap_get_reg(&dist->irq_pending, | ||
555 | vcpu->vcpu_id, offset); | ||
556 | *reg |= *level_active; | ||
557 | |||
558 | /* Ignore writes to SGIs */ | ||
559 | if (offset < 2) { | ||
560 | *reg &= ~0xffff; | ||
561 | *reg |= orig & 0xffff; | ||
562 | } | ||
563 | |||
564 | /* Clear soft-pending flags */ | ||
565 | reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, | ||
566 | vcpu->vcpu_id, offset); | ||
567 | vgic_reg_access(mmio, reg, offset, | ||
568 | ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); | ||
569 | |||
433 | vgic_update_state(vcpu->kvm); | 570 | vgic_update_state(vcpu->kvm); |
434 | return true; | 571 | return true; |
435 | } | 572 | } |
@@ -651,9 +788,9 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
651 | * is fine, then we are only setting a few bits that were | 788 | * is fine, then we are only setting a few bits that were |
652 | * already set. | 789 | * already set. |
653 | */ | 790 | */ |
654 | vgic_dist_irq_set(vcpu, lr.irq); | 791 | vgic_dist_irq_set_pending(vcpu, lr.irq); |
655 | if (lr.irq < VGIC_NR_SGIS) | 792 | if (lr.irq < VGIC_NR_SGIS) |
656 | dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source; | 793 | *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source; |
657 | lr.state &= ~LR_STATE_PENDING; | 794 | lr.state &= ~LR_STATE_PENDING; |
658 | vgic_set_lr(vcpu, i, lr); | 795 | vgic_set_lr(vcpu, i, lr); |
659 | 796 | ||
@@ -662,8 +799,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) | |||
662 | * active), then the LR does not hold any useful info and can | 799 | * active), then the LR does not hold any useful info and can |
663 | * be marked as free for other use. | 800 | * be marked as free for other use. |
664 | */ | 801 | */ |
665 | if (!(lr.state & LR_STATE_MASK)) | 802 | if (!(lr.state & LR_STATE_MASK)) { |
666 | vgic_retire_lr(i, lr.irq, vcpu); | 803 | vgic_retire_lr(i, lr.irq, vcpu); |
804 | vgic_irq_clear_queued(vcpu, lr.irq); | ||
805 | } | ||
667 | 806 | ||
668 | /* Finally update the VGIC state. */ | 807 | /* Finally update the VGIC state. */ |
669 | vgic_update_state(vcpu->kvm); | 808 | vgic_update_state(vcpu->kvm); |
@@ -677,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
677 | { | 816 | { |
678 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 817 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
679 | int sgi; | 818 | int sgi; |
680 | int min_sgi = (offset & ~0x3) * 4; | 819 | int min_sgi = (offset & ~0x3); |
681 | int max_sgi = min_sgi + 3; | 820 | int max_sgi = min_sgi + 3; |
682 | int vcpu_id = vcpu->vcpu_id; | 821 | int vcpu_id = vcpu->vcpu_id; |
683 | u32 reg = 0; | 822 | u32 reg = 0; |
@@ -685,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
685 | /* Copy source SGIs from distributor side */ | 824 | /* Copy source SGIs from distributor side */ |
686 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | 825 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { |
687 | int shift = 8 * (sgi - min_sgi); | 826 | int shift = 8 * (sgi - min_sgi); |
688 | reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; | 827 | reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift; |
689 | } | 828 | } |
690 | 829 | ||
691 | mmio_data_write(mmio, ~0, reg); | 830 | mmio_data_write(mmio, ~0, reg); |
@@ -698,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
698 | { | 837 | { |
699 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 838 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
700 | int sgi; | 839 | int sgi; |
701 | int min_sgi = (offset & ~0x3) * 4; | 840 | int min_sgi = (offset & ~0x3); |
702 | int max_sgi = min_sgi + 3; | 841 | int max_sgi = min_sgi + 3; |
703 | int vcpu_id = vcpu->vcpu_id; | 842 | int vcpu_id = vcpu->vcpu_id; |
704 | u32 reg; | 843 | u32 reg; |
@@ -709,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, | |||
709 | /* Clear pending SGIs on the distributor */ | 848 | /* Clear pending SGIs on the distributor */ |
710 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { | 849 | for (sgi = min_sgi; sgi <= max_sgi; sgi++) { |
711 | u8 mask = reg >> (8 * (sgi - min_sgi)); | 850 | u8 mask = reg >> (8 * (sgi - min_sgi)); |
851 | u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); | ||
712 | if (set) { | 852 | if (set) { |
713 | if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) | 853 | if ((*src & mask) != mask) |
714 | updated = true; | 854 | updated = true; |
715 | dist->irq_sgi_sources[vcpu_id][sgi] |= mask; | 855 | *src |= mask; |
716 | } else { | 856 | } else { |
717 | if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) | 857 | if (*src & mask) |
718 | updated = true; | 858 | updated = true; |
719 | dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; | 859 | *src &= ~mask; |
720 | } | 860 | } |
721 | } | 861 | } |
722 | 862 | ||
@@ -755,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, | |||
755 | struct mmio_range { | 895 | struct mmio_range { |
756 | phys_addr_t base; | 896 | phys_addr_t base; |
757 | unsigned long len; | 897 | unsigned long len; |
898 | int bits_per_irq; | ||
758 | bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, | 899 | bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, |
759 | phys_addr_t offset); | 900 | phys_addr_t offset); |
760 | }; | 901 | }; |
@@ -763,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = { | |||
763 | { | 904 | { |
764 | .base = GIC_DIST_CTRL, | 905 | .base = GIC_DIST_CTRL, |
765 | .len = 12, | 906 | .len = 12, |
907 | .bits_per_irq = 0, | ||
766 | .handle_mmio = handle_mmio_misc, | 908 | .handle_mmio = handle_mmio_misc, |
767 | }, | 909 | }, |
768 | { | 910 | { |
769 | .base = GIC_DIST_IGROUP, | 911 | .base = GIC_DIST_IGROUP, |
770 | .len = VGIC_NR_IRQS / 8, | 912 | .len = VGIC_MAX_IRQS / 8, |
913 | .bits_per_irq = 1, | ||
771 | .handle_mmio = handle_mmio_raz_wi, | 914 | .handle_mmio = handle_mmio_raz_wi, |
772 | }, | 915 | }, |
773 | { | 916 | { |
774 | .base = GIC_DIST_ENABLE_SET, | 917 | .base = GIC_DIST_ENABLE_SET, |
775 | .len = VGIC_NR_IRQS / 8, | 918 | .len = VGIC_MAX_IRQS / 8, |
919 | .bits_per_irq = 1, | ||
776 | .handle_mmio = handle_mmio_set_enable_reg, | 920 | .handle_mmio = handle_mmio_set_enable_reg, |
777 | }, | 921 | }, |
778 | { | 922 | { |
779 | .base = GIC_DIST_ENABLE_CLEAR, | 923 | .base = GIC_DIST_ENABLE_CLEAR, |
780 | .len = VGIC_NR_IRQS / 8, | 924 | .len = VGIC_MAX_IRQS / 8, |
925 | .bits_per_irq = 1, | ||
781 | .handle_mmio = handle_mmio_clear_enable_reg, | 926 | .handle_mmio = handle_mmio_clear_enable_reg, |
782 | }, | 927 | }, |
783 | { | 928 | { |
784 | .base = GIC_DIST_PENDING_SET, | 929 | .base = GIC_DIST_PENDING_SET, |
785 | .len = VGIC_NR_IRQS / 8, | 930 | .len = VGIC_MAX_IRQS / 8, |
931 | .bits_per_irq = 1, | ||
786 | .handle_mmio = handle_mmio_set_pending_reg, | 932 | .handle_mmio = handle_mmio_set_pending_reg, |
787 | }, | 933 | }, |
788 | { | 934 | { |
789 | .base = GIC_DIST_PENDING_CLEAR, | 935 | .base = GIC_DIST_PENDING_CLEAR, |
790 | .len = VGIC_NR_IRQS / 8, | 936 | .len = VGIC_MAX_IRQS / 8, |
937 | .bits_per_irq = 1, | ||
791 | .handle_mmio = handle_mmio_clear_pending_reg, | 938 | .handle_mmio = handle_mmio_clear_pending_reg, |
792 | }, | 939 | }, |
793 | { | 940 | { |
794 | .base = GIC_DIST_ACTIVE_SET, | 941 | .base = GIC_DIST_ACTIVE_SET, |
795 | .len = VGIC_NR_IRQS / 8, | 942 | .len = VGIC_MAX_IRQS / 8, |
943 | .bits_per_irq = 1, | ||
796 | .handle_mmio = handle_mmio_raz_wi, | 944 | .handle_mmio = handle_mmio_raz_wi, |
797 | }, | 945 | }, |
798 | { | 946 | { |
799 | .base = GIC_DIST_ACTIVE_CLEAR, | 947 | .base = GIC_DIST_ACTIVE_CLEAR, |
800 | .len = VGIC_NR_IRQS / 8, | 948 | .len = VGIC_MAX_IRQS / 8, |
949 | .bits_per_irq = 1, | ||
801 | .handle_mmio = handle_mmio_raz_wi, | 950 | .handle_mmio = handle_mmio_raz_wi, |
802 | }, | 951 | }, |
803 | { | 952 | { |
804 | .base = GIC_DIST_PRI, | 953 | .base = GIC_DIST_PRI, |
805 | .len = VGIC_NR_IRQS, | 954 | .len = VGIC_MAX_IRQS, |
955 | .bits_per_irq = 8, | ||
806 | .handle_mmio = handle_mmio_priority_reg, | 956 | .handle_mmio = handle_mmio_priority_reg, |
807 | }, | 957 | }, |
808 | { | 958 | { |
809 | .base = GIC_DIST_TARGET, | 959 | .base = GIC_DIST_TARGET, |
810 | .len = VGIC_NR_IRQS, | 960 | .len = VGIC_MAX_IRQS, |
961 | .bits_per_irq = 8, | ||
811 | .handle_mmio = handle_mmio_target_reg, | 962 | .handle_mmio = handle_mmio_target_reg, |
812 | }, | 963 | }, |
813 | { | 964 | { |
814 | .base = GIC_DIST_CONFIG, | 965 | .base = GIC_DIST_CONFIG, |
815 | .len = VGIC_NR_IRQS / 4, | 966 | .len = VGIC_MAX_IRQS / 4, |
967 | .bits_per_irq = 2, | ||
816 | .handle_mmio = handle_mmio_cfg_reg, | 968 | .handle_mmio = handle_mmio_cfg_reg, |
817 | }, | 969 | }, |
818 | { | 970 | { |
@@ -850,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges, | |||
850 | return NULL; | 1002 | return NULL; |
851 | } | 1003 | } |
852 | 1004 | ||
1005 | static bool vgic_validate_access(const struct vgic_dist *dist, | ||
1006 | const struct mmio_range *range, | ||
1007 | unsigned long offset) | ||
1008 | { | ||
1009 | int irq; | ||
1010 | |||
1011 | if (!range->bits_per_irq) | ||
1012 | return true; /* Not an irq-based access */ | ||
1013 | |||
1014 | irq = offset * 8 / range->bits_per_irq; | ||
1015 | if (irq >= dist->nr_irqs) | ||
1016 | return false; | ||
1017 | |||
1018 | return true; | ||
1019 | } | ||
1020 | |||
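A worked example of the offset-to-irq mapping used by vgic_validate_access() above; out-of-range accesses fall through to the RAZ/WI path in vgic_handle_mmio(). The toy_* name and the 128-IRQ configuration are illustrative:

#include <stdbool.h>
#include <stdio.h>

/* Mirror of vgic_validate_access(): map a byte offset within a register
 * group to the first irq it touches and reject accesses past nr_irqs. */
static bool toy_validate(int nr_irqs, int bits_per_irq, unsigned long offset)
{
	if (!bits_per_irq)
		return true;		/* not an irq-indexed register */
	return (offset * 8 / bits_per_irq) < nr_irqs;
}

int main(void)
{
	/* With 128 IRQs configured: GICD_ISENABLERn (1 bit/irq) is valid up
	 * to byte offset 0xf; the word starting at 0x10 is RAZ/WI. */
	printf("%d %d\n", toy_validate(128, 1, 0x0c), toy_validate(128, 1, 0x10));
	return 0;
}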
853 | /** | 1021 | /** |
854 | * vgic_handle_mmio - handle an in-kernel MMIO access | 1022 | * vgic_handle_mmio - handle an in-kernel MMIO access |
855 | * @vcpu: pointer to the vcpu performing the access | 1023 | * @vcpu: pointer to the vcpu performing the access |
@@ -889,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
889 | 1057 | ||
890 | spin_lock(&vcpu->kvm->arch.vgic.lock); | 1058 | spin_lock(&vcpu->kvm->arch.vgic.lock); |
891 | offset = mmio->phys_addr - range->base - base; | 1059 | offset = mmio->phys_addr - range->base - base; |
892 | updated_state = range->handle_mmio(vcpu, mmio, offset); | 1060 | if (vgic_validate_access(dist, range, offset)) { |
1061 | updated_state = range->handle_mmio(vcpu, mmio, offset); | ||
1062 | } else { | ||
1063 | vgic_reg_access(mmio, NULL, offset, | ||
1064 | ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); | ||
1065 | updated_state = false; | ||
1066 | } | ||
893 | spin_unlock(&vcpu->kvm->arch.vgic.lock); | 1067 | spin_unlock(&vcpu->kvm->arch.vgic.lock); |
894 | kvm_prepare_mmio(run, mmio); | 1068 | kvm_prepare_mmio(run, mmio); |
895 | kvm_handle_mmio_return(vcpu, run); | 1069 | kvm_handle_mmio_return(vcpu, run); |
@@ -900,6 +1074,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, | |||
900 | return true; | 1074 | return true; |
901 | } | 1075 | } |
902 | 1076 | ||
1077 | static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) | ||
1078 | { | ||
1079 | return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; | ||
1080 | } | ||
1081 | |||
903 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | 1082 | static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) |
904 | { | 1083 | { |
905 | struct kvm *kvm = vcpu->kvm; | 1084 | struct kvm *kvm = vcpu->kvm; |
@@ -932,8 +1111,8 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | |||
932 | kvm_for_each_vcpu(c, vcpu, kvm) { | 1111 | kvm_for_each_vcpu(c, vcpu, kvm) { |
933 | if (target_cpus & 1) { | 1112 | if (target_cpus & 1) { |
934 | /* Flag the SGI as pending */ | 1113 | /* Flag the SGI as pending */ |
935 | vgic_dist_irq_set(vcpu, sgi); | 1114 | vgic_dist_irq_set_pending(vcpu, sgi); |
936 | dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; | 1115 | *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; |
937 | kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); | 1116 | kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); |
938 | } | 1117 | } |
939 | 1118 | ||
@@ -941,32 +1120,38 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) | |||
941 | } | 1120 | } |
942 | } | 1121 | } |
943 | 1122 | ||
1123 | static int vgic_nr_shared_irqs(struct vgic_dist *dist) | ||
1124 | { | ||
1125 | return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; | ||
1126 | } | ||
1127 | |||
944 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) | 1128 | static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) |
945 | { | 1129 | { |
946 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1130 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
947 | unsigned long *pending, *enabled, *pend_percpu, *pend_shared; | 1131 | unsigned long *pending, *enabled, *pend_percpu, *pend_shared; |
948 | unsigned long pending_private, pending_shared; | 1132 | unsigned long pending_private, pending_shared; |
1133 | int nr_shared = vgic_nr_shared_irqs(dist); | ||
949 | int vcpu_id; | 1134 | int vcpu_id; |
950 | 1135 | ||
951 | vcpu_id = vcpu->vcpu_id; | 1136 | vcpu_id = vcpu->vcpu_id; |
952 | pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; | 1137 | pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; |
953 | pend_shared = vcpu->arch.vgic_cpu.pending_shared; | 1138 | pend_shared = vcpu->arch.vgic_cpu.pending_shared; |
954 | 1139 | ||
955 | pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); | 1140 | pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); |
956 | enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); | 1141 | enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); |
957 | bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); | 1142 | bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); |
958 | 1143 | ||
959 | pending = vgic_bitmap_get_shared_map(&dist->irq_state); | 1144 | pending = vgic_bitmap_get_shared_map(&dist->irq_pending); |
960 | enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); | 1145 | enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); |
961 | bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); | 1146 | bitmap_and(pend_shared, pending, enabled, nr_shared); |
962 | bitmap_and(pend_shared, pend_shared, | 1147 | bitmap_and(pend_shared, pend_shared, |
963 | vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), | 1148 | vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), |
964 | VGIC_NR_SHARED_IRQS); | 1149 | nr_shared); |
965 | 1150 | ||
966 | pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); | 1151 | pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); |
967 | pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); | 1152 | pending_shared = find_first_bit(pend_shared, nr_shared); |
968 | return (pending_private < VGIC_NR_PRIVATE_IRQS || | 1153 | return (pending_private < VGIC_NR_PRIVATE_IRQS || |
969 | pending_shared < VGIC_NR_SHARED_IRQS); | 1154 | pending_shared < vgic_nr_shared_irqs(dist)); |
970 | } | 1155 | } |
971 | 1156 | ||
972 | /* | 1157 | /* |
@@ -980,14 +1165,14 @@ static void vgic_update_state(struct kvm *kvm) | |||
980 | int c; | 1165 | int c; |
981 | 1166 | ||
982 | if (!dist->enabled) { | 1167 | if (!dist->enabled) { |
983 | set_bit(0, &dist->irq_pending_on_cpu); | 1168 | set_bit(0, dist->irq_pending_on_cpu); |
984 | return; | 1169 | return; |
985 | } | 1170 | } |
986 | 1171 | ||
987 | kvm_for_each_vcpu(c, vcpu, kvm) { | 1172 | kvm_for_each_vcpu(c, vcpu, kvm) { |
988 | if (compute_pending_for_cpu(vcpu)) { | 1173 | if (compute_pending_for_cpu(vcpu)) { |
989 | pr_debug("CPU%d has pending interrupts\n", c); | 1174 | pr_debug("CPU%d has pending interrupts\n", c); |
990 | set_bit(c, &dist->irq_pending_on_cpu); | 1175 | set_bit(c, dist->irq_pending_on_cpu); |
991 | } | 1176 | } |
992 | } | 1177 | } |
993 | } | 1178 | } |
@@ -1079,8 +1264,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | |||
1079 | 1264 | ||
1080 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { | 1265 | if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { |
1081 | vgic_retire_lr(lr, vlr.irq, vcpu); | 1266 | vgic_retire_lr(lr, vlr.irq, vcpu); |
1082 | if (vgic_irq_is_active(vcpu, vlr.irq)) | 1267 | if (vgic_irq_is_queued(vcpu, vlr.irq)) |
1083 | vgic_irq_clear_active(vcpu, vlr.irq); | 1268 | vgic_irq_clear_queued(vcpu, vlr.irq); |
1084 | } | 1269 | } |
1085 | } | 1270 | } |
1086 | } | 1271 | } |
@@ -1092,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) | |||
1092 | static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) | 1277 | static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) |
1093 | { | 1278 | { |
1094 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1279 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
1280 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | ||
1095 | struct vgic_lr vlr; | 1281 | struct vgic_lr vlr; |
1096 | int lr; | 1282 | int lr; |
1097 | 1283 | ||
1098 | /* Sanitize the input... */ | 1284 | /* Sanitize the input... */ |
1099 | BUG_ON(sgi_source_id & ~7); | 1285 | BUG_ON(sgi_source_id & ~7); |
1100 | BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); | 1286 | BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); |
1101 | BUG_ON(irq >= VGIC_NR_IRQS); | 1287 | BUG_ON(irq >= dist->nr_irqs); |
1102 | 1288 | ||
1103 | kvm_debug("Queue IRQ%d\n", irq); | 1289 | kvm_debug("Queue IRQ%d\n", irq); |
1104 | 1290 | ||
@@ -1144,14 +1330,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | |||
1144 | int vcpu_id = vcpu->vcpu_id; | 1330 | int vcpu_id = vcpu->vcpu_id; |
1145 | int c; | 1331 | int c; |
1146 | 1332 | ||
1147 | sources = dist->irq_sgi_sources[vcpu_id][irq]; | 1333 | sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); |
1148 | 1334 | ||
1149 | for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { | 1335 | for_each_set_bit(c, &sources, dist->nr_cpus) { |
1150 | if (vgic_queue_irq(vcpu, c, irq)) | 1336 | if (vgic_queue_irq(vcpu, c, irq)) |
1151 | clear_bit(c, &sources); | 1337 | clear_bit(c, &sources); |
1152 | } | 1338 | } |
1153 | 1339 | ||
1154 | dist->irq_sgi_sources[vcpu_id][irq] = sources; | 1340 | *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; |
1155 | 1341 | ||
1156 | /* | 1342 | /* |
1157 | * If the sources bitmap has been cleared it means that we | 1343 | * If the sources bitmap has been cleared it means that we |
@@ -1160,7 +1346,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | |||
1160 | * our emulated gic and can get rid of them. | 1346 | * our emulated gic and can get rid of them. |
1161 | */ | 1347 | */ |
1162 | if (!sources) { | 1348 | if (!sources) { |
1163 | vgic_dist_irq_clear(vcpu, irq); | 1349 | vgic_dist_irq_clear_pending(vcpu, irq); |
1164 | vgic_cpu_irq_clear(vcpu, irq); | 1350 | vgic_cpu_irq_clear(vcpu, irq); |
1165 | return true; | 1351 | return true; |
1166 | } | 1352 | } |
@@ -1170,15 +1356,15 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) | |||
1170 | 1356 | ||
1171 | static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) | 1357 | static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) |
1172 | { | 1358 | { |
1173 | if (vgic_irq_is_active(vcpu, irq)) | 1359 | if (!vgic_can_sample_irq(vcpu, irq)) |
1174 | return true; /* level interrupt, already queued */ | 1360 | return true; /* level interrupt, already queued */ |
1175 | 1361 | ||
1176 | if (vgic_queue_irq(vcpu, 0, irq)) { | 1362 | if (vgic_queue_irq(vcpu, 0, irq)) { |
1177 | if (vgic_irq_is_edge(vcpu, irq)) { | 1363 | if (vgic_irq_is_edge(vcpu, irq)) { |
1178 | vgic_dist_irq_clear(vcpu, irq); | 1364 | vgic_dist_irq_clear_pending(vcpu, irq); |
1179 | vgic_cpu_irq_clear(vcpu, irq); | 1365 | vgic_cpu_irq_clear(vcpu, irq); |
1180 | } else { | 1366 | } else { |
1181 | vgic_irq_set_active(vcpu, irq); | 1367 | vgic_irq_set_queued(vcpu, irq); |
1182 | } | 1368 | } |
1183 | 1369 | ||
1184 | return true; | 1370 | return true; |
@@ -1223,7 +1409,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | |||
1223 | } | 1409 | } |
1224 | 1410 | ||
1225 | /* SPIs */ | 1411 | /* SPIs */ |
1226 | for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { | 1412 | for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { |
1227 | if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) | 1413 | if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) |
1228 | overflow = 1; | 1414 | overflow = 1; |
1229 | } | 1415 | } |
@@ -1239,7 +1425,7 @@ epilog: | |||
1239 | * us. Claim we don't have anything pending. We'll | 1425 | * us. Claim we don't have anything pending. We'll |
1240 | * adjust that if needed while exiting. | 1426 | * adjust that if needed while exiting. |
1241 | */ | 1427 | */ |
1242 | clear_bit(vcpu_id, &dist->irq_pending_on_cpu); | 1428 | clear_bit(vcpu_id, dist->irq_pending_on_cpu); |
1243 | } | 1429 | } |
1244 | } | 1430 | } |
1245 | 1431 | ||
@@ -1261,17 +1447,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) | |||
1261 | 1447 | ||
1262 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { | 1448 | for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { |
1263 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); | 1449 | struct vgic_lr vlr = vgic_get_lr(vcpu, lr); |
1450 | WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); | ||
1264 | 1451 | ||
1265 | vgic_irq_clear_active(vcpu, vlr.irq); | 1452 | vgic_irq_clear_queued(vcpu, vlr.irq); |
1266 | WARN_ON(vlr.state & LR_STATE_MASK); | 1453 | WARN_ON(vlr.state & LR_STATE_MASK); |
1267 | vlr.state = 0; | 1454 | vlr.state = 0; |
1268 | vgic_set_lr(vcpu, lr, vlr); | 1455 | vgic_set_lr(vcpu, lr, vlr); |
1269 | 1456 | ||
1457 | /* | ||
1458 | * If the IRQ was EOIed it was also ACKed and we | ||
1459 | * therefore assume we can clear the soft pending | ||
1460 | * state (should it have been set) for this interrupt. | ||
1461 | * | ||
1462 | * Note: if the IRQ soft pending state was set after | ||
1463 | * the IRQ was acked, it actually shouldn't be | ||
1464 | * cleared, but we have no way of knowing that unless | ||
1465 | * we start trapping ACKs when the soft-pending state | ||
1466 | * is set. | ||
1467 | */ | ||
1468 | vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); | ||
1469 | |||
1270 | /* Any additional pending interrupt? */ | 1470 | /* Any additional pending interrupt? */ |
1271 | if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) { | 1471 | if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { |
1272 | vgic_cpu_irq_set(vcpu, vlr.irq); | 1472 | vgic_cpu_irq_set(vcpu, vlr.irq); |
1273 | level_pending = true; | 1473 | level_pending = true; |
1274 | } else { | 1474 | } else { |
1475 | vgic_dist_irq_clear_pending(vcpu, vlr.irq); | ||
1275 | vgic_cpu_irq_clear(vcpu, vlr.irq); | 1476 | vgic_cpu_irq_clear(vcpu, vlr.irq); |
1276 | } | 1477 | } |
1277 | 1478 | ||
@@ -1315,14 +1516,14 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) | |||
1315 | 1516 | ||
1316 | vlr = vgic_get_lr(vcpu, lr); | 1517 | vlr = vgic_get_lr(vcpu, lr); |
1317 | 1518 | ||
1318 | BUG_ON(vlr.irq >= VGIC_NR_IRQS); | 1519 | BUG_ON(vlr.irq >= dist->nr_irqs); |
1319 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; | 1520 | vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; |
1320 | } | 1521 | } |
1321 | 1522 | ||
1322 | /* Check if we still have something up our sleeve... */ | 1523 | /* Check if we still have something up our sleeve... */ |
1323 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); | 1524 | pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); |
1324 | if (level_pending || pending < vgic->nr_lr) | 1525 | if (level_pending || pending < vgic->nr_lr) |
1325 | set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); | 1526 | set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
1326 | } | 1527 | } |
1327 | 1528 | ||
1328 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) | 1529 | void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) |
@@ -1356,7 +1557,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) | |||
1356 | if (!irqchip_in_kernel(vcpu->kvm)) | 1557 | if (!irqchip_in_kernel(vcpu->kvm)) |
1357 | return 0; | 1558 | return 0; |
1358 | 1559 | ||
1359 | return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); | 1560 | return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); |
1360 | } | 1561 | } |
1361 | 1562 | ||
1362 | static void vgic_kick_vcpus(struct kvm *kvm) | 1563 | static void vgic_kick_vcpus(struct kvm *kvm) |
@@ -1376,34 +1577,36 @@ static void vgic_kick_vcpus(struct kvm *kvm) | |||
1376 | 1577 | ||
1377 | static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) | 1578 | static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) |
1378 | { | 1579 | { |
1379 | int is_edge = vgic_irq_is_edge(vcpu, irq); | 1580 | int edge_triggered = vgic_irq_is_edge(vcpu, irq); |
1380 | int state = vgic_dist_irq_is_pending(vcpu, irq); | ||
1381 | 1581 | ||
1382 | /* | 1582 | /* |
1383 | * Only inject an interrupt if: | 1583 | * Only inject an interrupt if: |
1384 | * - edge triggered and we have a rising edge | 1584 | * - edge triggered and we have a rising edge |
1385 | * - level triggered and we change level | 1585 | * - level triggered and we change level |
1386 | */ | 1586 | */ |
1387 | if (is_edge) | 1587 | if (edge_triggered) { |
1588 | int state = vgic_dist_irq_is_pending(vcpu, irq); | ||
1388 | return level > state; | 1589 | return level > state; |
1389 | else | 1590 | } else { |
1591 | int state = vgic_dist_irq_get_level(vcpu, irq); | ||
1390 | return level != state; | 1592 | return level != state; |
1593 | } | ||
1391 | } | 1594 | } |
1392 | 1595 | ||
1393 | static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | 1596 | static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid, |
1394 | unsigned int irq_num, bool level) | 1597 | unsigned int irq_num, bool level) |
1395 | { | 1598 | { |
1396 | struct vgic_dist *dist = &kvm->arch.vgic; | 1599 | struct vgic_dist *dist = &kvm->arch.vgic; |
1397 | struct kvm_vcpu *vcpu; | 1600 | struct kvm_vcpu *vcpu; |
1398 | int is_edge, is_level; | 1601 | int edge_triggered, level_triggered; |
1399 | int enabled; | 1602 | int enabled; |
1400 | bool ret = true; | 1603 | bool ret = true; |
1401 | 1604 | ||
1402 | spin_lock(&dist->lock); | 1605 | spin_lock(&dist->lock); |
1403 | 1606 | ||
1404 | vcpu = kvm_get_vcpu(kvm, cpuid); | 1607 | vcpu = kvm_get_vcpu(kvm, cpuid); |
1405 | is_edge = vgic_irq_is_edge(vcpu, irq_num); | 1608 | edge_triggered = vgic_irq_is_edge(vcpu, irq_num); |
1406 | is_level = !is_edge; | 1609 | level_triggered = !edge_triggered; |
1407 | 1610 | ||
1408 | if (!vgic_validate_injection(vcpu, irq_num, level)) { | 1611 | if (!vgic_validate_injection(vcpu, irq_num, level)) { |
1409 | ret = false; | 1612 | ret = false; |
@@ -1417,10 +1620,19 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | |||
1417 | 1620 | ||
1418 | kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); | 1621 | kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); |
1419 | 1622 | ||
1420 | if (level) | 1623 | if (level) { |
1421 | vgic_dist_irq_set(vcpu, irq_num); | 1624 | if (level_triggered) |
1422 | else | 1625 | vgic_dist_irq_set_level(vcpu, irq_num); |
1423 | vgic_dist_irq_clear(vcpu, irq_num); | 1626 | vgic_dist_irq_set_pending(vcpu, irq_num); |
1627 | } else { | ||
1628 | if (level_triggered) { | ||
1629 | vgic_dist_irq_clear_level(vcpu, irq_num); | ||
1630 | if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) | ||
1631 | vgic_dist_irq_clear_pending(vcpu, irq_num); | ||
1632 | } else { | ||
1633 | vgic_dist_irq_clear_pending(vcpu, irq_num); | ||
1634 | } | ||
1635 | } | ||
1424 | 1636 | ||
1425 | enabled = vgic_irq_is_enabled(vcpu, irq_num); | 1637 | enabled = vgic_irq_is_enabled(vcpu, irq_num); |
1426 | 1638 | ||
@@ -1429,7 +1641,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | |||
1429 | goto out; | 1641 | goto out; |
1430 | } | 1642 | } |
1431 | 1643 | ||
1432 | if (is_level && vgic_irq_is_active(vcpu, irq_num)) { | 1644 | if (!vgic_can_sample_irq(vcpu, irq_num)) { |
1433 | /* | 1645 | /* |
1434 | * Level interrupt in progress, will be picked up | 1646 | * Level interrupt in progress, will be picked up |
1435 | * when EOId. | 1647 | * when EOId. |
@@ -1440,7 +1652,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, | |||
1440 | 1652 | ||
1441 | if (level) { | 1653 | if (level) { |
1442 | vgic_cpu_irq_set(vcpu, irq_num); | 1654 | vgic_cpu_irq_set(vcpu, irq_num); |
1443 | set_bit(cpuid, &dist->irq_pending_on_cpu); | 1655 | set_bit(cpuid, dist->irq_pending_on_cpu); |
1444 | } | 1656 | } |
1445 | 1657 | ||
1446 | out: | 1658 | out: |
@@ -1466,7 +1678,8 @@ out: | |||
1466 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, | 1678 | int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, |
1467 | bool level) | 1679 | bool level) |
1468 | { | 1680 | { |
1469 | if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) | 1681 | if (likely(vgic_initialized(kvm)) && |
1682 | vgic_update_irq_pending(kvm, cpuid, irq_num, level)) | ||
1470 | vgic_kick_vcpus(kvm); | 1683 | vgic_kick_vcpus(kvm); |
1471 | 1684 | ||
1472 | return 0; | 1685 | return 0; |
@@ -1483,6 +1696,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) | |||
1483 | return IRQ_HANDLED; | 1696 | return IRQ_HANDLED; |
1484 | } | 1697 | } |
1485 | 1698 | ||
1699 | void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) | ||
1700 | { | ||
1701 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1702 | |||
1703 | kfree(vgic_cpu->pending_shared); | ||
1704 | kfree(vgic_cpu->vgic_irq_lr_map); | ||
1705 | vgic_cpu->pending_shared = NULL; | ||
1706 | vgic_cpu->vgic_irq_lr_map = NULL; | ||
1707 | } | ||
1708 | |||
1709 | static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) | ||
1710 | { | ||
1711 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | ||
1712 | |||
1713 | int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; | ||
1714 | vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); | ||
1715 | vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL); | ||
1716 | |||
1717 | if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { | ||
1718 | kvm_vgic_vcpu_destroy(vcpu); | ||
1719 | return -ENOMEM; | ||
1720 | } | ||
1721 | |||
1722 | return 0; | ||
1723 | } | ||
1724 | |||
1486 | /** | 1725 | /** |
1487 | * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state | 1726 | * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state |
1488 | * @vcpu: pointer to the vcpu struct | 1727 | * @vcpu: pointer to the vcpu struct |
@@ -1490,16 +1729,13 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) | |||
1490 | * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to | 1729 | * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to |
1491 | * this vcpu and enable the VGIC for this VCPU | 1730 | * this vcpu and enable the VGIC for this VCPU |
1492 | */ | 1731 | */ |
1493 | int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) | 1732 | static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) |
1494 | { | 1733 | { |
1495 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; | 1734 | struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; |
1496 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; | 1735 | struct vgic_dist *dist = &vcpu->kvm->arch.vgic; |
1497 | int i; | 1736 | int i; |
1498 | 1737 | ||
1499 | if (vcpu->vcpu_id >= VGIC_MAX_CPUS) | 1738 | for (i = 0; i < dist->nr_irqs; i++) { |
1500 | return -EBUSY; | ||
1501 | |||
1502 | for (i = 0; i < VGIC_NR_IRQS; i++) { | ||
1503 | if (i < VGIC_NR_PPIS) | 1739 | if (i < VGIC_NR_PPIS) |
1504 | vgic_bitmap_set_irq_val(&dist->irq_enabled, | 1740 | vgic_bitmap_set_irq_val(&dist->irq_enabled, |
1505 | vcpu->vcpu_id, i, 1); | 1741 | vcpu->vcpu_id, i, 1); |
@@ -1518,84 +1754,112 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) | |||
1518 | vgic_cpu->nr_lr = vgic->nr_lr; | 1754 | vgic_cpu->nr_lr = vgic->nr_lr; |
1519 | 1755 | ||
1520 | vgic_enable(vcpu); | 1756 | vgic_enable(vcpu); |
1521 | |||
1522 | return 0; | ||
1523 | } | 1757 | } |
1524 | 1758 | ||
1525 | static void vgic_init_maintenance_interrupt(void *info) | 1759 | void kvm_vgic_destroy(struct kvm *kvm) |
1526 | { | 1760 | { |
1527 | enable_percpu_irq(vgic->maint_irq, 0); | 1761 | struct vgic_dist *dist = &kvm->arch.vgic; |
1762 | struct kvm_vcpu *vcpu; | ||
1763 | int i; | ||
1764 | |||
1765 | kvm_for_each_vcpu(i, vcpu, kvm) | ||
1766 | kvm_vgic_vcpu_destroy(vcpu); | ||
1767 | |||
1768 | vgic_free_bitmap(&dist->irq_enabled); | ||
1769 | vgic_free_bitmap(&dist->irq_level); | ||
1770 | vgic_free_bitmap(&dist->irq_pending); | ||
1771 | vgic_free_bitmap(&dist->irq_soft_pend); | ||
1772 | vgic_free_bitmap(&dist->irq_queued); | ||
1773 | vgic_free_bitmap(&dist->irq_cfg); | ||
1774 | vgic_free_bytemap(&dist->irq_priority); | ||
1775 | if (dist->irq_spi_target) { | ||
1776 | for (i = 0; i < dist->nr_cpus; i++) | ||
1777 | vgic_free_bitmap(&dist->irq_spi_target[i]); | ||
1778 | } | ||
1779 | kfree(dist->irq_sgi_sources); | ||
1780 | kfree(dist->irq_spi_cpu); | ||
1781 | kfree(dist->irq_spi_target); | ||
1782 | kfree(dist->irq_pending_on_cpu); | ||
1783 | dist->irq_sgi_sources = NULL; | ||
1784 | dist->irq_spi_cpu = NULL; | ||
1785 | dist->irq_spi_target = NULL; | ||
1786 | dist->irq_pending_on_cpu = NULL; | ||
1528 | } | 1787 | } |
1529 | 1788 | ||
1530 | static int vgic_cpu_notify(struct notifier_block *self, | 1789 | /* |
1531 | unsigned long action, void *cpu) | 1790 | * Allocate and initialize the various data structures. Must be called |
1791 | * with kvm->lock held! | ||
1792 | */ | ||
1793 | static int vgic_init_maps(struct kvm *kvm) | ||
1532 | { | 1794 | { |
1533 | switch (action) { | 1795 | struct vgic_dist *dist = &kvm->arch.vgic; |
1534 | case CPU_STARTING: | 1796 | struct kvm_vcpu *vcpu; |
1535 | case CPU_STARTING_FROZEN: | 1797 | int nr_cpus, nr_irqs; |
1536 | vgic_init_maintenance_interrupt(NULL); | 1798 | int ret, i; |
1537 | break; | ||
1538 | case CPU_DYING: | ||
1539 | case CPU_DYING_FROZEN: | ||
1540 | disable_percpu_irq(vgic->maint_irq); | ||
1541 | break; | ||
1542 | } | ||
1543 | 1799 | ||
1544 | return NOTIFY_OK; | 1800 | if (dist->nr_cpus) /* Already allocated */ |
1545 | } | 1801 | return 0; |
1546 | 1802 | ||
1547 | static struct notifier_block vgic_cpu_nb = { | 1803 | nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); |
1548 | .notifier_call = vgic_cpu_notify, | 1804 | if (!nr_cpus) /* No vcpus? Can't be good... */ |
1549 | }; | 1805 | return -EINVAL; |
1550 | 1806 | ||
1551 | static const struct of_device_id vgic_ids[] = { | 1807 | /* |
1552 | { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, | 1808 | * If nobody configured the number of interrupts, use the |
1553 | { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, | 1809 | * legacy one. |
1554 | {}, | 1810 | */ |
1555 | }; | 1811 | if (!dist->nr_irqs) |
1812 | dist->nr_irqs = VGIC_NR_IRQS_LEGACY; | ||
1556 | 1813 | ||
1557 | int kvm_vgic_hyp_init(void) | 1814 | nr_irqs = dist->nr_irqs; |
1558 | { | ||
1559 | const struct of_device_id *matched_id; | ||
1560 | int (*vgic_probe)(struct device_node *,const struct vgic_ops **, | ||
1561 | const struct vgic_params **); | ||
1562 | struct device_node *vgic_node; | ||
1563 | int ret; | ||
1564 | 1815 | ||
1565 | vgic_node = of_find_matching_node_and_match(NULL, | 1816 | ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); |
1566 | vgic_ids, &matched_id); | 1817 | ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); |
1567 | if (!vgic_node) { | 1818 | ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); |
1568 | kvm_err("error: no compatible GIC node found\n"); | 1819 | ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); |
1569 | return -ENODEV; | 1820 | ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); |
1570 | } | 1821 | ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); |
1822 | ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); | ||
1571 | 1823 | ||
1572 | vgic_probe = matched_id->data; | ||
1573 | ret = vgic_probe(vgic_node, &vgic_ops, &vgic); | ||
1574 | if (ret) | 1824 | if (ret) |
1575 | return ret; | 1825 | goto out; |
1576 | 1826 | ||
1577 | ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, | 1827 | dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); |
1578 | "vgic", kvm_get_running_vcpus()); | 1828 | dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); |
1579 | if (ret) { | 1829 | dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, |
1580 | kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); | 1830 | GFP_KERNEL); |
1581 | return ret; | 1831 | dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), |
1832 | GFP_KERNEL); | ||
1833 | if (!dist->irq_sgi_sources || | ||
1834 | !dist->irq_spi_cpu || | ||
1835 | !dist->irq_spi_target || | ||
1836 | !dist->irq_pending_on_cpu) { | ||
1837 | ret = -ENOMEM; | ||
1838 | goto out; | ||
1582 | } | 1839 | } |
1583 | 1840 | ||
1584 | ret = __register_cpu_notifier(&vgic_cpu_nb); | 1841 | for (i = 0; i < nr_cpus; i++) |
1585 | if (ret) { | 1842 | ret |= vgic_init_bitmap(&dist->irq_spi_target[i], |
1586 | kvm_err("Cannot register vgic CPU notifier\n"); | 1843 | nr_cpus, nr_irqs); |
1587 | goto out_free_irq; | ||
1588 | } | ||
1589 | 1844 | ||
1590 | /* Callback into arch code for setup */ | 1845 | if (ret)
1591 | vgic_arch_setup(vgic); | 1846 | goto out; |
1592 | 1847 | ||
1593 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); | 1848 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1849 | ret = vgic_vcpu_init_maps(vcpu, nr_irqs); | ||
1850 | if (ret) { | ||
1851 | kvm_err("VGIC: Failed to allocate vcpu memory\n"); | ||
1852 | break; | ||
1853 | } | ||
1854 | } | ||
1594 | 1855 | ||
1595 | return 0; | 1856 | for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4) |
1857 | vgic_set_target_reg(kvm, 0, i); | ||
1858 | |||
1859 | out: | ||
1860 | if (ret) | ||
1861 | kvm_vgic_destroy(kvm); | ||
1596 | 1862 | ||
1597 | out_free_irq: | ||
1598 | free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); | ||
1599 | return ret; | 1863 | return ret; |
1600 | } | 1864 | } |
1601 | 1865 | ||
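For a sense of the sizes involved: with the legacy configuration (VGIC_NR_IRQS_LEGACY is 256 in this tree) and four vcpus, the kzallocs above come to 4 * 16 = 64 bytes of irq_sgi_sources, 256 - 32 = 224 bytes of irq_spi_cpu, four irq_spi_target bitmaps, and a single long of irq_pending_on_cpu (BITS_TO_LONGS(4) == 1). Note also that vgic_init_maps() is safe to reach twice: both callers in this series, kvm_vgic_init() and vgic_attr_regs_access(), hold kvm->lock, and the early dist->nr_cpus check turns the second call into a no-op.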
@@ -1610,6 +1874,7 @@ out_free_irq: | |||
1610 | */ | 1874 | */ |
1611 | int kvm_vgic_init(struct kvm *kvm) | 1875 | int kvm_vgic_init(struct kvm *kvm) |
1612 | { | 1876 | { |
1877 | struct kvm_vcpu *vcpu; | ||
1613 | int ret = 0, i; | 1878 | int ret = 0, i; |
1614 | 1879 | ||
1615 | if (!irqchip_in_kernel(kvm)) | 1880 | if (!irqchip_in_kernel(kvm)) |
@@ -1627,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm) | |||
1627 | goto out; | 1892 | goto out; |
1628 | } | 1893 | } |
1629 | 1894 | ||
1895 | ret = vgic_init_maps(kvm); | ||
1896 | if (ret) { | ||
1897 | kvm_err("Unable to allocate maps\n"); | ||
1898 | goto out; | ||
1899 | } | ||
1900 | |||
1630 | ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, | 1901 | ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, |
1631 | vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); | 1902 | vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); |
1632 | if (ret) { | 1903 | if (ret) { |
@@ -1634,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm) | |||
1634 | goto out; | 1905 | goto out; |
1635 | } | 1906 | } |
1636 | 1907 | ||
1637 | for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) | 1908 | kvm_for_each_vcpu(i, vcpu, kvm) |
1638 | vgic_set_target_reg(kvm, 0, i); | 1909 | kvm_vgic_vcpu_init(vcpu); |
1639 | 1910 | ||
1640 | kvm->arch.vgic.ready = true; | 1911 | kvm->arch.vgic.ready = true; |
1641 | out: | 1912 | out: |
1913 | if (ret) | ||
1914 | kvm_vgic_destroy(kvm); | ||
1642 | mutex_unlock(&kvm->lock); | 1915 | mutex_unlock(&kvm->lock); |
1643 | return ret; | 1916 | return ret; |
1644 | } | 1917 | } |
@@ -1690,7 +1963,7 @@ out: | |||
1690 | return ret; | 1963 | return ret; |
1691 | } | 1964 | } |
1692 | 1965 | ||
1693 | static bool vgic_ioaddr_overlap(struct kvm *kvm) | 1966 | static int vgic_ioaddr_overlap(struct kvm *kvm) |
1694 | { | 1967 | { |
1695 | phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; | 1968 | phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; |
1696 | phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; | 1969 | phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; |
@@ -1879,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev, | |||
1879 | 2152 | ||
1880 | mutex_lock(&dev->kvm->lock); | 2153 | mutex_lock(&dev->kvm->lock); |
1881 | 2154 | ||
2155 | ret = vgic_init_maps(dev->kvm); | ||
2156 | if (ret) | ||
2157 | goto out; | ||
2158 | |||
1882 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { | 2159 | if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { |
1883 | ret = -EINVAL; | 2160 | ret = -EINVAL; |
1884 | goto out; | 2161 | goto out; |
@@ -1976,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
1976 | 2253 | ||
1977 | return vgic_attr_regs_access(dev, attr, ®, true); | 2254 | return vgic_attr_regs_access(dev, attr, ®, true); |
1978 | } | 2255 | } |
2256 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | ||
2257 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
2258 | u32 val; | ||
2259 | int ret = 0; | ||
2260 | |||
2261 | if (get_user(val, uaddr)) | ||
2262 | return -EFAULT; | ||
2263 | |||
2264 | /* | ||
2265 | * We require: | ||
2266 | * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs | ||
2267 | * - at most 1024 interrupts | ||
2268 | * - a multiple of 32 interrupts | ||
2269 | */ | ||
2270 | if (val < (VGIC_NR_PRIVATE_IRQS + 32) || | ||
2271 | val > VGIC_MAX_IRQS || | ||
2272 | (val & 31)) | ||
2273 | return -EINVAL; | ||
2274 | |||
2275 | mutex_lock(&dev->kvm->lock); | ||
2276 | |||
2277 | if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) | ||
2278 | ret = -EBUSY; | ||
2279 | else | ||
2280 | dev->kvm->arch.vgic.nr_irqs = val; | ||
2281 | |||
2282 | mutex_unlock(&dev->kvm->lock); | ||
2283 | |||
2284 | return ret; | ||
2285 | } | ||
1979 | 2286 | ||
1980 | } | 2287 | } |
1981 | 2288 | ||
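For reference, userspace drives this new group through the generic device-attribute ioctl on the vgic device fd. The fragment below is a minimal sketch, not part of this patch set; it assumes a fd obtained via KVM_CREATE_DEVICE and that KVM_DEV_ARM_VGIC_GRP_NR_IRQS is exported through the uapi headers of this tree.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Ask for 128 interrupts: a multiple of 32, at least 64 (16 SGIs +
 * 16 PPIs + 32 SPIs) and at most 1024, matching the checks above. */
static int vgic_set_nr_irqs(int vgic_fd, uint32_t nr_irqs)
{
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
                .attr  = 0,
                .addr  = (uint64_t)(unsigned long)&nr_irqs,
        };

        return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

The call has to be issued before the vgic is initialized, and only once; otherwise the -EBUSY path above is taken.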
@@ -2012,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2012 | r = put_user(reg, uaddr); | 2319 | r = put_user(reg, uaddr); |
2013 | break; | 2320 | break; |
2014 | } | 2321 | } |
2322 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { | ||
2323 | u32 __user *uaddr = (u32 __user *)(long)attr->addr; | ||
2324 | r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); | ||
2325 | break; | ||
2326 | } | ||
2015 | 2327 | ||
2016 | } | 2328 | } |
2017 | 2329 | ||
@@ -2048,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) | |||
2048 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: | 2360 | case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: |
2049 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; | 2361 | offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; |
2050 | return vgic_has_attr_regs(vgic_cpu_ranges, offset); | 2362 | return vgic_has_attr_regs(vgic_cpu_ranges, offset); |
2363 | case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: | ||
2364 | return 0; | ||
2051 | } | 2365 | } |
2052 | return -ENXIO; | 2366 | return -ENXIO; |
2053 | } | 2367 | } |
@@ -2062,7 +2376,7 @@ static int vgic_create(struct kvm_device *dev, u32 type) | |||
2062 | return kvm_vgic_create(dev->kvm); | 2376 | return kvm_vgic_create(dev->kvm); |
2063 | } | 2377 | } |
2064 | 2378 | ||
2065 | struct kvm_device_ops kvm_arm_vgic_v2_ops = { | 2379 | static struct kvm_device_ops kvm_arm_vgic_v2_ops = { |
2066 | .name = "kvm-arm-vgic", | 2380 | .name = "kvm-arm-vgic", |
2067 | .create = vgic_create, | 2381 | .create = vgic_create, |
2068 | .destroy = vgic_destroy, | 2382 | .destroy = vgic_destroy, |
@@ -2070,3 +2384,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { | |||
2070 | .get_attr = vgic_get_attr, | 2384 | .get_attr = vgic_get_attr, |
2071 | .has_attr = vgic_has_attr, | 2385 | .has_attr = vgic_has_attr, |
2072 | }; | 2386 | }; |
2387 | |||
2388 | static void vgic_init_maintenance_interrupt(void *info) | ||
2389 | { | ||
2390 | enable_percpu_irq(vgic->maint_irq, 0); | ||
2391 | } | ||
2392 | |||
2393 | static int vgic_cpu_notify(struct notifier_block *self, | ||
2394 | unsigned long action, void *cpu) | ||
2395 | { | ||
2396 | switch (action) { | ||
2397 | case CPU_STARTING: | ||
2398 | case CPU_STARTING_FROZEN: | ||
2399 | vgic_init_maintenance_interrupt(NULL); | ||
2400 | break; | ||
2401 | case CPU_DYING: | ||
2402 | case CPU_DYING_FROZEN: | ||
2403 | disable_percpu_irq(vgic->maint_irq); | ||
2404 | break; | ||
2405 | } | ||
2406 | |||
2407 | return NOTIFY_OK; | ||
2408 | } | ||
2409 | |||
2410 | static struct notifier_block vgic_cpu_nb = { | ||
2411 | .notifier_call = vgic_cpu_notify, | ||
2412 | }; | ||
2413 | |||
2414 | static const struct of_device_id vgic_ids[] = { | ||
2415 | { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, | ||
2416 | { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, | ||
2417 | {}, | ||
2418 | }; | ||
2419 | |||
2420 | int kvm_vgic_hyp_init(void) | ||
2421 | { | ||
2422 | const struct of_device_id *matched_id; | ||
2423 | int (*vgic_probe)(struct device_node *, const struct vgic_ops **, | ||
2424 | const struct vgic_params **); | ||
2425 | struct device_node *vgic_node; | ||
2426 | int ret; | ||
2427 | |||
2428 | vgic_node = of_find_matching_node_and_match(NULL, | ||
2429 | vgic_ids, &matched_id); | ||
2430 | if (!vgic_node) { | ||
2431 | kvm_err("error: no compatible GIC node found\n"); | ||
2432 | return -ENODEV; | ||
2433 | } | ||
2434 | |||
2435 | vgic_probe = matched_id->data; | ||
2436 | ret = vgic_probe(vgic_node, &vgic_ops, &vgic); | ||
2437 | if (ret) | ||
2438 | return ret; | ||
2439 | |||
2440 | ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, | ||
2441 | "vgic", kvm_get_running_vcpus()); | ||
2442 | if (ret) { | ||
2443 | kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); | ||
2444 | return ret; | ||
2445 | } | ||
2446 | |||
2447 | ret = __register_cpu_notifier(&vgic_cpu_nb); | ||
2448 | if (ret) { | ||
2449 | kvm_err("Cannot register vgic CPU notifier\n"); | ||
2450 | goto out_free_irq; | ||
2451 | } | ||
2452 | |||
2453 | /* Callback into arch code for setup */ | ||
2454 | vgic_arch_setup(vgic); | ||
2455 | |||
2456 | on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); | ||
2457 | |||
2458 | return kvm_register_device_ops(&kvm_arm_vgic_v2_ops, | ||
2459 | KVM_DEV_TYPE_ARM_VGIC_V2); | ||
2460 | |||
2461 | out_free_irq: | ||
2462 | free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); | ||
2463 | return ret; | ||
2464 | } | ||
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index d6a3d0993d88..5ff7f7f2689a 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c | |||
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work) | |||
80 | 80 | ||
81 | might_sleep(); | 81 | might_sleep(); |
82 | 82 | ||
83 | down_read(&mm->mmap_sem); | 83 | kvm_get_user_page_io(NULL, mm, addr, 1, NULL); |
84 | get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL); | ||
85 | up_read(&mm->mmap_sem); | ||
86 | kvm_async_page_present_sync(vcpu, apf); | 84 | kvm_async_page_present_sync(vcpu, apf); |
87 | 85 | ||
88 | spin_lock(&vcpu->async_pf.lock); | 86 | spin_lock(&vcpu->async_pf.lock); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 3c5981c87c3f..b0fb390943c6 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -36,7 +36,9 @@ | |||
36 | #include <linux/seqlock.h> | 36 | #include <linux/seqlock.h> |
37 | #include <trace/events/kvm.h> | 37 | #include <trace/events/kvm.h> |
38 | 38 | ||
39 | #include "irq.h" | 39 | #ifdef __KVM_HAVE_IOAPIC |
40 | #include "ioapic.h" | ||
41 | #endif | ||
40 | #include "iodev.h" | 42 | #include "iodev.h" |
41 | 43 | ||
42 | #ifdef CONFIG_HAVE_KVM_IRQFD | 44 | #ifdef CONFIG_HAVE_KVM_IRQFD |
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index e8ce34c9db32..0ba4057d271b 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) | |||
405 | spin_unlock(&ioapic->lock); | 405 | spin_unlock(&ioapic->lock); |
406 | } | 406 | } |
407 | 407 | ||
408 | static void kvm_ioapic_eoi_inject_work(struct work_struct *work) | ||
409 | { | ||
410 | int i; | ||
411 | struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, | ||
412 | eoi_inject.work); | ||
413 | spin_lock(&ioapic->lock); | ||
414 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | ||
415 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | ||
416 | |||
417 | if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) | ||
418 | continue; | ||
419 | |||
420 | if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) | ||
421 | ioapic_service(ioapic, i, false); | ||
422 | } | ||
423 | spin_unlock(&ioapic->lock); | ||
424 | } | ||
425 | |||
426 | #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 | ||
427 | |||
408 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | 428 | static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, |
409 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) | 429 | struct kvm_ioapic *ioapic, int vector, int trigger_mode) |
410 | { | 430 | { |
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, | |||
435 | 455 | ||
436 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 456 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
437 | ent->fields.remote_irr = 0; | 457 | ent->fields.remote_irr = 0; |
438 | if (ioapic->irr & (1 << i)) | 458 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) { |
439 | ioapic_service(ioapic, i, false); | 459 | ++ioapic->irq_eoi[i]; |
460 | if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { | ||
461 | /* | ||
462 | * Real hardware does not deliver the interrupt | ||
463 | * immediately during eoi broadcast, and this | ||
464 | * lets a buggy guest make slow progress | ||
465 | * even if it does not correctly handle a | ||
466 | * level-triggered interrupt. Emulate this | ||
467 | * behavior if we detect an interrupt storm. | ||
468 | */ | ||
469 | schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); | ||
470 | ioapic->irq_eoi[i] = 0; | ||
471 | trace_kvm_ioapic_delayed_eoi_inj(ent->bits); | ||
472 | } else { | ||
473 | ioapic_service(ioapic, i, false); | ||
474 | } | ||
475 | } else { | ||
476 | ioapic->irq_eoi[i] = 0; | ||
477 | } | ||
440 | } | 478 | } |
441 | } | 479 | } |
442 | 480 | ||
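A quick sanity check on the numbers used here: the storm is only declared after IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT (10000) consecutive EOIs that each leave the pin's IRR bit set, and the deferred reinjection is scheduled HZ/100 jiffies out, i.e. roughly 10 ms regardless of the configured HZ (1 jiffy at HZ=100, 10 jiffies at HZ=1000). A guest that actually clears the interrupt condition causes the pin's irq_eoi counter to be reset on the next EOI, so the delay never triggers for well-behaved guests.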
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
565 | { | 603 | { |
566 | int i; | 604 | int i; |
567 | 605 | ||
606 | cancel_delayed_work_sync(&ioapic->eoi_inject); | ||
568 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 607 | for (i = 0; i < IOAPIC_NUM_PINS; i++) |
569 | ioapic->redirtbl[i].fields.mask = 1; | 608 | ioapic->redirtbl[i].fields.mask = 1; |
570 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; | 609 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; |
571 | ioapic->ioregsel = 0; | 610 | ioapic->ioregsel = 0; |
572 | ioapic->irr = 0; | 611 | ioapic->irr = 0; |
573 | ioapic->id = 0; | 612 | ioapic->id = 0; |
613 | memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); | ||
574 | rtc_irq_eoi_tracking_reset(ioapic); | 614 | rtc_irq_eoi_tracking_reset(ioapic); |
575 | update_handled_vectors(ioapic); | 615 | update_handled_vectors(ioapic); |
576 | } | 616 | } |
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
589 | if (!ioapic) | 629 | if (!ioapic) |
590 | return -ENOMEM; | 630 | return -ENOMEM; |
591 | spin_lock_init(&ioapic->lock); | 631 | spin_lock_init(&ioapic->lock); |
632 | INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); | ||
592 | kvm->arch.vioapic = ioapic; | 633 | kvm->arch.vioapic = ioapic; |
593 | kvm_ioapic_reset(ioapic); | 634 | kvm_ioapic_reset(ioapic); |
594 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 635 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm) | |||
609 | { | 650 | { |
610 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 651 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
611 | 652 | ||
653 | cancel_delayed_work_sync(&ioapic->eoi_inject); | ||
612 | if (ioapic) { | 654 | if (ioapic) { |
613 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | 655 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
614 | kvm->arch.vioapic = NULL; | 656 | kvm->arch.vioapic = NULL; |
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 90d43e95dcf8..e23b70634f1e 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
@@ -59,6 +59,8 @@ struct kvm_ioapic { | |||
59 | spinlock_t lock; | 59 | spinlock_t lock; |
60 | DECLARE_BITMAP(handled_vectors, 256); | 60 | DECLARE_BITMAP(handled_vectors, 256); |
61 | struct rtc_status rtc_status; | 61 | struct rtc_status rtc_status; |
62 | struct delayed_work eoi_inject; | ||
63 | u32 irq_eoi[IOAPIC_NUM_PINS]; | ||
62 | }; | 64 | }; |
63 | 65 | ||
64 | #ifdef DEBUG | 66 | #ifdef DEBUG |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 95519bc959ed..384eaa7b02fa 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -52,11 +52,13 @@ | |||
52 | 52 | ||
53 | #include <asm/processor.h> | 53 | #include <asm/processor.h> |
54 | #include <asm/io.h> | 54 | #include <asm/io.h> |
55 | #include <asm/ioctl.h> | ||
55 | #include <asm/uaccess.h> | 56 | #include <asm/uaccess.h> |
56 | #include <asm/pgtable.h> | 57 | #include <asm/pgtable.h> |
57 | 58 | ||
58 | #include "coalesced_mmio.h" | 59 | #include "coalesced_mmio.h" |
59 | #include "async_pf.h" | 60 | #include "async_pf.h" |
61 | #include "vfio.h" | ||
60 | 62 | ||
61 | #define CREATE_TRACE_POINTS | 63 | #define CREATE_TRACE_POINTS |
62 | #include <trace/events/kvm.h> | 64 | #include <trace/events/kvm.h> |
@@ -95,8 +97,6 @@ static int hardware_enable_all(void); | |||
95 | static void hardware_disable_all(void); | 97 | static void hardware_disable_all(void); |
96 | 98 | ||
97 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | 99 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); |
98 | static void update_memslots(struct kvm_memslots *slots, | ||
99 | struct kvm_memory_slot *new, u64 last_generation); | ||
100 | 100 | ||
101 | static void kvm_release_pfn_dirty(pfn_t pfn); | 101 | static void kvm_release_pfn_dirty(pfn_t pfn); |
102 | static void mark_page_dirty_in_slot(struct kvm *kvm, | 102 | static void mark_page_dirty_in_slot(struct kvm *kvm, |
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu) | |||
129 | struct pid *oldpid = vcpu->pid; | 129 | struct pid *oldpid = vcpu->pid; |
130 | struct pid *newpid = get_task_pid(current, PIDTYPE_PID); | 130 | struct pid *newpid = get_task_pid(current, PIDTYPE_PID); |
131 | rcu_assign_pointer(vcpu->pid, newpid); | 131 | rcu_assign_pointer(vcpu->pid, newpid); |
132 | synchronize_rcu(); | 132 | if (oldpid) |
133 | synchronize_rcu(); | ||
133 | put_pid(oldpid); | 134 | put_pid(oldpid); |
134 | } | 135 | } |
135 | cpu = get_cpu(); | 136 | cpu = get_cpu(); |
@@ -152,7 +153,7 @@ static void ack_flush(void *_completed) | |||
152 | { | 153 | { |
153 | } | 154 | } |
154 | 155 | ||
155 | static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | 156 | bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) |
156 | { | 157 | { |
157 | int i, cpu, me; | 158 | int i, cpu, me; |
158 | cpumask_var_t cpus; | 159 | cpumask_var_t cpus; |
@@ -189,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) | |||
189 | long dirty_count = kvm->tlbs_dirty; | 190 | long dirty_count = kvm->tlbs_dirty; |
190 | 191 | ||
191 | smp_mb(); | 192 | smp_mb(); |
192 | if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) | 193 | if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) |
193 | ++kvm->stat.remote_tlb_flush; | 194 | ++kvm->stat.remote_tlb_flush; |
194 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); | 195 | cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); |
195 | } | 196 | } |
@@ -197,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); | |||
197 | 198 | ||
198 | void kvm_reload_remote_mmus(struct kvm *kvm) | 199 | void kvm_reload_remote_mmus(struct kvm *kvm) |
199 | { | 200 | { |
200 | make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); | 201 | kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); |
201 | } | 202 | } |
202 | 203 | ||
203 | void kvm_make_mclock_inprogress_request(struct kvm *kvm) | 204 | void kvm_make_mclock_inprogress_request(struct kvm *kvm) |
204 | { | 205 | { |
205 | make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); | 206 | kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); |
206 | } | 207 | } |
207 | 208 | ||
208 | void kvm_make_scan_ioapic_request(struct kvm *kvm) | 209 | void kvm_make_scan_ioapic_request(struct kvm *kvm) |
209 | { | 210 | { |
210 | make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); | 211 | kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); |
211 | } | 212 | } |
212 | 213 | ||
213 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | 214 | int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) |
@@ -295,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
295 | kvm_flush_remote_tlbs(kvm); | 296 | kvm_flush_remote_tlbs(kvm); |
296 | 297 | ||
297 | spin_unlock(&kvm->mmu_lock); | 298 | spin_unlock(&kvm->mmu_lock); |
299 | |||
300 | kvm_arch_mmu_notifier_invalidate_page(kvm, address); | ||
301 | |||
298 | srcu_read_unlock(&kvm->srcu, idx); | 302 | srcu_read_unlock(&kvm->srcu, idx); |
299 | } | 303 | } |
300 | 304 | ||
@@ -368,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, | |||
368 | 372 | ||
369 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | 373 | static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, |
370 | struct mm_struct *mm, | 374 | struct mm_struct *mm, |
371 | unsigned long address) | 375 | unsigned long start, |
376 | unsigned long end) | ||
372 | { | 377 | { |
373 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 378 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
374 | int young, idx; | 379 | int young, idx; |
@@ -376,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
376 | idx = srcu_read_lock(&kvm->srcu); | 381 | idx = srcu_read_lock(&kvm->srcu); |
377 | spin_lock(&kvm->mmu_lock); | 382 | spin_lock(&kvm->mmu_lock); |
378 | 383 | ||
379 | young = kvm_age_hva(kvm, address); | 384 | young = kvm_age_hva(kvm, start, end); |
380 | if (young) | 385 | if (young) |
381 | kvm_flush_remote_tlbs(kvm); | 386 | kvm_flush_remote_tlbs(kvm); |
382 | 387 | ||
@@ -476,6 +481,13 @@ static struct kvm *kvm_create_vm(unsigned long type) | |||
476 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | 481 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
477 | if (!kvm->memslots) | 482 | if (!kvm->memslots) |
478 | goto out_err_no_srcu; | 483 | goto out_err_no_srcu; |
484 | |||
485 | /* | ||
486 | * Init kvm generation close to the maximum to easily test the | ||
487 | * code of handling generation number wrap-around. | ||
488 | */ | ||
489 | kvm->memslots->generation = -150; | ||
490 | |||
479 | kvm_init_memslots_id(kvm); | 491 | kvm_init_memslots_id(kvm); |
480 | if (init_srcu_struct(&kvm->srcu)) | 492 | if (init_srcu_struct(&kvm->srcu)) |
481 | goto out_err_no_srcu; | 493 | goto out_err_no_srcu; |
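The -150 is stored into the unsigned generation counter, so a freshly created VM sits just below the wrap-around point; since install_new_memslots() (below) now bumps the generation twice per update, roughly 75 completed memslot updates are enough to exercise the wrap-handling paths that would otherwise only be reached after an astronomically long uptime.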
@@ -687,8 +699,7 @@ static void sort_memslots(struct kvm_memslots *slots) | |||
687 | } | 699 | } |
688 | 700 | ||
689 | static void update_memslots(struct kvm_memslots *slots, | 701 | static void update_memslots(struct kvm_memslots *slots, |
690 | struct kvm_memory_slot *new, | 702 | struct kvm_memory_slot *new) |
691 | u64 last_generation) | ||
692 | { | 703 | { |
693 | if (new) { | 704 | if (new) { |
694 | int id = new->id; | 705 | int id = new->id; |
@@ -699,15 +710,13 @@ static void update_memslots(struct kvm_memslots *slots, | |||
699 | if (new->npages != npages) | 710 | if (new->npages != npages) |
700 | sort_memslots(slots); | 711 | sort_memslots(slots); |
701 | } | 712 | } |
702 | |||
703 | slots->generation = last_generation + 1; | ||
704 | } | 713 | } |
705 | 714 | ||
706 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | 715 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) |
707 | { | 716 | { |
708 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; | 717 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; |
709 | 718 | ||
710 | #ifdef KVM_CAP_READONLY_MEM | 719 | #ifdef __KVM_HAVE_READONLY_MEM |
711 | valid_flags |= KVM_MEM_READONLY; | 720 | valid_flags |= KVM_MEM_READONLY; |
712 | #endif | 721 | #endif |
713 | 722 | ||
@@ -722,10 +731,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, | |||
722 | { | 731 | { |
723 | struct kvm_memslots *old_memslots = kvm->memslots; | 732 | struct kvm_memslots *old_memslots = kvm->memslots; |
724 | 733 | ||
725 | update_memslots(slots, new, kvm->memslots->generation); | 734 | /* |
735 | * Set the low bit in the generation, which disables SPTE caching | ||
736 | * until the end of synchronize_srcu_expedited. | ||
737 | */ | ||
738 | WARN_ON(old_memslots->generation & 1); | ||
739 | slots->generation = old_memslots->generation + 1; | ||
740 | |||
741 | update_memslots(slots, new); | ||
726 | rcu_assign_pointer(kvm->memslots, slots); | 742 | rcu_assign_pointer(kvm->memslots, slots); |
727 | synchronize_srcu_expedited(&kvm->srcu); | 743 | synchronize_srcu_expedited(&kvm->srcu); |
728 | 744 | ||
745 | /* | ||
746 | * Increment the new memslot generation a second time. This prevents | ||
747 | * vm exits that race with memslot updates from caching a memslot | ||
748 | * generation that will (potentially) be valid forever. | ||
749 | */ | ||
750 | slots->generation++; | ||
751 | |||
729 | kvm_arch_memslots_updated(kvm); | 752 | kvm_arch_memslots_updated(kvm); |
730 | 753 | ||
731 | return old_memslots; | 754 | return old_memslots; |
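The net effect is a small protocol: the generation stays odd for the whole srcu grace period while the slots are swapped, and every completed update advances it by two. A consumer that tags its cached lookups with the generation it observed can then reject both stale and in-flight values with a check along the lines of the sketch below (the function name and the cached_gen parameter are illustrative, not from this patch):

/* Illustrative only: must be called inside srcu_read_lock(&kvm->srcu). */
static bool cached_gen_still_valid(struct kvm *kvm, u64 cached_gen)
{
        struct kvm_memslots *slots = srcu_dereference(kvm->memslots, &kvm->srcu);
        u64 gen = slots->generation;

        /* Low bit set: an update is in flight, treat nothing as valid. */
        if (gen & 1)
                return false;

        return cached_gen == gen;
}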
@@ -776,7 +799,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
776 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 799 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
777 | npages = mem->memory_size >> PAGE_SHIFT; | 800 | npages = mem->memory_size >> PAGE_SHIFT; |
778 | 801 | ||
779 | r = -EINVAL; | ||
780 | if (npages > KVM_MEM_MAX_NR_PAGES) | 802 | if (npages > KVM_MEM_MAX_NR_PAGES) |
781 | goto out; | 803 | goto out; |
782 | 804 | ||
@@ -790,7 +812,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
790 | new.npages = npages; | 812 | new.npages = npages; |
791 | new.flags = mem->flags; | 813 | new.flags = mem->flags; |
792 | 814 | ||
793 | r = -EINVAL; | ||
794 | if (npages) { | 815 | if (npages) { |
795 | if (!old.npages) | 816 | if (!old.npages) |
796 | change = KVM_MR_CREATE; | 817 | change = KVM_MR_CREATE; |
@@ -846,7 +867,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
846 | } | 867 | } |
847 | 868 | ||
848 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { | 869 | if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { |
849 | r = -ENOMEM; | ||
850 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), | 870 | slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), |
851 | GFP_KERNEL); | 871 | GFP_KERNEL); |
852 | if (!slots) | 872 | if (!slots) |
@@ -1075,9 +1095,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); | |||
1075 | * If writable is set to false, the hva returned by this function is only | 1095 | * If writable is set to false, the hva returned by this function is only |
1076 | * allowed to be read. | 1096 | * allowed to be read. |
1077 | */ | 1097 | */ |
1078 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | 1098 | unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, |
1099 | gfn_t gfn, bool *writable) | ||
1079 | { | 1100 | { |
1080 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1081 | unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); | 1101 | unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); |
1082 | 1102 | ||
1083 | if (!kvm_is_error_hva(hva) && writable) | 1103 | if (!kvm_is_error_hva(hva) && writable) |
@@ -1086,6 +1106,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | |||
1086 | return hva; | 1106 | return hva; |
1087 | } | 1107 | } |
1088 | 1108 | ||
1109 | unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) | ||
1110 | { | ||
1111 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | ||
1112 | |||
1113 | return gfn_to_hva_memslot_prot(slot, gfn, writable); | ||
1114 | } | ||
1115 | |||
1089 | static int kvm_read_hva(void *data, void __user *hva, int len) | 1116 | static int kvm_read_hva(void *data, void __user *hva, int len) |
1090 | { | 1117 | { |
1091 | return __copy_from_user(data, hva, len); | 1118 | return __copy_from_user(data, hva, len); |
@@ -1107,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | |||
1107 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); | 1134 | return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); |
1108 | } | 1135 | } |
1109 | 1136 | ||
1137 | int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, | ||
1138 | unsigned long addr, bool write_fault, | ||
1139 | struct page **pagep) | ||
1140 | { | ||
1141 | int npages; | ||
1142 | int locked = 1; | ||
1143 | int flags = FOLL_TOUCH | FOLL_HWPOISON | | ||
1144 | (pagep ? FOLL_GET : 0) | | ||
1145 | (write_fault ? FOLL_WRITE : 0); | ||
1146 | |||
1147 | /* | ||
1148 | * If retrying the fault, we get here *not* having allowed the filemap | ||
1149 | * to wait on the page lock. We should now allow waiting on the IO with | ||
1150 | * the mmap semaphore released. | ||
1151 | */ | ||
1152 | down_read(&mm->mmap_sem); | ||
1153 | npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL, | ||
1154 | &locked); | ||
1155 | if (!locked) { | ||
1156 | VM_BUG_ON(npages); | ||
1157 | |||
1158 | if (!pagep) | ||
1159 | return 0; | ||
1160 | |||
1161 | /* | ||
1162 | * The previous call has now waited on the IO. Now we can | ||
1163 | * retry and complete. Pass TRIED to ensure we do not re | ||
1164 | * schedule async IO (see e.g. filemap_fault). | ||
1165 | */ | ||
1166 | down_read(&mm->mmap_sem); | ||
1167 | npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED, | ||
1168 | pagep, NULL, NULL); | ||
1169 | } | ||
1170 | up_read(&mm->mmap_sem); | ||
1171 | return npages; | ||
1172 | } | ||
1173 | |||
1110 | static inline int check_user_page_hwpoison(unsigned long addr) | 1174 | static inline int check_user_page_hwpoison(unsigned long addr) |
1111 | { | 1175 | { |
1112 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; | 1176 | int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; |
@@ -1169,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, | |||
1169 | npages = get_user_page_nowait(current, current->mm, | 1233 | npages = get_user_page_nowait(current, current->mm, |
1170 | addr, write_fault, page); | 1234 | addr, write_fault, page); |
1171 | up_read(¤t->mm->mmap_sem); | 1235 | up_read(¤t->mm->mmap_sem); |
1172 | } else | 1236 | } else { |
1173 | npages = get_user_pages_fast(addr, 1, write_fault, | 1237 | /* |
1174 | page); | 1238 | * By now we have tried gup_fast, and possibly async_pf, and we |
1239 | * are certainly not atomic. Time to retry the gup, allowing | ||
1240 | * mmap semaphore to be relinquished in the case of IO. | ||
1241 | */ | ||
1242 | npages = kvm_get_user_page_io(current, current->mm, addr, | ||
1243 | write_fault, page); | ||
1244 | } | ||
1175 | if (npages != 1) | 1245 | if (npages != 1) |
1176 | return npages; | 1246 | return npages; |
1177 | 1247 | ||
@@ -1768,8 +1838,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | |||
1768 | bool eligible; | 1838 | bool eligible; |
1769 | 1839 | ||
1770 | eligible = !vcpu->spin_loop.in_spin_loop || | 1840 | eligible = !vcpu->spin_loop.in_spin_loop || |
1771 | (vcpu->spin_loop.in_spin_loop && | 1841 | vcpu->spin_loop.dy_eligible; |
1772 | vcpu->spin_loop.dy_eligible); | ||
1773 | 1842 | ||
1774 | if (vcpu->spin_loop.in_spin_loop) | 1843 | if (vcpu->spin_loop.in_spin_loop) |
1775 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); | 1844 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); |
@@ -1975,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
1975 | if (vcpu->kvm->mm != current->mm) | 2044 | if (vcpu->kvm->mm != current->mm) |
1976 | return -EIO; | 2045 | return -EIO; |
1977 | 2046 | ||
2047 | if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) | ||
2048 | return -EINVAL; | ||
2049 | |||
1978 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) | 2050 | #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) |
1979 | /* | 2051 | /* |
1980 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, | 2052 | * Special cases: vcpu ioctls that are asynchronous to vcpu execution, |
@@ -2259,6 +2331,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp) | |||
2259 | return filp->private_data; | 2331 | return filp->private_data; |
2260 | } | 2332 | } |
2261 | 2333 | ||
2334 | static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = { | ||
2335 | #ifdef CONFIG_KVM_MPIC | ||
2336 | [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops, | ||
2337 | [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops, | ||
2338 | #endif | ||
2339 | |||
2340 | #ifdef CONFIG_KVM_XICS | ||
2341 | [KVM_DEV_TYPE_XICS] = &kvm_xics_ops, | ||
2342 | #endif | ||
2343 | }; | ||
2344 | |||
2345 | int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type) | ||
2346 | { | ||
2347 | if (type >= ARRAY_SIZE(kvm_device_ops_table)) | ||
2348 | return -ENOSPC; | ||
2349 | |||
2350 | if (kvm_device_ops_table[type] != NULL) | ||
2351 | return -EEXIST; | ||
2352 | |||
2353 | kvm_device_ops_table[type] = ops; | ||
2354 | return 0; | ||
2355 | } | ||
2356 | |||
2262 | static int kvm_ioctl_create_device(struct kvm *kvm, | 2357 | static int kvm_ioctl_create_device(struct kvm *kvm, |
2263 | struct kvm_create_device *cd) | 2358 | struct kvm_create_device *cd) |
2264 | { | 2359 | { |
@@ -2267,36 +2362,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm, | |||
2267 | bool test = cd->flags & KVM_CREATE_DEVICE_TEST; | 2362 | bool test = cd->flags & KVM_CREATE_DEVICE_TEST; |
2268 | int ret; | 2363 | int ret; |
2269 | 2364 | ||
2270 | switch (cd->type) { | 2365 | if (cd->type >= ARRAY_SIZE(kvm_device_ops_table)) |
2271 | #ifdef CONFIG_KVM_MPIC | 2366 | return -ENODEV; |
2272 | case KVM_DEV_TYPE_FSL_MPIC_20: | 2367 | |
2273 | case KVM_DEV_TYPE_FSL_MPIC_42: | 2368 | ops = kvm_device_ops_table[cd->type]; |
2274 | ops = &kvm_mpic_ops; | 2369 | if (ops == NULL) |
2275 | break; | ||
2276 | #endif | ||
2277 | #ifdef CONFIG_KVM_XICS | ||
2278 | case KVM_DEV_TYPE_XICS: | ||
2279 | ops = &kvm_xics_ops; | ||
2280 | break; | ||
2281 | #endif | ||
2282 | #ifdef CONFIG_KVM_VFIO | ||
2283 | case KVM_DEV_TYPE_VFIO: | ||
2284 | ops = &kvm_vfio_ops; | ||
2285 | break; | ||
2286 | #endif | ||
2287 | #ifdef CONFIG_KVM_ARM_VGIC | ||
2288 | case KVM_DEV_TYPE_ARM_VGIC_V2: | ||
2289 | ops = &kvm_arm_vgic_v2_ops; | ||
2290 | break; | ||
2291 | #endif | ||
2292 | #ifdef CONFIG_S390 | ||
2293 | case KVM_DEV_TYPE_FLIC: | ||
2294 | ops = &kvm_flic_ops; | ||
2295 | break; | ||
2296 | #endif | ||
2297 | default: | ||
2298 | return -ENODEV; | 2370 | return -ENODEV; |
2299 | } | ||
2300 | 2371 | ||
2301 | if (test) | 2372 | if (test) |
2302 | return 0; | 2373 | return 0; |
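With the switch statement gone, a backend only needs an entry in kvm_device_ops_table, either statically (MPIC, XICS above) or by calling kvm_register_device_ops() at init time, as the vgic and vfio code elsewhere in this series now does. A hypothetical backend would follow the same pattern; kvm_foo_* and KVM_DEV_TYPE_FOO below are placeholders, not real symbols:

/* Hypothetical backend: register the ops once, after which
 * KVM_CREATE_DEVICE for KVM_DEV_TYPE_FOO finds them via the table. */
static struct kvm_device_ops kvm_foo_ops = {
        .name     = "kvm-foo",
        .create   = kvm_foo_create,
        .destroy  = kvm_foo_destroy,
        .set_attr = kvm_foo_set_attr,
        .has_attr = kvm_foo_has_attr,
};

int kvm_foo_ops_init(void)
{
        return kvm_register_device_ops(&kvm_foo_ops, KVM_DEV_TYPE_FOO);
}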
@@ -2611,7 +2682,6 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2611 | 2682 | ||
2612 | switch (ioctl) { | 2683 | switch (ioctl) { |
2613 | case KVM_GET_API_VERSION: | 2684 | case KVM_GET_API_VERSION: |
2614 | r = -EINVAL; | ||
2615 | if (arg) | 2685 | if (arg) |
2616 | goto out; | 2686 | goto out; |
2617 | r = KVM_API_VERSION; | 2687 | r = KVM_API_VERSION; |
@@ -2623,7 +2693,6 @@ static long kvm_dev_ioctl(struct file *filp, | |||
2623 | r = kvm_vm_ioctl_check_extension_generic(NULL, arg); | 2693 | r = kvm_vm_ioctl_check_extension_generic(NULL, arg); |
2624 | break; | 2694 | break; |
2625 | case KVM_GET_VCPU_MMAP_SIZE: | 2695 | case KVM_GET_VCPU_MMAP_SIZE: |
2626 | r = -EINVAL; | ||
2627 | if (arg) | 2696 | if (arg) |
2628 | goto out; | 2697 | goto out; |
2629 | r = PAGE_SIZE; /* struct kvm_run */ | 2698 | r = PAGE_SIZE; /* struct kvm_run */ |
@@ -2668,7 +2737,7 @@ static void hardware_enable_nolock(void *junk) | |||
2668 | 2737 | ||
2669 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 2738 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
2670 | 2739 | ||
2671 | r = kvm_arch_hardware_enable(NULL); | 2740 | r = kvm_arch_hardware_enable(); |
2672 | 2741 | ||
2673 | if (r) { | 2742 | if (r) { |
2674 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | 2743 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); |
@@ -2693,7 +2762,7 @@ static void hardware_disable_nolock(void *junk) | |||
2693 | if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 2762 | if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
2694 | return; | 2763 | return; |
2695 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | 2764 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); |
2696 | kvm_arch_hardware_disable(NULL); | 2765 | kvm_arch_hardware_disable(); |
2697 | } | 2766 | } |
2698 | 2767 | ||
2699 | static void hardware_disable(void) | 2768 | static void hardware_disable(void) |
@@ -3123,6 +3192,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) | |||
3123 | if (vcpu->preempted) | 3192 | if (vcpu->preempted) |
3124 | vcpu->preempted = false; | 3193 | vcpu->preempted = false; |
3125 | 3194 | ||
3195 | kvm_arch_sched_in(vcpu, cpu); | ||
3196 | |||
3126 | kvm_arch_vcpu_load(vcpu, cpu); | 3197 | kvm_arch_vcpu_load(vcpu, cpu); |
3127 | } | 3198 | } |
3128 | 3199 | ||
@@ -3214,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
3214 | goto out_undebugfs; | 3285 | goto out_undebugfs; |
3215 | } | 3286 | } |
3216 | 3287 | ||
3288 | r = kvm_vfio_ops_init(); | ||
3289 | WARN_ON(r); | ||
3290 | |||
3217 | return 0; | 3291 | return 0; |
3218 | 3292 | ||
3219 | out_undebugfs: | 3293 | out_undebugfs: |
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ba1a93f935c7..281e7cf2b8e5 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
20 | #include <linux/vfio.h> | 20 | #include <linux/vfio.h> |
21 | #include "vfio.h" | ||
21 | 22 | ||
22 | struct kvm_vfio_group { | 23 | struct kvm_vfio_group { |
23 | struct list_head node; | 24 | struct list_head node; |
@@ -246,6 +247,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev) | |||
246 | kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ | 247 | kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ |
247 | } | 248 | } |
248 | 249 | ||
250 | static int kvm_vfio_create(struct kvm_device *dev, u32 type); | ||
251 | |||
252 | static struct kvm_device_ops kvm_vfio_ops = { | ||
253 | .name = "kvm-vfio", | ||
254 | .create = kvm_vfio_create, | ||
255 | .destroy = kvm_vfio_destroy, | ||
256 | .set_attr = kvm_vfio_set_attr, | ||
257 | .has_attr = kvm_vfio_has_attr, | ||
258 | }; | ||
259 | |||
249 | static int kvm_vfio_create(struct kvm_device *dev, u32 type) | 260 | static int kvm_vfio_create(struct kvm_device *dev, u32 type) |
250 | { | 261 | { |
251 | struct kvm_device *tmp; | 262 | struct kvm_device *tmp; |
@@ -268,10 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type) | |||
268 | return 0; | 279 | return 0; |
269 | } | 280 | } |
270 | 281 | ||
271 | struct kvm_device_ops kvm_vfio_ops = { | 282 | int kvm_vfio_ops_init(void) |
272 | .name = "kvm-vfio", | 283 | { |
273 | .create = kvm_vfio_create, | 284 | return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); |
274 | .destroy = kvm_vfio_destroy, | 285 | } |
275 | .set_attr = kvm_vfio_set_attr, | ||
276 | .has_attr = kvm_vfio_has_attr, | ||
277 | }; | ||
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h new file mode 100644 index 000000000000..92eac75d6b62 --- /dev/null +++ b/virt/kvm/vfio.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef __KVM_VFIO_H | ||
2 | #define __KVM_VFIO_H | ||
3 | |||
4 | #ifdef CONFIG_KVM_VFIO | ||
5 | int kvm_vfio_ops_init(void); | ||
6 | #else | ||
7 | static inline int kvm_vfio_ops_init(void) | ||
8 | { | ||
9 | return 0; | ||
10 | } | ||
11 | #endif | ||
12 | |||
13 | #endif | ||