path: root/virt/kvm
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-02 17:50:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-04-02 17:50:10 -0400
commit	7cbb39d4d4d530dff12f2ff06ed6c85c504ba91a (patch)
tree	82f721591d739eca99817def86ca5b6ebd682fe6 /virt/kvm
parent	64056a94256e7a476de67fbe581dfe5515c56288 (diff)
parent	7227fc0666606b0df2c0d2966a7f4859b01bdf74 (diff)
Merge tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
 "PPC and ARM do not have much going on this time. Most of the cool
  stuff, instead, is in s390 and (after a few releases) x86.

  ARM has some caching fixes and PPC has transactional memory support
  in guests. MIPS has some fixes, with more probably coming in 3.16 as
  QEMU will soon get support for MIPS KVM.

  For x86 there are optimizations for debug registers, which trigger on
  some Windows games, and other important fixes for Windows guests. We
  now expose to the guest Broadwell instruction set extensions and also
  Intel MPX. There's also a fix/workaround for OS X guests, nested
  virtualization features (preemption timer), and a couple kvmclock
  refinements.

  For s390, the main news is asynchronous page faults, together with
  improvements to IRQs (floating irqs and adapter irqs) that speed up
  virtio devices"

* tag 'kvm-3.15-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (96 commits)
  KVM: PPC: Book3S HV: Save/restore host PMU registers that are new in POWER8
  KVM: PPC: Book3S HV: Fix decrementer timeouts with non-zero TB offset
  KVM: PPC: Book3S HV: Don't use kvm_memslots() in real mode
  KVM: PPC: Book3S HV: Return ENODEV error rather than EIO
  KVM: PPC: Book3S: Trim top 4 bits of physical address in RTAS code
  KVM: PPC: Book3S HV: Add get/set_one_reg for new TM state
  KVM: PPC: Book3S HV: Add transactional memory support
  KVM: Specify byte order for KVM_EXIT_MMIO
  KVM: vmx: fix MPX detection
  KVM: PPC: Book3S HV: Fix KVM hang with CONFIG_KVM_XICS=n
  KVM: PPC: Book3S: Introduce hypervisor call H_GET_TCE
  KVM: PPC: Book3S HV: Fix incorrect userspace exit on ioeventfd write
  KVM: s390: clear local interrupts at cpu initial reset
  KVM: s390: Fix possible memory leak in SIGP functions
  KVM: s390: fix calculation of idle_mask array size
  KVM: s390: randomize sca address
  KVM: ioapic: reinject pending interrupts on KVM_SET_IRQCHIP
  KVM: Bump KVM_MAX_IRQ_ROUTES for s390
  KVM: s390: irq routing for adapter interrupts.
  KVM: s390: adapter interrupt sources
  ...
Diffstat (limited to 'virt/kvm')
-rw-r--r--	virt/kvm/Kconfig	4
-rw-r--r--	virt/kvm/async_pf.c	27
-rw-r--r--	virt/kvm/eventfd.c	8
-rw-r--r--	virt/kvm/ioapic.c	108
-rw-r--r--	virt/kvm/kvm_main.c	12
5 files changed, 108 insertions, 51 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd629..13f2d19793e3 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
 	bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+	bool
+
 config HAVE_KVM_MSI
 	bool
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c14320..10df100c4514 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include <trace/events/kvm.h>
 
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+					       struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+						struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
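The two wrappers above are how the series arranges for kvm_arch_async_page_present() to be reported either straight from the worker (when KVM_ASYNC_PF_SYNC is selected) or later from the vcpu's completion check (when it is not): the Kconfig symbol compiles exactly one of the two bodies down to a call and the other to a no-op. As a rough standalone model of that compile-time dispatch, assuming nothing beyond what the hunk shows (all names below are illustrative, not KVM's):

#include <stdio.h>

/* #define CONFIG_ASYNC_PF_SYNC */	/* normally decided by the build configuration */

static void notify_page_present(int token)
{
	printf("page for token %d is present\n", token);
}

/* Called from the worker; only does something in the "sync" configuration. */
static inline void notify_sync(int token)
{
#ifdef CONFIG_ASYNC_PF_SYNC
	notify_page_present(token);
#endif
}

/* Called from the completion path; only does something otherwise. */
static inline void notify_async(int token)
{
#ifndef CONFIG_ASYNC_PF_SYNC
	notify_page_present(token);
#endif
}

int main(void)
{
	notify_sync(42);	/* worker context */
	notify_async(42);	/* vcpu completion context */
	return 0;		/* exactly one of the two calls fired */
}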
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
 	down_read(&mm->mmap_sem);
 	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
+	kvm_async_page_present_sync(vcpu, apf);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
 		list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+		flush_work(&work->work);
+#else
 		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
+#endif
 	}
 
 	spin_lock(&vcpu->async_pf.lock);
@@ -130,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->async_pf.lock);
 
 		kvm_arch_async_page_ready(vcpu, work);
-		kvm_arch_async_page_present(vcpu, work);
+		kvm_async_page_present_async(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
@@ -138,7 +159,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 	}
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 		       struct kvm_arch_async_pf *arch)
 {
 	struct kvm_async_pf *work;
@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
-	work->addr = gfn_to_hva(vcpu->kvm, gfn);
+	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
 	atomic_inc(&work->mm->mm_count);
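With the prototype change above, kvm_setup_async_pf() no longer translates a gfn itself; whoever queues the fault passes the host virtual address in. The architecture call sites are not part of this diff, so the following is only a hedged sketch of what such a caller might look like, with the wrapper name invented for illustration:

static int queue_async_pf_for_gfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
				  struct kvm_arch_async_pf *arch)
{
	/* Resolve gfn -> hva up front, as the removed line used to do
	 * inside kvm_setup_async_pf() itself. */
	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);

	if (kvm_is_error_hva(hva))
		return 0;	/* nothing useful to prefault */

	return kvm_setup_async_pf(vcpu, gva, hva, arch);
}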
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d6043b36..29c2a04e036e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 				lockdep_is_held(&kvm->irqfds.lock));
 	irqfd_update(kvm, irqfd, irq_rt);
 
-	events = f.file->f_op->poll(f.file, &irqfd->pt);
-
 	list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
+	spin_unlock_irq(&kvm->irqfds.lock);
+
 	/*
 	 * Check if there was an event already pending on the eventfd
 	 * before we registered, and trigger it as if we didn't miss it.
 	 */
+	events = f.file->f_op->poll(f.file, &irqfd->pt);
+
 	if (events & POLLIN)
 		schedule_work(&irqfd->inject);
 
-	spin_unlock_irq(&kvm->irqfds.lock);
-
 	/*
 	 * do not drop the file until the irqfd is fully initialized, otherwise
 	 * we might race against the POLLHUP
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99ad7dc..d4b601547f1f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
 		bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
 	return false;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
-		bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+		int irq_level, bool line_status)
 {
-	union kvm_ioapic_redirect_entry *pent;
-	int injected = -1;
+	union kvm_ioapic_redirect_entry entry;
+	u32 mask = 1 << irq;
+	u32 old_irr;
+	int edge, ret;
 
-	pent = &ioapic->redirtbl[idx];
+	entry = ioapic->redirtbl[irq];
+	edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 
-	if (!pent->fields.mask) {
-		injected = ioapic_deliver(ioapic, idx, line_status);
-		if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-			pent->fields.remote_irr = 1;
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+		goto out;
+	}
+
+	/*
+	 * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+	 * this only happens if a previous edge has not been delivered due
+	 * do masking.  For level interrupts, the remote_irr field tells
+	 * us if the interrupt is waiting for an EOI.
+	 *
+	 * RTC is special: it is edge-triggered, but userspace likes to know
+	 * if it has been already ack-ed via EOI because coalesced RTC
+	 * interrupts lead to time drift in Windows guests.  So we track
+	 * EOI manually for the RTC interrupt.
+	 */
+	if (irq == RTC_GSI && line_status &&
+		rtc_irq_check_coalesced(ioapic)) {
+		ret = 0;
+		goto out;
 	}
 
-	return injected;
+	old_irr = ioapic->irr;
+	ioapic->irr |= mask;
+	if ((edge && old_irr == ioapic->irr) ||
+	    (!edge && entry.fields.remote_irr)) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+	u32 idx;
+
+	rtc_irq_eoi_tracking_reset(ioapic);
+	for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+		ioapic_set_irq(ioapic, idx, 1, true);
+
+	kvm_rtc_eoi_tracking_restore_all(ioapic);
 }
 
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
 	DECLARE_BITMAP(handled_vectors, 256);
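The new ioapic_set_irq() folds the coalescing decision into one place: an edge interrupt is coalesced when its IRR bit was already set, a level interrupt when remote_irr shows the previous one has not been EOI-ed yet, and the RTC gets its own tracking as the comment explains. Ignoring the RTC special case and pin masking, a minimal standalone model of just that decision (not kernel code; the names are illustrative) looks like this:

#include <stdbool.h>
#include <stdio.h>

struct pin_state {
	bool edge;		/* trig_mode == IOAPIC_EDGE_TRIG */
	bool remote_irr;	/* level interrupt still waiting for its EOI */
};

static unsigned int irr;	/* one bit per pin, like ioapic->irr */

/* Returns true to deliver, false to report the interrupt as coalesced. */
static bool should_deliver(const struct pin_state *pin, unsigned int pin_nr)
{
	unsigned int mask = 1u << pin_nr;
	unsigned int old_irr = irr;

	irr |= mask;
	if (pin->edge)
		return old_irr != irr;	/* a new edge only if the bit was clear */
	return !pin->remote_irr;	/* level: blocked until the EOI arrives */
}

int main(void)
{
	struct pin_state pin = { .edge = true };

	printf("first edge: %s\n", should_deliver(&pin, 8) ? "deliver" : "coalesce");
	printf("repeat before the IRR bit clears: %s\n",
	       should_deliver(&pin, 8) ? "deliver" : "coalesce");
	return 0;
}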
@@ -282,12 +326,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	}
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
 	union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 	struct kvm_lapic_irq irqe;
 	int ret;
 
+	if (entry->fields.mask)
+		return -1;
+
 	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
 		     "vector=%x trig_mode=%x\n",
 		     entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	irqe.level = 1;
 	irqe.shorthand = 0;
 
+	if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+		ioapic->irr &= ~(1 << irq);
+
 	if (irq == RTC_GSI && line_status) {
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -310,45 +360,24 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	} else
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
+	if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+		entry->fields.remote_irr = 1;
+
 	return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		       int level, bool line_status)
 {
-	u32 old_irr;
-	u32 mask = 1 << irq;
-	union kvm_ioapic_redirect_entry entry;
 	int ret, irq_level;
 
 	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
 	spin_lock(&ioapic->lock);
-	old_irr = ioapic->irr;
 	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
 					 irq_source_id, level);
-	entry = ioapic->redirtbl[irq];
-	irq_level ^= entry.fields.polarity;
-	if (!irq_level) {
-		ioapic->irr &= ~mask;
-		ret = 1;
-	} else {
-		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+	ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
 
-		if (irq == RTC_GSI && line_status &&
-		    rtc_irq_check_coalesced(ioapic)) {
-			ret = 0; /* coalesced */
-			goto out;
-		}
-		ioapic->irr |= mask;
-		if ((edge && old_irr != ioapic->irr) ||
-		    (!edge && !entry.fields.remote_irr))
-			ret = ioapic_service(ioapic, irq, line_status);
-		else
-			ret = 0; /* report coalesced interrupt */
-	}
-out:
-	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
 	return ret;
@@ -394,7 +423,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+		if (ioapic->irr & (1 << i))
 			ioapic_service(ioapic, i, false);
 	}
 }
@@ -595,9 +624,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
 	spin_lock(&ioapic->lock);
 	memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+	ioapic->irr = 0;
 	update_handled_vectors(ioapic);
 	kvm_vcpu_request_scan_ioapic(kvm);
-	kvm_rtc_eoi_tracking_restore_all(ioapic);
+	kvm_ioapic_inject_all(ioapic, state->irr);
 	spin_unlock(&ioapic->lock);
 	return 0;
 }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b5ec7fb986f6..56baae8c2f56 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,12 +186,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	long dirty_count = kvm->tlbs_dirty;
-
-	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
-	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+	kvm->tlbs_dirty = false;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
@@ -1804,7 +1801,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 			continue;
 		if (vcpu == me)
 			continue;
-		if (waitqueue_active(&vcpu->wq))
+		if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 			continue;
 		if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 			continue;
@@ -2284,6 +2281,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 		ops = &kvm_arm_vgic_v2_ops;
 		break;
 #endif
+#ifdef CONFIG_S390
+	case KVM_DEV_TYPE_FLIC:
+		ops = &kvm_flic_ops;
+		break;
+#endif
 	default:
 		return -ENODEV;
 	}
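With KVM_DEV_TYPE_FLIC wired into kvm_ioctl_create_device(), an s390 VMM can instantiate the floating interrupt controller through the generic device API. A hedged userspace sketch of that call (error handling and the surrounding VM setup are left out, and the helper name is illustrative):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns the FLIC device fd, or -1 on failure. */
static int create_flic(int vm_fd)
{
	struct kvm_create_device cd = {
		.type = KVM_DEV_TYPE_FLIC,	/* s390 floating interrupt controller */
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	return cd.fd;	/* used afterwards with KVM_SET_DEVICE_ATTR and friends */
}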