path: root/virt/kvm
Diffstat (limited to 'virt/kvm')
-rw-r--r--   virt/kvm/Kconfig     |   4
-rw-r--r--   virt/kvm/async_pf.c  |  27
-rw-r--r--   virt/kvm/eventfd.c   |   8
-rw-r--r--   virt/kvm/ioapic.c    | 108
-rw-r--r--   virt/kvm/kvm_main.c  |  14
5 files changed, 109 insertions(+), 52 deletions(-)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd629..13f2d19793e3 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
         bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+        bool
+
 config HAVE_KVM_MSI
         bool
 
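The new bool symbol defaults to off and is meant to be selected by an architecture's KVM Kconfig when it wants the "page present" notification delivered directly from the worker rather than batched (the selecting arch is outside the virt/kvm diffstat shown here). As an illustration only, not part of the patch, the same compile-time switch can also be queried with IS_ENABLED() instead of the #ifdef guards used in async_pf.c below; the helper name here is hypothetical:

/* Illustration only: IS_ENABLED() keeps both branches visible to the compiler. */
#include <linux/kconfig.h>

static inline bool kvm_async_pf_is_sync(void)
{
        /* true when the selecting architecture enables CONFIG_KVM_ASYNC_PF_SYNC */
        return IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC);
}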
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c14320..10df100c4514 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include <trace/events/kvm.h>
 
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+                                               struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+        kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+                                                struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+        kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
         down_read(&mm->mmap_sem);
         get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
         up_read(&mm->mmap_sem);
+        kvm_async_page_present_sync(vcpu, apf);
         unuse_mm(mm);
 
         spin_lock(&vcpu->async_pf.lock);
@@ -97,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                         list_entry(vcpu->async_pf.queue.next,
                                    typeof(*work), queue);
                 list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+                flush_work(&work->work);
+#else
                 if (cancel_work_sync(&work->work)) {
                         mmdrop(work->mm);
                         kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                         kmem_cache_free(async_pf_cache, work);
                 }
+#endif
         }
 
         spin_lock(&vcpu->async_pf.lock);
@@ -130,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
         spin_unlock(&vcpu->async_pf.lock);
 
         kvm_arch_async_page_ready(vcpu, work);
-        kvm_arch_async_page_present(vcpu, work);
+        kvm_async_page_present_async(vcpu, work);
 
         list_del(&work->queue);
         vcpu->async_pf.queued--;
@@ -138,7 +159,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
         }
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
                        struct kvm_arch_async_pf *arch)
 {
         struct kvm_async_pf *work;
@@ -159,7 +180,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
         work->wakeup_all = false;
         work->vcpu = vcpu;
         work->gva = gva;
-        work->addr = gfn_to_hva(vcpu->kvm, gfn);
+        work->addr = hva;
         work->arch = *arch;
         work->mm = current->mm;
         atomic_inc(&work->mm->mm_count);
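Two things change in async_pf.c: the worker now calls the arch "page present" hook directly when KVM_ASYNC_PF_SYNC is set, so kvm_clear_async_pf_completion_queue() must flush_work() (let the item run to completion) instead of cancel_work_sync(), and kvm_setup_async_pf() takes a precomputed hva instead of translating a gfn itself. A minimal, self-contained sketch of the two workqueue teardown primitives involved, with hypothetical names and no KVM types:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_item {
        struct work_struct work;
};

static void demo_fn(struct work_struct *work)
{
        /* deferred processing, e.g. a "page present" style notification */
}

static struct demo_item *demo_queue(void)
{
        struct demo_item *item = kzalloc(sizeof(*item), GFP_KERNEL);

        if (!item)
                return NULL;
        INIT_WORK(&item->work, demo_fn);
        schedule_work(&item->work);
        return item;
}

static void demo_teardown(struct demo_item *item, bool must_run)
{
        if (must_run) {
                flush_work(&item->work);        /* wait for demo_fn to finish */
                kfree(item);
        } else if (cancel_work_sync(&item->work)) {
                kfree(item);                    /* still pending, never ran: free here */
        }
        /* if it already ran, the completion path owns the item (as in KVM) */
}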
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d6043b36..29c2a04e036e 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -391,19 +391,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
                                       lockdep_is_held(&kvm->irqfds.lock));
         irqfd_update(kvm, irqfd, irq_rt);
 
-        events = f.file->f_op->poll(f.file, &irqfd->pt);
-
         list_add_tail(&irqfd->list, &kvm->irqfds.items);
 
+        spin_unlock_irq(&kvm->irqfds.lock);
+
         /*
          * Check if there was an event already pending on the eventfd
          * before we registered, and trigger it as if we didn't miss it.
          */
+        events = f.file->f_op->poll(f.file, &irqfd->pt);
+
         if (events & POLLIN)
                 schedule_work(&irqfd->inject);
 
-        spin_unlock_irq(&kvm->irqfds.lock);
-
         /*
          * do not drop the file until the irqfd is fully initialized, otherwise
          * we might race against the POLLHUP
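The reordering publishes the irqfd on kvm->irqfds.items and drops the spinlock before polling the eventfd, so an event signalled before registration is still reported by the returned poll mask and injected. The same "arm first, then check for an already-pending event" idea can be sketched in plain userspace terms with eventfd(2) and poll(2); this is an illustration, not the in-kernel irqfd code:

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
        int efd = eventfd(0, EFD_NONBLOCK);
        uint64_t v = 1;
        struct pollfd pfd = { .fd = efd, .events = POLLIN };

        /* event arrives "before registration" */
        if (write(efd, &v, sizeof(v)) != sizeof(v))
                return 1;

        /* ... registration of the consumer would happen here ... */

        /* one non-blocking poll catches the event that was signalled earlier */
        if (poll(&pfd, 1, 0) > 0 && (pfd.revents & POLLIN))
                printf("pending event picked up after registering\n");

        close(efd);
        return 0;
}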
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99ad7dc..d4b601547f1f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
 #else
 #define ioapic_debug(fmt, arg...)
 #endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
                 bool line_status);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -163,23 +163,67 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
         return false;
 }
 
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
-                bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+                int irq_level, bool line_status)
 {
-        union kvm_ioapic_redirect_entry *pent;
-        int injected = -1;
+        union kvm_ioapic_redirect_entry entry;
+        u32 mask = 1 << irq;
+        u32 old_irr;
+        int edge, ret;
 
-        pent = &ioapic->redirtbl[idx];
+        entry = ioapic->redirtbl[irq];
+        edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
 
-        if (!pent->fields.mask) {
-                injected = ioapic_deliver(ioapic, idx, line_status);
-                if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-                        pent->fields.remote_irr = 1;
+        if (!irq_level) {
+                ioapic->irr &= ~mask;
+                ret = 1;
+                goto out;
+        }
+
+        /*
+         * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+         * this only happens if a previous edge has not been delivered due
+         * to masking. For level interrupts, the remote_irr field tells
+         * us if the interrupt is waiting for an EOI.
+         *
+         * RTC is special: it is edge-triggered, but userspace likes to know
+         * if it has been already ack-ed via EOI because coalesced RTC
+         * interrupts lead to time drift in Windows guests. So we track
+         * EOI manually for the RTC interrupt.
+         */
+        if (irq == RTC_GSI && line_status &&
+                rtc_irq_check_coalesced(ioapic)) {
+                ret = 0;
+                goto out;
         }
 
-        return injected;
+        old_irr = ioapic->irr;
+        ioapic->irr |= mask;
+        if ((edge && old_irr == ioapic->irr) ||
+            (!edge && entry.fields.remote_irr)) {
+                ret = 0;
+                goto out;
+        }
+
+        ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+        trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+        return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+        u32 idx;
+
+        rtc_irq_eoi_tracking_reset(ioapic);
+        for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+                ioapic_set_irq(ioapic, idx, 1, true);
+
+        kvm_rtc_eoi_tracking_restore_all(ioapic);
 }
 
+
 static void update_handled_vectors(struct kvm_ioapic *ioapic)
 {
         DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +326,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
         }
 }
 
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 {
         union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
         struct kvm_lapic_irq irqe;
         int ret;
 
+        if (entry->fields.mask)
+                return -1;
+
         ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
                      "vector=%x trig_mode=%x\n",
                      entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,6 +349,9 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
         irqe.level = 1;
         irqe.shorthand = 0;
 
+        if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+                ioapic->irr &= ~(1 << irq);
+
         if (irq == RTC_GSI && line_status) {
                 BUG_ON(ioapic->rtc_status.pending_eoi != 0);
                 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
@@ -310,45 +360,24 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
         } else
                 ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
+        if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+                entry->fields.remote_irr = 1;
+
         return ret;
 }
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
                        int level, bool line_status)
 {
-        u32 old_irr;
-        u32 mask = 1 << irq;
-        union kvm_ioapic_redirect_entry entry;
         int ret, irq_level;
 
         BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
         spin_lock(&ioapic->lock);
-        old_irr = ioapic->irr;
         irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
                                          irq_source_id, level);
-        entry = ioapic->redirtbl[irq];
-        irq_level ^= entry.fields.polarity;
-        if (!irq_level) {
-                ioapic->irr &= ~mask;
-                ret = 1;
-        } else {
-                int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+        ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
 
-                if (irq == RTC_GSI && line_status &&
-                        rtc_irq_check_coalesced(ioapic)) {
-                        ret = 0; /* coalesced */
-                        goto out;
-                }
-                ioapic->irr |= mask;
-                if ((edge && old_irr != ioapic->irr) ||
-                    (!edge && !entry.fields.remote_irr))
-                        ret = ioapic_service(ioapic, irq, line_status);
-                else
-                        ret = 0; /* report coalesced interrupt */
-        }
-out:
-        trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
         spin_unlock(&ioapic->lock);
 
         return ret;
@@ -394,7 +423,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 
                 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
                 ent->fields.remote_irr = 0;
-                if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+                if (ioapic->irr & (1 << i))
                         ioapic_service(ioapic, i, false);
         }
 }
@@ -595,9 +624,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
         spin_lock(&ioapic->lock);
         memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+        ioapic->irr = 0;
         update_handled_vectors(ioapic);
         kvm_vcpu_request_scan_ioapic(kvm);
-        kvm_rtc_eoi_tracking_restore_all(ioapic);
+        kvm_ioapic_inject_all(ioapic, state->irr);
         spin_unlock(&ioapic->lock);
         return 0;
 }
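The refactoring concentrates the coalescing rule in ioapic_set_irq(): an edge-triggered pin is coalesced when its IRR bit was already set (a previous edge has not been delivered), a level-triggered pin when remote_irr shows the guest has not yet EOI'd, and the RTC pin is additionally checked via rtc_irq_check_coalesced(). ioapic_service() now clears the IRR bit at delivery for edge interrupts and sets remote_irr itself for level ones, and kvm_set_ioapic() replays the saved IRR through kvm_ioapic_inject_all(). A standalone restatement of the basic decision, using illustrative types rather than KVM's structures:

#include <stdbool.h>
#include <stdint.h>

struct pin_state {
        bool edge_triggered;   /* trig_mode == IOAPIC_EDGE_TRIG */
        bool remote_irr;       /* level interrupt still awaiting EOI */
};

/* returns true when a new assertion of the pin would be reported as coalesced */
static bool would_coalesce(uint32_t irr, unsigned int pin,
                           const struct pin_state *s)
{
        uint32_t mask = 1u << pin;

        if (s->edge_triggered)
                return irr & mask;     /* previous edge not delivered yet */
        return s->remote_irr;          /* level: guest has not EOI'd */
}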
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381b1cb7..56baae8c2f56 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
                                     struct kvm_memory_slot *memslot, gfn_t gfn);
 
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
@@ -186,12 +186,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-        long dirty_count = kvm->tlbs_dirty;
-
-        smp_mb();
         if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                 ++kvm->stat.remote_tlb_flush;
-        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+        kvm->tlbs_dirty = false;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
@@ -1804,7 +1801,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                                 continue;
                         if (vcpu == me)
                                 continue;
-                        if (waitqueue_active(&vcpu->wq))
+                        if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
                                 continue;
                         if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                 continue;
@@ -2284,6 +2281,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
                 ops = &kvm_arm_vgic_v2_ops;
                 break;
 #endif
+#ifdef CONFIG_S390
+        case KVM_DEV_TYPE_FLIC:
+                ops = &kvm_flic_ops;
+                break;
+#endif
         default:
                 return -ENODEV;
         }
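On the kvm_main.c side, kvm_flush_remote_tlbs() now simply resets tlbs_dirty after requesting the flush, kvm_vcpu_on_spin() skips a sleeping vcpu only if it is also not runnable, and KVM_CREATE_DEVICE learns about the s390 FLIC device type. A hedged userspace sketch of creating such a device through the existing KVM_CREATE_DEVICE ioctl, assuming an already-created VM file descriptor and uapi headers recent enough to define KVM_DEV_TYPE_FLIC:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int create_flic(int vm_fd)
{
        struct kvm_create_device cd = {
                .type  = KVM_DEV_TYPE_FLIC,
                .flags = 0,
        };

        if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0) {
                perror("KVM_CREATE_DEVICE");
                return -1;
        }
        return cd.fd;   /* per-device fd, usable with KVM_SET_DEVICE_ATTR etc. */
}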