| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-04 12:30:33 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-04 12:30:33 -0400 |
| commit | ecefbd94b834fa32559d854646d777c56749ef1c (patch) | |
| tree | ca8958900ad9e208a8e5fb7704f1b66dc76131b4 /virt | |
| parent | ce57e981f2b996aaca2031003b3f866368307766 (diff) | |
| parent | 3d11df7abbff013b811d5615320580cd5d9d7d31 (diff) | |
Merge tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
"Highlights of the changes for this release include support for vfio
level triggered interrupts, improved big real mode support on older
Intels, a streamlined guest page table walker, guest APIC speedups,
PIO optimizations, better overcommit handling, and read-only memory."
* tag 'kvm-3.7-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
KVM: s390: Fix vcpu_load handling in interrupt code
KVM: x86: Fix guest debug across vcpu INIT reset
KVM: Add resampling irqfds for level triggered interrupts
KVM: optimize apic interrupt delivery
KVM: MMU: Eliminate pointless temporary 'ac'
KVM: MMU: Avoid access/dirty update loop if all is well
KVM: MMU: Eliminate eperm temporary
KVM: MMU: Optimize is_last_gpte()
KVM: MMU: Simplify walk_addr_generic() loop
KVM: MMU: Optimize pte permission checks
KVM: MMU: Update accessed and dirty bits after guest pagetable walk
KVM: MMU: Move gpte_access() out of paging_tmpl.h
KVM: MMU: Optimize gpte_access() slightly
KVM: MMU: Push clean gpte write protection out of gpte_access()
KVM: clarify kvmclock documentation
KVM: make processes waiting on vcpu mutex killable
KVM: SVM: Make use of asm.h
KVM: VMX: Make use of asm.h
KVM: VMX: Make lto-friendly
KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()
...
Conflicts:
arch/s390/include/asm/processor.h
arch/x86/kvm/i8259.c
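
The "vfio level triggered interrupts" item above lands as the resampling irqfd variant added in virt/kvm/eventfd.c below (KVM_IRQFD_FLAG_RESAMPLE plus a second eventfd). As a rough userspace sketch — not taken from this series, assuming the struct kvm_irqfd layout and flag names it exports, with vm_fd and gsi as placeholders — a device model could wire one up like this:

```c
#include <linux/kvm.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

/* Hypothetical helper: vm_fd is an existing KVM VM file descriptor and gsi is
 * the guest interrupt line the emulated device sits on (both placeholders). */
static int setup_resample_irqfd(int vm_fd, unsigned int gsi,
				int *trigger_fd, int *resample_fd)
{
	struct kvm_irqfd irqfd = { 0 };

	*trigger_fd = eventfd(0, 0);	/* signalled by userspace to assert the IRQ */
	*resample_fd = eventfd(0, 0);	/* signalled by KVM once the guest acks it */
	if (*trigger_fd < 0 || *resample_fd < 0)
		return -1;

	irqfd.fd = *trigger_fd;
	irqfd.gsi = gsi;
	irqfd.flags = KVM_IRQFD_FLAG_RESAMPLE;
	irqfd.resamplefd = *resample_fd;

	/* KVM asserts gsi when trigger_fd fires, de-asserts it on the guest's
	 * acknowledgement, and then notifies us through resample_fd. */
	return ioctl(vm_fd, KVM_IRQFD, &irqfd);
}
```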
Diffstat (limited to 'virt')
| -rw-r--r-- | virt/kvm/Kconfig | 3 |
| -rw-r--r-- | virt/kvm/async_pf.c | 11 |
| -rw-r--r-- | virt/kvm/eventfd.c | 150 |
| -rw-r--r-- | virt/kvm/ioapic.c | 37 |
| -rw-r--r-- | virt/kvm/iommu.c | 16 |
| -rw-r--r-- | virt/kvm/irq_comm.c | 17 |
| -rw-r--r-- | virt/kvm/kvm_main.c | 541 |
7 files changed, 525 insertions, 250 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a9139..d01b24b72c61 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF | |||
21 | 21 | ||
22 | config HAVE_KVM_MSI | 22 | config HAVE_KVM_MSI |
23 | bool | 23 | bool |
24 | |||
25 | config HAVE_KVM_CPU_RELAX_INTERCEPT | ||
26 | bool | ||
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 74268b4c2ee1..ea475cd03511 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -111,8 +111,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) | |||
111 | list_entry(vcpu->async_pf.done.next, | 111 | list_entry(vcpu->async_pf.done.next, |
112 | typeof(*work), link); | 112 | typeof(*work), link); |
113 | list_del(&work->link); | 113 | list_del(&work->link); |
114 | if (work->page) | 114 | if (!is_error_page(work->page)) |
115 | put_page(work->page); | 115 | kvm_release_page_clean(work->page); |
116 | kmem_cache_free(async_pf_cache, work); | 116 | kmem_cache_free(async_pf_cache, work); |
117 | } | 117 | } |
118 | spin_unlock(&vcpu->async_pf.lock); | 118 | spin_unlock(&vcpu->async_pf.lock); |
@@ -138,8 +138,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) | |||
138 | 138 | ||
139 | list_del(&work->queue); | 139 | list_del(&work->queue); |
140 | vcpu->async_pf.queued--; | 140 | vcpu->async_pf.queued--; |
141 | if (work->page) | 141 | if (!is_error_page(work->page)) |
142 | put_page(work->page); | 142 | kvm_release_page_clean(work->page); |
143 | kmem_cache_free(async_pf_cache, work); | 143 | kmem_cache_free(async_pf_cache, work); |
144 | } | 144 | } |
145 | } | 145 | } |
@@ -203,8 +203,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) | |||
203 | if (!work) | 203 | if (!work) |
204 | return -ENOMEM; | 204 | return -ENOMEM; |
205 | 205 | ||
206 | work->page = bad_page; | 206 | work->page = KVM_ERR_PTR_BAD_PAGE; |
207 | get_page(bad_page); | ||
208 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ | 207 | INIT_LIST_HEAD(&work->queue); /* for list_del to work */ |
209 | 208 | ||
210 | spin_lock(&vcpu->async_pf.lock); | 209 | spin_lock(&vcpu->async_pf.lock); |
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 67a35e90384c..9718e98d6d2a 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -43,6 +43,31 @@ | |||
43 | * -------------------------------------------------------------------- | 43 | * -------------------------------------------------------------------- |
44 | */ | 44 | */ |
45 | 45 | ||
46 | /* | ||
47 | * Resampling irqfds are a special variety of irqfds used to emulate | ||
48 | * level triggered interrupts. The interrupt is asserted on eventfd | ||
49 | * trigger. On acknowledgement through the irq ack notifier, the | ||
50 | * interrupt is de-asserted and userspace is notified through the | ||
51 | * resamplefd. All resamplers on the same gsi are de-asserted | ||
52 | * together, so we don't need to track the state of each individual | ||
53 | * user. We can also therefore share the same irq source ID. | ||
54 | */ | ||
55 | struct _irqfd_resampler { | ||
56 | struct kvm *kvm; | ||
57 | /* | ||
58 | * List of resampling struct _irqfd objects sharing this gsi. | ||
59 | * RCU list modified under kvm->irqfds.resampler_lock | ||
60 | */ | ||
61 | struct list_head list; | ||
62 | struct kvm_irq_ack_notifier notifier; | ||
63 | /* | ||
64 | * Entry in list of kvm->irqfd.resampler_list. Use for sharing | ||
65 | * resamplers among irqfds on the same gsi. | ||
66 | * Accessed and modified under kvm->irqfds.resampler_lock | ||
67 | */ | ||
68 | struct list_head link; | ||
69 | }; | ||
70 | |||
46 | struct _irqfd { | 71 | struct _irqfd { |
47 | /* Used for MSI fast-path */ | 72 | /* Used for MSI fast-path */ |
48 | struct kvm *kvm; | 73 | struct kvm *kvm; |
@@ -52,6 +77,12 @@ struct _irqfd { | |||
52 | /* Used for level IRQ fast-path */ | 77 | /* Used for level IRQ fast-path */ |
53 | int gsi; | 78 | int gsi; |
54 | struct work_struct inject; | 79 | struct work_struct inject; |
80 | /* The resampler used by this irqfd (resampler-only) */ | ||
81 | struct _irqfd_resampler *resampler; | ||
82 | /* Eventfd notified on resample (resampler-only) */ | ||
83 | struct eventfd_ctx *resamplefd; | ||
84 | /* Entry in list of irqfds for a resampler (resampler-only) */ | ||
85 | struct list_head resampler_link; | ||
55 | /* Used for setup/shutdown */ | 86 | /* Used for setup/shutdown */ |
56 | struct eventfd_ctx *eventfd; | 87 | struct eventfd_ctx *eventfd; |
57 | struct list_head list; | 88 | struct list_head list; |
@@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work) | |||
67 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 98 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
68 | struct kvm *kvm = irqfd->kvm; | 99 | struct kvm *kvm = irqfd->kvm; |
69 | 100 | ||
70 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 101 | if (!irqfd->resampler) { |
71 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 102 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
103 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | ||
104 | } else | ||
105 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
106 | irqfd->gsi, 1); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Since resampler irqfds share an IRQ source ID, we de-assert once | ||
111 | * then notify all of the resampler irqfds using this GSI. We can't | ||
112 | * do multiple de-asserts or we risk racing with incoming re-asserts. | ||
113 | */ | ||
114 | static void | ||
115 | irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) | ||
116 | { | ||
117 | struct _irqfd_resampler *resampler; | ||
118 | struct _irqfd *irqfd; | ||
119 | |||
120 | resampler = container_of(kian, struct _irqfd_resampler, notifier); | ||
121 | |||
122 | kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
123 | resampler->notifier.gsi, 0); | ||
124 | |||
125 | rcu_read_lock(); | ||
126 | |||
127 | list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) | ||
128 | eventfd_signal(irqfd->resamplefd, 1); | ||
129 | |||
130 | rcu_read_unlock(); | ||
131 | } | ||
132 | |||
133 | static void | ||
134 | irqfd_resampler_shutdown(struct _irqfd *irqfd) | ||
135 | { | ||
136 | struct _irqfd_resampler *resampler = irqfd->resampler; | ||
137 | struct kvm *kvm = resampler->kvm; | ||
138 | |||
139 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
140 | |||
141 | list_del_rcu(&irqfd->resampler_link); | ||
142 | synchronize_rcu(); | ||
143 | |||
144 | if (list_empty(&resampler->list)) { | ||
145 | list_del(&resampler->link); | ||
146 | kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); | ||
147 | kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, | ||
148 | resampler->notifier.gsi, 0); | ||
149 | kfree(resampler); | ||
150 | } | ||
151 | |||
152 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
72 | } | 153 | } |
73 | 154 | ||
74 | /* | 155 | /* |
@@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work) | |||
92 | */ | 173 | */ |
93 | flush_work(&irqfd->inject); | 174 | flush_work(&irqfd->inject); |
94 | 175 | ||
176 | if (irqfd->resampler) { | ||
177 | irqfd_resampler_shutdown(irqfd); | ||
178 | eventfd_ctx_put(irqfd->resamplefd); | ||
179 | } | ||
180 | |||
95 | /* | 181 | /* |
96 | * It is now safe to release the object's resources | 182 | * It is now safe to release the object's resources |
97 | */ | 183 | */ |
@@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
203 | struct kvm_irq_routing_table *irq_rt; | 289 | struct kvm_irq_routing_table *irq_rt; |
204 | struct _irqfd *irqfd, *tmp; | 290 | struct _irqfd *irqfd, *tmp; |
205 | struct file *file = NULL; | 291 | struct file *file = NULL; |
206 | struct eventfd_ctx *eventfd = NULL; | 292 | struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; |
207 | int ret; | 293 | int ret; |
208 | unsigned int events; | 294 | unsigned int events; |
209 | 295 | ||
@@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
231 | 317 | ||
232 | irqfd->eventfd = eventfd; | 318 | irqfd->eventfd = eventfd; |
233 | 319 | ||
320 | if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { | ||
321 | struct _irqfd_resampler *resampler; | ||
322 | |||
323 | resamplefd = eventfd_ctx_fdget(args->resamplefd); | ||
324 | if (IS_ERR(resamplefd)) { | ||
325 | ret = PTR_ERR(resamplefd); | ||
326 | goto fail; | ||
327 | } | ||
328 | |||
329 | irqfd->resamplefd = resamplefd; | ||
330 | INIT_LIST_HEAD(&irqfd->resampler_link); | ||
331 | |||
332 | mutex_lock(&kvm->irqfds.resampler_lock); | ||
333 | |||
334 | list_for_each_entry(resampler, | ||
335 | &kvm->irqfds.resampler_list, list) { | ||
336 | if (resampler->notifier.gsi == irqfd->gsi) { | ||
337 | irqfd->resampler = resampler; | ||
338 | break; | ||
339 | } | ||
340 | } | ||
341 | |||
342 | if (!irqfd->resampler) { | ||
343 | resampler = kzalloc(sizeof(*resampler), GFP_KERNEL); | ||
344 | if (!resampler) { | ||
345 | ret = -ENOMEM; | ||
346 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
347 | goto fail; | ||
348 | } | ||
349 | |||
350 | resampler->kvm = kvm; | ||
351 | INIT_LIST_HEAD(&resampler->list); | ||
352 | resampler->notifier.gsi = irqfd->gsi; | ||
353 | resampler->notifier.irq_acked = irqfd_resampler_ack; | ||
354 | INIT_LIST_HEAD(&resampler->link); | ||
355 | |||
356 | list_add(&resampler->link, &kvm->irqfds.resampler_list); | ||
357 | kvm_register_irq_ack_notifier(kvm, | ||
358 | &resampler->notifier); | ||
359 | irqfd->resampler = resampler; | ||
360 | } | ||
361 | |||
362 | list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); | ||
363 | synchronize_rcu(); | ||
364 | |||
365 | mutex_unlock(&kvm->irqfds.resampler_lock); | ||
366 | } | ||
367 | |||
234 | /* | 368 | /* |
235 | * Install our own custom wake-up handling so we are notified via | 369 | * Install our own custom wake-up handling so we are notified via |
236 | * a callback whenever someone signals the underlying eventfd | 370 | * a callback whenever someone signals the underlying eventfd |
@@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) | |||
276 | return 0; | 410 | return 0; |
277 | 411 | ||
278 | fail: | 412 | fail: |
413 | if (irqfd->resampler) | ||
414 | irqfd_resampler_shutdown(irqfd); | ||
415 | |||
416 | if (resamplefd && !IS_ERR(resamplefd)) | ||
417 | eventfd_ctx_put(resamplefd); | ||
418 | |||
279 | if (eventfd && !IS_ERR(eventfd)) | 419 | if (eventfd && !IS_ERR(eventfd)) |
280 | eventfd_ctx_put(eventfd); | 420 | eventfd_ctx_put(eventfd); |
281 | 421 | ||
@@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm) | |||
291 | { | 431 | { |
292 | spin_lock_init(&kvm->irqfds.lock); | 432 | spin_lock_init(&kvm->irqfds.lock); |
293 | INIT_LIST_HEAD(&kvm->irqfds.items); | 433 | INIT_LIST_HEAD(&kvm->irqfds.items); |
434 | INIT_LIST_HEAD(&kvm->irqfds.resampler_list); | ||
435 | mutex_init(&kvm->irqfds.resampler_lock); | ||
294 | INIT_LIST_HEAD(&kvm->ioeventfds); | 436 | INIT_LIST_HEAD(&kvm->ioeventfds); |
295 | } | 437 | } |
296 | 438 | ||
@@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) | |||
340 | int | 482 | int |
341 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) | 483 | kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) |
342 | { | 484 | { |
343 | if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN) | 485 | if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) |
344 | return -EINVAL; | 486 | return -EINVAL; |
345 | 487 | ||
346 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) | 488 | if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) |
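
The comment block added at the top of eventfd.c describes the other half of the contract: once the guest acknowledges the interrupt, KVM de-asserts the line and signals the resamplefd, and the device model re-asserts only if its level is still high. A hypothetical polling loop continuing the sketch above (device_level_high() is an assumed device-state query; error handling trimmed):

```c
#include <stdint.h>
#include <unistd.h>

extern int device_level_high(void);	/* hypothetical query of the device's line state */

/* trigger_fd/resample_fd come from the setup sketch earlier. */
static void resample_loop(int trigger_fd, int resample_fd)
{
	uint64_t val;

	for (;;) {
		/* Block until KVM reports that the guest acknowledged the
		 * interrupt and the line was de-asserted. */
		if (read(resample_fd, &val, sizeof(val)) != sizeof(val))
			break;

		/* Level-triggered semantics: if the device still wants
		 * service, re-assert the line through the trigger eventfd. */
		if (device_level_high()) {
			val = 1;
			if (write(trigger_fd, &val, sizeof(val)) != sizeof(val))
				break;
		}
	}
}
```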
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ef61d529a6c4..cfb7e4d52dc2 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -197,28 +197,29 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, | |||
197 | u32 old_irr; | 197 | u32 old_irr; |
198 | u32 mask = 1 << irq; | 198 | u32 mask = 1 << irq; |
199 | union kvm_ioapic_redirect_entry entry; | 199 | union kvm_ioapic_redirect_entry entry; |
200 | int ret = 1; | 200 | int ret, irq_level; |
201 | |||
202 | BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); | ||
201 | 203 | ||
202 | spin_lock(&ioapic->lock); | 204 | spin_lock(&ioapic->lock); |
203 | old_irr = ioapic->irr; | 205 | old_irr = ioapic->irr; |
204 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 206 | irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], |
205 | int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], | 207 | irq_source_id, level); |
206 | irq_source_id, level); | 208 | entry = ioapic->redirtbl[irq]; |
207 | entry = ioapic->redirtbl[irq]; | 209 | irq_level ^= entry.fields.polarity; |
208 | irq_level ^= entry.fields.polarity; | 210 | if (!irq_level) { |
209 | if (!irq_level) | 211 | ioapic->irr &= ~mask; |
210 | ioapic->irr &= ~mask; | 212 | ret = 1; |
211 | else { | 213 | } else { |
212 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); | 214 | int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); |
213 | ioapic->irr |= mask; | 215 | ioapic->irr |= mask; |
214 | if ((edge && old_irr != ioapic->irr) || | 216 | if ((edge && old_irr != ioapic->irr) || |
215 | (!edge && !entry.fields.remote_irr)) | 217 | (!edge && !entry.fields.remote_irr)) |
216 | ret = ioapic_service(ioapic, irq); | 218 | ret = ioapic_service(ioapic, irq); |
217 | else | 219 | else |
218 | ret = 0; /* report coalesced interrupt */ | 220 | ret = 0; /* report coalesced interrupt */ |
219 | } | ||
220 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
221 | } | 221 | } |
222 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | ||
222 | spin_unlock(&ioapic->lock); | 223 | spin_unlock(&ioapic->lock); |
223 | 224 | ||
224 | return ret; | 225 | return ret; |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf0..037cb6730e68 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); | |||
42 | static void kvm_iommu_put_pages(struct kvm *kvm, | 42 | static void kvm_iommu_put_pages(struct kvm *kvm, |
43 | gfn_t base_gfn, unsigned long npages); | 43 | gfn_t base_gfn, unsigned long npages); |
44 | 44 | ||
45 | static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, | 45 | static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, |
46 | gfn_t gfn, unsigned long size) | 46 | unsigned long size) |
47 | { | 47 | { |
48 | gfn_t end_gfn; | 48 | gfn_t end_gfn; |
49 | pfn_t pfn; | 49 | pfn_t pfn; |
50 | 50 | ||
51 | pfn = gfn_to_pfn_memslot(kvm, slot, gfn); | 51 | pfn = gfn_to_pfn_memslot(slot, gfn); |
52 | end_gfn = gfn + (size >> PAGE_SHIFT); | 52 | end_gfn = gfn + (size >> PAGE_SHIFT); |
53 | gfn += 1; | 53 | gfn += 1; |
54 | 54 | ||
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
56 | return pfn; | 56 | return pfn; |
57 | 57 | ||
58 | while (gfn < end_gfn) | 58 | while (gfn < end_gfn) |
59 | gfn_to_pfn_memslot(kvm, slot, gfn++); | 59 | gfn_to_pfn_memslot(slot, gfn++); |
60 | 60 | ||
61 | return pfn; | 61 | return pfn; |
62 | } | 62 | } |
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
105 | * Pin all pages we are about to map in memory. This is | 105 | * Pin all pages we are about to map in memory. This is |
106 | * important because we unmap and unpin in 4kb steps later. | 106 | * important because we unmap and unpin in 4kb steps later. |
107 | */ | 107 | */ |
108 | pfn = kvm_pin_pages(kvm, slot, gfn, page_size); | 108 | pfn = kvm_pin_pages(slot, gfn, page_size); |
109 | if (is_error_pfn(pfn)) { | 109 | if (is_error_pfn(pfn)) { |
110 | gfn += 1; | 110 | gfn += 1; |
111 | continue; | 111 | continue; |
@@ -300,6 +300,12 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
300 | 300 | ||
301 | /* Get physical address */ | 301 | /* Get physical address */ |
302 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); | 302 | phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); |
303 | |||
304 | if (!phys) { | ||
305 | gfn++; | ||
306 | continue; | ||
307 | } | ||
308 | |||
303 | pfn = phys >> PAGE_SHIFT; | 309 | pfn = phys >> PAGE_SHIFT; |
304 | 310 | ||
305 | /* Unmap address from IO address space */ | 311 | /* Unmap address from IO address space */ |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 83402d74a767..2eb58af7ee99 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
68 | struct kvm_vcpu *vcpu, *lowest = NULL; | 68 | struct kvm_vcpu *vcpu, *lowest = NULL; |
69 | 69 | ||
70 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 70 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
71 | kvm_is_dm_lowest_prio(irq)) | 71 | kvm_is_dm_lowest_prio(irq)) { |
72 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 72 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
73 | irq->delivery_mode = APIC_DM_FIXED; | ||
74 | } | ||
75 | |||
76 | if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r)) | ||
77 | return r; | ||
73 | 78 | ||
74 | kvm_for_each_vcpu(i, vcpu, kvm) { | 79 | kvm_for_each_vcpu(i, vcpu, kvm) { |
75 | if (!kvm_apic_present(vcpu)) | 80 | if (!kvm_apic_present(vcpu)) |
@@ -223,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
223 | } | 228 | } |
224 | 229 | ||
225 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 230 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
231 | #ifdef CONFIG_X86 | ||
232 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
233 | #endif | ||
226 | set_bit(irq_source_id, bitmap); | 234 | set_bit(irq_source_id, bitmap); |
227 | unlock: | 235 | unlock: |
228 | mutex_unlock(&kvm->irq_lock); | 236 | mutex_unlock(&kvm->irq_lock); |
@@ -233,6 +241,9 @@ unlock: | |||
233 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | 241 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) |
234 | { | 242 | { |
235 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 243 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
244 | #ifdef CONFIG_X86 | ||
245 | ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); | ||
246 | #endif | ||
236 | 247 | ||
237 | mutex_lock(&kvm->irq_lock); | 248 | mutex_lock(&kvm->irq_lock); |
238 | if (irq_source_id < 0 || | 249 | if (irq_source_id < 0 || |
@@ -321,11 +332,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
321 | switch (ue->u.irqchip.irqchip) { | 332 | switch (ue->u.irqchip.irqchip) { |
322 | case KVM_IRQCHIP_PIC_MASTER: | 333 | case KVM_IRQCHIP_PIC_MASTER: |
323 | e->set = kvm_set_pic_irq; | 334 | e->set = kvm_set_pic_irq; |
324 | max_pin = 16; | 335 | max_pin = PIC_NUM_PINS; |
325 | break; | 336 | break; |
326 | case KVM_IRQCHIP_PIC_SLAVE: | 337 | case KVM_IRQCHIP_PIC_SLAVE: |
327 | e->set = kvm_set_pic_irq; | 338 | e->set = kvm_set_pic_irq; |
328 | max_pin = 16; | 339 | max_pin = PIC_NUM_PINS; |
329 | delta = 8; | 340 | delta = 8; |
330 | break; | 341 | break; |
331 | case KVM_IRQCHIP_IOAPIC: | 342 | case KVM_IRQCHIP_IOAPIC: |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d617f69131d7..c353b4599cec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,13 +100,7 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); | |||
100 | 100 | ||
101 | static bool largepages_enabled = true; | 101 | static bool largepages_enabled = true; |
102 | 102 | ||
103 | static struct page *hwpoison_page; | 103 | bool kvm_is_mmio_pfn(pfn_t pfn) |
104 | static pfn_t hwpoison_pfn; | ||
105 | |||
106 | struct page *fault_page; | ||
107 | pfn_t fault_pfn; | ||
108 | |||
109 | inline int kvm_is_mmio_pfn(pfn_t pfn) | ||
110 | { | 104 | { |
111 | if (pfn_valid(pfn)) { | 105 | if (pfn_valid(pfn)) { |
112 | int reserved; | 106 | int reserved; |
@@ -137,11 +131,12 @@ inline int kvm_is_mmio_pfn(pfn_t pfn) | |||
137 | /* | 131 | /* |
138 | * Switches to specified vcpu, until a matching vcpu_put() | 132 | * Switches to specified vcpu, until a matching vcpu_put() |
139 | */ | 133 | */ |
140 | void vcpu_load(struct kvm_vcpu *vcpu) | 134 | int vcpu_load(struct kvm_vcpu *vcpu) |
141 | { | 135 | { |
142 | int cpu; | 136 | int cpu; |
143 | 137 | ||
144 | mutex_lock(&vcpu->mutex); | 138 | if (mutex_lock_killable(&vcpu->mutex)) |
139 | return -EINTR; | ||
145 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { | 140 | if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { |
146 | /* The thread running this VCPU changed. */ | 141 | /* The thread running this VCPU changed. */ |
147 | struct pid *oldpid = vcpu->pid; | 142 | struct pid *oldpid = vcpu->pid; |
@@ -154,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu) | |||
154 | preempt_notifier_register(&vcpu->preempt_notifier); | 149 | preempt_notifier_register(&vcpu->preempt_notifier); |
155 | kvm_arch_vcpu_load(vcpu, cpu); | 150 | kvm_arch_vcpu_load(vcpu, cpu); |
156 | put_cpu(); | 151 | put_cpu(); |
152 | return 0; | ||
157 | } | 153 | } |
158 | 154 | ||
159 | void vcpu_put(struct kvm_vcpu *vcpu) | 155 | void vcpu_put(struct kvm_vcpu *vcpu) |
@@ -236,6 +232,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
236 | } | 232 | } |
237 | vcpu->run = page_address(page); | 233 | vcpu->run = page_address(page); |
238 | 234 | ||
235 | kvm_vcpu_set_in_spin_loop(vcpu, false); | ||
236 | kvm_vcpu_set_dy_eligible(vcpu, false); | ||
237 | |||
239 | r = kvm_arch_vcpu_init(vcpu); | 238 | r = kvm_arch_vcpu_init(vcpu); |
240 | if (r < 0) | 239 | if (r < 0) |
241 | goto fail_free_run; | 240 | goto fail_free_run; |
@@ -332,8 +331,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
332 | * count is also read inside the mmu_lock critical section. | 331 | * count is also read inside the mmu_lock critical section. |
333 | */ | 332 | */ |
334 | kvm->mmu_notifier_count++; | 333 | kvm->mmu_notifier_count++; |
335 | for (; start < end; start += PAGE_SIZE) | 334 | need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); |
336 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | ||
337 | need_tlb_flush |= kvm->tlbs_dirty; | 335 | need_tlb_flush |= kvm->tlbs_dirty; |
338 | /* we've to flush the tlb before the pages can be freed */ | 336 | /* we've to flush the tlb before the pages can be freed */ |
339 | if (need_tlb_flush) | 337 | if (need_tlb_flush) |
@@ -412,7 +410,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, | |||
412 | int idx; | 410 | int idx; |
413 | 411 | ||
414 | idx = srcu_read_lock(&kvm->srcu); | 412 | idx = srcu_read_lock(&kvm->srcu); |
415 | kvm_arch_flush_shadow(kvm); | 413 | kvm_arch_flush_shadow_all(kvm); |
416 | srcu_read_unlock(&kvm->srcu, idx); | 414 | srcu_read_unlock(&kvm->srcu, idx); |
417 | } | 415 | } |
418 | 416 | ||
@@ -551,16 +549,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) | |||
551 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | 549 | static void kvm_free_physmem_slot(struct kvm_memory_slot *free, |
552 | struct kvm_memory_slot *dont) | 550 | struct kvm_memory_slot *dont) |
553 | { | 551 | { |
554 | if (!dont || free->rmap != dont->rmap) | ||
555 | vfree(free->rmap); | ||
556 | |||
557 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) | 552 | if (!dont || free->dirty_bitmap != dont->dirty_bitmap) |
558 | kvm_destroy_dirty_bitmap(free); | 553 | kvm_destroy_dirty_bitmap(free); |
559 | 554 | ||
560 | kvm_arch_free_memslot(free, dont); | 555 | kvm_arch_free_memslot(free, dont); |
561 | 556 | ||
562 | free->npages = 0; | 557 | free->npages = 0; |
563 | free->rmap = NULL; | ||
564 | } | 558 | } |
565 | 559 | ||
566 | void kvm_free_physmem(struct kvm *kvm) | 560 | void kvm_free_physmem(struct kvm *kvm) |
@@ -590,7 +584,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
590 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 584 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
591 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 585 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
592 | #else | 586 | #else |
593 | kvm_arch_flush_shadow(kvm); | 587 | kvm_arch_flush_shadow_all(kvm); |
594 | #endif | 588 | #endif |
595 | kvm_arch_destroy_vm(kvm); | 589 | kvm_arch_destroy_vm(kvm); |
596 | kvm_free_physmem(kvm); | 590 | kvm_free_physmem(kvm); |
@@ -686,6 +680,20 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) | |||
686 | slots->generation++; | 680 | slots->generation++; |
687 | } | 681 | } |
688 | 682 | ||
683 | static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) | ||
684 | { | ||
685 | u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES; | ||
686 | |||
687 | #ifdef KVM_CAP_READONLY_MEM | ||
688 | valid_flags |= KVM_MEM_READONLY; | ||
689 | #endif | ||
690 | |||
691 | if (mem->flags & ~valid_flags) | ||
692 | return -EINVAL; | ||
693 | |||
694 | return 0; | ||
695 | } | ||
696 | |||
689 | /* | 697 | /* |
690 | * Allocate some memory and give it an address in the guest physical address | 698 | * Allocate some memory and give it an address in the guest physical address |
691 | * space. | 699 | * space. |
@@ -706,6 +714,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
706 | struct kvm_memory_slot old, new; | 714 | struct kvm_memory_slot old, new; |
707 | struct kvm_memslots *slots, *old_memslots; | 715 | struct kvm_memslots *slots, *old_memslots; |
708 | 716 | ||
717 | r = check_memory_region_flags(mem); | ||
718 | if (r) | ||
719 | goto out; | ||
720 | |||
709 | r = -EINVAL; | 721 | r = -EINVAL; |
710 | /* General sanity checks */ | 722 | /* General sanity checks */ |
711 | if (mem->memory_size & (PAGE_SIZE - 1)) | 723 | if (mem->memory_size & (PAGE_SIZE - 1)) |
@@ -769,11 +781,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
769 | if (npages && !old.npages) { | 781 | if (npages && !old.npages) { |
770 | new.user_alloc = user_alloc; | 782 | new.user_alloc = user_alloc; |
771 | new.userspace_addr = mem->userspace_addr; | 783 | new.userspace_addr = mem->userspace_addr; |
772 | #ifndef CONFIG_S390 | 784 | |
773 | new.rmap = vzalloc(npages * sizeof(*new.rmap)); | ||
774 | if (!new.rmap) | ||
775 | goto out_free; | ||
776 | #endif /* not defined CONFIG_S390 */ | ||
777 | if (kvm_arch_create_memslot(&new, npages)) | 785 | if (kvm_arch_create_memslot(&new, npages)) |
778 | goto out_free; | 786 | goto out_free; |
779 | } | 787 | } |
@@ -785,7 +793,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
785 | /* destroy any largepage mappings for dirty tracking */ | 793 | /* destroy any largepage mappings for dirty tracking */ |
786 | } | 794 | } |
787 | 795 | ||
788 | if (!npages) { | 796 | if (!npages || base_gfn != old.base_gfn) { |
789 | struct kvm_memory_slot *slot; | 797 | struct kvm_memory_slot *slot; |
790 | 798 | ||
791 | r = -ENOMEM; | 799 | r = -ENOMEM; |
@@ -801,14 +809,14 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
801 | old_memslots = kvm->memslots; | 809 | old_memslots = kvm->memslots; |
802 | rcu_assign_pointer(kvm->memslots, slots); | 810 | rcu_assign_pointer(kvm->memslots, slots); |
803 | synchronize_srcu_expedited(&kvm->srcu); | 811 | synchronize_srcu_expedited(&kvm->srcu); |
804 | /* From this point no new shadow pages pointing to a deleted | 812 | /* From this point no new shadow pages pointing to a deleted, |
805 | * memslot will be created. | 813 | * or moved, memslot will be created. |
806 | * | 814 | * |
807 | * validation of sp->gfn happens in: | 815 | * validation of sp->gfn happens in: |
808 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) | 816 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) |
809 | * - kvm_is_visible_gfn (mmu_check_roots) | 817 | * - kvm_is_visible_gfn (mmu_check_roots) |
810 | */ | 818 | */ |
811 | kvm_arch_flush_shadow(kvm); | 819 | kvm_arch_flush_shadow_memslot(kvm, slot); |
812 | kfree(old_memslots); | 820 | kfree(old_memslots); |
813 | } | 821 | } |
814 | 822 | ||
@@ -832,7 +840,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
832 | 840 | ||
833 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | 841 | /* actual memory is freed via old in kvm_free_physmem_slot below */ |
834 | if (!npages) { | 842 | if (!npages) { |
835 | new.rmap = NULL; | ||
836 | new.dirty_bitmap = NULL; | 843 | new.dirty_bitmap = NULL; |
837 | memset(&new.arch, 0, sizeof(new.arch)); | 844 | memset(&new.arch, 0, sizeof(new.arch)); |
838 | } | 845 | } |
@@ -844,13 +851,6 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
844 | 851 | ||
845 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | 852 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); |
846 | 853 | ||
847 | /* | ||
848 | * If the new memory slot is created, we need to clear all | ||
849 | * mmio sptes. | ||
850 | */ | ||
851 | if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) | ||
852 | kvm_arch_flush_shadow(kvm); | ||
853 | |||
854 | kvm_free_physmem_slot(&old, &new); | 854 | kvm_free_physmem_slot(&old, &new); |
855 | kfree(old_memslots); | 855 | kfree(old_memslots); |
856 | 856 | ||
@@ -932,53 +932,6 @@ void kvm_disable_largepages(void) | |||
932 | } | 932 | } |
933 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); | 933 | EXPORT_SYMBOL_GPL(kvm_disable_largepages); |
934 | 934 | ||
935 | int is_error_page(struct page *page) | ||
936 | { | ||
937 | return page == bad_page || page == hwpoison_page || page == fault_page; | ||
938 | } | ||
939 | EXPORT_SYMBOL_GPL(is_error_page); | ||
940 | |||
941 | int is_error_pfn(pfn_t pfn) | ||
942 | { | ||
943 | return pfn == bad_pfn || pfn == hwpoison_pfn || pfn == fault_pfn; | ||
944 | } | ||
945 | EXPORT_SYMBOL_GPL(is_error_pfn); | ||
946 | |||
947 | int is_hwpoison_pfn(pfn_t pfn) | ||
948 | { | ||
949 | return pfn == hwpoison_pfn; | ||
950 | } | ||
951 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); | ||
952 | |||
953 | int is_fault_pfn(pfn_t pfn) | ||
954 | { | ||
955 | return pfn == fault_pfn; | ||
956 | } | ||
957 | EXPORT_SYMBOL_GPL(is_fault_pfn); | ||
958 | |||
959 | int is_noslot_pfn(pfn_t pfn) | ||
960 | { | ||
961 | return pfn == bad_pfn; | ||
962 | } | ||
963 | EXPORT_SYMBOL_GPL(is_noslot_pfn); | ||
964 | |||
965 | int is_invalid_pfn(pfn_t pfn) | ||
966 | { | ||
967 | return pfn == hwpoison_pfn || pfn == fault_pfn; | ||
968 | } | ||
969 | EXPORT_SYMBOL_GPL(is_invalid_pfn); | ||
970 | |||
971 | static inline unsigned long bad_hva(void) | ||
972 | { | ||
973 | return PAGE_OFFSET; | ||
974 | } | ||
975 | |||
976 | int kvm_is_error_hva(unsigned long addr) | ||
977 | { | ||
978 | return addr == bad_hva(); | ||
979 | } | ||
980 | EXPORT_SYMBOL_GPL(kvm_is_error_hva); | ||
981 | |||
982 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | 935 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) |
983 | { | 936 | { |
984 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); | 937 | return __gfn_to_memslot(kvm_memslots(kvm), gfn); |
@@ -1021,28 +974,62 @@ out: | |||
1021 | return size; | 974 | return size; |
1022 | } | 975 | } |
1023 | 976 | ||
1024 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | 977 | static bool memslot_is_readonly(struct kvm_memory_slot *slot) |
1025 | gfn_t *nr_pages) | 978 | { |
979 | return slot->flags & KVM_MEM_READONLY; | ||
980 | } | ||
981 | |||
982 | static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | ||
983 | gfn_t *nr_pages, bool write) | ||
1026 | { | 984 | { |
1027 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) | 985 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
1028 | return bad_hva(); | 986 | return KVM_HVA_ERR_BAD; |
987 | |||
988 | if (memslot_is_readonly(slot) && write) | ||
989 | return KVM_HVA_ERR_RO_BAD; | ||
1029 | 990 | ||
1030 | if (nr_pages) | 991 | if (nr_pages) |
1031 | *nr_pages = slot->npages - (gfn - slot->base_gfn); | 992 | *nr_pages = slot->npages - (gfn - slot->base_gfn); |
1032 | 993 | ||
1033 | return gfn_to_hva_memslot(slot, gfn); | 994 | return __gfn_to_hva_memslot(slot, gfn); |
1034 | } | 995 | } |
1035 | 996 | ||
997 | static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, | ||
998 | gfn_t *nr_pages) | ||
999 | { | ||
1000 | return __gfn_to_hva_many(slot, gfn, nr_pages, true); | ||
1001 | } | ||
1002 | |||
1003 | unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, | ||
1004 | gfn_t gfn) | ||
1005 | { | ||
1006 | return gfn_to_hva_many(slot, gfn, NULL); | ||
1007 | } | ||
1008 | EXPORT_SYMBOL_GPL(gfn_to_hva_memslot); | ||
1009 | |||
1036 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 1010 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
1037 | { | 1011 | { |
1038 | return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); | 1012 | return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); |
1039 | } | 1013 | } |
1040 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 1014 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
1041 | 1015 | ||
1042 | static pfn_t get_fault_pfn(void) | 1016 | /* |
1017 | * The hva returned by this function is only allowed to be read. | ||
1018 | * It should pair with kvm_read_hva() or kvm_read_hva_atomic(). | ||
1019 | */ | ||
1020 | static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) | ||
1021 | { | ||
1022 | return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false); | ||
1023 | } | ||
1024 | |||
1025 | static int kvm_read_hva(void *data, void __user *hva, int len) | ||
1043 | { | 1026 | { |
1044 | get_page(fault_page); | 1027 | return __copy_from_user(data, hva, len); |
1045 | return fault_pfn; | 1028 | } |
1029 | |||
1030 | static int kvm_read_hva_atomic(void *data, void __user *hva, int len) | ||
1031 | { | ||
1032 | return __copy_from_user_inatomic(data, hva, len); | ||
1046 | } | 1033 | } |
1047 | 1034 | ||
1048 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | 1035 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
@@ -1065,108 +1052,186 @@ static inline int check_user_page_hwpoison(unsigned long addr) | |||
1065 | return rc == -EHWPOISON; | 1052 | return rc == -EHWPOISON; |
1066 | } | 1053 | } |
1067 | 1054 | ||
1068 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, | 1055 | /* |
1069 | bool *async, bool write_fault, bool *writable) | 1056 | * The atomic path to get the writable pfn which will be stored in @pfn, |
1057 | * true indicates success, otherwise false is returned. | ||
1058 | */ | ||
1059 | static bool hva_to_pfn_fast(unsigned long addr, bool atomic, bool *async, | ||
1060 | bool write_fault, bool *writable, pfn_t *pfn) | ||
1070 | { | 1061 | { |
1071 | struct page *page[1]; | 1062 | struct page *page[1]; |
1072 | int npages = 0; | 1063 | int npages; |
1073 | pfn_t pfn; | ||
1074 | 1064 | ||
1075 | /* we can do it either atomically or asynchronously, not both */ | 1065 | if (!(async || atomic)) |
1076 | BUG_ON(atomic && async); | 1066 | return false; |
1077 | 1067 | ||
1078 | BUG_ON(!write_fault && !writable); | 1068 | /* |
1069 | * Fast pin a writable pfn only if it is a write fault request | ||
1070 | * or the caller allows to map a writable pfn for a read fault | ||
1071 | * request. | ||
1072 | */ | ||
1073 | if (!(write_fault || writable)) | ||
1074 | return false; | ||
1079 | 1075 | ||
1080 | if (writable) | 1076 | npages = __get_user_pages_fast(addr, 1, 1, page); |
1081 | *writable = true; | 1077 | if (npages == 1) { |
1078 | *pfn = page_to_pfn(page[0]); | ||
1082 | 1079 | ||
1083 | if (atomic || async) | 1080 | if (writable) |
1084 | npages = __get_user_pages_fast(addr, 1, 1, page); | 1081 | *writable = true; |
1082 | return true; | ||
1083 | } | ||
1085 | 1084 | ||
1086 | if (unlikely(npages != 1) && !atomic) { | 1085 | return false; |
1087 | might_sleep(); | 1086 | } |
1088 | 1087 | ||
1089 | if (writable) | 1088 | /* |
1090 | *writable = write_fault; | 1089 | * The slow path to get the pfn of the specified host virtual address, |
1090 | * 1 indicates success, -errno is returned if error is detected. | ||
1091 | */ | ||
1092 | static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, | ||
1093 | bool *writable, pfn_t *pfn) | ||
1094 | { | ||
1095 | struct page *page[1]; | ||
1096 | int npages = 0; | ||
1091 | 1097 | ||
1092 | if (async) { | 1098 | might_sleep(); |
1093 | down_read(¤t->mm->mmap_sem); | 1099 | |
1094 | npages = get_user_page_nowait(current, current->mm, | 1100 | if (writable) |
1095 | addr, write_fault, page); | 1101 | *writable = write_fault; |
1096 | up_read(¤t->mm->mmap_sem); | 1102 | |
1097 | } else | 1103 | if (async) { |
1098 | npages = get_user_pages_fast(addr, 1, write_fault, | 1104 | down_read(¤t->mm->mmap_sem); |
1099 | page); | 1105 | npages = get_user_page_nowait(current, current->mm, |
1100 | 1106 | addr, write_fault, page); | |
1101 | /* map read fault as writable if possible */ | 1107 | up_read(¤t->mm->mmap_sem); |
1102 | if (unlikely(!write_fault) && npages == 1) { | 1108 | } else |
1103 | struct page *wpage[1]; | 1109 | npages = get_user_pages_fast(addr, 1, write_fault, |
1104 | 1110 | page); | |
1105 | npages = __get_user_pages_fast(addr, 1, 1, wpage); | 1111 | if (npages != 1) |
1106 | if (npages == 1) { | 1112 | return npages; |
1107 | *writable = true; | 1113 | |
1108 | put_page(page[0]); | 1114 | /* map read fault as writable if possible */ |
1109 | page[0] = wpage[0]; | 1115 | if (unlikely(!write_fault) && writable) { |
1110 | } | 1116 | struct page *wpage[1]; |
1111 | npages = 1; | 1117 | |
1118 | npages = __get_user_pages_fast(addr, 1, 1, wpage); | ||
1119 | if (npages == 1) { | ||
1120 | *writable = true; | ||
1121 | put_page(page[0]); | ||
1122 | page[0] = wpage[0]; | ||
1112 | } | 1123 | } |
1124 | |||
1125 | npages = 1; | ||
1113 | } | 1126 | } |
1127 | *pfn = page_to_pfn(page[0]); | ||
1128 | return npages; | ||
1129 | } | ||
1114 | 1130 | ||
1115 | if (unlikely(npages != 1)) { | 1131 | static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) |
1116 | struct vm_area_struct *vma; | 1132 | { |
1133 | if (unlikely(!(vma->vm_flags & VM_READ))) | ||
1134 | return false; | ||
1117 | 1135 | ||
1118 | if (atomic) | 1136 | if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE)))) |
1119 | return get_fault_pfn(); | 1137 | return false; |
1120 | 1138 | ||
1121 | down_read(¤t->mm->mmap_sem); | 1139 | return true; |
1122 | if (npages == -EHWPOISON || | 1140 | } |
1123 | (!async && check_user_page_hwpoison(addr))) { | ||
1124 | up_read(¤t->mm->mmap_sem); | ||
1125 | get_page(hwpoison_page); | ||
1126 | return page_to_pfn(hwpoison_page); | ||
1127 | } | ||
1128 | 1141 | ||
1129 | vma = find_vma_intersection(current->mm, addr, addr+1); | 1142 | /* |
1130 | 1143 | * Pin guest page in memory and return its pfn. | |
1131 | if (vma == NULL) | 1144 | * @addr: host virtual address which maps memory to the guest |
1132 | pfn = get_fault_pfn(); | 1145 | * @atomic: whether this function can sleep |
1133 | else if ((vma->vm_flags & VM_PFNMAP)) { | 1146 | * @async: whether this function need to wait IO complete if the |
1134 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + | 1147 | * host page is not in the memory |
1135 | vma->vm_pgoff; | 1148 | * @write_fault: whether we should get a writable host page |
1136 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | 1149 | * @writable: whether it allows to map a writable host page for !@write_fault |
1137 | } else { | 1150 | * |
1138 | if (async && (vma->vm_flags & VM_WRITE)) | 1151 | * The function will map a writable host page for these two cases: |
1139 | *async = true; | 1152 | * 1): @write_fault = true |
1140 | pfn = get_fault_pfn(); | 1153 | * 2): @write_fault = false && @writable, @writable will tell the caller |
1141 | } | 1154 | * whether the mapping is writable. |
1142 | up_read(¤t->mm->mmap_sem); | 1155 | */ |
1143 | } else | 1156 | static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, |
1144 | pfn = page_to_pfn(page[0]); | 1157 | bool write_fault, bool *writable) |
1158 | { | ||
1159 | struct vm_area_struct *vma; | ||
1160 | pfn_t pfn = 0; | ||
1161 | int npages; | ||
1162 | |||
1163 | /* we can do it either atomically or asynchronously, not both */ | ||
1164 | BUG_ON(atomic && async); | ||
1145 | 1165 | ||
1166 | if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) | ||
1167 | return pfn; | ||
1168 | |||
1169 | if (atomic) | ||
1170 | return KVM_PFN_ERR_FAULT; | ||
1171 | |||
1172 | npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn); | ||
1173 | if (npages == 1) | ||
1174 | return pfn; | ||
1175 | |||
1176 | down_read(¤t->mm->mmap_sem); | ||
1177 | if (npages == -EHWPOISON || | ||
1178 | (!async && check_user_page_hwpoison(addr))) { | ||
1179 | pfn = KVM_PFN_ERR_HWPOISON; | ||
1180 | goto exit; | ||
1181 | } | ||
1182 | |||
1183 | vma = find_vma_intersection(current->mm, addr, addr + 1); | ||
1184 | |||
1185 | if (vma == NULL) | ||
1186 | pfn = KVM_PFN_ERR_FAULT; | ||
1187 | else if ((vma->vm_flags & VM_PFNMAP)) { | ||
1188 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + | ||
1189 | vma->vm_pgoff; | ||
1190 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
1191 | } else { | ||
1192 | if (async && vma_is_valid(vma, write_fault)) | ||
1193 | *async = true; | ||
1194 | pfn = KVM_PFN_ERR_FAULT; | ||
1195 | } | ||
1196 | exit: | ||
1197 | up_read(¤t->mm->mmap_sem); | ||
1146 | return pfn; | 1198 | return pfn; |
1147 | } | 1199 | } |
1148 | 1200 | ||
1149 | pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) | 1201 | static pfn_t |
1202 | __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, | ||
1203 | bool *async, bool write_fault, bool *writable) | ||
1150 | { | 1204 | { |
1151 | return hva_to_pfn(kvm, addr, true, NULL, true, NULL); | 1205 | unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); |
1206 | |||
1207 | if (addr == KVM_HVA_ERR_RO_BAD) | ||
1208 | return KVM_PFN_ERR_RO_FAULT; | ||
1209 | |||
1210 | if (kvm_is_error_hva(addr)) | ||
1211 | return KVM_PFN_ERR_BAD; | ||
1212 | |||
1213 | /* Do not map writable pfn in the readonly memslot. */ | ||
1214 | if (writable && memslot_is_readonly(slot)) { | ||
1215 | *writable = false; | ||
1216 | writable = NULL; | ||
1217 | } | ||
1218 | |||
1219 | return hva_to_pfn(addr, atomic, async, write_fault, | ||
1220 | writable); | ||
1152 | } | 1221 | } |
1153 | EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); | ||
1154 | 1222 | ||
1155 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, | 1223 | static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, |
1156 | bool write_fault, bool *writable) | 1224 | bool write_fault, bool *writable) |
1157 | { | 1225 | { |
1158 | unsigned long addr; | 1226 | struct kvm_memory_slot *slot; |
1159 | 1227 | ||
1160 | if (async) | 1228 | if (async) |
1161 | *async = false; | 1229 | *async = false; |
1162 | 1230 | ||
1163 | addr = gfn_to_hva(kvm, gfn); | 1231 | slot = gfn_to_memslot(kvm, gfn); |
1164 | if (kvm_is_error_hva(addr)) { | ||
1165 | get_page(bad_page); | ||
1166 | return page_to_pfn(bad_page); | ||
1167 | } | ||
1168 | 1232 | ||
1169 | return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); | 1233 | return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault, |
1234 | writable); | ||
1170 | } | 1235 | } |
1171 | 1236 | ||
1172 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) | 1237 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) |
@@ -1195,12 +1260,16 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | |||
1195 | } | 1260 | } |
1196 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | 1261 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); |
1197 | 1262 | ||
1198 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 1263 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) |
1199 | struct kvm_memory_slot *slot, gfn_t gfn) | 1264 | { |
1265 | return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); | ||
1266 | } | ||
1267 | |||
1268 | pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) | ||
1200 | { | 1269 | { |
1201 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | 1270 | return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); |
1202 | return hva_to_pfn(kvm, addr, false, NULL, true, NULL); | ||
1203 | } | 1271 | } |
1272 | EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); | ||
1204 | 1273 | ||
1205 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | 1274 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, |
1206 | int nr_pages) | 1275 | int nr_pages) |
@@ -1219,30 +1288,42 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | |||
1219 | } | 1288 | } |
1220 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); | 1289 | EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); |
1221 | 1290 | ||
1291 | static struct page *kvm_pfn_to_page(pfn_t pfn) | ||
1292 | { | ||
1293 | if (is_error_pfn(pfn)) | ||
1294 | return KVM_ERR_PTR_BAD_PAGE; | ||
1295 | |||
1296 | if (kvm_is_mmio_pfn(pfn)) { | ||
1297 | WARN_ON(1); | ||
1298 | return KVM_ERR_PTR_BAD_PAGE; | ||
1299 | } | ||
1300 | |||
1301 | return pfn_to_page(pfn); | ||
1302 | } | ||
1303 | |||
1222 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 1304 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
1223 | { | 1305 | { |
1224 | pfn_t pfn; | 1306 | pfn_t pfn; |
1225 | 1307 | ||
1226 | pfn = gfn_to_pfn(kvm, gfn); | 1308 | pfn = gfn_to_pfn(kvm, gfn); |
1227 | if (!kvm_is_mmio_pfn(pfn)) | ||
1228 | return pfn_to_page(pfn); | ||
1229 | |||
1230 | WARN_ON(kvm_is_mmio_pfn(pfn)); | ||
1231 | 1309 | ||
1232 | get_page(bad_page); | 1310 | return kvm_pfn_to_page(pfn); |
1233 | return bad_page; | ||
1234 | } | 1311 | } |
1235 | 1312 | ||
1236 | EXPORT_SYMBOL_GPL(gfn_to_page); | 1313 | EXPORT_SYMBOL_GPL(gfn_to_page); |
1237 | 1314 | ||
1238 | void kvm_release_page_clean(struct page *page) | 1315 | void kvm_release_page_clean(struct page *page) |
1239 | { | 1316 | { |
1317 | WARN_ON(is_error_page(page)); | ||
1318 | |||
1240 | kvm_release_pfn_clean(page_to_pfn(page)); | 1319 | kvm_release_pfn_clean(page_to_pfn(page)); |
1241 | } | 1320 | } |
1242 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); | 1321 | EXPORT_SYMBOL_GPL(kvm_release_page_clean); |
1243 | 1322 | ||
1244 | void kvm_release_pfn_clean(pfn_t pfn) | 1323 | void kvm_release_pfn_clean(pfn_t pfn) |
1245 | { | 1324 | { |
1325 | WARN_ON(is_error_pfn(pfn)); | ||
1326 | |||
1246 | if (!kvm_is_mmio_pfn(pfn)) | 1327 | if (!kvm_is_mmio_pfn(pfn)) |
1247 | put_page(pfn_to_page(pfn)); | 1328 | put_page(pfn_to_page(pfn)); |
1248 | } | 1329 | } |
@@ -1250,6 +1331,8 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | |||
1250 | 1331 | ||
1251 | void kvm_release_page_dirty(struct page *page) | 1332 | void kvm_release_page_dirty(struct page *page) |
1252 | { | 1333 | { |
1334 | WARN_ON(is_error_page(page)); | ||
1335 | |||
1253 | kvm_release_pfn_dirty(page_to_pfn(page)); | 1336 | kvm_release_pfn_dirty(page_to_pfn(page)); |
1254 | } | 1337 | } |
1255 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); | 1338 | EXPORT_SYMBOL_GPL(kvm_release_page_dirty); |
@@ -1305,10 +1388,10 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | |||
1305 | int r; | 1388 | int r; |
1306 | unsigned long addr; | 1389 | unsigned long addr; |
1307 | 1390 | ||
1308 | addr = gfn_to_hva(kvm, gfn); | 1391 | addr = gfn_to_hva_read(kvm, gfn); |
1309 | if (kvm_is_error_hva(addr)) | 1392 | if (kvm_is_error_hva(addr)) |
1310 | return -EFAULT; | 1393 | return -EFAULT; |
1311 | r = __copy_from_user(data, (void __user *)addr + offset, len); | 1394 | r = kvm_read_hva(data, (void __user *)addr + offset, len); |
1312 | if (r) | 1395 | if (r) |
1313 | return -EFAULT; | 1396 | return -EFAULT; |
1314 | return 0; | 1397 | return 0; |
@@ -1343,11 +1426,11 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, | |||
1343 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1426 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1344 | int offset = offset_in_page(gpa); | 1427 | int offset = offset_in_page(gpa); |
1345 | 1428 | ||
1346 | addr = gfn_to_hva(kvm, gfn); | 1429 | addr = gfn_to_hva_read(kvm, gfn); |
1347 | if (kvm_is_error_hva(addr)) | 1430 | if (kvm_is_error_hva(addr)) |
1348 | return -EFAULT; | 1431 | return -EFAULT; |
1349 | pagefault_disable(); | 1432 | pagefault_disable(); |
1350 | r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); | 1433 | r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); |
1351 | pagefault_enable(); | 1434 | pagefault_enable(); |
1352 | if (r) | 1435 | if (r) |
1353 | return -EFAULT; | 1436 | return -EFAULT; |
@@ -1580,6 +1663,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) | |||
1580 | } | 1663 | } |
1581 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); | 1664 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); |
1582 | 1665 | ||
1666 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | ||
1667 | /* | ||
1668 | * Helper that checks whether a VCPU is eligible for directed yield. | ||
1669 | * Most eligible candidate to yield is decided by following heuristics: | ||
1670 | * | ||
1671 | * (a) VCPU which has not done pl-exit or cpu relax intercepted recently | ||
1672 | * (preempted lock holder), indicated by @in_spin_loop. | ||
1673 | * Set at the beiginning and cleared at the end of interception/PLE handler. | ||
1674 | * | ||
1675 | * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get | ||
1676 | * chance last time (mostly it has become eligible now since we have probably | ||
1677 | * yielded to lockholder in last iteration. This is done by toggling | ||
1678 | * @dy_eligible each time a VCPU checked for eligibility.) | ||
1679 | * | ||
1680 | * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding | ||
1681 | * to preempted lock-holder could result in wrong VCPU selection and CPU | ||
1682 | * burning. Giving priority for a potential lock-holder increases lock | ||
1683 | * progress. | ||
1684 | * | ||
1685 | * Since algorithm is based on heuristics, accessing another VCPU data without | ||
1686 | * locking does not harm. It may result in trying to yield to same VCPU, fail | ||
1687 | * and continue with next VCPU and so on. | ||
1688 | */ | ||
1689 | bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | ||
1690 | { | ||
1691 | bool eligible; | ||
1692 | |||
1693 | eligible = !vcpu->spin_loop.in_spin_loop || | ||
1694 | (vcpu->spin_loop.in_spin_loop && | ||
1695 | vcpu->spin_loop.dy_eligible); | ||
1696 | |||
1697 | if (vcpu->spin_loop.in_spin_loop) | ||
1698 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); | ||
1699 | |||
1700 | return eligible; | ||
1701 | } | ||
1702 | #endif | ||
1583 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 1703 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) |
1584 | { | 1704 | { |
1585 | struct kvm *kvm = me->kvm; | 1705 | struct kvm *kvm = me->kvm; |
@@ -1589,6 +1709,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1589 | int pass; | 1709 | int pass; |
1590 | int i; | 1710 | int i; |
1591 | 1711 | ||
1712 | kvm_vcpu_set_in_spin_loop(me, true); | ||
1592 | /* | 1713 | /* |
1593 | * We boost the priority of a VCPU that is runnable but not | 1714 | * We boost the priority of a VCPU that is runnable but not |
1594 | * currently running, because it got preempted by something | 1715 | * currently running, because it got preempted by something |
@@ -1607,6 +1728,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1607 | continue; | 1728 | continue; |
1608 | if (waitqueue_active(&vcpu->wq)) | 1729 | if (waitqueue_active(&vcpu->wq)) |
1609 | continue; | 1730 | continue; |
1731 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | ||
1732 | continue; | ||
1610 | if (kvm_vcpu_yield_to(vcpu)) { | 1733 | if (kvm_vcpu_yield_to(vcpu)) { |
1611 | kvm->last_boosted_vcpu = i; | 1734 | kvm->last_boosted_vcpu = i; |
1612 | yielded = 1; | 1735 | yielded = 1; |
@@ -1614,6 +1737,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
1614 | } | 1737 | } |
1615 | } | 1738 | } |
1616 | } | 1739 | } |
1740 | kvm_vcpu_set_in_spin_loop(me, false); | ||
1741 | |||
1742 | /* Ensure vcpu is not eligible during next spinloop */ | ||
1743 | kvm_vcpu_set_dy_eligible(me, false); | ||
1617 | } | 1744 | } |
1618 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | 1745 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); |
1619 | 1746 | ||
@@ -1766,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
1766 | #endif | 1893 | #endif |
1767 | 1894 | ||
1768 | 1895 | ||
1769 | vcpu_load(vcpu); | 1896 | r = vcpu_load(vcpu); |
1897 | if (r) | ||
1898 | return r; | ||
1770 | switch (ioctl) { | 1899 | switch (ioctl) { |
1771 | case KVM_RUN: | 1900 | case KVM_RUN: |
1772 | r = -EINVAL; | 1901 | r = -EINVAL; |
@@ -2094,6 +2223,29 @@ static long kvm_vm_ioctl(struct file *filp, | |||
2094 | break; | 2223 | break; |
2095 | } | 2224 | } |
2096 | #endif | 2225 | #endif |
2226 | #ifdef __KVM_HAVE_IRQ_LINE | ||
2227 | case KVM_IRQ_LINE_STATUS: | ||
2228 | case KVM_IRQ_LINE: { | ||
2229 | struct kvm_irq_level irq_event; | ||
2230 | |||
2231 | r = -EFAULT; | ||
2232 | if (copy_from_user(&irq_event, argp, sizeof irq_event)) | ||
2233 | goto out; | ||
2234 | |||
2235 | r = kvm_vm_ioctl_irq_line(kvm, &irq_event); | ||
2236 | if (r) | ||
2237 | goto out; | ||
2238 | |||
2239 | r = -EFAULT; | ||
2240 | if (ioctl == KVM_IRQ_LINE_STATUS) { | ||
2241 | if (copy_to_user(argp, &irq_event, sizeof irq_event)) | ||
2242 | goto out; | ||
2243 | } | ||
2244 | |||
2245 | r = 0; | ||
2246 | break; | ||
2247 | } | ||
2248 | #endif | ||
2097 | default: | 2249 | default: |
2098 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 2250 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
2099 | if (r == -ENOTTY) | 2251 | if (r == -ENOTTY) |
@@ -2698,9 +2850,6 @@ static struct syscore_ops kvm_syscore_ops = { | |||
2698 | .resume = kvm_resume, | 2850 | .resume = kvm_resume, |
2699 | }; | 2851 | }; |
2700 | 2852 | ||
2701 | struct page *bad_page; | ||
2702 | pfn_t bad_pfn; | ||
2703 | |||
2704 | static inline | 2853 | static inline |
2705 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 2854 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
2706 | { | 2855 | { |
@@ -2732,33 +2881,6 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | |||
2732 | if (r) | 2881 | if (r) |
2733 | goto out_fail; | 2882 | goto out_fail; |
2734 | 2883 | ||
2735 | bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2736 | |||
2737 | if (bad_page == NULL) { | ||
2738 | r = -ENOMEM; | ||
2739 | goto out; | ||
2740 | } | ||
2741 | |||
2742 | bad_pfn = page_to_pfn(bad_page); | ||
2743 | |||
2744 | hwpoison_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2745 | |||
2746 | if (hwpoison_page == NULL) { | ||
2747 | r = -ENOMEM; | ||
2748 | goto out_free_0; | ||
2749 | } | ||
2750 | |||
2751 | hwpoison_pfn = page_to_pfn(hwpoison_page); | ||
2752 | |||
2753 | fault_page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
2754 | |||
2755 | if (fault_page == NULL) { | ||
2756 | r = -ENOMEM; | ||
2757 | goto out_free_0; | ||
2758 | } | ||
2759 | |||
2760 | fault_pfn = page_to_pfn(fault_page); | ||
2761 | |||
2762 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { | 2884 | if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { |
2763 | r = -ENOMEM; | 2885 | r = -ENOMEM; |
2764 | goto out_free_0; | 2886 | goto out_free_0; |
@@ -2833,12 +2955,6 @@ out_free_1: | |||
2833 | out_free_0a: | 2955 | out_free_0a: |
2834 | free_cpumask_var(cpus_hardware_enabled); | 2956 | free_cpumask_var(cpus_hardware_enabled); |
2835 | out_free_0: | 2957 | out_free_0: |
2836 | if (fault_page) | ||
2837 | __free_page(fault_page); | ||
2838 | if (hwpoison_page) | ||
2839 | __free_page(hwpoison_page); | ||
2840 | __free_page(bad_page); | ||
2841 | out: | ||
2842 | kvm_arch_exit(); | 2958 | kvm_arch_exit(); |
2843 | out_fail: | 2959 | out_fail: |
2844 | return r; | 2960 | return r; |
@@ -2858,8 +2974,5 @@ void kvm_exit(void) | |||
2858 | kvm_arch_hardware_unsetup(); | 2974 | kvm_arch_hardware_unsetup(); |
2859 | kvm_arch_exit(); | 2975 | kvm_arch_exit(); |
2860 | free_cpumask_var(cpus_hardware_enabled); | 2976 | free_cpumask_var(cpus_hardware_enabled); |
2861 | __free_page(fault_page); | ||
2862 | __free_page(hwpoison_page); | ||
2863 | __free_page(bad_page); | ||
2864 | } | 2977 | } |
2865 | EXPORT_SYMBOL_GPL(kvm_exit); | 2978 | EXPORT_SYMBOL_GPL(kvm_exit); |
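
The read-only memory mentioned in the pull message shows up in kvm_main.c as check_memory_region_flags() accepting KVM_MEM_READONLY and as the KVM_HVA_ERR_RO_BAD / KVM_PFN_ERR_RO_FAULT paths in the gfn-to-pfn code. A hedged userspace sketch of registering such a slot — assuming the KVM_MEM_READONLY flag and KVM_CAP_READONLY_MEM capability introduced by this series are present in the installed headers; kvm_fd, vm_fd, slot, gpa, size and hva are placeholders:

```c
#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* Hypothetical helper: kvm_fd is the /dev/kvm fd, vm_fd the VM fd; slot, gpa,
 * size and the mmap'ed host buffer 'hva' are placeholders. */
static int map_readonly_slot(int kvm_fd, int vm_fd, uint32_t slot,
			     uint64_t gpa, uint64_t size, void *hva)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = KVM_MEM_READONLY,	/* guest writes leave the fast path */
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uintptr_t)hva,
	};

	/* Only attempt this where the kernel advertises the capability. */
	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) <= 0)
		return -1;

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}
```

Guest writes to such a slot are not silently dropped; they exit to userspace, which is what lets a device model emulate ROM-like behaviour on top of ordinary host memory.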